1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : PT_buildtree.cxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #include "probe.h" |
---|
12 | #include <PT_server_prototypes.h> |
---|
13 | #include "probe_tree.h" |
---|
14 | #include "pt_prototypes.h" |
---|
15 | #include "PT_partition.h" |
---|
16 | |
---|
17 | #include <arb_defs.h> |
---|
18 | #include <arb_file.h> |
---|
19 | #include <arb_misc.h> |
---|
20 | #include <arb_diff.h> |
---|
21 | |
---|
22 | #include <arb_progress.h> |
---|
23 | |
---|
24 | #include <unistd.h> |
---|
25 | #include <ctype.h> |
---|
26 | |
---|
27 | // #define PTM_TRACE_MAX_MEM_USAGE |
---|
28 | |
---|
29 | // AISC_MKPT_PROMOTE: class DataLoc; |
---|
30 | |
---|
31 | static POS_TREE1 *build_pos_tree(POS_TREE1 *const root, const ReadableDataLoc& loc) { |
---|
32 | POS_TREE1 *at = root; |
---|
33 | int height = 0; |
---|
34 | |
---|
35 | while (at->is_node()) { // now we got an inner node |
---|
36 | POS_TREE1 *pt_next = PT_read_son(at, loc[height]); |
---|
37 | if (!pt_next) { // there is no son of that type -> simply add the new son to that path |
---|
38 | POS_TREE1 *new_root = root; |
---|
39 | POS_TREE1 *leaf; |
---|
40 | { |
---|
41 | bool atRoot = (at == root); |
---|
42 | |
---|
43 | leaf = PT_create_leaf(&at, loc[height], loc); |
---|
44 | ++height; |
---|
45 | |
---|
46 | if (atRoot) new_root = at; |
---|
47 | } |
---|
48 | |
---|
49 | while (height<PT_MIN_TREE_HEIGHT && loc[height-1] != PT_QU) { |
---|
50 | at = PT_change_leaf_to_node(leaf); |
---|
51 | leaf = PT_create_leaf(&at, loc[height], loc); |
---|
52 | ++height; |
---|
53 | } |
---|
54 | |
---|
55 | pt_assert(height >= PT_MIN_TREE_HEIGHT || loc[height-1] == PT_QU); |
---|
56 | return new_root; |
---|
57 | } |
---|
58 | else { // go down the tree |
---|
59 | at = pt_next; |
---|
60 | height++; |
---|
61 | |
---|
62 | if (loc[height-1] == PT_QU) { |
---|
63 | // end of sequence reached -> change node to chain and add |
---|
64 | pt_assert(at->is_chain()); |
---|
65 | PT_add_to_chain(at, loc); |
---|
66 | return root; |
---|
67 | } |
---|
68 | } |
---|
69 | } |
---|
70 | |
---|
71 | // type == leaf or chain |
---|
72 | if (at->is_chain()) { // old chain reached |
---|
73 | PT_add_to_chain(at, loc); |
---|
74 | return root; |
---|
75 | } |
---|
76 | |
---|
77 | // change leaf to node and create two sons |
---|
78 | |
---|
79 | const ReadableDataLoc loc_ref(at); |
---|
80 | |
---|
81 | while (loc[height] == loc_ref[height]) { // creates nodes until sequences are different |
---|
82 | pt_assert(!at->is_node()); |
---|
83 | |
---|
84 | if (at->is_chain()) { |
---|
85 | PT_add_to_chain(at, loc); |
---|
86 | return root; |
---|
87 | } |
---|
88 | if (height >= PT_POS_TREE_HEIGHT) { |
---|
89 | if (at->is_leaf()) at = PT_leaf_to_chain(at); |
---|
90 | pt_assert(at->is_chain()); |
---|
91 | PT_add_to_chain(at, loc); |
---|
92 | return root; |
---|
93 | } |
---|
94 | |
---|
95 | pt_assert(at->is_leaf()); |
---|
96 | |
---|
97 | at = PT_change_leaf_to_node(at); // change tip to node and append two new leafs |
---|
98 | at = PT_create_leaf(&at, loc[height], loc_ref); // dummy leaf just to create a new node; may become a chain |
---|
99 | |
---|
100 | height++; |
---|
101 | |
---|
102 | if (loc[height-1] == PT_QU) { |
---|
103 | pt_assert(loc_ref[height-1] == PT_QU); // end of both sequences |
---|
104 | pt_assert(at->is_chain()); |
---|
105 | |
---|
106 | PT_add_to_chain(at, loc); // and add node |
---|
107 | return root; |
---|
108 | } |
---|
109 | pt_assert(loc_ref[height-1] != PT_QU); |
---|
110 | } |
---|
111 | |
---|
112 | pt_assert(loc[height] != loc_ref[height]); |
---|
113 | |
---|
114 | if (height >= PT_POS_TREE_HEIGHT) { |
---|
115 | if (at->is_leaf()) at = PT_leaf_to_chain(at); |
---|
116 | PT_add_to_chain(at, loc); |
---|
117 | return root; |
---|
118 | } |
---|
119 | if (at->is_chain()) { |
---|
120 | // not covered by test - but looks similar to case in top-loop |
---|
121 | PT_add_to_chain(at, loc); |
---|
122 | } |
---|
123 | else { |
---|
124 | at = PT_change_leaf_to_node(at); // delete leaf |
---|
125 | PT_create_leaf(&at, loc[height], loc); // two new leafs |
---|
126 | PT_create_leaf(&at, loc_ref[height], loc_ref); |
---|
127 | } |
---|
128 | return root; |
---|
129 | } |
---|
130 | |
---|
131 | |
---|
// true if the four chars starting at 'p' all equal 'c'
static inline bool pt_four_times(const char *p, char c) {
    return p[0] == c && p[1] == c && p[2] == c && p[3] == c;
}

// Move 'pos' backwards over gap characters ('-' and '.') in 'seq'.
//
// Starting at seq[pos], 'pos' is decremented while the character there is a gap
// and 'pos' is not 0. Afterwards 1 is added to 'pos' if at least one '.' was
// skipped.
//
// Gaps are first skipped in groups of four identical characters, then one by
// one. The groups are tested bytewise: the previous implementation read them
// through a casted uint32_t pointer, which is a misaligned / aliasing-violating
// access (undefined behavior). Comparing bytes accepts exactly the same groups
// ("...." and "----") independent of byte order.
inline void get_abs_align_pos(char *seq, int &pos) {
    int q_exists = 0; // set to 1 as soon as a '.' was skipped

    if (pos > 3) {
        pos -= 3; // seq+pos now addresses the 4 chars ending at the original position
        while (pos > 0) {
            const char *window = seq+pos;
            if (pt_four_times(window, '.')) {
                q_exists  = 1;
                pos      -= 4;
                continue;
            }
            if (pt_four_times(window, '-')) {
                pos -= 4;
                continue;
            }
            break;
        }
        pos += 3; // back to "last char of window" (never negative, because pos was >0 before each -=4)
    }

    // skip remaining gaps one by one
    while (pos) {
        unsigned char c = seq[pos];
        if (c == '.') {
            q_exists = 1;
            pos--;
            continue;
        }
        if (c == '-') {
            pos--;
            continue;
        }
        break;
    }
    pos += q_exists;
}
---|
167 | |
---|
168 | static bool all_sons_saved(POS_TREE1 *node); |
---|
169 | inline bool has_unsaved_sons(POS_TREE1 *node) { |
---|
170 | POS_TREE1::TYPE type = node->get_type(); |
---|
171 | return (type == PT1_NODE) ? !all_sons_saved(node) : (type != PT1_SAVED); |
---|
172 | } |
---|
173 | static bool all_sons_saved(POS_TREE1 *node) { |
---|
174 | pt_assert(node->is_node()); |
---|
175 | |
---|
176 | for (int i = PT_QU; i < PT_BASES; i++) { |
---|
177 | POS_TREE1 *son = PT_read_son(node, (PT_base)i); |
---|
178 | if (son) { |
---|
179 | if (has_unsaved_sons(son)) return false; |
---|
180 | } |
---|
181 | } |
---|
182 | return true; |
---|
183 | } |
---|
184 | |
---|
185 | static long write_subtree(FILE *out, POS_TREE1 *node, long pos, long *node_pos, ARB_ERROR& error) { |
---|
186 | pt_assert_stage(STAGE1); |
---|
187 | node->clear_fathers(); |
---|
188 | return PTD_write_leafs_to_disk(out, node, pos, node_pos, error); |
---|
189 | } |
---|
190 | |
---|
191 | static long save_lower_subtree(FILE *out, POS_TREE1 *node, long pos, int height, ARB_ERROR& error) { |
---|
192 | if (height >= PT_MIN_TREE_HEIGHT) { // in lower part of tree |
---|
193 | long dummy; |
---|
194 | pos = write_subtree(out, node, pos, &dummy, error); |
---|
195 | } |
---|
196 | else { |
---|
197 | switch (node->get_type()) { |
---|
198 | case PT1_NODE: |
---|
199 | for (int i = PT_QU; i<PT_BASES; ++i) { |
---|
200 | POS_TREE1 *son = PT_read_son(node, PT_base(i)); |
---|
201 | if (son) pos = save_lower_subtree(out, son, pos, height+1, error); |
---|
202 | } |
---|
203 | break; |
---|
204 | |
---|
205 | case PT1_CHAIN: { |
---|
206 | long dummy; |
---|
207 | pos = write_subtree(out, node, pos, &dummy, error); |
---|
208 | break; |
---|
209 | } |
---|
210 | case PT1_LEAF: pt_assert(0); break; // leafs shall not occur above PT_MIN_TREE_HEIGHT |
---|
211 | case PT1_SAVED: break; // ok - saved by previous call |
---|
212 | case PT1_UNDEF: pt_assert(0); break; |
---|
213 | } |
---|
214 | } |
---|
215 | return pos; |
---|
216 | } |
---|
217 | |
---|
218 | static long save_upper_tree(FILE *out, POS_TREE1 *node, long pos, long& node_pos, ARB_ERROR& error) { |
---|
219 | pos = write_subtree(out, node, pos, &node_pos, error); |
---|
220 | return pos; |
---|
221 | } |
---|
222 | |
---|
223 | inline void check_tree_was_saved(POS_TREE1 *node, const char *whatTree, bool completely, ARB_ERROR& error) { |
---|
224 | if (!error) { |
---|
225 | bool saved = completely ? node->is_saved() : !has_unsaved_sons(node); |
---|
226 | if (!saved) { |
---|
227 | #if defined(DEBUG) |
---|
228 | fprintf(stderr, "%s was not completely saved:\n", whatTree); |
---|
229 | PT_dump_POS_TREE_recursive(node, " ", stderr); |
---|
230 | #endif |
---|
231 | error = GBS_global_string("%s was not saved completely", whatTree); |
---|
232 | } |
---|
233 | } |
---|
234 | } |
---|
235 | |
---|
236 | long PTD_save_lower_tree(FILE *out, POS_TREE1 *node, long pos, ARB_ERROR& error) { |
---|
237 | pos = save_lower_subtree(out, node, pos, 0, error); |
---|
238 | check_tree_was_saved(node, "lower tree", false, error); |
---|
239 | return pos; |
---|
240 | } |
---|
241 | |
---|
242 | long PTD_save_upper_tree(FILE *out, POS_TREE1*& node, long pos, long& node_pos, ARB_ERROR& error) { |
---|
243 | pt_assert(!has_unsaved_sons(node)); // forgot to call PTD_save_lower_tree? |
---|
244 | pos = save_upper_tree(out, node, pos, node_pos, error); |
---|
245 | check_tree_was_saved(node, "tree", true, error); |
---|
246 | PTD_delete_saved_node(node); |
---|
247 | return pos; |
---|
248 | } |
---|
249 | |
---|
250 | #if defined(PTM_TRACE_MAX_MEM_USAGE) |
---|
251 | static void dump_memusage() { |
---|
252 | fflush(stderr); |
---|
253 | printf("\n------------------------------ dump_memusage:\n"); fflush(stdout); |
---|
254 | |
---|
255 | malloc_stats(); |
---|
256 | |
---|
257 | pid_t pid = getpid(); |
---|
258 | char *cmd = GBS_global_string_copy("pmap -d %i | grep -v lib", pid); |
---|
259 | GB_ERROR error = GBK_system(cmd); |
---|
260 | if (error) { |
---|
261 | printf("Warning: %s\n", error); |
---|
262 | } |
---|
263 | free(cmd); |
---|
264 | printf("------------------------------ dump_memusage [end]\n"); |
---|
265 | fflush_all(); |
---|
266 | } |
---|
267 | #endif |
---|
268 | |
---|
269 | class PartitionSpec { |
---|
270 | int passes; |
---|
271 | size_t memuse; |
---|
272 | int depth; |
---|
273 | |
---|
274 | const char *passname() const { |
---|
275 | switch (depth) { |
---|
276 | case 0: return "pass"; |
---|
277 | case 1: return "Level-I-passes"; |
---|
278 | case 2: return "Level-II-passes"; |
---|
279 | case 3: return "Level-III-passes"; |
---|
280 | case 4: return "Level-IV-passes"; |
---|
281 | default : pt_assert(0); break; |
---|
282 | } |
---|
283 | return NULL; // unreached |
---|
284 | } |
---|
285 | |
---|
286 | public: |
---|
287 | PartitionSpec() : passes(0), memuse(0), depth(0) {} |
---|
288 | PartitionSpec(int passes_, size_t memuse_, int depth_) : passes(passes_), memuse(memuse_), depth(depth_) {} |
---|
289 | |
---|
290 | bool willUseMoreThan(size_t max_kb_usable) const { return memuse > max_kb_usable; } |
---|
291 | bool isBetterThan(const PartitionSpec& other, size_t max_kb_usable) const { |
---|
292 | if (!passes) return false; |
---|
293 | if (!other.passes) return true; |
---|
294 | |
---|
295 | bool swaps = willUseMoreThan(max_kb_usable); |
---|
296 | bool other_swaps = other.willUseMoreThan(max_kb_usable); |
---|
297 | |
---|
298 | int cmp = int(other_swaps)-int(swaps); // not to swap is better |
---|
299 | if (cmp == 0) { |
---|
300 | if (swaps) { // optimize for memory |
---|
301 | cmp = other.memuse-memuse; // less memuse is better (@@@ true only if probe->pass-calculation is cheap) |
---|
302 | if (cmp == 0) { |
---|
303 | cmp = other.passes-passes; // less passes are better |
---|
304 | if (cmp == 0) { |
---|
305 | cmp = other.depth-depth; |
---|
306 | } |
---|
307 | } |
---|
308 | } |
---|
309 | else { // optimize for number of passes |
---|
310 | cmp = other.passes-passes; // less passes are better |
---|
311 | if (cmp == 0) { |
---|
312 | cmp = other.depth-depth; // less depth is better |
---|
313 | if (cmp == 0) { |
---|
314 | cmp = other.memuse-memuse; // less memuse is better (@@@ true only if probe->pass-calculation is cheap) |
---|
315 | } |
---|
316 | } |
---|
317 | } |
---|
318 | } |
---|
319 | return cmp>0; |
---|
320 | } |
---|
321 | |
---|
322 | void dump(FILE *out, size_t max_kb_usable) const { |
---|
323 | fprintf(out, |
---|
324 | "Estimated memory usage for %i %s: %s%s\n", |
---|
325 | passes, |
---|
326 | passname(), |
---|
327 | GBS_readable_size(memuse*1024, "b"), |
---|
328 | memuse>max_kb_usable ? " (would swap)" : ""); |
---|
329 | } |
---|
330 | |
---|
331 | Partition partition() const { return Partition(depth, passes); } |
---|
332 | }; |
---|
333 | |
---|
334 | static Partition decide_passes_to_use(size_t overallBases, size_t max_kb_usable, int forced_passes) { |
---|
335 | // if 'forced_passes' == 0 -> decide number of passes such that estimated memuse is hard up for 'max_kb_usable' |
---|
336 | // if 'forced_passes' > 0 -> ignore available memory (for DEBUGGING memory estimation) |
---|
337 | |
---|
338 | fflush_all(); |
---|
339 | |
---|
340 | PartitionSpec best; |
---|
341 | |
---|
342 | for (int depth = 0; depth <= PT_MAX_PARTITION_DEPTH; ++depth) { |
---|
343 | PrefixProbabilities prob(depth); |
---|
344 | |
---|
345 | int maxPasses = prob.get_prefix_count(); |
---|
346 | if (forced_passes) { |
---|
347 | if (maxPasses >= forced_passes) { |
---|
348 | PartitionSpec curr(forced_passes, max_kb_for_passes(prob, forced_passes, overallBases), depth); |
---|
349 | if (curr.isBetterThan(best, max_kb_usable)) { |
---|
350 | best = curr; |
---|
351 | best.dump(stdout, max_kb_usable); |
---|
352 | } |
---|
353 | } |
---|
354 | } |
---|
355 | else { |
---|
356 | for (int passes = 1; passes <= maxPasses; ++passes) { |
---|
357 | PartitionSpec curr(passes, max_kb_for_passes(prob, passes, overallBases), depth); |
---|
358 | if (curr.isBetterThan(best, max_kb_usable)) { |
---|
359 | best = curr; |
---|
360 | best.dump(stdout, max_kb_usable); |
---|
361 | } |
---|
362 | if (!curr.willUseMoreThan(max_kb_usable)) break; |
---|
363 | } |
---|
364 | } |
---|
365 | } |
---|
366 | fflush(stdout); |
---|
367 | |
---|
368 | if (best.willUseMoreThan(max_kb_usable)) { |
---|
369 | const int allowed_passes = PrefixIterator(PT_QU, PT_T, PT_MAX_PARTITION_DEPTH).steps(); |
---|
370 | |
---|
371 | fprintf(stderr, |
---|
372 | "Warning: \n" |
---|
373 | " You try to build a ptserver from a very big database!\n" |
---|
374 | "\n" |
---|
375 | " The memory installed on your machine would require to build the ptserver\n" |
---|
376 | " in more than %i passes (the maximum allowed number of passes).\n" |
---|
377 | "\n" |
---|
378 | " As a result the build of this server may cause your machine to swap huge\n" |
---|
379 | " amounts of memory and will possibly run for days, weeks or even months.\n" |
---|
380 | "\n", allowed_passes); |
---|
381 | |
---|
382 | fflush(stderr); |
---|
383 | } |
---|
384 | |
---|
385 | return best.partition(); |
---|
386 | } |
---|
387 | |
---|
388 | ARB_ERROR enter_stage_1_build_tree(PT_main * , const char *tname, ULONG ARM_size_kb) { // __ATTR__USERESULT |
---|
389 | // initialize tree and call the build pos tree procedure |
---|
390 | |
---|
391 | ARB_ERROR error; |
---|
392 | |
---|
393 | if (unlink(tname)) { |
---|
394 | if (GB_size_of_file(tname) >= 0) { |
---|
395 | error = GBS_global_string("Cannot remove %s", tname); |
---|
396 | } |
---|
397 | } |
---|
398 | |
---|
399 | if (!error) { |
---|
400 | char *t2name = (char *)calloc(sizeof(char), strlen(tname) + 2); |
---|
401 | sprintf(t2name, "%s%%", tname); |
---|
402 | |
---|
403 | FILE *out = fopen(t2name, "w"); |
---|
404 | if (!out) { |
---|
405 | error = GBS_global_string("Cannot open %s", t2name); |
---|
406 | } |
---|
407 | else { |
---|
408 | POS_TREE1 *pt = NULL; |
---|
409 | |
---|
410 | { |
---|
411 | GB_ERROR sm_error = GB_set_mode_of_file(t2name, 0666); |
---|
412 | if (sm_error) { |
---|
413 | GB_warningf("%s\nOther users might get problems when they try to access this file.", sm_error); |
---|
414 | } |
---|
415 | } |
---|
416 | |
---|
417 | fputc(0, out); // disable zero father |
---|
418 | long pos = 1; |
---|
419 | |
---|
420 | // now temp file exists -> trigger ptserver-selectionlist-update in all |
---|
421 | // ARB applications by writing to log |
---|
422 | GBS_add_ptserver_logentry(GBS_global_string("Calculating probe tree (%s)", tname)); |
---|
423 | |
---|
424 | psg.enter_stage(STAGE1); |
---|
425 | PT_init_cache_sizes(STAGE1); |
---|
426 | |
---|
427 | pt = PT_create_leaf(NULL, PT_N, DataLoc(0, 0, 0)); // create main node |
---|
428 | pt = PT_change_leaf_to_node(pt); |
---|
429 | psg.stat.cut_offs = 0; // statistic information |
---|
430 | GB_begin_transaction(psg.gb_main); |
---|
431 | |
---|
432 | ULONG available_memory = GB_get_usable_memory() - ARM_size_kb - PTSERVER_BIN_MB*1024; |
---|
433 | printf("Memory available for build: %s\n", GBS_readable_size(available_memory*1024, "b")); |
---|
434 | |
---|
435 | int forcedPasses = 0; // means "do not force" |
---|
436 | { |
---|
437 | const char *forced = GB_getenv("ARB_PTS_FORCE_PASSES"); |
---|
438 | if (forced) { |
---|
439 | int f = atoi(forced); |
---|
440 | if (f >= 1) { |
---|
441 | forcedPasses = f; |
---|
442 | printf("Warning: Forcing %i passes (by envvar ARB_PTS_FORCE_PASSES='%s')\n", forcedPasses, forced); |
---|
443 | } |
---|
444 | } |
---|
445 | } |
---|
446 | |
---|
447 | Partition partition = decide_passes_to_use(psg.char_count, available_memory, forcedPasses); |
---|
448 | { |
---|
449 | size_t max_part = partition.estimate_max_probes_for_any_pass(psg.char_count); |
---|
450 | printf("Overall bases: %s\n", GBS_readable_size(psg.char_count, "bp")); |
---|
451 | printf("Max. partition size: %s (=%.1f%%)\n", GBS_readable_size(max_part, "bp"), max_part*100.0/psg.char_count); |
---|
452 | } |
---|
453 | |
---|
454 | int passes = partition.number_of_passes(); |
---|
455 | arb_progress pass_progress(GBS_global_string("Build index in %i passes", passes), passes); |
---|
456 | int currPass = 0; |
---|
457 | do { |
---|
458 | pt_assert(!partition.done()); |
---|
459 | |
---|
460 | ++currPass; |
---|
461 | arb_progress data_progress(GBS_global_string("pass %i/%i", currPass, passes), psg.data_count); |
---|
462 | |
---|
463 | for (int name = 0; name < psg.data_count; name++) { |
---|
464 | const probe_input_data& pid = psg.data[name]; |
---|
465 | |
---|
466 | SmartCharPtr seqPtr = pid.get_dataPtr(); |
---|
467 | const char *seq = &*seqPtr; |
---|
468 | |
---|
469 | pid.preload_rel2abs(); |
---|
470 | ReadableDataLoc insertLoc(name, 0, 0); |
---|
471 | for (int rel = pid.get_size() - 1; rel >= 0; rel--) { |
---|
472 | if (partition.contains(seq+rel)) { |
---|
473 | insertLoc.set_position(pid.get_abspos(rel), rel); |
---|
474 | pt = build_pos_tree(pt, insertLoc); |
---|
475 | } |
---|
476 | } |
---|
477 | ++data_progress; |
---|
478 | } |
---|
479 | |
---|
480 | #if defined(PTM_TRACE_MAX_MEM_USAGE) |
---|
481 | dump_memusage(); |
---|
482 | #endif |
---|
483 | |
---|
484 | pos = PTD_save_lower_tree(out, pt, pos, error); |
---|
485 | if (error) break; |
---|
486 | |
---|
487 | #ifdef PTM_DEBUG_NODES |
---|
488 | PTD_debug_nodes(); |
---|
489 | #endif |
---|
490 | } |
---|
491 | while (partition.next()); |
---|
492 | |
---|
493 | long last_obj = 0; |
---|
494 | if (!error) { |
---|
495 | pos = PTD_save_upper_tree(out, pt, pos, last_obj, error); |
---|
496 | pt_assert(!pt); |
---|
497 | } |
---|
498 | if (!error) { |
---|
499 | bool need64bit = false; // does created db need a 64bit ptserver ? |
---|
500 | |
---|
501 | pt_assert(last_obj); |
---|
502 | #ifdef ARB_64 |
---|
503 | if (last_obj >= 0xffffffff) need64bit = true; // last_obj is bigger than int |
---|
504 | #else |
---|
505 | if (last_obj <= 0) { // overflow ? |
---|
506 | GBK_terminate("Overflow - out of memory"); |
---|
507 | } |
---|
508 | #endif |
---|
509 | |
---|
510 | // write information about database |
---|
511 | long info_pos = pos; |
---|
512 | |
---|
513 | PTD_put_int(out, PT_SERVER_MAGIC); // marker to identify PT-Server file |
---|
514 | PTD_put_byte(out, PT_SERVER_VERSION); // version of PT-Server file |
---|
515 | pos += 4+1; |
---|
516 | |
---|
517 | // as last element of info block, write it's size (2byte) |
---|
518 | long info_size = pos-info_pos; |
---|
519 | PTD_put_short(out, info_size); |
---|
520 | pos += 2; |
---|
521 | |
---|
522 | // save DB footer (which is the entry point on load) |
---|
523 | |
---|
524 | if (need64bit) { // last_obj is bigger than int |
---|
525 | #ifdef ARB_64 |
---|
526 | PTD_put_longlong(out, last_obj); // write last_obj as long long (64 bit) |
---|
527 | PTD_put_int(out, 0xffffffff); // write 0xffffffff at the end to signalize 64bit ptserver is needed |
---|
528 | #else |
---|
529 | pt_assert(0); |
---|
530 | #endif |
---|
531 | } |
---|
532 | else { |
---|
533 | PTD_put_int(out, last_obj); // last_obj fits into an int -> store it as usual (compatible to old unversioned format) |
---|
534 | } |
---|
535 | } |
---|
536 | if (error) { |
---|
537 | GB_abort_transaction(psg.gb_main); |
---|
538 | fclose(out); |
---|
539 | |
---|
540 | int res = GB_unlink(t2name); |
---|
541 | if (res == -1) fputs(GB_await_error(), stderr); |
---|
542 | } |
---|
543 | else { |
---|
544 | GB_commit_transaction(psg.gb_main); |
---|
545 | fclose(out); |
---|
546 | |
---|
547 | error = GB_rename_file(t2name, tname); |
---|
548 | if (!error) { |
---|
549 | GB_ERROR sm_error = GB_set_mode_of_file(tname, 00666); |
---|
550 | if (sm_error) GB_warning(sm_error); |
---|
551 | } |
---|
552 | } |
---|
553 | |
---|
554 | if (error) pass_progress.done(); |
---|
555 | } |
---|
556 | free(t2name); |
---|
557 | } |
---|
558 | |
---|
559 | #if defined(DEBUG) |
---|
560 | { |
---|
561 | char *related = strdup(tname); |
---|
562 | char *starpos = strstr(related, ".arb.pt"); |
---|
563 | |
---|
564 | pt_assert(starpos); |
---|
565 | strcpy(starpos, ".*"); |
---|
566 | |
---|
567 | fflush_all(); |
---|
568 | |
---|
569 | char *listRelated = GBS_global_string_copy("ls -al %s", related); |
---|
570 | GB_ERROR lserror = GBK_system(listRelated); |
---|
571 | |
---|
572 | fflush_all(); |
---|
573 | |
---|
574 | if (lserror) fprintf(stderr, "Warning: %s\n", lserror); |
---|
575 | free(listRelated); |
---|
576 | free(related); |
---|
577 | } |
---|
578 | #endif |
---|
579 | |
---|
580 | return error; |
---|
581 | } |
---|
582 | |
---|
583 | ARB_ERROR enter_stage_2_load_tree(PT_main *, const char *tname) { // __ATTR__USERESULT |
---|
584 | // load tree from disk |
---|
585 | ARB_ERROR error; |
---|
586 | |
---|
587 | psg.enter_stage(STAGE2); |
---|
588 | PT_init_cache_sizes(STAGE2); |
---|
589 | |
---|
590 | { |
---|
591 | long size = GB_size_of_file(tname); |
---|
592 | if (size<0) { |
---|
593 | error = GB_IO_error("stat", tname); |
---|
594 | } |
---|
595 | else { |
---|
596 | printf("- mapping ptindex ('%s', %s) from disk\n", tname, GBS_readable_size(size, "b")); |
---|
597 | FILE *in = fopen(tname, "r"); |
---|
598 | if (!in) { |
---|
599 | error = GB_IO_error("read", tname); |
---|
600 | } |
---|
601 | else { |
---|
602 | error = PTD_read_leafs_from_disk(tname, psg.TREE_ROOT2()); |
---|
603 | fclose(in); |
---|
604 | } |
---|
605 | } |
---|
606 | } |
---|
607 | |
---|
608 | return error; |
---|
609 | } |
---|
610 | |
---|
611 | // -------------------------------------------------------------------------------- |
---|
612 | |
---|
613 | #ifdef UNIT_TESTS |
---|
614 | #ifndef TEST_UNIT_H |
---|
615 | #include <test_unit.h> |
---|
616 | #include "PT_compress.h" |
---|
617 | #endif |
---|
618 | |
---|
619 | void TEST_PrefixProbabilities() { |
---|
620 | PrefixProbabilities prob0(0); |
---|
621 | PrefixProbabilities prob1(1); |
---|
622 | PrefixProbabilities prob2(2); |
---|
623 | |
---|
624 | const double EPS = 0.00001; |
---|
625 | |
---|
626 | TEST_EXPECT_SIMILAR(prob0.of(0), 1.0000, EPS); // all |
---|
627 | |
---|
628 | TEST_EXPECT_SIMILAR(prob1.of(0), 0.0014, EPS); // PT_QU |
---|
629 | TEST_EXPECT_SIMILAR(prob1.of(1), 0.0003, EPS); // PT_N |
---|
630 | TEST_EXPECT_SIMILAR(prob1.of(2), 0.2543, EPS); |
---|
631 | TEST_EXPECT_SIMILAR(prob1.of(3), 0.2268, EPS); |
---|
632 | TEST_EXPECT_SIMILAR(prob1.of(4), 0.3074, EPS); |
---|
633 | TEST_EXPECT_SIMILAR(prob1.of(5), 0.2098, EPS); |
---|
634 | |
---|
635 | TEST_EXPECT_SIMILAR(prob2.of( 0), 0.00140, EPS); // PT_QU |
---|
636 | TEST_EXPECT_SIMILAR(prob2.of( 1), 0.00000, EPS); // PT_N PT_QU |
---|
637 | TEST_EXPECT_SIMILAR(prob2.of( 2), 0.00000, EPS); // PT_N PT_N |
---|
638 | TEST_EXPECT_SIMILAR(prob2.of( 3), 0.00008, EPS); // PT_N PT_A |
---|
639 | TEST_EXPECT_SIMILAR(prob2.of( 7), 0.00036, EPS); // PT_A PT_QU |
---|
640 | TEST_EXPECT_SIMILAR(prob2.of( 9), 0.06467, EPS); // PT_A PT_A |
---|
641 | TEST_EXPECT_SIMILAR(prob2.of(30), 0.04402, EPS); // PT_T PT_T |
---|
642 | |
---|
643 | TEST_EXPECT_SIMILAR(prob1.left_of(4), 0.4828, EPS); |
---|
644 | TEST_EXPECT_SIMILAR(prob1.left_of(6), 1.0000, EPS); // all prefixes together |
---|
645 | |
---|
646 | TEST_EXPECT_SIMILAR(prob2.left_of(19), 0.4828, EPS); |
---|
647 | TEST_EXPECT_SIMILAR(prob2.left_of(31), 1.0000, EPS); // all prefixes together |
---|
648 | |
---|
649 | TEST_EXPECT_EQUAL(prob0.find_index_near_leftsum(1.0), 1); |
---|
650 | |
---|
651 | TEST_EXPECT_EQUAL(prob1.find_index_near_leftsum(0.5), 4); |
---|
652 | TEST_EXPECT_SIMILAR(prob1.left_of(4), 0.4828, EPS); |
---|
653 | TEST_EXPECT_SIMILAR(prob1.left_of(5), 0.7902, EPS); |
---|
654 | |
---|
655 | TEST_EXPECT_EQUAL(prob2.find_index_near_leftsum(0.5), 21); |
---|
656 | TEST_EXPECT_SIMILAR(prob2.left_of(21), 0.48332, EPS); |
---|
657 | TEST_EXPECT_SIMILAR(prob2.left_of(22), 0.56149, EPS); |
---|
658 | } |
---|
659 | |
---|
660 | static int count_passes(Partition& p) { |
---|
661 | p.reset(); |
---|
662 | int count = 0; |
---|
663 | while (!p.done()) { |
---|
664 | p.next(); |
---|
665 | ++count; |
---|
666 | } |
---|
667 | p.reset(); |
---|
668 | return count; |
---|
669 | } |
---|
670 | |
---|
671 | class Compressed { |
---|
672 | size_t len; |
---|
673 | PT_compressed compressed; |
---|
674 | |
---|
675 | public: |
---|
676 | Compressed(const char *readable) |
---|
677 | : len(strlen(readable)), |
---|
678 | compressed(len) |
---|
679 | { |
---|
680 | compressed.createFrom(readable, len); |
---|
681 | } |
---|
682 | const char *seq() const { return compressed.get_seq(); } |
---|
683 | }; |
---|
684 | |
---|
685 | void TEST_MarkedPrefixes() { |
---|
686 | MarkedPrefixes mp0(0); |
---|
687 | MarkedPrefixes mp1(1); |
---|
688 | MarkedPrefixes mp2(2); |
---|
689 | |
---|
690 | mp0.predecide(); |
---|
691 | TEST_EXPECT_EQUAL(mp0.isMarked(Compressed(".").seq()), false); |
---|
692 | TEST_EXPECT_EQUAL(mp0.isMarked(Compressed("T").seq()), false); |
---|
693 | |
---|
694 | mp0.mark(0, 0); |
---|
695 | mp0.predecide(); |
---|
696 | TEST_EXPECT_EQUAL(mp0.isMarked(Compressed(".").seq()), true); |
---|
697 | TEST_EXPECT_EQUAL(mp0.isMarked(Compressed("T").seq()), true); |
---|
698 | |
---|
699 | mp1.mark(3, 5); |
---|
700 | mp1.predecide(); |
---|
701 | TEST_EXPECT_EQUAL(mp1.isMarked(Compressed(".").seq()), false); |
---|
702 | TEST_EXPECT_EQUAL(mp1.isMarked(Compressed("N").seq()), false); |
---|
703 | TEST_EXPECT_EQUAL(mp1.isMarked(Compressed("A").seq()), false); |
---|
704 | TEST_EXPECT_EQUAL(mp1.isMarked(Compressed("C").seq()), true); |
---|
705 | TEST_EXPECT_EQUAL(mp1.isMarked(Compressed("G").seq()), true); |
---|
706 | TEST_EXPECT_EQUAL(mp1.isMarked(Compressed("T").seq()), true); |
---|
707 | |
---|
708 | mp2.mark(1, 7); |
---|
709 | mp2.predecide(); |
---|
710 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed(".").seq()), false); |
---|
711 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("N.").seq()), true); |
---|
712 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("NN").seq()), true); |
---|
713 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("NA").seq()), true); |
---|
714 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("NC").seq()), true); |
---|
715 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("NG").seq()), true); |
---|
716 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("NT").seq()), true); |
---|
717 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("A.").seq()), true); |
---|
718 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("AN").seq()), false); |
---|
719 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("AC").seq()), false); |
---|
720 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("AG").seq()), false); |
---|
721 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("AT").seq()), false); |
---|
722 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("GG").seq()), false); |
---|
723 | TEST_EXPECT_EQUAL(mp2.isMarked(Compressed("TA").seq()), false); |
---|
724 | } |
---|
725 | |
---|
// Select a value depending on the build:
// expands to 'val64' if ARB_64 is defined, to 'val32' otherwise.
#ifdef ARB_64
# define VAL_64_32_BITDEP(val64,val32) (val64)
#else // !ARB_64
# define VAL_64_32_BITDEP(val64,val32) (val32)
#endif
---|
731 | |
---|
732 | void TEST_Partition() { |
---|
733 | PrefixProbabilities p0(0); |
---|
734 | PrefixProbabilities p1(1); |
---|
735 | PrefixProbabilities p2(2); |
---|
736 | PrefixProbabilities p3(3); |
---|
737 | PrefixProbabilities p4(4); |
---|
738 | |
---|
739 | const int BASES_100k = 100000; |
---|
740 | |
---|
741 | { |
---|
742 | Partition P01(p0, 1); |
---|
743 | TEST_EXPECT_EQUAL(P01.estimate_probes_for_pass(1, BASES_100k), 100000); |
---|
744 | TEST_EXPECT_EQUAL(P01.estimate_max_probes_for_any_pass(BASES_100k), 100000); |
---|
745 | } |
---|
746 | |
---|
747 | { |
---|
748 | // distributing memory to 6 passes on a level.1 Partitioner doesn't allow much choice: |
---|
749 | Partition P16(p1, 6); |
---|
750 | TEST_EXPECT_EQUAL(P16.number_of_passes(), 6); |
---|
751 | TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(1, BASES_100k), 140); |
---|
752 | TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(2, BASES_100k), 30); |
---|
753 | TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(3, BASES_100k), 25430); |
---|
754 | TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(4, BASES_100k), 22680); |
---|
755 | TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(5, BASES_100k), 30740); |
---|
756 | TEST_EXPECT_EQUAL(P16.estimate_probes_for_pass(6, BASES_100k), 20980); |
---|
757 | TEST_EXPECT_EQUAL(P16.estimate_max_probes_for_any_pass(BASES_100k), 30740); |
---|
758 | TEST_EXPECT_EQUAL(P16.estimate_max_kb_for_any_pass(BASES_100k), VAL_64_32_BITDEP(440583, 205015)); |
---|
759 | } |
---|
760 | |
---|
761 | { |
---|
762 | // 3 passes |
---|
763 | Partition P13(p1, 3); |
---|
764 | TEST_EXPECT_EQUAL(P13.number_of_passes(), 3); |
---|
765 | TEST_EXPECT_EQUAL(count_passes(P13), 3); |
---|
766 | |
---|
767 | TEST_EXPECT_EQUAL(P13.contains(Compressed(".").seq()), true); |
---|
768 | TEST_EXPECT_EQUAL(P13.contains(Compressed("N").seq()), true); |
---|
769 | TEST_EXPECT_EQUAL(P13.contains(Compressed("A").seq()), true); |
---|
770 | TEST_EXPECT_EQUAL(P13.contains(Compressed("C").seq()), false); |
---|
771 | TEST_EXPECT_EQUAL(P13.contains(Compressed("G").seq()), false); |
---|
772 | TEST_EXPECT_EQUAL(P13.contains(Compressed("T").seq()), false); |
---|
773 | |
---|
774 | TEST_EXPECT_EQUAL(P13.next(), true); |
---|
775 | |
---|
776 | TEST_EXPECT_EQUAL(P13.contains(Compressed(".").seq()), false); |
---|
777 | TEST_EXPECT_EQUAL(P13.contains(Compressed("N").seq()), false); |
---|
778 | TEST_EXPECT_EQUAL(P13.contains(Compressed("A").seq()), false); |
---|
779 | TEST_EXPECT_EQUAL(P13.contains(Compressed("C").seq()), true); |
---|
780 | TEST_EXPECT_EQUAL(P13.contains(Compressed("G").seq()), true); |
---|
781 | TEST_EXPECT_EQUAL(P13.contains(Compressed("T").seq()), false); |
---|
782 | |
---|
783 | TEST_EXPECT_EQUAL(P13.next(), true); |
---|
784 | |
---|
785 | TEST_EXPECT_EQUAL(P13.contains(Compressed(".").seq()), false); |
---|
786 | TEST_EXPECT_EQUAL(P13.contains(Compressed("N").seq()), false); |
---|
787 | TEST_EXPECT_EQUAL(P13.contains(Compressed("A").seq()), false); |
---|
788 | TEST_EXPECT_EQUAL(P13.contains(Compressed("C").seq()), false); |
---|
789 | TEST_EXPECT_EQUAL(P13.contains(Compressed("G").seq()), false); |
---|
790 | TEST_EXPECT_EQUAL(P13.contains(Compressed("T").seq()), true); |
---|
791 | |
---|
792 | TEST_EXPECT_EQUAL(P13.next(), false); |
---|
793 | |
---|
794 | TEST_EXPECT_EQUAL(P13.estimate_probes_for_pass(1, BASES_100k), 25600); |
---|
795 | TEST_EXPECT_EQUAL(P13.estimate_probes_for_pass(2, BASES_100k), 53420); |
---|
796 | TEST_EXPECT_EQUAL(P13.estimate_probes_for_pass(3, BASES_100k), 20980); |
---|
797 | TEST_EXPECT_EQUAL(P13.estimate_max_probes_for_any_pass(BASES_100k), 53420); |
---|
798 | TEST_EXPECT_EQUAL(P13.estimate_max_kb_for_any_pass(BASES_100k), VAL_64_32_BITDEP(440687, 205101)); |
---|
799 | } |
---|
800 | |
---|
801 | { |
---|
802 | // 2 passes |
---|
803 | Partition P12(p1, 2); |
---|
804 | TEST_EXPECT_EQUAL(P12.number_of_passes(), 2); |
---|
805 | TEST_EXPECT_EQUAL(count_passes(P12), 2); |
---|
806 | |
---|
807 | TEST_EXPECT_EQUAL(P12.contains(Compressed(".").seq()), true); |
---|
808 | TEST_EXPECT_EQUAL(P12.contains(Compressed("N").seq()), true); |
---|
809 | TEST_EXPECT_EQUAL(P12.contains(Compressed("A").seq()), true); |
---|
810 | TEST_EXPECT_EQUAL(P12.contains(Compressed("C").seq()), true); |
---|
811 | TEST_EXPECT_EQUAL(P12.contains(Compressed("G").seq()), false); |
---|
812 | TEST_EXPECT_EQUAL(P12.contains(Compressed("T").seq()), false); |
---|
813 | |
---|
814 | TEST_EXPECT_EQUAL(P12.next(), true); |
---|
815 | |
---|
816 | TEST_EXPECT_EQUAL(P12.contains(Compressed(".").seq()), false); |
---|
817 | TEST_EXPECT_EQUAL(P12.contains(Compressed("N").seq()), false); |
---|
818 | TEST_EXPECT_EQUAL(P12.contains(Compressed("A").seq()), false); |
---|
819 | TEST_EXPECT_EQUAL(P12.contains(Compressed("C").seq()), false); |
---|
820 | TEST_EXPECT_EQUAL(P12.contains(Compressed("G").seq()), true); |
---|
821 | TEST_EXPECT_EQUAL(P12.contains(Compressed("T").seq()), true); |
---|
822 | |
---|
823 | TEST_EXPECT_EQUAL(P12.next(), false); |
---|
824 | |
---|
825 | TEST_EXPECT_EQUAL(P12.estimate_probes_for_pass(1, BASES_100k), 48280); |
---|
826 | TEST_EXPECT_EQUAL(P12.estimate_probes_for_pass(2, BASES_100k), 51720); |
---|
827 | TEST_EXPECT_EQUAL(P12.estimate_max_probes_for_any_pass(BASES_100k), 51720); |
---|
828 | TEST_EXPECT_EQUAL(P12.estimate_max_kb_for_any_pass(BASES_100k), VAL_64_32_BITDEP(440679, 205095)); |
---|
829 | } |
---|
830 | |
---|
831 | TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 1, BASES_100k), 100000); |
---|
832 | TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 2, BASES_100k), 51720); |
---|
833 | TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 3, BASES_100k), 53420); |
---|
834 | TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 4, BASES_100k), 30740); |
---|
835 | TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 5, BASES_100k), 30740); |
---|
836 | TEST_EXPECT_EQUAL(max_probes_for_passes(p1, 6, BASES_100k), 30740); |
---|
837 | |
---|
838 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 1, BASES_100k), 100000); |
---|
839 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 2, BASES_100k), 51668); |
---|
840 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 3, BASES_100k), 36879); |
---|
841 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 4, BASES_100k), 27429); |
---|
842 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 5, BASES_100k), 26571); |
---|
843 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 6, BASES_100k), 21270); |
---|
844 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 7, BASES_100k), 18958); |
---|
845 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 8, BASES_100k), 16578); |
---|
846 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 9, BASES_100k), 15899); |
---|
847 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 10, BASES_100k), 14789); |
---|
848 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 15, BASES_100k), 11730); |
---|
849 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 20, BASES_100k), 9449); |
---|
850 | TEST_EXPECT_EQUAL(max_probes_for_passes(p2, 30, BASES_100k), 9449); |
---|
851 | |
---|
852 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 1, BASES_100k), 100000); |
---|
853 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 2, BASES_100k), 50333); |
---|
854 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 3, BASES_100k), 33890); |
---|
855 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 4, BASES_100k), 25853); |
---|
856 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 5, BASES_100k), 20906); |
---|
857 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 6, BASES_100k), 17668); |
---|
858 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 7, BASES_100k), 15099); |
---|
859 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 8, BASES_100k), 13854); |
---|
860 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 9, BASES_100k), 12259); |
---|
861 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 10, BASES_100k), 11073); |
---|
862 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 15, BASES_100k), 8168); |
---|
863 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 20, BASES_100k), 6401); |
---|
864 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 30, BASES_100k), 4737); |
---|
865 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 40, BASES_100k), 4176); |
---|
866 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 50, BASES_100k), 2983); |
---|
867 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 100, BASES_100k), 2905); |
---|
868 | TEST_EXPECT_EQUAL(max_probes_for_passes(p3, 150, BASES_100k), 2905); |
---|
869 | |
---|
870 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 1, BASES_100k), 100000); |
---|
871 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 2, BASES_100k), 50084); |
---|
872 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 3, BASES_100k), 33425); |
---|
873 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 4, BASES_100k), 25072); |
---|
874 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 5, BASES_100k), 20145); |
---|
875 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 6, BASES_100k), 16837); |
---|
876 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 7, BASES_100k), 14528); |
---|
877 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 8, BASES_100k), 12606); |
---|
878 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 9, BASES_100k), 11319); |
---|
879 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 10, BASES_100k), 10158); |
---|
880 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 15, BASES_100k), 6887); |
---|
881 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 20, BASES_100k), 5315); |
---|
882 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 30, BASES_100k), 3547); |
---|
883 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 40, BASES_100k), 2805); |
---|
884 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 50, BASES_100k), 2336); |
---|
885 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 100, BASES_100k), 1397); |
---|
886 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 150, BASES_100k), 1243); |
---|
887 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 200, BASES_100k), 954); |
---|
888 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 300, BASES_100k), 893); |
---|
889 | TEST_EXPECT_EQUAL(max_probes_for_passes(p4, 600, BASES_100k), 893); |
---|
890 | } |
---|
891 | |
---|
892 | static arb_test::match_expectation decides_on_passes(ULONG bp, size_t avail_mem_kb, int expected_passes, int expected_depth, size_t expected_passsize, size_t expected_memuse, bool expect_to_swap) { |
---|
893 | size_t ARM_size_kb = bp/1800; // just guess .ARM size |
---|
894 | |
---|
895 | avail_mem_kb -= ARM_size_kb + PTSERVER_BIN_MB*1024; |
---|
896 | |
---|
897 | Partition part = decide_passes_to_use(bp, avail_mem_kb, 0); |
---|
898 | int decided_passes = part.number_of_passes(); |
---|
899 | int decided_depth = part.split_depth(); |
---|
900 | size_t decided_passsize = part.estimate_max_probes_for_any_pass(bp); |
---|
901 | size_t decided_memuse = part.estimate_max_kb_for_any_pass(bp); |
---|
902 | bool decided_to_swap = decided_memuse>avail_mem_kb; |
---|
903 | |
---|
904 | using namespace arb_test; |
---|
905 | expectation_group expected; |
---|
906 | expected.add(that(decided_passes).is_equal_to(expected_passes)); |
---|
907 | expected.add(that(decided_depth).is_equal_to(expected_depth)); |
---|
908 | expected.add(that(decided_passsize).is_equal_to(expected_passsize)); |
---|
909 | expected.add(that(decided_memuse).is_equal_to(expected_memuse)); |
---|
910 | expected.add(that(decided_to_swap).is_equal_to(expect_to_swap)); |
---|
911 | return all().ofgroup(expected); |
---|
912 | } |
---|
913 | |
---|
// Shortcuts for testing one row of pass-decisions via decides_on_passes().
// All arguments are forwarded unchanged (same order as the parameters of decides_on_passes).
//
// TEST_DECIDES_PASSES         wraps the check into TEST_EXPECTATION
// TEST_DECIDES_PASSES__BROKEN wraps the check into TEST_EXPECTATION__BROKEN
//                             (NOTE(review): presumably for rows known to fail at present - confirm)

#define TEST_DECIDES_PASSES(bp, memkb, passes, depth, passsize, memuse, swaps) \
    TEST_EXPECTATION(decides_on_passes(bp, memkb, passes, depth, passsize, memuse, swaps))

#define TEST_DECIDES_PASSES__BROKEN(bp, memkb, passes, depth, passsize, memuse, swaps) \
    TEST_EXPECTATION__BROKEN(decides_on_passes(bp, memkb, passes, depth, passsize, memuse, swaps))
---|
919 | |
---|
920 | void TEST_SLOW_decide_passes_to_use() { |
---|
921 | const ULONG MB = 1024; // kb |
---|
922 | const ULONG GB = 1024*MB; // kb |
---|
923 | |
---|
924 | const ULONG BP_SILVA_108_REF = 891481251ul; |
---|
925 | const ULONG BP_SILVA_108_PARC = BP_SILVA_108_REF * (2492653/618442.0); // rough estimation by number of species |
---|
926 | const ULONG BP_SILVA_108_40K = 56223289ul; |
---|
927 | const ULONG BP_SILVA_108_12K = 17622233ul; |
---|
928 | |
---|
929 | const ULONG MINI_PC = 2 *GB; |
---|
930 | const ULONG SMALL_PC = 4 *GB; |
---|
931 | #if defined(ARB_64) |
---|
932 | const ULONG SMALL_SERVER = 12 *GB; // "bilbo" |
---|
933 | const ULONG MEDIUM_SERVER = 20 *GB; // "boarisch" |
---|
934 | const ULONG BIG_SERVER = 64 *GB; |
---|
935 | const ULONG HUGE_SERVER = 128 *GB; |
---|
936 | |
---|
937 | const ULONG MEM1 = ULONG(45.7*GB+0.5); |
---|
938 | const ULONG MEM2 = ULONG(18.7*GB+0.5); |
---|
939 | const ULONG MEM3 = ULONG(11.23*GB+0.5); |
---|
940 | const ULONG MEM4 = ULONG(8*GB+0.5); |
---|
941 | #endif |
---|
942 | const ULONG MEM5 = ULONG(4*GB+0.5); |
---|
943 | |
---|
944 | const ULONG LMEM1 = 3072*MB; |
---|
945 | const ULONG LMEM2 = 2560*MB; |
---|
946 | const ULONG LMEM3 = 2048*MB; |
---|
947 | const ULONG LMEM4 = 1536*MB; |
---|
948 | const ULONG LMEM5 = 1024*MB; |
---|
949 | const ULONG LMEM6 = 768*MB; |
---|
950 | const ULONG LMEM7 = 512*MB; |
---|
951 | |
---|
952 | const int SWAPS = 1; |
---|
953 | |
---|
954 | #if defined(ARB_64) |
---|
955 | // ---------------- database --------- machine -- passes depth ---- probes ----- memuse - swap? |
---|
956 | |
---|
957 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MEM1, 1, 0, 3593147643UL, 21318473, 0); |
---|
958 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MEM2, 2, 1, 1858375961, 13356142, 0); |
---|
959 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MEM3, 4, 2, 985573522, 9350115, 0); |
---|
960 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MEM4, 11, 4, 333053234, 6355149, 0); |
---|
961 | |
---|
962 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM1, 1, 0, 891481251, 5620314, 0); |
---|
963 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM2, 1, 0, 891481251, 5620314, 0); |
---|
964 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM3, 1, 0, 891481251, 5620314, 0); |
---|
965 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM4, 1, 0, 891481251, 5620314, 0); |
---|
966 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM5, 2, 1, 461074103, 3644812, 0); |
---|
967 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM1, 4, 3, 230472727, 2586388, 0); |
---|
968 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM2, 8, 3, 123505999, 2095427, 0); |
---|
969 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM3, 111, 4, 11443798, 1581079, 0); |
---|
970 | |
---|
971 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM1, 1, 0, 56223289, 767008, 0); |
---|
972 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM2, 1, 0, 56223289, 767008, 0); |
---|
973 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM3, 1, 0, 56223289, 767008, 0); |
---|
974 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM4, 1, 0, 56223289, 767008, 0); |
---|
975 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM5, 1, 0, 56223289, 767008, 0); |
---|
976 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM6, 2, 1, 29078685, 642419, 0); |
---|
977 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM7, 194, 4, 502032, 511256, SWAPS); |
---|
978 | |
---|
979 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MINI_PC, 194, 4, 32084148, 4973748, SWAPS); |
---|
980 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, MINI_PC, 111, 4, 11443798, 1581079, 0); |
---|
981 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, MINI_PC, 1, 0, 56223289, 767008, 0); |
---|
982 | TEST_DECIDES_PASSES(BP_SILVA_108_12K, MINI_PC, 1, 0, 17622233, 542715, 0); |
---|
983 | |
---|
984 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, SMALL_PC, 194, 4, 32084148, 4973748, SWAPS); |
---|
985 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, SMALL_PC, 2, 1, 461074103, 3644812, 0); |
---|
986 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, SMALL_PC, 1, 0, 56223289, 767008, 0); |
---|
987 | TEST_DECIDES_PASSES(BP_SILVA_108_12K, SMALL_PC, 1, 0, 17622233, 542715, 0); |
---|
988 | |
---|
989 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, SMALL_SERVER, 3, 3, 1217700425, 10415541, 0); |
---|
990 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, SMALL_SERVER, 1, 0, 891481251, 5620314, 0); |
---|
991 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, SMALL_SERVER, 1, 0, 56223289, 767008, 0); |
---|
992 | TEST_DECIDES_PASSES(BP_SILVA_108_12K, SMALL_SERVER, 1, 0, 17622233, 542715, 0); |
---|
993 | |
---|
994 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MEDIUM_SERVER, 2, 1, 1858375961, 13356142, 0); |
---|
995 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEDIUM_SERVER, 1, 0, 891481251, 5620314, 0); |
---|
996 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, MEDIUM_SERVER, 1, 0, 56223289, 767008, 0); |
---|
997 | TEST_DECIDES_PASSES(BP_SILVA_108_12K, MEDIUM_SERVER, 1, 0, 17622233, 542715, 0); |
---|
998 | |
---|
999 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, BIG_SERVER, 1, 0, 3593147643UL, 21318473, 0); |
---|
1000 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, BIG_SERVER, 1, 0, 891481251, 5620314, 0); |
---|
1001 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, BIG_SERVER, 1, 0, 56223289, 767008, 0); |
---|
1002 | TEST_DECIDES_PASSES(BP_SILVA_108_12K, BIG_SERVER, 1, 0, 17622233, 542715, 0); |
---|
1003 | |
---|
1004 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, HUGE_SERVER, 1, 0, 3593147643UL, 21318473, 0); |
---|
1005 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, HUGE_SERVER, 1, 0, 891481251, 5620314, 0); |
---|
1006 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, HUGE_SERVER, 1, 0, 56223289, 767008, 0); |
---|
1007 | TEST_DECIDES_PASSES(BP_SILVA_108_12K, HUGE_SERVER, 1, 0, 17622233, 542715, 0); |
---|
1008 | |
---|
1009 | #else // !defined(ARB_64) => only test for situations with at most 4Gb |
---|
1010 | // ---------------- database --------- machine -- passes depth ---- probes ----- memuse - swap? |
---|
1011 | |
---|
1012 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, MEM5, 2, 1, 461074103, 2831431, 0); |
---|
1013 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM1, 3, 2, 328766946, 2327527, 0); |
---|
1014 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM2, 4, 2, 244526639, 2006690, 0); |
---|
1015 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, LMEM3, 7, 4, 129515581, 1568659, 0); |
---|
1016 | |
---|
1017 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM1, 1, 0, 56223289, 473837, 0); |
---|
1018 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM2, 1, 0, 56223289, 473837, 0); |
---|
1019 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM3, 1, 0, 56223289, 473837, 0); |
---|
1020 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM4, 1, 0, 56223289, 473837, 0); |
---|
1021 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM5, 1, 0, 56223289, 473837, 0); |
---|
1022 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM6, 1, 0, 56223289, 473837, 0); |
---|
1023 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, LMEM7, 2, 1, 29078685, 370454, 0); |
---|
1024 | |
---|
1025 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, MINI_PC, 194, 4, 32084148, 3835929, SWAPS); |
---|
1026 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, MINI_PC, 7, 4, 129515581, 1568659, 0); |
---|
1027 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, MINI_PC, 1, 0, 56223289, 473837, 0); |
---|
1028 | TEST_DECIDES_PASSES(BP_SILVA_108_12K, MINI_PC, 1, 0, 17622233, 289125, 0); |
---|
1029 | |
---|
1030 | TEST_DECIDES_PASSES(BP_SILVA_108_PARC, SMALL_PC, 194, 4, 32084148, 3835929, SWAPS); |
---|
1031 | TEST_DECIDES_PASSES(BP_SILVA_108_REF, SMALL_PC, 2, 1, 461074103, 2831431, 0); |
---|
1032 | TEST_DECIDES_PASSES(BP_SILVA_108_40K, SMALL_PC, 1, 0, 56223289, 473837, 0); |
---|
1033 | TEST_DECIDES_PASSES(BP_SILVA_108_12K, SMALL_PC, 1, 0, 17622233, 289125, 0); |
---|
1034 | |
---|
1035 | #endif |
---|
1036 | } |
---|
1037 | |
---|
1038 | void NOTEST_SLOW_maybe_build_tree() { |
---|
1039 | // does only test sth if DB is present. |
---|
1040 | |
---|
1041 | char dbarg[] = "-D" "extra_pt_src.arb"; |
---|
1042 | char *testDB = dbarg+2; |
---|
1043 | const char *resultPT = "extra_pt_src.arb.pt"; |
---|
1044 | const char *expectedPT = "extra_pt_src.arb_expected.pt"; |
---|
1045 | bool exists = GB_is_regularfile(testDB); |
---|
1046 | |
---|
1047 | if (exists) { |
---|
1048 | char pname[] = "fake_pt_server"; |
---|
1049 | char barg[] = "-build"; |
---|
1050 | char *argv[] = { |
---|
1051 | pname, |
---|
1052 | barg, |
---|
1053 | dbarg, |
---|
1054 | }; |
---|
1055 | |
---|
1056 | // build |
---|
1057 | int res = ARB_main(ARRAY_ELEMS(argv), argv); |
---|
1058 | TEST_EXPECT_EQUAL(res, EXIT_SUCCESS); |
---|
1059 | |
---|
1060 | // #define TEST_AUTO_UPDATE |
---|
1061 | #if defined(TEST_AUTO_UPDATE) |
---|
1062 | TEST_COPY_FILE(resultPT, expectedPT); |
---|
1063 | #else // !defined(TEST_AUTO_UPDATE) |
---|
1064 | TEST_EXPECT_FILES_EQUAL(resultPT, expectedPT); |
---|
1065 | #endif |
---|
1066 | } |
---|
1067 | } |
---|
1068 | |
---|
1069 | #endif // UNIT_TESTS |
---|
1070 | |
---|
1071 | // -------------------------------------------------------------------------------- |
---|