1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : PARS_dtree.cxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #include "PerfMeter.h" |
---|
12 | #include "pars_dtree.hxx" |
---|
13 | #include "pars_main.hxx" |
---|
14 | #include "pars_awars.h" |
---|
15 | #include "ap_tree_nlen.hxx" |
---|
16 | #include "ap_main.hxx" |
---|
17 | |
---|
18 | #include <AP_TreeColors.hxx> |
---|
19 | #include <AP_seq_dna.hxx> |
---|
20 | #include <AP_seq_protein.hxx> |
---|
21 | #include <AP_filter.hxx> |
---|
22 | |
---|
23 | #include <TreeCallbacks.hxx> |
---|
24 | |
---|
25 | #include <ColumnStat.hxx> |
---|
26 | #include <awt_sel_boxes.hxx> |
---|
27 | #include <awt_filter.hxx> |
---|
28 | |
---|
29 | #include <gui_aliview.hxx> |
---|
30 | |
---|
31 | #include <aw_preset.hxx> |
---|
32 | #include <aw_awar.hxx> |
---|
33 | #include <aw_msg.hxx> |
---|
34 | #include <arb_progress.h> |
---|
35 | #include <aw_root.hxx> |
---|
36 | #include <aw_question.hxx> |
---|
37 | |
---|
38 | static void AWT_graphic_parsimony_root_changed(void *cd, AP_tree *old, AP_tree *newroot) { |
---|
39 | AWT_graphic_tree *agt = (AWT_graphic_tree*)cd; |
---|
40 | UNCOVERED(); |
---|
41 | |
---|
42 | if (old == agt->get_logical_root()) agt->set_logical_root_to(newroot); |
---|
43 | } |
---|
44 | |
---|
45 | static AliView *pars_generate_aliview(WeightedFilter *pars_weighted_filter) { |
---|
46 | GBDATA *gb_main = pars_weighted_filter->get_gb_main(); |
---|
47 | char *ali_name; |
---|
48 | { |
---|
49 | GB_transaction ta(gb_main); |
---|
50 | ali_name = GBT_read_string(gb_main, AWAR_ALIGNMENT); |
---|
51 | } |
---|
52 | GB_ERROR error = NULp; |
---|
53 | AliView *aliview = pars_weighted_filter->create_aliview(ali_name, error); |
---|
54 | if (!aliview) aw_popup_exit(error); |
---|
55 | free(ali_name); |
---|
56 | return aliview; |
---|
57 | } |
---|
58 | |
---|
59 | void PARS_tree_init(AWT_graphic_parsimony *agt) { |
---|
60 | ap_assert(agt->get_root_node()); |
---|
61 | ap_assert(agt == ap_main->get_graphic_tree()); |
---|
62 | |
---|
63 | GBDATA *gb_main = ap_main->get_gb_main(); |
---|
64 | GB_transaction ta(gb_main); |
---|
65 | |
---|
66 | const char *use = ap_main->get_aliname(); |
---|
67 | long ali_len = GBT_get_alignment_len(gb_main, use); |
---|
68 | if (ali_len <= 1) { |
---|
69 | aw_popup_exit("No valid alignment selected! Try again"); |
---|
70 | } |
---|
71 | |
---|
72 | agt->get_tree_root()->set_root_changed_callback(AWT_graphic_parsimony_root_changed, agt); |
---|
73 | } |
---|
74 | |
---|
75 | QuadraticThreshold::QuadraticThreshold(KL_DYNAMIC_THRESHOLD_TYPE type, double startx, double maxy, double maxx, double maxDepth, Mutations pars_start) { |
---|
76 | // set a, b, and c for quadratic equation y = ax^2 + bx + c |
---|
77 | switch (type) { |
---|
78 | default: |
---|
79 | ap_assert(0); |
---|
80 | // fall-through |
---|
81 | case AP_QUADRAT_START: |
---|
82 | c = startx; |
---|
83 | a = (startx - maxy) / (maxx * maxx); |
---|
84 | b = -2.0 * a * maxx; |
---|
85 | break; |
---|
86 | |
---|
87 | case AP_QUADRAT_MAX: // unused (experimental) |
---|
88 | a = - maxy / ((maxDepth - maxx) * (maxDepth - maxx)); |
---|
89 | b = -2.0 * a * maxx; |
---|
90 | c = maxy + a * maxx * maxx; |
---|
91 | break; |
---|
92 | } |
---|
93 | c += pars_start; |
---|
94 | } |
---|
95 | |
---|
96 | void ArbParsimony::kernighan_optimize_tree(AP_tree_nlen *at, const KL_Settings& settings, const Mutations *pars_global_start, bool dumpPerf) { |
---|
97 | AP_tree_nlen *oldrootleft = get_root_node()->get_leftson(); |
---|
98 | AP_tree_nlen *oldrootright = get_root_node()->get_rightson(); |
---|
99 | Mutations pars_curr = get_root_node()->costs(); |
---|
100 | const Mutations pars_org = pars_curr; |
---|
101 | |
---|
102 | OptiPerfMeter performance("KL-recursion", pars_curr); |
---|
103 | |
---|
104 | // setup KL recursion parameters: |
---|
105 | KL_params KL; |
---|
106 | { |
---|
107 | KL.max_rec_depth = settings.maxdepth; ap_assert(KL.max_rec_depth>0); |
---|
108 | KL.inc_rec_depth = settings.incdepth; |
---|
109 | KL.thresFunctor = QuadraticThreshold(settings.Dynamic.type, settings.Dynamic.start, settings.Dynamic.maxy, settings.Dynamic.maxx, KL.max_rec_depth, pars_curr); |
---|
110 | KL.rec_type = KL_RECURSION_TYPE((settings.Dynamic.enabled*AP_DYNAMIK)|(settings.Static.enabled*AP_STATIC)); |
---|
111 | |
---|
112 | for (int x = 0; x<CUSTOM_DEPTHS; ++x) { |
---|
113 | KL.rec_width[x] = settings.Static.depth[x]; |
---|
114 | } |
---|
115 | KL.stopAtFoldedGroups = settings.whichEdges&SKIP_FOLDED_EDGES; |
---|
116 | } |
---|
117 | |
---|
118 | AP_tree_edge *startEdge = NULp; |
---|
119 | AP_tree_nlen *skipNode = NULp; |
---|
120 | bool skipStartEdge = true; |
---|
121 | |
---|
122 | if (!at->father) { |
---|
123 | startEdge = rootEdge(); |
---|
124 | skipStartEdge = false; |
---|
125 | } |
---|
126 | else if (at->is_son_of_root()) { |
---|
127 | startEdge = rootEdge(); |
---|
128 | skipNode = startEdge->otherNode(at); |
---|
129 | } |
---|
130 | else { |
---|
131 | skipNode = at->get_father(); |
---|
132 | startEdge = at->edgeTo(skipNode); |
---|
133 | } |
---|
134 | ap_assert(startEdge); |
---|
135 | |
---|
136 | EdgeChain chain(startEdge, EdgeSpec(SKIP_LEAF_EDGES|settings.whichEdges), true, skipNode, skipStartEdge); |
---|
137 | arb_progress progress(chain.size()); |
---|
138 | |
---|
139 | if (pars_global_start) { |
---|
140 | progress.subtitle(GBS_global_string("best=%li (gain=%li)", pars_curr, *pars_global_start-pars_curr)); |
---|
141 | } |
---|
142 | else { |
---|
143 | progress.subtitle(GBS_global_string("best=%li", pars_curr)); |
---|
144 | } |
---|
145 | |
---|
146 | if (skipStartEdge) startEdge->set_visited(true); // avoid traversal beyond startEdge |
---|
147 | |
---|
148 | while (chain && !progress.aborted()) { |
---|
149 | AP_tree_edge *edge = *chain; ++chain; |
---|
150 | |
---|
151 | ap_assert(!edge->is_leaf_edge()); |
---|
152 | ap_assert(implicated(KL.stopAtFoldedGroups, !edge->next_to_folded_group())); |
---|
153 | |
---|
154 | ap_main->remember(); |
---|
155 | |
---|
156 | bool better_tree_found = edge->kl_rec(KL, 0, pars_curr); |
---|
157 | |
---|
158 | if (better_tree_found) { |
---|
159 | ap_main->accept(); |
---|
160 | Mutations pars_new = get_root_node()->costs(); |
---|
161 | KL.thresFunctor.change_parsimony_start(pars_new-pars_curr); |
---|
162 | pars_curr = pars_new; |
---|
163 | if (pars_global_start) { |
---|
164 | progress.subtitle(GBS_global_string("best=%li (gain=%li, KL=%li)", pars_curr, *pars_global_start-pars_curr, pars_org-pars_curr)); |
---|
165 | } |
---|
166 | else { |
---|
167 | progress.subtitle(GBS_global_string("best=%li (gain=%li)", pars_curr, pars_org-pars_curr)); |
---|
168 | } |
---|
169 | } |
---|
170 | else { |
---|
171 | ap_main->revert(); |
---|
172 | } |
---|
173 | progress.inc(); |
---|
174 | } |
---|
175 | |
---|
176 | if (skipStartEdge) startEdge->set_visited(false); // reallow traversal beyond startEdge |
---|
177 | |
---|
178 | if (dumpPerf) performance.dump(stdout, pars_curr); |
---|
179 | |
---|
180 | if (oldrootleft->father == oldrootright) { |
---|
181 | oldrootleft->set_root(); |
---|
182 | } |
---|
183 | else { |
---|
184 | oldrootright->set_root(); |
---|
185 | } |
---|
186 | } |
---|
187 | |
---|
188 | |
---|
189 | |
---|
190 | void ArbParsimony::optimize_tree(AP_tree_nlen *at, const KL_Settings& settings, arb_progress& progress) { |
---|
191 | AP_tree_nlen *oldrootleft = get_root_node()->get_leftson(); |
---|
192 | AP_tree_nlen *oldrootright = get_root_node()->get_rightson(); |
---|
193 | const Mutations org_pars = get_root_node()->costs(); |
---|
194 | Mutations prev_pars = org_pars; |
---|
195 | |
---|
196 | OptiPerfMeter overallPerf("global optimization", org_pars); |
---|
197 | |
---|
198 | progress.subtitle(GBS_global_string("best=%li", org_pars)); |
---|
199 | |
---|
200 | // define available heuristics |
---|
201 | enum Heuristic { |
---|
202 | RECURSIVE_NNI, |
---|
203 | CUSTOM_KL, |
---|
204 | |
---|
205 | NO_FURTHER_HEURISTIC, |
---|
206 | HEURISTIC_COUNT = NO_FURTHER_HEURISTIC, |
---|
207 | } heuristic = RECURSIVE_NNI; |
---|
208 | |
---|
209 | struct H_Settings { |
---|
210 | const char *name; // name shown in OptiPerfMeter |
---|
211 | const KL_Settings *kl; // ==NULp -> NNI; else KL with these settings |
---|
212 | bool repeat; // retry same heuristic when tree improved |
---|
213 | Heuristic onImprove; // continue with this heuristic if improved (repeated or not) |
---|
214 | Heuristic onFailure; // continue with this heuristic if NOT improved |
---|
215 | } heuristic_setting[HEURISTIC_COUNT] = { |
---|
216 | { "recursive NNIs", NULp, true, CUSTOM_KL, CUSTOM_KL }, |
---|
217 | { "KL-optimizer", &settings, false, RECURSIVE_NNI, NO_FURTHER_HEURISTIC }, |
---|
218 | }; |
---|
219 | |
---|
220 | Mutations heu_start_pars = prev_pars; |
---|
221 | OptiPerfMeter *heuPerf = NULp; |
---|
222 | |
---|
223 | #if defined(ASSERTION_USED) |
---|
224 | bool at_is_root = at == rootNode(); |
---|
225 | #endif |
---|
226 | |
---|
227 | { |
---|
228 | arb_suppress_progress quiet; |
---|
229 | |
---|
230 | while (heuristic != NO_FURTHER_HEURISTIC && !progress.aborted()) { |
---|
231 | const H_Settings& hset = heuristic_setting[heuristic]; |
---|
232 | if (!heuPerf) { |
---|
233 | ap_assert(heu_start_pars == prev_pars); |
---|
234 | heuPerf = new OptiPerfMeter(hset.name, heu_start_pars); |
---|
235 | } |
---|
236 | |
---|
237 | Mutations this_pars(-1); |
---|
238 | if (hset.kl) { |
---|
239 | kernighan_optimize_tree(at, *hset.kl, &org_pars, false); |
---|
240 | this_pars = get_root_node()->costs(); |
---|
241 | } |
---|
242 | else { |
---|
243 | this_pars = at->nn_interchange_rec(settings.whichEdges, AP_BL_NNI_ONLY); |
---|
244 | } |
---|
245 | ap_assert(this_pars>=0); // ensure this_pars was set |
---|
246 | ap_assert(this_pars<=prev_pars); // otherwise heuristic worsened the tree |
---|
247 | |
---|
248 | ap_assert(at_is_root == (at == rootNode())); |
---|
249 | |
---|
250 | bool dumpOverall = false; |
---|
251 | Heuristic nextHeuristic = heuristic; |
---|
252 | if (this_pars<prev_pars) { // found better tree |
---|
253 | prev_pars = this_pars; |
---|
254 | progress.subtitle(GBS_global_string("best=%li (gain=%li)", this_pars, org_pars-this_pars)); |
---|
255 | if (!hset.repeat) { |
---|
256 | nextHeuristic = hset.onImprove; |
---|
257 | dumpOverall = heuristic == CUSTOM_KL; |
---|
258 | } |
---|
259 | } |
---|
260 | else { // last step did not find better tree |
---|
261 | nextHeuristic = this_pars<heu_start_pars ? hset.onImprove : hset.onFailure; |
---|
262 | } |
---|
263 | |
---|
264 | if (nextHeuristic != heuristic) { |
---|
265 | heuristic = nextHeuristic; |
---|
266 | heu_start_pars = this_pars; |
---|
267 | |
---|
268 | heuPerf->dump(stdout, this_pars); |
---|
269 | delete heuPerf; heuPerf = NULp; |
---|
270 | } |
---|
271 | if (dumpOverall) overallPerf.dumpCustom(stdout, this_pars, "overall (so far)"); |
---|
272 | } |
---|
273 | } |
---|
274 | |
---|
275 | if (oldrootleft->father == oldrootright) { |
---|
276 | oldrootleft->set_root(); |
---|
277 | } |
---|
278 | else { |
---|
279 | oldrootright->set_root(); |
---|
280 | } |
---|
281 | |
---|
282 | overallPerf.dump(stdout, prev_pars); |
---|
283 | } |
---|
284 | |
---|
285 | AWT_graphic_parsimony::AWT_graphic_parsimony(ArbParsimony& parsimony_, GBDATA *gb_main_, AD_map_viewer_cb map_viewer_cb_) |
---|
286 | : AWT_graphic_tree(AW_root::SINGLETON, gb_main_, map_viewer_cb_), |
---|
287 | parsimony(parsimony_) |
---|
288 | {} |
---|
289 | |
---|
290 | AP_tree_root *AWT_graphic_parsimony::create_tree_root(AliView *aliview, AP_sequence *seq_prototype, bool insert_delete_cbs) { |
---|
291 | return new AP_pars_root(aliview, seq_prototype, insert_delete_cbs, &groupScale); |
---|
292 | } |
---|
293 | |
---|
294 | |
---|
295 | void ArbParsimony::generate_tree(WeightedFilter *pars_weighted_filter) { |
---|
296 | AliView *aliview = pars_generate_aliview(pars_weighted_filter); |
---|
297 | AP_sequence *seq_templ = NULp; |
---|
298 | |
---|
299 | GBDATA *gb_main = aliview->get_gb_main(); |
---|
300 | { |
---|
301 | GB_transaction ta(gb_main); |
---|
302 | bool is_aa = GBT_is_alignment_protein(gb_main, aliview->get_aliname()); |
---|
303 | |
---|
304 | if (is_aa) seq_templ = new AP_sequence_protein(aliview); |
---|
305 | else seq_templ = new AP_sequence_parsimony(aliview); |
---|
306 | } |
---|
307 | |
---|
308 | AWT_graphic_parsimony *new_tree = new AWT_graphic_parsimony(*this, aliview->get_gb_main(), PARS_map_viewer); |
---|
309 | new_tree->init(aliview, seq_templ, true, false); |
---|
310 | set_tree(new_tree); |
---|
311 | } |
---|
312 | |
---|
313 | AW_gc_manager *AWT_graphic_parsimony::init_devices(AW_window *aww, AW_device *device, AWT_canvas* ntw) { |
---|
314 | AW_init_color_group_defaults("arb_ntree"); |
---|
315 | |
---|
316 | AW_gc_manager *gc_manager = |
---|
317 | AW_manage_GC(aww, |
---|
318 | ntw->get_gc_base_name(), |
---|
319 | device, AWT_GC_MAX, AW_GCM_DATA_AREA, |
---|
320 | makeGcChangedCallback(TREE_GC_changed_cb, ntw), |
---|
321 | "#AAAA55", |
---|
322 | |
---|
323 | // Important note : |
---|
324 | // Many gc indices are shared between ABR_NTREE and ARB_PARSIMONY |
---|
325 | // e.g. the tree drawing routines use same gc's for drawing both trees |
---|
326 | // (keep in sync with ../SL/TREEDISP/TreeDisplay.cxx@init_devices) |
---|
327 | |
---|
328 | "Cursor$#FFFFFF", |
---|
329 | "Branch remarks$#DBE994", |
---|
330 | "+-Bootstrap$#DBE994", "-B.(limited)$white", |
---|
331 | "!1", // reserve GC-number used for "IRS group box" in arb_ntree |
---|
332 | "Marked$#FFFF00", |
---|
333 | "Some marked$#eeee88", |
---|
334 | "Not marked$black", |
---|
335 | "Zombies etc.$#cc5924", |
---|
336 | |
---|
337 | "!14", // reserve 14 GC-numbers which are used for probe colors in ARB_NTREE |
---|
338 | // (namely 'None', 'All' and 'P1' up to 'P12') |
---|
339 | |
---|
340 | "&color_groups", // use color groups |
---|
341 | |
---|
342 | NULp); |
---|
343 | return gc_manager; |
---|
344 | } |
---|
345 | |
---|
346 | void AWT_graphic_parsimony::show(AW_device *device) { |
---|
347 | AP_tree_nlen *root_node = parsimony.get_root_node(); |
---|
348 | AW_awar *awar_pars = get_root()->awar(AWAR_PARSIMONY); |
---|
349 | AW_awar *awar_best = get_root()->awar(AWAR_BEST_PARSIMONY); |
---|
350 | |
---|
351 | long parsval = root_node ? root_node->costs() : 0; |
---|
352 | awar_pars->write_int(parsval); |
---|
353 | |
---|
354 | long best_pars = awar_best->read_int(); |
---|
355 | if (parsval < best_pars || 0==best_pars) awar_best->write_int(parsval); |
---|
356 | |
---|
357 | AWT_graphic_tree::show(device); |
---|
358 | } |
---|
359 | |
---|
360 | void AWT_graphic_parsimony::handle_command(AW_device *device, AWT_graphic_event& event) { |
---|
361 | ClickedTarget clicked(this, event.best_click()); |
---|
362 | bool recalc_branchlengths_on_structure_change = true; |
---|
363 | |
---|
364 | switch (event.cmd()) { |
---|
365 | case AWT_MODE_NNI: |
---|
366 | case AWT_MODE_KERNINGHAN: |
---|
367 | case AWT_MODE_OPTIMIZE: { |
---|
368 | const char *what = NULp; |
---|
369 | const char *where = NULp; |
---|
370 | |
---|
371 | switch (event.cmd()) { |
---|
372 | case AWT_MODE_NNI: what = "Recursive NNI on"; break; |
---|
373 | case AWT_MODE_KERNINGHAN: what = "K.L. optimize"; break; |
---|
374 | case AWT_MODE_OPTIMIZE: what = "Global optimize"; break; |
---|
375 | default: break; |
---|
376 | } |
---|
377 | |
---|
378 | AP_tree_nlen *startNode = NULp; |
---|
379 | bool repeatOpti = true; |
---|
380 | |
---|
381 | if (event.type() == AW_Mouse_Press) { |
---|
382 | switch (event.button()) { |
---|
383 | case AW_BUTTON_LEFT: |
---|
384 | repeatOpti = false; |
---|
385 | // fall-through |
---|
386 | case AW_BUTTON_RIGHT: |
---|
387 | startNode = DOWNCAST(AP_tree_nlen*, clicked.node()); |
---|
388 | where = (startNode == get_root_node()) ? "tree" : "subtree"; |
---|
389 | break; |
---|
390 | |
---|
391 | default: |
---|
392 | break; |
---|
393 | } |
---|
394 | } |
---|
395 | |
---|
396 | if (what && where) { |
---|
397 | arb_progress progress(GBS_global_string("%s %s", what, where)); |
---|
398 | |
---|
399 | Mutations start_pars = get_root_node()->costs(); |
---|
400 | Mutations curr_pars = start_pars; |
---|
401 | |
---|
402 | KL_Settings KL(get_root()); |
---|
403 | |
---|
404 | do { |
---|
405 | Mutations prev_pars = curr_pars; |
---|
406 | |
---|
407 | switch (event.cmd()) { |
---|
408 | case AWT_MODE_NNI: |
---|
409 | startNode->nn_interchange_rec(KL.whichEdges, AP_BL_NNI_ONLY); |
---|
410 | break; |
---|
411 | case AWT_MODE_KERNINGHAN: |
---|
412 | parsimony.kernighan_optimize_tree(startNode, KL, &start_pars, true); |
---|
413 | break; |
---|
414 | case AWT_MODE_OPTIMIZE: |
---|
415 | parsimony.optimize_tree(startNode, KL, progress); |
---|
416 | repeatOpti = false; // never loop here (optimize_tree already loops until no further improvement) |
---|
417 | break; |
---|
418 | default: |
---|
419 | repeatOpti = false; |
---|
420 | break; |
---|
421 | } |
---|
422 | |
---|
423 | curr_pars = get_root_node()->costs(); |
---|
424 | repeatOpti = repeatOpti && curr_pars<prev_pars; |
---|
425 | } while (repeatOpti); |
---|
426 | |
---|
427 | exports.request_save(); |
---|
428 | ASSERT_VALID_TREE(get_root_node()); |
---|
429 | } |
---|
430 | break; |
---|
431 | } |
---|
432 | |
---|
433 | default: |
---|
434 | recalc_branchlengths_on_structure_change = false; |
---|
435 | FALLTHROUGH; // unlisted modes trigger branchlength calculation internally when needed |
---|
436 | case AWT_MODE_MOVE: |
---|
437 | AWT_graphic_tree::handle_command(device, event); |
---|
438 | break; |
---|
439 | } |
---|
440 | |
---|
441 | if (exports.needs_save() && recalc_branchlengths_on_structure_change) { |
---|
442 | arb_progress progress("Recalculating branch lengths"); |
---|
443 | rootEdge()->calc_branchlengths(); |
---|
444 | reorderTree(BIG_BRANCHES_TO_TOP); // beautify after recalc_branch_lengths |
---|
445 | } |
---|
446 | } |
---|
447 | |
---|
448 | |
---|
449 | // -------------------------------------------------------------------------------- |
---|
450 | |
---|
451 | #ifdef UNIT_TESTS |
---|
452 | #include <arb_diff.h> |
---|
453 | #include <test_unit.h> |
---|
454 | #include "test_env.h" |
---|
455 | |
---|
456 | template<typename SEQTYPE> |
---|
457 | PARSIMONY_testenv<SEQTYPE>::PARSIMONY_testenv(const char *dbname, const char *aliName) |
---|
458 | : parsimony(), |
---|
459 | klSettings(NULp) |
---|
460 | { |
---|
461 | common_init(dbname); |
---|
462 | GBDATA *gb_main = ap_main->get_gb_main(); |
---|
463 | GB_transaction ta(gb_main); |
---|
464 | |
---|
465 | size_t aliLength = GBT_get_alignment_len(gb_main, aliName); |
---|
466 | ap_assert(aliLength>0); |
---|
467 | |
---|
468 | klSettings = new KL_Settings(); |
---|
469 | |
---|
470 | AP_filter filter(aliLength); |
---|
471 | if (!filter.is_invalid()) { |
---|
472 | AP_weights weights(&filter); |
---|
473 | agt->init(new AliView(gb_main, filter, weights, aliName)); |
---|
474 | } |
---|
475 | } |
---|
476 | |
---|
477 | template PARSIMONY_testenv<AP_sequence_protein>::PARSIMONY_testenv(const char *dbname, const char *aliName); // explicit instanciation (otherwise link error in unittest) |
---|
478 | template PARSIMONY_testenv<AP_sequence_parsimony>::PARSIMONY_testenv(const char *dbname, const char *aliName); // explicit instanciation (as above, but only happens with gcc 4.6.3/NDEBUG) |
---|
479 | |
---|
480 | |
---|
481 | void TEST_basic_tree_modifications() { |
---|
482 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb"); |
---|
483 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_test")); |
---|
484 | |
---|
485 | { |
---|
486 | AP_tree_nlen *root = env.root_node(); |
---|
487 | |
---|
488 | // first check initial state: |
---|
489 | { |
---|
490 | AP_tree_members& root_info = root->gr; |
---|
491 | |
---|
492 | TEST_EXPECT_EQUAL(root_info.grouped, false); |
---|
493 | TEST_EXPECT_EQUAL(root_info.hidden, false); |
---|
494 | TEST_EXPECT_EQUAL(root_info.mark_sum, 6); |
---|
495 | TEST_EXPECT_EQUAL(root_info.leaf_sum, 15); |
---|
496 | |
---|
497 | TEST_EXPECT_SIMILAR(root_info.max_tree_depth, 1.624975, 0.000001); |
---|
498 | TEST_EXPECT_SIMILAR(root_info.min_tree_depth, 0.341681, 0.000001); |
---|
499 | |
---|
500 | GB_transaction ta(env.gbmain()); |
---|
501 | GBT_mark_all(env.gbmain(), 0); // unmark all species |
---|
502 | root->compute_tree(); |
---|
503 | TEST_EXPECT_EQUAL(root_info.mark_sum, 0); |
---|
504 | } |
---|
505 | |
---|
506 | |
---|
507 | #define B1_TOP "(((((CloTyro3:1.046,CloTyro4:0.061):0.026,CloTyro2:0.017):0.017,CloTyrob:0.009):0.274,CloInnoc:0.371):0.057,CloBifer:0.388):0.124" |
---|
508 | #define B1_BOT "(CloBifer:0.388,(CloInnoc:0.371,(CloTyrob:0.009,(CloTyro2:0.017,(CloTyro3:1.046,CloTyro4:0.061):0.026):0.017):0.274):0.057):0.124" |
---|
509 | #define B2_TOP "(((CloButy2:0.009,CloButyr:0.000):0.564,CloCarni:0.120):0.010,CloPaste:0.179):0.131" |
---|
510 | #define B2_BOT "(CloPaste:0.179,(CloCarni:0.120,(CloButy2:0.009,CloButyr:0.000):0.564):0.010):0.131" |
---|
511 | |
---|
512 | |
---|
513 | #define B3_LEFT_TOP_SONS "(((CorAquat:0.084,CurCitre:0.058):0.103,CorGluta:0.522):0.053,CelBiazo:0.059)" |
---|
514 | #define B3_TOP_SONS B3_LEFT_TOP_SONS ":0.207,CytAquat:0.711" |
---|
515 | #define B3_TOP_SONS_CCR "((CorAquat:0.187,CorGluta:0.522):0.053,CelBiazo:0.059):0.207,CytAquat:0.711" // CCR = CurCitre removed |
---|
516 | #define B3_TOP "(" B3_TOP_SONS "):0.081" |
---|
517 | #define B3_BOT "(CytAquat:0.711,(CelBiazo:0.059,(CorGluta:0.522,(CorAquat:0.084,CurCitre:0.058):0.103):0.053):0.207):0.081" |
---|
518 | |
---|
519 | |
---|
520 | const char *top_topo = "((" B1_TOP "," B2_TOP "):0.081," B3_TOP ");"; |
---|
521 | const char *edge_topo = "((" B1_TOP "," B2_BOT "):0.081," B3_BOT ");"; |
---|
522 | const char *bottom_topo = "(" B3_BOT ",(" B2_BOT "," B1_BOT "):0.081);"; |
---|
523 | |
---|
524 | const char *radial_topo = |
---|
525 | "(((CloPaste:0.179,((CloButy2:0.009,CloButyr:0.000):0.564,CloCarni:0.120):0.010):0.131," |
---|
526 | "((CloInnoc:0.371,((CloTyro2:0.017,(CloTyro3:1.046,CloTyro4:0.061):0.026):0.017,CloTyrob:0.009):0.274):0.057,CloBifer:0.388):0.124):0.081," |
---|
527 | "((CelBiazo:0.059,((CorAquat:0.084,CurCitre:0.058):0.103,CorGluta:0.522):0.053):0.207,CytAquat:0.711):0.081);"; |
---|
528 | const char *radial_topo2 = |
---|
529 | "(((CloBifer:0.388,(CloInnoc:0.371,(((CloTyro3:1.046,CloTyro4:0.061):0.026,CloTyro2:0.017):0.017,CloTyrob:0.009):0.274):0.057):0.124," B2_TOP "):0.081," |
---|
530 | "(CytAquat:0.711," B3_LEFT_TOP_SONS ":0.207):0.081);"; |
---|
531 | |
---|
532 | // expect that no mode reproduces another mode: |
---|
533 | TEST_EXPECT_DIFFERENT(top_topo, edge_topo); |
---|
534 | TEST_EXPECT_DIFFERENT(top_topo, bottom_topo); |
---|
535 | TEST_EXPECT_DIFFERENT(top_topo, radial_topo); |
---|
536 | TEST_EXPECT_DIFFERENT(top_topo, radial_topo2); |
---|
537 | TEST_EXPECT_DIFFERENT(edge_topo, bottom_topo); |
---|
538 | TEST_EXPECT_DIFFERENT(edge_topo, radial_topo); |
---|
539 | TEST_EXPECT_DIFFERENT(edge_topo, radial_topo2); |
---|
540 | TEST_EXPECT_DIFFERENT(bottom_topo, radial_topo); |
---|
541 | TEST_EXPECT_DIFFERENT(bottom_topo, radial_topo2); |
---|
542 | TEST_EXPECT_DIFFERENT(radial_topo, radial_topo2); |
---|
543 | |
---|
544 | env.push(); // 1st stack level (=top_topo) |
---|
545 | |
---|
546 | TEST_EXPECT_VALID_TREE(root); |
---|
547 | |
---|
548 | TEST_EXPECT_NEWICK(nLENGTH, root, top_topo); |
---|
549 | // test reorder_tree: |
---|
550 | root->reorder_tree(BIG_BRANCHES_TO_EDGE); TEST_EXPECT_NEWICK(nLENGTH, root, edge_topo); env.push(); // 2nd stack level (=edge_topo) |
---|
551 | root->reorder_tree(BIG_BRANCHES_TO_BOTTOM); TEST_EXPECT_NEWICK(nLENGTH, root, bottom_topo); env.push(); // 3rd stack level (=bottom_topo) |
---|
552 | root->reorder_tree(BIG_BRANCHES_TO_CENTER); TEST_EXPECT_NEWICK(nLENGTH, root, radial_topo); |
---|
553 | root->reorder_tree(BIG_BRANCHES_ALTERNATING); TEST_EXPECT_NEWICK(nLENGTH, root, radial_topo2); |
---|
554 | root->reorder_tree(BIG_BRANCHES_TO_TOP); TEST_EXPECT_NEWICK(nLENGTH, root, top_topo); |
---|
555 | |
---|
556 | TEST_EXPECT_VALID_TREE(root); |
---|
557 | |
---|
558 | // test set root: |
---|
559 | AP_tree_nlen *CloTyrob = root->findLeafNamed("CloTyrob"); |
---|
560 | TEST_REJECT_NULL(CloTyrob); |
---|
561 | |
---|
562 | ARB_edge rootEdge(root->get_leftson(), root->get_rightson()); |
---|
563 | CloTyrob->set_root(); |
---|
564 | |
---|
565 | TEST_EXPECT_VALID_TREE(root); |
---|
566 | |
---|
567 | const char *rootAtCloTyrob_topo = |
---|
568 | "(CloTyrob:0.004," |
---|
569 | "(((CloTyro3:1.046,CloTyro4:0.061):0.026,CloTyro2:0.017):0.017," |
---|
570 | "((((" B3_TOP_SONS "):0.162," B2_TOP "):0.124,CloBifer:0.388):0.057,CloInnoc:0.371):0.274):0.004);"; |
---|
571 | |
---|
572 | TEST_EXPECT_NEWICK(nLENGTH, root, rootAtCloTyrob_topo); |
---|
573 | env.push(); // 4th stack level (=rootAtCloTyrob_topo) |
---|
574 | |
---|
575 | TEST_EXPECT_VALID_TREE(root); |
---|
576 | |
---|
577 | AP_tree_nlen *CelBiazoFather = root->findLeafNamed("CelBiazo")->get_father(); |
---|
578 | TEST_REJECT_NULL(CelBiazoFather); |
---|
579 | CelBiazoFather->set_root(); |
---|
580 | |
---|
581 | const char *rootAtCelBiazoFather_topo = "(" B3_LEFT_TOP_SONS ":0.104,((" B1_TOP "," B2_TOP "):0.162,CytAquat:0.711):0.104);"; |
---|
582 | TEST_EXPECT_NEWICK(nLENGTH, root, rootAtCelBiazoFather_topo); |
---|
583 | |
---|
584 | TEST_EXPECT_VALID_TREE(root); |
---|
585 | |
---|
586 | ARB_edge oldRootEdge(rootEdge.source(), rootEdge.dest()); |
---|
587 | DOWNCAST(AP_tree_nlen*,oldRootEdge.son())->set_root(); |
---|
588 | |
---|
589 | const char *rootSetBack_topo = top_topo; |
---|
590 | TEST_EXPECT_NEWICK(nLENGTH, root, rootSetBack_topo); |
---|
591 | env.push(); // 5th stack level (=rootSetBack_topo) |
---|
592 | |
---|
593 | TEST_EXPECT_VALID_TREE(root); |
---|
594 | |
---|
595 | // test remove: |
---|
596 | AP_tree_nlen *CurCitre = root->findLeafNamed("CurCitre"); |
---|
597 | TEST_REJECT_NULL(CurCitre); |
---|
598 | TEST_REJECT_NULL(CurCitre->get_father()); |
---|
599 | |
---|
600 | CurCitre->REMOVE(); |
---|
601 | const char *CurCitre_removed_topo = "((" B1_TOP "," B2_TOP "):0.081,(" B3_TOP_SONS_CCR "):0.081);"; |
---|
602 | // ------------------------------------------------------------------- ^^^ = B3_TOP_SONS minus CurCitre |
---|
603 | TEST_EXPECT_NEWICK(nLENGTH, root, CurCitre_removed_topo); |
---|
604 | |
---|
605 | TEST_EXPECT_VALID_TREE(root); |
---|
606 | TEST_EXPECT_VALID_TREE(CurCitre); |
---|
607 | |
---|
608 | TEST_EXPECT_EQUAL(root->gr.leaf_sum, 15); // out of date |
---|
609 | root->compute_tree(); |
---|
610 | TEST_EXPECT_EQUAL(root->gr.leaf_sum, 14); |
---|
611 | |
---|
612 | env.push(); // 6th stack level (=CurCitre_removed_topo) |
---|
613 | |
---|
614 | TEST_EXPECT_VALID_TREE(root); |
---|
615 | |
---|
616 | // test insert: |
---|
617 | AP_tree_nlen *CloCarni = root->findLeafNamed("CloCarni"); |
---|
618 | TEST_REJECT_NULL(CloCarni); |
---|
619 | CurCitre->insert(CloCarni); // this creates two extra edges (not destroyed by destroy() below) and one extra node |
---|
620 | |
---|
621 | const char *CurCitre_inserted_topo = "((" B1_TOP ",(((CloButy2:0.009,CloButyr:0.000):0.564,(CurCitre:0.060,CloCarni:0.060):0.060):0.010,CloPaste:0.179):0.131):0.081,(" B3_TOP_SONS_CCR "):0.081);"; |
---|
622 | TEST_EXPECT_NEWICK(nLENGTH, root, CurCitre_inserted_topo); |
---|
623 | |
---|
624 | TEST_EXPECT_VALID_TREE(root); |
---|
625 | |
---|
626 | // now check pops: |
---|
627 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, CurCitre_removed_topo); TEST_EXPECT_VALID_TREE(root); |
---|
628 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, rootSetBack_topo); TEST_EXPECT_VALID_TREE(root); |
---|
629 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, rootAtCloTyrob_topo); TEST_EXPECT_VALID_TREE(root); |
---|
630 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, bottom_topo); TEST_EXPECT_VALID_TREE(root); |
---|
631 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, edge_topo); TEST_EXPECT_VALID_TREE(root); |
---|
632 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, top_topo); TEST_EXPECT_VALID_TREE(root); |
---|
633 | } |
---|
634 | } |
---|
635 | |
---|
636 | void TEST_calc_bootstraps() { |
---|
637 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", "ali_5s"); |
---|
638 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_test")); |
---|
639 | |
---|
640 | const char *bs_origi_topo = "(((((((CloTyro3,CloTyro4)'40%',CloTyro2)'0%',CloTyrob)'97%',CloInnoc)'0%',CloBifer)'53%',(((CloButy2,CloButyr),CloCarni)'33%',CloPaste)'97%'),((((CorAquat,CurCitre),CorGluta)'17%',CelBiazo)'40%',CytAquat));"; |
---|
641 | const char *bs_limit_topo = "(((((((CloTyro3,CloTyro4)'87%',CloTyro2)'0%',CloTyrob)'100%',CloInnoc)'87%',CloBifer)'83%',(((CloButy2,CloButyr)'99%',CloCarni)'17%',CloPaste)'56%')'61%',((((CorAquat,CurCitre)'78%',CorGluta)'0%',CelBiazo)'59%',CytAquat)'61%');"; |
---|
642 | const char *bs_estim_topo = "(((((((CloTyro3,CloTyro4)'75%',CloTyro2)'0%',CloTyrob)'100%',CloInnoc)'75%',CloBifer)'78%',(((CloButy2,CloButyr)'99%',CloCarni)'13%',CloPaste)'32%')'53%',((((CorAquat,CurCitre)'74%',CorGluta)'0%',CelBiazo)'56%',CytAquat)'53%');"; |
---|
643 | |
---|
644 | { |
---|
645 | AP_tree_nlen *root = env.root_node(); |
---|
646 | AP_tree_edge *root_edge = rootEdge(); |
---|
647 | |
---|
648 | TEST_EXPECT(root && root_edge); |
---|
649 | |
---|
650 | root->reorder_tree(BIG_BRANCHES_TO_TOP); TEST_EXPECT_NEWICK(nREMARK, root, bs_origi_topo); |
---|
651 | |
---|
652 | TEST_EXPECT_COMBINES_PERFORMED(env, 0); |
---|
653 | |
---|
654 | root_edge->nni_rec(ANY_EDGE, AP_BL_MODE(AP_BL_BL_ONLY|AP_BL_BOOTSTRAP_LIMIT), NULp, true); |
---|
655 | root->reorder_tree(BIG_BRANCHES_TO_TOP); |
---|
656 | TEST_EXPECT_NEWICK(nREMARK, root, bs_limit_topo); |
---|
657 | TEST_EXPECT_COMBINES_PERFORMED(env, 170); |
---|
658 | |
---|
659 | root_edge->nni_rec(ANY_EDGE, AP_BL_MODE(AP_BL_BL_ONLY|AP_BL_BOOTSTRAP_ESTIMATE), NULp, true); |
---|
660 | root->reorder_tree(BIG_BRANCHES_TO_TOP); |
---|
661 | TEST_EXPECT_NEWICK(nREMARK, root, bs_estim_topo); |
---|
662 | TEST_EXPECT_COMBINES_PERFORMED(env, 156); |
---|
663 | |
---|
664 | TEST_EXPECT_EQUAL(env.root_node(), root); |
---|
665 | } |
---|
666 | |
---|
667 | } |
---|
668 | |
---|
669 | void TEST_tree_remove_add_all() { |
---|
670 | // reproduces crash as described in #527 |
---|
671 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", "ali_5s"); |
---|
672 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_nj")); |
---|
673 | |
---|
674 | const int LEAFS = 6; |
---|
675 | AP_tree_nlen *leaf[LEAFS]; |
---|
676 | const char *name[LEAFS] = { |
---|
677 | "CloButy2", |
---|
678 | "CloButyr", |
---|
679 | "CytAquat", |
---|
680 | "CorAquat", |
---|
681 | "CurCitre", |
---|
682 | "CorGluta", |
---|
683 | }; |
---|
684 | |
---|
685 | AP_tree_nlen *root = env.root_node(); |
---|
686 | |
---|
687 | for (int i = 0; i<LEAFS; ++i) { |
---|
688 | leaf[i] = root->findLeafNamed(name[i]); |
---|
689 | TEST_REJECT_NULL(leaf[i]); |
---|
690 | } |
---|
691 | |
---|
692 | TEST_EXPECT_VALID_TREE(root); |
---|
693 | |
---|
694 | AP_pars_root *troot = leaf[0]->get_tree_root(); |
---|
695 | TEST_REJECT_NULL(troot); |
---|
696 | |
---|
697 | for (int i = 0; i<LEAFS-1; ++i) { |
---|
698 | // Note: removing the second to last leaf, "removes" both remaining |
---|
699 | // leafs (but only destroys their father node) |
---|
700 | |
---|
701 | TEST_EXPECT_VALID_TREE(root); |
---|
702 | leaf[i]->REMOVE(); |
---|
703 | TEST_EXPECT_VALID_TREE(leaf[i]); |
---|
704 | } |
---|
705 | |
---|
706 | leaf[0]->initial_insert(leaf[1], troot); |
---|
707 | for (int i = 2; i<LEAFS; ++i) { |
---|
708 | TEST_EXPECT_VALID_TREE(leaf[i-1]); |
---|
709 | TEST_EXPECT_VALID_TREE(leaf[i]); |
---|
710 | leaf[i]->insert(leaf[i-1]); |
---|
711 | } |
---|
712 | } |
---|
713 | |
---|
714 | #endif // UNIT_TESTS |
---|
715 | |
---|
716 | // -------------------------------------------------------------------------------- |
---|