| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : PARS_dtree.cxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // =============================================================== // |
|---|
| 10 | |
|---|
| 11 | #include "PerfMeter.h" |
|---|
| 12 | #include "pars_dtree.hxx" |
|---|
| 13 | #include "pars_main.hxx" |
|---|
| 14 | #include "pars_awars.h" |
|---|
| 15 | #include "ap_tree_nlen.hxx" |
|---|
| 16 | #include "ap_main.hxx" |
|---|
| 17 | |
|---|
| 18 | #include <AP_TreeColors.hxx> |
|---|
| 19 | #include <AP_seq_dna.hxx> |
|---|
| 20 | #include <AP_seq_protein.hxx> |
|---|
| 21 | #include <AP_filter.hxx> |
|---|
| 22 | |
|---|
| 23 | #include <TreeCallbacks.hxx> |
|---|
| 24 | |
|---|
| 25 | #include <ColumnStat.hxx> |
|---|
| 26 | #include <awt_sel_boxes.hxx> |
|---|
| 27 | #include <awt_filter.hxx> |
|---|
| 28 | |
|---|
| 29 | #include <gui_aliview.hxx> |
|---|
| 30 | |
|---|
| 31 | #include <aw_preset.hxx> |
|---|
| 32 | #include <aw_awar.hxx> |
|---|
| 33 | #include <aw_msg.hxx> |
|---|
| 34 | #include <arb_progress.h> |
|---|
| 35 | #include <aw_root.hxx> |
|---|
| 36 | #include <aw_question.hxx> |
|---|
| 37 | |
|---|
| 38 | static void AWT_graphic_parsimony_root_changed(void *cd, AP_tree *old, AP_tree *newroot) { |
|---|
| 39 | AWT_graphic_tree *agt = (AWT_graphic_tree*)cd; |
|---|
| 40 | UNCOVERED(); |
|---|
| 41 | |
|---|
| 42 | if (old == agt->get_logical_root()) agt->set_logical_root_to(newroot); |
|---|
| 43 | } |
|---|
| 44 | |
|---|
| 45 | static AliView *pars_generate_aliview(WeightedFilter *pars_weighted_filter) { |
|---|
| 46 | GBDATA *gb_main = pars_weighted_filter->get_gb_main(); |
|---|
| 47 | char *ali_name; |
|---|
| 48 | { |
|---|
| 49 | GB_transaction ta(gb_main); |
|---|
| 50 | ali_name = GBT_read_string(gb_main, AWAR_ALIGNMENT); |
|---|
| 51 | } |
|---|
| 52 | GB_ERROR error = NULp; |
|---|
| 53 | AliView *aliview = pars_weighted_filter->create_aliview(ali_name, error); |
|---|
| 54 | if (!aliview) aw_popup_exit(error); |
|---|
| 55 | free(ali_name); |
|---|
| 56 | return aliview; |
|---|
| 57 | } |
|---|
| 58 | |
|---|
| 59 | void PARS_tree_init(AWT_graphic_parsimony *agt) { |
|---|
| 60 | ap_assert(agt->get_root_node()); |
|---|
| 61 | ap_assert(agt == ap_main->get_graphic_tree()); |
|---|
| 62 | |
|---|
| 63 | GBDATA *gb_main = ap_main->get_gb_main(); |
|---|
| 64 | GB_transaction ta(gb_main); |
|---|
| 65 | |
|---|
| 66 | const char *use = ap_main->get_aliname(); |
|---|
| 67 | long ali_len = GBT_get_alignment_len(gb_main, use); |
|---|
| 68 | if (ali_len <= 1) { |
|---|
| 69 | aw_popup_exit("No valid alignment selected! Try again"); |
|---|
| 70 | } |
|---|
| 71 | |
|---|
| 72 | agt->get_tree_root()->set_root_changed_callback(AWT_graphic_parsimony_root_changed, agt); |
|---|
| 73 | } |
|---|
| 74 | |
|---|
| 75 | QuadraticThreshold::QuadraticThreshold(KL_DYNAMIC_THRESHOLD_TYPE type, double startx, double maxy, double maxx, double maxDepth, Mutations pars_start) { |
|---|
| 76 | // set a, b, and c for quadratic equation y = ax^2 + bx + c |
|---|
| 77 | switch (type) { |
|---|
| 78 | default: |
|---|
| 79 | ap_assert(0); |
|---|
| 80 | // fall-through |
|---|
| 81 | case AP_QUADRAT_START: |
|---|
| 82 | c = startx; |
|---|
| 83 | a = (startx - maxy) / (maxx * maxx); |
|---|
| 84 | b = -2.0 * a * maxx; |
|---|
| 85 | break; |
|---|
| 86 | |
|---|
| 87 | case AP_QUADRAT_MAX: // unused (experimental) |
|---|
| 88 | a = - maxy / ((maxDepth - maxx) * (maxDepth - maxx)); |
|---|
| 89 | b = -2.0 * a * maxx; |
|---|
| 90 | c = maxy + a * maxx * maxx; |
|---|
| 91 | break; |
|---|
| 92 | } |
|---|
| 93 | c += pars_start; |
|---|
| 94 | } |
|---|
| 95 | |
|---|
| 96 | void ArbParsimony::kernighan_optimize_tree(AP_tree_nlen *at, const KL_Settings& settings, const Mutations *pars_global_start, bool dumpPerf) { |
|---|
| 97 | AP_tree_nlen *oldrootleft = get_root_node()->get_leftson(); |
|---|
| 98 | AP_tree_nlen *oldrootright = get_root_node()->get_rightson(); |
|---|
| 99 | Mutations pars_curr = get_root_node()->costs(); |
|---|
| 100 | const Mutations pars_org = pars_curr; |
|---|
| 101 | |
|---|
| 102 | OptiPerfMeter performance("KL-recursion", pars_curr); |
|---|
| 103 | |
|---|
| 104 | // setup KL recursion parameters: |
|---|
| 105 | KL_params KL; |
|---|
| 106 | { |
|---|
| 107 | KL.max_rec_depth = settings.maxdepth; ap_assert(KL.max_rec_depth>0); |
|---|
| 108 | KL.inc_rec_depth = settings.incdepth; |
|---|
| 109 | KL.thresFunctor = QuadraticThreshold(settings.Dynamic.type, settings.Dynamic.start, settings.Dynamic.maxy, settings.Dynamic.maxx, KL.max_rec_depth, pars_curr); |
|---|
| 110 | KL.rec_type = KL_RECURSION_TYPE((settings.Dynamic.enabled*AP_DYNAMIK)|(settings.Static.enabled*AP_STATIC)); |
|---|
| 111 | |
|---|
| 112 | for (int x = 0; x<CUSTOM_DEPTHS; ++x) { |
|---|
| 113 | KL.rec_width[x] = settings.Static.depth[x]; |
|---|
| 114 | } |
|---|
| 115 | KL.stopAtFoldedGroups = settings.whichEdges&SKIP_FOLDED_EDGES; |
|---|
| 116 | } |
|---|
| 117 | |
|---|
| 118 | AP_tree_edge *startEdge = NULp; |
|---|
| 119 | AP_tree_nlen *skipNode = NULp; |
|---|
| 120 | bool skipStartEdge = true; |
|---|
| 121 | |
|---|
| 122 | if (!at->father) { |
|---|
| 123 | startEdge = rootEdge(); |
|---|
| 124 | skipStartEdge = false; |
|---|
| 125 | } |
|---|
| 126 | else if (at->is_son_of_root()) { |
|---|
| 127 | startEdge = rootEdge(); |
|---|
| 128 | skipNode = startEdge->otherNode(at); |
|---|
| 129 | } |
|---|
| 130 | else { |
|---|
| 131 | skipNode = at->get_father(); |
|---|
| 132 | startEdge = at->edgeTo(skipNode); |
|---|
| 133 | } |
|---|
| 134 | ap_assert(startEdge); |
|---|
| 135 | |
|---|
| 136 | EdgeChain chain(startEdge, EdgeSpec(SKIP_LEAF_EDGES|settings.whichEdges), true, skipNode, skipStartEdge); |
|---|
| 137 | arb_progress progress(chain.size()); |
|---|
| 138 | |
|---|
| 139 | if (pars_global_start) { |
|---|
| 140 | progress.subtitle(GBS_global_string("best=%li (gain=%li)", pars_curr, *pars_global_start-pars_curr)); |
|---|
| 141 | } |
|---|
| 142 | else { |
|---|
| 143 | progress.subtitle(GBS_global_string("best=%li", pars_curr)); |
|---|
| 144 | } |
|---|
| 145 | |
|---|
| 146 | if (skipStartEdge) startEdge->set_visited(true); // avoid traversal beyond startEdge |
|---|
| 147 | |
|---|
| 148 | while (chain && !progress.aborted()) { |
|---|
| 149 | AP_tree_edge *edge = *chain; ++chain; |
|---|
| 150 | |
|---|
| 151 | ap_assert(!edge->is_leaf_edge()); |
|---|
| 152 | ap_assert(implicated(KL.stopAtFoldedGroups, !edge->next_to_folded_group())); |
|---|
| 153 | |
|---|
| 154 | ap_main->remember(); |
|---|
| 155 | |
|---|
| 156 | bool better_tree_found = edge->kl_rec(KL, 0, pars_curr); |
|---|
| 157 | |
|---|
| 158 | if (better_tree_found) { |
|---|
| 159 | ap_main->accept(); |
|---|
| 160 | Mutations pars_new = get_root_node()->costs(); |
|---|
| 161 | KL.thresFunctor.change_parsimony_start(pars_new-pars_curr); |
|---|
| 162 | pars_curr = pars_new; |
|---|
| 163 | if (pars_global_start) { |
|---|
| 164 | progress.subtitle(GBS_global_string("best=%li (gain=%li, KL=%li)", pars_curr, *pars_global_start-pars_curr, pars_org-pars_curr)); |
|---|
| 165 | } |
|---|
| 166 | else { |
|---|
| 167 | progress.subtitle(GBS_global_string("best=%li (gain=%li)", pars_curr, pars_org-pars_curr)); |
|---|
| 168 | } |
|---|
| 169 | } |
|---|
| 170 | else { |
|---|
| 171 | ap_main->revert(); |
|---|
| 172 | } |
|---|
| 173 | progress.inc(); |
|---|
| 174 | } |
|---|
| 175 | |
|---|
| 176 | if (skipStartEdge) startEdge->set_visited(false); // reallow traversal beyond startEdge |
|---|
| 177 | |
|---|
| 178 | if (dumpPerf) performance.dump(stdout, pars_curr); |
|---|
| 179 | |
|---|
| 180 | if (oldrootleft->father == oldrootright) { |
|---|
| 181 | oldrootleft->set_root(); |
|---|
| 182 | } |
|---|
| 183 | else { |
|---|
| 184 | oldrootright->set_root(); |
|---|
| 185 | } |
|---|
| 186 | } |
|---|
| 187 | |
|---|
| 188 | |
|---|
| 189 | |
|---|
| 190 | void ArbParsimony::optimize_tree(AP_tree_nlen *at, const KL_Settings& settings, arb_progress& progress) { |
|---|
| 191 | AP_tree_nlen *oldrootleft = get_root_node()->get_leftson(); |
|---|
| 192 | AP_tree_nlen *oldrootright = get_root_node()->get_rightson(); |
|---|
| 193 | const Mutations org_pars = get_root_node()->costs(); |
|---|
| 194 | Mutations prev_pars = org_pars; |
|---|
| 195 | |
|---|
| 196 | OptiPerfMeter overallPerf("global optimization", org_pars); |
|---|
| 197 | |
|---|
| 198 | progress.subtitle(GBS_global_string("best=%li", org_pars)); |
|---|
| 199 | |
|---|
| 200 | // define available heuristics |
|---|
| 201 | enum Heuristic { |
|---|
| 202 | RECURSIVE_NNI, |
|---|
| 203 | CUSTOM_KL, |
|---|
| 204 | |
|---|
| 205 | NO_FURTHER_HEURISTIC, |
|---|
| 206 | HEURISTIC_COUNT = NO_FURTHER_HEURISTIC, |
|---|
| 207 | } heuristic = RECURSIVE_NNI; |
|---|
| 208 | |
|---|
| 209 | struct H_Settings { |
|---|
| 210 | const char *name; // name shown in OptiPerfMeter |
|---|
| 211 | const KL_Settings *kl; // ==NULp -> NNI; else KL with these settings |
|---|
| 212 | bool repeat; // retry same heuristic when tree improved |
|---|
| 213 | Heuristic onImprove; // continue with this heuristic if improved (repeated or not) |
|---|
| 214 | Heuristic onFailure; // continue with this heuristic if NOT improved |
|---|
| 215 | } heuristic_setting[HEURISTIC_COUNT] = { |
|---|
| 216 | { "recursive NNIs", NULp, true, CUSTOM_KL, CUSTOM_KL }, |
|---|
| 217 | { "KL-optimizer", &settings, false, RECURSIVE_NNI, NO_FURTHER_HEURISTIC }, |
|---|
| 218 | }; |
|---|
| 219 | |
|---|
| 220 | Mutations heu_start_pars = prev_pars; |
|---|
| 221 | OptiPerfMeter *heuPerf = NULp; |
|---|
| 222 | |
|---|
| 223 | #if defined(ASSERTION_USED) |
|---|
| 224 | bool at_is_root = at == rootNode(); |
|---|
| 225 | #endif |
|---|
| 226 | |
|---|
| 227 | { |
|---|
| 228 | arb_suppress_progress quiet; |
|---|
| 229 | |
|---|
| 230 | while (heuristic != NO_FURTHER_HEURISTIC && !progress.aborted()) { |
|---|
| 231 | const H_Settings& hset = heuristic_setting[heuristic]; |
|---|
| 232 | if (!heuPerf) { |
|---|
| 233 | ap_assert(heu_start_pars == prev_pars); |
|---|
| 234 | heuPerf = new OptiPerfMeter(hset.name, heu_start_pars); |
|---|
| 235 | } |
|---|
| 236 | |
|---|
| 237 | Mutations this_pars(-1); |
|---|
| 238 | if (hset.kl) { |
|---|
| 239 | kernighan_optimize_tree(at, *hset.kl, &org_pars, false); |
|---|
| 240 | this_pars = get_root_node()->costs(); |
|---|
| 241 | } |
|---|
| 242 | else { |
|---|
| 243 | this_pars = at->nn_interchange_rec(settings.whichEdges, AP_BL_NNI_ONLY); |
|---|
| 244 | } |
|---|
| 245 | ap_assert(this_pars>=0); // ensure this_pars was set |
|---|
| 246 | ap_assert(this_pars<=prev_pars); // otherwise heuristic worsened the tree |
|---|
| 247 | |
|---|
| 248 | ap_assert(at_is_root == (at == rootNode())); |
|---|
| 249 | |
|---|
| 250 | bool dumpOverall = false; |
|---|
| 251 | Heuristic nextHeuristic = heuristic; |
|---|
| 252 | if (this_pars<prev_pars) { // found better tree |
|---|
| 253 | prev_pars = this_pars; |
|---|
| 254 | progress.subtitle(GBS_global_string("best=%li (gain=%li)", this_pars, org_pars-this_pars)); |
|---|
| 255 | if (!hset.repeat) { |
|---|
| 256 | nextHeuristic = hset.onImprove; |
|---|
| 257 | dumpOverall = heuristic == CUSTOM_KL; |
|---|
| 258 | } |
|---|
| 259 | } |
|---|
| 260 | else { // last step did not find better tree |
|---|
| 261 | nextHeuristic = this_pars<heu_start_pars ? hset.onImprove : hset.onFailure; |
|---|
| 262 | } |
|---|
| 263 | |
|---|
| 264 | if (nextHeuristic != heuristic) { |
|---|
| 265 | heuristic = nextHeuristic; |
|---|
| 266 | heu_start_pars = this_pars; |
|---|
| 267 | |
|---|
| 268 | heuPerf->dump(stdout, this_pars); |
|---|
| 269 | delete heuPerf; heuPerf = NULp; |
|---|
| 270 | } |
|---|
| 271 | if (dumpOverall) overallPerf.dumpCustom(stdout, this_pars, "overall (so far)"); |
|---|
| 272 | } |
|---|
| 273 | } |
|---|
| 274 | |
|---|
| 275 | if (oldrootleft->father == oldrootright) { |
|---|
| 276 | oldrootleft->set_root(); |
|---|
| 277 | } |
|---|
| 278 | else { |
|---|
| 279 | oldrootright->set_root(); |
|---|
| 280 | } |
|---|
| 281 | |
|---|
| 282 | overallPerf.dump(stdout, prev_pars); |
|---|
| 283 | } |
|---|
| 284 | |
|---|
| 285 | AWT_graphic_parsimony::AWT_graphic_parsimony(ArbParsimony& parsimony_, GBDATA *gb_main_, AD_map_viewer_cb map_viewer_cb_) |
|---|
| 286 | : AWT_graphic_tree(AW_root::SINGLETON, gb_main_, map_viewer_cb_), |
|---|
| 287 | parsimony(parsimony_) |
|---|
| 288 | {} |
|---|
| 289 | |
|---|
| 290 | AP_tree_root *AWT_graphic_parsimony::create_tree_root(AliView *aliview, AP_sequence *seq_prototype, bool insert_delete_cbs) { |
|---|
| 291 | return new AP_pars_root(aliview, seq_prototype, insert_delete_cbs, &groupScale); |
|---|
| 292 | } |
|---|
| 293 | |
|---|
| 294 | |
|---|
| 295 | void ArbParsimony::generate_tree(WeightedFilter *pars_weighted_filter) { |
|---|
| 296 | AliView *aliview = pars_generate_aliview(pars_weighted_filter); |
|---|
| 297 | AP_sequence *seq_templ = NULp; |
|---|
| 298 | |
|---|
| 299 | GBDATA *gb_main = aliview->get_gb_main(); |
|---|
| 300 | { |
|---|
| 301 | GB_transaction ta(gb_main); |
|---|
| 302 | bool is_aa = GBT_is_alignment_protein(gb_main, aliview->get_aliname()); |
|---|
| 303 | |
|---|
| 304 | if (is_aa) seq_templ = new AP_sequence_protein(aliview); |
|---|
| 305 | else seq_templ = new AP_sequence_parsimony(aliview); |
|---|
| 306 | } |
|---|
| 307 | |
|---|
| 308 | AWT_graphic_parsimony *new_tree = new AWT_graphic_parsimony(*this, aliview->get_gb_main(), PARS_map_viewer); |
|---|
| 309 | new_tree->init(aliview, seq_templ, true, false); |
|---|
| 310 | set_tree(new_tree); |
|---|
| 311 | } |
|---|
| 312 | |
|---|
| 313 | AW_gc_manager *AWT_graphic_parsimony::init_devices(AW_window *aww, AW_device *device, AWT_canvas* ntw) { |
|---|
| 314 | AW_init_color_group_defaults("arb_ntree"); |
|---|
| 315 | |
|---|
| 316 | AW_gc_manager *gc_manager = |
|---|
| 317 | AW_manage_GC(aww, |
|---|
| 318 | ntw->get_gc_base_name(), |
|---|
| 319 | device, AWT_GC_MAX, AW_GCM_DATA_AREA, |
|---|
| 320 | makeGcChangedCallback(TREE_GC_changed_cb, ntw), |
|---|
| 321 | "#AAAA55", |
|---|
| 322 | |
|---|
| 323 | // Important note : |
|---|
| 324 | // Many gc indices are shared between ABR_NTREE and ARB_PARSIMONY |
|---|
| 325 | // e.g. the tree drawing routines use same gc's for drawing both trees |
|---|
| 326 | // (keep in sync with ../SL/TREEDISP/TreeDisplay.cxx@init_devices) |
|---|
| 327 | |
|---|
| 328 | "Cursor$#FFFFFF", |
|---|
| 329 | "Branch remarks$#DBE994", |
|---|
| 330 | "+-Bootstrap$#DBE994", "-B.(limited)$white", |
|---|
| 331 | "!1", // reserve GC-number used for "IRS group box" in arb_ntree |
|---|
| 332 | "Marked$#FFFF00", |
|---|
| 333 | "Some marked$#eeee88", |
|---|
| 334 | "Not marked$black", |
|---|
| 335 | "Zombies etc.$#cc5924", |
|---|
| 336 | |
|---|
| 337 | "!14", // reserve 14 GC-numbers which are used for probe colors in ARB_NTREE |
|---|
| 338 | // (namely 'None', 'All' and 'P1' up to 'P12') |
|---|
| 339 | |
|---|
| 340 | "&color_groups", // use color groups |
|---|
| 341 | |
|---|
| 342 | NULp); |
|---|
| 343 | return gc_manager; |
|---|
| 344 | } |
|---|
| 345 | |
|---|
| 346 | void AWT_graphic_parsimony::show(AW_device *device) { |
|---|
| 347 | AP_tree_nlen *root_node = parsimony.get_root_node(); |
|---|
| 348 | AW_awar *awar_pars = get_root()->awar(AWAR_PARSIMONY); |
|---|
| 349 | AW_awar *awar_best = get_root()->awar(AWAR_BEST_PARSIMONY); |
|---|
| 350 | |
|---|
| 351 | long parsval = root_node ? root_node->costs() : 0; |
|---|
| 352 | awar_pars->write_int(parsval); |
|---|
| 353 | |
|---|
| 354 | long best_pars = awar_best->read_int(); |
|---|
| 355 | if (parsval < best_pars || 0==best_pars) awar_best->write_int(parsval); |
|---|
| 356 | |
|---|
| 357 | AWT_graphic_tree::show(device); |
|---|
| 358 | } |
|---|
| 359 | |
|---|
| 360 | void AWT_graphic_parsimony::handle_command(AW_device *device, AWT_graphic_event& event) { |
|---|
| 361 | ClickedTarget clicked(this, event.best_click()); |
|---|
| 362 | bool recalc_branchlengths_on_structure_change = true; |
|---|
| 363 | |
|---|
| 364 | switch (event.cmd()) { |
|---|
| 365 | case AWT_MODE_NNI: |
|---|
| 366 | case AWT_MODE_KERNINGHAN: |
|---|
| 367 | case AWT_MODE_OPTIMIZE: { |
|---|
| 368 | const char *what = NULp; |
|---|
| 369 | const char *where = NULp; |
|---|
| 370 | |
|---|
| 371 | switch (event.cmd()) { |
|---|
| 372 | case AWT_MODE_NNI: what = "Recursive NNI on"; break; |
|---|
| 373 | case AWT_MODE_KERNINGHAN: what = "K.L. optimize"; break; |
|---|
| 374 | case AWT_MODE_OPTIMIZE: what = "Global optimize"; break; |
|---|
| 375 | default: break; |
|---|
| 376 | } |
|---|
| 377 | |
|---|
| 378 | AP_tree_nlen *startNode = NULp; |
|---|
| 379 | bool repeatOpti = true; |
|---|
| 380 | |
|---|
| 381 | if (event.type() == AW_Mouse_Press) { |
|---|
| 382 | switch (event.button()) { |
|---|
| 383 | case AW_BUTTON_LEFT: |
|---|
| 384 | repeatOpti = false; |
|---|
| 385 | // fall-through |
|---|
| 386 | case AW_BUTTON_RIGHT: |
|---|
| 387 | startNode = DOWNCAST(AP_tree_nlen*, clicked.node()); |
|---|
| 388 | where = (startNode == get_root_node()) ? "tree" : "subtree"; |
|---|
| 389 | break; |
|---|
| 390 | |
|---|
| 391 | default: |
|---|
| 392 | break; |
|---|
| 393 | } |
|---|
| 394 | } |
|---|
| 395 | |
|---|
| 396 | if (what && where) { |
|---|
| 397 | arb_progress progress(GBS_global_string("%s %s", what, where)); |
|---|
| 398 | |
|---|
| 399 | Mutations start_pars = get_root_node()->costs(); |
|---|
| 400 | Mutations curr_pars = start_pars; |
|---|
| 401 | |
|---|
| 402 | KL_Settings KL(get_root()); |
|---|
| 403 | |
|---|
| 404 | do { |
|---|
| 405 | Mutations prev_pars = curr_pars; |
|---|
| 406 | |
|---|
| 407 | switch (event.cmd()) { |
|---|
| 408 | case AWT_MODE_NNI: |
|---|
| 409 | startNode->nn_interchange_rec(KL.whichEdges, AP_BL_NNI_ONLY); |
|---|
| 410 | break; |
|---|
| 411 | case AWT_MODE_KERNINGHAN: |
|---|
| 412 | parsimony.kernighan_optimize_tree(startNode, KL, &start_pars, true); |
|---|
| 413 | break; |
|---|
| 414 | case AWT_MODE_OPTIMIZE: |
|---|
| 415 | parsimony.optimize_tree(startNode, KL, progress); |
|---|
| 416 | repeatOpti = false; // never loop here (optimize_tree already loops until no further improvement) |
|---|
| 417 | break; |
|---|
| 418 | default: |
|---|
| 419 | repeatOpti = false; |
|---|
| 420 | break; |
|---|
| 421 | } |
|---|
| 422 | |
|---|
| 423 | curr_pars = get_root_node()->costs(); |
|---|
| 424 | repeatOpti = repeatOpti && curr_pars<prev_pars; |
|---|
| 425 | } while (repeatOpti); |
|---|
| 426 | |
|---|
| 427 | exports.request_save(); |
|---|
| 428 | ASSERT_VALID_TREE(get_root_node()); |
|---|
| 429 | } |
|---|
| 430 | break; |
|---|
| 431 | } |
|---|
| 432 | |
|---|
| 433 | default: |
|---|
| 434 | recalc_branchlengths_on_structure_change = false; |
|---|
| 435 | FALLTHROUGH; // unlisted modes trigger branchlength calculation internally when needed |
|---|
| 436 | case AWT_MODE_MOVE: |
|---|
| 437 | AWT_graphic_tree::handle_command(device, event); |
|---|
| 438 | break; |
|---|
| 439 | } |
|---|
| 440 | |
|---|
| 441 | if (exports.needs_save() && recalc_branchlengths_on_structure_change) { |
|---|
| 442 | arb_progress progress("Recalculating branch lengths"); |
|---|
| 443 | rootEdge()->calc_branchlengths(); |
|---|
| 444 | reorderTree(BIG_BRANCHES_TO_TOP); // beautify after recalc_branch_lengths |
|---|
| 445 | } |
|---|
| 446 | } |
|---|
| 447 | |
|---|
| 448 | |
|---|
| 449 | // -------------------------------------------------------------------------------- |
|---|
| 450 | |
|---|
| 451 | #ifdef UNIT_TESTS |
|---|
| 452 | #include <arb_diff.h> |
|---|
| 453 | #include <test_unit.h> |
|---|
| 454 | #include "test_env.h" |
|---|
| 455 | |
|---|
| 456 | template<typename SEQTYPE> |
|---|
| 457 | PARSIMONY_testenv<SEQTYPE>::PARSIMONY_testenv(const char *dbname, const char *aliName) |
|---|
| 458 | : parsimony(), |
|---|
| 459 | klSettings(NULp) |
|---|
| 460 | { |
|---|
| 461 | common_init(dbname); |
|---|
| 462 | GBDATA *gb_main = ap_main->get_gb_main(); |
|---|
| 463 | GB_transaction ta(gb_main); |
|---|
| 464 | |
|---|
| 465 | size_t aliLength = GBT_get_alignment_len(gb_main, aliName); |
|---|
| 466 | ap_assert(aliLength>0); |
|---|
| 467 | |
|---|
| 468 | klSettings = new KL_Settings(); |
|---|
| 469 | |
|---|
| 470 | AP_filter filter(aliLength); |
|---|
| 471 | if (!filter.is_invalid()) { |
|---|
| 472 | AP_weights weights(&filter); |
|---|
| 473 | agt->init(new AliView(gb_main, filter, weights, aliName)); |
|---|
| 474 | } |
|---|
| 475 | } |
|---|
| 476 | |
|---|
| 477 | template PARSIMONY_testenv<AP_sequence_protein>::PARSIMONY_testenv(const char *dbname, const char *aliName); // explicit instanciation (otherwise link error in unittest) |
|---|
| 478 | template PARSIMONY_testenv<AP_sequence_parsimony>::PARSIMONY_testenv(const char *dbname, const char *aliName); // explicit instanciation (as above, but only happens with gcc 4.6.3/NDEBUG) |
|---|
| 479 | |
|---|
| 480 | |
|---|
| 481 | void TEST_basic_tree_modifications() { |
|---|
| 482 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb"); |
|---|
| 483 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_test")); |
|---|
| 484 | |
|---|
| 485 | { |
|---|
| 486 | AP_tree_nlen *root = env.root_node(); |
|---|
| 487 | |
|---|
| 488 | // first check initial state: |
|---|
| 489 | { |
|---|
| 490 | AP_tree_members& root_info = root->gr; |
|---|
| 491 | |
|---|
| 492 | TEST_EXPECT_EQUAL(root_info.grouped, false); |
|---|
| 493 | TEST_EXPECT_EQUAL(root_info.hidden, false); |
|---|
| 494 | TEST_EXPECT_EQUAL(root_info.mark_sum, 6); |
|---|
| 495 | TEST_EXPECT_EQUAL(root_info.leaf_sum, 15); |
|---|
| 496 | |
|---|
| 497 | TEST_EXPECT_SIMILAR(root_info.max_tree_depth, 1.624975, 0.000001); |
|---|
| 498 | TEST_EXPECT_SIMILAR(root_info.min_tree_depth, 0.341681, 0.000001); |
|---|
| 499 | |
|---|
| 500 | GB_transaction ta(env.gbmain()); |
|---|
| 501 | GBT_mark_all(env.gbmain(), 0); // unmark all species |
|---|
| 502 | root->compute_tree(); |
|---|
| 503 | TEST_EXPECT_EQUAL(root_info.mark_sum, 0); |
|---|
| 504 | } |
|---|
| 505 | |
|---|
| 506 | |
|---|
| 507 | #define B1_TOP "(((((CloTyro3:1.046,CloTyro4:0.061):0.026,CloTyro2:0.017):0.017,CloTyrob:0.009):0.274,CloInnoc:0.371):0.057,CloBifer:0.388):0.124" |
|---|
| 508 | #define B1_BOT "(CloBifer:0.388,(CloInnoc:0.371,(CloTyrob:0.009,(CloTyro2:0.017,(CloTyro3:1.046,CloTyro4:0.061):0.026):0.017):0.274):0.057):0.124" |
|---|
| 509 | #define B2_TOP "(((CloButy2:0.009,CloButyr:0.000):0.564,CloCarni:0.120):0.010,CloPaste:0.179):0.131" |
|---|
| 510 | #define B2_BOT "(CloPaste:0.179,(CloCarni:0.120,(CloButy2:0.009,CloButyr:0.000):0.564):0.010):0.131" |
|---|
| 511 | |
|---|
| 512 | |
|---|
| 513 | #define B3_LEFT_TOP_SONS "(((CorAquat:0.084,CurCitre:0.058):0.103,CorGluta:0.522):0.053,CelBiazo:0.059)" |
|---|
| 514 | #define B3_TOP_SONS B3_LEFT_TOP_SONS ":0.207,CytAquat:0.711" |
|---|
| 515 | #define B3_TOP_SONS_CCR "((CorAquat:0.187,CorGluta:0.522):0.053,CelBiazo:0.059):0.207,CytAquat:0.711" // CCR = CurCitre removed |
|---|
| 516 | #define B3_TOP "(" B3_TOP_SONS "):0.081" |
|---|
| 517 | #define B3_BOT "(CytAquat:0.711,(CelBiazo:0.059,(CorGluta:0.522,(CorAquat:0.084,CurCitre:0.058):0.103):0.053):0.207):0.081" |
|---|
| 518 | |
|---|
| 519 | |
|---|
| 520 | const char *top_topo = "((" B1_TOP "," B2_TOP "):0.081," B3_TOP ");"; |
|---|
| 521 | const char *edge_topo = "((" B1_TOP "," B2_BOT "):0.081," B3_BOT ");"; |
|---|
| 522 | const char *bottom_topo = "(" B3_BOT ",(" B2_BOT "," B1_BOT "):0.081);"; |
|---|
| 523 | |
|---|
| 524 | const char *radial_topo = |
|---|
| 525 | "(((CloPaste:0.179,((CloButy2:0.009,CloButyr:0.000):0.564,CloCarni:0.120):0.010):0.131," |
|---|
| 526 | "((CloInnoc:0.371,((CloTyro2:0.017,(CloTyro3:1.046,CloTyro4:0.061):0.026):0.017,CloTyrob:0.009):0.274):0.057,CloBifer:0.388):0.124):0.081," |
|---|
| 527 | "((CelBiazo:0.059,((CorAquat:0.084,CurCitre:0.058):0.103,CorGluta:0.522):0.053):0.207,CytAquat:0.711):0.081);"; |
|---|
| 528 | const char *radial_topo2 = |
|---|
| 529 | "(((CloBifer:0.388,(CloInnoc:0.371,(((CloTyro3:1.046,CloTyro4:0.061):0.026,CloTyro2:0.017):0.017,CloTyrob:0.009):0.274):0.057):0.124," B2_TOP "):0.081," |
|---|
| 530 | "(CytAquat:0.711," B3_LEFT_TOP_SONS ":0.207):0.081);"; |
|---|
| 531 | |
|---|
| 532 | // expect that no mode reproduces another mode: |
|---|
| 533 | TEST_EXPECT_DIFFERENT(top_topo, edge_topo); |
|---|
| 534 | TEST_EXPECT_DIFFERENT(top_topo, bottom_topo); |
|---|
| 535 | TEST_EXPECT_DIFFERENT(top_topo, radial_topo); |
|---|
| 536 | TEST_EXPECT_DIFFERENT(top_topo, radial_topo2); |
|---|
| 537 | TEST_EXPECT_DIFFERENT(edge_topo, bottom_topo); |
|---|
| 538 | TEST_EXPECT_DIFFERENT(edge_topo, radial_topo); |
|---|
| 539 | TEST_EXPECT_DIFFERENT(edge_topo, radial_topo2); |
|---|
| 540 | TEST_EXPECT_DIFFERENT(bottom_topo, radial_topo); |
|---|
| 541 | TEST_EXPECT_DIFFERENT(bottom_topo, radial_topo2); |
|---|
| 542 | TEST_EXPECT_DIFFERENT(radial_topo, radial_topo2); |
|---|
| 543 | |
|---|
| 544 | env.push(); // 1st stack level (=top_topo) |
|---|
| 545 | |
|---|
| 546 | TEST_EXPECT_VALID_TREE(root); |
|---|
| 547 | |
|---|
| 548 | TEST_EXPECT_NEWICK(nLENGTH, root, top_topo); |
|---|
| 549 | // test reorder_tree: |
|---|
| 550 | root->reorder_tree(BIG_BRANCHES_TO_EDGE); TEST_EXPECT_NEWICK(nLENGTH, root, edge_topo); env.push(); // 2nd stack level (=edge_topo) |
|---|
| 551 | root->reorder_tree(BIG_BRANCHES_TO_BOTTOM); TEST_EXPECT_NEWICK(nLENGTH, root, bottom_topo); env.push(); // 3rd stack level (=bottom_topo) |
|---|
| 552 | root->reorder_tree(BIG_BRANCHES_TO_CENTER); TEST_EXPECT_NEWICK(nLENGTH, root, radial_topo); |
|---|
| 553 | root->reorder_tree(BIG_BRANCHES_ALTERNATING); TEST_EXPECT_NEWICK(nLENGTH, root, radial_topo2); |
|---|
| 554 | root->reorder_tree(BIG_BRANCHES_TO_TOP); TEST_EXPECT_NEWICK(nLENGTH, root, top_topo); |
|---|
| 555 | |
|---|
| 556 | TEST_EXPECT_VALID_TREE(root); |
|---|
| 557 | |
|---|
| 558 | // test set root: |
|---|
| 559 | AP_tree_nlen *CloTyrob = root->findLeafNamed("CloTyrob"); |
|---|
| 560 | TEST_REJECT_NULL(CloTyrob); |
|---|
| 561 | |
|---|
| 562 | ARB_edge rootEdge(root->get_leftson(), root->get_rightson()); |
|---|
| 563 | CloTyrob->set_root(); |
|---|
| 564 | |
|---|
| 565 | TEST_EXPECT_VALID_TREE(root); |
|---|
| 566 | |
|---|
| 567 | const char *rootAtCloTyrob_topo = |
|---|
| 568 | "(CloTyrob:0.004," |
|---|
| 569 | "(((CloTyro3:1.046,CloTyro4:0.061):0.026,CloTyro2:0.017):0.017," |
|---|
| 570 | "((((" B3_TOP_SONS "):0.162," B2_TOP "):0.124,CloBifer:0.388):0.057,CloInnoc:0.371):0.274):0.004);"; |
|---|
| 571 | |
|---|
| 572 | TEST_EXPECT_NEWICK(nLENGTH, root, rootAtCloTyrob_topo); |
|---|
| 573 | env.push(); // 4th stack level (=rootAtCloTyrob_topo) |
|---|
| 574 | |
|---|
| 575 | TEST_EXPECT_VALID_TREE(root); |
|---|
| 576 | |
|---|
| 577 | AP_tree_nlen *CelBiazoFather = root->findLeafNamed("CelBiazo")->get_father(); |
|---|
| 578 | TEST_REJECT_NULL(CelBiazoFather); |
|---|
| 579 | CelBiazoFather->set_root(); |
|---|
| 580 | |
|---|
| 581 | const char *rootAtCelBiazoFather_topo = "(" B3_LEFT_TOP_SONS ":0.104,((" B1_TOP "," B2_TOP "):0.162,CytAquat:0.711):0.104);"; |
|---|
| 582 | TEST_EXPECT_NEWICK(nLENGTH, root, rootAtCelBiazoFather_topo); |
|---|
| 583 | |
|---|
| 584 | TEST_EXPECT_VALID_TREE(root); |
|---|
| 585 | |
|---|
| 586 | ARB_edge oldRootEdge(rootEdge.source(), rootEdge.dest()); |
|---|
| 587 | DOWNCAST(AP_tree_nlen*,oldRootEdge.son())->set_root(); |
|---|
| 588 | |
|---|
| 589 | const char *rootSetBack_topo = top_topo; |
|---|
| 590 | TEST_EXPECT_NEWICK(nLENGTH, root, rootSetBack_topo); |
|---|
| 591 | env.push(); // 5th stack level (=rootSetBack_topo) |
|---|
| 592 | |
|---|
| 593 | TEST_EXPECT_VALID_TREE(root); |
|---|
| 594 | |
|---|
| 595 | // test remove: |
|---|
| 596 | AP_tree_nlen *CurCitre = root->findLeafNamed("CurCitre"); |
|---|
| 597 | TEST_REJECT_NULL(CurCitre); |
|---|
| 598 | TEST_REJECT_NULL(CurCitre->get_father()); |
|---|
| 599 | |
|---|
| 600 | CurCitre->REMOVE(); |
|---|
| 601 | const char *CurCitre_removed_topo = "((" B1_TOP "," B2_TOP "):0.081,(" B3_TOP_SONS_CCR "):0.081);"; |
|---|
| 602 | // ------------------------------------------------------------------- ^^^ = B3_TOP_SONS minus CurCitre |
|---|
| 603 | TEST_EXPECT_NEWICK(nLENGTH, root, CurCitre_removed_topo); |
|---|
| 604 | |
|---|
| 605 | TEST_EXPECT_VALID_TREE(root); |
|---|
| 606 | TEST_EXPECT_VALID_TREE(CurCitre); |
|---|
| 607 | |
|---|
| 608 | TEST_EXPECT_EQUAL(root->gr.leaf_sum, 15); // out of date |
|---|
| 609 | root->compute_tree(); |
|---|
| 610 | TEST_EXPECT_EQUAL(root->gr.leaf_sum, 14); |
|---|
| 611 | |
|---|
| 612 | env.push(); // 6th stack level (=CurCitre_removed_topo) |
|---|
| 613 | |
|---|
| 614 | TEST_EXPECT_VALID_TREE(root); |
|---|
| 615 | |
|---|
| 616 | // test insert: |
|---|
| 617 | AP_tree_nlen *CloCarni = root->findLeafNamed("CloCarni"); |
|---|
| 618 | TEST_REJECT_NULL(CloCarni); |
|---|
| 619 | CurCitre->insert(CloCarni); // this creates two extra edges (not destroyed by destroy() below) and one extra node |
|---|
| 620 | |
|---|
| 621 | const char *CurCitre_inserted_topo = "((" B1_TOP ",(((CloButy2:0.009,CloButyr:0.000):0.564,(CurCitre:0.060,CloCarni:0.060):0.060):0.010,CloPaste:0.179):0.131):0.081,(" B3_TOP_SONS_CCR "):0.081);"; |
|---|
| 622 | TEST_EXPECT_NEWICK(nLENGTH, root, CurCitre_inserted_topo); |
|---|
| 623 | |
|---|
| 624 | TEST_EXPECT_VALID_TREE(root); |
|---|
| 625 | |
|---|
| 626 | // now check pops: |
|---|
| 627 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, CurCitre_removed_topo); TEST_EXPECT_VALID_TREE(root); |
|---|
| 628 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, rootSetBack_topo); TEST_EXPECT_VALID_TREE(root); |
|---|
| 629 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, rootAtCloTyrob_topo); TEST_EXPECT_VALID_TREE(root); |
|---|
| 630 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, bottom_topo); TEST_EXPECT_VALID_TREE(root); |
|---|
| 631 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, edge_topo); TEST_EXPECT_VALID_TREE(root); |
|---|
| 632 | env.pop(); TEST_EXPECT_NEWICK(nLENGTH, root, top_topo); TEST_EXPECT_VALID_TREE(root); |
|---|
| 633 | } |
|---|
| 634 | } |
|---|
| 635 | |
|---|
| 636 | void TEST_calc_bootstraps() { |
|---|
| 637 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", "ali_5s"); |
|---|
| 638 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_test")); |
|---|
| 639 | |
|---|
| 640 | const char *bs_origi_topo = "(((((((CloTyro3,CloTyro4)'40%',CloTyro2)'0%',CloTyrob)'97%',CloInnoc)'0%',CloBifer)'53%',(((CloButy2,CloButyr),CloCarni)'33%',CloPaste)'97%'),((((CorAquat,CurCitre),CorGluta)'17%',CelBiazo)'40%',CytAquat));"; |
|---|
| 641 | const char *bs_limit_topo = "(((((((CloTyro3,CloTyro4)'87%',CloTyro2)'0%',CloTyrob)'100%',CloInnoc)'87%',CloBifer)'83%',(((CloButy2,CloButyr)'99%',CloCarni)'17%',CloPaste)'56%')'61%',((((CorAquat,CurCitre)'78%',CorGluta)'0%',CelBiazo)'59%',CytAquat)'61%');"; |
|---|
| 642 | const char *bs_estim_topo = "(((((((CloTyro3,CloTyro4)'75%',CloTyro2)'0%',CloTyrob)'100%',CloInnoc)'75%',CloBifer)'78%',(((CloButy2,CloButyr)'99%',CloCarni)'13%',CloPaste)'32%')'53%',((((CorAquat,CurCitre)'74%',CorGluta)'0%',CelBiazo)'56%',CytAquat)'53%');"; |
|---|
| 643 | |
|---|
| 644 | { |
|---|
| 645 | AP_tree_nlen *root = env.root_node(); |
|---|
| 646 | AP_tree_edge *root_edge = rootEdge(); |
|---|
| 647 | |
|---|
| 648 | TEST_EXPECT(root && root_edge); |
|---|
| 649 | |
|---|
| 650 | root->reorder_tree(BIG_BRANCHES_TO_TOP); TEST_EXPECT_NEWICK(nREMARK, root, bs_origi_topo); |
|---|
| 651 | |
|---|
| 652 | TEST_EXPECT_COMBINES_PERFORMED(env, 0); |
|---|
| 653 | |
|---|
| 654 | root_edge->nni_rec(ANY_EDGE, AP_BL_MODE(AP_BL_BL_ONLY|AP_BL_BOOTSTRAP_LIMIT), NULp, true); |
|---|
| 655 | root->reorder_tree(BIG_BRANCHES_TO_TOP); |
|---|
| 656 | TEST_EXPECT_NEWICK(nREMARK, root, bs_limit_topo); |
|---|
| 657 | TEST_EXPECT_COMBINES_PERFORMED(env, 170); |
|---|
| 658 | |
|---|
| 659 | root_edge->nni_rec(ANY_EDGE, AP_BL_MODE(AP_BL_BL_ONLY|AP_BL_BOOTSTRAP_ESTIMATE), NULp, true); |
|---|
| 660 | root->reorder_tree(BIG_BRANCHES_TO_TOP); |
|---|
| 661 | TEST_EXPECT_NEWICK(nREMARK, root, bs_estim_topo); |
|---|
| 662 | TEST_EXPECT_COMBINES_PERFORMED(env, 156); |
|---|
| 663 | |
|---|
| 664 | TEST_EXPECT_EQUAL(env.root_node(), root); |
|---|
| 665 | } |
|---|
| 666 | |
|---|
| 667 | } |
|---|
| 668 | |
|---|
| 669 | void TEST_tree_remove_add_all() { |
|---|
| 670 | // reproduces crash as described in #527 |
|---|
| 671 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", "ali_5s"); |
|---|
| 672 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_nj")); |
|---|
| 673 | |
|---|
| 674 | const int LEAFS = 6; |
|---|
| 675 | AP_tree_nlen *leaf[LEAFS]; |
|---|
| 676 | const char *name[LEAFS] = { |
|---|
| 677 | "CloButy2", |
|---|
| 678 | "CloButyr", |
|---|
| 679 | "CytAquat", |
|---|
| 680 | "CorAquat", |
|---|
| 681 | "CurCitre", |
|---|
| 682 | "CorGluta", |
|---|
| 683 | }; |
|---|
| 684 | |
|---|
| 685 | AP_tree_nlen *root = env.root_node(); |
|---|
| 686 | |
|---|
| 687 | for (int i = 0; i<LEAFS; ++i) { |
|---|
| 688 | leaf[i] = root->findLeafNamed(name[i]); |
|---|
| 689 | TEST_REJECT_NULL(leaf[i]); |
|---|
| 690 | } |
|---|
| 691 | |
|---|
| 692 | TEST_EXPECT_VALID_TREE(root); |
|---|
| 693 | |
|---|
| 694 | AP_pars_root *troot = leaf[0]->get_tree_root(); |
|---|
| 695 | TEST_REJECT_NULL(troot); |
|---|
| 696 | |
|---|
| 697 | for (int i = 0; i<LEAFS-1; ++i) { |
|---|
| 698 | // Note: removing the second to last leaf, "removes" both remaining |
|---|
| 699 | // leafs (but only destroys their father node) |
|---|
| 700 | |
|---|
| 701 | TEST_EXPECT_VALID_TREE(root); |
|---|
| 702 | leaf[i]->REMOVE(); |
|---|
| 703 | TEST_EXPECT_VALID_TREE(leaf[i]); |
|---|
| 704 | } |
|---|
| 705 | |
|---|
| 706 | leaf[0]->initial_insert(leaf[1], troot); |
|---|
| 707 | for (int i = 2; i<LEAFS; ++i) { |
|---|
| 708 | TEST_EXPECT_VALID_TREE(leaf[i-1]); |
|---|
| 709 | TEST_EXPECT_VALID_TREE(leaf[i]); |
|---|
| 710 | leaf[i]->insert(leaf[i-1]); |
|---|
| 711 | } |
|---|
| 712 | } |
|---|
| 713 | |
|---|
| 714 | #endif // UNIT_TESTS |
|---|
| 715 | |
|---|
| 716 | // -------------------------------------------------------------------------------- |
|---|