| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : PARS_main.cxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // =============================================================== // |
|---|
| 10 | |
|---|
| 11 | #include "PerfMeter.h" |
|---|
| 12 | #include "pars_main.hxx" |
|---|
| 13 | #include "pars_klprops.hxx" |
|---|
| 14 | #include "pars_awars.h" |
|---|
| 15 | #include "ap_tree_nlen.hxx" |
|---|
| 16 | #include "ap_main.hxx" |
|---|
| 17 | |
|---|
| 18 | #include <ColumnStat.hxx> |
|---|
| 19 | #include <gui_aliview.hxx> |
|---|
| 20 | #include <macros.hxx> |
|---|
| 21 | #include <nds.h> |
|---|
| 22 | #include <TreeCallbacks.hxx> |
|---|
| 23 | |
|---|
| 24 | #include <aw_awars.hxx> |
|---|
| 25 | #include <aw_preset.hxx> |
|---|
| 26 | #include <aw_msg.hxx> |
|---|
| 27 | #include <aw_root.hxx> |
|---|
| 28 | #include <aw_question.hxx> |
|---|
| 29 | |
|---|
| 30 | #include <awt.hxx> |
|---|
| 31 | #include <awt_sel_boxes.hxx> |
|---|
| 32 | #include <awt_filter.hxx> |
|---|
| 33 | #include <awt_config_manager.hxx> |
|---|
| 34 | |
|---|
| 35 | #include <arb_progress.h> |
|---|
| 36 | #include <arb_misc.h> |
|---|
| 37 | #include <arb_defs.h> |
|---|
| 38 | #include <arb_global_defs.h> |
|---|
| 39 | |
|---|
| 40 | #include <ad_cb.h> |
|---|
| 41 | |
|---|
| 42 | #include <list> |
|---|
| 43 | #include <map> |
|---|
| 44 | #include <mod_rlimit.h> |
|---|
| 45 | |
|---|
| 46 | #if defined(DEBUG) |
|---|
| 47 | # define TESTMENU |
|---|
| 48 | #endif // DEBUG |
|---|
| 49 | |
|---|
| 50 | using namespace std; |
|---|
| 51 | |
|---|
| 52 | AW_HEADER_MAIN |
|---|
| 53 | |
|---|
| 54 | #define AWAR_COLUMNSTAT_BASE "tmp/pars/colstat" |
|---|
| 55 | #define AWAR_COLUMNSTAT_NAME AWAR_COLUMNSTAT_BASE "/name" |
|---|
| 56 | |
|---|
| 57 | #define AWT_TREE_PARS(ntw) DOWNCAST(AWT_graphic_parsimony*, (ntw)->gfx) |
|---|
| 58 | |
|---|
| 59 | static ArbParsimony *GLOBAL_PARS = NULp; |
|---|
| 60 | |
|---|
| 61 | inline AWT_graphic_parsimony *global_tree() { return GLOBAL_PARS->get_tree(); } |
|---|
| 62 | inline AP_pars_root *global_tree_root() { return global_tree()->get_tree_root(); } |
|---|
| 63 | |
|---|
| 64 | // waaah more globals :( |
|---|
| 65 | AP_main *ap_main; // @@@ move into ArbParsimony? or eliminate ArbParsimony |
|---|
| 66 | |
|---|
| 67 | void ArbParsimony::set_tree(AWT_graphic_parsimony *tree_) { |
|---|
| 68 | ap_assert(!tree); // only call once |
|---|
| 69 | tree = tree_; |
|---|
| 70 | ap_main->set_tree_root(tree); |
|---|
| 71 | } |
|---|
| 72 | |
|---|
| 73 | static void set_keep_ghostnodes() { |
|---|
| 74 | // avoid that saving tree to DB does delete removed nodes |
|---|
| 75 | // (hack to fix #528) |
|---|
| 76 | // see ../ARBDB/adtree.cxx@keep_ghostnodes |
|---|
| 77 | GBDATA *gb_tree = ap_main->get_tree_root()->get_gb_tree(); |
|---|
| 78 | GB_transaction ta(gb_tree); |
|---|
| 79 | GBDATA *gb_keep = GB_searchOrCreate_int(gb_tree, "keep_ghostnodes", 1); |
|---|
| 80 | ASSERT_NO_ERROR(GB_set_temporary(gb_keep)); |
|---|
| 81 | } |
|---|
| 82 | static void delete_kept_ghostnodes() { |
|---|
| 83 | if (ap_main->get_graphic_tree()) { |
|---|
| 84 | GBDATA *gb_tree = ap_main->get_tree_root()->get_gb_tree(); |
|---|
| 85 | GB_transaction ta(gb_tree); |
|---|
| 86 | |
|---|
| 87 | GBDATA *gb_keep = GB_entry(gb_tree, "keep_ghostnodes"); |
|---|
| 88 | if (gb_keep) { // e.g. wrong for quick-add species |
|---|
| 89 | GB_ERROR error = GB_delete(gb_keep); |
|---|
| 90 | if (!error) { |
|---|
| 91 | if (ap_main->get_tree_root()->was_saved()) { |
|---|
| 92 | // if tree was saved, DB may contain ghostnodes |
|---|
| 93 | // -> save again to delete them |
|---|
| 94 | error = global_tree()->save_to_DB(GB_get_root(gb_tree), NULp); |
|---|
| 95 | } |
|---|
| 96 | } |
|---|
| 97 | if (error) aw_message(error); |
|---|
| 98 | } |
|---|
| 99 | } |
|---|
| 100 | } |
|---|
| 101 | |
|---|
| 102 | __ATTR__NORETURN static void pars_exit(AW_window *aww) { |
|---|
| 103 | AW_root *aw_root = aww->get_root(); |
|---|
| 104 | shutdown_macro_recording(aw_root); |
|---|
| 105 | |
|---|
| 106 | ap_main->accept_all(); |
|---|
| 107 | delete_kept_ghostnodes(); |
|---|
| 108 | |
|---|
| 109 | aw_root->unlink_awars_from_DB(ap_main->get_gb_main()); |
|---|
| 110 | #if defined(DEBUG) |
|---|
| 111 | AWT_browser_forget_db(ap_main->get_gb_main()); |
|---|
| 112 | #endif // DEBUG |
|---|
| 113 | delete ap_main; // closes DB |
|---|
| 114 | ap_main = NULp; |
|---|
| 115 | |
|---|
| 116 | exit(EXIT_SUCCESS); |
|---|
| 117 | } |
|---|
| 118 | |
|---|
| 119 | static void AP_user_push_cb(AW_window *aww) { |
|---|
| 120 | ap_main->remember_user_state(); |
|---|
| 121 | aww->get_root()->awar(AWAR_STACKPOINTER)->write_int(ap_main->get_user_push_counter()); |
|---|
| 122 | } |
|---|
| 123 | |
|---|
| 124 | static void AP_user_pop_cb(AW_window *aww, TREE_canvas *ntw) { |
|---|
| 125 | if (ap_main->get_user_push_counter()<=0) { |
|---|
| 126 | aw_message("No tree on stack."); |
|---|
| 127 | return; |
|---|
| 128 | } |
|---|
| 129 | |
|---|
| 130 | AWT_auto_refresh allowed_on(ntw); |
|---|
| 131 | ap_main->revert_user_state(); |
|---|
| 132 | ntw->request_save(); |
|---|
| 133 | |
|---|
| 134 | aww->get_root()->awar(AWAR_STACKPOINTER)->write_int(ap_main->get_user_push_counter()); |
|---|
| 135 | if (ap_main->get_user_push_counter() <= 0) { // last tree was popped => push again |
|---|
| 136 | AP_user_push_cb(aww); |
|---|
| 137 | } |
|---|
| 138 | } |
|---|
| 139 | |
|---|
| 140 | class InsertData { |
|---|
| 141 | bool abort_flag; |
|---|
| 142 | arb_progress progress; |
|---|
| 143 | |
|---|
| 144 | public: |
|---|
| 145 | |
|---|
| 146 | bool quick_add_flag; |
|---|
| 147 | InsertData(bool quick, long spec_count) |
|---|
| 148 | : abort_flag(false), |
|---|
| 149 | progress(GBS_global_string("Inserting %li species", spec_count), spec_count), |
|---|
| 150 | quick_add_flag(quick) |
|---|
| 151 | {} |
|---|
| 152 | |
|---|
| 153 | bool aborted() const { return abort_flag; } |
|---|
| 154 | void set_aborted(bool aborted_) { abort_flag = aborted_; } |
|---|
| 155 | |
|---|
| 156 | void inc() { |
|---|
| 157 | progress.inc(); |
|---|
| 158 | abort_flag = progress.aborted(); |
|---|
| 159 | } |
|---|
| 160 | |
|---|
| 161 | arb_progress& get_progress() { return progress; } |
|---|
| 162 | }; |
|---|
| 163 | |
|---|
| 164 | |
|---|
| 165 | static int sort_sequences_by_length(const char*, long leaf0_ptr, const char*, long leaf1_ptr) { // @@@ any chance to make this typesafe? |
|---|
| 166 | AP_tree_nlen *leaf0 = (AP_tree_nlen*)leaf0_ptr; |
|---|
| 167 | AP_tree_nlen *leaf1 = (AP_tree_nlen*)leaf1_ptr; |
|---|
| 168 | |
|---|
| 169 | AP_FLOAT len0 = leaf0->get_seq()->weighted_base_count(); |
|---|
| 170 | AP_FLOAT len1 = leaf1->get_seq()->weighted_base_count(); |
|---|
| 171 | |
|---|
| 172 | // longest sequence first |
|---|
| 173 | if (len0<len1) return 1; |
|---|
| 174 | if (len0>len1) return -1; |
|---|
| 175 | |
|---|
| 176 | // if length equal -> determine order by species name (just to have a defined order!) |
|---|
| 177 | int cmp = strcmp(leaf1->name, leaf0->name); |
|---|
| 178 | ap_assert(cmp != 0); |
|---|
| 179 | return cmp; |
|---|
| 180 | } |
|---|
| 181 | |
|---|
| 182 | static long transform_gbd_to_leaf(const char *key, long val, void *) { |
|---|
| 183 | if (!val) return val; |
|---|
| 184 | |
|---|
| 185 | // @@@ instead implement create_linked_leaf(), then use that? |
|---|
| 186 | |
|---|
| 187 | GBDATA *gb_node = (GBDATA *)val; |
|---|
| 188 | AP_pars_root *troot = ap_main->get_tree_root(); |
|---|
| 189 | AP_tree_nlen *leaf = DOWNCAST(AP_tree_nlen*, troot->makeNode()); |
|---|
| 190 | |
|---|
| 191 | leaf->forget_origin(); // new leaf is not part of tree yet |
|---|
| 192 | |
|---|
| 193 | leaf->gb_node = gb_node; |
|---|
| 194 | leaf->name = ARB_strdup(key); |
|---|
| 195 | leaf->markAsLeaf(); |
|---|
| 196 | |
|---|
| 197 | leaf->set_seq(troot->get_seqTemplate()->dup()); |
|---|
| 198 | GB_ERROR error = leaf->get_seq()->bind_to_species(gb_node); |
|---|
| 199 | if (!error) { |
|---|
| 200 | if (leaf->get_seq()->weighted_base_count() < MIN_SEQUENCE_LENGTH) { |
|---|
| 201 | error = GBS_global_string("Species %s has too short sequence (%f, minimum is %i)", |
|---|
| 202 | key, |
|---|
| 203 | leaf->get_seq()->weighted_base_count(), |
|---|
| 204 | MIN_SEQUENCE_LENGTH); |
|---|
| 205 | } |
|---|
| 206 | } |
|---|
| 207 | if (error) { |
|---|
| 208 | GBT_message(gb_node, error); |
|---|
| 209 | destroy(leaf, troot); leaf = NULp; |
|---|
| 210 | } |
|---|
| 211 | return (long)leaf; |
|---|
| 212 | } |
|---|
| 213 | |
|---|
| 214 | typedef vector<AP_tree_nlen*> InsertedSpecies; |
|---|
| 215 | |
|---|
| 216 | static long toInserted(const char *, long val, void *cd_toInsert) { |
|---|
| 217 | InsertedSpecies *toInsert = (InsertedSpecies*)cd_toInsert; |
|---|
| 218 | AP_tree_nlen *node = (AP_tree_nlen*)val; |
|---|
| 219 | |
|---|
| 220 | toInsert->push_back(node); |
|---|
| 221 | return 0; |
|---|
| 222 | } |
|---|
| 223 | |
|---|
// Max. number of species allowed to insert (in one pass)
// into a tree with 'inTree' leafs: half of the leafs (integer division).
inline int maxAllowedInsertions(int inTree) {
    const int half = inTree/2;
    return half;
}

// Number of species added in the next pass:
// all 'toInsert' species, unless that exceeds the per-pass limit.
inline int calcInsertNow(int toInsert, int inTree) {
    const int limit = maxAllowedInsertions(inTree);
    return limit<toInsert ? limit : toInsert;
}
|---|
| 232 | |
|---|
| 233 | static long calc_steps(int toInsert, int inTree) { |
|---|
| 234 | ap_assert((toInsert+inTree) >= 2); |
|---|
| 235 | |
|---|
| 236 | if (!toInsert) return 0; |
|---|
| 237 | if (!inTree) return 1 + calc_steps(toInsert-2, 2); |
|---|
| 238 | |
|---|
| 239 | int edges = leafs_2_edges(inTree, UNROOTED); |
|---|
| 240 | int insertNow = calcInsertNow(toInsert, inTree); |
|---|
| 241 | |
|---|
| 242 | return (long)(edges+1)*insertNow + calc_steps(toInsert-insertNow, inTree+insertNow); // +1 for final step (=actual insertion of species) |
|---|
| 243 | } |
|---|
| 244 | |
|---|
| 245 | class AP_subtree { // defines a subtree |
|---|
| 246 | AP_tree_nlen *subNode; |
|---|
| 247 | AP_tree_nlen *upNode; |
|---|
| 248 | |
|---|
| 249 | bool valid() const { return subNode && upNode; } |
|---|
| 250 | |
|---|
| 251 | public: |
|---|
| 252 | AP_subtree() : subNode(NULp), upNode(NULp) {} |
|---|
| 253 | AP_subtree(AP_tree_edge *e, AP_tree_nlen *sub_node) : |
|---|
| 254 | subNode(sub_node), |
|---|
| 255 | upNode(e->otherNode(subNode)) |
|---|
| 256 | {} |
|---|
| 257 | |
|---|
| 258 | AP_tree_edge *edgeToSubtree() const { ap_assert(valid()); return upNode->edgeTo(subNode); } |
|---|
| 259 | AP_tree_nlen *subtreeRoot() const { return subNode; } |
|---|
| 260 | |
|---|
| 261 | void setSubtreeRoot(AP_tree_nlen *new_subtree) { |
|---|
| 262 | ap_assert(upNode->edgeTo(new_subtree)); |
|---|
| 263 | subNode = new_subtree; |
|---|
| 264 | } |
|---|
| 265 | }; |
|---|
| 266 | |
|---|
| 267 | struct EdgeBetween : private AP_subtree { |
|---|
| 268 | // semantically same as AP_tree_edge, but survives tree-modifications which modify edges (like insert+moveNextTo/moveTo) |
|---|
| 269 | |
|---|
| 270 | EdgeBetween() {} |
|---|
| 271 | EdgeBetween(AP_tree_edge *e) : AP_subtree(e, e->sonNode()) {} |
|---|
| 272 | AP_tree_edge *find() const { return edgeToSubtree(); } |
|---|
| 273 | }; |
|---|
| 274 | |
|---|
| 275 | struct BestEdge { |
|---|
| 276 | Mutations pars; |
|---|
| 277 | EdgeBetween between; // need to store pair of AP_tree_nlen here |
|---|
| 278 | // (using AP_tree_edge is not stable; may move elsewhere by calls insert() or moveNextTo()!) |
|---|
| 279 | |
|---|
| 280 | BestEdge() : pars(-1) {} |
|---|
| 281 | BestEdge(const EdgeBetween& betw, Mutations p) : pars(p), between(betw) {} |
|---|
| 282 | |
|---|
| 283 | AP_tree_edge *edge() const { return between.find(); } |
|---|
| 284 | }; |
|---|
| 285 | |
|---|
| 286 | struct NodeInsertOrder { |
|---|
| 287 | bool operator() (AP_tree_nlen *i, AP_tree_nlen *j) { return strcmp(i->name, j->name)<0; } |
|---|
| 288 | }; |
|---|
| 289 | |
|---|
| 290 | typedef InsertedSpecies::const_iterator InsertSpeciesIterator; |
|---|
| 291 | |
|---|
| 292 | static void insert_species_into_tree(const InsertSpeciesIterator begin, const InsertSpeciesIterator end, arb_progress& progress) { |
|---|
| 293 | typedef map<AP_tree_nlen*, BestEdge> BestEdge4Node; |
|---|
| 294 | BestEdge4Node bestpos; |
|---|
| 295 | |
|---|
| 296 | ap_assert(begin != end); |
|---|
| 297 | |
|---|
| 298 | { |
|---|
| 299 | ap_main->remember(); |
|---|
| 300 | |
|---|
| 301 | EdgeChain chain(rootEdge(), ANY_EDGE, false); |
|---|
| 302 | ap_assert(chain.size()>0); |
|---|
| 303 | |
|---|
| 304 | bool speciesInserted = false; |
|---|
| 305 | |
|---|
| 306 | while (chain) { |
|---|
| 307 | AP_tree_edge *edge = *chain; ++chain; |
|---|
| 308 | edge->set_root(); |
|---|
| 309 | |
|---|
| 310 | EdgeBetween betweenNodes(edge); |
|---|
| 311 | |
|---|
| 312 | InsertSpeciesIterator curr = begin; |
|---|
| 313 | AP_tree_nlen *species = *curr++; |
|---|
| 314 | |
|---|
| 315 | if (speciesInserted) { |
|---|
| 316 | species->moveTo(edge); |
|---|
| 317 | } |
|---|
| 318 | else { |
|---|
| 319 | species->insert(edge->sonNode()); // edge is root-edge -> son does not matter |
|---|
| 320 | speciesInserted = true; |
|---|
| 321 | } |
|---|
| 322 | |
|---|
| 323 | species->set_root(); // => only needs one combine when exchanging species |
|---|
| 324 | |
|---|
| 325 | Mutations pars = rootNode()->costs(); |
|---|
| 326 | BestEdge4Node::iterator found = bestpos.find(species); |
|---|
| 327 | if (found == bestpos.end() || pars<found->second.pars) { |
|---|
| 328 | bestpos[species] = BestEdge(betweenNodes, pars); |
|---|
| 329 | } |
|---|
| 330 | ++progress; |
|---|
| 331 | |
|---|
| 332 | AP_tree_nlen *rot_node = rootNode()->get_leftson(); // rot=rest of tree |
|---|
| 333 | if (rot_node == species) { |
|---|
| 334 | rot_node = rot_node->get_brother(); |
|---|
| 335 | } |
|---|
| 336 | ap_assert(rot_node->get_brother() == species); |
|---|
| 337 | |
|---|
| 338 | AP_combinableSeq *rot_seq = rot_node->get_seq(); |
|---|
| 339 | Mutations rot_costs = rot_node->stored_costs(); |
|---|
| 340 | |
|---|
| 341 | ap_assert(species->stored_costs() == 0); // leaf has no mutations |
|---|
| 342 | |
|---|
| 343 | while (1) { |
|---|
| 344 | if (curr == end) break; |
|---|
| 345 | |
|---|
| 346 | AP_tree_nlen *nextSpec = *curr++; |
|---|
| 347 | AP_combinableSeq *nextSeq = nextSpec->get_seq(); |
|---|
| 348 | |
|---|
| 349 | pars = nextSeq->mutations_if_combined_with(rot_seq) + rot_costs; |
|---|
| 350 | found = bestpos.find(nextSpec); |
|---|
| 351 | if (found == bestpos.end() || pars<found->second.pars) { |
|---|
| 352 | bestpos[nextSpec] = BestEdge(betweenNodes, pars); |
|---|
| 353 | } |
|---|
| 354 | ++progress; |
|---|
| 355 | } |
|---|
| 356 | } |
|---|
| 357 | |
|---|
| 358 | ap_main->revert(); |
|---|
| 359 | } |
|---|
| 360 | |
|---|
| 361 | // create insert lists for each used insert position: |
|---|
| 362 | typedef list<AP_tree_nlen*> NodeList; |
|---|
| 363 | typedef map<AP_tree_edge*, NodeList> NodesAtEdge; |
|---|
| 364 | |
|---|
| 365 | NodesAtEdge atEdge; |
|---|
| 366 | for (InsertSpeciesIterator s = begin; s != end; ++s) { |
|---|
| 367 | const BestEdge& best = bestpos[*s]; |
|---|
| 368 | AP_tree_edge *edge = best.edge(); |
|---|
| 369 | |
|---|
| 370 | ap_assert(edge != NULp); |
|---|
| 371 | |
|---|
| 372 | NodesAtEdge::iterator at = atEdge.find(edge); |
|---|
| 373 | if (at == atEdge.end()) { |
|---|
| 374 | atEdge[edge] = NodeList(1, *s); |
|---|
| 375 | } |
|---|
| 376 | else { |
|---|
| 377 | at->second.push_back(*s); |
|---|
| 378 | } |
|---|
| 379 | } |
|---|
| 380 | |
|---|
| 381 | #if defined(DEVEL_RALF) |
|---|
| 382 | // testcode: test whether all found edges are members of the tree |
|---|
| 383 | // (got some problem with insert/REMOVE while root is next to inserted/removed node) |
|---|
| 384 | |
|---|
| 385 | set<AP_tree_edge*> edgeInTree; |
|---|
| 386 | { |
|---|
| 387 | EdgeChain chain(rootEdge(), ANY_EDGE, false); |
|---|
| 388 | while (chain) { |
|---|
| 389 | AP_tree_edge *edge = *chain; ++chain; |
|---|
| 390 | edgeInTree.insert(edge); |
|---|
| 391 | } |
|---|
| 392 | |
|---|
| 393 | for (BestEdge4Node::iterator b = bestpos.begin(); b != bestpos.end(); ++b) { |
|---|
| 394 | AP_tree_edge *e = b->second.edge(); |
|---|
| 395 | |
|---|
| 396 | if (edgeInTree.find(e) == edgeInTree.end()) { |
|---|
| 397 | GBK_terminate("remembered edge has been removed from tree"); |
|---|
| 398 | } |
|---|
| 399 | } |
|---|
| 400 | } |
|---|
| 401 | #endif |
|---|
| 402 | |
|---|
| 403 | // build list of edges where insert takes place (value=iterator into 'atEdge') |
|---|
| 404 | // => insert in determined order |
|---|
| 405 | typedef list<NodesAtEdge::iterator> InsertOrder; |
|---|
| 406 | InsertOrder insertOrder; |
|---|
| 407 | { |
|---|
| 408 | EdgeChain chain(rootEdge(), ANY_EDGE, false); |
|---|
| 409 | while (chain) { |
|---|
| 410 | AP_tree_edge *edge = *chain; ++chain; |
|---|
| 411 | |
|---|
| 412 | NodesAtEdge::iterator at = atEdge.find(edge); |
|---|
| 413 | if (at != atEdge.end()) { |
|---|
| 414 | insertOrder.push_back(at); |
|---|
| 415 | } |
|---|
| 416 | } |
|---|
| 417 | } |
|---|
| 418 | |
|---|
| 419 | typedef list<AP_subtree> OptiList; |
|---|
| 420 | OptiList optiPos; |
|---|
| 421 | |
|---|
| 422 | // insert species to tree according to insert-lists: |
|---|
| 423 | for (InsertOrder::iterator o = insertOrder.begin(); o != insertOrder.end(); ++o) { |
|---|
| 424 | NodesAtEdge::iterator e = *o; |
|---|
| 425 | AP_tree_edge *edge = e->first; |
|---|
| 426 | NodeList& nodes = e->second; |
|---|
| 427 | |
|---|
| 428 | edge->set_root(); |
|---|
| 429 | |
|---|
| 430 | AP_tree_nlen *brother = edge->sonNode(); |
|---|
| 431 | size_t nodes_size = nodes.size(); |
|---|
| 432 | |
|---|
| 433 | #if defined(ASSERTION_USED) |
|---|
| 434 | ap_assert(bestpos[nodes.front()].edge() == edge); |
|---|
| 435 | #endif |
|---|
| 436 | |
|---|
| 437 | if (nodes_size == 1) { |
|---|
| 438 | nodes.front()->insert(brother); |
|---|
| 439 | ASSERT_VALID_TREE(rootNode()); |
|---|
| 440 | } |
|---|
| 441 | else { |
|---|
| 442 | bool atLeaf = brother->is_leaf(); |
|---|
| 443 | if (!atLeaf && edge->is_leaf_edge()) { // at leaf edge -> make sure brother points to leaf node |
|---|
| 444 | brother = edge->notSonNode(); |
|---|
| 445 | ap_assert(brother->is_leaf()); |
|---|
| 446 | atLeaf = true; |
|---|
| 447 | } |
|---|
| 448 | |
|---|
| 449 | #if defined(UNIT_TESTS) |
|---|
| 450 | if (RUNNING_TEST()) { |
|---|
| 451 | // use a determined order to insert multiple species at one position. |
|---|
| 452 | // Does not produce "better" topologies, just makes result independent from insert order. |
|---|
| 453 | typedef vector<AP_tree_nlen*> NodeVector; |
|---|
| 454 | |
|---|
| 455 | NodeVector toSort(nodes.begin(), nodes.end()); |
|---|
| 456 | sort(toSort.begin(), toSort.end(), NodeInsertOrder()); |
|---|
| 457 | nodes = NodeList(toSort.begin(), toSort.end()); |
|---|
| 458 | } |
|---|
| 459 | #endif |
|---|
| 460 | |
|---|
| 461 | AP_tree_nlen *at = brother; |
|---|
| 462 | for (NodeList::iterator n = nodes.begin(); n != nodes.end(); ++n) { |
|---|
| 463 | (*n)->insert(at); |
|---|
| 464 | at = *n; // only insert 1st node at 'brother', insert following nodes next to previously added nodes |
|---|
| 465 | } |
|---|
| 466 | |
|---|
| 467 | ASSERT_VALID_TREE(rootNode()); |
|---|
| 468 | |
|---|
| 469 | AP_tree_nlen *ourFather = brother->get_father(); |
|---|
| 470 | AP_tree_nlen *addedSubtree = brother->get_brother(); // contains all added species |
|---|
| 471 | ap_assert(addedSubtree->is_ancestor_of(at)); |
|---|
| 472 | |
|---|
| 473 | if (atLeaf) { |
|---|
| 474 | // if inserted at leaf edge -> perform NNI at parent edge (i.e. including the leaf) |
|---|
| 475 | AP_tree_edge *toRest = ourFather->nextEdge(); |
|---|
| 476 | for (int i = 0; i<2; ++i) { |
|---|
| 477 | AP_tree_nlen *rest = toRest->otherNode(ourFather); |
|---|
| 478 | if (rest != brother && rest != addedSubtree) { |
|---|
| 479 | break; |
|---|
| 480 | } |
|---|
| 481 | toRest = ourFather->nextEdge(toRest); |
|---|
| 482 | } |
|---|
| 483 | |
|---|
| 484 | optiPos.push_back(AP_subtree(toRest, ourFather)); |
|---|
| 485 | ap_assert(optiPos.back().subtreeRoot() == ourFather); |
|---|
| 486 | } |
|---|
| 487 | else { |
|---|
| 488 | if (nodes_size>2) { // if inserted at internal edge && only 2 species inserted -> NNI makes no sense |
|---|
| 489 | // Store (directed) edge to brother (for later optimization of subtree): |
|---|
| 490 | AP_tree_edge *subEdge = ourFather->edgeTo(addedSubtree); |
|---|
| 491 | optiPos.push_back(AP_subtree(subEdge, addedSubtree)); |
|---|
| 492 | ap_assert(optiPos.back().subtreeRoot() == addedSubtree); |
|---|
| 493 | } |
|---|
| 494 | } |
|---|
| 495 | } |
|---|
| 496 | progress.inc_by(nodes_size); |
|---|
| 497 | } |
|---|
| 498 | |
|---|
| 499 | // Optimize all inserts of multiple species at one position: |
|---|
| 500 | { |
|---|
| 501 | arb_suppress_progress suppress_child; // suppress implicit progress count caused by nni_rec |
|---|
| 502 | |
|---|
| 503 | AP_FLOAT curr_pars = rootNode()->costs(); |
|---|
| 504 | AP_FLOAT prev_pars = curr_pars; |
|---|
| 505 | |
|---|
| 506 | int loop = 0; |
|---|
| 507 | |
|---|
| 508 | do { |
|---|
| 509 | ++loop; |
|---|
| 510 | prev_pars = curr_pars; |
|---|
| 511 | for (OptiList::iterator op = optiPos.begin(); op != optiPos.end(); ++op) { |
|---|
| 512 | AP_tree_edge *subtreeEdge = op->edgeToSubtree(); |
|---|
| 513 | AP_tree_nlen *subtreeRoot = op->subtreeRoot(); |
|---|
| 514 | |
|---|
| 515 | subtreeEdge->set_root(); |
|---|
| 516 | ap_assert(subtreeEdge->isConnectedTo(subtreeRoot)); |
|---|
| 517 | AP_tree_nlen *father = subtreeEdge->otherNode(subtreeRoot); |
|---|
| 518 | |
|---|
| 519 | AP_FLOAT this_pars; |
|---|
| 520 | while (1) { |
|---|
| 521 | ap_assert(subtreeEdge->isConnectedTo(father)); // otherwise block fails |
|---|
| 522 | this_pars = subtreeEdge->nni_rec(SKIP_LEAF_EDGES, AP_BL_NNI_ONLY, father, false); |
|---|
| 523 | if (!(this_pars<curr_pars)) { |
|---|
| 524 | ap_assert(!(this_pars>curr_pars)); |
|---|
| 525 | break; |
|---|
| 526 | } |
|---|
| 527 | curr_pars = this_pars; |
|---|
| 528 | } |
|---|
| 529 | |
|---|
| 530 | ap_assert(subtreeEdge->isConnectedTo(father)); // otherwise next command fails |
|---|
| 531 | AP_tree_nlen *newSubtreeRoot = subtreeEdge->otherNode(father); |
|---|
| 532 | if (newSubtreeRoot != subtreeRoot) { |
|---|
| 533 | op->setSubtreeRoot(newSubtreeRoot); |
|---|
| 534 | } |
|---|
| 535 | } |
|---|
| 536 | } |
|---|
| 537 | while (curr_pars<prev_pars); |
|---|
| 538 | } |
|---|
| 539 | } |
|---|
| 540 | |
|---|
| 541 | static void insert_all_species_into_tree(GB_HASH*& hash) { |
|---|
| 542 | // inserts all species (from hash) into tree |
|---|
| 543 | |
|---|
| 544 | AP_tree_nlen *tree = rootNode(); |
|---|
| 545 | |
|---|
| 546 | int inTree = tree ? tree->count_leafs() : 0; |
|---|
| 547 | int toInsert = GBS_hash_elements(hash); |
|---|
| 548 | |
|---|
| 549 | ap_assert(toInsert); |
|---|
| 550 | |
|---|
| 551 | long steps = calc_steps(toInsert, inTree); |
|---|
| 552 | arb_progress progress(steps); |
|---|
| 553 | |
|---|
| 554 | // move species to insert to a stack |
|---|
| 555 | InsertedSpecies speciesToInsert; |
|---|
| 556 | speciesToInsert.reserve(toInsert); |
|---|
| 557 | |
|---|
| 558 | if (maxAllowedInsertions(inTree)<toInsert) { |
|---|
| 559 | // insert longest sequences first |
|---|
| 560 | GBS_hash_do_sorted_loop(hash, toInserted, sort_sequences_by_length, &speciesToInsert); |
|---|
| 561 | } |
|---|
| 562 | else { |
|---|
| 563 | // insert all sequences (order should not matter) |
|---|
| 564 | GBS_hash_do_loop(hash, toInserted, &speciesToInsert); |
|---|
| 565 | } |
|---|
| 566 | GBS_free_hash(hash); |
|---|
| 567 | hash = NULp; |
|---|
| 568 | |
|---|
| 569 | ap_assert(toInsert != 2); // @@@ need to test this case |
|---|
| 570 | |
|---|
| 571 | InsertSpeciesIterator curr = speciesToInsert.begin(); |
|---|
| 572 | InsertSpeciesIterator end = speciesToInsert.end(); |
|---|
| 573 | |
|---|
| 574 | AP_tree_edge *oldRootEdge = NULp; |
|---|
| 575 | if (!tree) { // create initial tree |
|---|
| 576 | AP_pars_root *troot = ap_main->get_tree_root(); |
|---|
| 577 | |
|---|
| 578 | AP_tree_nlen *s1 = *curr++; |
|---|
| 579 | AP_tree_nlen *s2 = *curr++; |
|---|
| 580 | |
|---|
| 581 | s1->initial_insert(s2, troot); |
|---|
| 582 | |
|---|
| 583 | inTree = 2; |
|---|
| 584 | toInsert -= 2; |
|---|
| 585 | |
|---|
| 586 | ++progress; |
|---|
| 587 | } |
|---|
| 588 | else { |
|---|
| 589 | oldRootEdge = rootEdge(); |
|---|
| 590 | } |
|---|
| 591 | |
|---|
| 592 | ASSERT_VALID_TREE(rootNode()); |
|---|
| 593 | |
|---|
| 594 | while (1) { |
|---|
| 595 | int insertNow = calcInsertNow(toInsert, inTree); |
|---|
| 596 | ap_assert(insertNow<=toInsert); |
|---|
| 597 | if (insertNow == toInsert) break; |
|---|
| 598 | |
|---|
| 599 | { |
|---|
| 600 | InsertSpeciesIterator partEnd = curr; |
|---|
| 601 | advance(partEnd, insertNow); |
|---|
| 602 | |
|---|
| 603 | insert_species_into_tree(curr, partEnd, progress); |
|---|
| 604 | curr = partEnd; |
|---|
| 605 | } |
|---|
| 606 | |
|---|
| 607 | toInsert -= insertNow; |
|---|
| 608 | inTree += insertNow; |
|---|
| 609 | } |
|---|
| 610 | |
|---|
| 611 | insert_species_into_tree(curr, end, progress); |
|---|
| 612 | |
|---|
| 613 | if (oldRootEdge) oldRootEdge->set_root(); // set root back to old position |
|---|
| 614 | } |
|---|
| 615 | |
|---|
// Selects which species nt_add() adds to the tree.
enum AddWhat {
    NT_ADD_MARKED   = 0, // all marked species
    NT_ADD_SELECTED = 1, // the species named in AWAR_SPECIES_NAME
};
|---|
| 620 | |
|---|
| 621 | static void nt_add(AWT_graphic_parsimony *agt, AddWhat what, bool quick) { |
|---|
| 622 | GB_ERROR error = NULp; |
|---|
| 623 | |
|---|
| 624 | AP_tree *oldrootleft = NULp; |
|---|
| 625 | AP_tree *oldrootright = NULp; |
|---|
| 626 | { |
|---|
| 627 | AP_tree_nlen *root = rootNode(); |
|---|
| 628 | if (root) { |
|---|
| 629 | root->reset_subtree_layout(); |
|---|
| 630 | oldrootleft = root->get_leftson(); |
|---|
| 631 | oldrootright = root->get_rightson(); |
|---|
| 632 | } |
|---|
| 633 | } |
|---|
| 634 | |
|---|
| 635 | GB_HASH *hash = NULp; |
|---|
| 636 | GBDATA *gb_main = agt->get_gbmain(); |
|---|
| 637 | { |
|---|
| 638 | GB_transaction ta(gb_main); |
|---|
| 639 | switch (what) { |
|---|
| 640 | case NT_ADD_SELECTED: { |
|---|
| 641 | char *name = GBT_readOrCreate_string(gb_main, AWAR_SPECIES_NAME, ""); |
|---|
| 642 | if (name && strlen(name)) { |
|---|
| 643 | GBDATA *gb_species = GBT_find_species(gb_main, name); |
|---|
| 644 | if (gb_species) { |
|---|
| 645 | hash = GBS_create_hash(1, GB_MIND_CASE); |
|---|
| 646 | GBS_write_hash(hash, name, (long)gb_species); |
|---|
| 647 | } |
|---|
| 648 | else error = GBS_global_string("Selected Species (%s) not found", name); |
|---|
| 649 | } |
|---|
| 650 | else error = "Please select a species"; |
|---|
| 651 | free(name); |
|---|
| 652 | break; |
|---|
| 653 | } |
|---|
| 654 | case NT_ADD_MARKED: { |
|---|
| 655 | hash = GBT_create_marked_species_hash(gb_main); |
|---|
| 656 | break; |
|---|
| 657 | } |
|---|
| 658 | } |
|---|
| 659 | } |
|---|
| 660 | |
|---|
| 661 | if (!error) { |
|---|
| 662 | ap_assert(hash); |
|---|
| 663 | |
|---|
| 664 | arb_progress progress(quick ? "Quick add" : "Add + NNI"); |
|---|
| 665 | |
|---|
| 666 | NT_remove_species_in_tree_from_hash(rootNode(), hash); |
|---|
| 667 | |
|---|
| 668 | size_t species_count = GBS_hash_elements(hash); |
|---|
| 669 | InsertPerfMeter insertPerf("(quick-)add", species_count); |
|---|
| 670 | |
|---|
| 671 | { |
|---|
| 672 | GB_transaction ta(gb_main); |
|---|
| 673 | GBS_hash_do_loop(hash, transform_gbd_to_leaf, NULp); |
|---|
| 674 | } |
|---|
| 675 | { |
|---|
| 676 | size_t skipped = species_count - GBS_hash_elements(hash); |
|---|
| 677 | if (skipped) { |
|---|
| 678 | GBT_message(gb_main, GBS_global_string("Skipped %zu species (no data?)", skipped)); |
|---|
| 679 | } |
|---|
| 680 | } |
|---|
| 681 | if (GBS_hash_elements(hash)) { |
|---|
| 682 | insert_all_species_into_tree(hash); |
|---|
| 683 | } |
|---|
| 684 | else { |
|---|
| 685 | GBT_message(gb_main, "No species (left) to insert"); |
|---|
| 686 | } |
|---|
| 687 | |
|---|
| 688 | if (rootNode()) { |
|---|
| 689 | if (oldrootleft) { |
|---|
| 690 | if (oldrootleft->father == oldrootright) oldrootleft->set_root(); |
|---|
| 691 | else oldrootright->set_root(); |
|---|
| 692 | } |
|---|
| 693 | else { |
|---|
| 694 | ARB_edge innermost = rootNode()->get_tree_root()->find_innermost_edge(); |
|---|
| 695 | innermost.set_root(); |
|---|
| 696 | } |
|---|
| 697 | |
|---|
| 698 | if (!quick) { |
|---|
| 699 | arb_suppress_progress quiet; |
|---|
| 700 | |
|---|
| 701 | Mutations pars_prev = rootNode()->costs(); |
|---|
| 702 | rootNode()->compute_tree(); // see AP_tree_edge.cxx@flags_broken_by_moveNextTo |
|---|
| 703 | progress.subtitle("local optimize (repeated NNI)"); |
|---|
| 704 | while (1) { |
|---|
| 705 | rootEdge()->nni_rec(EdgeSpec(SKIP_UNMARKED_EDGES|SKIP_LEAF_EDGES), AP_BL_NNI_ONLY, NULp, true); |
|---|
| 706 | Mutations pars_curr = rootNode()->costs(); |
|---|
| 707 | if (pars_curr == pars_prev) break; |
|---|
| 708 | ap_assert(pars_curr<pars_prev); |
|---|
| 709 | pars_prev = pars_curr; |
|---|
| 710 | } |
|---|
| 711 | } |
|---|
| 712 | |
|---|
| 713 | { |
|---|
| 714 | arb_suppress_progress ignore; |
|---|
| 715 | rootEdge()->calc_branchlengths(); |
|---|
| 716 | } |
|---|
| 717 | |
|---|
| 718 | ASSERT_VALID_TREE(rootNode()); |
|---|
| 719 | rootNode()->compute_tree(); |
|---|
| 720 | } |
|---|
| 721 | else { |
|---|
| 722 | error = "Tree lost (no leafs left)"; |
|---|
| 723 | } |
|---|
| 724 | |
|---|
| 725 | insertPerf.dump(stdout); |
|---|
| 726 | } |
|---|
| 727 | |
|---|
| 728 | if (hash) GBS_free_hash(hash); |
|---|
| 729 | if (error) aw_message(error); |
|---|
| 730 | |
|---|
| 731 | // @@@ quick-add w/o NNI should sort according to original tree |
|---|
| 732 | agt->reorderTree(BIG_BRANCHES_TO_TOP); |
|---|
| 733 | } |
|---|
| 734 | |
|---|
| 735 | // ------------------------------------------ |
|---|
| 736 | // Adding partial sequences to tree |
|---|
| 737 | |
|---|
| 738 | class PartialSequence { |
|---|
| 739 | GBDATA *gb_species; |
|---|
| 740 | mutable AP_tree_nlen *self; // self converted to leaf (ready for insertion) |
|---|
| 741 | const AP_tree_nlen *best_full_match; // full sequence position which matched best |
|---|
| 742 | long overlap; // size of overlapping region |
|---|
| 743 | long penalty; // weighted mismatches |
|---|
| 744 | bool released; |
|---|
| 745 | bool multi_match; |
|---|
| 746 | string multi_list; // list of equal-rated insertion-points (not containing self) |
|---|
| 747 | |
|---|
| 748 | AP_tree_nlen *get_self() const { |
|---|
| 749 | if (!self) { |
|---|
| 750 | ap_assert(!released); // request not possible, because leaf has already been released! |
|---|
| 751 | |
|---|
| 752 | self = (AP_tree_nlen*)transform_gbd_to_leaf(GBT_get_name_or_description(gb_species), (long)gb_species, NULp); |
|---|
| 753 | ap_assert(self); |
|---|
| 754 | } |
|---|
| 755 | return self; |
|---|
| 756 | } |
|---|
| 757 | |
|---|
| 758 | public: |
|---|
| 759 | PartialSequence(GBDATA *gb_species_) : |
|---|
| 760 | gb_species(gb_species_), |
|---|
| 761 | self(NULp), |
|---|
| 762 | best_full_match(NULp), |
|---|
| 763 | overlap(0), |
|---|
| 764 | penalty(LONG_MAX), |
|---|
| 765 | released(false), |
|---|
| 766 | multi_match(false) |
|---|
| 767 | {} |
|---|
| 768 | PartialSequence(const PartialSequence& other) |
|---|
| 769 | : gb_species(other.gb_species), |
|---|
| 770 | self(other.self), |
|---|
| 771 | best_full_match(other.best_full_match), |
|---|
| 772 | overlap(other.overlap), |
|---|
| 773 | penalty(other.penalty), |
|---|
| 774 | released(other.released), |
|---|
| 775 | multi_match(other.multi_match), |
|---|
| 776 | multi_list(other.multi_list) |
|---|
| 777 | { |
|---|
| 778 | ap_assert(!self); // copying self not implemented |
|---|
| 779 | } |
|---|
| 780 | DECLARE_ASSIGNMENT_OPERATOR(PartialSequence); |
|---|
| 781 | ~PartialSequence() { ap_assert(!self); } |
|---|
| 782 | |
|---|
| 783 | GBDATA *get_species() const { return gb_species; } |
|---|
| 784 | const AP_tree_nlen *get_best_match() const { return best_full_match; } |
|---|
| 785 | AP_FLOAT get_branchlength() const { return AP_FLOAT(penalty)/overlap; } |
|---|
| 786 | void test_match(const AP_tree_nlen *leaf_full); |
|---|
| 787 | bool is_multi_match() const { return multi_match; } |
|---|
| 788 | |
|---|
| 789 | const char *get_name() const { |
|---|
| 790 | const char *name = get_self()->name; |
|---|
| 791 | ap_assert(name); |
|---|
| 792 | return name; |
|---|
| 793 | } |
|---|
| 794 | |
|---|
| 795 | string get_multilist() const { |
|---|
| 796 | ap_assert(is_multi_match()); |
|---|
| 797 | return string(best_full_match->name)+multi_list; |
|---|
| 798 | } |
|---|
| 799 | |
|---|
| 800 | AP_tree_nlen *release() { |
|---|
| 801 | AP_tree_nlen *s = self; |
|---|
| 802 | self = NULp; |
|---|
| 803 | released = true; |
|---|
| 804 | return s; |
|---|
| 805 | } |
|---|
| 806 | |
|---|
| 807 | void dump(const char *whichMatch) const { |
|---|
| 808 | ap_assert(best_full_match); |
|---|
| 809 | printf("%s match for '%s' is '%s' (overlap=%li penalty=%li)\n", |
|---|
| 810 | whichMatch, get_name(), best_full_match->name, |
|---|
| 811 | overlap, penalty); |
|---|
| 812 | } |
|---|
| 813 | |
|---|
| 814 | }; |
|---|
| 815 | |
|---|
| 816 | void PartialSequence::test_match(const AP_tree_nlen *leaf_full) { |
|---|
| 817 | long curr_overlap; |
|---|
| 818 | long curr_penalty; |
|---|
| 819 | |
|---|
| 820 | leaf_full->get_seq()->partial_match(get_self()->get_seq(), &curr_overlap, &curr_penalty); |
|---|
| 821 | |
|---|
| 822 | bool better = false; |
|---|
| 823 | |
|---|
| 824 | if (curr_overlap > overlap) { |
|---|
| 825 | better = true; |
|---|
| 826 | } |
|---|
| 827 | else if (curr_overlap == overlap) { |
|---|
| 828 | if (curr_penalty<penalty) { |
|---|
| 829 | better = true; |
|---|
| 830 | } |
|---|
| 831 | else if (curr_penalty == penalty) { |
|---|
| 832 | // found two equal-rated insertion points -> store data for warning |
|---|
| 833 | #if defined(DEBUG) |
|---|
| 834 | if (!multi_match) dump("better"); |
|---|
| 835 | printf("Another equal match is against '%s' (overlap=%li penalty=%li)\n", leaf_full->name, curr_overlap, curr_penalty); |
|---|
| 836 | #endif // DEBUG |
|---|
| 837 | |
|---|
| 838 | multi_match = true; |
|---|
| 839 | multi_list.append(1, '/'); |
|---|
| 840 | multi_list.append(leaf_full->name); |
|---|
| 841 | } |
|---|
| 842 | } |
|---|
| 843 | |
|---|
| 844 | if (better) { |
|---|
| 845 | overlap = curr_overlap; |
|---|
| 846 | penalty = curr_penalty; |
|---|
| 847 | best_full_match = leaf_full; |
|---|
| 848 | multi_match = false; |
|---|
| 849 | multi_list = ""; |
|---|
| 850 | |
|---|
| 851 | #if defined(DEBUG) |
|---|
| 852 | dump("better"); |
|---|
| 853 | #endif |
|---|
| 854 | } |
|---|
| 855 | #if defined(DEBUG) |
|---|
| 856 | else if (!multi_match) { |
|---|
| 857 | printf("Worse match against '%s' (overlap=%li penalty=%li)\n", leaf_full->name, curr_overlap, curr_penalty); |
|---|
| 858 | } |
|---|
| 859 | #endif |
|---|
| 860 | } |
|---|
| 861 | |
|---|
| 862 | static GB_ERROR nt_best_partial_match_rec(list<PartialSequence>& partial, const AP_tree_nlen *tree) { |
|---|
| 863 | GB_ERROR error = NULp; |
|---|
| 864 | |
|---|
| 865 | if (tree) { |
|---|
| 866 | if (tree->is_leaf() && tree->name) { |
|---|
| 867 | if (tree->gb_node) { |
|---|
| 868 | int is_partial = GBT_is_partial(tree->gb_node, 0, true); // marks undef as 'full sequence' |
|---|
| 869 | if (is_partial == 0) { // do not consider other partial sequences |
|---|
| 870 | list<PartialSequence>::iterator i = partial.begin(); |
|---|
| 871 | list<PartialSequence>::iterator e = partial.end(); |
|---|
| 872 | for (; i != e; ++i) { |
|---|
| 873 | i->test_match(tree); |
|---|
| 874 | } |
|---|
| 875 | } |
|---|
| 876 | else if (is_partial == -1) { |
|---|
| 877 | error = GB_await_error(); |
|---|
| 878 | } |
|---|
| 879 | } |
|---|
| 880 | } |
|---|
| 881 | else { |
|---|
| 882 | error = nt_best_partial_match_rec(partial, tree->get_leftson()); |
|---|
| 883 | if (!error) error = nt_best_partial_match_rec(partial, tree->get_rightson()); |
|---|
| 884 | } |
|---|
| 885 | } |
|---|
| 886 | return error; |
|---|
| 887 | } |
|---|
| 888 | |
|---|
| 889 | static void count_partial_and_full(const AP_tree_nlen *at, int *partial, int *full, int *zombies, int default_value, bool define_if_undef) { |
|---|
| 890 | if (at->is_leaf()) { |
|---|
| 891 | if (at->gb_node) { |
|---|
| 892 | int is_partial = GBT_is_partial(at->gb_node, default_value, define_if_undef); |
|---|
| 893 | if (is_partial) ++(*partial); |
|---|
| 894 | else ++(*full); |
|---|
| 895 | } |
|---|
| 896 | else { |
|---|
| 897 | ++(*zombies); |
|---|
| 898 | } |
|---|
| 899 | } |
|---|
| 900 | else { |
|---|
| 901 | count_partial_and_full(at->get_leftson(), partial, full, zombies, default_value, define_if_undef); |
|---|
| 902 | count_partial_and_full(at->get_rightson(), partial, full, zombies, default_value, define_if_undef); |
|---|
| 903 | } |
|---|
| 904 | } |
|---|
| 905 | |
|---|
| 906 | static const AP_tree_nlen *find_least_deep_leaf(const AP_tree_nlen *at, int depth, int *min_depth) { |
|---|
| 907 | if (depth >= *min_depth) { |
|---|
| 908 | return NULp; // already found better or equal |
|---|
| 909 | } |
|---|
| 910 | |
|---|
| 911 | if (at->is_leaf()) { |
|---|
| 912 | if (at->gb_node) { |
|---|
| 913 | *min_depth = depth; |
|---|
| 914 | return at; |
|---|
| 915 | } |
|---|
| 916 | return NULp; |
|---|
| 917 | } |
|---|
| 918 | |
|---|
| 919 | const AP_tree_nlen *left = find_least_deep_leaf(at->get_leftson(), depth+1, min_depth); |
|---|
| 920 | const AP_tree_nlen *right = find_least_deep_leaf(at->get_rightson(), depth+1, min_depth); |
|---|
| 921 | |
|---|
| 922 | return right ? right : left; |
|---|
| 923 | } |
|---|
| 924 | inline AP_tree_nlen *find_least_deep_leaf(AP_tree_nlen *at, int depth, int *min_depth) { |
|---|
| 925 | return const_cast<AP_tree_nlen*>(find_least_deep_leaf(const_cast<const AP_tree_nlen*>(at), depth, min_depth)); |
|---|
| 926 | } |
|---|
| 927 | |
|---|
| 928 | static void push_partial(const char *, long val, void *cd_partial) { |
|---|
| 929 | list<PartialSequence> *partial = reinterpret_cast<list<PartialSequence> *>(cd_partial); |
|---|
| 930 | partial->push_back(PartialSequence((GBDATA*)val)); |
|---|
| 931 | } |
|---|
| 932 | |
|---|
| 933 | // ------------------------------- |
|---|
| 934 | // Add Partial sequences |
|---|
| 935 | |
|---|
| 936 | static void nt_add_partial(AWT_graphic_parsimony *agt) { |
|---|
| 937 | GB_ERROR error = NULp; |
|---|
| 938 | GBDATA *gb_main = agt->get_gbmain(); |
|---|
| 939 | |
|---|
| 940 | GB_begin_transaction(gb_main); |
|---|
| 941 | |
|---|
| 942 | int full_marked_sequences = 0; |
|---|
| 943 | |
|---|
| 944 | arb_progress part_add_progress("Adding partial sequences"); |
|---|
| 945 | |
|---|
| 946 | { |
|---|
| 947 | list<PartialSequence> partial; |
|---|
| 948 | { |
|---|
| 949 | GB_HASH *partial_hash = GBS_create_hash(GBT_get_species_count(gb_main), GB_MIND_CASE); |
|---|
| 950 | |
|---|
| 951 | int marked_found = 0; |
|---|
| 952 | int partial_marked_sequences = 0; |
|---|
| 953 | int no_data = 0; // no data in alignment |
|---|
| 954 | |
|---|
| 955 | for (GBDATA *gb_marked = GBT_first_marked_species(gb_main); |
|---|
| 956 | !error && gb_marked; |
|---|
| 957 | gb_marked = GBT_next_marked_species(gb_marked)) |
|---|
| 958 | { |
|---|
| 959 | ++marked_found; |
|---|
| 960 | |
|---|
| 961 | if (GBT_find_sequence(gb_marked, ap_main->get_aliname())) { // species has sequence in alignment |
|---|
| 962 | const char *name = GBT_get_name_or_description(gb_marked); |
|---|
| 963 | |
|---|
| 964 | switch (GBT_is_partial(gb_marked, 1, true)) { // marks undef as 'partial sequence' |
|---|
| 965 | case 0: { // full sequences |
|---|
| 966 | GBT_message(gb_main, GBS_global_string("'%s' is a full sequence (cannot add partial)", name)); |
|---|
| 967 | ++full_marked_sequences; |
|---|
| 968 | break; |
|---|
| 969 | } |
|---|
| 970 | case 1: // partial sequences |
|---|
| 971 | ++partial_marked_sequences; |
|---|
| 972 | GBS_write_hash(partial_hash, name, (long)gb_marked); |
|---|
| 973 | break; |
|---|
| 974 | case -1: // error |
|---|
| 975 | error = GB_await_error(); |
|---|
| 976 | break; |
|---|
| 977 | default: |
|---|
| 978 | ap_assert(0); |
|---|
| 979 | break; |
|---|
| 980 | } |
|---|
| 981 | } |
|---|
| 982 | else { |
|---|
| 983 | no_data++; |
|---|
| 984 | } |
|---|
| 985 | } |
|---|
| 986 | |
|---|
| 987 | if (!error && !marked_found) error = "There are no marked species"; |
|---|
| 988 | |
|---|
| 989 | if (!error) { |
|---|
| 990 | NT_remove_species_in_tree_from_hash(rootNode(), partial_hash); // skip all species which are in tree |
|---|
| 991 | GBS_hash_do_const_loop(partial_hash, push_partial, &partial); // build partial list from hash |
|---|
| 992 | |
|---|
| 993 | int partials_already_in_tree = partial_marked_sequences - partial.size(); |
|---|
| 994 | |
|---|
| 995 | if (no_data>0) GBT_message(gb_main, GBS_global_string("%i marked species have no data in '%s'", no_data, ap_main->get_aliname())); |
|---|
| 996 | if (full_marked_sequences>0) GBT_message(gb_main, GBS_global_string("%i marked species are declared full sequences", full_marked_sequences)); |
|---|
| 997 | if (partials_already_in_tree>0) GBT_message(gb_main, GBS_global_string("%i marked species are already in tree", partials_already_in_tree)); |
|---|
| 998 | |
|---|
| 999 | if (partial.empty()) error = "No species left to add"; |
|---|
| 1000 | } |
|---|
| 1001 | |
|---|
| 1002 | GBS_free_hash(partial_hash); |
|---|
| 1003 | } |
|---|
| 1004 | |
|---|
| 1005 | if (!error) error = GBT_add_new_species_changekey(gb_main, "ARB_partial", GB_INT); |
|---|
| 1006 | |
|---|
| 1007 | if (!error) { |
|---|
| 1008 | rootNode()->reset_subtree_layout(); |
|---|
| 1009 | |
|---|
| 1010 | // find best matching full sequence for each partial sequence |
|---|
| 1011 | error = nt_best_partial_match_rec(partial, rootNode()); |
|---|
| 1012 | |
|---|
| 1013 | list<PartialSequence>::iterator i = partial.begin(); |
|---|
| 1014 | list<PartialSequence>::iterator e = partial.end(); |
|---|
| 1015 | |
|---|
| 1016 | arb_progress part_insert_progress(partial.size()); |
|---|
| 1017 | |
|---|
| 1018 | #if defined(DEBUG) |
|---|
| 1019 | // show results : |
|---|
| 1020 | for (; i != e; ++i) i->dump("best"); |
|---|
| 1021 | i = partial.begin(); |
|---|
| 1022 | #endif // DEBUG |
|---|
| 1023 | |
|---|
| 1024 | for (; i != e && !error; ++i) { |
|---|
| 1025 | const char *name = i->get_name(); |
|---|
| 1026 | |
|---|
| 1027 | if (i->is_multi_match()) { |
|---|
| 1028 | GBT_message(gb_main, GBS_global_string("Insertion of '%s' is ambiguous.\n" |
|---|
| 1029 | "(took first of equal scored insertion points: %s)", |
|---|
| 1030 | name, i->get_multilist().c_str())); |
|---|
| 1031 | } |
|---|
| 1032 | |
|---|
| 1033 | AP_tree_nlen *part_leaf = i->release(); |
|---|
| 1034 | AP_tree_nlen *full_seq = const_cast<AP_tree_nlen*>(i->get_best_match()); |
|---|
| 1035 | AP_tree_nlen *brother = full_seq->get_brother(); |
|---|
| 1036 | int is_partial = 0; |
|---|
| 1037 | AP_tree_nlen *target = NULp; |
|---|
| 1038 | |
|---|
| 1039 | if (brother->is_leaf()) { |
|---|
| 1040 | if (brother->gb_node) { |
|---|
| 1041 | is_partial = GBT_is_partial(brother->gb_node, 0, true); |
|---|
| 1042 | |
|---|
| 1043 | if (is_partial) { // brother is partial sequence |
|---|
| 1044 | target = brother; // insert as brother of brother |
|---|
| 1045 | } |
|---|
| 1046 | else { |
|---|
| 1047 | target = full_seq; // insert as brother of full_seq |
|---|
| 1048 | } |
|---|
| 1049 | } |
|---|
| 1050 | else { |
|---|
| 1051 | error = "There are zombies in your tree - please remove them"; |
|---|
| 1052 | } |
|---|
| 1053 | } |
|---|
| 1054 | else { |
|---|
| 1055 | int partial_count = 0; |
|---|
| 1056 | int full_count = 0; |
|---|
| 1057 | int zombie_count = 0; |
|---|
| 1058 | |
|---|
| 1059 | count_partial_and_full(brother, &partial_count, &full_count, &zombie_count, 0, true); |
|---|
| 1060 | |
|---|
| 1061 | if (zombie_count) { |
|---|
| 1062 | error = "There are zombies in your tree - please remove them"; |
|---|
| 1063 | } |
|---|
| 1064 | else if (full_count) { |
|---|
| 1065 | // brother is a subtree containing full sequences |
|---|
| 1066 | // -> add new brother to full_seq found above |
|---|
| 1067 | target = full_seq; |
|---|
| 1068 | } |
|---|
| 1069 | else { // brother subtree only contains partial sequences |
|---|
| 1070 | // find one of the least-deep leafs |
|---|
| 1071 | int depth = INT_MAX; |
|---|
| 1072 | target = find_least_deep_leaf(brother, 0, &depth); |
|---|
| 1073 | is_partial = 1; |
|---|
| 1074 | } |
|---|
| 1075 | } |
|---|
| 1076 | |
|---|
| 1077 | |
|---|
| 1078 | if (!error) { |
|---|
| 1079 | #if defined(DEBUG) |
|---|
| 1080 | printf("inserting '%s'\n", name); |
|---|
| 1081 | #endif // DEBUG |
|---|
| 1082 | part_leaf->insert(target); |
|---|
| 1083 | |
|---|
| 1084 | // we need to create the sequence of the father node! |
|---|
| 1085 | AP_tree_nlen *father = part_leaf->get_father(); |
|---|
| 1086 | father->costs(); |
|---|
| 1087 | |
|---|
| 1088 | // ensure full-sequence is always on top |
|---|
| 1089 | if (father->rightson == target) { |
|---|
| 1090 | father->swap_sons(); |
|---|
| 1091 | } |
|---|
| 1092 | |
|---|
| 1093 | if (!error) { // now correct the branch lengths modified by insert() |
|---|
| 1094 | // calc the original branchlen (of target leaf branch) |
|---|
| 1095 | GBT_LEN orglen = father->get_branchlength()+target->get_branchlength(); |
|---|
| 1096 | |
|---|
| 1097 | if (is_partial) { // we have a subtree of partial sequences |
|---|
| 1098 | target->set_branchlength(orglen); // restore original branchlength |
|---|
| 1099 | father->set_branchlength(0); // all father branches are zero length |
|---|
| 1100 | } |
|---|
| 1101 | else { // we have a subtree of one full+one partial sequence |
|---|
| 1102 | ap_assert(full_seq->get_father() == father); |
|---|
| 1103 | |
|---|
| 1104 | father->set_branchlength(orglen); // father branch represents original length (w/o partial seq) |
|---|
| 1105 | full_seq->set_branchlength(0); // full seq has no sub-branch length |
|---|
| 1106 | } |
|---|
| 1107 | part_leaf->set_branchlength(i->get_branchlength()); |
|---|
| 1108 | printf("Adding with branchlength=%f\n", i->get_branchlength()); |
|---|
| 1109 | } |
|---|
| 1110 | } |
|---|
| 1111 | else { |
|---|
| 1112 | destroy(part_leaf); |
|---|
| 1113 | } |
|---|
| 1114 | |
|---|
| 1115 | part_insert_progress.inc_and_check_user_abort(error); |
|---|
| 1116 | } |
|---|
| 1117 | } |
|---|
| 1118 | } |
|---|
| 1119 | |
|---|
| 1120 | if (full_marked_sequences) { |
|---|
| 1121 | GBT_message(gb_main, GBS_global_string("%i marked full sequences were not added", full_marked_sequences)); |
|---|
| 1122 | } |
|---|
| 1123 | |
|---|
| 1124 | if (error) { |
|---|
| 1125 | GBT_message(gb_main, error); |
|---|
| 1126 | GB_abort_transaction(gb_main); |
|---|
| 1127 | } |
|---|
| 1128 | else { |
|---|
| 1129 | GB_commit_transaction(gb_main); |
|---|
| 1130 | agt->exports.request_save(); |
|---|
| 1131 | } |
|---|
| 1132 | } |
|---|
| 1133 | |
|---|
| 1134 | static void NT_add_partial_and_update(UNFIXED, TREE_canvas *ntw) { |
|---|
| 1135 | AWT_auto_refresh allowed_on(ntw); |
|---|
| 1136 | nt_add_partial(AWT_TREE_PARS(ntw)); |
|---|
| 1137 | } |
|---|
| 1138 | |
|---|
| 1139 | // ------------------------------- |
|---|
| 1140 | // add marked / selected |
|---|
| 1141 | |
|---|
| 1142 | static void nt_add_and_update(AWT_canvas *ntw, AddWhat what, bool quick) { |
|---|
| 1143 | AWT_auto_refresh allowed_on(ntw); |
|---|
| 1144 | nt_add(AWT_TREE_PARS(ntw), what, quick); |
|---|
| 1145 | } |
|---|
| 1146 | |
|---|
| 1147 | static void NT_add_and_NNI(UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_add_and_update(ntw, what, false); } |
|---|
| 1148 | static void NT_add_quick (UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_add_and_update(ntw, what, true); } |
|---|
| 1149 | |
|---|
| 1150 | // ------------------------------------------ |
|---|
| 1151 | // remove and add marked / selected |
|---|
| 1152 | |
|---|
| 1153 | static void nt_reAdd(AWT_graphic_parsimony *agt, AddWhat what, bool quick) { |
|---|
| 1154 | if (agt->get_root_node()) { |
|---|
| 1155 | ap_assert(what == NT_ADD_MARKED); // code below will misbehave for NT_ADD_SELECTED |
|---|
| 1156 | agt->get_tree_root()->remove_leafs(AWT_REMOVE_MARKED); |
|---|
| 1157 | nt_add(agt, what, quick); |
|---|
| 1158 | } |
|---|
| 1159 | } |
|---|
| 1160 | |
|---|
| 1161 | static void nt_reAdd_and_update(AWT_canvas *ntw, AddWhat what, bool quick) { |
|---|
| 1162 | AWT_auto_refresh allowed_on(ntw); |
|---|
| 1163 | nt_reAdd(AWT_TREE_PARS(ntw), what, quick); |
|---|
| 1164 | } |
|---|
| 1165 | |
|---|
| 1166 | static void NT_reAdd_and_NNI(UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_reAdd_and_update(ntw, what, false); } |
|---|
| 1167 | static void NT_reAdd_quick (UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_reAdd_and_update(ntw, what, true); } |
|---|
| 1168 | |
|---|
| 1169 | // -------------------------------------------------------------------------------- |
|---|
| 1170 | |
|---|
// Recalculates the branchlengths (starting at the root edge) and
// afterwards reorders the tree (BIG_BRANCHES_TO_TOP).
// A progress titled "Calculating branchlengths" is shown meanwhile.
static void calc_branchlengths_and_reorder(AWT_graphic_parsimony *agt) {
    arb_progress progress("Calculating branchlengths");
    rootEdge()->calc_branchlengths();
    agt->reorderTree(BIG_BRANCHES_TO_TOP);
}
|---|
| 1176 | |
|---|
| 1177 | static void NT_calc_branchlengths_reorder_and_update(AW_window *, TREE_canvas *ntw) { |
|---|
| 1178 | AWT_auto_refresh allowed_on(ntw); |
|---|
| 1179 | calc_branchlengths_and_reorder(AWT_TREE_PARS(ntw)); |
|---|
| 1180 | } |
|---|
| 1181 | |
|---|
| 1182 | static void NT_bootstrap(AW_window *, TREE_canvas *ntw, bool limit_only) { |
|---|
| 1183 | arb_progress progress("Calculating bootstrap limit"); |
|---|
| 1184 | AWT_auto_refresh allowed_on(ntw); |
|---|
| 1185 | AP_BL_MODE mode = AP_BL_MODE((limit_only ? AP_BL_BOOTSTRAP_LIMIT : AP_BL_BOOTSTRAP_ESTIMATE)|AP_BL_BL_ONLY); |
|---|
| 1186 | |
|---|
| 1187 | rootEdge()->nni_rec(ANY_EDGE, mode, NULp, true); |
|---|
| 1188 | AWT_graphic_tree *agt = AWT_TREE(ntw); |
|---|
| 1189 | agt->reorderTree(BIG_BRANCHES_TO_TOP); |
|---|
| 1190 | agt->set_logical_root_to(agt->get_root_node()); |
|---|
| 1191 | } |
|---|
| 1192 | |
|---|
// Runs the tree optimizer of the parsimony object on the whole tree (using 'settings'),
// then recalculates branchlengths and reorders the tree.
// A progress titled "Optimizing tree" is shown and passed to the optimizer.
static void optimizeTree(AWT_graphic_parsimony *agt, const KL_Settings& settings) {
    arb_progress progress("Optimizing tree");
    agt->get_parsimony().optimize_tree(rootNode(), settings, progress);
    ASSERT_VALID_TREE(rootNode()); // optimizer has to leave a valid tree
    calc_branchlengths_and_reorder(agt);
}
|---|
| 1199 | static void NT_optimize(AW_window *, TREE_canvas *ntw) { |
|---|
| 1200 | AWT_auto_refresh allowed_on(ntw); |
|---|
| 1201 | optimizeTree(AWT_TREE_PARS(ntw), KL_Settings(ntw->awr)); |
|---|
| 1202 | } |
|---|
| 1203 | |
|---|
| 1204 | static void recursiveNNI(AWT_graphic_parsimony *agt, EdgeSpec whichEdges) { |
|---|
| 1205 | arb_progress progress("Recursive NNI"); |
|---|
| 1206 | Mutations orgPars = rootNode()->costs(); |
|---|
| 1207 | Mutations prevPars = orgPars; |
|---|
| 1208 | progress.subtitle(GBS_global_string("best=%li", orgPars)); |
|---|
| 1209 | |
|---|
| 1210 | { |
|---|
| 1211 | arb_suppress_progress quiet; |
|---|
| 1212 | |
|---|
| 1213 | while (!progress.aborted()) { |
|---|
| 1214 | Mutations currPars = rootEdge()->nni_rec(whichEdges, AP_BL_NNI_ONLY, NULp, true); |
|---|
| 1215 | if (currPars == prevPars) break; // no improvement -> abort |
|---|
| 1216 | progress.subtitle(GBS_global_string("best=%li (gain=%li)", currPars, orgPars-currPars)); |
|---|
| 1217 | prevPars = currPars; |
|---|
| 1218 | } |
|---|
| 1219 | calc_branchlengths_and_reorder(agt); |
|---|
| 1220 | } |
|---|
| 1221 | } |
|---|
| 1222 | |
|---|
| 1223 | static void NT_recursiveNNI(AW_window *, TREE_canvas *ntw) { |
|---|
| 1224 | AWT_auto_refresh allowed_on(ntw); |
|---|
| 1225 | EdgeSpec whichEdges = KL_Settings(ntw->awr).whichEdges; |
|---|
| 1226 | recursiveNNI(AWT_TREE_PARS(ntw), whichEdges); |
|---|
| 1227 | } |
|---|
| 1228 | |
|---|
// Calculates the default for the 'repeat' parameter of tree randomization:
// twice the depth of a perfectly balanced binary tree with 'leafs' leafs
// (i.e. 2*log2(leafs), rounded to nearest), but at least 1.
static int calculate_default_random_repeat(long leafs) {
    // log10() of zero or a negative value yields -inf or NaN; converting those to int
    // is undefined behavior -> directly return the minimum for degenerate leaf counts
    // (leafs == 1 results in the minimum as well).
    if (leafs <= 1) return 1;

    double balanced_depth = log10(double(leafs)) / log10(2.0);
    int    repeat         = int(balanced_depth*2.0 + .5);
    if (repeat<1) repeat = 1;
    return repeat;
}
|---|
| 1235 | |
|---|
| 1236 | static void update_random_repeat(AW_root *awr, AWT_graphic_parsimony *agt) { |
|---|
| 1237 | long leafs = agt->get_root_node()->count_leafs(); |
|---|
| 1238 | int repeat = calculate_default_random_repeat(leafs); |
|---|
| 1239 | awr->awar(AWAR_RAND_REPEAT)->write_int(repeat); |
|---|
| 1240 | } |
|---|
| 1241 | |
|---|
| 1242 | static void mixtree_and_calclengths(AWT_graphic_parsimony *agt, int repeat, int percent, EdgeSpec whichEdges) { |
|---|
| 1243 | double allBranchProbability = double(repeat)*percent/100.0; // = ~1.0 if each branch is mixed once |
|---|
| 1244 | double phase1_weight = 1.0 - 1.0/(30.0 * allBranchProbability); |
|---|
| 1245 | arb_progress progress(WEIGHTED, "Randomizing tree", phase1_weight); |
|---|
| 1246 | |
|---|
| 1247 | progress.subtitle("mixing"); |
|---|
| 1248 | rootEdge()->mixTree(repeat, percent, whichEdges); |
|---|
| 1249 | ++progress; |
|---|
| 1250 | |
|---|
| 1251 | progress.subtitle("calculating branchlengths"); |
|---|
| 1252 | rootEdge()->calc_branchlengths(); |
|---|
| 1253 | ++progress; |
|---|
| 1254 | |
|---|
| 1255 | agt->exports.request_save(); |
|---|
| 1256 | } |
|---|
| 1257 | |
|---|
| 1258 | static void randomMixTree(AW_window *aww, TREE_canvas *ntw) { |
|---|
| 1259 | AWT_auto_refresh allowed_on(ntw); |
|---|
| 1260 | AW_root *awr = aww->get_root(); |
|---|
| 1261 | |
|---|
| 1262 | mixtree_and_calclengths(AWT_TREE_PARS(ntw), awr->awar(AWAR_RAND_REPEAT)->read_int(), awr->awar(AWAR_RAND_PERCENT)->read_int(), KL_Settings(awr).whichEdges); |
|---|
| 1263 | { |
|---|
| 1264 | ARB_edge newRootEdge = rootNode()->get_tree_root()->find_innermost_edge(); |
|---|
| 1265 | newRootEdge.son()->set_root(); |
|---|
| 1266 | } |
|---|
| 1267 | AWT_TREE_PARS(ntw)->reorderTree(BIG_BRANCHES_TO_TOP); |
|---|
| 1268 | } |
|---|
| 1269 | |
|---|
| 1270 | |
|---|
// Pairs each optimizer-related awar with a short key.
// The table is handed to AWT_insert_config_manager in createOptimizeWindow();
// the keys are the names used in the predefined config strings below (e.g. "s_depth1").
// The list is terminated by a { NULp, NULp } entry.
static AWT_config_mapping_def optimizer_config_mapping[] = {
    // general optimizer options
    { AWAR_OPTI_MARKED_ONLY, "marked_only" },
    { AWAR_OPTI_SKIP_FOLDED, "skip_folded" },

    // randomizer options
    // { AWAR_RAND_REPEAT, "rand_repeat" }, // do not store (use treesize-dependent default)
    { AWAR_RAND_PERCENT, "rand_percent" },

    // KL recursion depth
    { AWAR_KL_MAXDEPTH, "maxdepth" },
    { AWAR_KL_INCDEPTH, "incdepth" },

    // static path reduction
    { AWAR_KL_STATIC_ENABLED, "static" },
    { AWAR_KL_STATIC_DEPTH1, "s_depth1" },
    { AWAR_KL_STATIC_DEPTH2, "s_depth2" },
    { AWAR_KL_STATIC_DEPTH3, "s_depth3" },
    { AWAR_KL_STATIC_DEPTH4, "s_depth4" },
    { AWAR_KL_STATIC_DEPTH5, "s_depth5" },

    // dynamic path reduction
    { AWAR_KL_DYNAMIC_ENABLED, "dynamic" },
    { AWAR_KL_DYNAMIC_START, "start" },
    { AWAR_KL_DYNAMIC_MAXX, "maxx" },
    { AWAR_KL_DYNAMIC_MAXY, "maxy" },

    { NULp, NULp } // end marker
};
|---|
| 1295 | |
|---|
// Predefined configurations offered by the config manager of the optimizer window.
// Each entry consists of: name, description and a config string ("key='value';...")
// using the keys defined in optimizer_config_mapping.
// The list is terminated by a { NULp, NULp, NULp } entry.
static AWT_predefined_config optimizer_predefined_configs[] = {
    {
        "*minimum_static_reduction",
        "Sets paths allowed by static reduction to maximum\n(causing the minimal reduction)",
        "s_depth1='8';s_depth2='6';s_depth3='6';s_depth4='6';s_depth5='6';static='1'" // only defines/affects settings related to static path reduction
    },
    {
        "*whole_tree_level8",
        "Level-8-optimization of whole tree\n(no path reduction)",
        "dynamic='0';incdepth='0';marked_only='0';maxdepth='8';skip_folded='0';static='0'"
    },
    { NULp, NULp, NULp } // end marker
};
|---|
| 1309 | |
|---|
// Creates the "Tree optimization" window.
//
// The layout is loaded from "pars/tree_opti.fig"; each aws->at("...") call below
// selects a position defined in that figure for the widget created next.
// 'ntw' is bound to all callbacks operating on the tree.
// Returns the newly created window.
static AW_window *createOptimizeWindow(AW_root *aw_root, TREE_canvas *ntw) {
    AW_window_simple *aws = new AW_window_simple;
    aws->init(aw_root, "TREE_OPTIMIZE", "Tree optimization");
    aws->load_xfig("pars/tree_opti.fig");

    // standard buttons
    aws->at("close");
    aws->callback(AW_POPDOWN);
    aws->create_button("CLOSE", "CLOSE", "C");

    aws->at("help");
    aws->callback(makeHelpCallback("pa_optimizer.hlp"));
    aws->create_button("HELP", "HELP", "H");

    // toggles restricting which parts of the tree get optimized
    aws->at("marked");
    aws->label("Only subtrees containing marked species");
    aws->create_toggle(AWAR_OPTI_MARKED_ONLY);

    aws->at("folded");
    aws->label("Do not modify folded subtrees");
    aws->create_toggle(AWAR_OPTI_SKIP_FOLDED);

    aws->button_length(18);

    // optimizer actions
    aws->at("rec_nni");
    aws->callback(makeWindowCallback(NT_recursiveNNI, ntw));
    aws->create_button("REC_NNI", "Recursive NNI", "N");

    aws->at("heuristic");
    aws->callback(makeWindowCallback(NT_optimize, ntw));
    aws->create_button("HEURISTIC", "Heuristic\noptimizer", "H");

    // config manager (stores/restores the awars listed in optimizer_config_mapping)
    aws->at("config");
    AWT_insert_config_manager(aws, AW_ROOT_DEFAULT, "treeopti", optimizer_config_mapping, NULp, optimizer_predefined_configs);

    aws->at("settings");
    aws->callback(makeCreateWindowCallback(create_kernighan_properties_window));
    aws->create_button("SETTINGS", "Settings", "S");

    // tree randomizer and its parameters
    aws->at("randomize");
    aws->callback(makeWindowCallback(randomMixTree, ntw));
    aws->create_button("RANDOMIZE", "Randomize tree", "R");

    aws->button_length(5);

    aws->at("repeat"); aws->create_input_field(AWAR_RAND_REPEAT);
    aws->at("percent"); aws->create_input_field(AWAR_RAND_PERCENT);

    return aws;
}
|---|
| 1359 | |
|---|
| 1360 | // ----------------------- |
|---|
| 1361 | // test functions |
|---|
| 1362 | |
|---|
| 1363 | #if defined(TESTMENU) |
|---|
// [TESTMENU] requests save + zoom reset for the canvas 'ntw'
// (done inside a database transaction and with auto-refresh enabled)
static void refreshTree(AWT_canvas *ntw) {
    GB_transaction ta(ntw->gb_main);
    AWT_auto_refresh allowed_on(ntw);
    ntw->request_save_and_zoom_reset();
}
|---|
| 1369 | |
|---|
| 1370 | static void setBranchlens(AP_tree_nlen *node, double newLen) { |
|---|
| 1371 | node->setBranchlen(newLen, newLen); |
|---|
| 1372 | |
|---|
| 1373 | if (!node->is_leaf()) { |
|---|
| 1374 | setBranchlens(node->get_leftson(), newLen); |
|---|
| 1375 | setBranchlens(node->get_rightson(), newLen); |
|---|
| 1376 | } |
|---|
| 1377 | } |
|---|
| 1378 | |
|---|
| 1379 | static void TESTMENU_setBranchlen(AW_window *, AWT_canvas *ntw) { |
|---|
| 1380 | AP_tree_nlen *root = rootNode(); |
|---|
| 1381 | |
|---|
| 1382 | setBranchlens(root, 1.0); |
|---|
| 1383 | refreshTree(ntw); |
|---|
| 1384 | } |
|---|
| 1385 | |
|---|
| 1386 | static void TESTMENU_treeStats(AW_window *) { |
|---|
| 1387 | ARB_tree_info tinfo; |
|---|
| 1388 | AP_tree_nlen *root = rootNode(); |
|---|
| 1389 | |
|---|
| 1390 | if (root) { |
|---|
| 1391 | { |
|---|
| 1392 | GB_transaction ta(root->get_tree_root()->get_gb_main()); |
|---|
| 1393 | root->calcTreeInfo(tinfo); |
|---|
| 1394 | } |
|---|
| 1395 | |
|---|
| 1396 | puts("Tree stats:"); |
|---|
| 1397 | |
|---|
| 1398 | printf("nodes =%6zu\n", tinfo.nodes()); |
|---|
| 1399 | printf(" inner =%6zu\n", tinfo.innerNodes); |
|---|
| 1400 | printf(" groups =%6zu\n", tinfo.groups); |
|---|
| 1401 | printf(" leafs =%6zu\n", tinfo.leafs); |
|---|
| 1402 | printf(" unlinked =%6zu (zombies?)\n", tinfo.unlinked); |
|---|
| 1403 | printf(" linked =%6zu\n", tinfo.linked()); |
|---|
| 1404 | printf(" marked =%6zu\n", tinfo.marked); |
|---|
| 1405 | } |
|---|
| 1406 | else { |
|---|
| 1407 | puts("No tree"); |
|---|
| 1408 | } |
|---|
| 1409 | } |
|---|
| 1410 | |
|---|
| 1411 | static void TESTMENU_sortTreeByName(AW_window *, AWT_canvas *ntw) { |
|---|
| 1412 | AP_tree_nlen *root = rootNode(); |
|---|
| 1413 | |
|---|
| 1414 | root->sortByName(); |
|---|
| 1415 | refreshTree(ntw); |
|---|
| 1416 | } |
|---|
| 1417 | |
|---|
// [TESTMENU] creates the menu "Test[debug]" and inserts one topic for each of the
// TESTMENU_* functions defined above ('ntw' is bound to those operating on the canvas)
static void init_TEST_menu(AW_window_menu_modes *awm, AWT_canvas *ntw) {
    awm->create_menu("Test[debug]", "g", AWM_ALL);

    awm->insert_menu_topic("treestat", "Tree statistics", "s", "", AWM_ALL, TESTMENU_treeStats);
    awm->insert_menu_topic("setlens", "Set branchlens", "b", "", AWM_ALL, makeWindowCallback(TESTMENU_setBranchlen, ntw));
    awm->insert_menu_topic("sorttreebyname", "Sort tree by name", "o", "", AWM_ALL, makeWindowCallback(TESTMENU_sortTreeByName, ntw));
}
|---|
| 1425 | #endif // TESTMENU |
|---|
| 1426 | |
|---|
| 1427 | static GB_ERROR pars_check_size(AW_root *awr, GB_ERROR& warning, const adfiltercbstruct *filterDef) { |
|---|
| 1428 | GB_ERROR error = NULp; |
|---|
| 1429 | warning = NULp; |
|---|
| 1430 | |
|---|
| 1431 | char *tree_name = awr->awar(AWAR_TREE)->read_string(); |
|---|
| 1432 | char *filter = awr->awar(filterDef->def_filter)->read_string(); |
|---|
| 1433 | long ali_len = 0; |
|---|
| 1434 | |
|---|
| 1435 | if (strlen(filter)) { |
|---|
| 1436 | int i; |
|---|
| 1437 | for (i=0; filter[i]; i++) { |
|---|
| 1438 | if (filter[i] != '0') ali_len++; |
|---|
| 1439 | } |
|---|
| 1440 | } |
|---|
| 1441 | else { |
|---|
| 1442 | char *ali_name = awr->awar(AWAR_ALIGNMENT)->read_string(); |
|---|
| 1443 | ali_len = GBT_get_alignment_len(ap_main->get_gb_main(), ali_name); |
|---|
| 1444 | if (ali_len<=0) { |
|---|
| 1445 | error = "Please select a valid alignment"; |
|---|
| 1446 | GB_clear_error(); |
|---|
| 1447 | } |
|---|
| 1448 | free(ali_name); |
|---|
| 1449 | } |
|---|
| 1450 | |
|---|
| 1451 | if (!error) { |
|---|
| 1452 | long tree_size = GBT_size_of_tree(ap_main->get_gb_main(), tree_name); |
|---|
| 1453 | if (tree_size == -1) { |
|---|
| 1454 | error = "Please select an existing tree"; |
|---|
| 1455 | } |
|---|
| 1456 | else { |
|---|
| 1457 | size_t expected_memuse = (ali_len * tree_size * 4 / 1024); |
|---|
| 1458 | if (expected_memuse > GB_get_usable_memory()) { |
|---|
| 1459 | warning = GBS_global_string("Estimated memory usage (%s) exceeds physical memory (will swap)\n" |
|---|
| 1460 | "(did you specify a filter?)", |
|---|
| 1461 | GBS_readable_size(expected_memuse, "b")); |
|---|
| 1462 | } |
|---|
| 1463 | } |
|---|
| 1464 | } |
|---|
| 1465 | |
|---|
| 1466 | free(filter); |
|---|
| 1467 | free(tree_name); |
|---|
| 1468 | |
|---|
| 1469 | ap_assert(!GB_have_error()); |
|---|
| 1470 | return error; |
|---|
| 1471 | } |
|---|
| 1472 | |
|---|
| 1473 | static void pars_reset_optimal_parsimony(AW_window *aww) { |
|---|
| 1474 | AW_root *awr = aww->get_root(); |
|---|
| 1475 | awr->awar(AWAR_BEST_PARSIMONY)->write_int(awr->awar(AWAR_PARSIMONY)->read_int()); |
|---|
| 1476 | } |
|---|
| 1477 | |
|---|
| 1478 | class LowDataCheck { |
|---|
| 1479 | int leafs; // counts leafs with insufficiant data |
|---|
| 1480 | int inner; // same for inner nodes |
|---|
| 1481 | |
|---|
| 1482 | public: |
|---|
| 1483 | LowDataCheck() : leafs(0), inner(0) {} |
|---|
| 1484 | |
|---|
| 1485 | void count(AP_tree_nlen *node); |
|---|
| 1486 | |
|---|
| 1487 | int get_leafs() const { return leafs; } |
|---|
| 1488 | int get_inner() const { return inner; } |
|---|
| 1489 | }; |
|---|
| 1490 | |
|---|
| 1491 | void LowDataCheck::count(AP_tree_nlen *node) { |
|---|
| 1492 | const AP_combinableSeq *seq = node->get_seq(); |
|---|
| 1493 | AP_FLOAT bases = seq->weighted_base_count(); |
|---|
| 1494 | |
|---|
| 1495 | if (node->is_leaf()) { |
|---|
| 1496 | if (bases<MIN_SEQUENCE_LENGTH) ++leafs; |
|---|
| 1497 | } |
|---|
| 1498 | else { |
|---|
| 1499 | if (bases<MIN_SEQUENCE_LENGTH) ++inner; |
|---|
| 1500 | |
|---|
| 1501 | count(node->get_leftson()); |
|---|
| 1502 | count(node->get_rightson()); |
|---|
| 1503 | } |
|---|
| 1504 | } |
|---|
| 1505 | |
|---|
| 1506 | static void PARS_infomode_cb(UNFIXED, TREE_canvas *canvas, AWT_COMMAND_MODE mode) { |
|---|
| 1507 | AWT_trigger_remote_action(NULp, canvas->gb_main, "ARB_NT:species_info"); |
|---|
| 1508 | nt_mode_event(NULp, canvas, mode); |
|---|
| 1509 | } |
|---|
| 1510 | |
|---|
| 1511 | static void pars_start_cb(AW_window *aw_parent, WeightedFilter *wfilt, const PARS_commands *cmds) { |
|---|
| 1512 | ModRLimit increase_stacksize(RLIMIT_STACK, TREEDISP_STACKSIZE); |
|---|
| 1513 | |
|---|
| 1514 | AW_root *awr = aw_parent->get_root(); |
|---|
| 1515 | GBDATA *gb_main = ap_main->get_gb_main(); |
|---|
| 1516 | GB_begin_transaction(gb_main); |
|---|
| 1517 | { |
|---|
| 1518 | GB_ERROR warning; |
|---|
| 1519 | GB_ERROR error = pars_check_size(awr, warning, wfilt->get_adfiltercbstruct()); |
|---|
| 1520 | |
|---|
| 1521 | if (warning && !error) { |
|---|
| 1522 | char *question = GBS_global_string_copy("%s\nDo you want to continue?", warning); |
|---|
| 1523 | bool cont = aw_ask_sure("swap_warning", question); |
|---|
| 1524 | free(question); |
|---|
| 1525 | |
|---|
| 1526 | if (!cont) error = "User abort"; |
|---|
| 1527 | |
|---|
| 1528 | } |
|---|
| 1529 | |
|---|
| 1530 | if (!error) { |
|---|
| 1531 | // freeze value of GBT_get_default_alignment to 'ali_name': |
|---|
| 1532 | const char *ali_name = awr->awar(AWAR_ALIGNMENT)->read_char_pntr(); |
|---|
| 1533 | error = GBT_set_startup_alignment(ap_main->get_gb_main(), ali_name); |
|---|
| 1534 | } |
|---|
| 1535 | |
|---|
| 1536 | if (error) { |
|---|
| 1537 | aw_message(error); |
|---|
| 1538 | GB_commit_transaction(gb_main); |
|---|
| 1539 | return; |
|---|
| 1540 | } |
|---|
| 1541 | } |
|---|
| 1542 | |
|---|
| 1543 | |
|---|
| 1544 | AW_window_menu_modes *awm = new AW_window_menu_modes; |
|---|
| 1545 | awm->init(awr, "ARB_PARSIMONY", "ARB_PARSIMONY", 400, 200); |
|---|
| 1546 | |
|---|
| 1547 | GLOBAL_PARS->generate_tree(wfilt); |
|---|
| 1548 | |
|---|
| 1549 | TREE_canvas *ntw; |
|---|
| 1550 | { |
|---|
| 1551 | AP_tree_display_style prev_style = global_tree()->get_tree_style(); |
|---|
| 1552 | global_tree()->set_tree_style(AP_LIST_SIMPLE, NULp); // avoid NDS warnings during startup |
|---|
| 1553 | ntw = new TREE_canvas(gb_main, awm, awm->get_window_id(), global_tree(), awr->awar(AWAR_TREE)); |
|---|
| 1554 | global_tree()->set_tree_style(prev_style, ntw); |
|---|
| 1555 | } |
|---|
| 1556 | |
|---|
| 1557 | { |
|---|
| 1558 | GB_ERROR error = NULp; |
|---|
| 1559 | arb_progress progress("loading tree"); |
|---|
| 1560 | NT_reload_tree_event(awr, ntw, false); // load tree (but do not expose - first zombies need to be removed) |
|---|
| 1561 | if (!global_tree()->get_root_node()) { |
|---|
| 1562 | error = "Failed to load the selected tree"; |
|---|
| 1563 | } |
|---|
| 1564 | else { |
|---|
| 1565 | AP_tree_edge::initialize(rootNode()); // builds edges |
|---|
| 1566 | long removed = global_tree_root()->remove_leafs(AWT_REMOVE_ZOMBIES); |
|---|
| 1567 | |
|---|
| 1568 | PARS_tree_init(global_tree()); |
|---|
| 1569 | removed += global_tree_root()->remove_leafs(AWT_RemoveType(AWT_REMOVE_ZOMBIES | AWT_REMOVE_NO_SEQUENCE)); |
|---|
| 1570 | |
|---|
| 1571 | if (!global_tree()->get_root_node()) { |
|---|
| 1572 | const char *aliname = global_tree_root()->get_aliview()->get_aliname(); |
|---|
| 1573 | error = GBS_global_string("Less than 2 species contain data in '%s'\n" |
|---|
| 1574 | "Tree vanished", aliname); |
|---|
| 1575 | } |
|---|
| 1576 | else if (removed) { |
|---|
| 1577 | aw_message(GBS_global_string("Removed %li leafs (zombies or species w/o data in alignment)", removed)); |
|---|
| 1578 | } |
|---|
| 1579 | |
|---|
| 1580 | error = GB_end_transaction(ntw->gb_main, error); |
|---|
| 1581 | if (!error) { |
|---|
| 1582 | progress.subtitle("Calculating inner nodes"); |
|---|
| 1583 | GLOBAL_PARS->get_root_node()->costs(); |
|---|
| 1584 | |
|---|
| 1585 | progress.subtitle("Checking amount of data"); |
|---|
| 1586 | LowDataCheck lowData; |
|---|
| 1587 | lowData.count(GLOBAL_PARS->get_root_node()); |
|---|
| 1588 | |
|---|
| 1589 | bool warned = false; |
|---|
| 1590 | if (lowData.get_inner()>0) { |
|---|
| 1591 | aw_message(GBS_global_string("Inner nodes with insufficient data: %i", lowData.get_inner())); |
|---|
| 1592 | warned = true; |
|---|
| 1593 | } |
|---|
| 1594 | if (lowData.get_leafs()>0) { |
|---|
| 1595 | aw_message(GBS_global_string("Species with insufficient data: %i", lowData.get_leafs())); |
|---|
| 1596 | warned = true; |
|---|
| 1597 | } |
|---|
| 1598 | if (warned) { |
|---|
| 1599 | aw_message("Warning: low sequence data (<" stringize_pscan(MIN_SEQUENCE_LENGTH) " bp) detected! (filter too restrictive?)"); |
|---|
| 1600 | } |
|---|
| 1601 | } |
|---|
| 1602 | } |
|---|
| 1603 | if (error) aw_popup_exit(error); |
|---|
| 1604 | } |
|---|
| 1605 | |
|---|
| 1606 | if (cmds->add_marked) NT_add_quick(NULp, ntw, NT_ADD_MARKED); |
|---|
| 1607 | if (cmds->add_selected) NT_add_quick(NULp, ntw, NT_ADD_SELECTED); |
|---|
| 1608 | if (cmds->calc_branch_lengths) NT_calc_branchlengths_reorder_and_update(awm, ntw); |
|---|
| 1609 | if (cmds->calc_bootstrap) NT_bootstrap(awm, ntw, 0); |
|---|
| 1610 | if (cmds->quit) pars_exit(awm); |
|---|
| 1611 | |
|---|
| 1612 | GB_transaction ta(ntw->gb_main); |
|---|
| 1613 | |
|---|
| 1614 | #if defined(DEBUG) |
|---|
| 1615 | AWT_create_debug_menu(awm); |
|---|
| 1616 | #endif // DEBUG |
|---|
| 1617 | |
|---|
| 1618 | awm->create_menu("File", "F", AWM_ALL); |
|---|
| 1619 | { |
|---|
| 1620 | insert_macro_menu_entry(awm, false); |
|---|
| 1621 | awm->insert_menu_topic("print_tree", "Print Tree ...", "P", "tree2prt.hlp", AWM_ALL, makeWindowCallback(AWT_popup_print_window, static_cast<AWT_canvas*>(ntw))); |
|---|
| 1622 | awm->insert_menu_topic("quit", "Quit", "Q", "quit.hlp", AWM_ALL, pars_exit); |
|---|
| 1623 | } |
|---|
| 1624 | |
|---|
| 1625 | awm->create_menu("Species", "S", AWM_ALL); |
|---|
| 1626 | { |
|---|
| 1627 | NT_insert_mark_submenus(awm, ntw, 0); |
|---|
| 1628 | |
|---|
| 1629 | } |
|---|
| 1630 | awm->create_menu("Tree", "T", AWM_ALL); |
|---|
| 1631 | { |
|---|
| 1632 | |
|---|
| 1633 | awm->insert_menu_topic("nds", "NDS (Node Display Setup) ...", "N", "props_nds.hlp", AWM_ALL, makeCreateWindowCallback(NDS_create_window, ntw->gb_main)); |
|---|
| 1634 | |
|---|
| 1635 | awm->sep______________(); |
|---|
| 1636 | awm->insert_menu_topic("tree_print", "Print tree ...", "P", "tree2prt.hlp", AWM_ALL, makeWindowCallback(AWT_popup_print_window, static_cast<AWT_canvas*>(ntw))); |
|---|
| 1637 | awm->insert_menu_topic("tree_2_xfig", "Export tree to XFIG ...", "F", "tree2file.hlp", AWM_ALL, makeWindowCallback(AWT_popup_tree_export_window, static_cast<AWT_canvas*>(ntw))); |
|---|
| 1638 | awm->sep______________(); |
|---|
| 1639 | NT_insert_collapse_submenu(awm, ntw); |
|---|
| 1640 | awm->sep______________(); |
|---|
| 1641 | awm->insert_sub_menu("Remove Species from Tree", "R"); |
|---|
| 1642 | { |
|---|
| 1643 | awm->insert_menu_topic("tree_remove_deleted", "Remove Zombies", "Z", "trm_del.hlp", AWM_ALL, makeWindowCallback(NT_remove_leafs, ntw, AWT_REMOVE_ZOMBIES)); |
|---|
| 1644 | awm->insert_menu_topic("tree_remove_marked", "Remove Marked", "M", "trm_mrkd.hlp", AWM_ALL, makeWindowCallback(NT_remove_leafs, ntw, AWT_REMOVE_MARKED)); |
|---|
| 1645 | awm->insert_menu_topic("tree_keep_marked", "Keep Marked", "K", "tkeep_mrkd.hlp", AWM_ALL, makeWindowCallback(NT_remove_leafs, ntw, AWT_KEEP_MARKED)); |
|---|
| 1646 | } |
|---|
| 1647 | awm->close_sub_menu(); |
|---|
| 1648 | awm->insert_sub_menu("Add Species to Tree", "A"); |
|---|
| 1649 | { |
|---|
| 1650 | awm->insert_menu_topic("add_marked", "Add Marked Species", "M", "pa_quick.hlp", AWM_ALL, makeWindowCallback(NT_add_quick, ntw, NT_ADD_MARKED)); |
|---|
| 1651 | awm->insert_menu_topic("add_marked_nni", "Add Marked Species + Local Optimization (NNI)", "N", "pa_add.hlp", AWM_ALL, makeWindowCallback(NT_add_and_NNI, ntw, NT_ADD_MARKED)); |
|---|
| 1652 | awm->insert_menu_topic("rm_add_marked", "Remove & Add Marked Species", "R", "pa_quick.hlp", AWM_ALL, makeWindowCallback(NT_reAdd_quick, ntw, NT_ADD_MARKED)); |
|---|
| 1653 | awm->insert_menu_topic("rm_add_marked_nni|", "Remove & Add Marked + Local Optimization (NNI)", "L", "pa_add.hlp", AWM_ALL, makeWindowCallback(NT_reAdd_and_NNI, ntw, NT_ADD_MARKED)); |
|---|
| 1654 | awm->sep______________(); |
|---|
| 1655 | awm->insert_menu_topic("add_marked_partial", "Add Marked Partial Species", "P", "pa_partial.hlp", AWM_ALL, makeWindowCallback(NT_add_partial_and_update, ntw)); |
|---|
| 1656 | awm->sep______________(); |
|---|
| 1657 | awm->insert_menu_topic("add_selected", "Add Selected Species", "S", "pa_quick.hlp", AWM_ALL, makeWindowCallback(NT_add_quick, ntw, NT_ADD_SELECTED)); |
|---|
| 1658 | awm->insert_menu_topic("add_selected_nni", "Add Selected Species + Local Optimization (NNI)", "O", "pa_add.hlp", AWM_ALL, makeWindowCallback(NT_add_and_NNI, ntw, NT_ADD_SELECTED)); |
|---|
| 1659 | } |
|---|
| 1660 | awm->close_sub_menu(); |
|---|
| 1661 | awm->sep______________(); |
|---|
| 1662 | awm->insert_menu_topic("optimize", "Tree Optimization ...", "O", "pa_optimizer.hlp", AWM_ALL, makeCreateWindowCallback(createOptimizeWindow, ntw)); |
|---|
| 1663 | awm->insert_menu_topic("reset", "Reset optimal parsimony", "s", "pa_reset.hlp", AWM_ALL, pars_reset_optimal_parsimony); |
|---|
| 1664 | awm->sep______________(); |
|---|
| 1665 | awm->insert_menu_topic("beautify_tree", "Beautify Tree", "B", "resorttree.hlp", AWM_ALL, makeWindowCallback(NT_resort_tree_cb, ntw, BIG_BRANCHES_TO_TOP)); |
|---|
| 1666 | awm->insert_menu_topic("calc_branch_lengths", "Calculate Branch Lengths", "L", "pa_branchlengths.hlp", AWM_ALL, makeWindowCallback(NT_calc_branchlengths_reorder_and_update, ntw)); |
|---|
| 1667 | awm->sep______________(); |
|---|
| 1668 | awm->insert_menu_topic("calc_upper_bootstrap_indep", "Calculate Upper Bootstrap Limit (dependent NNI)", "U", "pa_bootstrap.hlp", AWM_ALL, makeWindowCallback(NT_bootstrap, ntw, false)); |
|---|
| 1669 | awm->insert_menu_topic("calc_upper_bootstrap_dep", "Calculate Upper Bootstrap Limit (independent NNI)", "i", "pa_bootstrap.hlp", AWM_ALL, makeWindowCallback(NT_bootstrap, ntw, true)); |
|---|
| 1670 | awm->insert_menu_topic("tree_remove_remark", "Remove bootstrap values", "v", "trm_boot.hlp", AWM_ALL, makeWindowCallback(NT_remove_bootstrap, ntw)); |
|---|
| 1671 | } |
|---|
| 1672 | |
|---|
| 1673 | #if defined(TESTMENU) |
|---|
| 1674 | init_TEST_menu(awm, ntw); |
|---|
| 1675 | #endif // TESTMENU |
|---|
| 1676 | |
|---|
| 1677 | awm->create_menu("Reset", "R", AWM_ALL); |
|---|
| 1678 | { |
|---|
| 1679 | awm->insert_menu_topic("reset_logical_zoom", "Logical Zoom", "L", "rst_log_zoom.hlp", AWM_ALL, makeWindowCallback(NT_reset_lzoom_cb, ntw)); |
|---|
| 1680 | awm->insert_menu_topic("reset_physical_zoom", "Physical Zoom", "P", "rst_phys_zoom.hlp", AWM_ALL, makeWindowCallback(NT_reset_pzoom_cb, ntw)); |
|---|
| 1681 | } |
|---|
| 1682 | |
|---|
| 1683 | awm->create_menu("Properties", "P", AWM_ALL); |
|---|
| 1684 | { |
|---|
| 1685 | awm->insert_menu_topic("props_menu", "Frame settings ...", "F", "props_frame.hlp", AWM_ALL, AW_preset_window); |
|---|
| 1686 | awm->insert_menu_topic("props_tree2", "Tree options", "o", "nt_tree_settings.hlp", AWM_ALL, TREE_create_settings_window); |
|---|
| 1687 | awm->insert_menu_topic("props_tree", "Tree colors & fonts", "c", "color_props.hlp", AWM_ALL, makeCreateWindowCallback(AW_create_gc_window, ntw->gc_manager)); |
|---|
| 1688 | awm->insert_menu_topic("props_kl", "Optimizer settings (KL)", "K", "kernlin.hlp", AWM_ALL, makeCreateWindowCallback(create_kernighan_properties_window)); |
|---|
| 1689 | awm->sep______________(); |
|---|
| 1690 | AW_insert_common_property_menu_entries(awm); |
|---|
| 1691 | awm->sep______________(); |
|---|
| 1692 | awm->insert_menu_topic("save_props", "Save Defaults (pars.arb)", "D", "savedef.hlp", AWM_ALL, AW_save_properties); |
|---|
| 1693 | } |
|---|
| 1694 | awm->button_length(5); |
|---|
| 1695 | |
|---|
| 1696 | awm->insert_help_topic("ARB_PARSIMONY help", "P", "arb_pars.hlp", AWM_ALL, makeHelpCallback("arb_pars.hlp")); |
|---|
| 1697 | |
|---|
| 1698 | // ---------------------- |
|---|
| 1699 | // mode buttons |
|---|
| 1700 | // |
|---|
| 1701 | // keep them synchronized as far as possible with those in ARB_PARSIMONY |
|---|
| 1702 | // see ../NTREE/NT_extern.cxx@keepModesSynchronized |
|---|
| 1703 | |
|---|
| 1704 | awm->create_mode("mode_select.xpm", "mode_select.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_SELECT)); |
|---|
| 1705 | awm->create_mode("mode_mark.xpm", "mode_mark.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_MARK)); |
|---|
| 1706 | awm->create_mode("mode_group.xpm", "mode_group.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_GROUP)); |
|---|
| 1707 | awm->create_mode("mode_zoom.xpm", "mode_pzoom.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_ZOOM)); |
|---|
| 1708 | awm->create_mode("mode_lzoom.xpm", "mode_lzoom.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_LZOOM)); |
|---|
| 1709 | |
|---|
| 1710 | awm->create_mode("mode_info.xpm", "mode_info.hlp", AWM_ALL, makeWindowCallback(PARS_infomode_cb, ntw, AWT_MODE_INFO)); |
|---|
| 1711 | // reserve mode-locations (to put the modes below at the same position as in ARB_NT) |
|---|
| 1712 | awm->create_mode("mode_empty.xpm", "mode.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_EMPTY)); |
|---|
| 1713 | |
|---|
| 1714 | // topology-modification-modes |
|---|
| 1715 | awm->create_mode("mode_setroot.xpm", "mode_setroot.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_SETROOT)); |
|---|
| 1716 | awm->create_mode("mode_swap.xpm", "mode_swap.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_SWAP)); |
|---|
| 1717 | awm->create_mode("mode_move.xpm", "mode_move.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_MOVE)); |
|---|
| 1718 | |
|---|
| 1719 | awm->create_mode("mode_nni.xpm", "mode_nni.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_NNI)); |
|---|
| 1720 | awm->create_mode("mode_kernlin.xpm", "mode_kernlin.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_KERNINGHAN)); |
|---|
| 1721 | awm->create_mode("mode_optimize.xpm", "mode_optimize.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_OPTIMIZE)); |
|---|
| 1722 | |
|---|
| 1723 | awm->at(5, 2); |
|---|
| 1724 | awm->auto_space(0, -2); |
|---|
| 1725 | awm->shadow_width(1); |
|---|
| 1726 | |
|---|
| 1727 | |
|---|
| 1728 | int db_treex, db_treey; |
|---|
| 1729 | awm->get_at_position(&db_treex, &db_treey); |
|---|
| 1730 | awm->callback(makeHelpCallback("nt_tree_select.hlp")); |
|---|
| 1731 | awm->button_length(16); |
|---|
| 1732 | awm->help_text("nt_tree_select.hlp"); |
|---|
| 1733 | awm->create_button(NULp, AWAR_TREE); |
|---|
| 1734 | |
|---|
| 1735 | |
|---|
| 1736 | int db_stackx, db_stacky; |
|---|
| 1737 | awm->label_length(8); |
|---|
| 1738 | awm->label("Stored"); |
|---|
| 1739 | awm->get_at_position(&db_stackx, &db_stacky); |
|---|
| 1740 | awm->button_length(6); |
|---|
| 1741 | awm->callback(makeHelpCallback("ap_stack.hlp")); |
|---|
| 1742 | awm->help_text("ap_stack.hlp"); |
|---|
| 1743 | awm->create_button(NULp, AWAR_STACKPOINTER); |
|---|
| 1744 | |
|---|
| 1745 | int db_parsx, db_parsy; |
|---|
| 1746 | awm->label_length(14); |
|---|
| 1747 | awm->label("Current Pars:"); |
|---|
| 1748 | awm->get_at_position(&db_parsx, &db_parsy); |
|---|
| 1749 | |
|---|
| 1750 | awm->button_length(10); |
|---|
| 1751 | awm->create_button(NULp, AWAR_PARSIMONY, NULp, "+"); |
|---|
| 1752 | |
|---|
| 1753 | awm->button_length(0); |
|---|
| 1754 | |
|---|
| 1755 | awm->callback(makeWindowCallback(NT_jump_cb, ntw, AP_JUMP_BY_BUTTON)); |
|---|
| 1756 | awm->help_text("tr_jump.hlp"); |
|---|
| 1757 | awm->create_button("JUMP", "Jump"); |
|---|
| 1758 | |
|---|
| 1759 | awm->callback(makeHelpCallback("arb_pars.hlp")); |
|---|
| 1760 | awm->help_text("help.hlp"); |
|---|
| 1761 | awm->create_button("HELP", "HELP", "H"); |
|---|
| 1762 | |
|---|
| 1763 | awm->at_newline(); |
|---|
| 1764 | |
|---|
| 1765 | awm->button_length(8); |
|---|
| 1766 | awm->at_x(db_stackx); |
|---|
| 1767 | awm->callback(makeWindowCallback(AP_user_pop_cb, ntw)); |
|---|
| 1768 | awm->help_text("ap_stack.hlp"); |
|---|
| 1769 | awm->create_button("POP", "RESTORE"); |
|---|
| 1770 | |
|---|
| 1771 | awm->button_length(6); |
|---|
| 1772 | awm->callback(AP_user_push_cb); |
|---|
| 1773 | awm->help_text("ap_stack.hlp"); |
|---|
| 1774 | awm->create_button("PUSH", "STORE"); |
|---|
| 1775 | |
|---|
| 1776 | awm->at_x(db_parsx); |
|---|
| 1777 | awm->label_length(14); |
|---|
| 1778 | awm->label("Optimal Pars:"); |
|---|
| 1779 | |
|---|
| 1780 | awm->button_length(10); |
|---|
| 1781 | awm->create_button(NULp, AWAR_BEST_PARSIMONY, NULp, "+"); |
|---|
| 1782 | |
|---|
| 1783 | awm->button_length(0); |
|---|
| 1784 | awm->auto_space(0, -2); |
|---|
| 1785 | |
|---|
| 1786 | awm->at_x(db_treex); |
|---|
| 1787 | awm->callback(makeWindowCallback(NT_set_tree_style, ntw, AP_TREE_RADIAL)); |
|---|
| 1788 | awm->help_text("tr_type_radial.hlp"); |
|---|
| 1789 | awm->create_button("RADIAL_TREE", "#radial.xpm"); |
|---|
| 1790 | |
|---|
| 1791 | awm->callback(makeWindowCallback(NT_set_tree_style, ntw, AP_TREE_NORMAL)); |
|---|
| 1792 | awm->help_text("tr_type_dendro.hlp"); |
|---|
| 1793 | awm->create_button("LIST_TREE", "#dendro.xpm"); |
|---|
| 1794 | |
|---|
| 1795 | awm->at_newline(); |
|---|
| 1796 | awm->at(db_treex, awm->get_at_yposition()); |
|---|
| 1797 | |
|---|
| 1798 | { |
|---|
| 1799 | SmartPtr<AW_at_storage> maxSize(AW_at_storage::make(awm, AW_AT_MAXSIZE)); |
|---|
| 1800 | |
|---|
| 1801 | awm->button_length(AWAR_FOOTER_MAX_LEN); |
|---|
| 1802 | awm->create_button(NULp, AWAR_FOOTER); |
|---|
| 1803 | awm->at_newline(); |
|---|
| 1804 | awm->restore_at_from(*maxSize); |
|---|
| 1805 | } |
|---|
| 1806 | |
|---|
| 1807 | awm->get_at_position(&db_treex, &db_treey); |
|---|
| 1808 | awm->set_info_area_height(db_treey); |
|---|
| 1809 | |
|---|
| 1810 | awm->set_bottom_area_height(0); |
|---|
| 1811 | |
|---|
| 1812 | aw_parent->hide(); // hide parent |
|---|
| 1813 | awm->show(); |
|---|
| 1814 | |
|---|
| 1815 | TREE_install_update_callbacks(ntw); |
|---|
| 1816 | |
|---|
| 1817 | update_random_repeat(awr, AWT_TREE_PARS(ntw)); |
|---|
| 1818 | AP_user_push_cb(aw_parent); // push initial tree |
|---|
| 1819 | set_keep_ghostnodes(); // make sure no stacked nodes get deleted |
|---|
| 1820 | } |
|---|
| 1821 | |
|---|
| 1822 | static AW_window *create_pars_init_window(AW_root *awr, const PARS_commands *cmds) { |
|---|
| 1823 | AW_window_simple *aws = new AW_window_simple; |
|---|
| 1824 | aws->init(awr, "PARS_PROPS", "SET PARSIMONY OPTIONS"); |
|---|
| 1825 | aws->load_xfig("pars/init.fig"); |
|---|
| 1826 | |
|---|
| 1827 | aws->button_length(10); |
|---|
| 1828 | aws->label_length(10); |
|---|
| 1829 | |
|---|
| 1830 | aws->callback(pars_exit); |
|---|
| 1831 | aws->at("close"); |
|---|
| 1832 | aws->create_button("ABORT", "ABORT", "A"); |
|---|
| 1833 | |
|---|
| 1834 | aws->callback(makeHelpCallback("arb_pars_init.hlp")); |
|---|
| 1835 | aws->at("help"); |
|---|
| 1836 | aws->create_button("HELP", "HELP", "H"); |
|---|
| 1837 | |
|---|
| 1838 | GBDATA *gb_main = ap_main->get_gb_main(); |
|---|
| 1839 | WeightedFilter *weighted_filter = // do NOT free (bound to callbacks) |
|---|
| 1840 | new WeightedFilter(gb_main, aws->get_root(), AWAR_FILTER_NAME, AWAR_COLUMNSTAT_NAME, aws->get_root()->awar_string(AWAR_ALIGNMENT)); |
|---|
| 1841 | |
|---|
| 1842 | aws->at("filter"); |
|---|
| 1843 | aws->callback(makeCreateWindowCallback(awt_create_select_filter_win, weighted_filter->get_adfiltercbstruct())); |
|---|
| 1844 | aws->create_button("SELECT_FILTER", AWAR_FILTER_NAME); |
|---|
| 1845 | |
|---|
| 1846 | aws->at("weights"); |
|---|
| 1847 | aws->callback(makeCreateWindowCallback(COLSTAT_create_selection_window, weighted_filter->get_column_stat())); |
|---|
| 1848 | aws->sens_mask(AWM_EXP); |
|---|
| 1849 | aws->create_button("SELECT_CSP", AWAR_COLUMNSTAT_NAME); |
|---|
| 1850 | aws->sens_mask(AWM_ALL); |
|---|
| 1851 | |
|---|
| 1852 | aws->at("alignment"); |
|---|
| 1853 | awt_create_ALI_selection_list(gb_main, aws, AWAR_ALIGNMENT, "*="); |
|---|
| 1854 | |
|---|
| 1855 | aws->at("tree"); |
|---|
| 1856 | awt_create_TREE_selection_list(gb_main, aws, AWAR_TREE); |
|---|
| 1857 | |
|---|
| 1858 | aws->callback(makeWindowCallback(pars_start_cb, weighted_filter, cmds)); |
|---|
| 1859 | aws->at("go"); |
|---|
| 1860 | aws->create_button("GO", "GO", "G"); |
|---|
| 1861 | |
|---|
| 1862 | return aws; |
|---|
| 1863 | } |
|---|
| 1864 | |
|---|
| 1865 | KL_Settings::KL_Settings(AW_root *aw_root) { |
|---|
| 1866 | maxdepth = aw_root->awar(AWAR_KL_MAXDEPTH)->read_int(); |
|---|
| 1867 | incdepth = aw_root->awar(AWAR_KL_INCDEPTH)->read_int(); |
|---|
| 1868 | |
|---|
| 1869 | Static.enabled = aw_root->awar(AWAR_KL_STATIC_ENABLED)->read_int(); |
|---|
| 1870 | Static.depth[0] = 2; // always test both possibilities at starting edge |
|---|
| 1871 | Static.depth[1] = aw_root->awar(AWAR_KL_STATIC_DEPTH1)->read_int(); |
|---|
| 1872 | Static.depth[2] = aw_root->awar(AWAR_KL_STATIC_DEPTH2)->read_int(); |
|---|
| 1873 | Static.depth[3] = aw_root->awar(AWAR_KL_STATIC_DEPTH3)->read_int(); |
|---|
| 1874 | Static.depth[4] = aw_root->awar(AWAR_KL_STATIC_DEPTH4)->read_int(); |
|---|
| 1875 | Static.depth[5] = aw_root->awar(AWAR_KL_STATIC_DEPTH5)->read_int(); |
|---|
| 1876 | |
|---|
| 1877 | Dynamic.enabled = aw_root->awar(AWAR_KL_DYNAMIC_ENABLED)->read_int(); |
|---|
| 1878 | Dynamic.start = aw_root->awar(AWAR_KL_DYNAMIC_START)->read_int(); |
|---|
| 1879 | Dynamic.maxx = aw_root->awar(AWAR_KL_DYNAMIC_MAXX)->read_int(); |
|---|
| 1880 | Dynamic.maxy = aw_root->awar(AWAR_KL_DYNAMIC_MAXY)->read_int(); |
|---|
| 1881 | Dynamic.type = (KL_DYNAMIC_THRESHOLD_TYPE)aw_root->awar(AWAR_KL_FUNCTION_TYPE)->read_int(); |
|---|
| 1882 | |
|---|
| 1883 | whichEdges = ANY_EDGE; |
|---|
| 1884 | if (aw_root->awar(AWAR_OPTI_MARKED_ONLY)->read_int()) whichEdges = EdgeSpec(whichEdges|SKIP_UNMARKED_EDGES); |
|---|
| 1885 | if (aw_root->awar(AWAR_OPTI_SKIP_FOLDED)->read_int()) whichEdges = EdgeSpec(whichEdges|SKIP_FOLDED_EDGES); |
|---|
| 1886 | } |
|---|
| 1887 | #if defined(UNIT_TESTS) |
|---|
| 1888 | KL_Settings::KL_Settings() { |
|---|
| 1889 | // set default values |
|---|
| 1890 | maxdepth = 15; |
|---|
| 1891 | |
|---|
| 1892 | Static.enabled = true; |
|---|
| 1893 | Static.depth[0] = 2; // always test both possibilities at starting edge |
|---|
| 1894 | Static.depth[1] = 8; |
|---|
| 1895 | Static.depth[2] = 6; |
|---|
| 1896 | Static.depth[3] = 6; |
|---|
| 1897 | Static.depth[4] = 6; |
|---|
| 1898 | Static.depth[5] = 6; |
|---|
| 1899 | |
|---|
| 1900 | Dynamic.enabled = true; |
|---|
| 1901 | Dynamic.start = 100; |
|---|
| 1902 | Dynamic.maxy = 150; |
|---|
| 1903 | Dynamic.maxx = 6; |
|---|
| 1904 | |
|---|
| 1905 | // these values do not seem to have any effect (i.e. are not covered by unit-tests): |
|---|
| 1906 | incdepth = 4; |
|---|
| 1907 | |
|---|
| 1908 | // const setting (not configurable) |
|---|
| 1909 | Dynamic.type = AP_QUADRAT_START; |
|---|
| 1910 | whichEdges = EdgeSpec(SKIP_UNMARKED_EDGES|SKIP_FOLDED_EDGES); |
|---|
| 1911 | } |
|---|
| 1912 | #endif |
|---|
| 1913 | |
|---|
| 1914 | static void create_optimize_vars(AW_root *aw_root, AW_default props) { |
|---|
| 1915 | // kernighan |
|---|
| 1916 | |
|---|
| 1917 | aw_root->awar_int(AWAR_OPTI_MARKED_ONLY, 1, props); |
|---|
| 1918 | aw_root->awar_int(AWAR_OPTI_SKIP_FOLDED, 1, props); |
|---|
| 1919 | |
|---|
| 1920 | aw_root->awar_int(AWAR_KL_MAXDEPTH, 15, props); |
|---|
| 1921 | aw_root->awar_int(AWAR_KL_INCDEPTH, 4, props); |
|---|
| 1922 | |
|---|
| 1923 | aw_root->awar_int(AWAR_KL_STATIC_ENABLED, 1, props); |
|---|
| 1924 | aw_root->awar_int(AWAR_KL_STATIC_DEPTH1, 5, props)->set_minmax(1, 8); |
|---|
| 1925 | aw_root->awar_int(AWAR_KL_STATIC_DEPTH2, 3, props)->set_minmax(1, 6); |
|---|
| 1926 | aw_root->awar_int(AWAR_KL_STATIC_DEPTH3, 2, props)->set_minmax(1, 6); |
|---|
| 1927 | aw_root->awar_int(AWAR_KL_STATIC_DEPTH4, 2, props)->set_minmax(1, 6); |
|---|
| 1928 | aw_root->awar_int(AWAR_KL_STATIC_DEPTH5, 1, props)->set_minmax(1, 6); |
|---|
| 1929 | |
|---|
| 1930 | aw_root->awar_int(AWAR_KL_DYNAMIC_ENABLED, 1, props); |
|---|
| 1931 | aw_root->awar_int(AWAR_KL_DYNAMIC_START, 100, props); |
|---|
| 1932 | aw_root->awar_int(AWAR_KL_DYNAMIC_MAXX, 6, props); |
|---|
| 1933 | aw_root->awar_int(AWAR_KL_DYNAMIC_MAXY, 150, props); |
|---|
| 1934 | |
|---|
| 1935 | aw_root->awar_int(AWAR_KL_FUNCTION_TYPE, AP_QUADRAT_START, props); |
|---|
| 1936 | } |
|---|
| 1937 | |
|---|
| 1938 | static void pars_create_all_awars(AW_root *awr, AW_default aw_def, GBDATA *gb_main) { |
|---|
| 1939 | awr->awar_string(AWAR_SPECIES_NAME, "", gb_main); |
|---|
| 1940 | awr->awar_string(AWAR_FOOTER, "", aw_def); |
|---|
| 1941 | |
|---|
| 1942 | // copy currently selected alignment to awar: |
|---|
| 1943 | { |
|---|
| 1944 | GB_transaction ta(gb_main); |
|---|
| 1945 | |
|---|
| 1946 | char *dali = GBT_get_default_alignment(gb_main); |
|---|
| 1947 | if (!dali) { |
|---|
| 1948 | GB_clear_error(); |
|---|
| 1949 | dali = ARB_strdup("no default alignment"); |
|---|
| 1950 | } |
|---|
| 1951 | awr->awar_string(AWAR_ALIGNMENT, dali, gb_main)->write_string(dali); |
|---|
| 1952 | free(dali); |
|---|
| 1953 | } |
|---|
| 1954 | |
|---|
| 1955 | awt_create_filter_awars(awr, aw_def, AWAR_FILTER_NAME, AWAR_ALIGNMENT); |
|---|
| 1956 | awt_set_awar_to_valid_filter_good_for_tree_methods(gb_main, awr, AWAR_FILTER_NAME); |
|---|
| 1957 | |
|---|
| 1958 | awr->awar_int(AWAR_PARS_TYPE, PARS_WAGNER, gb_main); |
|---|
| 1959 | |
|---|
| 1960 | { |
|---|
| 1961 | GB_transaction ta(gb_main); |
|---|
| 1962 | GBDATA *gb_tree_name = GB_search(gb_main, AWAR_TREE, GB_STRING); |
|---|
| 1963 | char *tree_name = GB_read_string(gb_tree_name); |
|---|
| 1964 | |
|---|
| 1965 | awr->awar_string(AWAR_TREE, "", aw_def)->write_string(tree_name); |
|---|
| 1966 | free(tree_name); |
|---|
| 1967 | } |
|---|
| 1968 | |
|---|
| 1969 | awr->awar_int(AWAR_PARSIMONY, 0, aw_def); |
|---|
| 1970 | awr->awar_int(AWAR_BEST_PARSIMONY, 0, aw_def); |
|---|
| 1971 | awr->awar_int(AWAR_STACKPOINTER, 0, aw_def); |
|---|
| 1972 | |
|---|
| 1973 | awr->awar_int(AWAR_RAND_REPEAT, 1, aw_def)->set_minmax(1, 1000000); // default value is overwritten by update_random_repeat() |
|---|
| 1974 | awr->awar_int(AWAR_RAND_PERCENT, 50, aw_def)->set_minmax(1, 100); |
|---|
| 1975 | |
|---|
| 1976 | create_optimize_vars(awr, aw_def); |
|---|
| 1977 | NDS_create_vars(awr, aw_def, gb_main); |
|---|
| 1978 | |
|---|
| 1979 | TREE_create_awars(awr, gb_main); |
|---|
| 1980 | |
|---|
| 1981 | #if defined(DEBUG) |
|---|
| 1982 | AWT_create_db_browser_awars(awr, aw_def); |
|---|
| 1983 | #endif // DEBUG |
|---|
| 1984 | |
|---|
| 1985 | GB_ERROR error = ARB_init_global_awars(awr, aw_def, gb_main); |
|---|
| 1986 | if (error) aw_message(error); |
|---|
| 1987 | } |
|---|
| 1988 | |
|---|
| 1989 | static AW_root *AD_map_viewer_aw_root = NULp; |
|---|
| 1990 | |
|---|
| 1991 | void PARS_map_viewer(GBDATA *gb_species, AD_MAP_VIEWER_TYPE vtype) { |
|---|
| 1992 | // Note: sync with ../NTREE/ad_spec.cxx@launch_MapViewer_cb |
|---|
| 1993 | |
|---|
| 1994 | if (AD_map_viewer_aw_root && |
|---|
| 1995 | gb_species && |
|---|
| 1996 | (vtype == ADMVT_SELECT || vtype == ADMVT_INFO)) |
|---|
| 1997 | { |
|---|
| 1998 | AD_map_viewer_aw_root->awar(AWAR_SPECIES_NAME)->write_string(null2empty(GBT_get_name(gb_species))); |
|---|
| 1999 | } |
|---|
| 2000 | } |
|---|
| 2001 | |
|---|
| 2002 | int ARB_main(int argc, char *argv[]) { |
|---|
| 2003 | aw_initstatus(); |
|---|
| 2004 | |
|---|
| 2005 | GB_shell shell; |
|---|
| 2006 | AW_root *aw_root = AWT_create_root("pars.arb", "ARB_PARS", need_macro_ability()); |
|---|
| 2007 | AD_map_viewer_aw_root = aw_root; |
|---|
| 2008 | |
|---|
| 2009 | ap_main = new AP_main; |
|---|
| 2010 | GLOBAL_PARS = new ArbParsimony(); |
|---|
| 2011 | |
|---|
| 2012 | const char *db_server = ":"; |
|---|
| 2013 | |
|---|
| 2014 | PARS_commands cmds; |
|---|
| 2015 | |
|---|
| 2016 | while (argc>=2 && argv[1][0] == '-') { |
|---|
| 2017 | argc--; |
|---|
| 2018 | argv++; |
|---|
| 2019 | if (!strcmp(argv[0], "-quit")) cmds.quit = 1; |
|---|
| 2020 | else if (!strcmp(argv[0], "-add_marked")) cmds.add_marked = 1; |
|---|
| 2021 | else if (!strcmp(argv[0], "-add_selected")) cmds.add_selected = 1; |
|---|
| 2022 | else if (!strcmp(argv[0], "-calc_branchlengths")) cmds.calc_branch_lengths = 1; |
|---|
| 2023 | else if (!strcmp(argv[0], "-calc_bootstrap")) cmds.calc_bootstrap = 1; |
|---|
| 2024 | else { |
|---|
| 2025 | fprintf(stderr, "Unknown option '%s'\n", argv[0]); |
|---|
| 2026 | |
|---|
| 2027 | printf(" Options: Meaning:\n" |
|---|
| 2028 | "\n" |
|---|
| 2029 | " -add_marked add marked species (without changing topology)\n" |
|---|
| 2030 | " -add_selected add selected species (without changing topology)\n" |
|---|
| 2031 | " -calc_branchlengths calculate branch lengths only\n" |
|---|
| 2032 | " -calc_bootstrap estimate bootstrap values\n" |
|---|
| 2033 | " -quit quit after performing operations\n" |
|---|
| 2034 | ); |
|---|
| 2035 | |
|---|
| 2036 | exit(EXIT_FAILURE); |
|---|
| 2037 | } |
|---|
| 2038 | } |
|---|
| 2039 | |
|---|
| 2040 | |
|---|
| 2041 | if (argc==2) db_server = argv[1]; |
|---|
| 2042 | |
|---|
| 2043 | GB_ERROR error = ap_main->open(db_server); |
|---|
| 2044 | if (!error) { |
|---|
| 2045 | GBDATA *gb_main = ap_main->get_gb_main(); |
|---|
| 2046 | error = configure_macro_recording(aw_root, "ARB_PARS", gb_main); |
|---|
| 2047 | |
|---|
| 2048 | if (!error) { |
|---|
| 2049 | #if defined(DEBUG) |
|---|
| 2050 | AWT_announce_db_to_browser(gb_main, GBS_global_string("ARB-database (%s)", db_server)); |
|---|
| 2051 | #endif // DEBUG |
|---|
| 2052 | |
|---|
| 2053 | pars_create_all_awars(aw_root, AW_ROOT_DEFAULT, gb_main); |
|---|
| 2054 | |
|---|
| 2055 | AW_window *aww = create_pars_init_window(aw_root, &cmds); |
|---|
| 2056 | aww->show(); |
|---|
| 2057 | |
|---|
| 2058 | AWT_install_cb_guards(); |
|---|
| 2059 | aw_root->main_loop(); |
|---|
| 2060 | } |
|---|
| 2061 | } |
|---|
| 2062 | |
|---|
| 2063 | if (error) aw_popup_exit(error); |
|---|
| 2064 | return EXIT_SUCCESS; |
|---|
| 2065 | } |
|---|
| 2066 | |
|---|
| 2067 | |
|---|
| 2068 | // -------------------------------------------------------------------------------- |
|---|
| 2069 | |
|---|
| 2070 | #ifdef UNIT_TESTS |
|---|
| 2071 | #include <arb_file.h> |
|---|
| 2072 | #include <arb_diff.h> |
|---|
| 2073 | #include <test_unit.h> |
|---|
| 2074 | #include <AP_seq_dna.hxx> |
|---|
| 2075 | #include <AP_seq_protein.hxx> |
|---|
| 2076 | #include "test_env.h" |
|---|
| 2077 | |
|---|
| 2078 | // #define AUTO_UPDATE_IF_CHANGED // uncomment to auto update expected results |
|---|
| 2079 | |
|---|
| 2080 | static arb_test::match_expectation topologyEquals(AP_tree_nlen *root_node, const char *treefile_base) { |
|---|
| 2081 | using namespace arb_test; |
|---|
| 2082 | expectation_group fulfilled; |
|---|
| 2083 | |
|---|
| 2084 | char *outfile = GBS_global_string_copy("pars/%s.tree", treefile_base); |
|---|
| 2085 | char *expected = GBS_global_string_copy("%s.expected", outfile); |
|---|
| 2086 | bool update = false; |
|---|
| 2087 | |
|---|
| 2088 | { |
|---|
| 2089 | FILE *out = fopen(outfile, "wt"); |
|---|
| 2090 | fulfilled.add(that(out).does_differ_from_NULL()); |
|---|
| 2091 | char *newick = GBT_tree_2_newick(root_node, NewickFormat(nLENGTH|nWRAP), false); |
|---|
| 2092 | fputs(newick, out); |
|---|
| 2093 | free(newick); |
|---|
| 2094 | fclose(out); |
|---|
| 2095 | } |
|---|
| 2096 | |
|---|
| 2097 | if (GB_is_regularfile(expected)) { |
|---|
| 2098 | bool match_exp_topo = textfiles_have_difflines(outfile,expected,0); |
|---|
| 2099 | #if defined(AUTO_UPDATE_IF_CHANGED) |
|---|
| 2100 | if (!match_exp_topo) update = true; |
|---|
| 2101 | #endif |
|---|
| 2102 | if (!update) fulfilled.add(that(match_exp_topo).is_equal_to(true)); |
|---|
| 2103 | } |
|---|
| 2104 | else { |
|---|
| 2105 | update = true; |
|---|
| 2106 | } |
|---|
| 2107 | |
|---|
| 2108 | if (update) TEST_COPY_FILE(outfile, expected); |
|---|
| 2109 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(outfile)); |
|---|
| 2110 | |
|---|
| 2111 | free(expected); |
|---|
| 2112 | free(outfile); |
|---|
| 2113 | |
|---|
| 2114 | return all().ofgroup(fulfilled); |
|---|
| 2115 | } |
|---|
| 2116 | |
|---|
| 2117 | template<class ENV> |
|---|
| 2118 | arb_test::match_expectation calcCostsCausesCombines(ENV& env, AP_FLOAT exp_pars, long exp_combines) { |
|---|
| 2119 | using namespace arb_test; |
|---|
| 2120 | expectation_group fulfilled; |
|---|
| 2121 | |
|---|
| 2122 | long combines_b4_costCalc = env.combines_performed(); |
|---|
| 2123 | fulfilled.add(that(combines_b4_costCalc).is_equal_to(0)); |
|---|
| 2124 | |
|---|
| 2125 | AP_FLOAT new_pars = env.root_node()->costs(); |
|---|
| 2126 | long combines_by_costCalc = env.combines_performed(); |
|---|
| 2127 | |
|---|
| 2128 | fulfilled.add(that(new_pars).fulfills(epsilon_similar(0.001), exp_pars)); |
|---|
| 2129 | fulfilled.add(that(combines_by_costCalc).is_equal_to(exp_combines)); |
|---|
| 2130 | |
|---|
| 2131 | return all().ofgroup(fulfilled); |
|---|
| 2132 | } |
|---|
| 2133 | |
|---|
// ---- topology expectations (see topologyEquals) ----
#define TEST_EXPECT_SAVED_TOPOLOGY(ENV,TOPO) \
    TEST_EXPECTATION(topologyEquals(ENV.root_node(), TOPO))
#define TEST_EXPECT_SAVED_TOPOLOGY__BROKEN(ENV,WANTED_TOPO,GOT_TOPO) \
    TEST_EXPECTATION__BROKEN(topologyEquals(ENV.root_node(), WANTED_TOPO), topologyEquals(ENV.root_node(), GOT_TOPO))

// ---- parsimony value expectations ----
// Note: TEST_EXPECT_PARSVAL expands to a complete statement (it carries its own trailing semicolon).
#define TEST_EXPECT_PARSVAL(ENV,PARS) \
    TEST_EXPECT_EQUAL(ENV.root_node()->costs(), PARS);

// use TEST_EXPECT_ONLY_PARSVAL_COMBINES when
// - no combines occurred (or combines were just tested using TEST_EXPECT_COMBINES_PERFORMED) and
// - topology was modified, so that calculation of costs causes new combines.
#define TEST_EXPECT_ONLY_PARSVAL_COMBINES(ENV,PARS,COMBINES) \
    TEST_EXPECTATION(calcCostsCausesCombines(ENV, PARS, COMBINES))

// same as above, but expects that calculating the costs causes no combines at all
#define TEST_EXPECT_KNOWN_PARSVAL(ENV,PARS) \
    TEST_EXPECT_ONLY_PARSVAL_COMBINES(ENV,PARS,0)
|---|
| 2143 | |
|---|
// Topology modifications which can be applied by modifyTopology().
enum TopoMod {
    MOD_REMOVE_MARKED, // remove marked leafs from tree

    MOD_QUICK_READD,   // nt_reAdd marked (third argument 'true')
    MOD_QUICK_ADD,     // nt_add marked (third argument 'true')
    MOD_READD_NNI,     // nt_reAdd marked (third argument 'false')

    MOD_ADD_PARTIAL,   // nt_add_partial

    MOD_CALC_LENS,     // calc_branchlengths_and_reorder
    MOD_OPTI_NNI,      // recursiveNNI (edges selected by KL settings)
    MOD_OPTI_GLOBAL,   // optimizeTree (using KL settings)

    MOD_MIX_TREE,      // mixtree_and_calclengths
};
|---|
| 2159 | |
|---|
| 2160 | template <typename SEQ> |
|---|
| 2161 | static void modifyTopology(PARSIMONY_testenv<SEQ>& env, TopoMod mod) { |
|---|
| 2162 | switch (mod) { |
|---|
| 2163 | case MOD_REMOVE_MARKED: |
|---|
| 2164 | env.graphic_tree()->get_tree_root()->remove_leafs(AWT_REMOVE_MARKED); |
|---|
| 2165 | break; |
|---|
| 2166 | |
|---|
| 2167 | case MOD_QUICK_READD: |
|---|
| 2168 | nt_reAdd(env.graphic_tree(), NT_ADD_MARKED, true); |
|---|
| 2169 | break; |
|---|
| 2170 | |
|---|
| 2171 | case MOD_QUICK_ADD: |
|---|
| 2172 | nt_add(env.graphic_tree(), NT_ADD_MARKED, true); |
|---|
| 2173 | break; |
|---|
| 2174 | |
|---|
| 2175 | case MOD_READD_NNI: |
|---|
| 2176 | nt_reAdd(env.graphic_tree(), NT_ADD_MARKED, false); |
|---|
| 2177 | break; |
|---|
| 2178 | |
|---|
| 2179 | case MOD_ADD_PARTIAL: |
|---|
| 2180 | nt_add_partial(env.graphic_tree()); |
|---|
| 2181 | break; |
|---|
| 2182 | |
|---|
| 2183 | case MOD_CALC_LENS: |
|---|
| 2184 | calc_branchlengths_and_reorder(env.graphic_tree()); |
|---|
| 2185 | break; |
|---|
| 2186 | |
|---|
| 2187 | case MOD_OPTI_NNI: // only marked/unfolded |
|---|
| 2188 | recursiveNNI(env.graphic_tree(), env.get_KL_settings().whichEdges); |
|---|
| 2189 | break; |
|---|
| 2190 | |
|---|
| 2191 | case MOD_OPTI_GLOBAL: |
|---|
| 2192 | optimizeTree(env.graphic_tree(), env.get_KL_settings()); |
|---|
| 2193 | break; |
|---|
| 2194 | |
|---|
| 2195 | case MOD_MIX_TREE: { |
|---|
| 2196 | long leafs = rootNode()->count_leafs(); |
|---|
| 2197 | mixtree_and_calclengths(env.graphic_tree(), calculate_default_random_repeat(leafs), 100, ANY_EDGE); |
|---|
| 2198 | break; |
|---|
| 2199 | } |
|---|
| 2200 | } |
|---|
| 2201 | } |
|---|
| 2202 | |
|---|
| 2203 | template <typename SEQ> |
|---|
| 2204 | static arb_test::match_expectation modifyingTopoResultsIn(TopoMod mod, const char *topo, long pars_expected, PARSIMONY_testenv<SEQ>& env, bool restore) { |
|---|
| 2205 | using namespace arb_test; |
|---|
| 2206 | expectation_group fulfilled; |
|---|
| 2207 | |
|---|
| 2208 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 2209 | |
|---|
| 2210 | Level upc = env.get_user_push_counter(); |
|---|
| 2211 | Level fl = env.get_frame_level(); |
|---|
| 2212 | |
|---|
| 2213 | if (restore) { |
|---|
| 2214 | env.push(); |
|---|
| 2215 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 2216 | } |
|---|
| 2217 | |
|---|
| 2218 | AWT_graphic_exports& exports = env.graphic_tree()->exports; |
|---|
| 2219 | exports.clear_save_request(); |
|---|
| 2220 | modifyTopology(env, mod); |
|---|
| 2221 | if (topo) { |
|---|
| 2222 | fulfilled.add(topologyEquals(env.root_node(), topo)); |
|---|
| 2223 | if (mod != MOD_REMOVE_MARKED) { // remove_leafs doesn't request save |
|---|
| 2224 | fulfilled.add(that(exports.needs_save()).is_equal_to(true)); |
|---|
| 2225 | } |
|---|
| 2226 | } |
|---|
| 2227 | |
|---|
| 2228 | fulfilled.add(that(allBranchlengthsAreDefined(env.root_node())).is_equal_to(true)); |
|---|
| 2229 | |
|---|
| 2230 | if (pars_expected != -1) { |
|---|
| 2231 | fulfilled.add(that(env.root_node()->costs()).is_equal_to(pars_expected)); |
|---|
| 2232 | } |
|---|
| 2233 | |
|---|
| 2234 | if (restore) { |
|---|
| 2235 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 2236 | TEST_VALIDITY(env.pop_will_produce_valid_tree()); |
|---|
| 2237 | env.pop(); |
|---|
| 2238 | bool blen_def_after_pop = allBranchlengthsAreDefined(env.root_node()); |
|---|
| 2239 | fulfilled.add(that(blen_def_after_pop).is_equal_to(true)); |
|---|
| 2240 | } |
|---|
| 2241 | |
|---|
| 2242 | TEST_EXPECT_EQUAL(fl, env.get_frame_level()); |
|---|
| 2243 | TEST_EXPECT_EQUAL(upc, env.get_user_push_counter()); |
|---|
| 2244 | |
|---|
| 2245 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 2246 | |
|---|
| 2247 | return all().ofgroup(fulfilled); |
|---|
| 2248 | } |
|---|
| 2249 | |
|---|
| 2250 | static arb_test::match_expectation movingRootDoesntAffectCosts(long pars_expected) { |
|---|
| 2251 | using namespace arb_test; |
|---|
| 2252 | expectation_group fulfilled; |
|---|
| 2253 | |
|---|
| 2254 | long pars_min = LONG_MAX; |
|---|
| 2255 | long pars_max = LONG_MIN; |
|---|
| 2256 | |
|---|
| 2257 | for (int depth_first = 0; depth_first<=1; ++depth_first) { |
|---|
| 2258 | for (int push_local = 0; push_local<=1; ++push_local) { |
|---|
| 2259 | EdgeChain chain(rootEdge(), ANY_EDGE, depth_first); |
|---|
| 2260 | |
|---|
| 2261 | if (!push_local) ap_main->remember(); |
|---|
| 2262 | while (chain) { |
|---|
| 2263 | AP_tree_edge *edge = *chain; ++chain; |
|---|
| 2264 | |
|---|
| 2265 | if (push_local) ap_main->remember(); |
|---|
| 2266 | edge->set_root(); |
|---|
| 2267 | long pars = rootNode()->costs(); |
|---|
| 2268 | pars_min = std::min(pars, pars_min); |
|---|
| 2269 | pars_max = std::max(pars, pars_max); |
|---|
| 2270 | if (push_local) ap_main->revert(); |
|---|
| 2271 | } |
|---|
| 2272 | if (!push_local) ap_main->revert(); |
|---|
| 2273 | } |
|---|
| 2274 | } |
|---|
| 2275 | |
|---|
| 2276 | fulfilled.add(that(pars_min).is_equal_to(pars_expected)); |
|---|
| 2277 | fulfilled.add(that(pars_max).is_equal_to(pars_expected)); |
|---|
| 2278 | |
|---|
| 2279 | return all().ofgroup(fulfilled); |
|---|
| 2280 | } |
|---|
| 2281 | |
|---|
| 2282 | static GBDATA *copy_to(GBDATA *gb_species, const char *newShortname) { |
|---|
| 2283 | GBDATA *gb_species_data = GB_get_father(gb_species); |
|---|
| 2284 | GBDATA *gb_new_species = GB_create_container(gb_species_data, "species"); |
|---|
| 2285 | |
|---|
| 2286 | GB_ERROR error = NULp; |
|---|
| 2287 | if (!gb_new_species) { |
|---|
| 2288 | error = GB_await_error(); |
|---|
| 2289 | } |
|---|
| 2290 | else { |
|---|
| 2291 | error = GB_copy_dropProtectMarksAndTempstate(gb_new_species, gb_species); |
|---|
| 2292 | if (!error) error = GBT_write_string(gb_new_species, "name", newShortname); |
|---|
| 2293 | } |
|---|
| 2294 | |
|---|
| 2295 | ap_assert(contradicted(gb_new_species, error)); |
|---|
| 2296 | return gb_new_species; |
|---|
| 2297 | } |
|---|
| 2298 | |
|---|
| 2299 | inline void mark_only(GBDATA *gb_species) { |
|---|
| 2300 | GBDATA *gb_main = GB_get_root(gb_species); |
|---|
| 2301 | GB_transaction ta(gb_main); |
|---|
| 2302 | GBT_mark_all(gb_main, 0); |
|---|
| 2303 | GB_write_flag(gb_species, 1); |
|---|
| 2304 | } |
|---|
| 2305 | inline void mark(GBDATA *gb_species) { |
|---|
| 2306 | GBDATA *gb_main = GB_get_root(gb_species); |
|---|
| 2307 | GB_transaction ta(gb_main); |
|---|
| 2308 | GB_write_flag(gb_species, 1); |
|---|
| 2309 | } |
|---|
| 2310 | inline void mark_all(GBDATA *gb_main) { |
|---|
| 2311 | GB_transaction ta(gb_main); |
|---|
| 2312 | GBT_mark_all(gb_main, 1); |
|---|
| 2313 | } |
|---|
| 2314 | |
|---|
| 2315 | inline int is_partial(GBDATA *gb_species) { |
|---|
| 2316 | GB_transaction ta(gb_species); |
|---|
| 2317 | return GBT_is_partial(gb_species, -1, false); |
|---|
| 2318 | } |
|---|
| 2319 | |
|---|
| 2320 | template <typename SEQ> |
|---|
| 2321 | static arb_test::match_expectation addedAsBrotherOf(const char *name, const char *allowedBrothers, PARSIMONY_testenv<SEQ>& env) { |
|---|
| 2322 | using namespace arb_test; |
|---|
| 2323 | expectation_group fulfilled; |
|---|
| 2324 | |
|---|
| 2325 | AP_tree_nlen *node_in_tree = env.root_node()->findLeafNamed(name); |
|---|
| 2326 | ap_assert(node_in_tree); |
|---|
| 2327 | fulfilled.add(that(node_in_tree).does_differ_from_NULL()); |
|---|
| 2328 | |
|---|
| 2329 | const char *brother = node_in_tree->get_brother()->name; |
|---|
| 2330 | ap_assert(brother); |
|---|
| 2331 | fulfilled.add(that(allowedBrothers).does_contain(brother)); |
|---|
| 2332 | |
|---|
| 2333 | return all().ofgroup(fulfilled); |
|---|
| 2334 | } |
|---|
| 2335 | |
|---|
| 2336 | template <typename SEQ> |
|---|
| 2337 | static arb_test::match_expectation addingPartialResultsIn(GBDATA *gb_added_species, const char *allowedBrothers, const char *topo, int pars_expected, PARSIMONY_testenv<SEQ>& env) { |
|---|
| 2338 | using namespace arb_test; |
|---|
| 2339 | expectation_group fulfilled; |
|---|
| 2340 | |
|---|
| 2341 | mark_only(gb_added_species); |
|---|
| 2342 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
|---|
| 2343 | fulfilled.add(modifyingTopoResultsIn(MOD_ADD_PARTIAL, topo, pars_expected, env, false)); |
|---|
| 2344 | fulfilled.add(that(is_partial(gb_added_species)).is_equal_to(1)); |
|---|
| 2345 | |
|---|
| 2346 | const char *name = GBT_get_name_or_description(gb_added_species); |
|---|
| 2347 | fulfilled.add(addedAsBrotherOf(name, allowedBrothers, env)); |
|---|
| 2348 | |
|---|
| 2349 | return all().ofgroup(fulfilled); |
|---|
| 2350 | } |
|---|
| 2351 | |
|---|
| 2352 | static int seqDiff(GBDATA *gb_main, const char *aliname, const char *species1, const char *species2, int startPos, int endPos) { |
|---|
| 2353 | GB_transaction ta(gb_main); |
|---|
| 2354 | |
|---|
| 2355 | GBDATA *gb_species1 = GBT_expect_species(gb_main, species1); |
|---|
| 2356 | GBDATA *gb_species2 = GBT_expect_species(gb_main, species2); |
|---|
| 2357 | int diffs = -1; |
|---|
| 2358 | |
|---|
| 2359 | if (gb_species1 && gb_species2) { |
|---|
| 2360 | GBDATA *gb_seq1 = GBT_find_sequence(gb_species1, aliname); |
|---|
| 2361 | GBDATA *gb_seq2 = GBT_find_sequence(gb_species2, aliname); |
|---|
| 2362 | |
|---|
| 2363 | if (gb_seq1 && gb_seq2) { |
|---|
| 2364 | char *seq1 = GB_read_string(gb_seq1); |
|---|
| 2365 | char *seq2 = GB_read_string(gb_seq2); |
|---|
| 2366 | |
|---|
| 2367 | int maxPos1 = strlen(seq1)-1; |
|---|
| 2368 | #if defined(ASSERTION_USED) |
|---|
| 2369 | int maxPos2 = strlen(seq2)-1; |
|---|
| 2370 | #endif |
|---|
| 2371 | ap_assert(maxPos1 == maxPos2); |
|---|
| 2372 | |
|---|
| 2373 | if (endPos>maxPos1) endPos = maxPos1; |
|---|
| 2374 | |
|---|
| 2375 | diffs = 0; |
|---|
| 2376 | for (int p = startPos; p<=endPos; ++p) { // LOOP_ VECTORIZED[!<9.1] // @@@ fails in RELEASE code! // IRRELEVANT_LOOP |
|---|
| 2377 | diffs += seq1[p] != seq2[p]; |
|---|
| 2378 | } |
|---|
| 2379 | |
|---|
| 2380 | free(seq2); |
|---|
| 2381 | free(seq1); |
|---|
| 2382 | } |
|---|
| 2383 | } |
|---|
| 2384 | |
|---|
| 2385 | return diffs; |
|---|
| 2386 | } |
|---|
| 2387 | |
|---|
| 2388 | static GBDATA *createPartialSeqFrom(GBDATA *gb_main, const char *aliname, const char *dest_species, const char *source_species, int startPos, int endPos) { |
|---|
| 2389 | GB_transaction ta(gb_main); |
|---|
| 2390 | |
|---|
| 2391 | GBDATA *gb_result = NULp; |
|---|
| 2392 | GBDATA *gb_source_species = GBT_expect_species(gb_main, source_species); |
|---|
| 2393 | |
|---|
| 2394 | if (gb_source_species) { |
|---|
| 2395 | GBDATA *gb_dest_species = copy_to(gb_source_species, dest_species); |
|---|
| 2396 | GBDATA *gb_dest_seq = GBT_find_sequence(gb_dest_species, aliname); // =same as source seq |
|---|
| 2397 | char *seq = GB_read_string(gb_dest_seq); |
|---|
| 2398 | |
|---|
| 2399 | if (seq) { |
|---|
| 2400 | int maxPos = strlen(seq)-1; |
|---|
| 2401 | |
|---|
| 2402 | startPos = std::min(startPos, maxPos); |
|---|
| 2403 | endPos = std::min(endPos, maxPos); |
|---|
| 2404 | |
|---|
| 2405 | if (startPos>0) memset(seq, '.', startPos); |
|---|
| 2406 | if (endPos<maxPos) memset(seq+endPos+1, '.', maxPos-endPos); |
|---|
| 2407 | |
|---|
| 2408 | GB_ERROR error = GB_write_string(gb_dest_seq, seq); |
|---|
| 2409 | if (error) GB_export_error(error); |
|---|
| 2410 | else { |
|---|
| 2411 | gb_result = gb_dest_species; // success |
|---|
| 2412 | #if defined(DEBUG) |
|---|
| 2413 | fprintf(stderr, "created partial '%s' from '%s' (seq='%s')\n", dest_species, source_species, seq); |
|---|
| 2414 | #endif |
|---|
| 2415 | } |
|---|
| 2416 | |
|---|
| 2417 | free(seq); |
|---|
| 2418 | } |
|---|
| 2419 | } |
|---|
| 2420 | |
|---|
| 2421 | return gb_result; |
|---|
| 2422 | } |
|---|
| 2423 | |
|---|
| 2424 | static GB_ERROR modifyOneBase(GBDATA *gb_species, const char *aliname, char cOld, char cNew) { |
|---|
| 2425 | GB_transaction ta(gb_species); |
|---|
| 2426 | GB_ERROR error = "failed to modifyOneBase"; |
|---|
| 2427 | |
|---|
| 2428 | GBDATA *gb_seq = GBT_find_sequence(gb_species, aliname); |
|---|
| 2429 | if (gb_seq) { |
|---|
| 2430 | char *seq = GB_read_string(gb_seq); |
|---|
| 2431 | if (seq) { |
|---|
| 2432 | char *B = strchr(seq, cOld); |
|---|
| 2433 | if (!B) { |
|---|
| 2434 | error = "does not contain base in modifyOneBase"; |
|---|
| 2435 | } |
|---|
| 2436 | else { |
|---|
| 2437 | B[0] = cNew; |
|---|
| 2438 | error = GB_write_string(gb_seq, seq); |
|---|
| 2439 | } |
|---|
| 2440 | free(seq); |
|---|
| 2441 | } |
|---|
| 2442 | } |
|---|
| 2443 | |
|---|
| 2444 | return error; |
|---|
| 2445 | } |
|---|
| 2446 | |
|---|
| 2447 | static long unmark_unwanted(const char *, long cd_gbd, void*) { |
|---|
| 2448 | GBDATA *gbd = (GBDATA*)cd_gbd; |
|---|
| 2449 | GB_write_flag(gbd, 0); |
|---|
| 2450 | return 0; |
|---|
| 2451 | } |
|---|
| 2452 | |
|---|
| 2453 | void TEST_SLOW_nucl_tree_modifications() { |
|---|
| 2454 | const char *aliname = "ali_5s"; |
|---|
| 2455 | |
|---|
| 2456 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", aliname); |
|---|
| 2457 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_test")); |
|---|
| 2458 | TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-initial"); |
|---|
| 2459 | |
|---|
| 2460 | const int PARSIMONY_ORG = 302; |
|---|
| 2461 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 14); |
|---|
| 2462 | |
|---|
| 2463 | // [NUCOPTI] opposed to protein tests below the initial tree here is NOT optimized! compare .@PROTOPTI |
|---|
| 2464 | // -> removing and adding species produces a better tree (for add+NNI) |
|---|
| 2465 | // |
|---|
| 2466 | // diff initial->removed : http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/nucl-removed.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fnucl-initial.tree.expected |
|---|
| 2467 | // diff initial->add-quick: http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/nucl-add-quick.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fnucl-initial.tree.expected |
|---|
| 2468 | // diff initial->add-NNI: http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/nucl-add-NNI.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fnucl-initial.tree.expected |
|---|
| 2469 | |
|---|
| 2470 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, "nucl-removed", PARSIMONY_ORG-94, env, true)); // test remove-marked only (same code as part of nt_reAdd) |
|---|
| 2471 | TEST_EXPECT_COMBINES_PERFORMED(env, 3); |
|---|
| 2472 | |
|---|
| 2473 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "nucl-add-quick", PARSIMONY_ORG-18, env, true)); // test quick-add |
|---|
| 2474 | TEST_EXPECT_COMBINES_PERFORMED(env, 400); |
|---|
| 2475 | |
|---|
| 2476 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_READD_NNI, "nucl-add-NNI", PARSIMONY_ORG-20, env, true)); // test add + NNI |
|---|
| 2477 | TEST_EXPECT_COMBINES_PERFORMED(env, 503); |
|---|
| 2478 | |
|---|
| 2479 | // test partial-add |
|---|
| 2480 | { |
|---|
| 2481 | GBDATA *gb_main = env.gbmain(); |
|---|
| 2482 | |
|---|
| 2483 | // create 2 non-overlapping partial species |
|---|
| 2484 | const int SPLIT = 55; |
|---|
| 2485 | GBDATA *CorGlutP = createPartialSeqFrom(gb_main, aliname, "CorGlutP", "CorGluta", 0, SPLIT); |
|---|
| 2486 | GBDATA *CloButyP = createPartialSeqFrom(gb_main, aliname, "CloButyP", "CloButyr", SPLIT+1, INT_MAX); |
|---|
| 2487 | GBDATA *CloButyM = createPartialSeqFrom(gb_main, aliname, "CloButyM", "CloButyr", SPLIT+1, INT_MAX); |
|---|
| 2488 | TEST_EXPECT_NO_ERROR(modifyOneBase(CloButyM, aliname, 'G', 'C')); // change first 'G' into 'C' |
|---|
| 2489 | |
|---|
| 2490 | TEST_VALIDITY(env.all_available_pops_will_produce_valid_trees()); // no push yet (does nothing) |
|---|
| 2491 | |
|---|
| 2492 | // test partials differ from full: |
|---|
| 2493 | TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CorGlutP", "CorGluta", 0, INT_MAX)); |
|---|
| 2494 | TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyP", "CloButyr", 0, INT_MAX)); |
|---|
| 2495 | TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyM", "CloButyr", 0, INT_MAX)); |
|---|
| 2496 | // test partials created from CloButyr differ in partial range: |
|---|
| 2497 | TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyM", "CloButyP", SPLIT+1, INT_MAX)); |
|---|
| 2498 | |
|---|
| 2499 | // test condition that "CloButyr and CloButy2 do NOT differ in seq-range of partial" (otherwise test below makes no sense!) |
|---|
| 2500 | TEST_EXPECT_ZERO(seqDiff(gb_main, aliname, "CloButyr", "CloButy2", SPLIT+1, INT_MAX)); |
|---|
| 2501 | |
|---|
| 2502 | // test that "CloButyr and CloButy2 DO differ in whole seq-range" (otherwise inserting into tree is non-deterministic) |
|---|
| 2503 | TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyr", "CloButy2", 0, INT_MAX)); |
|---|
| 2504 | |
|---|
| 2505 | // add CloButyP (and undo) |
|---|
| 2506 | { |
|---|
| 2507 | env.push(); |
|---|
| 2508 | |
|---|
| 2509 | // CloButyr and CloButy2 do not differ in seq-range of partial -> any of both may be chosen as brother. |
|---|
| 2510 | // behavior should be changed with #605 |
|---|
| 2511 | TEST_EXPECTATION(addingPartialResultsIn(CloButyP, "CloButyr;CloButy2", "nucl-addPart-CloButyP", PARSIMONY_ORG, env)); |
|---|
| 2512 | TEST_EXPECT_COMBINES_PERFORMED(env, 6); |
|---|
| 2513 | env.pop(); |
|---|
| 2514 | } |
|---|
| 2515 | |
|---|
| 2516 | { |
|---|
| 2517 | env.push(); |
|---|
| 2518 | TEST_EXPECTATION(addingPartialResultsIn(CorGlutP, "CorGluta", "nucl-addPart-CorGlutP", PARSIMONY_ORG, env)); // add CorGlutP |
|---|
| 2519 | TEST_EXPECT_COMBINES_PERFORMED(env, 5); // @@@ partial-add should not perform combines at all (maybe caused by cost-recalc?) |
|---|
| 2520 | TEST_EXPECTATION(addingPartialResultsIn(CloButyP, "CloButyr;CloButy2", "nucl-addPart-CorGlutP-CloButyP", PARSIMONY_ORG, env)); // also add CloButyP |
|---|
| 2521 | TEST_EXPECT_COMBINES_PERFORMED(env, 6); |
|---|
| 2522 | env.pop(); |
|---|
| 2523 | } |
|---|
| 2524 | |
|---|
| 2525 | // now add CorGlutP as full, then CloButyP and CloButyM as partials |
|---|
| 2526 | { |
|---|
| 2527 | env.push(); |
|---|
| 2528 | |
|---|
| 2529 | mark_only(CorGlutP); |
|---|
| 2530 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
|---|
| 2531 | { |
|---|
| 2532 | GB_transaction ta(gb_main); |
|---|
| 2533 | TEST_EXPECT_NO_ERROR(GBT_write_int(CorGlutP, "ARB_partial", 0)); // revert species to "full" |
|---|
| 2534 | } |
|---|
| 2535 | |
|---|
| 2536 | const int PARSIMONY_ADDED = PARSIMONY_ORG; // value after adding CorGlutP (as full-length sequence) |
|---|
| 2537 | |
|---|
| 2538 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "nucl-addPartialAsFull-CorGlutP", PARSIMONY_ADDED, env, false)); |
|---|
| 2539 | TEST_EXPECT_COMBINES_PERFORMED(env, 230); |
|---|
| 2540 | TEST_EXPECT_EQUAL(is_partial(CorGlutP), 0); // check CorGlutP was added as full sequence |
|---|
| 2541 | TEST_EXPECTATION(addedAsBrotherOf("CorGlutP", "CorGluta", env)); // partial created from CorGluta gets inserted next to CorGluta |
|---|
| 2542 | |
|---|
| 2543 | // add CloButyP as partial. |
|---|
| 2544 | // as expected it is placed next to matching full sequences (does not differ in partial range) |
|---|
| 2545 | TEST_EXPECTATION(addingPartialResultsIn(CloButyP, "CloButyr;CloButy2", NULp, PARSIMONY_ADDED, env)); |
|---|
| 2546 | TEST_EXPECT_COMBINES_PERFORMED(env, 6); |
|---|
| 2547 | |
|---|
| 2548 | // CloButyM differs slightly in overlap with CloButyr/CloButy2, but has no overlap with CorGlutP |
|---|
| 2549 | // shows bug described in #609 is fixed: |
|---|
| 2550 | TEST_EXPECTATION(addingPartialResultsIn(CloButyM, "CloButyP", "nucl-addPart-bug609", |
|---|
| 2551 | PARSIMONY_ADDED+1, // @@@ known bug - partial should not affect parsimony value; possibly related to ../HELP_SOURCE/source/pa_partial.hlp@WARNINGS |
|---|
| 2552 | env)); |
|---|
| 2553 | TEST_EXPECT_COMBINES_PERFORMED(env, 7); |
|---|
| 2554 | env.pop(); |
|---|
| 2555 | } |
|---|
| 2556 | } |
|---|
| 2557 | |
|---|
| 2558 | TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-initial"); |
|---|
| 2559 | |
|---|
| 2560 | const int PARSIMONY_NNI_MARKED = PARSIMONY_ORG-18; |
|---|
| 2561 | const int PARSIMONY_NNI_ALL = PARSIMONY_ORG-18; |
|---|
| 2562 | const int PARSIMONY_OPTI_MARKED = PARSIMONY_ORG-25; |
|---|
| 2563 | const int PARSIMONY_OPTI_VISIBLE = PARSIMONY_ORG-26; |
|---|
| 2564 | const int PARSIMONY_OPTI_ALL = PARSIMONY_ORG-36; |
|---|
| 2565 | |
|---|
| 2566 | { |
|---|
| 2567 | env.push(); |
|---|
| 2568 | TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_ORG)); |
|---|
| 2569 | TEST_EXPECT_COMBINES_PERFORMED(env, 342); |
|---|
| 2570 | env.pop(); |
|---|
| 2571 | } |
|---|
| 2572 | |
|---|
| 2573 | // ------------------------------ |
|---|
| 2574 | // test optimize (some) |
|---|
| 2575 | |
|---|
| 2576 | // mark initially marked species |
|---|
| 2577 | { |
|---|
| 2578 | GB_transaction ta(env.gbmain()); |
|---|
| 2579 | GBT_restore_marked_species(env.gbmain(), "CorAquat;CorGluta;CurCitre;CloButyr;CloButy2;CytAquat"); |
|---|
| 2580 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
|---|
| 2581 | } |
|---|
| 2582 | |
|---|
| 2583 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG); |
|---|
| 2584 | |
|---|
| 2585 | // test branchlength calculation |
|---|
| 2586 | // (optimizations below implicitely recalculates branchlengths) |
|---|
| 2587 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "nucl-calclength", PARSIMONY_ORG, env, false)); |
|---|
| 2588 | TEST_EXPECT_COMBINES_PERFORMED(env, 120); |
|---|
| 2589 | |
|---|
| 2590 | // test whether branchlength calculation depends on root-position |
|---|
| 2591 | { |
|---|
| 2592 | AP_tree_edge *orgRootEdge = rootEdge(); |
|---|
| 2593 | |
|---|
| 2594 | env.push(); |
|---|
| 2595 | |
|---|
| 2596 | const char *tested_roots[] = { |
|---|
| 2597 | "CloButyr", |
|---|
| 2598 | "CloTyro4", |
|---|
| 2599 | "CloTyrob", |
|---|
| 2600 | "CloInnoc", |
|---|
| 2601 | }; |
|---|
| 2602 | |
|---|
| 2603 | for (size_t r = 0; r<ARRAY_ELEMS(tested_roots); ++r) { |
|---|
| 2604 | const char *leafName = tested_roots[r]; |
|---|
| 2605 | env.root_node()->findLeafNamed(leafName)->set_root(); |
|---|
| 2606 | calc_branchlengths_and_reorder(env.graphic_tree()); |
|---|
| 2607 | orgRootEdge->set_root(); |
|---|
| 2608 | env.graphic_tree()->reorderTree(BIG_BRANCHES_TO_TOP); |
|---|
| 2609 | |
|---|
| 2610 | TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-calclength"); |
|---|
| 2611 | } |
|---|
| 2612 | TEST_EXPECT_COMBINES_PERFORMED(env, 517); |
|---|
| 2613 | |
|---|
| 2614 | env.pop(); |
|---|
| 2615 | } |
|---|
| 2616 | |
|---|
| 2617 | // test optimize (some) |
|---|
| 2618 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "nucl-opti-NNI", PARSIMONY_NNI_MARKED, env, true)); // test recursive NNI |
|---|
| 2619 | TEST_EXPECT_COMBINES_PERFORMED(env, 208); |
|---|
| 2620 | |
|---|
| 2621 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-marked-global", PARSIMONY_OPTI_MARKED, env, true)); // test recursive NNI+KL |
|---|
| 2622 | TEST_EXPECT_COMBINES_PERFORMED(env, 18518); |
|---|
| 2623 | |
|---|
| 2624 | { |
|---|
| 2625 | KL_Settings& KL = env.get_KL_settings(); |
|---|
| 2626 | LocallyModify<EdgeSpec> target(KL.whichEdges, EdgeSpec(KL.whichEdges&~SKIP_UNMARKED_EDGES)); // ignore marks; skip folded |
|---|
| 2627 | |
|---|
| 2628 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-visible-global", PARSIMONY_OPTI_VISIBLE, env, true)); // same result as if all species marked (see below) |
|---|
| 2629 | TEST_EXPECT_COMBINES_PERFORMED(env, 34925); |
|---|
| 2630 | |
|---|
| 2631 | KL.whichEdges = EdgeSpec(KL.whichEdges&~SKIP_FOLDED_EDGES); // ignore marks and folding |
|---|
| 2632 | |
|---|
| 2633 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-global", PARSIMONY_OPTI_ALL, env, true)); // same result as if all species marked and all groups unfolded (see below) |
|---|
| 2634 | TEST_EXPECT_COMBINES_PERFORMED(env, 124811); |
|---|
| 2635 | } |
|---|
| 2636 | |
|---|
| 2637 | // ----------------------------- |
|---|
| 2638 | // test optimize (all) |
|---|
| 2639 | |
|---|
| 2640 | // mark all species |
|---|
| 2641 | mark_all(env.gbmain()); |
|---|
| 2642 | // unmark species not in tree |
|---|
| 2643 | { |
|---|
| 2644 | GB_transaction ta(env.gbmain()); |
|---|
| 2645 | GB_HASH *markedNotInTree = GBT_create_marked_species_hash(env.gbmain()); |
|---|
| 2646 | NT_remove_species_in_tree_from_hash(env.root_node(), markedNotInTree); |
|---|
| 2647 | GBS_hash_do_loop(markedNotInTree, unmark_unwanted, NULp); |
|---|
| 2648 | GBS_free_hash(markedNotInTree); |
|---|
| 2649 | } |
|---|
| 2650 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
|---|
| 2651 | TEST_EXPECT_EQUAL(GBT_count_marked_species(env.gbmain()), 15); |
|---|
| 2652 | |
|---|
| 2653 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG); |
|---|
| 2654 | |
|---|
| 2655 | // test branchlength calculation |
|---|
| 2656 | // (optimizations below implicitely recalculates branchlengths) |
|---|
| 2657 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "nucl-calclength", PARSIMONY_ORG, env, false)); |
|---|
| 2658 | TEST_EXPECT_COMBINES_PERFORMED(env, 120); |
|---|
| 2659 | |
|---|
| 2660 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "nucl-opti-all-NNI", PARSIMONY_NNI_ALL, env, true)); // test recursive NNI |
|---|
| 2661 | TEST_EXPECT_COMBINES_PERFORMED(env, 242); |
|---|
| 2662 | |
|---|
| 2663 | { |
|---|
| 2664 | env.push(); |
|---|
| 2665 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-visible-global", PARSIMONY_OPTI_VISIBLE, env, false)); // test recursive NNI+KL |
|---|
| 2666 | TEST_EXPECT_COMBINES_PERFORMED(env, 34925); |
|---|
| 2667 | |
|---|
| 2668 | TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_OPTI_VISIBLE)); |
|---|
| 2669 | TEST_EXPECT_COMBINES_PERFORMED(env, 336); |
|---|
| 2670 | env.pop(); |
|---|
| 2671 | } |
|---|
| 2672 | |
|---|
| 2673 | // unfold groups |
|---|
| 2674 | { |
|---|
| 2675 | AP_tree_nlen *CloTyrob = env.root_node()->findLeafNamed("CloTyrob"); |
|---|
| 2676 | AP_tree_nlen *group = CloTyrob->get_father(); |
|---|
| 2677 | ap_assert(group->gr.grouped); |
|---|
| 2678 | group->gr.grouped = false; // unfold the only folded group |
|---|
| 2679 | |
|---|
| 2680 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-global", PARSIMONY_OPTI_ALL, env, true)); // test recursive NNI+KL |
|---|
| 2681 | TEST_EXPECT_COMBINES_PERFORMED(env, 124811); |
|---|
| 2682 | } |
|---|
| 2683 | |
|---|
| 2684 | // test re-add all (i.e. test "create tree from scratch") |
|---|
| 2685 | // Note: trees generated below are NO LONGER better than optimized trees! (see also r13651) |
|---|
| 2686 | |
|---|
| 2687 | // quick add: |
|---|
| 2688 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "nucl-readdall-quick", PARSIMONY_ORG-7, env, true)); |
|---|
| 2689 | TEST_EXPECT_COMBINES_PERFORMED(env, 439); |
|---|
| 2690 | |
|---|
| 2691 | // quick add + NNI: |
|---|
| 2692 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_READD_NNI, "nucl-readdall-NNI", PARSIMONY_ORG-8, env, true)); |
|---|
| 2693 | TEST_EXPECT_COMBINES_PERFORMED(env, 613); |
|---|
| 2694 | |
|---|
| 2695 | // test adding a too short sequence |
|---|
| 2696 | // (has to be last test, because it modifies seq data) << ------------ !!!!! |
|---|
| 2697 | { |
|---|
| 2698 | env.push(); |
|---|
| 2699 | |
|---|
| 2700 | AP_tree_nlen *CloTyrob = env.root_node()->findLeafNamed("CloTyrob"); |
|---|
| 2701 | mark_only(CloTyrob->gb_node); |
|---|
| 2702 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
|---|
| 2703 | |
|---|
| 2704 | // modify sequence of CloTyrob (keep only some bases) |
|---|
| 2705 | { |
|---|
| 2706 | GB_transaction ta(env.gbmain()); |
|---|
| 2707 | GBDATA *gb_seq = GBT_find_sequence(CloTyrob->gb_node, aliname); |
|---|
| 2708 | |
|---|
| 2709 | char *seq = GB_read_string(gb_seq); |
|---|
| 2710 | int keep_bases = MIN_SEQUENCE_LENGTH-1; |
|---|
| 2711 | |
|---|
| 2712 | for (int i = 0; seq[i]; ++i) { |
|---|
| 2713 | if (!GAP::is_std_gap(seq[i])) { |
|---|
| 2714 | if (keep_bases) --keep_bases; |
|---|
| 2715 | else seq[i] = '.'; |
|---|
| 2716 | } |
|---|
| 2717 | } |
|---|
| 2718 | |
|---|
| 2719 | GB_topSecurityLevel unsecured(gb_seq); |
|---|
| 2720 | TEST_EXPECT_NO_ERROR(GB_write_string(gb_seq, seq)); |
|---|
| 2721 | free(seq); |
|---|
| 2722 | } |
|---|
| 2723 | |
|---|
| 2724 | // remove CloTyrob |
|---|
| 2725 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, PARSIMONY_ORG-1, env, false)); |
|---|
| 2726 | TEST_EXPECT_COMBINES_PERFORMED(env, 4); |
|---|
| 2727 | TEST_EXPECT_EQUAL(env.root_node()->count_leafs(), 14); |
|---|
| 2728 | |
|---|
| 2729 | // attempt to add CloTyrob (should fail because sequence too short) and CorGluta (should stay, because already in tree) |
|---|
| 2730 | TEST_REJECT_NULL(env.root_node()->findLeafNamed("CorGluta")); // has to be in tree |
|---|
| 2731 | { |
|---|
| 2732 | GB_transaction ta(env.gbmain()); |
|---|
| 2733 | GBT_restore_marked_species(env.gbmain(), "CloTyrob;CorGluta"); |
|---|
| 2734 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
|---|
| 2735 | } |
|---|
| 2736 | |
|---|
| 2737 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_ADD, NULp, PARSIMONY_ORG-1, env, false)); |
|---|
| 2738 | TEST_EXPECT_COMBINES_PERFORMED(env, 110); // @@@ why does this perform combines at all? |
|---|
| 2739 | TEST_EXPECT_EQUAL(env.root_node()->count_leafs(), 14); // ok, CloTyrob was not added |
|---|
| 2740 | TEST_REJECT_NULL(env.root_node()->findLeafNamed("CorGluta")); // has to be in tree |
|---|
| 2741 | |
|---|
| 2742 | env.pop(); |
|---|
| 2743 | } |
|---|
| 2744 | } |
|---|
| 2745 | |
|---|
| 2746 | void TEST_SLOW_prot_tree_modifications() { |
|---|
| 2747 | const char *aliname = "ali_tuf_pro"; |
|---|
| 2748 | /* Runs tree modifications (remove, re-add, partial add, mix, branchlength calculation, NNI and global optimization) on the protein alignment and checks parsimony value, saved topology and number of combines after each step. */ |
|---|
| 2749 | PARSIMONY_testenv<AP_sequence_protein> env("TEST_prot.arb", aliname); |
|---|
| 2750 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_prot_opti")); |
|---|
| 2751 | TEST_EXPECT_SAVED_TOPOLOGY(env, "prot-initial"); |
|---|
| 2752 | |
|---|
| 2753 | const int PARSIMONY_ORG = 1081; |
|---|
| 2754 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 10); |
|---|
| 2755 | |
|---|
| 2756 | // [PROTOPTI] as opposed to the nucleotide tests above, the initial tree here is already optimized! compare .@NUCOPTI |
|---|
| 2757 | // -> adding species approximately reproduces initial topology |
|---|
| 2758 | // |
|---|
| 2759 | // diff initial->add-quick: http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/prot-add-quick.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fprot-initial.tree.expected |
|---|
| 2760 | // diff initial->add-NNI: http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/prot-add-NNI.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fprot-initial.tree.expected |
|---|
| 2761 | // |
|---|
| 2762 | // Note: comparing these two diffs also demonstrates why quick-adding w/o NNI suffers |
|---|
| 2763 | |
|---|
| 2764 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, "prot-removed", PARSIMONY_ORG-146, env, true)); // test remove-marked only (same code as part of nt_reAdd) |
|---|
| 2765 | TEST_EXPECT_COMBINES_PERFORMED(env, 5); |
|---|
| 2766 | |
|---|
| 2767 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "prot-add-quick", PARSIMONY_ORG, env, true)); // test quick-add |
|---|
| 2768 | TEST_EXPECT_COMBINES_PERFORMED(env, 213); |
|---|
| 2769 | |
|---|
| 2770 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_READD_NNI, "prot-add-NNI", PARSIMONY_ORG, env, true)); // test add + NNI |
|---|
| 2771 | TEST_EXPECT_COMBINES_PERFORMED(env, 262); |
|---|
| 2772 | |
|---|
| 2773 | // test partial-add |
|---|
| 2774 | { |
|---|
| 2775 | GBDATA *gb_main = env.gbmain(); |
|---|
| 2776 | |
|---|
| 2777 | // create partial species: MucRaceP and StrCoelP use non-overlapping ranges; StrCoelM is created with the same arguments as StrCoelP (one base gets modified below) |
|---|
| 2778 | GBDATA *MucRaceP = createPartialSeqFrom(gb_main, aliname, "MucRaceP", "MucRacem", 0, 60+4); // (+4 = dots inserted into DB at left end) |
|---|
| 2779 | GBDATA *StrCoelP = createPartialSeqFrom(gb_main, aliname, "StrCoelP", "StrCoel9", 66-1+4, 184-1+4); |
|---|
| 2780 | GBDATA *StrCoelM = createPartialSeqFrom(gb_main, aliname, "StrCoelM", "StrCoel9", 66-1+4, 184-1+4); |
|---|
| 2781 | TEST_EXPECT_NO_ERROR(modifyOneBase(StrCoelM, aliname, 'Y', 'H')); // change first 'Y' into 'H' |
|---|
| 2782 | |
|---|
| 2783 | // add StrCoelP (and undo) |
|---|
| 2784 | { |
|---|
| 2785 | env.push(); |
|---|
| 2786 | // StrCoel9 and StrRamo3 do not differ in seq-range of partial -> any of both may be chosen as brother. |
|---|
| 2787 | // behavior should be changed with #605 |
|---|
| 2788 | TEST_EXPECTATION(addingPartialResultsIn(StrCoelP, "StrCoel9;StrRamo3", "prot-addPart-StrCoelP", PARSIMONY_ORG, env)); |
|---|
| 2789 | TEST_EXPECT_COMBINES_PERFORMED(env, 4); |
|---|
| 2790 | env.pop(); |
|---|
| 2791 | } |
|---|
| 2792 | |
|---|
| 2793 | { |
|---|
| 2794 | env.push(); |
|---|
| 2795 | TEST_EXPECTATION(addingPartialResultsIn(MucRaceP, "MucRacem", "prot-addPart-MucRaceP", PARSIMONY_ORG, env)); // add MucRaceP |
|---|
| 2796 | TEST_EXPECT_COMBINES_PERFORMED(env, 6); |
|---|
| 2797 | TEST_EXPECTATION(addingPartialResultsIn(StrCoelP, "StrCoel9;StrRamo3", "prot-addPart-MucRaceP-StrCoelP", PARSIMONY_ORG, env)); // also add StrCoelP |
|---|
| 2798 | TEST_EXPECT_COMBINES_PERFORMED(env, 4); |
|---|
| 2799 | env.pop(); |
|---|
| 2800 | } |
|---|
| 2801 | |
|---|
| 2802 | // now add MucRaceP as full, then StrCoelP and StrCoelM as partials |
|---|
| 2803 | { |
|---|
| 2804 | env.push(); |
|---|
| 2805 | |
|---|
| 2806 | mark_only(MucRaceP); |
|---|
| 2807 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
|---|
| 2808 | { |
|---|
| 2809 | GB_transaction ta(gb_main); |
|---|
| 2810 | TEST_EXPECT_NO_ERROR(GBT_write_int(MucRaceP, "ARB_partial", 0)); // revert species to "full" |
|---|
| 2811 | } |
|---|
| 2812 | |
|---|
| 2813 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "prot-addPartialAsFull-MucRaceP", PARSIMONY_ORG, env, false)); |
|---|
| 2814 | TEST_EXPECT_COMBINES_PERFORMED(env, 156); |
|---|
| 2815 | TEST_EXPECT_EQUAL(is_partial(MucRaceP), 0); // check MucRaceP was added as full sequence |
|---|
| 2816 | TEST_EXPECTATION(addedAsBrotherOf("MucRaceP", "Eukarya EF-Tu", env)); // partial created from MucRacem gets inserted next to this group |
|---|
| 2817 | // Note: looks ok. group contains MucRacem, AbdGlauc and 4 other species |
|---|
| 2818 | |
|---|
| 2819 | // add StrCoelP as partial. |
|---|
| 2820 | // as expected it is placed next to matching full sequences (does not differ in partial range) |
|---|
| 2821 | TEST_EXPECTATION(addingPartialResultsIn(StrCoelP, "StrCoel9;StrRamo3", NULp, PARSIMONY_ORG, env)); |
|---|
| 2822 | TEST_EXPECT_COMBINES_PERFORMED(env, 4); |
|---|
| 2823 | |
|---|
| 2824 | // StrCoelM differs slightly in overlap with StrCoel9/StrRamo3, but has no overlap with MucRaceP |
|---|
| 2825 | // shows bug described in #609 is fixed: |
|---|
| 2826 | TEST_EXPECTATION(addingPartialResultsIn(StrCoelM, "StrCoelP", "prot-addPart-bug609", |
|---|
| 2827 | PARSIMONY_ORG+1, // @@@ known bug - partial should not affect parsimony value; possibly related to ../HELP_SOURCE/source/pa_partial.hlp@WARNINGS |
|---|
| 2828 | env)); |
|---|
| 2829 | TEST_EXPECT_COMBINES_PERFORMED(env, 5); |
|---|
| 2830 | env.pop(); |
|---|
| 2831 | } |
|---|
| 2832 | } |
|---|
| 2833 | |
|---|
| 2834 | TEST_EXPECT_SAVED_TOPOLOGY(env, "prot-initial"); |
|---|
| 2835 | |
|---|
| 2836 | const unsigned mixseed = 8164724; |
|---|
| 2837 | /* expected parsimony values of the steps below (expressed relative to PARSIMONY_ORG) */ |
|---|
| 2838 | const long PARSIMONY_MIXED = PARSIMONY_ORG + 1519; |
|---|
| 2839 | const long PARSIMONY_NNI_MARKED = PARSIMONY_ORG + 1053; |
|---|
| 2840 | const long PARSIMONY_NNI_ALL = PARSIMONY_ORG; |
|---|
| 2841 | const long PARSIMONY_OPTI_MARKED = PARSIMONY_ORG; |
|---|
| 2842 | const long PARSIMONY_OPTI_ALL = PARSIMONY_ORG; // no gain (initial tree already is optimized) |
|---|
| 2843 | |
|---|
| 2844 | // ------------------------------------------------------ |
|---|
| 2845 | // mix tree (original tree already is optimized) |
|---|
| 2846 | |
|---|
| 2847 | GB_random_seed(mixseed); |
|---|
| 2848 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_MIX_TREE, "prot-mixed", PARSIMONY_MIXED, env, false)); |
|---|
| 2849 | TEST_EXPECT_COMBINES_PERFORMED(env, 89); |
|---|
| 2850 | |
|---|
| 2851 | { |
|---|
| 2852 | env.push(); |
|---|
| 2853 | TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_MIXED)); |
|---|
| 2854 | TEST_EXPECT_COMBINES_PERFORMED(env, 234); |
|---|
| 2855 | env.pop(); |
|---|
| 2856 | } |
|---|
| 2857 | |
|---|
| 2858 | // ------------------------------ |
|---|
| 2859 | // test optimize (some) |
|---|
| 2860 | |
|---|
| 2861 | // mark initially marked species |
|---|
| 2862 | { |
|---|
| 2863 | GB_transaction ta(env.gbmain()); |
|---|
| 2864 | |
|---|
| 2865 | GBT_restore_marked_species(env.gbmain(), "CytLyti6;StrRamo3;MucRace2;SacCere5"); |
|---|
| 2866 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
|---|
| 2867 | } |
|---|
| 2868 | |
|---|
| 2869 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_MIXED); |
|---|
| 2870 | |
|---|
| 2871 | // test branchlength calculation |
|---|
| 2872 | // (optimizations below implicitly recalculate branchlengths) |
|---|
| 2873 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "prot-calclength", PARSIMONY_MIXED, env, false)); |
|---|
| 2874 | TEST_EXPECT_COMBINES_PERFORMED(env, 79); |
|---|
| 2875 | |
|---|
| 2876 | // test whether branchlength calculation depends on root-position |
|---|
| 2877 | { |
|---|
| 2878 | AP_tree_edge *orgRootEdge = rootEdge(); |
|---|
| 2879 | |
|---|
| 2880 | env.push(); |
|---|
| 2881 | |
|---|
| 2882 | const char *tested_roots[] = { |
|---|
| 2883 | // "CytLyti6", // no effect on branchlengths |
|---|
| 2884 | "TaxOcell", |
|---|
| 2885 | "MucRace3", |
|---|
| 2886 | "StrCoel9", |
|---|
| 2887 | }; |
|---|
| 2888 | |
|---|
| 2889 | for (size_t r = 0; r<ARRAY_ELEMS(tested_roots); ++r) { |
|---|
| 2890 | TEST_ANNOTATE(tested_roots[r]); |
|---|
| 2891 | const char *leafName = tested_roots[r]; |
|---|
| 2892 | env.root_node()->findLeafNamed(leafName)->set_root(); |
|---|
| 2893 | calc_branchlengths_and_reorder(env.graphic_tree()); |
|---|
| 2894 | orgRootEdge->set_root(); |
|---|
| 2895 | env.graphic_tree()->reorderTree(BIG_BRANCHES_TO_TOP); |
|---|
| 2896 | |
|---|
| 2897 | TEST_EXPECT_SAVED_TOPOLOGY(env, "prot-calclength"); |
|---|
| 2898 | } |
|---|
| 2899 | TEST_EXPECT_COMBINES_PERFORMED(env, 265); |
|---|
| 2900 | |
|---|
| 2901 | env.pop(); |
|---|
| 2902 | } |
|---|
| 2903 | |
|---|
| 2904 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "prot-opti-NNI", PARSIMONY_NNI_MARKED, env, true)); // test recursive NNI |
|---|
| 2905 | TEST_EXPECT_COMBINES_PERFORMED(env, 246); |
|---|
| 2906 | |
|---|
| 2907 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "prot-opti-marked-global", PARSIMONY_OPTI_MARKED, env, true)); // test recursive NNI+KL |
|---|
| 2908 | TEST_EXPECT_COMBINES_PERFORMED(env, 2810); |
|---|
| 2909 | |
|---|
| 2910 | // ----------------------------- |
|---|
| 2911 | // test optimize (all) |
|---|
| 2912 | |
|---|
| 2913 | // mark all species |
|---|
| 2914 | mark_all(env.gbmain()); |
|---|
| 2915 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
|---|
| 2916 | TEST_EXPECT_EQUAL(GBT_count_marked_species(env.gbmain()), 14); |
|---|
| 2917 | |
|---|
| 2918 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_MIXED); |
|---|
| 2919 | |
|---|
| 2920 | // test branchlength calculation |
|---|
| 2921 | // (optimizations below implicitly recalculate branchlengths) |
|---|
| 2922 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "prot-calclength", PARSIMONY_MIXED, env, false)); |
|---|
| 2923 | TEST_EXPECT_COMBINES_PERFORMED(env, 79); |
|---|
| 2924 | |
|---|
| 2925 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "prot-opti-all-NNI", PARSIMONY_NNI_ALL, env, true)); // test recursive NNI |
|---|
| 2926 | TEST_EXPECT_COMBINES_PERFORMED(env, 359); |
|---|
| 2927 | |
|---|
| 2928 | { |
|---|
| 2929 | env.push(); |
|---|
| 2930 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "prot-opti-global", PARSIMONY_OPTI_ALL, env, false)); // test recursive NNI+KL |
|---|
| 2931 | TEST_EXPECT_COMBINES_PERFORMED(env, 1690); |
|---|
| 2932 | |
|---|
| 2933 | TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_OPTI_ALL)); |
|---|
| 2934 | TEST_EXPECT_COMBINES_PERFORMED(env, 215); |
|---|
| 2935 | env.pop(); |
|---|
| 2936 | } |
|---|
| 2937 | } |
|---|
| 2938 | |
|---|
| 2939 | void TEST_node_stack() { |
|---|
| 2940 | // test was used to fix #620: checks that env.push()/env.pop() leave a valid tree after root changes, moves and remove/re-add of marked species |
|---|
| 2941 | |
|---|
| 2942 | const char *aliname = "ali_5s"; |
|---|
| 2943 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", aliname); |
|---|
| 2944 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_test")); |
|---|
| 2945 | TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-initial"); |
|---|
| 2946 | |
|---|
| 2947 | const int PARSIMONY_ORG = 302; |
|---|
| 2948 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 14); |
|---|
| 2949 | |
|---|
| 2950 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
|---|
| 2951 | TEST_EXPECT(env.root_node()->has_valid_root_remarks()); |
|---|
| 2952 | |
|---|
| 2953 | // test set root to CytAquat + pop (works) |
|---|
| 2954 | { |
|---|
| 2955 | env.push(); |
|---|
| 2956 | env.root_node()->findLeafNamed("CytAquat")->set_root(); |
|---|
| 2957 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
|---|
| 2958 | env.pop(); |
|---|
| 2959 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
|---|
| 2960 | } |
|---|
| 2961 | |
|---|
| 2962 | TEST_EXPECT(env.root_node()->has_valid_root_remarks()); |
|---|
| 2963 | |
|---|
| 2964 | // test set root to CloButyr + pop (works) |
|---|
| 2965 | { |
|---|
| 2966 | env.push(); |
|---|
| 2967 | env.root_node()->findLeafNamed("CloButyr")->set_root(); |
|---|
| 2968 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
|---|
| 2969 | env.pop(); |
|---|
| 2970 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
|---|
| 2971 | } |
|---|
| 2972 | |
|---|
| 2973 | TEST_EXPECT(env.root_node()->has_valid_root_remarks()); |
|---|
| 2974 | |
|---|
| 2975 | // test set root to CloBifer + set root to CloTyrob + pop (works) |
|---|
| 2976 | // Note: both species are in same subtree (of root) |
|---|
| 2977 | { |
|---|
| 2978 | env.push(); |
|---|
| 2979 | |
|---|
| 2980 | env.root_node()->findLeafNamed("CloBifer")->set_root(); |
|---|
| 2981 | env.root_node()->findLeafNamed("CloTyrob")->set_root(); |
|---|
| 2982 | |
|---|
| 2983 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
|---|
| 2984 | env.pop(); |
|---|
| 2985 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
|---|
| 2986 | } |
|---|
| 2987 | |
|---|
| 2988 | TEST_EXPECT(env.root_node()->has_valid_root_remarks()); |
|---|
| 2989 | |
|---|
| 2990 | // test set root to CytAquat + set root to CloButyr + pop (failed, fixed by [13138]) |
|---|
| 2991 | TEST_EXPECT_COMBINES_PERFORMED(env, 0); |
|---|
| 2992 | for (int calcCostsBetween = 0; calcCostsBetween<2; ++calcCostsBetween) { |
|---|
| 2993 | TEST_ANNOTATE(GBS_global_string("calcCostsBetween=%i", calcCostsBetween)); |
|---|
| 2994 | |
|---|
| 2995 | TEST_EXPECT_PARSVAL(env, PARSIMONY_ORG); |
|---|
| 2996 | |
|---|
| 2997 | env.push(); |
|---|
| 2998 | |
|---|
| 2999 | env.root_node()->findLeafNamed("CytAquat")->set_root(); |
|---|
| 3000 | |
|---|
| 3001 | if (calcCostsBetween) { |
|---|
| 3002 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 2); |
|---|
| 3003 | } |
|---|
| 3004 | |
|---|
| 3005 | env.root_node()->findLeafNamed("CloButyr")->set_root(); |
|---|
| 3006 | |
|---|
| 3007 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
|---|
| 3008 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 6); |
|---|
| 3009 | |
|---|
| 3010 | env.pop(); |
|---|
| 3011 | |
|---|
| 3012 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
|---|
| 3013 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG); |
|---|
| 3014 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3015 | } |
|---|
| 3016 | /* two nested push/pop frames around a series of moveNextTo() and set_root() calls */ |
|---|
| 3017 | { |
|---|
| 3018 | env.push(); |
|---|
| 3019 | { |
|---|
| 3020 | env.push(); |
|---|
| 3021 | |
|---|
| 3022 | env.root_node()->findLeafNamed("CloInnoc")->moveNextTo(env.root_node()->findLeafNamed("CytAquat"), 0.5); |
|---|
| 3023 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3024 | env.root_node()->findLeafNamed("CloInnoc")->set_root(); |
|---|
| 3025 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3026 | env.root_node()->findLeafNamed("CytAquat")->moveNextTo(env.root_node()->findLeafNamed("CloPaste"), 0.5); |
|---|
| 3027 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3028 | env.root_node()->findLeafNamed("CloPaste")->set_root(); |
|---|
| 3029 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3030 | env.root_node()->findLeafNamed("CloPaste")->moveNextTo(env.root_node()->findLeafNamed("CloInnoc"), 0.5); |
|---|
| 3031 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3032 | |
|---|
| 3033 | { |
|---|
| 3034 | AP_tree_nlen *son_of_brother; |
|---|
| 3035 | AP_tree_nlen *brother_of_father; |
|---|
| 3036 | |
|---|
| 3037 | // COVER1: son of root -> grandson of root |
|---|
| 3038 | { |
|---|
| 3039 | AP_tree_nlen *son_of_root = env.root_node()->get_leftson(); |
|---|
| 3040 | ap_assert(son_of_root); |
|---|
| 3041 | |
|---|
| 3042 | son_of_brother = son_of_root->get_brother()->get_leftson(); |
|---|
| 3043 | son_of_root->moveNextTo(son_of_brother, 0.5); |
|---|
| 3044 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3045 | } |
|---|
| 3046 | |
|---|
| 3047 | // COVER2: grandson of root -> son of brother |
|---|
| 3048 | { |
|---|
| 3049 | AP_tree_nlen *son_of_root = env.root_node()->get_leftson(); |
|---|
| 3050 | AP_tree_nlen *grandson_of_root = son_of_root->get_brother()->get_rightson(); |
|---|
| 3051 | ap_assert(grandson_of_root); |
|---|
| 3052 | |
|---|
| 3053 | son_of_brother = grandson_of_root->get_brother()->get_leftson(); |
|---|
| 3054 | grandson_of_root->moveNextTo(son_of_brother, 0.5); |
|---|
| 3055 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3056 | } |
|---|
| 3057 | |
|---|
| 3058 | AP_tree_nlen *some_leaf = env.root_node()->findLeafNamed("CloBifer"); |
|---|
| 3059 | ap_assert(some_leaf); |
|---|
| 3060 | |
|---|
| 3061 | // COVER3: some leaf -> son of brother |
|---|
| 3062 | son_of_brother = some_leaf->get_brother()->get_leftson(); |
|---|
| 3063 | some_leaf->moveNextTo(son_of_brother, 0.5); |
|---|
| 3064 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3065 | |
|---|
| 3066 | // COVER4: some leaf -> brother of father |
|---|
| 3067 | brother_of_father = some_leaf->get_father()->get_brother(); |
|---|
| 3068 | some_leaf->moveNextTo(brother_of_father, 0.5); |
|---|
| 3069 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3070 | |
|---|
| 3071 | // test forbidden moves: |
|---|
| 3072 | TEST_EXPECT_ERROR_CONTAINS(some_leaf->cantMoveNextTo(some_leaf->get_father()), "Already there"); |
|---|
| 3073 | TEST_EXPECT_ERROR_CONTAINS(some_leaf->cantMoveNextTo(some_leaf->get_brother()), "Already there"); |
|---|
| 3074 | } |
|---|
| 3075 | |
|---|
| 3076 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG+5, 6); |
|---|
| 3077 | |
|---|
| 3078 | env.pop(); |
|---|
| 3079 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3080 | } |
|---|
| 3081 | env.pop(); |
|---|
| 3082 | |
|---|
| 3083 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG); |
|---|
| 3084 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3085 | } |
|---|
| 3086 | |
|---|
| 3087 | // remove + quick add marked + pop() both work |
|---|
| 3088 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "nucl-add-quick", PARSIMONY_ORG-18, env, true)); // test quick-add |
|---|
| 3089 | TEST_EXPECT_COMBINES_PERFORMED(env, 400); |
|---|
| 3090 | |
|---|
| 3091 | TEST_EXPECT(env.root_node()->has_valid_root_remarks()); |
|---|
| 3092 | |
|---|
| 3093 | // remove + quick-add marked + pop() quick-add -> corrupts tree |
|---|
| 3094 | // (root-edge is lost) - NOTE(review): presumably describes the behavior before the fix; the checks below expect valid trees |
|---|
| 3095 | { |
|---|
| 3096 | env.push(); |
|---|
| 3097 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3098 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, -1, env, false)); // test remove-marked only (same code as part of nt_reAdd) |
|---|
| 3099 | TEST_EXPECT_COMBINES_PERFORMED(env, 0); |
|---|
| 3100 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3101 | |
|---|
| 3102 | TEST_VALIDITY(env.all_available_pops_will_produce_valid_trees()); |
|---|
| 3103 | |
|---|
| 3104 | env.push(); |
|---|
| 3105 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3106 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, NULp, -1, env, false)); // test quick-add (same code as part of nt_reAdd) |
|---|
| 3107 | TEST_EXPECT_COMBINES_PERFORMED(env, 400); |
|---|
| 3108 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3109 | TEST_VALIDITY(env.all_available_pops_will_produce_valid_trees()); |
|---|
| 3110 | env.pop(); |
|---|
| 3111 | |
|---|
| 3112 | TEST_VALIDITY(env.root_node()->has_valid_edges()); |
|---|
| 3113 | |
|---|
| 3114 | env.pop(); |
|---|
| 3115 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3116 | TEST_EXPECT_COMBINES_PERFORMED(env, 0); |
|---|
| 3117 | } |
|---|
| 3118 | |
|---|
| 3119 | // same as above, but with only 1 species marked |
|---|
| 3120 | const char *testSingle[] = { |
|---|
| 3121 | "CytAquat", // CytAquat is the only grandson of root (CytAquat located in lower subtree) |
|---|
| 3122 | "CloBifer", // two father nodes between CloBifer and root (CloBifer located in upper subtree) |
|---|
| 3123 | "CloPaste", // two father nodes between CloPaste and root (CloPaste located in upper subtree) |
|---|
| 3124 | "CorGluta", // three father nodes between CorGluta and root (CorGluta located in lower subtree) |
|---|
| 3125 | "CelBiazo", // two father nodes between CelBiazo and root |
|---|
| 3126 | NULp |
|---|
| 3127 | }; |
|---|
| 3128 | |
|---|
| 3129 | for (int i = 0; testSingle[i]; ++i) { |
|---|
| 3130 | for (int swapped = 0; swapped<2; ++swapped) { |
|---|
| 3131 | TEST_ANNOTATE(GBS_global_string("single=%s swapped=%i", testSingle[i], swapped)); |
|---|
| 3132 | |
|---|
| 3133 | env.push(); |
|---|
| 3134 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3135 | { |
|---|
| 3136 | AP_tree_nlen *old_rightson = env.root_node()->get_rightson(); |
|---|
| 3137 | env.root_node()->get_leftson()->get_rightson()->set_root(); |
|---|
| 3138 | old_rightson->get_leftson()->set_root(); |
|---|
| 3139 | old_rightson->set_root(); |
|---|
| 3140 | |
|---|
| 3141 | ap_assert(env.root_node()->get_rightson() == old_rightson); |
|---|
| 3142 | } |
|---|
| 3143 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3144 | |
|---|
| 3145 | mark_only(env.root_node()->findLeafNamed(testSingle[i])->gb_node); |
|---|
| 3146 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
|---|
| 3147 | |
|---|
| 3148 | env.push(); |
|---|
| 3149 | if (swapped) { |
|---|
| 3150 | env.root_node()->swap_sons(); |
|---|
| 3151 | } |
|---|
| 3152 | |
|---|
| 3153 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3154 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, -1, env, false)); // test remove-marked only (same code as part of nt_reAdd) |
|---|
| 3155 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3156 | |
|---|
| 3157 | env.push(); |
|---|
| 3158 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3159 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, NULp, -1, env, false)); // test quick-add (same code as part of nt_reAdd) |
|---|
| 3160 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3161 | env.pop(); |
|---|
| 3162 | |
|---|
| 3163 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3164 | env.pop(); |
|---|
| 3165 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3166 | env.pop(); |
|---|
| 3167 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3168 | } |
|---|
| 3169 | } |
|---|
| 3170 | TEST_EXPECT_COMBINES_PERFORMED(env, 2120); |
|---|
| 3171 | |
|---|
| 3172 | // similar to above (remove+add a grandson of root; here grandson is a subtree with 4 species) |
|---|
| 3173 | |
|---|
| 3174 | for (int remove_from_lower_subtree = 0; remove_from_lower_subtree<2; ++remove_from_lower_subtree) { |
|---|
| 3175 | TEST_ANNOTATE(GBS_global_string("remove_from_lower_subtree=%i", remove_from_lower_subtree)); |
|---|
| 3176 | |
|---|
| 3177 | // mark a complete subtree (which - as a whole - forms a grandson of root). subtree is located in upper part of the tree |
|---|
| 3178 | mark_only(env.root_node()->findLeafNamed("CloButy2")->gb_node); |
|---|
| 3179 | mark(env.root_node()->findLeafNamed("CloButyr")->gb_node); |
|---|
| 3180 | mark(env.root_node()->findLeafNamed("CloCarni")->gb_node); |
|---|
| 3181 | mark(env.root_node()->findLeafNamed("CloPaste")->gb_node); |
|---|
| 3182 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
|---|
| 3183 | |
|---|
| 3184 | env.push(); |
|---|
| 3185 | if (remove_from_lower_subtree) { |
|---|
| 3186 | env.root_node()->swap_sons(); |
|---|
| 3187 | } |
|---|
| 3188 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3189 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, -1, env, false)); // test remove-marked only (same code as part of nt_reAdd) |
|---|
| 3190 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3191 | |
|---|
| 3192 | env.push(); |
|---|
| 3193 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3194 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, NULp, -1, env, false)); // test quick-add (same code as part of nt_reAdd) |
|---|
| 3195 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3196 | env.pop(); |
|---|
| 3197 | |
|---|
| 3198 | TEST_VALIDITY(env.root_node()->has_valid_edges()); // now always valid |
|---|
| 3199 | |
|---|
| 3200 | env.pop(); |
|---|
| 3201 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3202 | } |
|---|
| 3203 | TEST_EXPECT_COMBINES_PERFORMED(env, 584); |
|---|
| 3204 | } |
|---|
| 3205 | |
|---|
| 3206 | void TEST_node_edge_resources() { |
|---|
| 3207 | const char *aliname = "ali_5s"; |
|---|
| 3208 | /* Pins the sizes of the node and edge classes, then checks that the tree stays valid when REMOVE/insert/moveNextTo are combined with two nested env.push() frames that are closed via env.accept_if(). */ |
|---|
| 3209 | // document memsize of nodes and edges: |
|---|
| 3210 | |
|---|
| 3211 | #define STATE_STACK_SIZE sizeof(StateStack) // 8 (Cxx11) or 16 (older C++); maybe 4/8 in 32bit |
|---|
| 3212 | |
|---|
| 3213 | #if defined(ARB_64) |
|---|
| 3214 | TEST_EXPECT_EQUAL(sizeof(AP_tree_nlen), 184 + STATE_STACK_SIZE); |
|---|
| 3215 | TEST_EXPECT_EQUAL(sizeof(AP_tree), 136); |
|---|
| 3216 | TEST_EXPECT_EQUAL(sizeof(ARB_seqtree), 88); |
|---|
| 3217 | TEST_EXPECT_EQUAL(sizeof(TreeNode), 80); |
|---|
| 3218 | #else // !defined(ARB_64) |
|---|
| 3219 | TEST_EXPECT_EQUAL(sizeof(AP_tree_nlen), 112 + STATE_STACK_SIZE); |
|---|
| 3220 | TEST_EXPECT_EQUAL(sizeof(AP_tree), 84); |
|---|
| 3221 | TEST_EXPECT_EQUAL(sizeof(ARB_seqtree), 48); |
|---|
| 3222 | TEST_EXPECT_EQUAL(sizeof(TreeNode), 44); |
|---|
| 3223 | #endif |
|---|
| 3224 | |
|---|
| 3225 | |
|---|
| 3226 | #if defined(ARB_64) |
|---|
| 3227 | TEST_EXPECT_EQUAL(sizeof(AP_tree_edge), 64); |
|---|
| 3228 | #else // !defined(ARB_64) |
|---|
| 3229 | TEST_EXPECT_EQUAL(sizeof(AP_tree_edge), 32); |
|---|
| 3230 | #endif |
|---|
| 3231 | |
|---|
| 3232 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", aliname); |
|---|
| 3233 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_test")); |
|---|
| 3234 | |
|---|
| 3235 | const int PARSIMONY_ORG = 302; |
|---|
| 3236 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 14); |
|---|
| 3237 | |
|---|
| 3238 | AP_tree_nlen *CloButyr = env.root_node()->findLeafNamed("CloButyr"); |
|---|
| 3239 | AP_tree_nlen *CloButy2 = env.root_node()->findLeafNamed("CloButy2"); |
|---|
| 3240 | TEST_EXPECT_EQUAL(CloButyr->get_brother()->name, CloButy2->name); // test they are brothers |
|---|
| 3241 | |
|---|
| 3242 | AP_tree_nlen *CorAquat = env.root_node()->findLeafNamed("CorAquat"); |
|---|
| 3243 | AP_tree_nlen *CurCitre = env.root_node()->findLeafNamed("CurCitre"); |
|---|
| 3244 | TEST_EXPECT_EQUAL(CorAquat->get_brother()->name, CurCitre->name); // test they are brothers |
|---|
| 3245 | |
|---|
| 3246 | CorAquat->REMOVE(); |
|---|
| 3247 | |
|---|
| 3248 | for (int test = 1; test<=8; ++test) { |
|---|
| 3249 | // test == 1 -> provokes common nodes+edges in revert+accept |
|---|
| 3250 | // test == 2 -> provokes common nodes+edges in revert+accept |
|---|
| 3251 | // test == 3 -> provokes common nodes+edges in revert+accept |
|---|
| 3252 | // tests 4-7 do not provoke common nodes or edges |
|---|
| 3253 | /* test == 8 -> CorAquat is inserted and removed again inside the inner stack frame only (nothing is done in the outer frame) */ |
|---|
| 3254 | for (int mode = 0; mode<=3; ++mode) { |
|---|
| 3255 | bool accept_outer = mode&2; |
|---|
| 3256 | bool accept_inner = mode&1; |
|---|
| 3257 | |
|---|
| 3258 | TEST_ANNOTATE(GBS_global_string("accept_outer=%i accept_inner=%i (mode=%i, test=%i)", accept_outer, accept_inner, mode, test)); |
|---|
| 3259 | |
|---|
| 3260 | TEST_EXPECT_NULL(CorAquat->get_father()); |
|---|
| 3261 | TEST_EXPECT(CloButyr->get_brother() == CloButy2); |
|---|
| 3262 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3263 | |
|---|
| 3264 | env.push(); |
|---|
| 3265 | |
|---|
| 3266 | switch (test) { |
|---|
| 3267 | case 1: CorAquat->insert(CurCitre); break; |
|---|
| 3268 | case 2: CorAquat->insert(CurCitre); break; |
|---|
| 3269 | case 3: break; |
|---|
| 3270 | case 4: CloButyr->REMOVE(); break; |
|---|
| 3271 | case 5: CloButyr->REMOVE(); break; |
|---|
| 3272 | case 6: break; |
|---|
| 3273 | case 7: CloButyr->moveNextTo(CurCitre, 0.5); break; |
|---|
| 3274 | case 8: break; |
|---|
| 3275 | default: ap_assert(0); break; |
|---|
| 3276 | } |
|---|
| 3277 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3278 | |
|---|
| 3279 | { |
|---|
| 3280 | env.push(); |
|---|
| 3281 | |
|---|
| 3282 | switch (test) { |
|---|
| 3283 | case 1: CorAquat->REMOVE(); break; |
|---|
| 3284 | case 2: break; |
|---|
| 3285 | case 3: CorAquat->insert(CurCitre); break; |
|---|
| 3286 | case 4: CloButyr->insert(CloButy2); break; |
|---|
| 3287 | case 5: break; |
|---|
| 3288 | case 6: CloButyr->REMOVE(); break; |
|---|
| 3289 | case 7: CloButyr->moveNextTo(CloButy2, 0.5); break; |
|---|
| 3290 | case 8: CorAquat->insert(CurCitre); CorAquat->REMOVE(); break; |
|---|
| 3291 | default: ap_assert(0); break; |
|---|
| 3292 | } |
|---|
| 3293 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3294 | |
|---|
| 3295 | env.accept_if(accept_inner); |
|---|
| 3296 | } |
|---|
| 3297 | |
|---|
| 3298 | switch (test) { |
|---|
| 3299 | case 1: break; |
|---|
| 3300 | case 2: CorAquat->REMOVE(); break; |
|---|
| 3301 | case 3: if (CorAquat->father) CorAquat->REMOVE(); break; |
|---|
| 3302 | case 4: break; |
|---|
| 3303 | case 5: CloButyr->insert(CloButy2); break; |
|---|
| 3304 | case 6: if (!CloButyr->father) CloButyr->insert(CloButy2); break; |
|---|
| 3305 | case 7: CloButyr->REMOVE(); break; |
|---|
| 3306 | case 8: break; |
|---|
| 3307 | default: ap_assert(0); break; |
|---|
| 3308 | } |
|---|
| 3309 | TEST_EXPECT_VALID_TREE(env.root_node()); |
|---|
| 3310 | |
|---|
| 3311 | env.accept_if(accept_outer); |
|---|
| 3312 | |
|---|
| 3313 | // manually revert changes (outside any stack frame) |
|---|
| 3314 | if (CorAquat->father) CorAquat->REMOVE(); |
|---|
| 3315 | if (!CloButyr->father) CloButyr->insert(CloButy2); |
|---|
| 3316 | } |
|---|
| 3317 | } |
|---|
| 3318 | |
|---|
| 3319 | CorAquat->insert(CurCitre); |
|---|
| 3320 | |
|---|
| 3321 | TEST_EXPECT_PARSVAL(env, PARSIMONY_ORG); |
|---|
| 3322 | env.combines_performed(); // accept any no of combines |
|---|
| 3323 | } |
|---|
| 3324 | |
|---|
| 3325 | #endif // UNIT_TESTS |
|---|
| 3326 | |
|---|
| 3327 | // -------------------------------------------------------------------------------- |
|---|
| 3328 | |
|---|