1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : PARS_main.cxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #include "PerfMeter.h" |
---|
12 | #include "pars_main.hxx" |
---|
13 | #include "pars_klprops.hxx" |
---|
14 | #include "pars_awars.h" |
---|
15 | #include "ap_tree_nlen.hxx" |
---|
16 | #include "ap_main.hxx" |
---|
17 | |
---|
18 | #include <ColumnStat.hxx> |
---|
19 | #include <gui_aliview.hxx> |
---|
20 | #include <macros.hxx> |
---|
21 | #include <nds.h> |
---|
22 | #include <TreeCallbacks.hxx> |
---|
23 | |
---|
24 | #include <aw_awars.hxx> |
---|
25 | #include <aw_preset.hxx> |
---|
26 | #include <aw_msg.hxx> |
---|
27 | #include <aw_root.hxx> |
---|
28 | #include <aw_question.hxx> |
---|
29 | |
---|
30 | #include <awt.hxx> |
---|
31 | #include <awt_sel_boxes.hxx> |
---|
32 | #include <awt_filter.hxx> |
---|
33 | #include <awt_config_manager.hxx> |
---|
34 | |
---|
35 | #include <arb_progress.h> |
---|
36 | #include <arb_misc.h> |
---|
37 | #include <arb_defs.h> |
---|
38 | #include <arb_global_defs.h> |
---|
39 | |
---|
40 | #include <ad_cb.h> |
---|
41 | |
---|
42 | #include <list> |
---|
43 | #include <map> |
---|
44 | #include <mod_rlimit.h> |
---|
45 | |
---|
46 | #if defined(DEBUG) |
---|
47 | # define TESTMENU |
---|
48 | #endif // DEBUG |
---|
49 | |
---|
50 | using namespace std; |
---|
51 | |
---|
52 | AW_HEADER_MAIN |
---|
53 | |
---|
54 | #define AWAR_COLUMNSTAT_BASE "tmp/pars/colstat" |
---|
55 | #define AWAR_COLUMNSTAT_NAME AWAR_COLUMNSTAT_BASE "/name" |
---|
56 | |
---|
57 | #define AWT_TREE_PARS(ntw) DOWNCAST(AWT_graphic_parsimony*, (ntw)->gfx) |
---|
58 | |
---|
59 | static ArbParsimony *GLOBAL_PARS = NULp; |
---|
60 | |
---|
61 | inline AWT_graphic_parsimony *global_tree() { return GLOBAL_PARS->get_tree(); } |
---|
62 | inline AP_pars_root *global_tree_root() { return global_tree()->get_tree_root(); } |
---|
63 | |
---|
64 | // waaah more globals :( |
---|
65 | AP_main *ap_main; // @@@ move into ArbParsimony? or eliminate ArbParsimony |
---|
66 | |
---|
67 | void ArbParsimony::set_tree(AWT_graphic_parsimony *tree_) { |
---|
68 | ap_assert(!tree); // only call once |
---|
69 | tree = tree_; |
---|
70 | ap_main->set_tree_root(tree); |
---|
71 | } |
---|
72 | |
---|
73 | static void set_keep_ghostnodes() { |
---|
74 | // avoid that saving tree to DB does delete removed nodes |
---|
75 | // (hack to fix #528) |
---|
76 | // see ../ARBDB/adtree.cxx@keep_ghostnodes |
---|
77 | GBDATA *gb_tree = ap_main->get_tree_root()->get_gb_tree(); |
---|
78 | GB_transaction ta(gb_tree); |
---|
79 | GBDATA *gb_keep = GB_searchOrCreate_int(gb_tree, "keep_ghostnodes", 1); |
---|
80 | ASSERT_NO_ERROR(GB_set_temporary(gb_keep)); |
---|
81 | } |
---|
82 | static void delete_kept_ghostnodes() { |
---|
83 | if (ap_main->get_graphic_tree()) { |
---|
84 | GBDATA *gb_tree = ap_main->get_tree_root()->get_gb_tree(); |
---|
85 | GB_transaction ta(gb_tree); |
---|
86 | |
---|
87 | GBDATA *gb_keep = GB_entry(gb_tree, "keep_ghostnodes"); |
---|
88 | if (gb_keep) { // e.g. wrong for quick-add species |
---|
89 | GB_ERROR error = GB_delete(gb_keep); |
---|
90 | if (!error) { |
---|
91 | if (ap_main->get_tree_root()->was_saved()) { |
---|
92 | // if tree was saved, DB may contain ghostnodes |
---|
93 | // -> save again to delete them |
---|
94 | error = global_tree()->save_to_DB(GB_get_root(gb_tree), NULp); |
---|
95 | } |
---|
96 | } |
---|
97 | if (error) aw_message(error); |
---|
98 | } |
---|
99 | } |
---|
100 | } |
---|
101 | |
---|
102 | __ATTR__NORETURN static void pars_exit(AW_window *aww) { |
---|
103 | AW_root *aw_root = aww->get_root(); |
---|
104 | shutdown_macro_recording(aw_root); |
---|
105 | |
---|
106 | ap_main->accept_all(); |
---|
107 | delete_kept_ghostnodes(); |
---|
108 | |
---|
109 | aw_root->unlink_awars_from_DB(ap_main->get_gb_main()); |
---|
110 | #if defined(DEBUG) |
---|
111 | AWT_browser_forget_db(ap_main->get_gb_main()); |
---|
112 | #endif // DEBUG |
---|
113 | delete ap_main; // closes DB |
---|
114 | ap_main = NULp; |
---|
115 | |
---|
116 | exit(EXIT_SUCCESS); |
---|
117 | } |
---|
118 | |
---|
119 | static void AP_user_push_cb(AW_window *aww) { |
---|
120 | ap_main->remember_user_state(); |
---|
121 | aww->get_root()->awar(AWAR_STACKPOINTER)->write_int(ap_main->get_user_push_counter()); |
---|
122 | } |
---|
123 | |
---|
124 | static void AP_user_pop_cb(AW_window *aww, TREE_canvas *ntw) { |
---|
125 | if (ap_main->get_user_push_counter()<=0) { |
---|
126 | aw_message("No tree on stack."); |
---|
127 | return; |
---|
128 | } |
---|
129 | |
---|
130 | AWT_auto_refresh allowed_on(ntw); |
---|
131 | ap_main->revert_user_state(); |
---|
132 | ntw->request_save(); |
---|
133 | |
---|
134 | aww->get_root()->awar(AWAR_STACKPOINTER)->write_int(ap_main->get_user_push_counter()); |
---|
135 | if (ap_main->get_user_push_counter() <= 0) { // last tree was popped => push again |
---|
136 | AP_user_push_cb(aww); |
---|
137 | } |
---|
138 | } |
---|
139 | |
---|
140 | class InsertData { |
---|
141 | bool abort_flag; |
---|
142 | arb_progress progress; |
---|
143 | |
---|
144 | public: |
---|
145 | |
---|
146 | bool quick_add_flag; |
---|
147 | InsertData(bool quick, long spec_count) |
---|
148 | : abort_flag(false), |
---|
149 | progress(GBS_global_string("Inserting %li species", spec_count), spec_count), |
---|
150 | quick_add_flag(quick) |
---|
151 | {} |
---|
152 | |
---|
153 | bool aborted() const { return abort_flag; } |
---|
154 | void set_aborted(bool aborted_) { abort_flag = aborted_; } |
---|
155 | |
---|
156 | void inc() { |
---|
157 | progress.inc(); |
---|
158 | abort_flag = progress.aborted(); |
---|
159 | } |
---|
160 | |
---|
161 | arb_progress& get_progress() { return progress; } |
---|
162 | }; |
---|
163 | |
---|
164 | |
---|
165 | static int sort_sequences_by_length(const char*, long leaf0_ptr, const char*, long leaf1_ptr) { // @@@ any chance to make this typesafe? |
---|
166 | AP_tree_nlen *leaf0 = (AP_tree_nlen*)leaf0_ptr; |
---|
167 | AP_tree_nlen *leaf1 = (AP_tree_nlen*)leaf1_ptr; |
---|
168 | |
---|
169 | AP_FLOAT len0 = leaf0->get_seq()->weighted_base_count(); |
---|
170 | AP_FLOAT len1 = leaf1->get_seq()->weighted_base_count(); |
---|
171 | |
---|
172 | // longest sequence first |
---|
173 | if (len0<len1) return 1; |
---|
174 | if (len0>len1) return -1; |
---|
175 | |
---|
176 | // if length equal -> determine order by species name (just to have a defined order!) |
---|
177 | int cmp = strcmp(leaf1->name, leaf0->name); |
---|
178 | ap_assert(cmp != 0); |
---|
179 | return cmp; |
---|
180 | } |
---|
181 | |
---|
182 | static long transform_gbd_to_leaf(const char *key, long val, void *) { |
---|
183 | if (!val) return val; |
---|
184 | |
---|
185 | // @@@ instead implement create_linked_leaf(), then use that? |
---|
186 | |
---|
187 | GBDATA *gb_node = (GBDATA *)val; |
---|
188 | AP_pars_root *troot = ap_main->get_tree_root(); |
---|
189 | AP_tree_nlen *leaf = DOWNCAST(AP_tree_nlen*, troot->makeNode()); |
---|
190 | |
---|
191 | leaf->forget_origin(); // new leaf is not part of tree yet |
---|
192 | |
---|
193 | leaf->gb_node = gb_node; |
---|
194 | leaf->name = ARB_strdup(key); |
---|
195 | leaf->markAsLeaf(); |
---|
196 | |
---|
197 | leaf->set_seq(troot->get_seqTemplate()->dup()); |
---|
198 | GB_ERROR error = leaf->get_seq()->bind_to_species(gb_node); |
---|
199 | if (!error) { |
---|
200 | if (leaf->get_seq()->weighted_base_count() < MIN_SEQUENCE_LENGTH) { |
---|
201 | error = GBS_global_string("Species %s has too short sequence (%f, minimum is %i)", |
---|
202 | key, |
---|
203 | leaf->get_seq()->weighted_base_count(), |
---|
204 | MIN_SEQUENCE_LENGTH); |
---|
205 | } |
---|
206 | } |
---|
207 | if (error) { |
---|
208 | GBT_message(gb_node, error); |
---|
209 | destroy(leaf, troot); leaf = NULp; |
---|
210 | } |
---|
211 | return (long)leaf; |
---|
212 | } |
---|
213 | |
---|
214 | typedef vector<AP_tree_nlen*> InsertedSpecies; |
---|
215 | |
---|
216 | static long toInserted(const char *, long val, void *cd_toInsert) { |
---|
217 | InsertedSpecies *toInsert = (InsertedSpecies*)cd_toInsert; |
---|
218 | AP_tree_nlen *node = (AP_tree_nlen*)val; |
---|
219 | |
---|
220 | toInsert->push_back(node); |
---|
221 | return 0; |
---|
222 | } |
---|
223 | |
---|
224 | inline int maxAllowedInsertions(int inTree) { |
---|
225 | // max. species allowed to insert (in one pass) into a tree with 'inTree' leafs |
---|
226 | return inTree/2; |
---|
227 | } |
---|
228 | inline int calcInsertNow(int toInsert, int inTree) { |
---|
229 | // calculate number of species added in next pass |
---|
230 | return std::min(toInsert, maxAllowedInsertions(inTree)); |
---|
231 | } |
---|
232 | |
---|
233 | static long calc_steps(int toInsert, int inTree) { |
---|
234 | ap_assert((toInsert+inTree) >= 2); |
---|
235 | |
---|
236 | if (!toInsert) return 0; |
---|
237 | if (!inTree) return 1 + calc_steps(toInsert-2, 2); |
---|
238 | |
---|
239 | int edges = leafs_2_edges(inTree, UNROOTED); |
---|
240 | int insertNow = calcInsertNow(toInsert, inTree); |
---|
241 | |
---|
242 | return (edges+1)*insertNow + calc_steps(toInsert-insertNow, inTree+insertNow); // +1 for final step (=actual insertion of species) |
---|
243 | } |
---|
244 | |
---|
245 | class AP_subtree { // defines a subtree |
---|
246 | AP_tree_nlen *subNode; |
---|
247 | AP_tree_nlen *upNode; |
---|
248 | |
---|
249 | bool valid() const { return subNode && upNode; } |
---|
250 | |
---|
251 | public: |
---|
252 | AP_subtree() : subNode(NULp), upNode(NULp) {} |
---|
253 | AP_subtree(AP_tree_edge *e, AP_tree_nlen *sub_node) : |
---|
254 | subNode(sub_node), |
---|
255 | upNode(e->otherNode(subNode)) |
---|
256 | {} |
---|
257 | |
---|
258 | AP_tree_edge *edgeToSubtree() const { ap_assert(valid()); return upNode->edgeTo(subNode); } |
---|
259 | AP_tree_nlen *subtreeRoot() const { return subNode; } |
---|
260 | |
---|
261 | void setSubtreeRoot(AP_tree_nlen *new_subtree) { |
---|
262 | ap_assert(upNode->edgeTo(new_subtree)); |
---|
263 | subNode = new_subtree; |
---|
264 | } |
---|
265 | }; |
---|
266 | |
---|
267 | struct EdgeBetween : private AP_subtree { |
---|
268 | // semantically same as AP_tree_edge, but survives tree-modifications which modify edges (like insert+moveNextTo/moveTo) |
---|
269 | |
---|
270 | EdgeBetween() {} |
---|
271 | EdgeBetween(AP_tree_edge *e) : AP_subtree(e, e->sonNode()) {} |
---|
272 | AP_tree_edge *find() const { return edgeToSubtree(); } |
---|
273 | }; |
---|
274 | |
---|
275 | struct BestEdge { |
---|
276 | Mutations pars; |
---|
277 | EdgeBetween between; // need to store pair of AP_tree_nlen here |
---|
278 | // (using AP_tree_edge is not stable; may move elsewhere by calls insert() or moveNextTo()!) |
---|
279 | |
---|
280 | BestEdge() : pars(-1) {} |
---|
281 | BestEdge(const EdgeBetween& betw, Mutations p) : pars(p), between(betw) {} |
---|
282 | |
---|
283 | AP_tree_edge *edge() const { return between.find(); } |
---|
284 | }; |
---|
285 | |
---|
286 | struct NodeInsertOrder { |
---|
287 | bool operator() (AP_tree_nlen *i, AP_tree_nlen *j) { return strcmp(i->name, j->name)<0; } |
---|
288 | }; |
---|
289 | |
---|
290 | typedef InsertedSpecies::const_iterator InsertSpeciesIterator; |
---|
291 | |
---|
292 | static void insert_species_into_tree(const InsertSpeciesIterator begin, const InsertSpeciesIterator end, arb_progress& progress) { |
---|
293 | typedef map<AP_tree_nlen*, BestEdge> BestEdge4Node; |
---|
294 | BestEdge4Node bestpos; |
---|
295 | |
---|
296 | ap_assert(begin != end); |
---|
297 | |
---|
298 | { |
---|
299 | ap_main->remember(); |
---|
300 | |
---|
301 | EdgeChain chain(rootEdge(), ANY_EDGE, false); |
---|
302 | ap_assert(chain.size()>0); |
---|
303 | |
---|
304 | bool speciesInserted = false; |
---|
305 | |
---|
306 | while (chain) { |
---|
307 | AP_tree_edge *edge = *chain; ++chain; |
---|
308 | edge->set_root(); |
---|
309 | |
---|
310 | EdgeBetween betweenNodes(edge); |
---|
311 | |
---|
312 | InsertSpeciesIterator curr = begin; |
---|
313 | AP_tree_nlen *species = *curr++; |
---|
314 | |
---|
315 | if (speciesInserted) { |
---|
316 | species->moveTo(edge); |
---|
317 | } |
---|
318 | else { |
---|
319 | species->insert(edge->sonNode()); // edge is root-edge -> son does not matter |
---|
320 | speciesInserted = true; |
---|
321 | } |
---|
322 | |
---|
323 | species->set_root(); // => only needs one combine when exchanging species |
---|
324 | |
---|
325 | Mutations pars = rootNode()->costs(); |
---|
326 | BestEdge4Node::iterator found = bestpos.find(species); |
---|
327 | if (found == bestpos.end() || pars<found->second.pars) { |
---|
328 | bestpos[species] = BestEdge(betweenNodes, pars); |
---|
329 | } |
---|
330 | ++progress; |
---|
331 | |
---|
332 | AP_tree_nlen *rot_node = rootNode()->get_leftson(); // rot=rest of tree |
---|
333 | if (rot_node == species) { |
---|
334 | rot_node = rot_node->get_brother(); |
---|
335 | } |
---|
336 | ap_assert(rot_node->get_brother() == species); |
---|
337 | |
---|
338 | AP_combinableSeq *rot_seq = rot_node->get_seq(); |
---|
339 | Mutations rot_costs = rot_node->stored_costs(); |
---|
340 | |
---|
341 | ap_assert(species->stored_costs() == 0); // leaf has no mutations |
---|
342 | |
---|
343 | while (1) { |
---|
344 | if (curr == end) break; |
---|
345 | |
---|
346 | AP_tree_nlen *nextSpec = *curr++; |
---|
347 | AP_combinableSeq *nextSeq = nextSpec->get_seq(); |
---|
348 | |
---|
349 | pars = nextSeq->mutations_if_combined_with(rot_seq) + rot_costs; |
---|
350 | found = bestpos.find(nextSpec); |
---|
351 | if (found == bestpos.end() || pars<found->second.pars) { |
---|
352 | bestpos[nextSpec] = BestEdge(betweenNodes, pars); |
---|
353 | } |
---|
354 | ++progress; |
---|
355 | } |
---|
356 | } |
---|
357 | |
---|
358 | ap_main->revert(); |
---|
359 | } |
---|
360 | |
---|
361 | // create insert lists for each used insert position: |
---|
362 | typedef list<AP_tree_nlen*> NodeList; |
---|
363 | typedef map<AP_tree_edge*, NodeList> NodesAtEdge; |
---|
364 | |
---|
365 | NodesAtEdge atEdge; |
---|
366 | for (InsertSpeciesIterator s = begin; s != end; ++s) { |
---|
367 | const BestEdge& best = bestpos[*s]; |
---|
368 | AP_tree_edge *edge = best.edge(); |
---|
369 | |
---|
370 | ap_assert(edge != NULp); |
---|
371 | |
---|
372 | NodesAtEdge::iterator at = atEdge.find(edge); |
---|
373 | if (at == atEdge.end()) { |
---|
374 | atEdge[edge] = NodeList(1, *s); |
---|
375 | } |
---|
376 | else { |
---|
377 | at->second.push_back(*s); |
---|
378 | } |
---|
379 | } |
---|
380 | |
---|
381 | #if defined(DEVEL_RALF) |
---|
382 | // testcode: test whether all found edges are members of the tree |
---|
383 | // (got some problem with insert/REMOVE while root is next to inserted/removed node) |
---|
384 | |
---|
385 | set<AP_tree_edge*> edgeInTree; |
---|
386 | { |
---|
387 | EdgeChain chain(rootEdge(), ANY_EDGE, false); |
---|
388 | while (chain) { |
---|
389 | AP_tree_edge *edge = *chain; ++chain; |
---|
390 | edgeInTree.insert(edge); |
---|
391 | } |
---|
392 | |
---|
393 | for (BestEdge4Node::iterator b = bestpos.begin(); b != bestpos.end(); ++b) { |
---|
394 | AP_tree_edge *e = b->second.edge(); |
---|
395 | |
---|
396 | if (edgeInTree.find(e) == edgeInTree.end()) { |
---|
397 | GBK_terminate("remembered edge has been removed from tree"); |
---|
398 | } |
---|
399 | } |
---|
400 | } |
---|
401 | #endif |
---|
402 | |
---|
403 | // build list of edges where insert takes place (value=iterator into 'atEdge') |
---|
404 | // => insert in determined order |
---|
405 | typedef list<NodesAtEdge::iterator> InsertOrder; |
---|
406 | InsertOrder insertOrder; |
---|
407 | { |
---|
408 | EdgeChain chain(rootEdge(), ANY_EDGE, false); |
---|
409 | while (chain) { |
---|
410 | AP_tree_edge *edge = *chain; ++chain; |
---|
411 | |
---|
412 | NodesAtEdge::iterator at = atEdge.find(edge); |
---|
413 | if (at != atEdge.end()) { |
---|
414 | insertOrder.push_back(at); |
---|
415 | } |
---|
416 | } |
---|
417 | } |
---|
418 | |
---|
419 | typedef list<AP_subtree> OptiList; |
---|
420 | OptiList optiPos; |
---|
421 | |
---|
422 | // insert species to tree according to insert-lists: |
---|
423 | for (InsertOrder::iterator o = insertOrder.begin(); o != insertOrder.end(); ++o) { |
---|
424 | NodesAtEdge::iterator e = *o; |
---|
425 | AP_tree_edge *edge = e->first; |
---|
426 | NodeList& nodes = e->second; |
---|
427 | |
---|
428 | edge->set_root(); |
---|
429 | |
---|
430 | AP_tree_nlen *brother = edge->sonNode(); |
---|
431 | size_t nodes_size = nodes.size(); |
---|
432 | |
---|
433 | #if defined(ASSERTION_USED) |
---|
434 | ap_assert(bestpos[nodes.front()].edge() == edge); |
---|
435 | #endif |
---|
436 | |
---|
437 | if (nodes_size == 1) { |
---|
438 | nodes.front()->insert(brother); |
---|
439 | ASSERT_VALID_TREE(rootNode()); |
---|
440 | } |
---|
441 | else { |
---|
442 | bool atLeaf = brother->is_leaf(); |
---|
443 | if (!atLeaf && edge->is_leaf_edge()) { // at leaf edge -> make sure brother points to leaf node |
---|
444 | brother = edge->notSonNode(); |
---|
445 | ap_assert(brother->is_leaf()); |
---|
446 | atLeaf = true; |
---|
447 | } |
---|
448 | |
---|
449 | #if defined(UNIT_TESTS) |
---|
450 | if (RUNNING_TEST()) { |
---|
451 | // use a determined order to insert multiple species at one position. |
---|
452 | // Does not produce "better" topologies, just makes result independent from insert order. |
---|
453 | typedef vector<AP_tree_nlen*> NodeVector; |
---|
454 | |
---|
455 | NodeVector toSort(nodes.begin(), nodes.end()); |
---|
456 | sort(toSort.begin(), toSort.end(), NodeInsertOrder()); |
---|
457 | nodes = NodeList(toSort.begin(), toSort.end()); |
---|
458 | } |
---|
459 | #endif |
---|
460 | |
---|
461 | AP_tree_nlen *at = brother; |
---|
462 | for (NodeList::iterator n = nodes.begin(); n != nodes.end(); ++n) { |
---|
463 | (*n)->insert(at); |
---|
464 | at = *n; // only insert 1st node at 'brother', insert following nodes next to previously added nodes |
---|
465 | } |
---|
466 | |
---|
467 | ASSERT_VALID_TREE(rootNode()); |
---|
468 | |
---|
469 | AP_tree_nlen *ourFather = brother->get_father(); |
---|
470 | AP_tree_nlen *addedSubtree = brother->get_brother(); // contains all added species |
---|
471 | ap_assert(addedSubtree->is_ancestor_of(at)); |
---|
472 | |
---|
473 | if (atLeaf) { |
---|
474 | // if inserted at leaf edge -> perform NNI at parent edge (i.e. including the leaf) |
---|
475 | AP_tree_edge *toRest = ourFather->nextEdge(); |
---|
476 | for (int i = 0; i<2; ++i) { |
---|
477 | AP_tree_nlen *rest = toRest->otherNode(ourFather); |
---|
478 | if (rest != brother && rest != addedSubtree) { |
---|
479 | break; |
---|
480 | } |
---|
481 | toRest = ourFather->nextEdge(toRest); |
---|
482 | } |
---|
483 | |
---|
484 | optiPos.push_back(AP_subtree(toRest, ourFather)); |
---|
485 | ap_assert(optiPos.back().subtreeRoot() == ourFather); |
---|
486 | } |
---|
487 | else { |
---|
488 | if (nodes_size>2) { // if inserted at internal edge && only 2 species inserted -> NNI makes no sense |
---|
489 | // Store (directed) edge to brother (for later optimization of subtree): |
---|
490 | AP_tree_edge *subEdge = ourFather->edgeTo(addedSubtree); |
---|
491 | optiPos.push_back(AP_subtree(subEdge, addedSubtree)); |
---|
492 | ap_assert(optiPos.back().subtreeRoot() == addedSubtree); |
---|
493 | } |
---|
494 | } |
---|
495 | } |
---|
496 | progress.inc_by(nodes_size); |
---|
497 | } |
---|
498 | |
---|
499 | // Optimize all inserts of multiple species at one position: |
---|
500 | { |
---|
501 | arb_suppress_progress suppress_child; // suppress implicit progress count caused by nni_rec |
---|
502 | |
---|
503 | AP_FLOAT curr_pars = rootNode()->costs(); |
---|
504 | AP_FLOAT prev_pars = curr_pars; |
---|
505 | |
---|
506 | int loop = 0; |
---|
507 | |
---|
508 | do { |
---|
509 | ++loop; |
---|
510 | prev_pars = curr_pars; |
---|
511 | for (OptiList::iterator op = optiPos.begin(); op != optiPos.end(); ++op) { |
---|
512 | AP_tree_edge *subtreeEdge = op->edgeToSubtree(); |
---|
513 | AP_tree_nlen *subtreeRoot = op->subtreeRoot(); |
---|
514 | |
---|
515 | subtreeEdge->set_root(); |
---|
516 | ap_assert(subtreeEdge->isConnectedTo(subtreeRoot)); |
---|
517 | AP_tree_nlen *father = subtreeEdge->otherNode(subtreeRoot); |
---|
518 | |
---|
519 | AP_FLOAT this_pars; |
---|
520 | while (1) { |
---|
521 | ap_assert(subtreeEdge->isConnectedTo(father)); // otherwise block fails |
---|
522 | this_pars = subtreeEdge->nni_rec(SKIP_LEAF_EDGES, AP_BL_NNI_ONLY, father, false); |
---|
523 | if (!(this_pars<curr_pars)) { |
---|
524 | ap_assert(!(this_pars>curr_pars)); |
---|
525 | break; |
---|
526 | } |
---|
527 | curr_pars = this_pars; |
---|
528 | } |
---|
529 | |
---|
530 | ap_assert(subtreeEdge->isConnectedTo(father)); // otherwise next command fails |
---|
531 | AP_tree_nlen *newSubtreeRoot = subtreeEdge->otherNode(father); |
---|
532 | if (newSubtreeRoot != subtreeRoot) { |
---|
533 | op->setSubtreeRoot(newSubtreeRoot); |
---|
534 | } |
---|
535 | } |
---|
536 | } |
---|
537 | while (curr_pars<prev_pars); |
---|
538 | } |
---|
539 | } |
---|
540 | |
---|
541 | static void insert_all_species_into_tree(GB_HASH*& hash) { |
---|
542 | // inserts all species (from hash) into tree |
---|
543 | |
---|
544 | AP_tree_nlen *tree = rootNode(); |
---|
545 | |
---|
546 | int inTree = tree ? tree->count_leafs() : 0; |
---|
547 | int toInsert = GBS_hash_elements(hash); |
---|
548 | |
---|
549 | ap_assert(toInsert); |
---|
550 | |
---|
551 | long steps = calc_steps(toInsert, inTree); |
---|
552 | arb_progress progress(steps); |
---|
553 | |
---|
554 | // move species to insert to a stack |
---|
555 | InsertedSpecies speciesToInsert; |
---|
556 | speciesToInsert.reserve(toInsert); |
---|
557 | |
---|
558 | if (maxAllowedInsertions(inTree)<toInsert) { |
---|
559 | // insert longest sequences first |
---|
560 | GBS_hash_do_sorted_loop(hash, toInserted, sort_sequences_by_length, &speciesToInsert); |
---|
561 | } |
---|
562 | else { |
---|
563 | // insert all sequences (order should not matter) |
---|
564 | GBS_hash_do_loop(hash, toInserted, &speciesToInsert); |
---|
565 | } |
---|
566 | GBS_free_hash(hash); |
---|
567 | hash = NULp; |
---|
568 | |
---|
569 | ap_assert(toInsert != 2); // @@@ need to test this case |
---|
570 | |
---|
571 | InsertSpeciesIterator curr = speciesToInsert.begin(); |
---|
572 | InsertSpeciesIterator end = speciesToInsert.end(); |
---|
573 | |
---|
574 | AP_tree_edge *oldRootEdge = NULp; |
---|
575 | if (!tree) { // create initial tree |
---|
576 | AP_pars_root *troot = ap_main->get_tree_root(); |
---|
577 | |
---|
578 | AP_tree_nlen *s1 = *curr++; |
---|
579 | AP_tree_nlen *s2 = *curr++; |
---|
580 | |
---|
581 | s1->initial_insert(s2, troot); |
---|
582 | |
---|
583 | inTree = 2; |
---|
584 | toInsert -= 2; |
---|
585 | |
---|
586 | ++progress; |
---|
587 | } |
---|
588 | else { |
---|
589 | oldRootEdge = rootEdge(); |
---|
590 | } |
---|
591 | |
---|
592 | ASSERT_VALID_TREE(rootNode()); |
---|
593 | |
---|
594 | while (1) { |
---|
595 | int insertNow = calcInsertNow(toInsert, inTree); |
---|
596 | ap_assert(insertNow<=toInsert); |
---|
597 | if (insertNow == toInsert) break; |
---|
598 | |
---|
599 | { |
---|
600 | InsertSpeciesIterator partEnd = curr; |
---|
601 | advance(partEnd, insertNow); |
---|
602 | |
---|
603 | insert_species_into_tree(curr, partEnd, progress); |
---|
604 | curr = partEnd; |
---|
605 | } |
---|
606 | |
---|
607 | toInsert -= insertNow; |
---|
608 | inTree += insertNow; |
---|
609 | } |
---|
610 | |
---|
611 | insert_species_into_tree(curr, end, progress); |
---|
612 | |
---|
613 | if (oldRootEdge) oldRootEdge->set_root(); // set root back to old position |
---|
614 | } |
---|
615 | |
---|
616 | enum AddWhat { |
---|
617 | NT_ADD_MARKED, |
---|
618 | NT_ADD_SELECTED, |
---|
619 | }; |
---|
620 | |
---|
621 | static void nt_add(AWT_graphic_parsimony *agt, AddWhat what, bool quick) { |
---|
622 | GB_ERROR error = NULp; |
---|
623 | |
---|
624 | AP_tree *oldrootleft = NULp; |
---|
625 | AP_tree *oldrootright = NULp; |
---|
626 | { |
---|
627 | AP_tree_nlen *root = rootNode(); |
---|
628 | if (root) { |
---|
629 | root->reset_subtree_layout(); |
---|
630 | oldrootleft = root->get_leftson(); |
---|
631 | oldrootright = root->get_rightson(); |
---|
632 | } |
---|
633 | } |
---|
634 | |
---|
635 | GB_HASH *hash = NULp; |
---|
636 | GBDATA *gb_main = agt->get_gbmain(); |
---|
637 | { |
---|
638 | GB_transaction ta(gb_main); |
---|
639 | switch (what) { |
---|
640 | case NT_ADD_SELECTED: { |
---|
641 | char *name = GBT_readOrCreate_string(gb_main, AWAR_SPECIES_NAME, ""); |
---|
642 | if (name && strlen(name)) { |
---|
643 | GBDATA *gb_species = GBT_find_species(gb_main, name); |
---|
644 | if (gb_species) { |
---|
645 | hash = GBS_create_hash(1, GB_MIND_CASE); |
---|
646 | GBS_write_hash(hash, name, (long)gb_species); |
---|
647 | } |
---|
648 | else error = GBS_global_string("Selected Species (%s) not found", name); |
---|
649 | } |
---|
650 | else error = "Please select a species"; |
---|
651 | free(name); |
---|
652 | break; |
---|
653 | } |
---|
654 | case NT_ADD_MARKED: { |
---|
655 | hash = GBT_create_marked_species_hash(gb_main); |
---|
656 | break; |
---|
657 | } |
---|
658 | } |
---|
659 | } |
---|
660 | |
---|
661 | if (!error) { |
---|
662 | ap_assert(hash); |
---|
663 | |
---|
664 | arb_progress progress(quick ? "Quick add" : "Add + NNI"); |
---|
665 | |
---|
666 | NT_remove_species_in_tree_from_hash(rootNode(), hash); |
---|
667 | |
---|
668 | size_t species_count = GBS_hash_elements(hash); |
---|
669 | InsertPerfMeter insertPerf("(quick-)add", species_count); |
---|
670 | |
---|
671 | { |
---|
672 | GB_transaction ta(gb_main); |
---|
673 | GBS_hash_do_loop(hash, transform_gbd_to_leaf, NULp); |
---|
674 | } |
---|
675 | { |
---|
676 | size_t skipped = species_count - GBS_hash_elements(hash); |
---|
677 | if (skipped) { |
---|
678 | GBT_message(gb_main, GBS_global_string("Skipped %zu species (no data?)", skipped)); |
---|
679 | } |
---|
680 | } |
---|
681 | if (GBS_hash_elements(hash)) { |
---|
682 | insert_all_species_into_tree(hash); |
---|
683 | } |
---|
684 | else { |
---|
685 | GBT_message(gb_main, "No species (left) to insert"); |
---|
686 | } |
---|
687 | |
---|
688 | if (rootNode()) { |
---|
689 | if (oldrootleft) { |
---|
690 | if (oldrootleft->father == oldrootright) oldrootleft->set_root(); |
---|
691 | else oldrootright->set_root(); |
---|
692 | } |
---|
693 | else { |
---|
694 | ARB_edge innermost = rootNode()->get_tree_root()->find_innermost_edge(); |
---|
695 | innermost.set_root(); |
---|
696 | } |
---|
697 | |
---|
698 | if (!quick) { |
---|
699 | arb_suppress_progress quiet; // @@@ use weighted progress (#789) for loop below |
---|
700 | |
---|
701 | Mutations pars_prev = rootNode()->costs(); |
---|
702 | rootNode()->compute_tree(); // see AP_tree_edge.cxx@flags_broken_by_moveNextTo |
---|
703 | progress.subtitle("local optimize (repeated NNI)"); |
---|
704 | while (1) { |
---|
705 | rootEdge()->nni_rec(EdgeSpec(SKIP_UNMARKED_EDGES|SKIP_LEAF_EDGES), AP_BL_NNI_ONLY, NULp, true); |
---|
706 | Mutations pars_curr = rootNode()->costs(); |
---|
707 | if (pars_curr == pars_prev) break; |
---|
708 | ap_assert(pars_curr<pars_prev); |
---|
709 | pars_prev = pars_curr; |
---|
710 | } |
---|
711 | } |
---|
712 | |
---|
713 | { |
---|
714 | arb_suppress_progress ignore; |
---|
715 | rootEdge()->calc_branchlengths(); |
---|
716 | } |
---|
717 | |
---|
718 | ASSERT_VALID_TREE(rootNode()); |
---|
719 | rootNode()->compute_tree(); |
---|
720 | } |
---|
721 | else { |
---|
722 | error = "Tree lost (no leafs left)"; |
---|
723 | } |
---|
724 | |
---|
725 | insertPerf.dump(stdout); |
---|
726 | } |
---|
727 | |
---|
728 | if (hash) GBS_free_hash(hash); |
---|
729 | if (error) aw_message(error); |
---|
730 | |
---|
731 | // @@@ quick-add w/o NNI should sort according to original tree |
---|
732 | agt->reorderTree(BIG_BRANCHES_TO_TOP); |
---|
733 | } |
---|
734 | |
---|
735 | // ------------------------------------------ |
---|
736 | // Adding partial sequences to tree |
---|
737 | |
---|
738 | class PartialSequence { |
---|
739 | GBDATA *gb_species; |
---|
740 | mutable AP_tree_nlen *self; // self converted to leaf (ready for insertion) |
---|
741 | const AP_tree_nlen *best_full_match; // full sequence position which matched best |
---|
742 | long overlap; // size of overlapping region |
---|
743 | long penalty; // weighted mismatches |
---|
744 | bool released; |
---|
745 | bool multi_match; |
---|
746 | string multi_list; // list of equal-rated insertion-points (not containing self) |
---|
747 | |
---|
748 | AP_tree_nlen *get_self() const { |
---|
749 | if (!self) { |
---|
750 | ap_assert(!released); // request not possible, because leaf has already been released! |
---|
751 | |
---|
752 | self = (AP_tree_nlen*)transform_gbd_to_leaf(GBT_get_name_or_description(gb_species), (long)gb_species, NULp); |
---|
753 | ap_assert(self); |
---|
754 | } |
---|
755 | return self; |
---|
756 | } |
---|
757 | |
---|
758 | public: |
---|
759 | PartialSequence(GBDATA *gb_species_) : |
---|
760 | gb_species(gb_species_), |
---|
761 | self(NULp), |
---|
762 | best_full_match(NULp), |
---|
763 | overlap(0), |
---|
764 | penalty(LONG_MAX), |
---|
765 | released(false), |
---|
766 | multi_match(false) |
---|
767 | {} |
---|
768 | PartialSequence(const PartialSequence& other) |
---|
769 | : gb_species(other.gb_species), |
---|
770 | self(other.self), |
---|
771 | best_full_match(other.best_full_match), |
---|
772 | overlap(other.overlap), |
---|
773 | penalty(other.penalty), |
---|
774 | released(other.released), |
---|
775 | multi_match(other.multi_match), |
---|
776 | multi_list(other.multi_list) |
---|
777 | { |
---|
778 | ap_assert(!self); // copying self not implemented |
---|
779 | } |
---|
780 | DECLARE_ASSIGNMENT_OPERATOR(PartialSequence); |
---|
781 | ~PartialSequence() { ap_assert(!self); } |
---|
782 | |
---|
783 | GBDATA *get_species() const { return gb_species; } |
---|
784 | const AP_tree_nlen *get_best_match() const { return best_full_match; } |
---|
785 | AP_FLOAT get_branchlength() const { return AP_FLOAT(penalty)/overlap; } |
---|
786 | void test_match(const AP_tree_nlen *leaf_full); |
---|
787 | bool is_multi_match() const { return multi_match; } |
---|
788 | |
---|
789 | const char *get_name() const { |
---|
790 | const char *name = get_self()->name; |
---|
791 | ap_assert(name); |
---|
792 | return name; |
---|
793 | } |
---|
794 | |
---|
795 | string get_multilist() const { |
---|
796 | ap_assert(is_multi_match()); |
---|
797 | return string(best_full_match->name)+multi_list; |
---|
798 | } |
---|
799 | |
---|
800 | AP_tree_nlen *release() { |
---|
801 | AP_tree_nlen *s = self; |
---|
802 | self = NULp; |
---|
803 | released = true; |
---|
804 | return s; |
---|
805 | } |
---|
806 | |
---|
807 | void dump(const char *whichMatch) const { |
---|
808 | ap_assert(best_full_match); |
---|
809 | printf("%s match for '%s' is '%s' (overlap=%li penalty=%li)\n", |
---|
810 | whichMatch, get_name(), best_full_match->name, |
---|
811 | overlap, penalty); |
---|
812 | } |
---|
813 | |
---|
814 | }; |
---|
815 | |
---|
816 | void PartialSequence::test_match(const AP_tree_nlen *leaf_full) { |
---|
817 | long curr_overlap; |
---|
818 | long curr_penalty; |
---|
819 | |
---|
820 | leaf_full->get_seq()->partial_match(get_self()->get_seq(), &curr_overlap, &curr_penalty); |
---|
821 | |
---|
822 | bool better = false; |
---|
823 | |
---|
824 | if (curr_overlap > overlap) { |
---|
825 | better = true; |
---|
826 | } |
---|
827 | else if (curr_overlap == overlap) { |
---|
828 | if (curr_penalty<penalty) { |
---|
829 | better = true; |
---|
830 | } |
---|
831 | else if (curr_penalty == penalty) { |
---|
832 | // found two equal-rated insertion points -> store data for warning |
---|
833 | #if defined(DEBUG) |
---|
834 | if (!multi_match) dump("better"); |
---|
835 | printf("Another equal match is against '%s' (overlap=%li penalty=%li)\n", leaf_full->name, curr_overlap, curr_penalty); |
---|
836 | #endif // DEBUG |
---|
837 | |
---|
838 | multi_match = true; |
---|
839 | multi_list.append(1, '/'); |
---|
840 | multi_list.append(leaf_full->name); |
---|
841 | } |
---|
842 | } |
---|
843 | |
---|
844 | if (better) { |
---|
845 | overlap = curr_overlap; |
---|
846 | penalty = curr_penalty; |
---|
847 | best_full_match = leaf_full; |
---|
848 | multi_match = false; |
---|
849 | multi_list = ""; |
---|
850 | |
---|
851 | #if defined(DEBUG) |
---|
852 | dump("better"); |
---|
853 | #endif |
---|
854 | } |
---|
855 | #if defined(DEBUG) |
---|
856 | else if (!multi_match) { |
---|
857 | printf("Worse match against '%s' (overlap=%li penalty=%li)\n", leaf_full->name, curr_overlap, curr_penalty); |
---|
858 | } |
---|
859 | #endif |
---|
860 | } |
---|
861 | |
---|
862 | static GB_ERROR nt_best_partial_match_rec(list<PartialSequence>& partial, const AP_tree_nlen *tree) { |
---|
863 | GB_ERROR error = NULp; |
---|
864 | |
---|
865 | if (tree) { |
---|
866 | if (tree->is_leaf() && tree->name) { |
---|
867 | if (tree->gb_node) { |
---|
868 | int is_partial = GBT_is_partial(tree->gb_node, 0, true); // marks undef as 'full sequence' |
---|
869 | if (is_partial == 0) { // do not consider other partial sequences |
---|
870 | list<PartialSequence>::iterator i = partial.begin(); |
---|
871 | list<PartialSequence>::iterator e = partial.end(); |
---|
872 | for (; i != e; ++i) { |
---|
873 | i->test_match(tree); |
---|
874 | } |
---|
875 | } |
---|
876 | else if (is_partial == -1) { |
---|
877 | error = GB_await_error(); |
---|
878 | } |
---|
879 | } |
---|
880 | } |
---|
881 | else { |
---|
882 | error = nt_best_partial_match_rec(partial, tree->get_leftson()); |
---|
883 | if (!error) error = nt_best_partial_match_rec(partial, tree->get_rightson()); |
---|
884 | } |
---|
885 | } |
---|
886 | return error; |
---|
887 | } |
---|
888 | |
---|
889 | static void count_partial_and_full(const AP_tree_nlen *at, int *partial, int *full, int *zombies, int default_value, bool define_if_undef) { |
---|
890 | if (at->is_leaf()) { |
---|
891 | if (at->gb_node) { |
---|
892 | int is_partial = GBT_is_partial(at->gb_node, default_value, define_if_undef); |
---|
893 | if (is_partial) ++(*partial); |
---|
894 | else ++(*full); |
---|
895 | } |
---|
896 | else { |
---|
897 | ++(*zombies); |
---|
898 | } |
---|
899 | } |
---|
900 | else { |
---|
901 | count_partial_and_full(at->get_leftson(), partial, full, zombies, default_value, define_if_undef); |
---|
902 | count_partial_and_full(at->get_rightson(), partial, full, zombies, default_value, define_if_undef); |
---|
903 | } |
---|
904 | } |
---|
905 | |
---|
906 | static const AP_tree_nlen *find_least_deep_leaf(const AP_tree_nlen *at, int depth, int *min_depth) { |
---|
907 | if (depth >= *min_depth) { |
---|
908 | return NULp; // already found better or equal |
---|
909 | } |
---|
910 | |
---|
911 | if (at->is_leaf()) { |
---|
912 | if (at->gb_node) { |
---|
913 | *min_depth = depth; |
---|
914 | return at; |
---|
915 | } |
---|
916 | return NULp; |
---|
917 | } |
---|
918 | |
---|
919 | const AP_tree_nlen *left = find_least_deep_leaf(at->get_leftson(), depth+1, min_depth); |
---|
920 | const AP_tree_nlen *right = find_least_deep_leaf(at->get_rightson(), depth+1, min_depth); |
---|
921 | |
---|
922 | return right ? right : left; |
---|
923 | } |
---|
924 | inline AP_tree_nlen *find_least_deep_leaf(AP_tree_nlen *at, int depth, int *min_depth) { |
---|
925 | return const_cast<AP_tree_nlen*>(find_least_deep_leaf(const_cast<const AP_tree_nlen*>(at), depth, min_depth)); |
---|
926 | } |
---|
927 | |
---|
928 | static void push_partial(const char *, long val, void *cd_partial) { |
---|
929 | list<PartialSequence> *partial = reinterpret_cast<list<PartialSequence> *>(cd_partial); |
---|
930 | partial->push_back(PartialSequence((GBDATA*)val)); |
---|
931 | } |
---|
932 | |
---|
933 | // ------------------------------- |
---|
934 | // Add Partial sequences |
---|
935 | |
---|
936 | static void nt_add_partial(AWT_graphic_parsimony *agt) { |
---|
937 | GB_ERROR error = NULp; |
---|
938 | GBDATA *gb_main = agt->get_gbmain(); |
---|
939 | |
---|
940 | GB_begin_transaction(gb_main); |
---|
941 | |
---|
942 | int full_marked_sequences = 0; |
---|
943 | |
---|
944 | arb_progress part_add_progress("Adding partial sequences"); |
---|
945 | |
---|
946 | { |
---|
947 | list<PartialSequence> partial; |
---|
948 | { |
---|
949 | GB_HASH *partial_hash = GBS_create_hash(GBT_get_species_count(gb_main), GB_MIND_CASE); |
---|
950 | |
---|
951 | int marked_found = 0; |
---|
952 | int partial_marked_sequences = 0; |
---|
953 | int no_data = 0; // no data in alignment |
---|
954 | |
---|
955 | for (GBDATA *gb_marked = GBT_first_marked_species(gb_main); |
---|
956 | !error && gb_marked; |
---|
957 | gb_marked = GBT_next_marked_species(gb_marked)) |
---|
958 | { |
---|
959 | ++marked_found; |
---|
960 | |
---|
961 | if (GBT_find_sequence(gb_marked, ap_main->get_aliname())) { // species has sequence in alignment |
---|
962 | const char *name = GBT_get_name_or_description(gb_marked); |
---|
963 | |
---|
964 | switch (GBT_is_partial(gb_marked, 1, true)) { // marks undef as 'partial sequence' |
---|
965 | case 0: { // full sequences |
---|
966 | GBT_message(gb_main, GBS_global_string("'%s' is a full sequence (cannot add partial)", name)); |
---|
967 | ++full_marked_sequences; |
---|
968 | break; |
---|
969 | } |
---|
970 | case 1: // partial sequences |
---|
971 | ++partial_marked_sequences; |
---|
972 | GBS_write_hash(partial_hash, name, (long)gb_marked); |
---|
973 | break; |
---|
974 | case -1: // error |
---|
975 | error = GB_await_error(); |
---|
976 | break; |
---|
977 | default: |
---|
978 | ap_assert(0); |
---|
979 | break; |
---|
980 | } |
---|
981 | } |
---|
982 | else { |
---|
983 | no_data++; |
---|
984 | } |
---|
985 | } |
---|
986 | |
---|
987 | if (!error && !marked_found) error = "There are no marked species"; |
---|
988 | |
---|
989 | if (!error) { |
---|
990 | NT_remove_species_in_tree_from_hash(rootNode(), partial_hash); // skip all species which are in tree |
---|
991 | GBS_hash_do_const_loop(partial_hash, push_partial, &partial); // build partial list from hash |
---|
992 | |
---|
993 | int partials_already_in_tree = partial_marked_sequences - partial.size(); |
---|
994 | |
---|
995 | if (no_data>0) GBT_message(gb_main, GBS_global_string("%i marked species have no data in '%s'", no_data, ap_main->get_aliname())); |
---|
996 | if (full_marked_sequences>0) GBT_message(gb_main, GBS_global_string("%i marked species are declared full sequences", full_marked_sequences)); |
---|
997 | if (partials_already_in_tree>0) GBT_message(gb_main, GBS_global_string("%i marked species are already in tree", partials_already_in_tree)); |
---|
998 | |
---|
999 | if (partial.empty()) error = "No species left to add"; |
---|
1000 | } |
---|
1001 | |
---|
1002 | GBS_free_hash(partial_hash); |
---|
1003 | } |
---|
1004 | |
---|
1005 | if (!error) error = GBT_add_new_changekey(gb_main, "ARB_partial", GB_INT); |
---|
1006 | |
---|
1007 | if (!error) { |
---|
1008 | rootNode()->reset_subtree_layout(); |
---|
1009 | |
---|
1010 | // find best matching full sequence for each partial sequence |
---|
1011 | error = nt_best_partial_match_rec(partial, rootNode()); |
---|
1012 | |
---|
1013 | list<PartialSequence>::iterator i = partial.begin(); |
---|
1014 | list<PartialSequence>::iterator e = partial.end(); |
---|
1015 | |
---|
1016 | arb_progress part_insert_progress(partial.size()); |
---|
1017 | |
---|
1018 | #if defined(DEBUG) |
---|
1019 | // show results : |
---|
1020 | for (; i != e; ++i) i->dump("best"); |
---|
1021 | i = partial.begin(); |
---|
1022 | #endif // DEBUG |
---|
1023 | |
---|
1024 | for (; i != e && !error; ++i) { |
---|
1025 | const char *name = i->get_name(); |
---|
1026 | |
---|
1027 | if (i->is_multi_match()) { |
---|
1028 | GBT_message(gb_main, GBS_global_string("Insertion of '%s' is ambiguous.\n" |
---|
1029 | "(took first of equal scored insertion points: %s)", |
---|
1030 | name, i->get_multilist().c_str())); |
---|
1031 | } |
---|
1032 | |
---|
1033 | AP_tree_nlen *part_leaf = i->release(); |
---|
1034 | AP_tree_nlen *full_seq = const_cast<AP_tree_nlen*>(i->get_best_match()); |
---|
1035 | AP_tree_nlen *brother = full_seq->get_brother(); |
---|
1036 | int is_partial = 0; |
---|
1037 | AP_tree_nlen *target = NULp; |
---|
1038 | |
---|
1039 | if (brother->is_leaf()) { |
---|
1040 | if (brother->gb_node) { |
---|
1041 | is_partial = GBT_is_partial(brother->gb_node, 0, true); |
---|
1042 | |
---|
1043 | if (is_partial) { // brother is partial sequence |
---|
1044 | target = brother; // insert as brother of brother |
---|
1045 | } |
---|
1046 | else { |
---|
1047 | target = full_seq; // insert as brother of full_seq |
---|
1048 | } |
---|
1049 | } |
---|
1050 | else { |
---|
1051 | error = "There are zombies in your tree - please remove them"; |
---|
1052 | } |
---|
1053 | } |
---|
1054 | else { |
---|
1055 | int partial_count = 0; |
---|
1056 | int full_count = 0; |
---|
1057 | int zombie_count = 0; |
---|
1058 | |
---|
1059 | count_partial_and_full(brother, &partial_count, &full_count, &zombie_count, 0, true); |
---|
1060 | |
---|
1061 | if (zombie_count) { |
---|
1062 | error = "There are zombies in your tree - please remove them"; |
---|
1063 | } |
---|
1064 | else if (full_count) { |
---|
1065 | // brother is a subtree containing full sequences |
---|
1066 | // -> add new brother to full_seq found above |
---|
1067 | target = full_seq; |
---|
1068 | } |
---|
1069 | else { // brother subtree only contains partial sequences |
---|
1070 | // find one of the least-deep leafs |
---|
1071 | int depth = INT_MAX; |
---|
1072 | target = find_least_deep_leaf(brother, 0, &depth); |
---|
1073 | is_partial = 1; |
---|
1074 | } |
---|
1075 | } |
---|
1076 | |
---|
1077 | |
---|
1078 | if (!error) { |
---|
1079 | #if defined(DEBUG) |
---|
1080 | printf("inserting '%s'\n", name); |
---|
1081 | #endif // DEBUG |
---|
1082 | part_leaf->insert(target); |
---|
1083 | |
---|
1084 | // we need to create the sequence of the father node! |
---|
1085 | AP_tree_nlen *father = part_leaf->get_father(); |
---|
1086 | father->costs(); |
---|
1087 | |
---|
1088 | // ensure full-sequence is always on top |
---|
1089 | if (father->rightson == target) { |
---|
1090 | father->swap_sons(); |
---|
1091 | } |
---|
1092 | |
---|
1093 | if (!error) { // now correct the branch lengths modified by insert() |
---|
1094 | // calc the original branchlen (of target leaf branch) |
---|
1095 | GBT_LEN orglen = father->get_branchlength()+target->get_branchlength(); |
---|
1096 | |
---|
1097 | if (is_partial) { // we have a subtree of partial sequences |
---|
1098 | target->set_branchlength(orglen); // restore original branchlength |
---|
1099 | father->set_branchlength(0); // all father branches are zero length |
---|
1100 | } |
---|
1101 | else { // we have a subtree of one full+one partial sequence |
---|
1102 | ap_assert(full_seq->get_father() == father); |
---|
1103 | |
---|
1104 | father->set_branchlength(orglen); // father branch represents original length (w/o partial seq) |
---|
1105 | full_seq->set_branchlength(0); // full seq has no sub-branch length |
---|
1106 | } |
---|
1107 | part_leaf->set_branchlength(i->get_branchlength()); |
---|
1108 | printf("Adding with branchlength=%f\n", i->get_branchlength()); |
---|
1109 | } |
---|
1110 | } |
---|
1111 | else { |
---|
1112 | destroy(part_leaf); |
---|
1113 | } |
---|
1114 | |
---|
1115 | part_insert_progress.inc_and_check_user_abort(error); |
---|
1116 | } |
---|
1117 | } |
---|
1118 | } |
---|
1119 | |
---|
1120 | if (full_marked_sequences) { |
---|
1121 | GBT_message(gb_main, GBS_global_string("%i marked full sequences were not added", full_marked_sequences)); |
---|
1122 | } |
---|
1123 | |
---|
1124 | if (error) { |
---|
1125 | GBT_message(gb_main, error); |
---|
1126 | GB_abort_transaction(gb_main); |
---|
1127 | } |
---|
1128 | else { |
---|
1129 | GB_commit_transaction(gb_main); |
---|
1130 | agt->exports.request_save(); |
---|
1131 | } |
---|
1132 | } |
---|
1133 | |
---|
1134 | static void NT_add_partial_and_update(UNFIXED, TREE_canvas *ntw) { |
---|
1135 | AWT_auto_refresh allowed_on(ntw); |
---|
1136 | nt_add_partial(AWT_TREE_PARS(ntw)); |
---|
1137 | } |
---|
1138 | |
---|
1139 | // ------------------------------- |
---|
1140 | // add marked / selected |
---|
1141 | |
---|
1142 | static void nt_add_and_update(AWT_canvas *ntw, AddWhat what, bool quick) { |
---|
1143 | AWT_auto_refresh allowed_on(ntw); |
---|
1144 | nt_add(AWT_TREE_PARS(ntw), what, quick); |
---|
1145 | } |
---|
1146 | |
---|
1147 | static void NT_add_and_NNI(UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_add_and_update(ntw, what, false); } |
---|
1148 | static void NT_add_quick (UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_add_and_update(ntw, what, true); } |
---|
1149 | |
---|
1150 | // ------------------------------------------ |
---|
1151 | // remove and add marked / selected |
---|
1152 | |
---|
1153 | static void nt_reAdd(AWT_graphic_parsimony *agt, AddWhat what, bool quick) { |
---|
1154 | if (agt->get_root_node()) { |
---|
1155 | ap_assert(what == NT_ADD_MARKED); // code below will misbehave for NT_ADD_SELECTED |
---|
1156 | agt->get_tree_root()->remove_leafs(AWT_REMOVE_MARKED); |
---|
1157 | nt_add(agt, what, quick); |
---|
1158 | } |
---|
1159 | } |
---|
1160 | |
---|
1161 | static void nt_reAdd_and_update(AWT_canvas *ntw, AddWhat what, bool quick) { |
---|
1162 | AWT_auto_refresh allowed_on(ntw); |
---|
1163 | nt_reAdd(AWT_TREE_PARS(ntw), what, quick); |
---|
1164 | } |
---|
1165 | |
---|
1166 | static void NT_reAdd_and_NNI(UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_reAdd_and_update(ntw, what, false); } |
---|
1167 | static void NT_reAdd_quick (UNFIXED, TREE_canvas *ntw, AddWhat what) { nt_reAdd_and_update(ntw, what, true); } |
---|
1168 | |
---|
1169 | // -------------------------------------------------------------------------------- |
---|
1170 | |
---|
1171 | static void calc_branchlengths_and_reorder(AWT_graphic_parsimony *agt) { |
---|
1172 | arb_progress progress("Calculating branchlengths"); |
---|
1173 | rootEdge()->calc_branchlengths(); |
---|
1174 | agt->reorderTree(BIG_BRANCHES_TO_TOP); |
---|
1175 | } |
---|
1176 | |
---|
1177 | static void NT_calc_branchlengths_reorder_and_update(AW_window *, TREE_canvas *ntw) { |
---|
1178 | AWT_auto_refresh allowed_on(ntw); |
---|
1179 | calc_branchlengths_and_reorder(AWT_TREE_PARS(ntw)); |
---|
1180 | } |
---|
1181 | |
---|
1182 | static void NT_bootstrap(AW_window *, TREE_canvas *ntw, bool limit_only) { |
---|
1183 | arb_progress progress("Calculating bootstrap limit"); |
---|
1184 | AWT_auto_refresh allowed_on(ntw); |
---|
1185 | AP_BL_MODE mode = AP_BL_MODE((limit_only ? AP_BL_BOOTSTRAP_LIMIT : AP_BL_BOOTSTRAP_ESTIMATE)|AP_BL_BL_ONLY); |
---|
1186 | |
---|
1187 | rootEdge()->nni_rec(ANY_EDGE, mode, NULp, true); |
---|
1188 | AWT_graphic_tree *agt = AWT_TREE(ntw); |
---|
1189 | agt->reorderTree(BIG_BRANCHES_TO_TOP); |
---|
1190 | agt->set_logical_root_to(agt->get_root_node()); |
---|
1191 | } |
---|
1192 | |
---|
1193 | static void optimizeTree(AWT_graphic_parsimony *agt, const KL_Settings& settings) { |
---|
1194 | arb_progress progress("Optimizing tree"); |
---|
1195 | agt->get_parsimony().optimize_tree(rootNode(), settings, progress); |
---|
1196 | ASSERT_VALID_TREE(rootNode()); |
---|
1197 | calc_branchlengths_and_reorder(agt); |
---|
1198 | } |
---|
1199 | static void NT_optimize(AW_window *, TREE_canvas *ntw) { |
---|
1200 | AWT_auto_refresh allowed_on(ntw); |
---|
1201 | optimizeTree(AWT_TREE_PARS(ntw), KL_Settings(ntw->awr)); |
---|
1202 | } |
---|
1203 | |
---|
1204 | static void recursiveNNI(AWT_graphic_parsimony *agt, EdgeSpec whichEdges) { |
---|
1205 | arb_progress progress("Recursive NNI"); |
---|
1206 | Mutations orgPars = rootNode()->costs(); |
---|
1207 | Mutations prevPars = orgPars; |
---|
1208 | progress.subtitle(GBS_global_string("best=%li", orgPars)); |
---|
1209 | |
---|
1210 | { |
---|
1211 | arb_suppress_progress quiet; // @@@ use weighted progress (#789) for loop below |
---|
1212 | |
---|
1213 | while (!progress.aborted()) { |
---|
1214 | Mutations currPars = rootEdge()->nni_rec(whichEdges, AP_BL_NNI_ONLY, NULp, true); |
---|
1215 | if (currPars == prevPars) break; // no improvement -> abort |
---|
1216 | progress.subtitle(GBS_global_string("best=%li (gain=%li)", currPars, orgPars-currPars)); |
---|
1217 | prevPars = currPars; |
---|
1218 | } |
---|
1219 | calc_branchlengths_and_reorder(agt); |
---|
1220 | } |
---|
1221 | } |
---|
1222 | |
---|
1223 | static void NT_recursiveNNI(AW_window *, TREE_canvas *ntw) { |
---|
1224 | AWT_auto_refresh allowed_on(ntw); |
---|
1225 | EdgeSpec whichEdges = KL_Settings(ntw->awr).whichEdges; |
---|
1226 | recursiveNNI(AWT_TREE_PARS(ntw), whichEdges); |
---|
1227 | } |
---|
1228 | |
---|
1229 | static int calculate_default_random_repeat(long leafs) { |
---|
1230 | double balanced_depth = log10(leafs) / log10(2); |
---|
1231 | int repeat = int(balanced_depth*2.0 + .5); |
---|
1232 | if (repeat<1) repeat = 1; |
---|
1233 | return repeat; |
---|
1234 | } |
---|
1235 | |
---|
1236 | static void update_random_repeat(AW_root *awr, AWT_graphic_parsimony *agt) { |
---|
1237 | long leafs = agt->get_root_node()->count_leafs(); |
---|
1238 | int repeat = calculate_default_random_repeat(leafs); |
---|
1239 | awr->awar(AWAR_RAND_REPEAT)->write_int(repeat); |
---|
1240 | } |
---|
1241 | |
---|
1242 | static void mixtree_and_calclengths(AWT_graphic_parsimony *agt, int repeat, int percent, EdgeSpec whichEdges) { |
---|
1243 | arb_progress progress("Randomizing tree", 2L); |
---|
1244 | |
---|
1245 | // @@@ possible candidate for weighted progress (#789) |
---|
1246 | progress.subtitle("mixing"); |
---|
1247 | rootEdge()->mixTree(repeat, percent, whichEdges); |
---|
1248 | ++progress; |
---|
1249 | |
---|
1250 | progress.subtitle("calculating branchlengths"); |
---|
1251 | rootEdge()->calc_branchlengths(); |
---|
1252 | ++progress; |
---|
1253 | |
---|
1254 | agt->exports.request_save(); |
---|
1255 | } |
---|
1256 | |
---|
1257 | static void randomMixTree(AW_window *aww, TREE_canvas *ntw) { |
---|
1258 | AWT_auto_refresh allowed_on(ntw); |
---|
1259 | AW_root *awr = aww->get_root(); |
---|
1260 | |
---|
1261 | mixtree_and_calclengths(AWT_TREE_PARS(ntw), awr->awar(AWAR_RAND_REPEAT)->read_int(), awr->awar(AWAR_RAND_PERCENT)->read_int(), KL_Settings(awr).whichEdges); |
---|
1262 | { |
---|
1263 | ARB_edge newRootEdge = rootNode()->get_tree_root()->find_innermost_edge(); |
---|
1264 | newRootEdge.son()->set_root(); |
---|
1265 | } |
---|
1266 | AWT_TREE_PARS(ntw)->reorderTree(BIG_BRANCHES_TO_TOP); |
---|
1267 | } |
---|
1268 | |
---|
1269 | |
---|
1270 | static AWT_config_mapping_def optimizer_config_mapping[] = { |
---|
1271 | { AWAR_OPTI_MARKED_ONLY, "marked_only" }, |
---|
1272 | { AWAR_OPTI_SKIP_FOLDED, "skip_folded" }, |
---|
1273 | |
---|
1274 | // { AWAR_RAND_REPEAT, "rand_repeat" }, // do not store (use treesize-dependent default) |
---|
1275 | { AWAR_RAND_PERCENT, "rand_percent" }, |
---|
1276 | |
---|
1277 | { AWAR_KL_MAXDEPTH, "maxdepth" }, |
---|
1278 | { AWAR_KL_INCDEPTH, "incdepth" }, |
---|
1279 | |
---|
1280 | { AWAR_KL_STATIC_ENABLED, "static" }, |
---|
1281 | { AWAR_KL_STATIC_DEPTH1, "s_depth1" }, |
---|
1282 | { AWAR_KL_STATIC_DEPTH2, "s_depth2" }, |
---|
1283 | { AWAR_KL_STATIC_DEPTH3, "s_depth3" }, |
---|
1284 | { AWAR_KL_STATIC_DEPTH4, "s_depth4" }, |
---|
1285 | { AWAR_KL_STATIC_DEPTH5, "s_depth5" }, |
---|
1286 | |
---|
1287 | { AWAR_KL_DYNAMIC_ENABLED, "dynamic" }, |
---|
1288 | { AWAR_KL_DYNAMIC_START, "start" }, |
---|
1289 | { AWAR_KL_DYNAMIC_MAXX, "maxx" }, |
---|
1290 | { AWAR_KL_DYNAMIC_MAXY, "maxy" }, |
---|
1291 | |
---|
1292 | { NULp, NULp } |
---|
1293 | }; |
---|
1294 | |
---|
1295 | static AWT_predefined_config optimizer_predefined_configs[] = { |
---|
1296 | { |
---|
1297 | "*minimum_static_reduction", |
---|
1298 | "Sets paths allowed by static reduction to maximum\n(causing the minimal reduction)", |
---|
1299 | "s_depth1='8';s_depth2='6';s_depth3='6';s_depth4='6';s_depth5='6';static='1'" // only defines/affects settings related to static path reduction |
---|
1300 | }, |
---|
1301 | { |
---|
1302 | "*whole_tree_level8", |
---|
1303 | "Level-8-optimization of whole tree\n(no path reduction)", |
---|
1304 | "dynamic='0';incdepth='0';marked_only='0';maxdepth='8';skip_folded='0';static='0'" |
---|
1305 | }, |
---|
1306 | { NULp, NULp, NULp } |
---|
1307 | }; |
---|
1308 | |
---|
1309 | static AW_window *createOptimizeWindow(AW_root *aw_root, TREE_canvas *ntw) { |
---|
1310 | AW_window_simple *aws = new AW_window_simple; |
---|
1311 | aws->init(aw_root, "TREE_OPTIMIZE", "Tree optimization"); |
---|
1312 | aws->load_xfig("pars/tree_opti.fig"); |
---|
1313 | |
---|
1314 | aws->at("close"); |
---|
1315 | aws->callback(AW_POPDOWN); |
---|
1316 | aws->create_button("CLOSE", "CLOSE", "C"); |
---|
1317 | |
---|
1318 | aws->at("help"); |
---|
1319 | aws->callback(makeHelpCallback("pa_optimizer.hlp")); |
---|
1320 | aws->create_button("HELP", "HELP", "H"); |
---|
1321 | |
---|
1322 | aws->at("marked"); |
---|
1323 | aws->label("Only subtrees containing marked species"); |
---|
1324 | aws->create_toggle(AWAR_OPTI_MARKED_ONLY); |
---|
1325 | |
---|
1326 | aws->at("folded"); |
---|
1327 | aws->label("Do not modify folded subtrees"); |
---|
1328 | aws->create_toggle(AWAR_OPTI_SKIP_FOLDED); |
---|
1329 | |
---|
1330 | aws->button_length(18); |
---|
1331 | |
---|
1332 | aws->at("rec_nni"); |
---|
1333 | aws->callback(makeWindowCallback(NT_recursiveNNI, ntw)); |
---|
1334 | aws->create_button("REC_NNI", "Recursive NNI", "N"); |
---|
1335 | |
---|
1336 | aws->at("heuristic"); |
---|
1337 | aws->callback(makeWindowCallback(NT_optimize, ntw)); |
---|
1338 | aws->create_button("HEURISTIC", "Heuristic\noptimizer", "H"); |
---|
1339 | |
---|
1340 | aws->at("config"); |
---|
1341 | AWT_insert_config_manager(aws, AW_ROOT_DEFAULT, "treeopti", optimizer_config_mapping, NULp, optimizer_predefined_configs); |
---|
1342 | |
---|
1343 | aws->at("settings"); |
---|
1344 | aws->callback(makeCreateWindowCallback(create_kernighan_properties_window)); |
---|
1345 | aws->create_button("SETTINGS", "Settings", "S"); |
---|
1346 | |
---|
1347 | aws->at("randomize"); |
---|
1348 | aws->callback(makeWindowCallback(randomMixTree, ntw)); |
---|
1349 | aws->create_button("RANDOMIZE", "Randomize tree", "R"); |
---|
1350 | |
---|
1351 | aws->button_length(5); |
---|
1352 | |
---|
1353 | aws->at("repeat"); aws->create_input_field(AWAR_RAND_REPEAT); |
---|
1354 | aws->at("percent"); aws->create_input_field(AWAR_RAND_PERCENT); |
---|
1355 | |
---|
1356 | return aws; |
---|
1357 | } |
---|
1358 | |
---|
1359 | // ----------------------- |
---|
1360 | // test functions |
---|
1361 | |
---|
1362 | #if defined(TESTMENU) |
---|
1363 | static void refreshTree(AWT_canvas *ntw) { |
---|
1364 | GB_transaction ta(ntw->gb_main); |
---|
1365 | AWT_auto_refresh allowed_on(ntw); |
---|
1366 | ntw->request_save_and_zoom_reset(); |
---|
1367 | } |
---|
1368 | |
---|
1369 | static void setBranchlens(AP_tree_nlen *node, double newLen) { |
---|
1370 | node->setBranchlen(newLen, newLen); |
---|
1371 | |
---|
1372 | if (!node->is_leaf()) { |
---|
1373 | setBranchlens(node->get_leftson(), newLen); |
---|
1374 | setBranchlens(node->get_rightson(), newLen); |
---|
1375 | } |
---|
1376 | } |
---|
1377 | |
---|
1378 | static void TESTMENU_setBranchlen(AW_window *, AWT_canvas *ntw) { |
---|
1379 | AP_tree_nlen *root = rootNode(); |
---|
1380 | |
---|
1381 | setBranchlens(root, 1.0); |
---|
1382 | refreshTree(ntw); |
---|
1383 | } |
---|
1384 | |
---|
1385 | static void TESTMENU_treeStats(AW_window *) { |
---|
1386 | ARB_tree_info tinfo; |
---|
1387 | AP_tree_nlen *root = rootNode(); |
---|
1388 | |
---|
1389 | if (root) { |
---|
1390 | { |
---|
1391 | GB_transaction ta(root->get_tree_root()->get_gb_main()); |
---|
1392 | root->calcTreeInfo(tinfo); |
---|
1393 | } |
---|
1394 | |
---|
1395 | puts("Tree stats:"); |
---|
1396 | |
---|
1397 | printf("nodes =%6zu\n", tinfo.nodes()); |
---|
1398 | printf(" inner =%6zu\n", tinfo.innerNodes); |
---|
1399 | printf(" groups =%6zu\n", tinfo.groups); |
---|
1400 | printf(" leafs =%6zu\n", tinfo.leafs); |
---|
1401 | printf(" unlinked =%6zu (zombies?)\n", tinfo.unlinked); |
---|
1402 | printf(" linked =%6zu\n", tinfo.linked()); |
---|
1403 | printf(" marked =%6zu\n", tinfo.marked); |
---|
1404 | } |
---|
1405 | else { |
---|
1406 | puts("No tree"); |
---|
1407 | } |
---|
1408 | } |
---|
1409 | |
---|
1410 | static void TESTMENU_sortTreeByName(AW_window *, AWT_canvas *ntw) { |
---|
1411 | AP_tree_nlen *root = rootNode(); |
---|
1412 | |
---|
1413 | root->sortByName(); |
---|
1414 | refreshTree(ntw); |
---|
1415 | } |
---|
1416 | |
---|
1417 | static void init_TEST_menu(AW_window_menu_modes *awm, AWT_canvas *ntw) { |
---|
1418 | awm->create_menu("Test[debug]", "g", AWM_ALL); |
---|
1419 | |
---|
1420 | awm->insert_menu_topic("treestat", "Tree statistics", "s", "", AWM_ALL, TESTMENU_treeStats); |
---|
1421 | awm->insert_menu_topic("setlens", "Set branchlens", "b", "", AWM_ALL, makeWindowCallback(TESTMENU_setBranchlen, ntw)); |
---|
1422 | awm->insert_menu_topic("sorttreebyname", "Sort tree by name", "o", "", AWM_ALL, makeWindowCallback(TESTMENU_sortTreeByName, ntw)); |
---|
1423 | } |
---|
1424 | #endif // TESTMENU |
---|
1425 | |
---|
1426 | static GB_ERROR pars_check_size(AW_root *awr, GB_ERROR& warning, const adfiltercbstruct *filterDef) { |
---|
1427 | GB_ERROR error = NULp; |
---|
1428 | warning = NULp; |
---|
1429 | |
---|
1430 | char *tree_name = awr->awar(AWAR_TREE)->read_string(); |
---|
1431 | char *filter = awr->awar(filterDef->def_filter)->read_string(); |
---|
1432 | long ali_len = 0; |
---|
1433 | |
---|
1434 | if (strlen(filter)) { |
---|
1435 | int i; |
---|
1436 | for (i=0; filter[i]; i++) { |
---|
1437 | if (filter[i] != '0') ali_len++; |
---|
1438 | } |
---|
1439 | } |
---|
1440 | else { |
---|
1441 | char *ali_name = awr->awar(AWAR_ALIGNMENT)->read_string(); |
---|
1442 | ali_len = GBT_get_alignment_len(ap_main->get_gb_main(), ali_name); |
---|
1443 | if (ali_len<=0) { |
---|
1444 | error = "Please select a valid alignment"; |
---|
1445 | GB_clear_error(); |
---|
1446 | } |
---|
1447 | free(ali_name); |
---|
1448 | } |
---|
1449 | |
---|
1450 | if (!error) { |
---|
1451 | long tree_size = GBT_size_of_tree(ap_main->get_gb_main(), tree_name); |
---|
1452 | if (tree_size == -1) { |
---|
1453 | error = "Please select an existing tree"; |
---|
1454 | } |
---|
1455 | else { |
---|
1456 | size_t expected_memuse = (ali_len * tree_size * 4 / 1024); |
---|
1457 | if (expected_memuse > GB_get_usable_memory()) { |
---|
1458 | warning = GBS_global_string("Estimated memory usage (%s) exceeds physical memory (will swap)\n" |
---|
1459 | "(did you specify a filter?)", |
---|
1460 | GBS_readable_size(expected_memuse, "b")); |
---|
1461 | } |
---|
1462 | } |
---|
1463 | } |
---|
1464 | |
---|
1465 | free(filter); |
---|
1466 | free(tree_name); |
---|
1467 | |
---|
1468 | ap_assert(!GB_have_error()); |
---|
1469 | return error; |
---|
1470 | } |
---|
1471 | |
---|
1472 | static void pars_reset_optimal_parsimony(AW_window *aww) { |
---|
1473 | AW_root *awr = aww->get_root(); |
---|
1474 | awr->awar(AWAR_BEST_PARSIMONY)->write_int(awr->awar(AWAR_PARSIMONY)->read_int()); |
---|
1475 | } |
---|
1476 | |
---|
1477 | class LowDataCheck { |
---|
1478 | int leafs; // counts leafs with insufficiant data |
---|
1479 | int inner; // same for inner nodes |
---|
1480 | |
---|
1481 | public: |
---|
1482 | LowDataCheck() : leafs(0), inner(0) {} |
---|
1483 | |
---|
1484 | void count(AP_tree_nlen *node); |
---|
1485 | |
---|
1486 | int get_leafs() const { return leafs; } |
---|
1487 | int get_inner() const { return inner; } |
---|
1488 | }; |
---|
1489 | |
---|
1490 | void LowDataCheck::count(AP_tree_nlen *node) { |
---|
1491 | const AP_combinableSeq *seq = node->get_seq(); |
---|
1492 | AP_FLOAT bases = seq->weighted_base_count(); |
---|
1493 | |
---|
1494 | if (node->is_leaf()) { |
---|
1495 | if (bases<MIN_SEQUENCE_LENGTH) ++leafs; |
---|
1496 | } |
---|
1497 | else { |
---|
1498 | if (bases<MIN_SEQUENCE_LENGTH) ++inner; |
---|
1499 | |
---|
1500 | count(node->get_leftson()); |
---|
1501 | count(node->get_rightson()); |
---|
1502 | } |
---|
1503 | } |
---|
1504 | |
---|
1505 | static void PARS_infomode_cb(UNFIXED, TREE_canvas *canvas, AWT_COMMAND_MODE mode) { |
---|
1506 | AWT_trigger_remote_action(NULp, canvas->gb_main, "ARB_NT:species_info"); |
---|
1507 | nt_mode_event(NULp, canvas, mode); |
---|
1508 | } |
---|
1509 | |
---|
1510 | static void pars_start_cb(AW_window *aw_parent, WeightedFilter *wfilt, const PARS_commands *cmds) { |
---|
1511 | ModRLimit increase_stacksize(RLIMIT_STACK, TREEDISP_STACKSIZE); |
---|
1512 | |
---|
1513 | AW_root *awr = aw_parent->get_root(); |
---|
1514 | GBDATA *gb_main = ap_main->get_gb_main(); |
---|
1515 | GB_begin_transaction(gb_main); |
---|
1516 | { |
---|
1517 | GB_ERROR warning; |
---|
1518 | GB_ERROR error = pars_check_size(awr, warning, wfilt->get_adfiltercbstruct()); |
---|
1519 | |
---|
1520 | if (warning && !error) { |
---|
1521 | char *question = GBS_global_string_copy("%s\nDo you want to continue?", warning); |
---|
1522 | bool cont = aw_ask_sure("swap_warning", question); |
---|
1523 | free(question); |
---|
1524 | |
---|
1525 | if (!cont) error = "User abort"; |
---|
1526 | |
---|
1527 | } |
---|
1528 | |
---|
1529 | if (error) { |
---|
1530 | aw_message(error); |
---|
1531 | GB_commit_transaction(gb_main); |
---|
1532 | return; |
---|
1533 | } |
---|
1534 | } |
---|
1535 | |
---|
1536 | |
---|
1537 | AW_window_menu_modes *awm = new AW_window_menu_modes; |
---|
1538 | awm->init(awr, "ARB_PARSIMONY", "ARB_PARSIMONY", 400, 200); |
---|
1539 | |
---|
1540 | GLOBAL_PARS->generate_tree(wfilt); |
---|
1541 | |
---|
1542 | TREE_canvas *ntw; |
---|
1543 | { |
---|
1544 | AP_tree_display_style prev_style = global_tree()->get_tree_style(); |
---|
1545 | global_tree()->set_tree_style(AP_LIST_SIMPLE, NULp); // avoid NDS warnings during startup |
---|
1546 | ntw = new TREE_canvas(gb_main, awm, awm->get_window_id(), global_tree(), awr->awar(AWAR_TREE)); |
---|
1547 | global_tree()->set_tree_style(prev_style, ntw); |
---|
1548 | } |
---|
1549 | |
---|
1550 | { |
---|
1551 | GB_ERROR error = NULp; |
---|
1552 | arb_progress progress("loading tree"); |
---|
1553 | NT_reload_tree_event(awr, ntw, false); // load tree (but do not expose - first zombies need to be removed) |
---|
1554 | if (!global_tree()->get_root_node()) { |
---|
1555 | error = "Failed to load the selected tree"; |
---|
1556 | } |
---|
1557 | else { |
---|
1558 | AP_tree_edge::initialize(rootNode()); // builds edges |
---|
1559 | long removed = global_tree_root()->remove_leafs(AWT_REMOVE_ZOMBIES); |
---|
1560 | |
---|
1561 | PARS_tree_init(global_tree()); |
---|
1562 | removed += global_tree_root()->remove_leafs(AWT_RemoveType(AWT_REMOVE_ZOMBIES | AWT_REMOVE_NO_SEQUENCE)); |
---|
1563 | |
---|
1564 | if (!global_tree()->get_root_node()) { |
---|
1565 | const char *aliname = global_tree_root()->get_aliview()->get_aliname(); |
---|
1566 | error = GBS_global_string("Less than 2 species contain data in '%s'\n" |
---|
1567 | "Tree vanished", aliname); |
---|
1568 | } |
---|
1569 | else if (removed) { |
---|
1570 | aw_message(GBS_global_string("Removed %li leafs (zombies or species w/o data in alignment)", removed)); |
---|
1571 | } |
---|
1572 | |
---|
1573 | error = GB_end_transaction(ntw->gb_main, error); |
---|
1574 | if (!error) { |
---|
1575 | progress.subtitle("Calculating inner nodes"); |
---|
1576 | GLOBAL_PARS->get_root_node()->costs(); |
---|
1577 | |
---|
1578 | progress.subtitle("Checking amount of data"); |
---|
1579 | LowDataCheck lowData; |
---|
1580 | lowData.count(GLOBAL_PARS->get_root_node()); |
---|
1581 | |
---|
1582 | bool warned = false; |
---|
1583 | if (lowData.get_inner()>0) { |
---|
1584 | aw_message(GBS_global_string("Inner nodes with insufficient data: %i", lowData.get_inner())); |
---|
1585 | warned = true; |
---|
1586 | } |
---|
1587 | if (lowData.get_leafs()>0) { |
---|
1588 | aw_message(GBS_global_string("Species with insufficient data: %i", lowData.get_leafs())); |
---|
1589 | warned = true; |
---|
1590 | } |
---|
1591 | #define _STRGIZE(x) #x |
---|
1592 | #define INT2STR(i) _STRGIZE(i) |
---|
1593 | if (warned) { |
---|
1594 | aw_message("Warning: low sequence data (<" INT2STR(MIN_SEQUENCE_LENGTH) " bp) detected! (filter too restrictive?)"); |
---|
1595 | } |
---|
1596 | } |
---|
1597 | } |
---|
1598 | if (error) aw_popup_exit(error); |
---|
1599 | } |
---|
1600 | |
---|
1601 | if (cmds->add_marked) NT_add_quick(NULp, ntw, NT_ADD_MARKED); |
---|
1602 | if (cmds->add_selected) NT_add_quick(NULp, ntw, NT_ADD_SELECTED); |
---|
1603 | if (cmds->calc_branch_lengths) NT_calc_branchlengths_reorder_and_update(awm, ntw); |
---|
1604 | if (cmds->calc_bootstrap) NT_bootstrap(awm, ntw, 0); |
---|
1605 | if (cmds->quit) pars_exit(awm); |
---|
1606 | |
---|
1607 | GB_transaction ta(ntw->gb_main); |
---|
1608 | |
---|
1609 | #if defined(DEBUG) |
---|
1610 | AWT_create_debug_menu(awm); |
---|
1611 | #endif // DEBUG |
---|
1612 | |
---|
1613 | awm->create_menu("File", "F", AWM_ALL); |
---|
1614 | { |
---|
1615 | insert_macro_menu_entry(awm, false); |
---|
1616 | awm->insert_menu_topic("print_tree", "Print Tree ...", "P", "tree2prt.hlp", AWM_ALL, makeWindowCallback(AWT_popup_print_window, static_cast<AWT_canvas*>(ntw))); |
---|
1617 | awm->insert_menu_topic("quit", "Quit", "Q", "quit.hlp", AWM_ALL, pars_exit); |
---|
1618 | } |
---|
1619 | |
---|
1620 | awm->create_menu("Species", "S", AWM_ALL); |
---|
1621 | { |
---|
1622 | NT_insert_mark_submenus(awm, ntw, 0); |
---|
1623 | |
---|
1624 | } |
---|
1625 | awm->create_menu("Tree", "T", AWM_ALL); |
---|
1626 | { |
---|
1627 | |
---|
1628 | awm->insert_menu_topic("nds", "NDS (Node Display Setup) ...", "N", "props_nds.hlp", AWM_ALL, makeCreateWindowCallback(AWT_create_nds_window, ntw->gb_main)); |
---|
1629 | |
---|
1630 | awm->sep______________(); |
---|
1631 | awm->insert_menu_topic("tree_print", "Print tree ...", "P", "tree2prt.hlp", AWM_ALL, makeWindowCallback(AWT_popup_print_window, static_cast<AWT_canvas*>(ntw))); |
---|
1632 | awm->insert_menu_topic("tree_2_xfig", "Export tree to XFIG ...", "F", "tree2file.hlp", AWM_ALL, makeWindowCallback(AWT_popup_tree_export_window, static_cast<AWT_canvas*>(ntw))); |
---|
1633 | awm->sep______________(); |
---|
1634 | NT_insert_collapse_submenu(awm, ntw); |
---|
1635 | awm->sep______________(); |
---|
1636 | awm->insert_sub_menu("Remove Species from Tree", "R"); |
---|
1637 | { |
---|
1638 | awm->insert_menu_topic("tree_remove_deleted", "Remove Zombies", "Z", "trm_del.hlp", AWM_ALL, makeWindowCallback(NT_remove_leafs, ntw, AWT_REMOVE_ZOMBIES)); |
---|
1639 | awm->insert_menu_topic("tree_remove_marked", "Remove Marked", "M", "trm_mrkd.hlp", AWM_ALL, makeWindowCallback(NT_remove_leafs, ntw, AWT_REMOVE_MARKED)); |
---|
1640 | awm->insert_menu_topic("tree_keep_marked", "Keep Marked", "K", "tkeep_mrkd.hlp", AWM_ALL, makeWindowCallback(NT_remove_leafs, ntw, AWT_KEEP_MARKED)); |
---|
1641 | } |
---|
1642 | awm->close_sub_menu(); |
---|
1643 | awm->insert_sub_menu("Add Species to Tree", "A"); |
---|
1644 | { |
---|
1645 | awm->insert_menu_topic("add_marked", "Add Marked Species", "M", "pa_quick.hlp", AWM_ALL, makeWindowCallback(NT_add_quick, ntw, NT_ADD_MARKED)); |
---|
1646 | awm->insert_menu_topic("add_marked_nni", "Add Marked Species + Local Optimization (NNI)", "N", "pa_add.hlp", AWM_ALL, makeWindowCallback(NT_add_and_NNI, ntw, NT_ADD_MARKED)); |
---|
1647 | awm->insert_menu_topic("rm_add_marked", "Remove & Add Marked Species", "R", "pa_quick.hlp", AWM_ALL, makeWindowCallback(NT_reAdd_quick, ntw, NT_ADD_MARKED)); |
---|
1648 | awm->insert_menu_topic("rm_add_marked_nni|", "Remove & Add Marked + Local Optimization (NNI)", "L", "pa_add.hlp", AWM_ALL, makeWindowCallback(NT_reAdd_and_NNI, ntw, NT_ADD_MARKED)); |
---|
1649 | awm->sep______________(); |
---|
1650 | awm->insert_menu_topic("add_marked_partial", "Add Marked Partial Species", "P", "pa_partial.hlp", AWM_ALL, makeWindowCallback(NT_add_partial_and_update, ntw)); |
---|
1651 | awm->sep______________(); |
---|
1652 | awm->insert_menu_topic("add_selected", "Add Selected Species", "S", "pa_quick.hlp", AWM_ALL, makeWindowCallback(NT_add_quick, ntw, NT_ADD_SELECTED)); |
---|
1653 | awm->insert_menu_topic("add_selected_nni", "Add Selected Species + Local Optimization (NNI)", "O", "pa_add.hlp", AWM_ALL, makeWindowCallback(NT_add_and_NNI, ntw, NT_ADD_SELECTED)); |
---|
1654 | } |
---|
1655 | awm->close_sub_menu(); |
---|
1656 | awm->sep______________(); |
---|
1657 | awm->insert_menu_topic("optimize", "Tree Optimization ...", "O", "pa_optimizer.hlp", AWM_ALL, makeCreateWindowCallback(createOptimizeWindow, ntw)); |
---|
1658 | awm->insert_menu_topic("reset", "Reset optimal parsimony", "s", "pa_reset.hlp", AWM_ALL, pars_reset_optimal_parsimony); |
---|
1659 | awm->sep______________(); |
---|
1660 | awm->insert_menu_topic("beautify_tree", "Beautify Tree", "B", "resorttree.hlp", AWM_ALL, makeWindowCallback(NT_resort_tree_cb, ntw, BIG_BRANCHES_TO_TOP)); |
---|
1661 | awm->insert_menu_topic("calc_branch_lengths", "Calculate Branch Lengths", "L", "pa_branchlengths.hlp", AWM_ALL, makeWindowCallback(NT_calc_branchlengths_reorder_and_update, ntw)); |
---|
1662 | awm->sep______________(); |
---|
1663 | awm->insert_menu_topic("calc_upper_bootstrap_indep", "Calculate Upper Bootstrap Limit (dependent NNI)", "U", "pa_bootstrap.hlp", AWM_ALL, makeWindowCallback(NT_bootstrap, ntw, false)); |
---|
1664 | awm->insert_menu_topic("calc_upper_bootstrap_dep", "Calculate Upper Bootstrap Limit (independent NNI)", "i", "pa_bootstrap.hlp", AWM_ALL, makeWindowCallback(NT_bootstrap, ntw, true)); |
---|
1665 | awm->insert_menu_topic("tree_remove_remark", "Remove bootstrap values", "v", "trm_boot.hlp", AWM_ALL, makeWindowCallback(NT_remove_bootstrap, ntw)); |
---|
1666 | } |
---|
1667 | |
---|
1668 | #if defined(TESTMENU) |
---|
1669 | init_TEST_menu(awm, ntw); |
---|
1670 | #endif // TESTMENU |
---|
1671 | |
---|
1672 | awm->create_menu("Reset", "R", AWM_ALL); |
---|
1673 | { |
---|
1674 | awm->insert_menu_topic("reset_logical_zoom", "Logical Zoom", "L", "rst_log_zoom.hlp", AWM_ALL, makeWindowCallback(NT_reset_lzoom_cb, ntw)); |
---|
1675 | awm->insert_menu_topic("reset_physical_zoom", "Physical Zoom", "P", "rst_phys_zoom.hlp", AWM_ALL, makeWindowCallback(NT_reset_pzoom_cb, ntw)); |
---|
1676 | } |
---|
1677 | |
---|
1678 | awm->create_menu("Properties", "P", AWM_ALL); |
---|
1679 | { |
---|
1680 | awm->insert_menu_topic("props_menu", "Frame settings ...", "F", "props_frame.hlp", AWM_ALL, AW_preset_window); |
---|
1681 | awm->insert_menu_topic("props_tree2", "Tree options", "o", "nt_tree_settings.hlp", AWM_ALL, TREE_create_settings_window); |
---|
1682 | awm->insert_menu_topic("props_tree", "Tree colors & fonts", "c", "color_props.hlp", AWM_ALL, makeCreateWindowCallback(AW_create_gc_window, ntw->gc_manager)); |
---|
1683 | awm->insert_menu_topic("props_kl", "Optimizer settings (KL)", "K", "kernlin.hlp", AWM_ALL, makeCreateWindowCallback(create_kernighan_properties_window)); |
---|
1684 | awm->sep______________(); |
---|
1685 | AW_insert_common_property_menu_entries(awm); |
---|
1686 | awm->sep______________(); |
---|
1687 | awm->insert_menu_topic("save_props", "Save Defaults (pars.arb)", "D", "savedef.hlp", AWM_ALL, AW_save_properties); |
---|
1688 | } |
---|
1689 | awm->button_length(5); |
---|
1690 | |
---|
1691 | awm->insert_help_topic("ARB_PARSIMONY help", "P", "arb_pars.hlp", AWM_ALL, makeHelpCallback("arb_pars.hlp")); |
---|
1692 | |
---|
1693 | // ---------------------- |
---|
1694 | // mode buttons |
---|
1695 | // |
---|
1696 | // keep them synchronized as far as possible with those in ARB_PARSIMONY |
---|
1697 | // see ../NTREE/NT_extern.cxx@keepModesSynchronized |
---|
1698 | |
---|
1699 | awm->create_mode("mode_select.xpm", "mode_select.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_SELECT)); |
---|
1700 | awm->create_mode("mode_mark.xpm", "mode_mark.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_MARK)); |
---|
1701 | awm->create_mode("mode_group.xpm", "mode_group.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_GROUP)); |
---|
1702 | awm->create_mode("mode_zoom.xpm", "mode_pzoom.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_ZOOM)); |
---|
1703 | awm->create_mode("mode_lzoom.xpm", "mode_lzoom.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_LZOOM)); |
---|
1704 | |
---|
1705 | awm->create_mode("mode_info.xpm", "mode_info.hlp", AWM_ALL, makeWindowCallback(PARS_infomode_cb, ntw, AWT_MODE_INFO)); |
---|
1706 | // reserve mode-locations (to put the modes below at the same position as in ARB_NT) |
---|
1707 | awm->create_mode("mode_empty.xpm", "mode.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_EMPTY)); |
---|
1708 | |
---|
1709 | // topology-modification-modes |
---|
1710 | awm->create_mode("mode_setroot.xpm", "mode_setroot.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_SETROOT)); |
---|
1711 | awm->create_mode("mode_swap.xpm", "mode_swap.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_SWAP)); |
---|
1712 | awm->create_mode("mode_move.xpm", "mode_move.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_MOVE)); |
---|
1713 | |
---|
1714 | awm->create_mode("mode_nni.xpm", "mode_nni.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_NNI)); |
---|
1715 | awm->create_mode("mode_kernlin.xpm", "mode_kernlin.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_KERNINGHAN)); |
---|
1716 | awm->create_mode("mode_optimize.xpm", "mode_optimize.hlp", AWM_ALL, makeWindowCallback(nt_mode_event, ntw, AWT_MODE_OPTIMIZE)); |
---|
1717 | |
---|
1718 | awm->at(5, 2); |
---|
1719 | awm->auto_space(0, -2); |
---|
1720 | awm->shadow_width(1); |
---|
1721 | |
---|
1722 | |
---|
1723 | int db_treex, db_treey; |
---|
1724 | awm->get_at_position(&db_treex, &db_treey); |
---|
1725 | awm->callback(makeHelpCallback("nt_tree_select.hlp")); |
---|
1726 | awm->button_length(16); |
---|
1727 | awm->help_text("nt_tree_select.hlp"); |
---|
1728 | awm->create_button(NULp, AWAR_TREE); |
---|
1729 | |
---|
1730 | |
---|
1731 | int db_stackx, db_stacky; |
---|
1732 | awm->label_length(8); |
---|
1733 | awm->label("Stored"); |
---|
1734 | awm->get_at_position(&db_stackx, &db_stacky); |
---|
1735 | awm->button_length(6); |
---|
1736 | awm->callback(makeHelpCallback("ap_stack.hlp")); |
---|
1737 | awm->help_text("ap_stack.hlp"); |
---|
1738 | awm->create_button(NULp, AWAR_STACKPOINTER); |
---|
1739 | |
---|
1740 | int db_parsx, db_parsy; |
---|
1741 | awm->label_length(14); |
---|
1742 | awm->label("Current Pars:"); |
---|
1743 | awm->get_at_position(&db_parsx, &db_parsy); |
---|
1744 | |
---|
1745 | awm->button_length(10); |
---|
1746 | awm->create_button(NULp, AWAR_PARSIMONY, NULp, "+"); |
---|
1747 | |
---|
1748 | awm->button_length(0); |
---|
1749 | |
---|
1750 | awm->callback(makeWindowCallback(NT_jump_cb, ntw, AP_JUMP_BY_BUTTON)); |
---|
1751 | awm->help_text("tr_jump.hlp"); |
---|
1752 | awm->create_button("JUMP", "Jump"); |
---|
1753 | |
---|
1754 | awm->callback(makeHelpCallback("arb_pars.hlp")); |
---|
1755 | awm->help_text("help.hlp"); |
---|
1756 | awm->create_button("HELP", "HELP", "H"); |
---|
1757 | |
---|
1758 | awm->at_newline(); |
---|
1759 | |
---|
1760 | awm->button_length(8); |
---|
1761 | awm->at_x(db_stackx); |
---|
1762 | awm->callback(makeWindowCallback(AP_user_pop_cb, ntw)); |
---|
1763 | awm->help_text("ap_stack.hlp"); |
---|
1764 | awm->create_button("POP", "RESTORE"); |
---|
1765 | |
---|
1766 | awm->button_length(6); |
---|
1767 | awm->callback(AP_user_push_cb); |
---|
1768 | awm->help_text("ap_stack.hlp"); |
---|
1769 | awm->create_button("PUSH", "STORE"); |
---|
1770 | |
---|
1771 | awm->at_x(db_parsx); |
---|
1772 | awm->label_length(14); |
---|
1773 | awm->label("Optimal Pars:"); |
---|
1774 | |
---|
1775 | awm->button_length(10); |
---|
1776 | awm->create_button(NULp, AWAR_BEST_PARSIMONY, NULp, "+"); |
---|
1777 | |
---|
1778 | awm->button_length(0); |
---|
1779 | awm->auto_space(0, -2); |
---|
1780 | |
---|
1781 | awm->at_x(db_treex); |
---|
1782 | awm->callback(makeWindowCallback(NT_set_tree_style, ntw, AP_TREE_RADIAL)); |
---|
1783 | awm->help_text("tr_type_radial.hlp"); |
---|
1784 | awm->create_button("RADIAL_TREE", "#radial.xpm"); |
---|
1785 | |
---|
1786 | awm->callback(makeWindowCallback(NT_set_tree_style, ntw, AP_TREE_NORMAL)); |
---|
1787 | awm->help_text("tr_type_list.hlp"); |
---|
1788 | awm->create_button("LIST_TREE", "#dendro.xpm"); |
---|
1789 | |
---|
1790 | awm->at_newline(); |
---|
1791 | awm->at(db_treex, awm->get_at_yposition()); |
---|
1792 | |
---|
1793 | { |
---|
1794 | SmartPtr<AW_at_storage> maxSize(AW_at_storage::make(awm, AW_AT_MAXSIZE)); |
---|
1795 | |
---|
1796 | awm->button_length(AWAR_FOOTER_MAX_LEN); |
---|
1797 | awm->create_button(NULp, AWAR_FOOTER); |
---|
1798 | awm->at_newline(); |
---|
1799 | awm->restore_at_from(*maxSize); |
---|
1800 | } |
---|
1801 | |
---|
1802 | awm->get_at_position(&db_treex, &db_treey); |
---|
1803 | awm->set_info_area_height(db_treey); |
---|
1804 | |
---|
1805 | awm->set_bottom_area_height(0); |
---|
1806 | |
---|
1807 | aw_parent->hide(); // hide parent |
---|
1808 | awm->show(); |
---|
1809 | |
---|
1810 | TREE_install_update_callbacks(ntw); |
---|
1811 | |
---|
1812 | update_random_repeat(awr, AWT_TREE_PARS(ntw)); |
---|
1813 | AP_user_push_cb(aw_parent); // push initial tree |
---|
1814 | set_keep_ghostnodes(); // make sure no stacked nodes get deleted |
---|
1815 | } |
---|
1816 | |
---|
1817 | static AW_window *create_pars_init_window(AW_root *awr, const PARS_commands *cmds) { |
---|
1818 | AW_window_simple *aws = new AW_window_simple; |
---|
1819 | aws->init(awr, "PARS_PROPS", "SET PARSIMONY OPTIONS"); |
---|
1820 | aws->load_xfig("pars/init.fig"); |
---|
1821 | |
---|
1822 | aws->button_length(10); |
---|
1823 | aws->label_length(10); |
---|
1824 | |
---|
1825 | aws->callback(pars_exit); |
---|
1826 | aws->at("close"); |
---|
1827 | aws->create_button("ABORT", "ABORT", "A"); |
---|
1828 | |
---|
1829 | aws->callback(makeHelpCallback("arb_pars_init.hlp")); |
---|
1830 | aws->at("help"); |
---|
1831 | aws->create_button("HELP", "HELP", "H"); |
---|
1832 | |
---|
1833 | GBDATA *gb_main = ap_main->get_gb_main(); |
---|
1834 | WeightedFilter *weighted_filter = // do NOT free (bound to callbacks) |
---|
1835 | new WeightedFilter(gb_main, aws->get_root(), AWAR_FILTER_NAME, AWAR_COLUMNSTAT_NAME, aws->get_root()->awar_string(AWAR_ALIGNMENT)); |
---|
1836 | |
---|
1837 | aws->at("filter"); |
---|
1838 | aws->callback(makeCreateWindowCallback(awt_create_select_filter_win, weighted_filter->get_adfiltercbstruct())); |
---|
1839 | aws->create_button("SELECT_FILTER", AWAR_FILTER_NAME); |
---|
1840 | |
---|
1841 | aws->at("weights"); |
---|
1842 | aws->callback(makeCreateWindowCallback(COLSTAT_create_selection_window, weighted_filter->get_column_stat())); |
---|
1843 | aws->sens_mask(AWM_EXP); |
---|
1844 | aws->create_button("SELECT_CSP", AWAR_COLUMNSTAT_NAME); |
---|
1845 | aws->sens_mask(AWM_ALL); |
---|
1846 | |
---|
1847 | aws->at("alignment"); |
---|
1848 | awt_create_ALI_selection_list(gb_main, aws, AWAR_ALIGNMENT, "*="); |
---|
1849 | |
---|
1850 | aws->at("tree"); |
---|
1851 | awt_create_TREE_selection_list(gb_main, aws, AWAR_TREE, true); |
---|
1852 | |
---|
1853 | aws->callback(makeWindowCallback(pars_start_cb, weighted_filter, cmds)); |
---|
1854 | aws->at("go"); |
---|
1855 | aws->create_button("GO", "GO", "G"); |
---|
1856 | |
---|
1857 | return aws; |
---|
1858 | } |
---|
1859 | |
---|
1860 | KL_Settings::KL_Settings(AW_root *aw_root) { |
---|
1861 | maxdepth = aw_root->awar(AWAR_KL_MAXDEPTH)->read_int(); |
---|
1862 | incdepth = aw_root->awar(AWAR_KL_INCDEPTH)->read_int(); |
---|
1863 | |
---|
1864 | Static.enabled = aw_root->awar(AWAR_KL_STATIC_ENABLED)->read_int(); |
---|
1865 | Static.depth[0] = 2; // always test both possibilities at starting edge |
---|
1866 | Static.depth[1] = aw_root->awar(AWAR_KL_STATIC_DEPTH1)->read_int(); |
---|
1867 | Static.depth[2] = aw_root->awar(AWAR_KL_STATIC_DEPTH2)->read_int(); |
---|
1868 | Static.depth[3] = aw_root->awar(AWAR_KL_STATIC_DEPTH3)->read_int(); |
---|
1869 | Static.depth[4] = aw_root->awar(AWAR_KL_STATIC_DEPTH4)->read_int(); |
---|
1870 | Static.depth[5] = aw_root->awar(AWAR_KL_STATIC_DEPTH5)->read_int(); |
---|
1871 | |
---|
1872 | Dynamic.enabled = aw_root->awar(AWAR_KL_DYNAMIC_ENABLED)->read_int(); |
---|
1873 | Dynamic.start = aw_root->awar(AWAR_KL_DYNAMIC_START)->read_int(); |
---|
1874 | Dynamic.maxx = aw_root->awar(AWAR_KL_DYNAMIC_MAXX)->read_int(); |
---|
1875 | Dynamic.maxy = aw_root->awar(AWAR_KL_DYNAMIC_MAXY)->read_int(); |
---|
1876 | Dynamic.type = (KL_DYNAMIC_THRESHOLD_TYPE)aw_root->awar(AWAR_KL_FUNCTION_TYPE)->read_int(); |
---|
1877 | |
---|
1878 | whichEdges = ANY_EDGE; |
---|
1879 | if (aw_root->awar(AWAR_OPTI_MARKED_ONLY)->read_int()) whichEdges = EdgeSpec(whichEdges|SKIP_UNMARKED_EDGES); |
---|
1880 | if (aw_root->awar(AWAR_OPTI_SKIP_FOLDED)->read_int()) whichEdges = EdgeSpec(whichEdges|SKIP_FOLDED_EDGES); |
---|
1881 | } |
---|
1882 | #if defined(UNIT_TESTS) |
---|
1883 | KL_Settings::KL_Settings() { |
---|
1884 | // set default values |
---|
1885 | maxdepth = 15; |
---|
1886 | |
---|
1887 | Static.enabled = true; |
---|
1888 | Static.depth[0] = 2; // always test both possibilities at starting edge |
---|
1889 | Static.depth[1] = 8; |
---|
1890 | Static.depth[2] = 6; |
---|
1891 | Static.depth[3] = 6; |
---|
1892 | Static.depth[4] = 6; |
---|
1893 | Static.depth[5] = 6; |
---|
1894 | |
---|
1895 | Dynamic.enabled = true; |
---|
1896 | Dynamic.start = 100; |
---|
1897 | Dynamic.maxy = 150; |
---|
1898 | Dynamic.maxx = 6; |
---|
1899 | |
---|
1900 | // these values do not seem to have any effect (i.e. are not covered by unit-tests): |
---|
1901 | incdepth = 4; |
---|
1902 | |
---|
1903 | // const setting (not configurable) |
---|
1904 | Dynamic.type = AP_QUADRAT_START; |
---|
1905 | whichEdges = EdgeSpec(SKIP_UNMARKED_EDGES|SKIP_FOLDED_EDGES); |
---|
1906 | } |
---|
1907 | #endif |
---|
1908 | |
---|
1909 | static void create_optimize_vars(AW_root *aw_root, AW_default props) { |
---|
1910 | // kernighan |
---|
1911 | |
---|
1912 | aw_root->awar_int(AWAR_OPTI_MARKED_ONLY, 1, props); |
---|
1913 | aw_root->awar_int(AWAR_OPTI_SKIP_FOLDED, 1, props); |
---|
1914 | |
---|
1915 | aw_root->awar_int(AWAR_KL_MAXDEPTH, 15, props); |
---|
1916 | aw_root->awar_int(AWAR_KL_INCDEPTH, 4, props); |
---|
1917 | |
---|
1918 | aw_root->awar_int(AWAR_KL_STATIC_ENABLED, 1, props); |
---|
1919 | aw_root->awar_int(AWAR_KL_STATIC_DEPTH1, 5, props)->set_minmax(1, 8); |
---|
1920 | aw_root->awar_int(AWAR_KL_STATIC_DEPTH2, 3, props)->set_minmax(1, 6); |
---|
1921 | aw_root->awar_int(AWAR_KL_STATIC_DEPTH3, 2, props)->set_minmax(1, 6); |
---|
1922 | aw_root->awar_int(AWAR_KL_STATIC_DEPTH4, 2, props)->set_minmax(1, 6); |
---|
1923 | aw_root->awar_int(AWAR_KL_STATIC_DEPTH5, 1, props)->set_minmax(1, 6); |
---|
1924 | |
---|
1925 | aw_root->awar_int(AWAR_KL_DYNAMIC_ENABLED, 1, props); |
---|
1926 | aw_root->awar_int(AWAR_KL_DYNAMIC_START, 100, props); |
---|
1927 | aw_root->awar_int(AWAR_KL_DYNAMIC_MAXX, 6, props); |
---|
1928 | aw_root->awar_int(AWAR_KL_DYNAMIC_MAXY, 150, props); |
---|
1929 | |
---|
1930 | aw_root->awar_int(AWAR_KL_FUNCTION_TYPE, AP_QUADRAT_START, props); |
---|
1931 | } |
---|
1932 | |
---|
1933 | static void pars_create_all_awars(AW_root *awr, AW_default aw_def, GBDATA *gb_main) { |
---|
1934 | awr->awar_string(AWAR_SPECIES_NAME, "", gb_main); |
---|
1935 | awr->awar_string(AWAR_FOOTER, "", aw_def); |
---|
1936 | |
---|
1937 | // copy currently selected alignment to awar: |
---|
1938 | { |
---|
1939 | GB_transaction ta(gb_main); |
---|
1940 | char *dali = GBT_get_default_alignment(gb_main); |
---|
1941 | awr->awar_string(AWAR_ALIGNMENT, dali, gb_main)->write_string(dali); |
---|
1942 | free(dali); |
---|
1943 | } |
---|
1944 | |
---|
1945 | awt_create_filter_awars(awr, aw_def, AWAR_FILTER_NAME, AWAR_ALIGNMENT); |
---|
1946 | awt_set_awar_to_valid_filter_good_for_tree_methods(gb_main, awr, AWAR_FILTER_NAME); |
---|
1947 | |
---|
1948 | awr->awar_int(AWAR_PARS_TYPE, PARS_WAGNER, gb_main); |
---|
1949 | |
---|
1950 | { |
---|
1951 | GB_transaction ta(gb_main); |
---|
1952 | GBDATA *gb_tree_name = GB_search(gb_main, AWAR_TREE, GB_STRING); |
---|
1953 | char *tree_name = GB_read_string(gb_tree_name); |
---|
1954 | |
---|
1955 | awr->awar_string(AWAR_TREE, "", aw_def)->write_string(tree_name); |
---|
1956 | free(tree_name); |
---|
1957 | } |
---|
1958 | |
---|
1959 | awr->awar_int(AWAR_PARSIMONY, 0, aw_def); |
---|
1960 | awr->awar_int(AWAR_BEST_PARSIMONY, 0, aw_def); |
---|
1961 | awr->awar_int(AWAR_STACKPOINTER, 0, aw_def); |
---|
1962 | |
---|
1963 | awr->awar_int(AWAR_RAND_REPEAT, 1, aw_def)->set_minmax(1, 1000000); // default value is overwritten by update_random_repeat() |
---|
1964 | awr->awar_int(AWAR_RAND_PERCENT, 50, aw_def)->set_minmax(1, 100); |
---|
1965 | |
---|
1966 | create_optimize_vars(awr, aw_def); |
---|
1967 | create_nds_vars(awr, aw_def, gb_main, false); |
---|
1968 | |
---|
1969 | TREE_create_awars(awr, gb_main); |
---|
1970 | |
---|
1971 | #if defined(DEBUG) |
---|
1972 | AWT_create_db_browser_awars(awr, aw_def); |
---|
1973 | #endif // DEBUG |
---|
1974 | |
---|
1975 | GB_ERROR error = ARB_init_global_awars(awr, aw_def, gb_main); |
---|
1976 | if (error) aw_message(error); |
---|
1977 | } |
---|
1978 | |
---|
1979 | static AW_root *AD_map_viewer_aw_root = NULp; |
---|
1980 | |
---|
1981 | void PARS_map_viewer(GBDATA *gb_species, AD_MAP_VIEWER_TYPE vtype) { |
---|
1982 | // Note: sync with ../NTREE/ad_spec.cxx@launch_MapViewer_cb |
---|
1983 | |
---|
1984 | if (AD_map_viewer_aw_root && |
---|
1985 | gb_species && |
---|
1986 | (vtype == ADMVT_SELECT || vtype == ADMVT_INFO)) |
---|
1987 | { |
---|
1988 | AD_map_viewer_aw_root->awar(AWAR_SPECIES_NAME)->write_string(null2empty(GBT_get_name(gb_species))); |
---|
1989 | } |
---|
1990 | } |
---|
1991 | |
---|
1992 | int ARB_main(int argc, char *argv[]) { |
---|
1993 | aw_initstatus(); |
---|
1994 | |
---|
1995 | GB_shell shell; |
---|
1996 | AW_root *aw_root = AWT_create_root("pars.arb", "ARB_PARS", need_macro_ability()); |
---|
1997 | AD_map_viewer_aw_root = aw_root; |
---|
1998 | |
---|
1999 | ap_main = new AP_main; |
---|
2000 | GLOBAL_PARS = new ArbParsimony(); |
---|
2001 | |
---|
2002 | const char *db_server = ":"; |
---|
2003 | |
---|
2004 | PARS_commands cmds; |
---|
2005 | |
---|
2006 | while (argc>=2 && argv[1][0] == '-') { |
---|
2007 | argc--; |
---|
2008 | argv++; |
---|
2009 | if (!strcmp(argv[0], "-quit")) cmds.quit = 1; |
---|
2010 | else if (!strcmp(argv[0], "-add_marked")) cmds.add_marked = 1; |
---|
2011 | else if (!strcmp(argv[0], "-add_selected")) cmds.add_selected = 1; |
---|
2012 | else if (!strcmp(argv[0], "-calc_branchlengths")) cmds.calc_branch_lengths = 1; |
---|
2013 | else if (!strcmp(argv[0], "-calc_bootstrap")) cmds.calc_bootstrap = 1; |
---|
2014 | else { |
---|
2015 | fprintf(stderr, "Unknown option '%s'\n", argv[0]); |
---|
2016 | |
---|
2017 | printf(" Options: Meaning:\n" |
---|
2018 | "\n" |
---|
2019 | " -add_marked add marked species (without changing topology)\n" |
---|
2020 | " -add_selected add selected species (without changing topology)\n" |
---|
2021 | " -calc_branchlengths calculate branch lengths only\n" |
---|
2022 | " -calc_bootstrap estimate bootstrap values\n" |
---|
2023 | " -quit quit after performing operations\n" |
---|
2024 | ); |
---|
2025 | |
---|
2026 | exit(EXIT_FAILURE); |
---|
2027 | } |
---|
2028 | } |
---|
2029 | |
---|
2030 | |
---|
2031 | if (argc==2) db_server = argv[1]; |
---|
2032 | |
---|
2033 | GB_ERROR error = ap_main->open(db_server); |
---|
2034 | if (!error) { |
---|
2035 | GBDATA *gb_main = ap_main->get_gb_main(); |
---|
2036 | error = configure_macro_recording(aw_root, "ARB_PARS", gb_main); |
---|
2037 | |
---|
2038 | if (!error) { |
---|
2039 | #if defined(DEBUG) |
---|
2040 | AWT_announce_db_to_browser(gb_main, GBS_global_string("ARB-database (%s)", db_server)); |
---|
2041 | #endif // DEBUG |
---|
2042 | |
---|
2043 | pars_create_all_awars(aw_root, AW_ROOT_DEFAULT, gb_main); |
---|
2044 | |
---|
2045 | AW_window *aww = create_pars_init_window(aw_root, &cmds); |
---|
2046 | aww->show(); |
---|
2047 | |
---|
2048 | AWT_install_cb_guards(); |
---|
2049 | aw_root->main_loop(); |
---|
2050 | } |
---|
2051 | } |
---|
2052 | |
---|
2053 | if (error) aw_popup_exit(error); |
---|
2054 | return EXIT_SUCCESS; |
---|
2055 | } |
---|
2056 | |
---|
2057 | |
---|
2058 | // -------------------------------------------------------------------------------- |
---|
2059 | |
---|
2060 | #ifdef UNIT_TESTS |
---|
2061 | #include <arb_file.h> |
---|
2062 | #include <arb_diff.h> |
---|
2063 | #include <test_unit.h> |
---|
2064 | #include <AP_seq_dna.hxx> |
---|
2065 | #include <AP_seq_protein.hxx> |
---|
2066 | #include "test_env.h" |
---|
2067 | |
---|
2068 | // #define AUTO_UPDATE_IF_CHANGED // uncomment to auto update expected results |
---|
2069 | |
---|
2070 | static arb_test::match_expectation topologyEquals(AP_tree_nlen *root_node, const char *treefile_base) { |
---|
2071 | using namespace arb_test; |
---|
2072 | expectation_group fulfilled; |
---|
2073 | |
---|
2074 | char *outfile = GBS_global_string_copy("pars/%s.tree", treefile_base); |
---|
2075 | char *expected = GBS_global_string_copy("%s.expected", outfile); |
---|
2076 | bool update = false; |
---|
2077 | |
---|
2078 | { |
---|
2079 | FILE *out = fopen(outfile, "wt"); |
---|
2080 | fulfilled.add(that(out).does_differ_from_NULL()); |
---|
2081 | char *newick = GBT_tree_2_newick(root_node, NewickFormat(nLENGTH|nWRAP), false); |
---|
2082 | fputs(newick, out); |
---|
2083 | free(newick); |
---|
2084 | fclose(out); |
---|
2085 | } |
---|
2086 | |
---|
2087 | if (GB_is_regularfile(expected)) { |
---|
2088 | bool match_exp_topo = textfiles_have_difflines(outfile,expected,0); |
---|
2089 | #if defined(AUTO_UPDATE_IF_CHANGED) |
---|
2090 | if (!match_exp_topo) update = true; |
---|
2091 | #endif |
---|
2092 | if (!update) fulfilled.add(that(match_exp_topo).is_equal_to(true)); |
---|
2093 | } |
---|
2094 | else { |
---|
2095 | update = true; |
---|
2096 | } |
---|
2097 | |
---|
2098 | if (update) TEST_COPY_FILE(outfile, expected); |
---|
2099 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(outfile)); |
---|
2100 | |
---|
2101 | free(expected); |
---|
2102 | free(outfile); |
---|
2103 | |
---|
2104 | return all().ofgroup(fulfilled); |
---|
2105 | } |
---|
2106 | |
---|
2107 | template<class ENV> |
---|
2108 | arb_test::match_expectation calcCostsCausesCombines(ENV& env, AP_FLOAT exp_pars, long exp_combines) { |
---|
2109 | using namespace arb_test; |
---|
2110 | expectation_group fulfilled; |
---|
2111 | |
---|
2112 | long combines_b4_costCalc = env.combines_performed(); |
---|
2113 | fulfilled.add(that(combines_b4_costCalc).is_equal_to(0)); |
---|
2114 | |
---|
2115 | AP_FLOAT new_pars = env.root_node()->costs(); |
---|
2116 | long combines_by_costCalc = env.combines_performed(); |
---|
2117 | |
---|
2118 | fulfilled.add(that(new_pars).fulfills(epsilon_similar(0.001), exp_pars)); |
---|
2119 | fulfilled.add(that(combines_by_costCalc).is_equal_to(exp_combines)); |
---|
2120 | |
---|
2121 | return all().ofgroup(fulfilled); |
---|
2122 | } |
---|
2123 | |
---|
2124 | #define TEST_EXPECT_SAVED_TOPOLOGY(env,exp_topo) TEST_EXPECTATION(topologyEquals(env.root_node(), exp_topo)) |
---|
2125 | #define TEST_EXPECT_SAVED_TOPOLOGY__BROKEN(env,exp_topo,got_topo) TEST_EXPECTATION__BROKEN(topologyEquals(env.root_node(), exp_topo), topologyEquals(env.root_node(), got_topo)) |
---|
2126 | |
---|
2127 | #define TEST_EXPECT_PARSVAL(env,exp_pars) TEST_EXPECT_EQUAL(env.root_node()->costs(), exp_pars); |
---|
2128 | #define TEST_EXPECT_ONLY_PARSVAL_COMBINES(env,exp_pars,exp_combines) TEST_EXPECTATION(calcCostsCausesCombines(env, exp_pars, exp_combines)) |
---|
2129 | // use TEST_EXPECT_ONLY_PARSVAL_COMBINES when |
---|
2130 | // - no combines occurred (or combines were just tested using TEST_EXPECT_COMBINES_PERFORMED) and |
---|
2131 | // - topology was modified, so that calculation of costs causes new combines. |
---|
2132 | #define TEST_EXPECT_KNOWN_PARSVAL(env,exp_pars) TEST_EXPECT_ONLY_PARSVAL_COMBINES(env,exp_pars,0) |
---|
2133 | |
---|
2134 | enum TopoMod { |
---|
2135 | MOD_REMOVE_MARKED, |
---|
2136 | |
---|
2137 | MOD_QUICK_READD, |
---|
2138 | MOD_QUICK_ADD, |
---|
2139 | MOD_READD_NNI, |
---|
2140 | |
---|
2141 | MOD_ADD_PARTIAL, |
---|
2142 | |
---|
2143 | MOD_CALC_LENS, |
---|
2144 | MOD_OPTI_NNI, |
---|
2145 | MOD_OPTI_GLOBAL, |
---|
2146 | |
---|
2147 | MOD_MIX_TREE, |
---|
2148 | }; |
---|
2149 | |
---|
2150 | template <typename SEQ> |
---|
2151 | static void modifyTopology(PARSIMONY_testenv<SEQ>& env, TopoMod mod) { |
---|
2152 | switch (mod) { |
---|
2153 | case MOD_REMOVE_MARKED: |
---|
2154 | env.graphic_tree()->get_tree_root()->remove_leafs(AWT_REMOVE_MARKED); |
---|
2155 | break; |
---|
2156 | |
---|
2157 | case MOD_QUICK_READD: |
---|
2158 | nt_reAdd(env.graphic_tree(), NT_ADD_MARKED, true); |
---|
2159 | break; |
---|
2160 | |
---|
2161 | case MOD_QUICK_ADD: |
---|
2162 | nt_add(env.graphic_tree(), NT_ADD_MARKED, true); |
---|
2163 | break; |
---|
2164 | |
---|
2165 | case MOD_READD_NNI: |
---|
2166 | nt_reAdd(env.graphic_tree(), NT_ADD_MARKED, false); |
---|
2167 | break; |
---|
2168 | |
---|
2169 | case MOD_ADD_PARTIAL: |
---|
2170 | nt_add_partial(env.graphic_tree()); |
---|
2171 | break; |
---|
2172 | |
---|
2173 | case MOD_CALC_LENS: |
---|
2174 | calc_branchlengths_and_reorder(env.graphic_tree()); |
---|
2175 | break; |
---|
2176 | |
---|
2177 | case MOD_OPTI_NNI: // only marked/unfolded |
---|
2178 | recursiveNNI(env.graphic_tree(), env.get_KL_settings().whichEdges); |
---|
2179 | break; |
---|
2180 | |
---|
2181 | case MOD_OPTI_GLOBAL: |
---|
2182 | optimizeTree(env.graphic_tree(), env.get_KL_settings()); |
---|
2183 | break; |
---|
2184 | |
---|
2185 | case MOD_MIX_TREE: { |
---|
2186 | long leafs = rootNode()->count_leafs(); |
---|
2187 | mixtree_and_calclengths(env.graphic_tree(), calculate_default_random_repeat(leafs), 100, ANY_EDGE); |
---|
2188 | break; |
---|
2189 | } |
---|
2190 | } |
---|
2191 | } |
---|
2192 | |
---|
2193 | template <typename SEQ> |
---|
2194 | static arb_test::match_expectation modifyingTopoResultsIn(TopoMod mod, const char *topo, long pars_expected, PARSIMONY_testenv<SEQ>& env, bool restore) { |
---|
2195 | using namespace arb_test; |
---|
2196 | expectation_group fulfilled; |
---|
2197 | |
---|
2198 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
2199 | |
---|
2200 | Level upc = env.get_user_push_counter(); |
---|
2201 | Level fl = env.get_frame_level(); |
---|
2202 | |
---|
2203 | if (restore) { |
---|
2204 | env.push(); |
---|
2205 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
2206 | } |
---|
2207 | |
---|
2208 | AWT_graphic_exports& exports = env.graphic_tree()->exports; |
---|
2209 | exports.clear_save_request(); |
---|
2210 | modifyTopology(env, mod); |
---|
2211 | if (topo) { |
---|
2212 | fulfilled.add(topologyEquals(env.root_node(), topo)); |
---|
2213 | if (mod != MOD_REMOVE_MARKED) { // remove_leafs doesn't request save |
---|
2214 | fulfilled.add(that(exports.needs_save()).is_equal_to(true)); |
---|
2215 | } |
---|
2216 | } |
---|
2217 | |
---|
2218 | fulfilled.add(that(allBranchlengthsAreDefined(env.root_node())).is_equal_to(true)); |
---|
2219 | |
---|
2220 | if (pars_expected != -1) { |
---|
2221 | fulfilled.add(that(env.root_node()->costs()).is_equal_to(pars_expected)); |
---|
2222 | } |
---|
2223 | |
---|
2224 | if (restore) { |
---|
2225 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
2226 | TEST_VALIDITY(env.pop_will_produce_valid_tree()); |
---|
2227 | env.pop(); |
---|
2228 | bool blen_def_after_pop = allBranchlengthsAreDefined(env.root_node()); |
---|
2229 | fulfilled.add(that(blen_def_after_pop).is_equal_to(true)); |
---|
2230 | } |
---|
2231 | |
---|
2232 | TEST_EXPECT_EQUAL(fl, env.get_frame_level()); |
---|
2233 | TEST_EXPECT_EQUAL(upc, env.get_user_push_counter()); |
---|
2234 | |
---|
2235 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
2236 | |
---|
2237 | return all().ofgroup(fulfilled); |
---|
2238 | } |
---|
2239 | |
---|
2240 | static arb_test::match_expectation movingRootDoesntAffectCosts(long pars_expected) { |
---|
2241 | using namespace arb_test; |
---|
2242 | expectation_group fulfilled; |
---|
2243 | |
---|
2244 | long pars_min = LONG_MAX; |
---|
2245 | long pars_max = LONG_MIN; |
---|
2246 | |
---|
2247 | for (int depth_first = 0; depth_first<=1; ++depth_first) { |
---|
2248 | for (int push_local = 0; push_local<=1; ++push_local) { |
---|
2249 | EdgeChain chain(rootEdge(), ANY_EDGE, depth_first); |
---|
2250 | |
---|
2251 | if (!push_local) ap_main->remember(); |
---|
2252 | while (chain) { |
---|
2253 | AP_tree_edge *edge = *chain; ++chain; |
---|
2254 | |
---|
2255 | if (push_local) ap_main->remember(); |
---|
2256 | edge->set_root(); |
---|
2257 | long pars = rootNode()->costs(); |
---|
2258 | pars_min = std::min(pars, pars_min); |
---|
2259 | pars_max = std::max(pars, pars_max); |
---|
2260 | if (push_local) ap_main->revert(); |
---|
2261 | } |
---|
2262 | if (!push_local) ap_main->revert(); |
---|
2263 | } |
---|
2264 | } |
---|
2265 | |
---|
2266 | fulfilled.add(that(pars_min).is_equal_to(pars_expected)); |
---|
2267 | fulfilled.add(that(pars_max).is_equal_to(pars_expected)); |
---|
2268 | |
---|
2269 | return all().ofgroup(fulfilled); |
---|
2270 | } |
---|
2271 | |
---|
2272 | static GBDATA *copy_to(GBDATA *gb_species, const char *newShortname) { |
---|
2273 | GBDATA *gb_species_data = GB_get_father(gb_species); |
---|
2274 | GBDATA *gb_new_species = GB_create_container(gb_species_data, "species"); |
---|
2275 | |
---|
2276 | GB_ERROR error = NULp; |
---|
2277 | if (!gb_new_species) { |
---|
2278 | error = GB_await_error(); |
---|
2279 | } |
---|
2280 | else { |
---|
2281 | error = GB_copy_dropProtectMarksAndTempstate(gb_new_species, gb_species); |
---|
2282 | if (!error) error = GBT_write_string(gb_new_species, "name", newShortname); |
---|
2283 | } |
---|
2284 | |
---|
2285 | ap_assert(contradicted(gb_new_species, error)); |
---|
2286 | return gb_new_species; |
---|
2287 | } |
---|
2288 | |
---|
2289 | inline void mark_only(GBDATA *gb_species) { |
---|
2290 | GBDATA *gb_main = GB_get_root(gb_species); |
---|
2291 | GB_transaction ta(gb_main); |
---|
2292 | GBT_mark_all(gb_main, 0); |
---|
2293 | GB_write_flag(gb_species, 1); |
---|
2294 | } |
---|
2295 | inline void mark(GBDATA *gb_species) { |
---|
2296 | GBDATA *gb_main = GB_get_root(gb_species); |
---|
2297 | GB_transaction ta(gb_main); |
---|
2298 | GB_write_flag(gb_species, 1); |
---|
2299 | } |
---|
2300 | inline void mark_all(GBDATA *gb_main) { |
---|
2301 | GB_transaction ta(gb_main); |
---|
2302 | GBT_mark_all(gb_main, 1); |
---|
2303 | } |
---|
2304 | |
---|
2305 | inline int is_partial(GBDATA *gb_species) { |
---|
2306 | GB_transaction ta(gb_species); |
---|
2307 | return GBT_is_partial(gb_species, -1, false); |
---|
2308 | } |
---|
2309 | |
---|
2310 | template <typename SEQ> |
---|
2311 | static arb_test::match_expectation addedAsBrotherOf(const char *name, const char *allowedBrothers, PARSIMONY_testenv<SEQ>& env) { |
---|
2312 | using namespace arb_test; |
---|
2313 | expectation_group fulfilled; |
---|
2314 | |
---|
2315 | AP_tree_nlen *node_in_tree = env.root_node()->findLeafNamed(name); |
---|
2316 | ap_assert(node_in_tree); |
---|
2317 | fulfilled.add(that(node_in_tree).does_differ_from_NULL()); |
---|
2318 | |
---|
2319 | const char *brother = node_in_tree->get_brother()->name; |
---|
2320 | ap_assert(brother); |
---|
2321 | fulfilled.add(that(allowedBrothers).does_contain(brother)); |
---|
2322 | |
---|
2323 | return all().ofgroup(fulfilled); |
---|
2324 | } |
---|
2325 | |
---|
2326 | template <typename SEQ> |
---|
2327 | static arb_test::match_expectation addingPartialResultsIn(GBDATA *gb_added_species, const char *allowedBrothers, const char *topo, int pars_expected, PARSIMONY_testenv<SEQ>& env) { |
---|
2328 | using namespace arb_test; |
---|
2329 | expectation_group fulfilled; |
---|
2330 | |
---|
2331 | mark_only(gb_added_species); |
---|
2332 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
---|
2333 | fulfilled.add(modifyingTopoResultsIn(MOD_ADD_PARTIAL, topo, pars_expected, env, false)); |
---|
2334 | fulfilled.add(that(is_partial(gb_added_species)).is_equal_to(1)); |
---|
2335 | |
---|
2336 | const char *name = GBT_get_name_or_description(gb_added_species); |
---|
2337 | fulfilled.add(addedAsBrotherOf(name, allowedBrothers, env)); |
---|
2338 | |
---|
2339 | return all().ofgroup(fulfilled); |
---|
2340 | } |
---|
2341 | |
---|
2342 | static int seqDiff(GBDATA *gb_main, const char *aliname, const char *species1, const char *species2, int startPos, int endPos) { |
---|
2343 | GB_transaction ta(gb_main); |
---|
2344 | |
---|
2345 | GBDATA *gb_species1 = GBT_expect_species(gb_main, species1); |
---|
2346 | GBDATA *gb_species2 = GBT_expect_species(gb_main, species2); |
---|
2347 | int diffs = -1; |
---|
2348 | |
---|
2349 | if (gb_species1 && gb_species2) { |
---|
2350 | GBDATA *gb_seq1 = GBT_find_sequence(gb_species1, aliname); |
---|
2351 | GBDATA *gb_seq2 = GBT_find_sequence(gb_species2, aliname); |
---|
2352 | |
---|
2353 | if (gb_seq1 && gb_seq2) { |
---|
2354 | char *seq1 = GB_read_string(gb_seq1); |
---|
2355 | char *seq2 = GB_read_string(gb_seq2); |
---|
2356 | |
---|
2357 | int maxPos1 = strlen(seq1)-1; |
---|
2358 | #if defined(ASSERTION_USED) |
---|
2359 | int maxPos2 = strlen(seq2)-1; |
---|
2360 | #endif |
---|
2361 | ap_assert(maxPos1 == maxPos2); |
---|
2362 | |
---|
2363 | if (endPos>maxPos1) endPos = maxPos1; |
---|
2364 | |
---|
2365 | diffs = 0; |
---|
2366 | for (int p = startPos; p<=endPos; ++p) { // LOOP_ VECTORIZED[!<9.1] // @@@ fails in RELEASE code! // IRRELEVANT_LOOP |
---|
2367 | diffs += seq1[p] != seq2[p]; |
---|
2368 | } |
---|
2369 | |
---|
2370 | free(seq2); |
---|
2371 | free(seq1); |
---|
2372 | } |
---|
2373 | } |
---|
2374 | |
---|
2375 | return diffs; |
---|
2376 | } |
---|
2377 | |
---|
2378 | static GBDATA *createPartialSeqFrom(GBDATA *gb_main, const char *aliname, const char *dest_species, const char *source_species, int startPos, int endPos) { |
---|
2379 | GB_transaction ta(gb_main); |
---|
2380 | |
---|
2381 | GBDATA *gb_result = NULp; |
---|
2382 | GBDATA *gb_source_species = GBT_expect_species(gb_main, source_species); |
---|
2383 | |
---|
2384 | if (gb_source_species) { |
---|
2385 | GBDATA *gb_dest_species = copy_to(gb_source_species, dest_species); |
---|
2386 | GBDATA *gb_dest_seq = GBT_find_sequence(gb_dest_species, aliname); // =same as source seq |
---|
2387 | char *seq = GB_read_string(gb_dest_seq); |
---|
2388 | |
---|
2389 | if (seq) { |
---|
2390 | int maxPos = strlen(seq)-1; |
---|
2391 | |
---|
2392 | startPos = std::min(startPos, maxPos); |
---|
2393 | endPos = std::min(endPos, maxPos); |
---|
2394 | |
---|
2395 | if (startPos>0) memset(seq, '.', startPos); |
---|
2396 | if (endPos<maxPos) memset(seq+endPos+1, '.', maxPos-endPos); |
---|
2397 | |
---|
2398 | GB_ERROR error = GB_write_string(gb_dest_seq, seq); |
---|
2399 | if (error) GB_export_error(error); |
---|
2400 | else { |
---|
2401 | gb_result = gb_dest_species; // success |
---|
2402 | #if defined(DEBUG) |
---|
2403 | fprintf(stderr, "created partial '%s' from '%s' (seq='%s')\n", dest_species, source_species, seq); |
---|
2404 | #endif |
---|
2405 | } |
---|
2406 | |
---|
2407 | free(seq); |
---|
2408 | } |
---|
2409 | } |
---|
2410 | |
---|
2411 | return gb_result; |
---|
2412 | } |
---|
2413 | |
---|
2414 | static GB_ERROR modifyOneBase(GBDATA *gb_species, const char *aliname, char cOld, char cNew) { |
---|
2415 | GB_transaction ta(gb_species); |
---|
2416 | GB_ERROR error = "failed to modifyOneBase"; |
---|
2417 | |
---|
2418 | GBDATA *gb_seq = GBT_find_sequence(gb_species, aliname); |
---|
2419 | if (gb_seq) { |
---|
2420 | char *seq = GB_read_string(gb_seq); |
---|
2421 | if (seq) { |
---|
2422 | char *B = strchr(seq, cOld); |
---|
2423 | if (!B) { |
---|
2424 | error = "does not contain base in modifyOneBase"; |
---|
2425 | } |
---|
2426 | else { |
---|
2427 | B[0] = cNew; |
---|
2428 | error = GB_write_string(gb_seq, seq); |
---|
2429 | } |
---|
2430 | free(seq); |
---|
2431 | } |
---|
2432 | } |
---|
2433 | |
---|
2434 | return error; |
---|
2435 | } |
---|
2436 | |
---|
2437 | static long unmark_unwanted(const char *, long cd_gbd, void*) { |
---|
2438 | GBDATA *gbd = (GBDATA*)cd_gbd; |
---|
2439 | GB_write_flag(gbd, 0); |
---|
2440 | return 0; |
---|
2441 | } |
---|
2442 | |
---|
2443 | void TEST_SLOW_nucl_tree_modifications() { |
---|
2444 | const char *aliname = "ali_5s"; |
---|
2445 | |
---|
2446 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", aliname); |
---|
2447 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_test")); |
---|
2448 | TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-initial"); |
---|
2449 | |
---|
2450 | const int PARSIMONY_ORG = 302; |
---|
2451 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 14); |
---|
2452 | |
---|
2453 | // [NUCOPTI] opposed to protein tests below the initial tree here is NOT optimized! compare .@PROTOPTI |
---|
2454 | // -> removing and adding species produces a better tree (for add+NNI) |
---|
2455 | // |
---|
2456 | // diff initial->removed : http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/nucl-removed.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fnucl-initial.tree.expected |
---|
2457 | // diff initial->add-quick: http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/nucl-add-quick.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fnucl-initial.tree.expected |
---|
2458 | // diff initial->add-NNI: http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/nucl-add-NNI.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fnucl-initial.tree.expected |
---|
2459 | |
---|
2460 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, "nucl-removed", PARSIMONY_ORG-94, env, true)); // test remove-marked only (same code as part of nt_reAdd) |
---|
2461 | TEST_EXPECT_COMBINES_PERFORMED(env, 3); |
---|
2462 | |
---|
2463 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "nucl-add-quick", PARSIMONY_ORG-18, env, true)); // test quick-add |
---|
2464 | TEST_EXPECT_COMBINES_PERFORMED(env, 400); |
---|
2465 | |
---|
2466 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_READD_NNI, "nucl-add-NNI", PARSIMONY_ORG-20, env, true)); // test add + NNI |
---|
2467 | TEST_EXPECT_COMBINES_PERFORMED(env, 503); |
---|
2468 | |
---|
2469 | // test partial-add |
---|
2470 | { |
---|
2471 | GBDATA *gb_main = env.gbmain(); |
---|
2472 | |
---|
2473 | // create 2 non-overlapping partial species |
---|
2474 | const int SPLIT = 55; |
---|
2475 | GBDATA *CorGlutP = createPartialSeqFrom(gb_main, aliname, "CorGlutP", "CorGluta", 0, SPLIT); |
---|
2476 | GBDATA *CloButyP = createPartialSeqFrom(gb_main, aliname, "CloButyP", "CloButyr", SPLIT+1, INT_MAX); |
---|
2477 | GBDATA *CloButyM = createPartialSeqFrom(gb_main, aliname, "CloButyM", "CloButyr", SPLIT+1, INT_MAX); |
---|
2478 | TEST_EXPECT_NO_ERROR(modifyOneBase(CloButyM, aliname, 'G', 'C')); // change first 'G' into 'C' |
---|
2479 | |
---|
2480 | TEST_VALIDITY(env.all_available_pops_will_produce_valid_trees()); // no push yet (does nothing) |
---|
2481 | |
---|
2482 | // test partials differ from full: |
---|
2483 | TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CorGlutP", "CorGluta", 0, INT_MAX)); |
---|
2484 | TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyP", "CloButyr", 0, INT_MAX)); |
---|
2485 | TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyM", "CloButyr", 0, INT_MAX)); |
---|
2486 | // test partials created from CloButyr differ in partial range: |
---|
2487 | TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyM", "CloButyP", SPLIT+1, INT_MAX)); |
---|
2488 | |
---|
2489 | // test condition that "CloButyr and CloButy2 do NOT differ in seq-range of partial" (otherwise test below makes no sense!) |
---|
2490 | TEST_EXPECT_ZERO(seqDiff(gb_main, aliname, "CloButyr", "CloButy2", SPLIT+1, INT_MAX)); |
---|
2491 | |
---|
2492 | // test that "CloButyr and CloButy2 DO differ in whole seq-range" (otherwise inserting into tree is non-deterministic) |
---|
2493 | TEST_REJECT_ZERO(seqDiff(gb_main, aliname, "CloButyr", "CloButy2", 0, INT_MAX)); |
---|
2494 | |
---|
2495 | // add CloButyP (and undo) |
---|
2496 | { |
---|
2497 | env.push(); |
---|
2498 | |
---|
2499 | // CloButyr and CloButy2 do not differ in seq-range of partial -> any of both may be chosen as brother. |
---|
2500 | // behavior should be changed with #605 |
---|
2501 | TEST_EXPECTATION(addingPartialResultsIn(CloButyP, "CloButyr;CloButy2", "nucl-addPart-CloButyP", PARSIMONY_ORG, env)); |
---|
2502 | TEST_EXPECT_COMBINES_PERFORMED(env, 6); |
---|
2503 | env.pop(); |
---|
2504 | } |
---|
2505 | |
---|
2506 | { |
---|
2507 | env.push(); |
---|
2508 | TEST_EXPECTATION(addingPartialResultsIn(CorGlutP, "CorGluta", "nucl-addPart-CorGlutP", PARSIMONY_ORG, env)); // add CorGlutP |
---|
2509 | TEST_EXPECT_COMBINES_PERFORMED(env, 5); // @@@ partial-add should not perform combines at all (maybe caused by cost-recalc?) |
---|
2510 | TEST_EXPECTATION(addingPartialResultsIn(CloButyP, "CloButyr;CloButy2", "nucl-addPart-CorGlutP-CloButyP", PARSIMONY_ORG, env)); // also add CloButyP |
---|
2511 | TEST_EXPECT_COMBINES_PERFORMED(env, 6); |
---|
2512 | env.pop(); |
---|
2513 | } |
---|
2514 | |
---|
2515 | // now add CorGlutP as full, then CloButyP and CloButyM as partials |
---|
2516 | { |
---|
2517 | env.push(); |
---|
2518 | |
---|
2519 | mark_only(CorGlutP); |
---|
2520 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
---|
2521 | { |
---|
2522 | GB_transaction ta(gb_main); |
---|
2523 | TEST_EXPECT_NO_ERROR(GBT_write_int(CorGlutP, "ARB_partial", 0)); // revert species to "full" |
---|
2524 | } |
---|
2525 | |
---|
2526 | const int PARSIMONY_ADDED = PARSIMONY_ORG; // value after adding CorGlutP (as full-length sequence) |
---|
2527 | |
---|
2528 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "nucl-addPartialAsFull-CorGlutP", PARSIMONY_ADDED, env, false)); |
---|
2529 | TEST_EXPECT_COMBINES_PERFORMED(env, 230); |
---|
2530 | TEST_EXPECT_EQUAL(is_partial(CorGlutP), 0); // check CorGlutP was added as full sequence |
---|
2531 | TEST_EXPECTATION(addedAsBrotherOf("CorGlutP", "CorGluta", env)); // partial created from CorGluta gets inserted next to CorGluta |
---|
2532 | |
---|
2533 | // add CloButyP as partial. |
---|
2534 | // as expected it is placed next to matching full sequences (does not differ in partial range) |
---|
2535 | TEST_EXPECTATION(addingPartialResultsIn(CloButyP, "CloButyr;CloButy2", NULp, PARSIMONY_ADDED, env)); |
---|
2536 | TEST_EXPECT_COMBINES_PERFORMED(env, 6); |
---|
2537 | |
---|
2538 | // CloButyM differs slightly in overlap with CloButyr/CloButy2, but has no overlap with CorGlutP |
---|
2539 | // shows bug described in #609 is fixed: |
---|
2540 | TEST_EXPECTATION(addingPartialResultsIn(CloButyM, "CloButyP", "nucl-addPart-bug609", |
---|
2541 | PARSIMONY_ADDED+1, // @@@ known bug - partial should not affect parsimony value; possibly related to ../HELP_SOURCE/oldhelp/pa_partial.hlp@WARNINGS |
---|
2542 | env)); |
---|
2543 | TEST_EXPECT_COMBINES_PERFORMED(env, 7); |
---|
2544 | env.pop(); |
---|
2545 | } |
---|
2546 | } |
---|
2547 | |
---|
2548 | TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-initial"); |
---|
2549 | |
---|
2550 | const int PARSIMONY_NNI_MARKED = PARSIMONY_ORG-18; |
---|
2551 | const int PARSIMONY_NNI_ALL = PARSIMONY_ORG-18; |
---|
2552 | const int PARSIMONY_OPTI_MARKED = PARSIMONY_ORG-25; |
---|
2553 | const int PARSIMONY_OPTI_VISIBLE = PARSIMONY_ORG-26; |
---|
2554 | const int PARSIMONY_OPTI_ALL = PARSIMONY_ORG-36; |
---|
2555 | |
---|
2556 | { |
---|
2557 | env.push(); |
---|
2558 | TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_ORG)); |
---|
2559 | TEST_EXPECT_COMBINES_PERFORMED(env, 342); |
---|
2560 | env.pop(); |
---|
2561 | } |
---|
2562 | |
---|
2563 | // ------------------------------ |
---|
2564 | // test optimize (some) |
---|
2565 | |
---|
2566 | // mark initially marked species |
---|
2567 | { |
---|
2568 | GB_transaction ta(env.gbmain()); |
---|
2569 | GBT_restore_marked_species(env.gbmain(), "CorAquat;CorGluta;CurCitre;CloButyr;CloButy2;CytAquat"); |
---|
2570 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
---|
2571 | } |
---|
2572 | |
---|
2573 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG); |
---|
2574 | |
---|
2575 | // test branchlength calculation |
---|
2576 | // (optimizations below implicitely recalculates branchlengths) |
---|
2577 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "nucl-calclength", PARSIMONY_ORG, env, false)); |
---|
2578 | TEST_EXPECT_COMBINES_PERFORMED(env, 120); |
---|
2579 | |
---|
2580 | // test whether branchlength calculation depends on root-position |
---|
2581 | { |
---|
2582 | AP_tree_edge *orgRootEdge = rootEdge(); |
---|
2583 | |
---|
2584 | env.push(); |
---|
2585 | |
---|
2586 | const char *tested_roots[] = { |
---|
2587 | "CloButyr", |
---|
2588 | "CloTyro4", |
---|
2589 | "CloTyrob", |
---|
2590 | "CloInnoc", |
---|
2591 | }; |
---|
2592 | |
---|
2593 | for (size_t r = 0; r<ARRAY_ELEMS(tested_roots); ++r) { |
---|
2594 | const char *leafName = tested_roots[r]; |
---|
2595 | env.root_node()->findLeafNamed(leafName)->set_root(); |
---|
2596 | calc_branchlengths_and_reorder(env.graphic_tree()); |
---|
2597 | orgRootEdge->set_root(); |
---|
2598 | env.graphic_tree()->reorderTree(BIG_BRANCHES_TO_TOP); |
---|
2599 | |
---|
2600 | TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-calclength"); |
---|
2601 | } |
---|
2602 | TEST_EXPECT_COMBINES_PERFORMED(env, 517); |
---|
2603 | |
---|
2604 | env.pop(); |
---|
2605 | } |
---|
2606 | |
---|
2607 | // test optimize (some) |
---|
2608 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "nucl-opti-NNI", PARSIMONY_NNI_MARKED, env, true)); // test recursive NNI |
---|
2609 | TEST_EXPECT_COMBINES_PERFORMED(env, 208); |
---|
2610 | |
---|
2611 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-marked-global", PARSIMONY_OPTI_MARKED, env, true)); // test recursive NNI+KL |
---|
2612 | TEST_EXPECT_COMBINES_PERFORMED(env, 18518); |
---|
2613 | |
---|
2614 | { |
---|
2615 | KL_Settings& KL = env.get_KL_settings(); |
---|
2616 | LocallyModify<EdgeSpec> target(KL.whichEdges, EdgeSpec(KL.whichEdges&~SKIP_UNMARKED_EDGES)); // ignore marks; skip folded |
---|
2617 | |
---|
2618 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-visible-global", PARSIMONY_OPTI_VISIBLE, env, true)); // same result as if all species marked (see below) |
---|
2619 | TEST_EXPECT_COMBINES_PERFORMED(env, 34925); |
---|
2620 | |
---|
2621 | KL.whichEdges = EdgeSpec(KL.whichEdges&~SKIP_FOLDED_EDGES); // ignore marks and folding |
---|
2622 | |
---|
2623 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-global", PARSIMONY_OPTI_ALL, env, true)); // same result as if all species marked and all groups unfolded (see below) |
---|
2624 | TEST_EXPECT_COMBINES_PERFORMED(env, 124811); |
---|
2625 | } |
---|
2626 | |
---|
2627 | // ----------------------------- |
---|
2628 | // test optimize (all) |
---|
2629 | |
---|
2630 | // mark all species |
---|
2631 | mark_all(env.gbmain()); |
---|
2632 | // unmark species not in tree |
---|
2633 | { |
---|
2634 | GB_transaction ta(env.gbmain()); |
---|
2635 | GB_HASH *markedNotInTree = GBT_create_marked_species_hash(env.gbmain()); |
---|
2636 | NT_remove_species_in_tree_from_hash(env.root_node(), markedNotInTree); |
---|
2637 | GBS_hash_do_loop(markedNotInTree, unmark_unwanted, NULp); |
---|
2638 | GBS_free_hash(markedNotInTree); |
---|
2639 | } |
---|
2640 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
---|
2641 | TEST_EXPECT_EQUAL(GBT_count_marked_species(env.gbmain()), 15); |
---|
2642 | |
---|
2643 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG); |
---|
2644 | |
---|
2645 | // test branchlength calculation |
---|
2646 | // (optimizations below implicitely recalculates branchlengths) |
---|
2647 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "nucl-calclength", PARSIMONY_ORG, env, false)); |
---|
2648 | TEST_EXPECT_COMBINES_PERFORMED(env, 120); |
---|
2649 | |
---|
2650 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "nucl-opti-all-NNI", PARSIMONY_NNI_ALL, env, true)); // test recursive NNI |
---|
2651 | TEST_EXPECT_COMBINES_PERFORMED(env, 242); |
---|
2652 | |
---|
2653 | { |
---|
2654 | env.push(); |
---|
2655 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-visible-global", PARSIMONY_OPTI_VISIBLE, env, false)); // test recursive NNI+KL |
---|
2656 | TEST_EXPECT_COMBINES_PERFORMED(env, 34925); |
---|
2657 | |
---|
2658 | TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_OPTI_VISIBLE)); |
---|
2659 | TEST_EXPECT_COMBINES_PERFORMED(env, 336); |
---|
2660 | env.pop(); |
---|
2661 | } |
---|
2662 | |
---|
2663 | // unfold groups |
---|
2664 | { |
---|
2665 | AP_tree_nlen *CloTyrob = env.root_node()->findLeafNamed("CloTyrob"); |
---|
2666 | AP_tree_nlen *group = CloTyrob->get_father(); |
---|
2667 | ap_assert(group->gr.grouped); |
---|
2668 | group->gr.grouped = false; // unfold the only folded group |
---|
2669 | |
---|
2670 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "nucl-opti-global", PARSIMONY_OPTI_ALL, env, true)); // test recursive NNI+KL |
---|
2671 | TEST_EXPECT_COMBINES_PERFORMED(env, 124811); |
---|
2672 | } |
---|
2673 | |
---|
2674 | // test re-add all (i.e. test "create tree from scratch") |
---|
2675 | // Note: trees generated below are NO LONGER better than optimized trees! (see also r13651) |
---|
2676 | |
---|
2677 | // quick add: |
---|
2678 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "nucl-readdall-quick", PARSIMONY_ORG-7, env, true)); |
---|
2679 | TEST_EXPECT_COMBINES_PERFORMED(env, 439); |
---|
2680 | |
---|
2681 | // quick add + NNI: |
---|
2682 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_READD_NNI, "nucl-readdall-NNI", PARSIMONY_ORG-8, env, true)); |
---|
2683 | TEST_EXPECT_COMBINES_PERFORMED(env, 613); |
---|
2684 | |
---|
2685 | // test adding a too short sequence |
---|
2686 | // (has to be last test, because it modifies seq data) << ------------ !!!!! |
---|
2687 | { |
---|
2688 | env.push(); |
---|
2689 | |
---|
2690 | AP_tree_nlen *CloTyrob = env.root_node()->findLeafNamed("CloTyrob"); |
---|
2691 | mark_only(CloTyrob->gb_node); |
---|
2692 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
---|
2693 | |
---|
2694 | // modify sequence of CloTyrob (keep only some bases) |
---|
2695 | { |
---|
2696 | GB_transaction ta(env.gbmain()); |
---|
2697 | GBDATA *gb_seq = GBT_find_sequence(CloTyrob->gb_node, aliname); |
---|
2698 | |
---|
2699 | char *seq = GB_read_string(gb_seq); |
---|
2700 | int keep_bases = MIN_SEQUENCE_LENGTH-1; |
---|
2701 | |
---|
2702 | for (int i = 0; seq[i]; ++i) { |
---|
2703 | if (!GAP::is_std_gap(seq[i])) { |
---|
2704 | if (keep_bases) --keep_bases; |
---|
2705 | else seq[i] = '.'; |
---|
2706 | } |
---|
2707 | } |
---|
2708 | |
---|
2709 | GB_topSecurityLevel unsecured(gb_seq); |
---|
2710 | TEST_EXPECT_NO_ERROR(GB_write_string(gb_seq, seq)); |
---|
2711 | free(seq); |
---|
2712 | } |
---|
2713 | |
---|
2714 | // remove CloTyrob |
---|
2715 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, PARSIMONY_ORG-1, env, false)); |
---|
2716 | TEST_EXPECT_COMBINES_PERFORMED(env, 4); |
---|
2717 | TEST_EXPECT_EQUAL(env.root_node()->count_leafs(), 14); |
---|
2718 | |
---|
2719 | // attempt to add CloTyrob (should fail because sequence too short) and CorGluta (should stay, because already in tree) |
---|
2720 | TEST_REJECT_NULL(env.root_node()->findLeafNamed("CorGluta")); // has to be in tree |
---|
2721 | { |
---|
2722 | GB_transaction ta(env.gbmain()); |
---|
2723 | GBT_restore_marked_species(env.gbmain(), "CloTyrob;CorGluta"); |
---|
2724 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
---|
2725 | } |
---|
2726 | |
---|
2727 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_ADD, NULp, PARSIMONY_ORG-1, env, false)); |
---|
2728 | TEST_EXPECT_COMBINES_PERFORMED(env, 110); // @@@ why does this perform combines at all? |
---|
2729 | TEST_EXPECT_EQUAL(env.root_node()->count_leafs(), 14); // ok, CloTyrob was not added |
---|
2730 | TEST_REJECT_NULL(env.root_node()->findLeafNamed("CorGluta")); // has to be in tree |
---|
2731 | |
---|
2732 | env.pop(); |
---|
2733 | } |
---|
2734 | } |
---|
2735 | |
---|
2736 | void TEST_SLOW_prot_tree_modifications() { |
---|
2737 | const char *aliname = "ali_tuf_pro"; |
---|
2738 | |
---|
2739 | PARSIMONY_testenv<AP_sequence_protein> env("TEST_prot.arb", aliname); |
---|
2740 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_prot_opti")); |
---|
2741 | TEST_EXPECT_SAVED_TOPOLOGY(env, "prot-initial"); |
---|
2742 | |
---|
2743 | const int PARSIMONY_ORG = 1081; |
---|
2744 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 10); |
---|
2745 | |
---|
2746 | // [PROTOPTI] opposed to nucleid tests above the initial tree here is already optimized! compare .@NUCOPTI |
---|
2747 | // -> adding species approximately reproduces initial topology |
---|
2748 | // |
---|
2749 | // diff initial->add-quick: http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/prot-add-quick.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fprot-initial.tree.expected |
---|
2750 | // diff initial->add-NNI: http://bugs.arb-home.de/changeset/HEAD/branches/pars/UNIT_TESTER/run/pars/prot-add-NNI.tree.expected?old=HEAD&old_path=branches%2Fpars%2FUNIT_TESTER%2Frun%2Fpars%2Fprot-initial.tree.expected |
---|
2751 | // |
---|
2752 | // Note: comparing these two diffs also demonstrates why quick-adding w/o NNI suffers |
---|
2753 | |
---|
2754 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, "prot-removed", PARSIMONY_ORG-146, env, true)); // test remove-marked only (same code as part of nt_reAdd) |
---|
2755 | TEST_EXPECT_COMBINES_PERFORMED(env, 5); |
---|
2756 | |
---|
2757 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "prot-add-quick", PARSIMONY_ORG, env, true)); // test quick-add |
---|
2758 | TEST_EXPECT_COMBINES_PERFORMED(env, 213); |
---|
2759 | |
---|
2760 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_READD_NNI, "prot-add-NNI", PARSIMONY_ORG, env, true)); // test add + NNI |
---|
2761 | TEST_EXPECT_COMBINES_PERFORMED(env, 262); |
---|
2762 | |
---|
2763 | // test partial-add |
---|
2764 | { |
---|
2765 | GBDATA *gb_main = env.gbmain(); |
---|
2766 | |
---|
2767 | // create 2 non-overlapping partial species |
---|
2768 | GBDATA *MucRaceP = createPartialSeqFrom(gb_main, aliname, "MucRaceP", "MucRacem", 0, 60+4); // (+4 = dots inserted into DB at left end) |
---|
2769 | GBDATA *StrCoelP = createPartialSeqFrom(gb_main, aliname, "StrCoelP", "StrCoel9", 66-1+4, 184-1+4); |
---|
2770 | GBDATA *StrCoelM = createPartialSeqFrom(gb_main, aliname, "StrCoelM", "StrCoel9", 66-1+4, 184-1+4); |
---|
2771 | TEST_EXPECT_NO_ERROR(modifyOneBase(StrCoelM, aliname, 'Y', 'H')); // change first 'Y' into 'H' |
---|
2772 | |
---|
2773 | // add StrCoelP (and undo) |
---|
2774 | { |
---|
2775 | env.push(); |
---|
2776 | // StrCoel9 and StrRamo3 do not differ in seq-range of partial -> any of both may be chosen as brother. |
---|
2777 | // behavior should be changed with #605 |
---|
2778 | TEST_EXPECTATION(addingPartialResultsIn(StrCoelP, "StrCoel9;StrRamo3", "prot-addPart-StrCoelP", PARSIMONY_ORG, env)); |
---|
2779 | TEST_EXPECT_COMBINES_PERFORMED(env, 4); |
---|
2780 | env.pop(); |
---|
2781 | } |
---|
2782 | |
---|
2783 | { |
---|
2784 | env.push(); |
---|
2785 | TEST_EXPECTATION(addingPartialResultsIn(MucRaceP, "MucRacem", "prot-addPart-MucRaceP", PARSIMONY_ORG, env)); // add MucRaceP |
---|
2786 | TEST_EXPECT_COMBINES_PERFORMED(env, 6); |
---|
2787 | TEST_EXPECTATION(addingPartialResultsIn(StrCoelP, "StrCoel9;StrRamo3", "prot-addPart-MucRaceP-StrCoelP", PARSIMONY_ORG, env)); // also add StrCoelP |
---|
2788 | TEST_EXPECT_COMBINES_PERFORMED(env, 4); |
---|
2789 | env.pop(); |
---|
2790 | } |
---|
2791 | |
---|
2792 | // now add MucRaceP as full, then StrCoelP and StrCoelM as partials |
---|
2793 | { |
---|
2794 | env.push(); |
---|
2795 | |
---|
2796 | mark_only(MucRaceP); |
---|
2797 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
---|
2798 | { |
---|
2799 | GB_transaction ta(gb_main); |
---|
2800 | TEST_EXPECT_NO_ERROR(GBT_write_int(MucRaceP, "ARB_partial", 0)); // revert species to "full" |
---|
2801 | } |
---|
2802 | |
---|
2803 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "prot-addPartialAsFull-MucRaceP", PARSIMONY_ORG, env, false)); |
---|
2804 | TEST_EXPECT_COMBINES_PERFORMED(env, 156); |
---|
2805 | TEST_EXPECT_EQUAL(is_partial(MucRaceP), 0); // check MucRaceP was added as full sequence |
---|
2806 | TEST_EXPECTATION(addedAsBrotherOf("MucRaceP", "Eukarya EF-Tu", env)); // partial created from MucRacem gets inserted next to this group |
---|
2807 | // Note: looks ok. group contains MucRacem, AbdGlauc and 4 other species |
---|
2808 | |
---|
2809 | // add StrCoelP as partial. |
---|
2810 | // as expected it is placed next to matching full sequences (does not differ in partial range) |
---|
2811 | TEST_EXPECTATION(addingPartialResultsIn(StrCoelP, "StrCoel9;StrRamo3", NULp, PARSIMONY_ORG, env)); |
---|
2812 | TEST_EXPECT_COMBINES_PERFORMED(env, 4); |
---|
2813 | |
---|
2814 | // StrCoelM differs slightly in overlap with StrCoel9/StrRamo3, but has no overlap with MucRaceP |
---|
2815 | // shows bug described in #609 is fixed: |
---|
2816 | TEST_EXPECTATION(addingPartialResultsIn(StrCoelM, "StrCoelP", "prot-addPart-bug609", |
---|
2817 | PARSIMONY_ORG+1, // @@@ known bug - partial should not affect parsimony value; possibly related to ../HELP_SOURCE/oldhelp/pa_partial.hlp@WARNINGS |
---|
2818 | env)); |
---|
2819 | TEST_EXPECT_COMBINES_PERFORMED(env, 5); |
---|
2820 | env.pop(); |
---|
2821 | } |
---|
2822 | } |
---|
2823 | |
---|
2824 | TEST_EXPECT_SAVED_TOPOLOGY(env, "prot-initial"); |
---|
2825 | |
---|
2826 | const unsigned mixseed = 8164724; |
---|
2827 | |
---|
2828 | const long PARSIMONY_MIXED = PARSIMONY_ORG + 1519; |
---|
2829 | const long PARSIMONY_NNI_MARKED = PARSIMONY_ORG + 1053; |
---|
2830 | const long PARSIMONY_NNI_ALL = PARSIMONY_ORG; |
---|
2831 | const long PARSIMONY_OPTI_MARKED = PARSIMONY_ORG; |
---|
2832 | const long PARSIMONY_OPTI_ALL = PARSIMONY_ORG; // no gain (initial tree already is optimized) |
---|
2833 | |
---|
2834 | // ------------------------------------------------------ |
---|
2835 | // mix tree (original tree already is optimized) |
---|
2836 | |
---|
2837 | GB_random_seed(mixseed); |
---|
2838 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_MIX_TREE, "prot-mixed", PARSIMONY_MIXED, env, false)); |
---|
2839 | TEST_EXPECT_COMBINES_PERFORMED(env, 89); |
---|
2840 | |
---|
2841 | { |
---|
2842 | env.push(); |
---|
2843 | TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_MIXED)); |
---|
2844 | TEST_EXPECT_COMBINES_PERFORMED(env, 234); |
---|
2845 | env.pop(); |
---|
2846 | } |
---|
2847 | |
---|
2848 | // ------------------------------ |
---|
2849 | // test optimize (some) |
---|
2850 | |
---|
2851 | // mark initially marked species |
---|
2852 | { |
---|
2853 | GB_transaction ta(env.gbmain()); |
---|
2854 | |
---|
2855 | GBT_restore_marked_species(env.gbmain(), "CytLyti6;StrRamo3;MucRace2;SacCere5"); |
---|
2856 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
---|
2857 | } |
---|
2858 | |
---|
2859 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_MIXED); |
---|
2860 | |
---|
2861 | // test branchlength calculation |
---|
2862 | // (optimizations below implicitely recalculates branchlengths) |
---|
2863 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "prot-calclength", PARSIMONY_MIXED, env, false)); |
---|
2864 | TEST_EXPECT_COMBINES_PERFORMED(env, 79); |
---|
2865 | |
---|
2866 | // test whether branchlength calculation depends on root-position |
---|
2867 | { |
---|
2868 | AP_tree_edge *orgRootEdge = rootEdge(); |
---|
2869 | |
---|
2870 | env.push(); |
---|
2871 | |
---|
2872 | const char *tested_roots[] = { |
---|
2873 | // "CytLyti6", // no effect on branchlengths |
---|
2874 | "TaxOcell", |
---|
2875 | "MucRace3", |
---|
2876 | "StrCoel9", |
---|
2877 | }; |
---|
2878 | |
---|
2879 | for (size_t r = 0; r<ARRAY_ELEMS(tested_roots); ++r) { |
---|
2880 | TEST_ANNOTATE(tested_roots[r]); |
---|
2881 | const char *leafName = tested_roots[r]; |
---|
2882 | env.root_node()->findLeafNamed(leafName)->set_root(); |
---|
2883 | calc_branchlengths_and_reorder(env.graphic_tree()); |
---|
2884 | orgRootEdge->set_root(); |
---|
2885 | env.graphic_tree()->reorderTree(BIG_BRANCHES_TO_TOP); |
---|
2886 | |
---|
2887 | TEST_EXPECT_SAVED_TOPOLOGY(env, "prot-calclength"); |
---|
2888 | } |
---|
2889 | TEST_EXPECT_COMBINES_PERFORMED(env, 265); |
---|
2890 | |
---|
2891 | env.pop(); |
---|
2892 | } |
---|
2893 | |
---|
2894 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "prot-opti-NNI", PARSIMONY_NNI_MARKED, env, true)); // test recursive NNI |
---|
2895 | TEST_EXPECT_COMBINES_PERFORMED(env, 246); |
---|
2896 | |
---|
2897 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "prot-opti-marked-global", PARSIMONY_OPTI_MARKED, env, true)); // test recursive NNI+KL |
---|
2898 | TEST_EXPECT_COMBINES_PERFORMED(env, 2810); |
---|
2899 | |
---|
2900 | // ----------------------------- |
---|
2901 | // test optimize (all) |
---|
2902 | |
---|
2903 | // mark all species |
---|
2904 | mark_all(env.gbmain()); |
---|
2905 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
---|
2906 | TEST_EXPECT_EQUAL(GBT_count_marked_species(env.gbmain()), 14); |
---|
2907 | |
---|
2908 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_MIXED); |
---|
2909 | |
---|
2910 | // test branchlength calculation |
---|
2911 | // (optimizations below implicitely recalculates branchlengths) |
---|
2912 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_CALC_LENS, "prot-calclength", PARSIMONY_MIXED, env, false)); |
---|
2913 | TEST_EXPECT_COMBINES_PERFORMED(env, 79); |
---|
2914 | |
---|
2915 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_NNI, "prot-opti-all-NNI", PARSIMONY_NNI_ALL, env, true)); // test recursive NNI |
---|
2916 | TEST_EXPECT_COMBINES_PERFORMED(env, 359); |
---|
2917 | |
---|
2918 | { |
---|
2919 | env.push(); |
---|
2920 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_OPTI_GLOBAL, "prot-opti-global", PARSIMONY_OPTI_ALL, env, false)); // test recursive NNI+KL |
---|
2921 | TEST_EXPECT_COMBINES_PERFORMED(env, 1690); |
---|
2922 | |
---|
2923 | TEST_EXPECTATION(movingRootDoesntAffectCosts(PARSIMONY_OPTI_ALL)); |
---|
2924 | TEST_EXPECT_COMBINES_PERFORMED(env, 215); |
---|
2925 | env.pop(); |
---|
2926 | } |
---|
2927 | } |
---|
2928 | |
---|
2929 | void TEST_node_stack() { |
---|
2930 | // test was used to fix #620 |
---|
2931 | |
---|
2932 | const char *aliname = "ali_5s"; |
---|
2933 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", aliname); |
---|
2934 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_test")); |
---|
2935 | TEST_EXPECT_SAVED_TOPOLOGY(env, "nucl-initial"); |
---|
2936 | |
---|
2937 | const int PARSIMONY_ORG = 302; |
---|
2938 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 14); |
---|
2939 | |
---|
2940 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
---|
2941 | TEST_EXPECT(env.root_node()->has_valid_root_remarks()); |
---|
2942 | |
---|
2943 | // test set root to CytAquat + pop (works) |
---|
2944 | { |
---|
2945 | env.push(); |
---|
2946 | env.root_node()->findLeafNamed("CytAquat")->set_root(); |
---|
2947 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
---|
2948 | env.pop(); |
---|
2949 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
---|
2950 | } |
---|
2951 | |
---|
2952 | TEST_EXPECT(env.root_node()->has_valid_root_remarks()); |
---|
2953 | |
---|
2954 | // test set root to CloButyr + pop (works) |
---|
2955 | { |
---|
2956 | env.push(); |
---|
2957 | env.root_node()->findLeafNamed("CloButyr")->set_root(); |
---|
2958 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
---|
2959 | env.pop(); |
---|
2960 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
---|
2961 | } |
---|
2962 | |
---|
2963 | TEST_EXPECT(env.root_node()->has_valid_root_remarks()); |
---|
2964 | |
---|
2965 | // test set root to CloBifer + set root to CloTyrob + pop (works) |
---|
2966 | // Note: both species are in same subtree (of root) |
---|
2967 | { |
---|
2968 | env.push(); |
---|
2969 | |
---|
2970 | env.root_node()->findLeafNamed("CloBifer")->set_root(); |
---|
2971 | env.root_node()->findLeafNamed("CloTyrob")->set_root(); |
---|
2972 | |
---|
2973 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
---|
2974 | env.pop(); |
---|
2975 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
---|
2976 | } |
---|
2977 | |
---|
2978 | TEST_EXPECT(env.root_node()->has_valid_root_remarks()); |
---|
2979 | |
---|
2980 | // test set root to CytAquat + set root to CloButyr + pop (failed, fixed by [13138]) |
---|
2981 | TEST_EXPECT_COMBINES_PERFORMED(env, 0); |
---|
2982 | for (int calcCostsBetween = 0; calcCostsBetween<2; ++calcCostsBetween) { |
---|
2983 | TEST_ANNOTATE(GBS_global_string("calcCostsBetween=%i", calcCostsBetween)); |
---|
2984 | |
---|
2985 | TEST_EXPECT_PARSVAL(env, PARSIMONY_ORG); |
---|
2986 | |
---|
2987 | env.push(); |
---|
2988 | |
---|
2989 | env.root_node()->findLeafNamed("CytAquat")->set_root(); |
---|
2990 | |
---|
2991 | if (calcCostsBetween) { |
---|
2992 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 2); |
---|
2993 | } |
---|
2994 | |
---|
2995 | env.root_node()->findLeafNamed("CloButyr")->set_root(); |
---|
2996 | |
---|
2997 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
---|
2998 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 6); |
---|
2999 | |
---|
3000 | env.pop(); |
---|
3001 | |
---|
3002 | TEST_VALIDITY(env.root_node()->sequence_state_valid()); |
---|
3003 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG); |
---|
3004 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3005 | } |
---|
3006 | |
---|
3007 | { |
---|
3008 | env.push(); |
---|
3009 | { |
---|
3010 | env.push(); |
---|
3011 | |
---|
3012 | env.root_node()->findLeafNamed("CloInnoc")->moveNextTo(env.root_node()->findLeafNamed("CytAquat"), 0.5); |
---|
3013 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3014 | env.root_node()->findLeafNamed("CloInnoc")->set_root(); |
---|
3015 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3016 | env.root_node()->findLeafNamed("CytAquat")->moveNextTo(env.root_node()->findLeafNamed("CloPaste"), 0.5); |
---|
3017 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3018 | env.root_node()->findLeafNamed("CloPaste")->set_root(); |
---|
3019 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3020 | env.root_node()->findLeafNamed("CloPaste")->moveNextTo(env.root_node()->findLeafNamed("CloInnoc"), 0.5); |
---|
3021 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3022 | |
---|
3023 | { |
---|
3024 | AP_tree_nlen *son_of_brother; |
---|
3025 | AP_tree_nlen *brother_of_father; |
---|
3026 | |
---|
3027 | // COVER1: son of root -> grandson of root |
---|
3028 | { |
---|
3029 | AP_tree_nlen *son_of_root = env.root_node()->get_leftson(); |
---|
3030 | ap_assert(son_of_root); |
---|
3031 | |
---|
3032 | son_of_brother = son_of_root->get_brother()->get_leftson(); |
---|
3033 | son_of_root->moveNextTo(son_of_brother, 0.5); |
---|
3034 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3035 | } |
---|
3036 | |
---|
3037 | // COVER2: grandson of root -> son of brother |
---|
3038 | { |
---|
3039 | AP_tree_nlen *son_of_root = env.root_node()->get_leftson(); |
---|
3040 | AP_tree_nlen *grandson_of_root = son_of_root->get_brother()->get_rightson(); |
---|
3041 | ap_assert(grandson_of_root); |
---|
3042 | |
---|
3043 | son_of_brother = grandson_of_root->get_brother()->get_leftson(); |
---|
3044 | grandson_of_root->moveNextTo(son_of_brother, 0.5); |
---|
3045 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3046 | } |
---|
3047 | |
---|
3048 | AP_tree_nlen *some_leaf = env.root_node()->findLeafNamed("CloBifer"); |
---|
3049 | ap_assert(some_leaf); |
---|
3050 | |
---|
3051 | // COVER3: some leaf -> son of brother |
---|
3052 | son_of_brother = some_leaf->get_brother()->get_leftson(); |
---|
3053 | some_leaf->moveNextTo(son_of_brother, 0.5); |
---|
3054 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3055 | |
---|
3056 | // COVER4: some leaf -> son of brother |
---|
3057 | brother_of_father = some_leaf->get_father()->get_brother(); |
---|
3058 | some_leaf->moveNextTo(brother_of_father, 0.5); |
---|
3059 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3060 | |
---|
3061 | // test forbidden moves: |
---|
3062 | TEST_EXPECT_ERROR_CONTAINS(some_leaf->cantMoveNextTo(some_leaf->get_father()), "Already there"); |
---|
3063 | TEST_EXPECT_ERROR_CONTAINS(some_leaf->cantMoveNextTo(some_leaf->get_brother()), "Already there"); |
---|
3064 | } |
---|
3065 | |
---|
3066 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG+5, 6); |
---|
3067 | |
---|
3068 | env.pop(); |
---|
3069 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3070 | } |
---|
3071 | env.pop(); |
---|
3072 | |
---|
3073 | TEST_EXPECT_KNOWN_PARSVAL(env, PARSIMONY_ORG); |
---|
3074 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3075 | } |
---|
3076 | |
---|
3077 | // remove + quick add marked + pop() both works |
---|
3078 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, "nucl-add-quick", PARSIMONY_ORG-18, env, true)); // test quick-add |
---|
3079 | TEST_EXPECT_COMBINES_PERFORMED(env, 400); |
---|
3080 | |
---|
3081 | TEST_EXPECT(env.root_node()->has_valid_root_remarks()); |
---|
3082 | |
---|
3083 | // remove + quick-add marked + pop() quick-add -> corrupts tree |
---|
3084 | // (root-edge is lost) |
---|
3085 | { |
---|
3086 | env.push(); |
---|
3087 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3088 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, -1, env, false)); // test remove-marked only (same code as part of nt_reAdd) |
---|
3089 | TEST_EXPECT_COMBINES_PERFORMED(env, 0); |
---|
3090 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3091 | |
---|
3092 | TEST_VALIDITY(env.all_available_pops_will_produce_valid_trees()); |
---|
3093 | |
---|
3094 | env.push(); |
---|
3095 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3096 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, NULp, -1, env, false)); // test quick-add (same code as part of nt_reAdd) |
---|
3097 | TEST_EXPECT_COMBINES_PERFORMED(env, 400); |
---|
3098 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3099 | TEST_VALIDITY(env.all_available_pops_will_produce_valid_trees()); |
---|
3100 | env.pop(); |
---|
3101 | |
---|
3102 | TEST_VALIDITY(env.root_node()->has_valid_edges()); |
---|
3103 | |
---|
3104 | env.pop(); |
---|
3105 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3106 | TEST_EXPECT_COMBINES_PERFORMED(env, 0); |
---|
3107 | } |
---|
3108 | |
---|
3109 | // same as above, but with only 1 species marked |
---|
3110 | const char *testSingle[] = { |
---|
3111 | "CytAquat", // CytAquat is the only grandson of root (CytAquat located in lower subtree) |
---|
3112 | "CloBifer", // two father nodes between CloBifer and root (CloBifer located in upper subtree) |
---|
3113 | "CloPaste", // two father nodes between CloPaste and root (CloPaste located in upper subtree) |
---|
3114 | "CorGluta", // three father nodes between CorGluta and root (CorGluta located in lower subtree) |
---|
3115 | "CelBiazo", // two father nodes between CelBiazo and root |
---|
3116 | NULp |
---|
3117 | }; |
---|
3118 | |
---|
3119 | for (int i = 0; testSingle[i]; ++i) { |
---|
3120 | for (int swapped = 0; swapped<2; ++swapped) { |
---|
3121 | TEST_ANNOTATE(GBS_global_string("single=%s swapped=%i", testSingle[i], swapped)); |
---|
3122 | |
---|
3123 | env.push(); |
---|
3124 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3125 | { |
---|
3126 | AP_tree_nlen *old_rightson = env.root_node()->get_rightson(); |
---|
3127 | env.root_node()->get_leftson()->get_rightson()->set_root(); |
---|
3128 | old_rightson->get_leftson()->set_root(); |
---|
3129 | old_rightson->set_root(); |
---|
3130 | |
---|
3131 | ap_assert(env.root_node()->get_rightson() == old_rightson); |
---|
3132 | } |
---|
3133 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3134 | |
---|
3135 | mark_only(env.root_node()->findLeafNamed(testSingle[i])->gb_node); |
---|
3136 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
---|
3137 | |
---|
3138 | env.push(); |
---|
3139 | if (swapped) { |
---|
3140 | env.root_node()->swap_sons(); |
---|
3141 | } |
---|
3142 | |
---|
3143 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3144 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, -1, env, false)); // test remove-marked only (same code as part of nt_reAdd) |
---|
3145 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3146 | |
---|
3147 | env.push(); |
---|
3148 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3149 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, NULp, -1, env, false)); // test quick-add (same code as part of nt_reAdd) |
---|
3150 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3151 | env.pop(); |
---|
3152 | |
---|
3153 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3154 | env.pop(); |
---|
3155 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3156 | env.pop(); |
---|
3157 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3158 | } |
---|
3159 | } |
---|
3160 | TEST_EXPECT_COMBINES_PERFORMED(env, 2120); |
---|
3161 | |
---|
3162 | // similar to above (remove+add a grandson of root; here grandson is a subtree with 4 species) |
---|
3163 | |
---|
3164 | for (int remove_from_lower_subtree = 0; remove_from_lower_subtree<2; ++remove_from_lower_subtree) { |
---|
3165 | TEST_ANNOTATE(GBS_global_string("remove_from_lower_subtree=%i", remove_from_lower_subtree)); |
---|
3166 | |
---|
3167 | // mark a complete subtree (which - as a whole - forms a grandson of root). subtree is located in upper part of the tree |
---|
3168 | mark_only(env.root_node()->findLeafNamed("CloButy2")->gb_node); |
---|
3169 | mark(env.root_node()->findLeafNamed("CloButyr")->gb_node); |
---|
3170 | mark(env.root_node()->findLeafNamed("CloCarni")->gb_node); |
---|
3171 | mark(env.root_node()->findLeafNamed("CloPaste")->gb_node); |
---|
3172 | env.compute_tree(); // species marks affect order of node-chain (used in nni_rec) |
---|
3173 | |
---|
3174 | env.push(); |
---|
3175 | if (remove_from_lower_subtree) { |
---|
3176 | env.root_node()->swap_sons(); |
---|
3177 | } |
---|
3178 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3179 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_REMOVE_MARKED, NULp, -1, env, false)); // test remove-marked only (same code as part of nt_reAdd) |
---|
3180 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3181 | |
---|
3182 | env.push(); |
---|
3183 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3184 | TEST_EXPECTATION(modifyingTopoResultsIn(MOD_QUICK_READD, NULp, -1, env, false)); // test quick-add (same code as part of nt_reAdd) |
---|
3185 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3186 | env.pop(); |
---|
3187 | |
---|
3188 | TEST_VALIDITY(env.root_node()->has_valid_edges()); // now always valid |
---|
3189 | |
---|
3190 | env.pop(); |
---|
3191 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3192 | } |
---|
3193 | TEST_EXPECT_COMBINES_PERFORMED(env, 584); |
---|
3194 | } |
---|
3195 | |
---|
3196 | void TEST_node_edge_resources() { |
---|
3197 | const char *aliname = "ali_5s"; |
---|
3198 | |
---|
3199 | // document memsize of nodes and edges: |
---|
3200 | |
---|
3201 | #define STATE_STACK_SIZE sizeof(StateStack) // 8 (Cxx11) or 16 (older C++); maybe 4/8 in 32bit |
---|
3202 | |
---|
3203 | #if defined(ARB_64) |
---|
3204 | TEST_EXPECT_EQUAL(sizeof(AP_tree_nlen), 184 + STATE_STACK_SIZE); |
---|
3205 | TEST_EXPECT_EQUAL(sizeof(AP_tree), 136); |
---|
3206 | TEST_EXPECT_EQUAL(sizeof(ARB_seqtree), 88); |
---|
3207 | TEST_EXPECT_EQUAL(sizeof(TreeNode), 80); |
---|
3208 | #else // !defined(ARB_64) |
---|
3209 | TEST_EXPECT_EQUAL(sizeof(AP_tree_nlen), 112 + STATE_STACK_SIZE); |
---|
3210 | TEST_EXPECT_EQUAL(sizeof(AP_tree), 84); |
---|
3211 | TEST_EXPECT_EQUAL(sizeof(ARB_seqtree), 48); |
---|
3212 | TEST_EXPECT_EQUAL(sizeof(TreeNode), 44); |
---|
3213 | #endif |
---|
3214 | |
---|
3215 | |
---|
3216 | #if defined(ARB_64) |
---|
3217 | TEST_EXPECT_EQUAL(sizeof(AP_tree_edge), 64); |
---|
3218 | #else // !defined(ARB_64) |
---|
3219 | TEST_EXPECT_EQUAL(sizeof(AP_tree_edge), 32); |
---|
3220 | #endif |
---|
3221 | |
---|
3222 | PARSIMONY_testenv<AP_sequence_parsimony> env("TEST_trees.arb", aliname); |
---|
3223 | TEST_EXPECT_NO_ERROR(env.load_tree("tree_test")); |
---|
3224 | |
---|
3225 | const int PARSIMONY_ORG = 302; |
---|
3226 | TEST_EXPECT_ONLY_PARSVAL_COMBINES(env, PARSIMONY_ORG, 14); |
---|
3227 | |
---|
3228 | AP_tree_nlen *CloButyr = env.root_node()->findLeafNamed("CloButyr"); |
---|
3229 | AP_tree_nlen *CloButy2 = env.root_node()->findLeafNamed("CloButy2"); |
---|
3230 | TEST_EXPECT_EQUAL(CloButyr->get_brother()->name, CloButy2->name); // test they are brothers |
---|
3231 | |
---|
3232 | AP_tree_nlen *CorAquat = env.root_node()->findLeafNamed("CorAquat"); |
---|
3233 | AP_tree_nlen *CurCitre = env.root_node()->findLeafNamed("CurCitre"); |
---|
3234 | TEST_EXPECT_EQUAL(CorAquat->get_brother()->name, CurCitre->name); // test they are brothers |
---|
3235 | |
---|
3236 | CorAquat->REMOVE(); |
---|
3237 | |
---|
3238 | for (int test = 1; test<=8; ++test) { |
---|
3239 | // test == 1 -> provokes common nodes+edges in revert+accept |
---|
3240 | // test == 2 -> provokes common nodes+edges in revert+accept |
---|
3241 | // test == 3 -> provokes common nodes+edges in revert+accept |
---|
3242 | // tests 4-7 do not provoke common nodes or edges |
---|
3243 | |
---|
3244 | for (int mode = 0; mode<=3; ++mode) { |
---|
3245 | bool accept_outer = mode&2; |
---|
3246 | bool accept_inner = mode&1; |
---|
3247 | |
---|
3248 | TEST_ANNOTATE(GBS_global_string("accept_outer=%i accept_inner=%i (mode=%i, test=%i)", accept_outer, accept_inner, mode, test)); |
---|
3249 | |
---|
3250 | TEST_EXPECT_NULL(CorAquat->get_father()); |
---|
3251 | TEST_EXPECT(CloButyr->get_brother() == CloButy2); |
---|
3252 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3253 | |
---|
3254 | env.push(); |
---|
3255 | |
---|
3256 | switch (test) { |
---|
3257 | case 1: CorAquat->insert(CurCitre); break; |
---|
3258 | case 2: CorAquat->insert(CurCitre); break; |
---|
3259 | case 3: break; |
---|
3260 | case 4: CloButyr->REMOVE(); break; |
---|
3261 | case 5: CloButyr->REMOVE(); break; |
---|
3262 | case 6: break; |
---|
3263 | case 7: CloButyr->moveNextTo(CurCitre, 0.5); break; |
---|
3264 | case 8: break; |
---|
3265 | default: ap_assert(0); break; |
---|
3266 | } |
---|
3267 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3268 | |
---|
3269 | { |
---|
3270 | env.push(); |
---|
3271 | |
---|
3272 | switch (test) { |
---|
3273 | case 1: CorAquat->REMOVE(); break; |
---|
3274 | case 2: break; |
---|
3275 | case 3: CorAquat->insert(CurCitre); break; |
---|
3276 | case 4: CloButyr->insert(CloButy2); break; |
---|
3277 | case 5: break; |
---|
3278 | case 6: CloButyr->REMOVE(); break; |
---|
3279 | case 7: CloButyr->moveNextTo(CloButy2, 0.5); break; |
---|
3280 | case 8: CorAquat->insert(CurCitre); CorAquat->REMOVE(); break; |
---|
3281 | default: ap_assert(0); break; |
---|
3282 | } |
---|
3283 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3284 | |
---|
3285 | env.accept_if(accept_inner); |
---|
3286 | } |
---|
3287 | |
---|
3288 | switch (test) { |
---|
3289 | case 1: break; |
---|
3290 | case 2: CorAquat->REMOVE(); break; |
---|
3291 | case 3: if (CorAquat->father) CorAquat->REMOVE(); break; |
---|
3292 | case 4: break; |
---|
3293 | case 5: CloButyr->insert(CloButy2); break; |
---|
3294 | case 6: if (!CloButyr->father) CloButyr->insert(CloButy2); break; |
---|
3295 | case 7: CloButyr->REMOVE(); break; |
---|
3296 | case 8: break; |
---|
3297 | default: ap_assert(0); break; |
---|
3298 | } |
---|
3299 | TEST_EXPECT_VALID_TREE(env.root_node()); |
---|
3300 | |
---|
3301 | env.accept_if(accept_outer); |
---|
3302 | |
---|
3303 | // manually revert changes (outside any stack frame) |
---|
3304 | if (CorAquat->father) CorAquat->REMOVE(); |
---|
3305 | if (!CloButyr->father) CloButyr->insert(CloButy2); |
---|
3306 | } |
---|
3307 | } |
---|
3308 | |
---|
3309 | CorAquat->insert(CurCitre); |
---|
3310 | |
---|
3311 | TEST_EXPECT_PARSVAL(env, PARSIMONY_ORG); |
---|
3312 | env.combines_performed(); // accept any no of combines |
---|
3313 | } |
---|
3314 | |
---|
3315 | #endif // UNIT_TESTS |
---|
3316 | |
---|
3317 | // -------------------------------------------------------------------------------- |
---|
3318 | |
---|