1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : AP_tree_nlen.cxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in Summer 1995 // |
---|
7 | // Institute of Microbiology (Technical University Munich) // |
---|
8 | // http://www.arb-home.de/ // |
---|
9 | // // |
---|
10 | // =============================================================== // |
---|
11 | |
---|
12 | #include "ap_tree_nlen.hxx" |
---|
13 | #include "ap_main.hxx" |
---|
14 | |
---|
15 | #include <AP_seq_dna.hxx> |
---|
16 | #include <aw_root.hxx> |
---|
17 | |
---|
18 | using namespace std; |
---|
19 | |
---|
20 | // --------------------------------- |
---|
21 | // Section base operations: |
---|
22 | // --------------------------------- |
---|
23 | |
---|
24 | AP_UPDATE_FLAGS AP_tree_nlen::check_update() { |
---|
25 | AP_UPDATE_FLAGS res = AP_tree::check_update(); |
---|
26 | |
---|
27 | return res == AP_UPDATE_RELOADED ? AP_UPDATE_OK : res; |
---|
28 | } |
---|
29 | |
---|
30 | void AP_tree_nlen::copy(AP_tree_nlen *tree) { |
---|
31 | // like = operator |
---|
32 | // but copies sequence if is leaf |
---|
33 | |
---|
34 | this->is_leaf = tree->is_leaf; |
---|
35 | this->leftlen = tree->leftlen; |
---|
36 | this->rightlen = tree->rightlen; |
---|
37 | this->gb_node = tree->gb_node; |
---|
38 | |
---|
39 | if (tree->name != NULL) { |
---|
40 | this->name = ARB_strdup(tree->name); |
---|
41 | } |
---|
42 | else { |
---|
43 | this->name = NULL; |
---|
44 | } |
---|
45 | |
---|
46 | if (is_leaf) { |
---|
47 | ap_assert(tree->get_seq()); /* oops - AP_tree_nlen expects to have sequences at leafs! |
---|
48 | * did you forget to remove_leafs() ? */ |
---|
49 | |
---|
50 | set_seq(tree->get_seq()); |
---|
51 | // dangerous - no copy, just moves pointer |
---|
52 | // will result in undefined behavior |
---|
53 | |
---|
54 | ap_assert(0); // this will not work, but is only used in GA_genetic. |
---|
55 | // Use some kind of SmartPtr there! |
---|
56 | } |
---|
57 | } |
---|
58 | |
---|
59 | ostream& operator<<(ostream& out, const AP_tree_nlen *node) { |
---|
60 | out << ' '; |
---|
61 | |
---|
62 | if (node==NULL) { |
---|
63 | out << "NULL"; |
---|
64 | } |
---|
65 | if (node->is_leaf) { |
---|
66 | out << ((void *)node) << '(' << node->name << ')'; |
---|
67 | } |
---|
68 | else { |
---|
69 | static int notTooDeep; |
---|
70 | |
---|
71 | if (notTooDeep) { |
---|
72 | out << ((void *)node); |
---|
73 | if (!node->father) out << " (ROOT)"; |
---|
74 | } |
---|
75 | else { |
---|
76 | notTooDeep = 1; |
---|
77 | |
---|
78 | out << "NODE(" << ((void *)node); |
---|
79 | |
---|
80 | if (!node->father) { |
---|
81 | out << " (ROOT)"; |
---|
82 | } |
---|
83 | else { |
---|
84 | out << ", father=" << node->father; |
---|
85 | } |
---|
86 | |
---|
87 | out << ", leftson=" << node->leftson |
---|
88 | << ", rightson=" << node->rightson |
---|
89 | << ", edge[0]=" << node->edge[0] |
---|
90 | << ", edge[1]=" << node->edge[1] |
---|
91 | << ", edge[2]=" << node->edge[2] |
---|
92 | << ")"; |
---|
93 | |
---|
94 | notTooDeep = 0; |
---|
95 | } |
---|
96 | } |
---|
97 | |
---|
98 | return out << ' '; |
---|
99 | } |
---|
100 | |
---|
101 | int AP_tree_nlen::unusedEdgeIndex() const { |
---|
102 | for (int e=0; e<3; e++) if (edge[e]==NULL) return e; |
---|
103 | return -1; |
---|
104 | } |
---|
105 | |
---|
106 | AP_tree_edge* AP_tree_nlen::edgeTo(const AP_tree_nlen *neighbour) const { |
---|
107 | for (int e=0; e<3; e++) { |
---|
108 | if (edge[e]!=NULL && edge[e]->node[1-index[e]]==neighbour) { |
---|
109 | return edge[e]; |
---|
110 | } |
---|
111 | } |
---|
112 | return NULL; |
---|
113 | } |
---|
114 | |
---|
115 | AP_tree_edge* AP_tree_nlen::nextEdge(const AP_tree_edge *afterThatEdge) const { |
---|
116 | /*! @return one edge of 'this' |
---|
117 | * |
---|
118 | * @param afterThatEdge |
---|
119 | * - if == NULL -> returns the "first" edge (edge[0]) |
---|
120 | * - otherwise -> returns the next edge following 'afterThatEdge' in the array edge[] |
---|
121 | */ |
---|
122 | return edge[afterThatEdge ? ((indexOf(afterThatEdge)+1) % 3) : 0]; |
---|
123 | } |
---|
124 | |
---|
125 | void AP_tree_nlen::unlinkAllEdges(AP_tree_edge **edgePtr1, AP_tree_edge **edgePtr2, AP_tree_edge **edgePtr3) |
---|
126 | { |
---|
127 | ap_assert(edge[0]!=NULL); |
---|
128 | ap_assert(edge[1]!=NULL); |
---|
129 | ap_assert(edge[2]!=NULL); |
---|
130 | |
---|
131 | *edgePtr1 = edge[0]->unlink(); |
---|
132 | *edgePtr2 = edge[1]->unlink(); |
---|
133 | *edgePtr3 = edge[2]->unlink(); |
---|
134 | } |
---|
135 | |
---|
136 | void AP_tree_nlen::linkAllEdges(AP_tree_edge *edge1, AP_tree_edge *edge2, AP_tree_edge *edge3) |
---|
137 | { |
---|
138 | ap_assert(edge[0]==NULL); |
---|
139 | ap_assert(edge[1]==NULL); |
---|
140 | ap_assert(edge[2]==NULL); |
---|
141 | |
---|
142 | edge1->relink(this, get_father()->get_father() ? get_father() : get_brother()); |
---|
143 | edge2->relink(this, get_leftson()); |
---|
144 | edge3->relink(this, get_rightson()); |
---|
145 | } |
---|
146 | |
---|
147 | // ----------------------------- |
---|
148 | // Check tree structure |
---|
149 | |
---|
150 | #if defined(PROVIDE_TREE_STRUCTURE_TESTS) |
---|
151 | |
---|
152 | #if defined(DEBUG) |
---|
153 | #define DUMP_INVALID_SUBTREES |
---|
154 | #endif |
---|
155 | |
---|
156 | #if defined(DEVEL_RALF) |
---|
157 | #define CHECK_CORRECT_INVALIDATION // recombines all up-to-date nodes to find missing invalidations (VERY slow) |
---|
158 | #endif |
---|
159 | |
---|
160 | |
---|
161 | #if defined(DUMP_INVALID_SUBTREES) |
---|
162 | inline void dumpSubtree(const char *msg, const AP_tree_nlen *node) { |
---|
163 | fprintf(stderr, "%s:\n", msg); |
---|
164 | char *printable = GBT_tree_2_newick(node, NewickFormat(nSIMPLE|nWRAP), true); |
---|
165 | fputs(printable, stderr); |
---|
166 | fputc('\n', stderr); |
---|
167 | free(printable); |
---|
168 | } |
---|
169 | #endif |
---|
170 | |
---|
171 | inline const AP_tree_edge *edge_between(const AP_tree_nlen *node1, const AP_tree_nlen *node2) { |
---|
172 | AP_tree_edge *edge_12 = node1->edgeTo(node2); |
---|
173 | |
---|
174 | #if defined(ASSERTION_USED) |
---|
175 | AP_tree_edge *edge_21 = node2->edgeTo(node1); |
---|
176 | ap_assert(edge_12 == edge_21); // nodes should agree about their edge |
---|
177 | #endif |
---|
178 | |
---|
179 | return edge_12; |
---|
180 | } |
---|
181 | |
---|
182 | inline const char *no_valid_edge_between(const AP_tree_nlen *node1, const AP_tree_nlen *node2) { |
---|
183 | AP_tree_edge *edge_12 = node1->edgeTo(node2); |
---|
184 | AP_tree_edge *edge_21 = node2->edgeTo(node1); |
---|
185 | |
---|
186 | if (edge_12 == edge_21) { |
---|
187 | return edge_12 ? NULL : "edge missing"; |
---|
188 | } |
---|
189 | return "edge inconsistent"; |
---|
190 | } |
---|
191 | |
---|
192 | #if defined(DUMP_INVALID_SUBTREES) |
---|
193 | #define PRINT_BAD_EDGE(msg,node) dumpSubtree(msg,node) |
---|
194 | #else // !defined(DUMP_INVALID_SUBTREES) |
---|
195 | #define PRINT_BAD_EDGE(msg,node) fprintf(stderr, "Warning: %s (at node=%p)\n", (msg), (node)) |
---|
196 | #endif |
---|
197 | |
---|
198 | #define SHOW_BAD_EDGE(format,str,node) do{ \ |
---|
199 | char *msg = GBS_global_string_copy(format,str); \ |
---|
200 | PRINT_BAD_EDGE(msg, node); \ |
---|
201 | free(msg); \ |
---|
202 | }while(0) |
---|
203 | |
---|
204 | Validity AP_tree_nlen::has_valid_edges() const { |
---|
205 | Validity valid; |
---|
206 | if (get_father()) { // root has no edges |
---|
207 | if (get_father()->is_root_node()) { // sons of root have one edge between them |
---|
208 | if (is_leftson()) { // test root-edge only from one son |
---|
209 | const char *invalid = no_valid_edge_between(this, get_brother()); |
---|
210 | if (invalid) { |
---|
211 | SHOW_BAD_EDGE("root-%s. root", invalid, get_father()); |
---|
212 | valid = Validity(false, "no valid edge between sons of root"); |
---|
213 | } |
---|
214 | } |
---|
215 | const char *invalid = no_valid_edge_between(this, get_father()); |
---|
216 | if (!invalid || !strstr(invalid, "missing")) { |
---|
217 | SHOW_BAD_EDGE("unexpected edge (%s) between root and son", invalid ? invalid : "valid", this); |
---|
218 | valid = Validity(false, "unexpected edge between son-of-root and root"); |
---|
219 | } |
---|
220 | } |
---|
221 | else { |
---|
222 | const char *invalid = no_valid_edge_between(this, get_father()); |
---|
223 | if (invalid) { |
---|
224 | SHOW_BAD_EDGE("son-%s. father", invalid, get_father()); |
---|
225 | SHOW_BAD_EDGE("parent-%s. son", invalid, this); |
---|
226 | valid = Validity(false, "invalid edge between son and father"); |
---|
227 | } |
---|
228 | } |
---|
229 | } |
---|
230 | |
---|
231 | if (!is_leaf) { |
---|
232 | if (valid) valid = get_leftson()->has_valid_edges(); |
---|
233 | if (valid) valid = get_rightson()->has_valid_edges(); |
---|
234 | } |
---|
235 | return valid; |
---|
236 | } |
---|
237 | |
---|
238 | Validity AP_tree_nlen::sequence_state_valid() const { |
---|
239 | // if some node has a sequence, all son-nodes have to have sequences! |
---|
240 | |
---|
241 | Validity valid; |
---|
242 | |
---|
243 | const AP_sequence *sequence = get_seq(); |
---|
244 | if (sequence) { |
---|
245 | if (sequence->hasSequence()) { |
---|
246 | if (!is_leaf) { |
---|
247 | bool leftson_hasSequence = get_leftson()->hasSequence(); |
---|
248 | bool rightson_hasSequence = get_rightson()->hasSequence(); |
---|
249 | |
---|
250 | #if defined(DUMP_INVALID_SUBTREES) |
---|
251 | if (!leftson_hasSequence) dumpSubtree("left subtree has no sequence", get_leftson()); |
---|
252 | if (!rightson_hasSequence) dumpSubtree("right subtree has no sequence", get_rightson()); |
---|
253 | if (!(leftson_hasSequence && rightson_hasSequence)) { |
---|
254 | dumpSubtree("while father HAS sequence", this); |
---|
255 | } |
---|
256 | #endif |
---|
257 | |
---|
258 | valid = Validity(leftson_hasSequence && rightson_hasSequence, "node has sequence and son w/o sequence"); |
---|
259 | |
---|
260 | #if defined(CHECK_CORRECT_INVALIDATION) |
---|
261 | if (valid) { |
---|
262 | // check for missing invalidations |
---|
263 | // (if recalculating a node (via combine) does not reproduce the current sequence, it should have been invalidated) |
---|
264 | |
---|
265 | AP_sequence *recombined = sequence->dup(); |
---|
266 | AP_FLOAT mutations_from_combine = recombined->noncounting_combine(get_leftson()->get_seq(), get_rightson()->get_seq()); |
---|
267 | |
---|
268 | valid = Validity(recombined->equals(sequence), "recombining changed existing sequence (missing invalidation?)"); |
---|
269 | if (valid) { |
---|
270 | AP_FLOAT expected_mutrate = mutations_from_combine + get_leftson()->mutation_rate + get_rightson()->mutation_rate; |
---|
271 | valid = Validity(expected_mutrate == mutation_rate, "invalid mutation_rate"); |
---|
272 | } |
---|
273 | |
---|
274 | delete recombined; |
---|
275 | |
---|
276 | #if defined(DUMP_INVALID_SUBTREES) |
---|
277 | if (!valid) { |
---|
278 | dumpSubtree(valid.why_not(), this); |
---|
279 | } |
---|
280 | #endif |
---|
281 | } |
---|
282 | #endif |
---|
283 | } |
---|
284 | } |
---|
285 | #if defined(ASSERTION_USED) |
---|
286 | else { |
---|
287 | if (is_leaf) ap_assert(sequence->is_bound_to_species()); // can do lazy load if needed |
---|
288 | } |
---|
289 | #endif |
---|
290 | } |
---|
291 | |
---|
292 | if (!is_leaf) { |
---|
293 | if (valid) valid = get_leftson()->sequence_state_valid(); |
---|
294 | if (valid) valid = get_rightson()->sequence_state_valid(); |
---|
295 | } |
---|
296 | |
---|
297 | return valid; |
---|
298 | } |
---|
299 | |
---|
300 | Validity AP_tree_nlen::is_valid() const { |
---|
301 | ap_assert(knownNonNull(this)); |
---|
302 | |
---|
303 | Validity valid = AP_tree::is_valid(); |
---|
304 | if (valid) valid = has_valid_edges(); |
---|
305 | if (valid) valid = sequence_state_valid(); |
---|
306 | |
---|
307 | return valid; |
---|
308 | } |
---|
309 | |
---|
310 | #endif // PROVIDE_TREE_STRUCTURE_TESTS |
---|
311 | |
---|
312 | // ------------------------- |
---|
313 | // Tree operations: |
---|
314 | // |
---|
315 | // insert |
---|
316 | // remove |
---|
317 | // swap |
---|
318 | // set_root |
---|
319 | // move |
---|
320 | // costs |
---|
321 | |
---|
322 | |
---|
323 | inline void push_all_upnode_sequences(AP_tree_nlen *nodeBelow) { |
---|
324 | for (AP_tree_nlen *upnode = nodeBelow->get_father(); |
---|
325 | upnode; |
---|
326 | upnode = upnode->get_father()) |
---|
327 | { |
---|
328 | ap_main->push_node(upnode, SEQUENCE); |
---|
329 | } |
---|
330 | } |
---|
331 | |
---|
332 | inline void sortOldestFirst(AP_tree_edge **e1, AP_tree_edge **e2) { |
---|
333 | if ((*e1)->Age() > (*e2)->Age()) { |
---|
334 | swap(*e1, *e2); |
---|
335 | } |
---|
336 | } |
---|
337 | |
---|
338 | inline void sortOldestFirst(AP_tree_edge **e1, AP_tree_edge **e2, AP_tree_edge **e3) { |
---|
339 | sortOldestFirst(e1, e2); |
---|
340 | sortOldestFirst(e2, e3); |
---|
341 | sortOldestFirst(e1, e2); |
---|
342 | } |
---|
343 | |
---|
344 | void AP_tree_nlen::initial_insert(AP_tree_nlen *newBrother, AP_pars_root *troot) { |
---|
345 | // construct initial tree from 'this' and 'newBrother' |
---|
346 | // (both have to be leafs) |
---|
347 | |
---|
348 | ap_assert(newBrother); |
---|
349 | ap_assert(is_leaf); |
---|
350 | ap_assert(newBrother->is_leaf); |
---|
351 | |
---|
352 | AP_tree::initial_insert(newBrother, troot); |
---|
353 | makeEdge(newBrother, this); // build the root edge |
---|
354 | |
---|
355 | ASSERT_VALID_TREE(this->get_father()); |
---|
356 | } |
---|
357 | |
---|
358 | void AP_tree_nlen::insert(AP_tree_nlen *newBrother) { |
---|
359 | // inserts 'this' (a new node) at the father-edge of 'newBrother' |
---|
360 | ap_assert(newBrother); |
---|
361 | |
---|
362 | ASSERT_VALID_TREE(this); |
---|
363 | ASSERT_VALID_TREE(newBrother); |
---|
364 | |
---|
365 | ap_main->push_node(this, STRUCTURE); |
---|
366 | ap_main->push_node(newBrother, STRUCTURE); |
---|
367 | |
---|
368 | AP_tree_nlen *brothersFather = newBrother->get_father(); |
---|
369 | if (brothersFather) { |
---|
370 | ap_main->push_node(brothersFather, BOTH); |
---|
371 | push_all_upnode_sequences(brothersFather); |
---|
372 | |
---|
373 | if (brothersFather->get_father()) { |
---|
374 | AP_tree_edge *oldEdge = newBrother->edgeTo(newBrother->get_father())->unlink(); |
---|
375 | AP_tree::insert(newBrother); |
---|
376 | oldEdge->relink(get_father(), get_father()->get_father()); |
---|
377 | } |
---|
378 | else { // insert to son of root |
---|
379 | AP_tree_nlen *brothersOldBrother = newBrother->get_brother(); |
---|
380 | ap_main->push_node(brothersOldBrother, STRUCTURE); |
---|
381 | |
---|
382 | AP_tree_edge *oldEdge = newBrother->edgeTo(brothersOldBrother)->unlink(); |
---|
383 | AP_tree::insert(newBrother); |
---|
384 | oldEdge->relink(get_father(), get_father()->get_brother()); |
---|
385 | } |
---|
386 | |
---|
387 | makeEdge(this, get_father()); |
---|
388 | makeEdge(get_father(), newBrother); |
---|
389 | |
---|
390 | ASSERT_VALID_TREE(get_father()->get_father()); |
---|
391 | } |
---|
392 | else { // insert at root |
---|
393 | ap_assert(!newBrother->is_leaf); // either swap 'this' and 'newBrother' or use initial_insert() to construct the initial tree |
---|
394 | |
---|
395 | AP_tree_nlen *lson = newBrother->get_leftson(); |
---|
396 | AP_tree_nlen *rson = newBrother->get_rightson(); |
---|
397 | |
---|
398 | ap_main->push_node(lson, STRUCTURE); |
---|
399 | ap_main->push_node(rson, STRUCTURE); |
---|
400 | |
---|
401 | AP_tree_edge *oldEdge = lson->edgeTo(rson)->unlink(); |
---|
402 | |
---|
403 | AP_tree::insert(newBrother); |
---|
404 | |
---|
405 | oldEdge->relink(this, newBrother); |
---|
406 | makeEdge(newBrother, rson); |
---|
407 | makeEdge(newBrother, lson); |
---|
408 | |
---|
409 | ASSERT_VALID_TREE(get_father()); |
---|
410 | } |
---|
411 | } |
---|
412 | |
---|
413 | AP_tree_nlen *AP_tree_nlen::REMOVE() { |
---|
414 | // Removes 'this' and its father from the tree: |
---|
415 | // |
---|
416 | // grandpa grandpa |
---|
417 | // / / |
---|
418 | // / / |
---|
419 | // father => brother |
---|
420 | // / \ . |
---|
421 | // / \ . |
---|
422 | // this brother |
---|
423 | // |
---|
424 | // One of the edges is relinked between brother and grandpa. |
---|
425 | // 'father' is destroyed, 'this' is returned. |
---|
426 | |
---|
427 | AP_tree_nlen *oldBrother = get_brother(); |
---|
428 | |
---|
429 | ASSERT_VALID_TREE(this); |
---|
430 | |
---|
431 | ap_assert(father); // can't remove complete tree, |
---|
432 | |
---|
433 | ap_main->push_node(this, STRUCTURE); |
---|
434 | ap_main->push_node(oldBrother, STRUCTURE); |
---|
435 | push_all_upnode_sequences(get_father()); |
---|
436 | |
---|
437 | AP_tree_edge *oldEdge; |
---|
438 | AP_tree_nlen *grandPa = get_father()->get_father(); |
---|
439 | if (grandPa) { |
---|
440 | ASSERT_VALID_TREE(grandPa); |
---|
441 | |
---|
442 | ap_main->push_node(get_father(), BOTH); |
---|
443 | ap_main->push_node(grandPa, STRUCTURE); |
---|
444 | |
---|
445 | destroyEdge(edgeTo(get_father())->unlink()); |
---|
446 | destroyEdge(get_father()->edgeTo(oldBrother)->unlink()); |
---|
447 | |
---|
448 | if (grandPa->father) { |
---|
449 | oldEdge = get_father()->edgeTo(grandPa)->unlink(); |
---|
450 | AP_tree::REMOVE(); |
---|
451 | oldEdge->relink(oldBrother, grandPa); |
---|
452 | } |
---|
453 | else { // remove grandson of root |
---|
454 | AP_tree_nlen *uncle = get_father()->get_brother(); |
---|
455 | ap_main->push_node(uncle, STRUCTURE); |
---|
456 | |
---|
457 | oldEdge = get_father()->edgeTo(uncle)->unlink(); |
---|
458 | AP_tree::REMOVE(); |
---|
459 | oldEdge->relink(oldBrother, uncle); |
---|
460 | } |
---|
461 | ASSERT_VALID_TREE(grandPa); |
---|
462 | } |
---|
463 | else { // remove son of root |
---|
464 | AP_tree_nlen *oldRoot = get_father(); |
---|
465 | ASSERT_VALID_TREE(oldRoot); |
---|
466 | |
---|
467 | if (oldBrother->is_leaf) { |
---|
468 | // root |
---|
469 | // oo |
---|
470 | // o o |
---|
471 | // o o |
---|
472 | // oldBrother --- this -----> NULL |
---|
473 | // |
---|
474 | ap_main->push_node(oldRoot, ROOT); |
---|
475 | |
---|
476 | destroyEdge(edgeTo(oldBrother)->unlink()); |
---|
477 | |
---|
478 | #if defined(ASSERTION_USED) |
---|
479 | AP_pars_root *troot = get_tree_root(); |
---|
480 | #endif // ASSERTION_USED |
---|
481 | AP_tree::REMOVE(); |
---|
482 | ap_assert(!troot->get_root_node()); // tree should have been removed |
---|
483 | } |
---|
484 | else { |
---|
485 | // |
---|
486 | // root |
---|
487 | // oo . |
---|
488 | // o o root (=oldBrother) |
---|
489 | // o o oo . |
---|
490 | // oldBrother --- this -----> o o . |
---|
491 | // /\ o o . |
---|
492 | // / \ lson ----- rson |
---|
493 | // / \ . |
---|
494 | // lson rson |
---|
495 | // |
---|
496 | AP_tree_nlen *lson = oldBrother->get_leftson(); |
---|
497 | AP_tree_nlen *rson = oldBrother->get_rightson(); |
---|
498 | |
---|
499 | ap_assert(lson && rson); |
---|
500 | |
---|
501 | ap_main->push_node(lson, STRUCTURE); |
---|
502 | ap_main->push_node(rson, STRUCTURE); |
---|
503 | ap_main->push_node(oldRoot, ROOT); |
---|
504 | |
---|
505 | destroyEdge(edgeTo(oldBrother)->unlink()); |
---|
506 | destroyEdge(oldBrother->edgeTo(lson)->unlink()); |
---|
507 | |
---|
508 | oldEdge = oldBrother->edgeTo(rson)->unlink(); |
---|
509 | AP_tree::REMOVE(); |
---|
510 | oldEdge->relink(lson, rson); |
---|
511 | |
---|
512 | ap_assert(lson->get_tree_root()->get_root_node() == oldBrother); |
---|
513 | ASSERT_VALID_TREE(oldBrother); |
---|
514 | } |
---|
515 | } |
---|
516 | |
---|
517 | father = NULL; |
---|
518 | set_tree_root(NULL); |
---|
519 | |
---|
520 | ASSERT_VALID_TREE(this); |
---|
521 | return this; |
---|
522 | } |
---|
523 | |
---|
524 | void AP_tree_nlen::swap_sons() { |
---|
525 | ap_assert(!is_leaf); // cannot swap leafs |
---|
526 | |
---|
527 | ap_main->push_node(this, STRUCTURE); |
---|
528 | AP_tree::swap_sons(); |
---|
529 | } |
---|
530 | |
---|
531 | void AP_tree_nlen::swap_assymetric(AP_TREE_SIDE mode) { |
---|
532 | // mode AP_LEFT exchanges leftson with brother |
---|
533 | // mode AP_RIGHT exchanges rightson with brother |
---|
534 | |
---|
535 | ap_assert(!is_leaf); // cannot swap leafs |
---|
536 | ap_assert(father); // cannot swap root (has no brother) |
---|
537 | ap_assert(mode == AP_LEFT || mode == AP_RIGHT); // illegal mode |
---|
538 | |
---|
539 | AP_tree_nlen *oldBrother = get_brother(); |
---|
540 | AP_tree_nlen *movedSon = mode == AP_LEFT ? get_leftson() : get_rightson(); |
---|
541 | |
---|
542 | if (!father->father) { |
---|
543 | // son of root case |
---|
544 | // take leftson of brother to exchange with |
---|
545 | |
---|
546 | if (!oldBrother->is_leaf) { // swap needed ? |
---|
547 | AP_tree_nlen *nephew = oldBrother->get_leftson(); |
---|
548 | |
---|
549 | ap_main->push_node(this, BOTH); |
---|
550 | ap_main->push_node(movedSon, STRUCTURE); |
---|
551 | ap_main->push_node(get_father(), SEQUENCE); |
---|
552 | ap_main->push_node(nephew, STRUCTURE); |
---|
553 | ap_main->push_node(oldBrother, BOTH); |
---|
554 | |
---|
555 | AP_tree_edge *edge1 = edgeTo(movedSon)->unlink(); |
---|
556 | AP_tree_edge *edge2 = oldBrother->edgeTo(nephew)->unlink(); |
---|
557 | |
---|
558 | if (mode == AP_LEFT) { |
---|
559 | swap(leftson->father, nephew->father); |
---|
560 | swap(leftson, oldBrother->leftson); |
---|
561 | } |
---|
562 | else { |
---|
563 | swap(rightson->father, nephew->father); |
---|
564 | swap(rightson, oldBrother->leftson); |
---|
565 | } |
---|
566 | |
---|
567 | edge2->relink(this, nephew); |
---|
568 | edge1->relink(oldBrother, movedSon); |
---|
569 | |
---|
570 | if (nephew->gr.mark_sum != movedSon->gr.mark_sum) { |
---|
571 | get_brother()->recalc_marked_from_sons(); |
---|
572 | this->recalc_marked_from_sons_and_forward_upwards(); |
---|
573 | } |
---|
574 | } |
---|
575 | } |
---|
576 | else { |
---|
577 | ap_main->push_node(this, BOTH); |
---|
578 | ap_main->push_node(get_father(), BOTH); |
---|
579 | ap_main->push_node(oldBrother, STRUCTURE); |
---|
580 | ap_main->push_node(movedSon, STRUCTURE); |
---|
581 | |
---|
582 | push_all_upnode_sequences(get_father()); |
---|
583 | |
---|
584 | AP_tree_edge *edge1 = edgeTo(movedSon)->unlink(); |
---|
585 | AP_tree_edge *edge2 = get_father()->edgeTo(oldBrother)->unlink(); |
---|
586 | |
---|
587 | if (mode == AP_LEFT) { // swap leftson with brother |
---|
588 | swap(leftson->father, oldBrother->father); |
---|
589 | if (father->leftson == this) { |
---|
590 | swap(leftson, father->rightson); |
---|
591 | } |
---|
592 | else { |
---|
593 | swap(leftson, father->leftson); |
---|
594 | } |
---|
595 | } |
---|
596 | else { // swap rightson with brother |
---|
597 | swap(rightson->father, oldBrother->father); |
---|
598 | if (father->leftson == this) { |
---|
599 | swap(rightson, father->rightson); |
---|
600 | } |
---|
601 | else { |
---|
602 | swap(rightson, father->leftson); |
---|
603 | } |
---|
604 | } |
---|
605 | |
---|
606 | edge2->relink(this, oldBrother); |
---|
607 | edge1->relink(get_father(), movedSon); |
---|
608 | |
---|
609 | if (oldBrother->gr.mark_sum != movedSon->gr.mark_sum) { |
---|
610 | recalc_marked_from_sons_and_forward_upwards(); // father is done implicit |
---|
611 | } |
---|
612 | } |
---|
613 | } |
---|
614 | |
---|
615 | void AP_tree_nlen::set_root() { |
---|
616 | if (at_root()) return; // already root |
---|
617 | |
---|
618 | // from this to root buffer the nodes |
---|
619 | ap_main->push_node(this, STRUCTURE); |
---|
620 | |
---|
621 | AP_tree_nlen *son_of_root = 0; // in previous topology 'this' was contained inside 'son_of_root' |
---|
622 | AP_tree_nlen *old_root = 0; |
---|
623 | { |
---|
624 | AP_tree_nlen *pntr; |
---|
625 | for (pntr = get_father(); pntr->father; pntr = pntr->get_father()) { |
---|
626 | ap_main->push_node(pntr, BOTH); |
---|
627 | son_of_root = pntr; |
---|
628 | } |
---|
629 | old_root = pntr; |
---|
630 | } |
---|
631 | |
---|
632 | ap_assert(son_of_root); // always true |
---|
633 | |
---|
634 | { |
---|
635 | AP_tree_nlen *other_son_of_root = son_of_root->get_brother(); |
---|
636 | ap_main->push_node(other_son_of_root, STRUCTURE); |
---|
637 | } |
---|
638 | |
---|
639 | ap_main->push_node(old_root, ROOT); |
---|
640 | AP_tree::set_root(); |
---|
641 | |
---|
642 | for (AP_tree_nlen *node = son_of_root; node ; node = node->get_father()) { |
---|
643 | node->recalc_marked_from_sons(); |
---|
644 | } |
---|
645 | } |
---|
646 | |
---|
647 | void AP_tree_nlen::moveNextTo(AP_tree_nlen *newBrother, AP_FLOAT rel_pos) { |
---|
648 | // Note: see http://bugs.arb-home.de/ticket/627#comment:8 for an experimental |
---|
649 | // replacement of moveNextTo with REMOVE() + insert() |
---|
650 | |
---|
651 | ap_assert(father); |
---|
652 | ap_assert(newBrother); |
---|
653 | ap_assert(newBrother->father); |
---|
654 | ap_assert(newBrother->father != father); // already there |
---|
655 | ap_assert(newBrother != father); // already there |
---|
656 | |
---|
657 | ASSERT_VALID_TREE(rootNode()); |
---|
658 | |
---|
659 | // push everything that will be modified onto stack |
---|
660 | ap_main->push_node(this, STRUCTURE); |
---|
661 | ap_main->push_node(get_brother(), STRUCTURE); |
---|
662 | |
---|
663 | if (father->father) { |
---|
664 | AP_tree_nlen *grandpa = get_father()->get_father(); |
---|
665 | |
---|
666 | ap_main->push_node(get_father(), BOTH); |
---|
667 | |
---|
668 | if (grandpa->father) { |
---|
669 | ap_main->push_node(grandpa, BOTH); |
---|
670 | push_all_upnode_sequences(grandpa); |
---|
671 | } |
---|
672 | else { // 'this' is grandson of root |
---|
673 | ap_main->push_node(grandpa, ROOT); |
---|
674 | ap_main->push_node(get_father()->get_brother(), STRUCTURE); |
---|
675 | } |
---|
676 | } |
---|
677 | else { // 'this' is son of root |
---|
678 | ap_main->push_node(get_father(), ROOT); |
---|
679 | |
---|
680 | if (!get_brother()->is_leaf) { |
---|
681 | ap_main->push_node(get_brother()->get_leftson(), STRUCTURE); |
---|
682 | ap_main->push_node(get_brother()->get_rightson(), STRUCTURE); |
---|
683 | } |
---|
684 | } |
---|
685 | |
---|
686 | ap_main->push_node(newBrother, STRUCTURE); |
---|
687 | if (newBrother->father) { |
---|
688 | AP_tree_nlen *newBrothersFather = newBrother->get_father(); |
---|
689 | ap_main->push_node(newBrothersFather, BOTH); |
---|
690 | if (!newBrothersFather->father) { // move to son of root |
---|
691 | ap_main->push_node(newBrother->get_brother(), STRUCTURE); |
---|
692 | } |
---|
693 | push_all_upnode_sequences(newBrothersFather); |
---|
694 | } |
---|
695 | |
---|
696 | AP_tree_nlen *thisFather = get_father(); |
---|
697 | AP_tree_nlen *grandFather = thisFather->get_father(); |
---|
698 | AP_tree_nlen *oldBrother = get_brother(); |
---|
699 | AP_tree_nlen *newBrothersFather = newBrother->get_father(); |
---|
700 | AP_tree_edge *e1, *e2, *e3; |
---|
701 | |
---|
702 | if (thisFather==newBrothersFather->get_father()) { // son -> son of brother |
---|
703 | if (grandFather) { |
---|
704 | if (grandFather->get_father()) { |
---|
705 | // covered by test at PARS_main.cxx@COVER3 |
---|
706 | thisFather->unlinkAllEdges(&e1, &e2, &e3); |
---|
707 | AP_tree_edge *e4 = newBrother->edgeTo(oldBrother)->unlink(); |
---|
708 | |
---|
709 | AP_tree::moveNextTo(newBrother, rel_pos); |
---|
710 | |
---|
711 | sortOldestFirst(&e1, &e2, &e3); |
---|
712 | e1->relink(oldBrother, grandFather); // use oldest edge at remove position |
---|
713 | thisFather->linkAllEdges(e2, e3, e4); |
---|
714 | } |
---|
715 | else { // grandson of root -> son of brother |
---|
716 | // covered by test at PARS_main.cxx@COVER2 |
---|
717 | AP_tree_nlen *uncle = thisFather->get_brother(); |
---|
718 | |
---|
719 | thisFather->unlinkAllEdges(&e1, &e2, &e3); |
---|
720 | AP_tree_edge *e4 = newBrother->edgeTo(oldBrother)->unlink(); |
---|
721 | |
---|
722 | AP_tree::moveNextTo(newBrother, rel_pos); |
---|
723 | |
---|
724 | sortOldestFirst(&e1, &e2, &e3); |
---|
725 | e1->relink(oldBrother, uncle); |
---|
726 | thisFather->linkAllEdges(e2, e3, e4); |
---|
727 | } |
---|
728 | } |
---|
729 | else { // son of root -> grandson of root |
---|
730 | // covered by test at PARS_main.cxx@COVER1 |
---|
731 | oldBrother->unlinkAllEdges(&e1, &e2, &e3); |
---|
732 | AP_tree::moveNextTo(newBrother, rel_pos); |
---|
733 | thisFather->linkAllEdges(e1, e2, e3); |
---|
734 | } |
---|
735 | } |
---|
736 | else if (grandFather==newBrothersFather) { // son -> brother of father |
---|
737 | if (grandFather->father) { |
---|
738 | // covered by test at PARS_main.cxx@COVER4 |
---|
739 | thisFather->unlinkAllEdges(&e1, &e2, &e3); |
---|
740 | AP_tree_edge *e4 = grandFather->edgeTo(newBrother)->unlink(); |
---|
741 | |
---|
742 | AP_tree::moveNextTo(newBrother, rel_pos); |
---|
743 | |
---|
744 | sortOldestFirst(&e1, &e2, &e3); |
---|
745 | e1->relink(oldBrother, grandFather); |
---|
746 | thisFather->linkAllEdges(e2, e3, e4); |
---|
747 | } |
---|
748 | else { // no edges change if we move grandson of root -> son of root |
---|
749 | AP_tree::moveNextTo(newBrother, rel_pos); |
---|
750 | } |
---|
751 | } |
---|
752 | else { |
---|
753 | // now we are sure, the minimal distance |
---|
754 | // between 'this' and 'newBrother' is 4 edges |
---|
755 | // or if the root-edge is between them, the |
---|
756 | // minimal distance is 3 edges |
---|
757 | |
---|
758 | if (!grandFather) { // son of root |
---|
759 | oldBrother->unlinkAllEdges(&e1, &e2, &e3); |
---|
760 | AP_tree_edge *e4 = newBrother->edgeTo(newBrothersFather)->unlink(); |
---|
761 | |
---|
762 | AP_tree::moveNextTo(newBrother, rel_pos); |
---|
763 | |
---|
764 | sortOldestFirst(&e1, &e2, &e3); |
---|
765 | e1->relink(oldBrother->get_leftson(), oldBrother->get_rightson()); // new root-edge |
---|
766 | thisFather->linkAllEdges(e2, e3, e4); // old root |
---|
767 | } |
---|
768 | else if (!grandFather->get_father()) { // grandson of root |
---|
769 | if (newBrothersFather->get_father()->get_father()==NULL) { // grandson of root -> grandson of root |
---|
770 | thisFather->unlinkAllEdges(&e1, &e2, &e3); |
---|
771 | AP_tree_edge *e4 = newBrother->edgeTo(newBrothersFather)->unlink(); |
---|
772 | |
---|
773 | AP_tree::moveNextTo(newBrother, rel_pos); |
---|
774 | |
---|
775 | sortOldestFirst(&e1, &e2, &e3); |
---|
776 | e1->relink(oldBrother, newBrothersFather); // new root-edge |
---|
777 | thisFather->linkAllEdges(e2, e3, e4); |
---|
778 | } |
---|
779 | else { |
---|
780 | AP_tree_nlen *uncle = thisFather->get_brother(); |
---|
781 | |
---|
782 | thisFather->unlinkAllEdges(&e1, &e2, &e3); |
---|
783 | AP_tree_edge *e4 = newBrother->edgeTo(newBrothersFather)->unlink(); |
---|
784 | |
---|
785 | AP_tree::moveNextTo(newBrother, rel_pos); |
---|
786 | |
---|
787 | sortOldestFirst(&e1, &e2, &e3); |
---|
788 | e1->relink(oldBrother, uncle); |
---|
789 | thisFather->linkAllEdges(e2, e3, e4); |
---|
790 | } |
---|
791 | } |
---|
792 | else { |
---|
793 | if (newBrothersFather->get_father()==NULL) { // move to son of root |
---|
794 | AP_tree_nlen *newBrothersBrother = newBrother->get_brother(); |
---|
795 | |
---|
796 | thisFather->unlinkAllEdges(&e1, &e2, &e3); |
---|
797 | AP_tree_edge *e4 = newBrother->edgeTo(newBrothersBrother)->unlink(); |
---|
798 | |
---|
799 | AP_tree::moveNextTo(newBrother, rel_pos); |
---|
800 | |
---|
801 | sortOldestFirst(&e1, &e2, &e3); |
---|
802 | e1->relink(oldBrother, grandFather); |
---|
803 | thisFather->linkAllEdges(e2, e3, e4); |
---|
804 | } |
---|
805 | else { // simple independent move |
---|
806 | thisFather->unlinkAllEdges(&e1, &e2, &e3); |
---|
807 | AP_tree_edge *e4 = newBrother->edgeTo(newBrothersFather)->unlink(); |
---|
808 | |
---|
809 | AP_tree::moveNextTo(newBrother, rel_pos); |
---|
810 | |
---|
811 | sortOldestFirst(&e1, &e2, &e3); |
---|
812 | e1->relink(oldBrother, grandFather); |
---|
813 | thisFather->linkAllEdges(e2, e3, e4); |
---|
814 | } |
---|
815 | } |
---|
816 | } |
---|
817 | |
---|
818 | ASSERT_VALID_TREE(this); |
---|
819 | ASSERT_VALID_TREE(rootNode()); |
---|
820 | |
---|
821 | ap_assert(is_leftson()); |
---|
822 | ap_assert(get_brother() == newBrother); |
---|
823 | } |
---|
824 | |
---|
825 | void AP_tree_nlen::unhash_sequence() { |
---|
826 | /*! removes the parsimony sequence from an inner node |
---|
827 | * (has no effect for leafs) |
---|
828 | */ |
---|
829 | |
---|
830 | AP_sequence *sequence = get_seq(); |
---|
831 | if (sequence && !is_leaf) sequence->forget_sequence(); |
---|
832 | } |
---|
833 | |
---|
834 | bool AP_tree_nlen::acceptCurrentState(Level frame_level) { |
---|
835 | // returns |
---|
836 | // - true if the top state has been removed |
---|
837 | // - false if the top state was kept/extended for possible revert at lower frame_level |
---|
838 | |
---|
839 | if (remembered_for_frame != frame_level) { |
---|
840 | ap_assert(0); // internal control number check failed |
---|
841 | return false; |
---|
842 | } |
---|
843 | |
---|
844 | NodeState *state = states.pop(); |
---|
845 | bool removed = true; |
---|
846 | |
---|
847 | Level next_frame_level = frame_level-1; |
---|
848 | Level stored_frame_level = state->frameNr; |
---|
849 | |
---|
850 | if (!next_frame_level) { // accept() called at top-level |
---|
851 | delete state; |
---|
852 | } |
---|
853 | else if (stored_frame_level == next_frame_level) { |
---|
854 | // node already is buffered for next_frame_level |
---|
855 | |
---|
856 | // if the currently accepted state->mode is not completely covered by previous state->mode |
---|
857 | // => a future revert() would only restore partially |
---|
858 | // To avoid that, move missing state information to previous NodeState |
---|
859 | { |
---|
860 | NodeState *prev_state = states.top(); |
---|
861 | AP_STACK_MODE prev_mode = prev_state->mode; |
---|
862 | AP_STACK_MODE common = AP_STACK_MODE(prev_mode & state->mode); |
---|
863 | |
---|
864 | if (common != state->mode) { |
---|
865 | AP_STACK_MODE missing = AP_STACK_MODE(state->mode & ~common); // previous is missing this state information |
---|
866 | |
---|
867 | ap_assert((prev_mode&missing) == NOTHING); |
---|
868 | state->move_info_to(*prev_state, missing); |
---|
869 | } |
---|
870 | } |
---|
871 | |
---|
872 | delete state; |
---|
873 | } |
---|
874 | else { |
---|
875 | // keep state for future revert |
---|
876 | states.push(state); |
---|
877 | removed = false; |
---|
878 | } |
---|
879 | remembered_for_frame = next_frame_level; |
---|
880 | |
---|
881 | return removed; |
---|
882 | } |
---|
883 | |
---|
884 | |
---|
885 | bool AP_tree_nlen::rememberState(AP_STACK_MODE mode, Level frame_level) { |
---|
886 | // according to mode |
---|
887 | // tree_structure or sequence is buffered in the node |
---|
888 | |
---|
889 | NodeState *store; |
---|
890 | bool ret; |
---|
891 | |
---|
892 | if (is_leaf && !(STRUCTURE & mode)) return false; // tips push only structure |
---|
893 | |
---|
894 | if (remembered_for_frame == frame_level) { // node already has a push (at current frame_level) |
---|
895 | NodeState *is_stored = states.top(); |
---|
896 | |
---|
897 | if (0 == (mode & ~is_stored->mode)) { // already buffered |
---|
898 | AP_sequence *sequence = get_seq(); |
---|
899 | if (sequence && (mode & SEQUENCE)) sequence->forget_sequence(); |
---|
900 | return false; |
---|
901 | } |
---|
902 | store = is_stored; |
---|
903 | ret = false; |
---|
904 | } |
---|
905 | else { // first push for this node (in current stack frame) |
---|
906 | store = new NodeState(remembered_for_frame); |
---|
907 | states.push(store); |
---|
908 | |
---|
909 | remembered_for_frame = frame_level; |
---|
910 | ret = true; |
---|
911 | } |
---|
912 | |
---|
913 | if ((mode & (STRUCTURE|SEQUENCE)) && !(store->mode & (STRUCTURE|SEQUENCE))) { |
---|
914 | store->mark_sum = gr.mark_sum; |
---|
915 | } |
---|
916 | if ((mode & STRUCTURE) && !(store->mode & STRUCTURE)) { |
---|
917 | store->father = get_father(); |
---|
918 | store->leftson = get_leftson(); |
---|
919 | store->rightson = get_rightson(); |
---|
920 | store->leftlen = leftlen; |
---|
921 | store->rightlen = rightlen; |
---|
922 | store->root = get_tree_root(); |
---|
923 | store->gb_node = gb_node; |
---|
924 | |
---|
925 | for (int e=0; e<3; e++) { |
---|
926 | store->edge[e] = edge[e]; |
---|
927 | store->edgeIndex[e] = index[e]; |
---|
928 | } |
---|
929 | } |
---|
930 | |
---|
931 | if (mode & SEQUENCE) { |
---|
932 | ap_assert(!is_leaf); // only allowed to push SEQUENCE for inner nodes |
---|
933 | if (!(store->mode & SEQUENCE)) { |
---|
934 | AP_sequence *sequence = take_seq(); |
---|
935 | store->sequence = sequence; |
---|
936 | store->mutation_rate = mutation_rate; |
---|
937 | mutation_rate = 0.0; |
---|
938 | } |
---|
939 | else { |
---|
940 | AP_sequence *sequence = get_seq(); |
---|
941 | if (sequence) sequence->forget_sequence(); |
---|
942 | } |
---|
943 | } |
---|
944 | |
---|
945 | store->mode = (AP_STACK_MODE)(store->mode|mode); |
---|
946 | |
---|
947 | return ret; |
---|
948 | } |
---|
949 | |
---|
950 | void NodeState::move_info_to(NodeState& target, AP_STACK_MODE what) { |
---|
951 | // rescue partial NodeState information |
---|
952 | |
---|
953 | ap_assert((mode&what) == what); // this has to contain 'what' is moved |
---|
954 | ap_assert((target.mode&what) == NOTHING); // target shall not already contain 'what' is moved |
---|
955 | |
---|
956 | if ((what & (STRUCTURE|SEQUENCE)) && !(target.mode & (STRUCTURE|SEQUENCE))) { |
---|
957 | target.mark_sum = mark_sum; |
---|
958 | } |
---|
959 | if (what & STRUCTURE) { |
---|
960 | target.father = father; |
---|
961 | target.leftson = leftson; |
---|
962 | target.rightson = rightson; |
---|
963 | target.leftlen = leftlen; |
---|
964 | target.rightlen = rightlen; |
---|
965 | target.root = root; |
---|
966 | target.gb_node = gb_node; |
---|
967 | |
---|
968 | for (int e=0; e<3; e++) { |
---|
969 | target.edge[e] = edge[e]; |
---|
970 | target.edgeIndex[e] = edgeIndex[e]; |
---|
971 | } |
---|
972 | } |
---|
973 | if (what & SEQUENCE) { |
---|
974 | target.sequence = sequence; |
---|
975 | target.mutation_rate = mutation_rate; |
---|
976 | sequence = NULL; |
---|
977 | |
---|
978 | } |
---|
979 | // nothing needs to be done for ROOT |
---|
980 | target.mode = AP_STACK_MODE(target.mode|what); |
---|
981 | } |
---|
982 | |
---|
983 | void AP_tree_nlen::restore_structure(const NodeState& state) { |
---|
984 | father = state.father; |
---|
985 | leftson = state.leftson; |
---|
986 | rightson = state.rightson; |
---|
987 | leftlen = state.leftlen; |
---|
988 | rightlen = state.rightlen; |
---|
989 | set_tree_root(state.root); |
---|
990 | gb_node = state.gb_node; |
---|
991 | |
---|
992 | gr.mark_sum = state.mark_sum; |
---|
993 | |
---|
994 | for (int e=0; e<3; e++) { |
---|
995 | edge[e] = state.edge[e]; |
---|
996 | index[e] = state.edgeIndex[e]; |
---|
997 | if (edge[e]) { |
---|
998 | edge[e]->index[index[e]] = e; |
---|
999 | edge[e]->node[index[e]] = this; |
---|
1000 | } |
---|
1001 | } |
---|
1002 | } |
---|
1003 | void AP_tree_nlen::restore_sequence(NodeState& state) { |
---|
1004 | replace_seq(state.sequence); |
---|
1005 | state.sequence = NULL; |
---|
1006 | mutation_rate = state.mutation_rate; |
---|
1007 | gr.mark_sum = state.mark_sum; |
---|
1008 | } |
---|
1009 | void AP_tree_nlen::restore_sequence_nondestructive(const NodeState& state) { |
---|
1010 | replace_seq(state.sequence ? state.sequence->dup() : NULL); |
---|
1011 | mutation_rate = state.mutation_rate; |
---|
1012 | } |
---|
1013 | void AP_tree_nlen::restore_root(const NodeState& state) { |
---|
1014 | state.root->change_root(state.root->get_root_node(), this); |
---|
1015 | } |
---|
1016 | |
---|
1017 | void AP_tree_nlen::restore(NodeState& state) { |
---|
1018 | //! restore 'this' from NodeState (cheap; only call once for each 'state') |
---|
1019 | AP_STACK_MODE mode = state.mode; |
---|
1020 | if (mode&STRUCTURE) restore_structure(state); |
---|
1021 | if (mode&SEQUENCE) restore_sequence(state); |
---|
1022 | if (ROOT==mode) restore_root(state); |
---|
1023 | } |
---|
1024 | void AP_tree_nlen::restore_nondestructive(const NodeState& state) { |
---|
1025 | //! restore 'this' from NodeState (expensive; may be called multiple times for each 'state') |
---|
1026 | AP_STACK_MODE mode = state.mode; |
---|
1027 | if (mode&STRUCTURE) restore_structure(state); |
---|
1028 | if (mode&SEQUENCE) restore_sequence_nondestructive(state); |
---|
1029 | if (ROOT==mode) restore_root(state); |
---|
1030 | } |
---|
1031 | |
---|
1032 | void AP_tree_nlen::revertToPreviousState(Level IF_ASSERTION_USED(curr_frameLevel), bool& IF_ASSERTION_USED(rootPopped)) { // pop old tree costs |
---|
1033 | ap_assert(remembered_for_frame == curr_frameLevel); // error in node stack (node wasnt remembered in current frame!) |
---|
1034 | |
---|
1035 | NodeState *previous = states.pop(); |
---|
1036 | #if defined(ASSERTION_USED) |
---|
1037 | if (previous->mode == ROOT) { // @@@ remove test code later |
---|
1038 | ap_assert(!rootPopped); // only allowed once |
---|
1039 | rootPopped = true; |
---|
1040 | } |
---|
1041 | #endif |
---|
1042 | restore(*previous); |
---|
1043 | |
---|
1044 | remembered_for_frame = previous->frameNr; |
---|
1045 | delete previous; |
---|
1046 | } |
---|
1047 | |
---|
1048 | void AP_tree_nlen::parsimony_rec(char *mutPerSite) { |
---|
1049 | AP_sequence *sequence = get_seq(); |
---|
1050 | |
---|
1051 | if (is_leaf) { |
---|
1052 | ap_assert(sequence); // tree w/o aliview? |
---|
1053 | sequence->ensure_sequence_loaded(); |
---|
1054 | } |
---|
1055 | else { |
---|
1056 | if (!sequence) { |
---|
1057 | sequence = set_seq(get_tree_root()->get_seqTemplate()->dup()); |
---|
1058 | ap_assert(sequence); |
---|
1059 | } |
---|
1060 | |
---|
1061 | if (!sequence->hasSequence()) { |
---|
1062 | AP_tree_nlen *lson = get_leftson(); |
---|
1063 | AP_tree_nlen *rson = get_rightson(); |
---|
1064 | |
---|
1065 | ap_assert(lson); |
---|
1066 | ap_assert(rson); |
---|
1067 | |
---|
1068 | lson->parsimony_rec(mutPerSite); |
---|
1069 | rson->parsimony_rec(mutPerSite); |
---|
1070 | |
---|
1071 | AP_sequence *lseq = lson->get_seq(); |
---|
1072 | AP_sequence *rseq = rson->get_seq(); |
---|
1073 | |
---|
1074 | ap_assert(lseq); |
---|
1075 | ap_assert(rseq); |
---|
1076 | |
---|
1077 | AP_FLOAT mutations_for_combine = sequence->combine(lseq, rseq, mutPerSite); |
---|
1078 | mutation_rate = lson->mutation_rate + rson->mutation_rate + mutations_for_combine; |
---|
1079 | } |
---|
1080 | } |
---|
1081 | } |
---|
1082 | |
---|
1083 | AP_FLOAT AP_tree_nlen::costs(char *mutPerSite) { |
---|
1084 | // returns costs of a tree ( = number of mutations) |
---|
1085 | |
---|
1086 | ap_assert(get_tree_root()->get_seqTemplate()); // forgot to set_seqTemplate() ? (previously returned 0.0 in this case) |
---|
1087 | ap_assert(sequence_state_valid()); |
---|
1088 | |
---|
1089 | parsimony_rec(mutPerSite); |
---|
1090 | return mutation_rate; |
---|
1091 | } |
---|
1092 | |
---|
1093 | AP_FLOAT AP_tree_nlen::nn_interchange_rec(EdgeSpec whichEdges, AP_BL_MODE mode) { |
---|
1094 | if (!father) { |
---|
1095 | return rootEdge()->nni_rec(whichEdges, mode, NULL, true); |
---|
1096 | } |
---|
1097 | if (!father->father) { |
---|
1098 | AP_tree_edge *e = rootEdge(); |
---|
1099 | return e->nni_rec(whichEdges, mode, e->otherNode(this), false); |
---|
1100 | } |
---|
1101 | return edgeTo(get_father())->nni_rec(whichEdges, mode, get_father(), false); |
---|
1102 | } |
---|
1103 | |
---|
1104 | inline CONSTEXPR_RETURN AP_TREE_SIDE idx2side(const int idx) { |
---|
1105 | return idx&1 ? AP_RIGHT : AP_LEFT; |
---|
1106 | } |
---|
1107 | |
---|
1108 | bool AP_tree_edge::kl_rec(const KL_params& KL, const int rec_depth, AP_FLOAT pars_best) { |
---|
1109 | /*! does K.L. recursion |
---|
1110 | * @param KL parameters defining how recursion is done |
---|
1111 | * @param rec_depth current recursion depth (starts with 0) |
---|
1112 | * @param pars_best current parsimony value of topology |
---|
1113 | */ |
---|
1114 | |
---|
1115 | ap_assert(!is_leaf_edge()); |
---|
1116 | if (rec_depth >= KL.max_rec_depth) return false; |
---|
1117 | |
---|
1118 | ap_assert(implicated(rec_depth>0, kl_visited)); |
---|
1119 | |
---|
1120 | int order[8]; |
---|
1121 | AP_tree_edge *descend[8]; |
---|
1122 | |
---|
1123 | { |
---|
1124 | if (rec_depth == 0) { |
---|
1125 | descend[0] = this; |
---|
1126 | descend[2] = NULL; |
---|
1127 | descend[4] = NULL; |
---|
1128 | descend[6] = NULL; |
---|
1129 | } |
---|
1130 | else { |
---|
1131 | AP_tree_nlen *son = sonNode(); |
---|
1132 | AP_tree_nlen *notSon = otherNode(son); // brother or father |
---|
1133 | |
---|
1134 | descend[0] = notSon->nextEdge(this); |
---|
1135 | descend[2] = notSon->nextEdge(descend[0]); |
---|
1136 | |
---|
1137 | ap_assert(descend[2] != this); |
---|
1138 | |
---|
1139 | descend[4] = son->nextEdge(this); |
---|
1140 | descend[6] = son->nextEdge(descend[4]); |
---|
1141 | |
---|
1142 | ap_assert(descend[6] != this); |
---|
1143 | } |
---|
1144 | |
---|
1145 | descend[1] = descend[0]; |
---|
1146 | descend[3] = descend[2]; |
---|
1147 | descend[5] = descend[4]; |
---|
1148 | descend[7] = descend[6]; |
---|
1149 | } |
---|
1150 | |
---|
1151 | // --------------------------------- |
---|
1152 | // detect parsimony values |
---|
1153 | |
---|
1154 | ap_main->remember(); // @@@ i think this is unneeded. better reset root after all done in caller |
---|
1155 | set_root(); |
---|
1156 | rootNode()->costs(); |
---|
1157 | |
---|
1158 | int rec_width_dynamic = 0; |
---|
1159 | int visited_subtrees = 0; |
---|
1160 | int better_subtrees = 0; |
---|
1161 | |
---|
1162 | AP_FLOAT pars[8]; // eight parsimony values (produced by 2*swap_assymetric at each adjacent edge) |
---|
1163 | |
---|
1164 | #if defined(ASSERTION_USED) |
---|
1165 | int forbidden_descends = 0; |
---|
1166 | #endif |
---|
1167 | { |
---|
1168 | AP_FLOAT schwellwert = KL.thresFunctor.calculate(rec_depth); // @@@ skip if not needed |
---|
1169 | for (int i = 0; i < 8; i++) { |
---|
1170 | order[i] = i; |
---|
1171 | AP_tree_edge * const subedge = descend[i]; |
---|
1172 | |
---|
1173 | if (subedge && |
---|
1174 | !subedge->is_leaf_edge() && |
---|
1175 | !subedge->kl_visited && |
---|
1176 | (!KL.stopAtFoldedGroups || !subedge->next_to_folded_group()) |
---|
1177 | ) |
---|
1178 | { |
---|
1179 | ap_main->remember(); |
---|
1180 | subedge->sonNode()->swap_assymetric(idx2side(i)); |
---|
1181 | pars[i] = rootNode()->costs(); |
---|
1182 | if (pars[i] < pars_best) { |
---|
1183 | better_subtrees++; |
---|
1184 | pars_best = pars[i]; // @@@ do not overwrite yet; store and overwrite when done with this loop |
---|
1185 | } |
---|
1186 | if (pars[i] < schwellwert) { |
---|
1187 | rec_width_dynamic++; |
---|
1188 | } |
---|
1189 | ap_main->revert(); |
---|
1190 | visited_subtrees++; |
---|
1191 | } |
---|
1192 | else { |
---|
1193 | pars[i] = -1; |
---|
1194 | #if defined(ASSERTION_USED) |
---|
1195 | if (subedge && subedge->kl_visited) { |
---|
1196 | forbidden_descends++; |
---|
1197 | } |
---|
1198 | #endif |
---|
1199 | } |
---|
1200 | } |
---|
1201 | } |
---|
1202 | |
---|
1203 | // bubblesort pars[]+order[], such that pars[0] contains best (=smallest) parsimony value |
---|
1204 | { |
---|
1205 | for (int i=7, t=0; t<i; t++) { // move negative (=unused) parsimony values to the end |
---|
1206 | if (pars[t] <0) { |
---|
1207 | pars[t] = pars[i]; |
---|
1208 | order[t] = i; |
---|
1209 | t--; |
---|
1210 | i--; |
---|
1211 | } |
---|
1212 | } |
---|
1213 | |
---|
1214 | for (int t = visited_subtrees - 1; t > 0; t--) { |
---|
1215 | bool bubbled = false; |
---|
1216 | for (int i = 0; i < t; i++) { |
---|
1217 | if (pars[i] > pars[i+1]) { |
---|
1218 | std::swap(order[i], order[i+1]); |
---|
1219 | std::swap(pars[i], pars[i+1]); |
---|
1220 | bubbled = true; |
---|
1221 | } |
---|
1222 | } |
---|
1223 | if (!bubbled) break; |
---|
1224 | } |
---|
1225 | } |
---|
1226 | |
---|
1227 | #if defined(ASSERTION_USED) |
---|
1228 | // rec_depth == 0 (called with start-node) |
---|
1229 | // rec_depth == 1 (called twice with start-node (swap_assymetric AP_LEFT + AP_RIGHT)) |
---|
1230 | // rec_depth == 2 (called twice with each adjacent node -> 8 calls) |
---|
1231 | // rec_depth == 3 (called twice with each adjacent node, but not with those were recursion came from -> 6 calls) |
---|
1232 | |
---|
1233 | if (!is_root_edge()) { |
---|
1234 | switch (rec_depth) { |
---|
1235 | case 0: |
---|
1236 | ap_assert(visited_subtrees == 2); |
---|
1237 | ap_assert(forbidden_descends == 0); |
---|
1238 | break; |
---|
1239 | case 1: |
---|
1240 | ap_assert(visited_subtrees <= 8); |
---|
1241 | ap_assert(forbidden_descends == 0); |
---|
1242 | break; |
---|
1243 | default: |
---|
1244 | ap_assert(visited_subtrees <= 6); |
---|
1245 | ap_assert(forbidden_descends == 2); |
---|
1246 | break; |
---|
1247 | } |
---|
1248 | } |
---|
1249 | else { // at root |
---|
1250 | switch (rec_depth) { |
---|
1251 | case 0: |
---|
1252 | ap_assert(visited_subtrees <= 2); |
---|
1253 | break; |
---|
1254 | case 1: |
---|
1255 | ap_assert(visited_subtrees <= 8); |
---|
1256 | ap_assert(forbidden_descends <= 2); // in case of subtree-optimization, 2 descends may be forbidden |
---|
1257 | break; |
---|
1258 | default: |
---|
1259 | ap_assert(visited_subtrees <= 8); |
---|
1260 | break; |
---|
1261 | } |
---|
1262 | } |
---|
1263 | #endif |
---|
1264 | |
---|
1265 | int rec_width; |
---|
1266 | if (better_subtrees) { |
---|
1267 | rec_width = better_subtrees; // @@@ wrong if static/dynamic reduction would allow more |
---|
1268 | |
---|
1269 | // @@@ IMO the whole concept of incrementing depth when a better topology was found has no positive effect |
---|
1270 | // (the better topology is kept anyway and next recursive KL will do a full optimization starting from that edge as well) |
---|
1271 | |
---|
1272 | } |
---|
1273 | else { |
---|
1274 | rec_width = visited_subtrees; |
---|
1275 | if (KL.rec_type & AP_STATIC) { |
---|
1276 | int rec_width_static = (rec_depth < CUSTOM_DEPTHS) ? KL.rec_width[rec_depth] : 1; |
---|
1277 | rec_width = std::min(rec_width, rec_width_static); |
---|
1278 | } |
---|
1279 | if (KL.rec_type & AP_DYNAMIK) { |
---|
1280 | rec_width = std::min(rec_width, rec_width_dynamic); |
---|
1281 | } |
---|
1282 | } |
---|
1283 | ap_assert(rec_width<=visited_subtrees); |
---|
1284 | |
---|
1285 | bool found_better = false; |
---|
1286 | for (int i=0; i<rec_width && !found_better; i++) { |
---|
1287 | AP_tree_edge * const subedge = descend[order[i]]; |
---|
1288 | |
---|
1289 | ap_main->remember(); |
---|
1290 | subedge->kl_visited = true; // mark |
---|
1291 | subedge->sonNode()->swap_assymetric(idx2side(order[i])); // swap |
---|
1292 | rootNode()->parsimony_rec(); |
---|
1293 | |
---|
1294 | if (better_subtrees) { |
---|
1295 | KL_params modified = KL; |
---|
1296 | modified.rec_type = AP_STATIC; |
---|
1297 | modified.max_rec_depth += KL.inc_rec_depth; |
---|
1298 | |
---|
1299 | subedge->kl_rec(modified, rec_depth+1, pars_best); |
---|
1300 | found_better = true; |
---|
1301 | } |
---|
1302 | else { |
---|
1303 | found_better = subedge->kl_rec(KL, rec_depth+1, pars_best); |
---|
1304 | } |
---|
1305 | |
---|
1306 | subedge->kl_visited = false; // unmark |
---|
1307 | ap_main->accept_if(found_better); // revert |
---|
1308 | } |
---|
1309 | |
---|
1310 | ap_main->accept_if(found_better); // undo set_root otherwise |
---|
1311 | return found_better; |
---|
1312 | } |
---|
1313 | |
---|
1314 | |
---|
1315 | const char* AP_tree_nlen::sortByName() |
---|
1316 | { |
---|
1317 | if (name) return name; // leafs |
---|
1318 | |
---|
1319 | const char *n1 = get_leftson()->sortByName(); |
---|
1320 | const char *n2 = get_rightson()->sortByName(); |
---|
1321 | |
---|
1322 | if (strcmp(n1, n2)<0) return n1; |
---|
1323 | |
---|
1324 | AP_tree::swap_sons(); |
---|
1325 | |
---|
1326 | return n2; |
---|
1327 | } |
---|
1328 | |
---|
1329 | const char *AP_tree_nlen::fullname() const |
---|
1330 | { |
---|
1331 | if (!name) { |
---|
1332 | static char *buffer; |
---|
1333 | char *lName = ARB_strdup(get_leftson()->fullname()); |
---|
1334 | char *rName = ARB_strdup(get_rightson()->fullname()); |
---|
1335 | int len = strlen(lName)+strlen(rName)+4; |
---|
1336 | |
---|
1337 | if (buffer) free(buffer); |
---|
1338 | |
---|
1339 | ARB_alloc(buffer, len); |
---|
1340 | |
---|
1341 | strcpy(buffer, "["); |
---|
1342 | strcat(buffer, lName); |
---|
1343 | strcat(buffer, ","); |
---|
1344 | strcat(buffer, rName); |
---|
1345 | strcat(buffer, "]"); |
---|
1346 | |
---|
1347 | free(lName); |
---|
1348 | free(rName); |
---|
1349 | |
---|
1350 | return buffer; |
---|
1351 | } |
---|
1352 | |
---|
1353 | return name; |
---|
1354 | } |
---|
1355 | |
---|
1356 | |
---|
1357 | char* AP_tree_nlen::getSequenceCopy() { |
---|
1358 | costs(); |
---|
1359 | |
---|
1360 | AP_sequence_parsimony *pseq = DOWNCAST(AP_sequence_parsimony*, get_seq()); |
---|
1361 | ap_assert(pseq->hasSequence()); |
---|
1362 | |
---|
1363 | size_t len = pseq->get_sequence_length(); |
---|
1364 | char *s = new char[len]; |
---|
1365 | memcpy(s, pseq->get_sequence(), len); |
---|
1366 | |
---|
1367 | return s; |
---|
1368 | } |
---|
1369 | |
---|
1370 | |
---|
1371 | GB_ERROR AP_pars_root::saveToDB() { |
---|
1372 | has_been_saved = true; |
---|
1373 | return AP_tree_root::saveToDB(); |
---|
1374 | } |
---|
1375 | |
---|
1376 | void AP_tree_nlen::buildNodeList_rec(AP_tree_nlen **list, long& num) { |
---|
1377 | // builds a list of all inner nodes (w/o root node) |
---|
1378 | if (!is_leaf) { |
---|
1379 | if (father) list[num++] = this; |
---|
1380 | get_leftson()->buildNodeList_rec(list, num); |
---|
1381 | get_rightson()->buildNodeList_rec(list, num); |
---|
1382 | } |
---|
1383 | } |
---|
1384 | |
---|
1385 | void AP_tree_nlen::buildNodeList(AP_tree_nlen **&list, long &num) { |
---|
1386 | num = this->count_leafs()-1; |
---|
1387 | list = new AP_tree_nlen *[num+1]; |
---|
1388 | list[num] = 0; |
---|
1389 | num = 0; |
---|
1390 | buildNodeList_rec(list, num); |
---|
1391 | } |
---|
1392 | |
---|
1393 | void AP_tree_nlen::buildBranchList_rec(AP_tree_nlen **list, long& num, bool create_terminal_branches, int deep) { |
---|
1394 | // builds a list of all species |
---|
1395 | // (returns pairs of leafs/father and nodes/father) |
---|
1396 | |
---|
1397 | if (deep) { |
---|
1398 | if (father && (create_terminal_branches || !is_leaf)) { |
---|
1399 | if (father->father) { |
---|
1400 | list[num++] = this; |
---|
1401 | list[num++] = get_father(); |
---|
1402 | } |
---|
1403 | else { // root |
---|
1404 | if (father->leftson == this) { |
---|
1405 | list[num++] = this; |
---|
1406 | list[num++] = get_brother(); |
---|
1407 | } |
---|
1408 | } |
---|
1409 | } |
---|
1410 | if (!is_leaf) { |
---|
1411 | get_leftson() ->buildBranchList_rec(list, num, create_terminal_branches, deep-1); |
---|
1412 | get_rightson()->buildBranchList_rec(list, num, create_terminal_branches, deep-1); |
---|
1413 | } |
---|
1414 | } |
---|
1415 | } |
---|
1416 | |
---|
1417 | void AP_tree_nlen::buildBranchList(AP_tree_nlen **&list, long &num, bool create_terminal_branches, int deep) { |
---|
1418 | if (deep>=0) { |
---|
1419 | num = 2; |
---|
1420 | for (int i=0; i<deep; i++) num *= 2; |
---|
1421 | } |
---|
1422 | else { |
---|
1423 | num = count_leafs() * (create_terminal_branches ? 2 : 1); |
---|
1424 | } |
---|
1425 | |
---|
1426 | ap_assert(num >= 0); |
---|
1427 | |
---|
1428 | list = new AP_tree_nlen *[num*2+4]; |
---|
1429 | |
---|
1430 | if (num) { |
---|
1431 | long count = 0; |
---|
1432 | |
---|
1433 | buildBranchList_rec(list, count, create_terminal_branches, deep); |
---|
1434 | list[count] = 0; |
---|
1435 | num = count/2; |
---|
1436 | } |
---|
1437 | } |
---|