source: tags/testbuild/SL/ROOTED_TREE/RootedTree.cxx

Last change on this file was 13207, checked in by westram, 10 years ago
  • in AP_tree::moveNextTo()
    • when moving son of root: announce new root (=brother) after modifying tree. fixes corruption demonstrated with [13194]
    • triggered another bug: doing pop() after calling fixed code, caused a similar corruption
  • in AP_main::push_node()
    • on first ROOT push, move node to bottom of stack ⇒ will be popped as last node
    • fixes bug triggered by above fix
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 19.6 KB
Line 
1// ================================================================ //
2//                                                                  //
3//   File      : RootedTree.cxx                                     //
4//   Purpose   :                                                    //
5//                                                                  //
6//   Coded by Ralf Westram (coder@reallysoft.de) in December 2013   //
7//   Institute of Microbiology (Technical University Munich)        //
8//   http://www.arb-home.de/                                        //
9//                                                                  //
10// ================================================================ //
11
12#include "RootedTree.h"
13#include <map>
14#include <set>
15#include <cmath> // needed with 4.4.3 (but not with 4.7.3)
16
17// ------------------
18//      TreeRoot
19
20TreeRoot::~TreeRoot() {
21    deleteWithNodes = false; // avoid recursive call of ~TreeRoot
22    delete rootNode;
23    rt_assert(!rootNode);
24    delete nodeMaker;
25}
26
27void TreeRoot::change_root(RootedTree *oldroot, RootedTree *newroot) {
28    rt_assert(rootNode == oldroot);
29    rt_assert(implicated(newroot, !newroot->father));
30    rootNode = newroot;
31
32    if (oldroot && oldroot->get_tree_root() && !oldroot->is_inside(newroot)) oldroot->set_tree_root(0); // unlink from this
33    if (newroot && newroot->get_tree_root() != this) newroot->set_tree_root(this); // link to this
34}
35
36// --------------------
37//      RootedTree
38
39#if defined(PROVIDE_TREE_STRUCTURE_TESTS)
40
41void RootedTree::assert_valid() const {
42    rt_assert(this);
43
44    TreeRoot *troot = get_tree_root();
45    if (troot) {
46        if (!is_leaf) {
47            rt_assert(rightson);
48            rt_assert(leftson);
49            get_rightson()->assert_valid();
50            get_leftson()->assert_valid();
51        }
52        if (father) {
53            rt_assert(is_inside(get_father()));
54            rt_assert(troot->get_root_node()->is_anchestor_of(this));
55            rt_assert(get_father()->get_tree_root() == troot);
56        }
57        else {
58            rt_assert(troot->get_root_node()  == this);
59            rt_assert(!is_leaf);                    // leaf@root (tree has to have at least 2 leafs)
60        }
61    }
62    else { // removed node (may be incomplete)
63        if (!is_leaf) {
64            if (rightson) get_rightson()->assert_valid();
65            if (leftson)  get_leftson()->assert_valid();
66        }
67        if (father) {
68            rt_assert(is_inside(get_father()));
69            rt_assert(!get_father()->get_tree_root());
70        }
71    }
72}
73#endif // PROVIDE_TREE_STRUCTURE_TESTS
74
75void RootedTree::set_tree_root(TreeRoot *new_root) {
76    if (tree_root != new_root) {
77        tree_root = new_root;
78        if (leftson) get_leftson()->set_tree_root(new_root);
79        if (rightson) get_rightson()->set_tree_root(new_root);
80    }
81}
82
83void RootedTree::reorder_subtree(TreeOrder mode) {
84    static const char *smallest_leafname; // has to be set to the alphabetically smallest name (when function exits)
85
86    if (is_leaf) {
87        smallest_leafname = name;
88    }
89    else {
90        int leftsize  = get_leftson() ->get_leaf_count();
91        int rightsize = get_rightson()->get_leaf_count();
92
93        {
94            bool big_at_top    = leftsize>rightsize;
95            bool big_at_bottom = leftsize<rightsize;
96            bool swap_branches = (mode&ORDER_BIG_DOWN) ? big_at_top : big_at_bottom;
97            if (swap_branches) swap_sons();
98        }
99
100        TreeOrder lmode, rmode;
101        if (mode & (ORDER_BIG_TO_EDGE|ORDER_BIG_TO_CENTER)) { // symmetric
102            if (mode & ORDER_ALTERNATING) mode = TreeOrder(mode^(ORDER_BIG_TO_EDGE|ORDER_BIG_TO_CENTER));
103
104            if (mode & ORDER_BIG_TO_CENTER) {
105                lmode = TreeOrder(mode | ORDER_BIG_DOWN);
106                rmode = TreeOrder(mode & ~ORDER_BIG_DOWN);
107            }
108            else {
109                lmode = TreeOrder(mode & ~ORDER_BIG_DOWN);
110                rmode = TreeOrder(mode | ORDER_BIG_DOWN);
111            }
112        }
113        else { // asymmetric
114            if (mode & ORDER_ALTERNATING) mode = TreeOrder(mode^ORDER_BIG_DOWN);
115
116            lmode = mode;
117            rmode = mode;
118        }
119
120        get_leftson()->reorder_subtree(lmode);
121        const char *leftleafname = smallest_leafname;
122
123        get_rightson()->reorder_subtree(rmode);
124        const char *rightleafname = smallest_leafname;
125
126        if (leftleafname && rightleafname) {
127            int name_cmp = strcmp(leftleafname, rightleafname);
128            if (name_cmp <= 0) {
129                smallest_leafname = leftleafname;
130            }
131            else {
132                smallest_leafname = rightleafname;
133                if (leftsize == rightsize) { // if sizes of subtrees are equal and rightleafname<leftleafname -> swap branches
134                    const char *smallest_leafname_save = smallest_leafname;
135
136                    swap_sons();
137                    get_leftson ()->reorder_subtree(lmode); rt_assert(strcmp(smallest_leafname, rightleafname)== 0);
138                    get_rightson()->reorder_subtree(rmode); rt_assert(strcmp(smallest_leafname, leftleafname) == 0);
139
140                    smallest_leafname = smallest_leafname_save;
141                }
142            }
143        }
144    }
145    rt_assert(smallest_leafname);
146}
147
148void RootedTree::reorder_tree(TreeOrder mode) {
149    /*! beautify tree (does not change topology, only swaps branches)
150     */
151    compute_tree();
152    reorder_subtree(mode);
153}
154
155void RootedTree::rotate_subtree() {
156    if (!is_leaf) {
157        swap_sons();
158        get_leftson()->rotate_subtree();
159        get_rightson()->rotate_subtree();
160    }
161}
162
163void RootedTree::set_root() {
164    /*! set the root at parent edge of this
165     * pointers to tree-nodes remain valid, but all parent-nodes of this change their meaning
166     * (afterwards they will point to [father+brother] instead of [this+brother])
167     * esp. pointers to the root-node will still point to the root-node (which only changed children)
168     */
169
170    if (at_root()) return; // already root
171
172    RootedTree *old_root    = get_root_node();
173    RootedTree *old_brother = is_inside(old_root->get_leftson()) ? old_root->get_rightson() : old_root->get_leftson();
174
175    // move remark branches to top
176    {
177        char *remark = nulldup(get_remark());
178        for (RootedTree *node = this; node->father; node = node->get_father()) {
179            remark = node->swap_remark(remark);
180        }
181        free(remark);
182    }
183
184    GBT_LEN old_root_len = old_root->leftlen + old_root->rightlen;
185
186    // new node & this init
187    old_root->leftson  = this;
188    old_root->rightson = father; // will be set later
189
190    if (father->leftson == this) {
191        old_root->leftlen = old_root->rightlen = father->leftlen*.5;
192    }
193    else {
194        old_root->leftlen = old_root->rightlen = father->rightlen*.5;
195    }
196
197    RootedTree *next = get_father()->get_father();
198    RootedTree *prev = old_root;
199    RootedTree *pntr = get_father();
200
201    if (father->leftson == this)    father->leftson = old_root; // to set the flag correctly
202
203    // loop from father to son of root, rotate tree
204    while  (next->father) {
205        double len = (next->leftson == pntr) ? next->leftlen : next->rightlen;
206
207        if (pntr->leftson == prev) {
208            pntr->leftson = next;
209            pntr->leftlen = len;
210        }
211        else {
212            pntr->rightson = next;
213            pntr->rightlen = len;
214        }
215
216        pntr->father = prev;
217        prev         = pntr;
218        pntr         = next;
219        next         = next->get_father();
220    }
221    // now next points to the old root, which has been destroyed above
222    //
223    // pointer at oldroot
224    // pntr == brother before old root == next
225
226    if (pntr->leftson == prev) {
227        pntr->leftlen = old_root_len;
228        pntr->leftson = old_brother;
229    }
230    else {
231        pntr->rightlen = old_root_len;
232        pntr->rightson = old_brother;
233    }
234
235    old_brother->father = pntr;
236    pntr->father        = prev;
237    father              = old_root;
238
239    rt_assert(get_root_node() == old_root);
240}
241
242RootedTree *RootedTree::findLeafNamed(const char *wantedName) {
243    RootedTree *found = NULL;
244    if (is_leaf) {
245        if (name && strcmp(name, wantedName) == 0) found = this;
246    }
247    else {
248        found             = get_leftson()->findLeafNamed(wantedName);
249        if (!found) found = get_rightson()->findLeafNamed(wantedName);
250    }
251    return found;
252}
253
254// ----------------------------
255//      find_innermost_edge
256
257class NodeLeafDistance {
258    GBT_LEN downdist, updist;
259    enum { NLD_NODIST = 0, NLD_DOWNDIST, NLD_BOTHDIST } state;
260
261public:
262
263    NodeLeafDistance()
264        : downdist(-1.0),
265          updist(-1.0),
266          state(NLD_NODIST)
267    {}
268
269    GBT_LEN get_downdist() const { rt_assert(state >= NLD_DOWNDIST); return downdist; }
270    void set_downdist(GBT_LEN DownDist) {
271        if (state < NLD_DOWNDIST) state = NLD_DOWNDIST;
272        downdist = DownDist;
273    }
274
275    GBT_LEN get_updist() const { rt_assert(state >= NLD_BOTHDIST); return updist; }
276    void set_updist(GBT_LEN UpDist) {
277        if (state < NLD_BOTHDIST) state = NLD_BOTHDIST;
278        updist = UpDist;
279    }
280
281};
282
283class EdgeFinder {
284    std::map<RootedTree*, NodeLeafDistance> data; // maximum distance to farthest leaf
285
286    ARB_edge innermost;
287    double   min_distdiff; // abs diff between up- and downdiff
288
289    GBT_LEN calc_distdiff(GBT_LEN d1, GBT_LEN d2) { return fabs(d1-d2); }
290
291    void insert_tree(RootedTree *node) {
292        if (node->is_leaf) {
293            data[node].set_downdist(0.0);
294        }
295        else {
296            insert_tree(node->get_leftson());
297            insert_tree(node->get_rightson());
298
299            data[node].set_downdist(std::max(data[node->get_leftson()].get_downdist()+node->leftlen,
300                                             data[node->get_rightson()].get_downdist()+node->rightlen));
301        }
302    }
303
304    void findBetterEdge_sub(RootedTree *node) {
305        RootedTree *father  = node->get_father();
306        RootedTree *brother = node->get_brother();
307
308        GBT_LEN len      = node->get_branchlength();
309        GBT_LEN brothLen = brother->get_branchlength();
310
311        GBT_LEN upDist   = std::max(data[father].get_updist(), data[brother].get_downdist()+brothLen);
312        GBT_LEN downDist = data[node].get_downdist();
313
314        {
315            GBT_LEN edge_dd = calc_distdiff(upDist, downDist);
316            if (edge_dd<min_distdiff) { // found better edge
317                innermost    = ARB_edge(node, father);
318                min_distdiff = edge_dd;
319            }
320        }
321
322        data[node].set_updist(upDist+len);
323
324        if (!node->is_leaf) {
325            findBetterEdge_sub(node->get_leftson());
326            findBetterEdge_sub(node->get_rightson());
327        }
328    }
329
330    void findBetterEdge(RootedTree *node) {
331        if (!node->is_leaf) {
332            findBetterEdge_sub(node->get_leftson());
333            findBetterEdge_sub(node->get_rightson());
334        }
335    }
336
337public:
338    EdgeFinder(RootedTree *rootNode)
339        : innermost(rootNode->get_leftson(), rootNode->get_rightson()) // root-edge
340    {
341        insert_tree(rootNode);
342
343        RootedTree *lson = rootNode->get_leftson();
344        RootedTree *rson = rootNode->get_rightson();
345
346        GBT_LEN rootEdgeLen = rootNode->leftlen + rootNode->rightlen;
347
348        GBT_LEN lddist = data[lson].get_downdist();
349        GBT_LEN rddist = data[rson].get_downdist();
350
351        data[lson].set_updist(rddist+rootEdgeLen);
352        data[rson].set_updist(lddist+rootEdgeLen);
353
354        min_distdiff = calc_distdiff(lddist, rddist);
355
356        findBetterEdge(lson);
357        findBetterEdge(rson);
358    }
359
360    const ARB_edge& innermost_edge() const { return innermost; }
361};
362
363ARB_edge TreeRoot::find_innermost_edge() {
364    EdgeFinder edgeFinder(get_root_node());
365    return edgeFinder.innermost_edge();
366}
367
368// ------------------------
369//      multifurcation
370
371class RootedTree::LengthCollector {
372    typedef std::map<RootedTree*,GBT_LEN> LengthMap;
373    typedef std::set<RootedTree*>         NodeSet;
374
375    LengthMap eliminatedParentLength;
376    LengthMap addedParentLength;
377
378public:
379    void eliminate_parent_edge(RootedTree *node) {
380        rt_assert(!node->is_root_node());
381        eliminatedParentLength[node] += parentEdge(node).eliminate();
382    }
383
384    void add_parent_length(RootedTree *node, GBT_LEN addLen) {
385        rt_assert(!node->is_root_node());
386        addedParentLength[node] += addLen;
387    }
388
389    void independent_distribution() {
390        // step 2: (see caller)
391        while (!eliminatedParentLength.empty()) { // got eliminated lengths which need to be distributed
392            for (LengthMap::iterator from = eliminatedParentLength.begin(); from != eliminatedParentLength.end(); ++from) {
393                ARB_edge elimEdge = parentEdge(from->first);
394                GBT_LEN  elimLen  = from->second;
395
396                elimEdge.virtually_distribute_length(elimLen, *this);
397            }
398            eliminatedParentLength.clear(); // all distributed!
399
400            // handle special cases where distributed length is negative and results in negative destination branches.
401            // Avoid generating negative dest. branchlengths by
402            // - eliminating the dest. branch
403            // - redistributing the additional (negative) length (may cause additional negative lengths on other dest. branches)
404
405            NodeSet handled;
406            for (LengthMap::iterator to = addedParentLength.begin(); to != addedParentLength.end(); ++to) {
407                ARB_edge affectedEdge     = parentEdge(to->first);
408                GBT_LEN  additionalLen    = to->second;
409                double   effective_length = affectedEdge.length() + additionalLen;
410
411                if (effective_length<=0.0) { // negative or zero
412                    affectedEdge.set_length(effective_length);
413                    eliminate_parent_edge(to->first); // adds entry to eliminatedParentLength and causes another additional loop
414                    handled.insert(to->first);
415                }
416            }
417
418            // remove all redistributed nodes
419            for (NodeSet::iterator del = handled.begin(); del != handled.end(); ++del) {
420                addedParentLength.erase(*del);
421            }
422        }
423
424        // step 3:
425        for (LengthMap::iterator to = addedParentLength.begin(); to != addedParentLength.end(); ++to) {
426            ARB_edge affectedEdge     = parentEdge(to->first);
427            GBT_LEN  additionalLen    = to->second;
428            double   effective_length = affectedEdge.length() + additionalLen;
429
430            affectedEdge.set_length(effective_length);
431        }
432    }
433};
434
435GBT_LEN ARB_edge::adjacent_distance() const {
436    //! return length of edges starting from this->dest()
437
438    if (at_leaf()) return 0.0;
439    return next().length_or_adjacent_distance() + otherNext().length_or_adjacent_distance();
440}
441
442void ARB_edge::virtually_add_or_distribute_length_forward(GBT_LEN len, RootedTree::LengthCollector& collect) const {
443    rt_assert(!is_nan_or_inf(len));
444    if (length() > 0.0) {
445        collect.add_parent_length(son(), len);
446    }
447    else {
448        if (len != 0.0) virtually_distribute_length_forward(len, collect);
449    }
450}
451
452
453void ARB_edge::virtually_distribute_length_forward(GBT_LEN len, RootedTree::LengthCollector& collect) const {
454    /*! distribute length to edges adjacent in edge direction (i.e. edges starting from this->dest())
455     * Split 'len' proportional to adjacent edges lengths.
456     *
457     * Note: length will not be distributed to tree-struction itself (yet), but collected in 'collect'
458     */
459
460    rt_assert(is_normal(len));
461    rt_assert(!at_leaf()); // cannot forward anything (nothing beyond leafs)
462
463    ARB_edge e1 = next();
464    ARB_edge e2 = otherNext();
465
466    GBT_LEN d1 = e1.length_or_adjacent_distance();
467    GBT_LEN d2 = e2.length_or_adjacent_distance();
468
469    len = len/(d1+d2);
470
471    e1.virtually_add_or_distribute_length_forward(len*d1, collect);
472    e2.virtually_add_or_distribute_length_forward(len*d2, collect);
473}
474
475void ARB_edge::virtually_distribute_length(GBT_LEN len, RootedTree::LengthCollector& collect) const {
476    /*! distribute length to all adjacent edges.
477     * Longer edges receive more than shorter ones.
478     *
479     * Edges with length zero will not be changed, instead both edges "beyond"
480     * the edge will be affected (they will be affected equally to direct edges,
481     * because edges at multifurcations are considered to BE direct edges).
482     *
483     * Note: length will not be distributed to tree-struction itself (yet), but collected in 'collect'
484     */
485
486    ARB_edge backEdge = inverse();
487    GBT_LEN len_fwd, len_bwd;
488    {
489        GBT_LEN dist_fwd = adjacent_distance();
490        GBT_LEN dist_bwd = backEdge.adjacent_distance();
491        GBT_LEN lenW     = len/(dist_fwd+dist_bwd);
492        len_fwd          = lenW*dist_fwd;
493        len_bwd          = lenW*dist_bwd;
494
495    }
496
497    if (is_normal(len_fwd)) virtually_distribute_length_forward(len_fwd, collect);
498    if (is_normal(len_bwd)) backEdge.virtually_distribute_length_forward(len_bwd, collect);
499}
500
501void RootedTree::eliminate_and_collect(const multifurc_limits& below, LengthCollector& collect) {
502    /*! eliminate edges specified by 'below' and
503     * store their length in 'collect' for later distribution.
504     */
505    rt_assert(!is_root_node());
506    if (!is_leaf || below.applyAtLeafs) {
507        double value;
508        switch (parse_bootstrap(value)) {
509            case REMARK_NONE:
510                value = 100.0;
511                // fall-through
512            case REMARK_BOOTSTRAP:
513                if (value<below.bootstrap && get_branchlength_unrooted()<below.branchlength) {
514                    collect.eliminate_parent_edge(this);
515                }
516                break;
517
518            case REMARK_OTHER: break;
519        }
520    }
521
522    if (!is_leaf) {
523        get_leftson() ->eliminate_and_collect(below, collect);
524        get_rightson()->eliminate_and_collect(below, collect);
525    }
526}
527
528void ARB_edge::multifurcate() {
529    /*! eliminate edge and distribute length to adjacent edges
530     * - sets its length to zero
531     * - removes remark (bootstrap)
532     * - distributes length to neighbour-branches
533     */
534    RootedTree::LengthCollector collector;
535    collector.eliminate_parent_edge(son());
536    collector.independent_distribution();
537}
538void RootedTree::multifurcate() {
539    /*! eliminate branch from 'this' to 'father' (or brother @ root)
540     * @see ARB_edge::multifurcate()
541     */
542    rt_assert(father); // cannot multifurcate at root; call with son of root to multifurcate root-edge
543    if (father) parentEdge(this).multifurcate();
544}
545
546void RootedTree::set_branchlength_preserving(GBT_LEN new_len) {
547    /*! set branchlength to 'new_len' while preserving overall distance in tree.
548     *
549     * Always works on unrooted tree (i.e. lengths @ root are treated correctly).
550     * Length is preserved as in multifurcate()
551     */
552
553    GBT_LEN old_len = get_branchlength_unrooted();
554    GBT_LEN change  = new_len-old_len;
555
556    char *old_remark = nulldup(get_remark());
557
558    // distribute the negative 'change' to neighbours:
559    set_branchlength_unrooted(-change);
560    multifurcate();
561
562    set_branchlength_unrooted(new_len);
563    use_as_remark(old_remark); // restore remark (was removed by multifurcate())
564}
565
566void RootedTree::multifurcate_whole_tree(const multifurc_limits& below) {
567    /*! multifurcate all branches specified by 'below'
568     * - step 1: eliminate all branches, store eliminated lengths
569     * - step 2: calculate length distribution for all adjacent branches (proportionally to original length of each branch)
570     * - step 3: apply distributed length
571     */
572    RootedTree      *root = get_root_node();
573    LengthCollector  collector;
574
575    // step 1:
576    root->get_leftson()->eliminate_and_collect(below, collector);
577    root->get_rightson()->eliminate_and_collect(below, collector);
578    // root-edge is handled twice by the above calls.
579    // Unproblematic: first call will eliminate root-edge, so second call will do nothing.
580
581    // step 2 and 3:
582    collector.independent_distribution();
583}
584
Note: See TracBrowser for help on using the repository browser.