source: tags/arb-6.0.3/SL/ROOTED_TREE/RootedTree.cxx

Last change on this file was 11560, checked in by westram, 10 years ago
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 19.6 KB
Line 
1// ================================================================ //
2//                                                                  //
3//   File      : RootedTree.cxx                                     //
4//   Purpose   :                                                    //
5//                                                                  //
6//   Coded by Ralf Westram (coder@reallysoft.de) in December 2013   //
7//   Institute of Microbiology (Technical University Munich)        //
8//   http://www.arb-home.de/                                        //
9//                                                                  //
10// ================================================================ //
11
12#include "RootedTree.h"
13#include <map>
14#include <set>
15#include <cmath> // needed with 4.4.3 (but not with 4.7.3)
16
17// ------------------
18//      TreeRoot
19
20TreeRoot::~TreeRoot() {
21    deleteWithNodes = false; // avoid recursive call of ~TreeRoot
22    delete rootNode;
23    rt_assert(!rootNode);
24    delete nodeMaker;
25}
26
27void TreeRoot::change_root(RootedTree *oldroot, RootedTree *newroot) {
28    rt_assert(rootNode == oldroot);
29    rootNode = newroot;
30
31    if (oldroot && oldroot->get_tree_root() && !oldroot->is_inside(newroot)) oldroot->set_tree_root(0); // unlink from this
32    if (newroot && newroot->get_tree_root() != this) newroot->set_tree_root(this); // link to this
33}
34
35// --------------------
36//      RootedTree
37
38#if defined(PROVIDE_TREE_STRUCTURE_TESTS)
39
40void RootedTree::assert_valid() const {
41    rt_assert(this);
42
43    TreeRoot *troot = get_tree_root();
44    if (troot) {
45        if (!is_leaf) {
46            rt_assert(rightson);
47            rt_assert(leftson);
48            get_rightson()->assert_valid();
49            get_leftson()->assert_valid();
50        }
51        if (father) {
52            rt_assert(is_inside(get_father()));
53            rt_assert(troot->get_root_node()->is_anchestor_of(this));
54            rt_assert(get_father()->get_tree_root() == troot);
55        }
56        else {
57            rt_assert(troot->get_root_node()  == this);
58            rt_assert(!is_leaf);                    // leaf@root (tree has to have at least 2 leafs)
59        }
60    }
61    else { // removed node (may be incomplete)
62        if (!is_leaf) {
63            if (rightson) get_rightson()->assert_valid();
64            if (leftson)  get_leftson()->assert_valid();
65        }
66        if (father) {
67            rt_assert(is_inside(get_father()));
68            rt_assert(!get_father()->get_tree_root());
69        }
70    }
71}
72#endif // PROVIDE_TREE_STRUCTURE_TESTS
73
74void RootedTree::set_tree_root(TreeRoot *new_root) {
75    if (tree_root != new_root) {
76        tree_root = new_root;
77        if (leftson) get_leftson()->set_tree_root(new_root);
78        if (rightson) get_rightson()->set_tree_root(new_root);
79    }
80}
81
82void RootedTree::reorder_subtree(TreeOrder mode) {
83    static const char *smallest_leafname; // has to be set to the alphabetically smallest name (when function exits)
84
85    if (is_leaf) {
86        smallest_leafname = name;
87    }
88    else {
89        int leftsize  = get_leftson() ->get_leaf_count();
90        int rightsize = get_rightson()->get_leaf_count();
91
92        {
93            bool big_at_top    = leftsize>rightsize;
94            bool big_at_bottom = leftsize<rightsize;
95            bool swap_branches = (mode&ORDER_BIG_DOWN) ? big_at_top : big_at_bottom;
96            if (swap_branches) swap_sons();
97        }
98
99        TreeOrder lmode, rmode;
100        if (mode & (ORDER_BIG_TO_EDGE|ORDER_BIG_TO_CENTER)) { // symmetric
101            if (mode & ORDER_ALTERNATING) mode = TreeOrder(mode^(ORDER_BIG_TO_EDGE|ORDER_BIG_TO_CENTER));
102
103            if (mode & ORDER_BIG_TO_CENTER) {
104                lmode = TreeOrder(mode | ORDER_BIG_DOWN);
105                rmode = TreeOrder(mode & ~ORDER_BIG_DOWN);
106            }
107            else {
108                lmode = TreeOrder(mode & ~ORDER_BIG_DOWN);
109                rmode = TreeOrder(mode | ORDER_BIG_DOWN);
110            }
111        }
112        else { // asymmetric
113            if (mode & ORDER_ALTERNATING) mode = TreeOrder(mode^ORDER_BIG_DOWN);
114
115            lmode = mode;
116            rmode = mode;
117        }
118
119        get_leftson()->reorder_subtree(lmode);
120        const char *leftleafname = smallest_leafname;
121
122        get_rightson()->reorder_subtree(rmode);
123        const char *rightleafname = smallest_leafname;
124
125        if (leftleafname && rightleafname) {
126            int name_cmp = strcmp(leftleafname, rightleafname);
127            if (name_cmp <= 0) {
128                smallest_leafname = leftleafname;
129            }
130            else {
131                smallest_leafname = rightleafname;
132                if (leftsize == rightsize) { // if sizes of subtrees are equal and rightleafname<leftleafname -> swap branches
133                    const char *smallest_leafname_save = smallest_leafname;
134
135                    swap_sons();
136                    get_leftson ()->reorder_subtree(lmode); rt_assert(strcmp(smallest_leafname, rightleafname)== 0);
137                    get_rightson()->reorder_subtree(rmode); rt_assert(strcmp(smallest_leafname, leftleafname) == 0);
138
139                    smallest_leafname = smallest_leafname_save;
140                }
141            }
142        }
143    }
144    rt_assert(smallest_leafname);
145}
146
147void RootedTree::reorder_tree(TreeOrder mode) {
148    /*! beautify tree (does not change topology, only swaps branches)
149     */
150    compute_tree();
151    reorder_subtree(mode);
152}
153
154void RootedTree::rotate_subtree() {
155    if (!is_leaf) {
156        swap_sons();
157        get_leftson()->rotate_subtree();
158        get_rightson()->rotate_subtree();
159    }
160}
161
162void RootedTree::set_root() {
163    /*! set the root at parent edge of this
164     * pointers to tree-nodes remain valid, but all parent-nodes of this change their meaning
165     * (afterwards they will point to [father+brother] instead of [this+brother])
166     * esp. pointers to the root-node will still point to the root-node (which only changed children)
167     */
168
169    if (at_root()) return; // already root
170
171    RootedTree *old_root    = get_root_node();
172    RootedTree *old_brother = is_inside(old_root->get_leftson()) ? old_root->get_rightson() : old_root->get_leftson();
173
174    // move remark branches to top
175    {
176        char *remark = nulldup(get_remark());
177        for (RootedTree *node = this; node->father; node = node->get_father()) {
178            remark = node->swap_remark(remark);
179        }
180        free(remark);
181    }
182
183    GBT_LEN old_root_len = old_root->leftlen + old_root->rightlen;
184
185    // new node & this init
186    old_root->leftson  = this;
187    old_root->rightson = father; // will be set later
188
189    if (father->leftson == this) {
190        old_root->leftlen = old_root->rightlen = father->leftlen*.5;
191    }
192    else {
193        old_root->leftlen = old_root->rightlen = father->rightlen*.5;
194    }
195
196    RootedTree *next = get_father()->get_father();
197    RootedTree *prev = old_root;
198    RootedTree *pntr = get_father();
199
200    if (father->leftson == this)    father->leftson = old_root; // to set the flag correctly
201
202    // loop from father to son of root, rotate tree
203    while  (next->father) {
204        double len = (next->leftson == pntr) ? next->leftlen : next->rightlen;
205
206        if (pntr->leftson == prev) {
207            pntr->leftson = next;
208            pntr->leftlen = len;
209        }
210        else {
211            pntr->rightson = next;
212            pntr->rightlen = len;
213        }
214
215        pntr->father = prev;
216        prev         = pntr;
217        pntr         = next;
218        next         = next->get_father();
219    }
220    // now next points to the old root, which has been destroyed above
221    //
222    // pointer at oldroot
223    // pntr == brother before old root == next
224
225    if (pntr->leftson == prev) {
226        pntr->leftlen = old_root_len;
227        pntr->leftson = old_brother;
228    }
229    else {
230        pntr->rightlen = old_root_len;
231        pntr->rightson = old_brother;
232    }
233
234    old_brother->father = pntr;
235    pntr->father        = prev;
236    father              = old_root;
237
238    rt_assert(get_root_node() == old_root);
239}
240
241RootedTree *RootedTree::findLeafNamed(const char *wantedName) {
242    RootedTree *found = NULL;
243    if (is_leaf) {
244        if (name && strcmp(name, wantedName) == 0) found = this;
245    }
246    else {
247        found             = get_leftson()->findLeafNamed(wantedName);
248        if (!found) found = get_rightson()->findLeafNamed(wantedName);
249    }
250    return found;
251}
252
253// ----------------------------
254//      find_innermost_edge
255
256class NodeLeafDistance {
257    GBT_LEN downdist, updist;
258    enum { NLD_NODIST = 0, NLD_DOWNDIST, NLD_BOTHDIST } state;
259
260public:
261
262    NodeLeafDistance()
263        : downdist(-1.0),
264          updist(-1.0),
265          state(NLD_NODIST)
266    {}
267
268    GBT_LEN get_downdist() const { rt_assert(state >= NLD_DOWNDIST); return downdist; }
269    void set_downdist(GBT_LEN DownDist) {
270        if (state < NLD_DOWNDIST) state = NLD_DOWNDIST;
271        downdist = DownDist;
272    }
273
274    GBT_LEN get_updist() const { rt_assert(state >= NLD_BOTHDIST); return updist; }
275    void set_updist(GBT_LEN UpDist) {
276        if (state < NLD_BOTHDIST) state = NLD_BOTHDIST;
277        updist = UpDist;
278    }
279
280};
281
282class EdgeFinder {
283    std::map<RootedTree*, NodeLeafDistance> data; // maximum distance to farthest leaf
284
285    ARB_edge innermost;
286    double   min_distdiff; // abs diff between up- and downdiff
287
288    GBT_LEN calc_distdiff(GBT_LEN d1, GBT_LEN d2) { return fabs(d1-d2); }
289
290    void insert_tree(RootedTree *node) {
291        if (node->is_leaf) {
292            data[node].set_downdist(0.0);
293        }
294        else {
295            insert_tree(node->get_leftson());
296            insert_tree(node->get_rightson());
297
298            data[node].set_downdist(std::max(data[node->get_leftson()].get_downdist()+node->leftlen,
299                                             data[node->get_rightson()].get_downdist()+node->rightlen));
300        }
301    }
302
303    void findBetterEdge_sub(RootedTree *node) {
304        RootedTree *father  = node->get_father();
305        RootedTree *brother = node->get_brother();
306
307        GBT_LEN len      = node->get_branchlength();
308        GBT_LEN brothLen = brother->get_branchlength();
309
310        GBT_LEN upDist   = std::max(data[father].get_updist(), data[brother].get_downdist()+brothLen);
311        GBT_LEN downDist = data[node].get_downdist();
312
313        {
314            GBT_LEN edge_dd = calc_distdiff(upDist, downDist);
315            if (edge_dd<min_distdiff) { // found better edge
316                innermost    = ARB_edge(node, father);
317                min_distdiff = edge_dd;
318            }
319        }
320
321        data[node].set_updist(upDist+len);
322
323        if (!node->is_leaf) {
324            findBetterEdge_sub(node->get_leftson());
325            findBetterEdge_sub(node->get_rightson());
326        }
327    }
328
329    void findBetterEdge(RootedTree *node) {
330        if (!node->is_leaf) {
331            findBetterEdge_sub(node->get_leftson());
332            findBetterEdge_sub(node->get_rightson());
333        }
334    }
335
336public:
337    EdgeFinder(RootedTree *rootNode)
338        : innermost(rootNode->get_leftson(), rootNode->get_rightson()) // root-edge
339    {
340        insert_tree(rootNode);
341
342        RootedTree *lson = rootNode->get_leftson();
343        RootedTree *rson = rootNode->get_rightson();
344
345        GBT_LEN rootEdgeLen = rootNode->leftlen + rootNode->rightlen;
346
347        GBT_LEN lddist = data[lson].get_downdist();
348        GBT_LEN rddist = data[rson].get_downdist();
349
350        data[lson].set_updist(rddist+rootEdgeLen);
351        data[rson].set_updist(lddist+rootEdgeLen);
352
353        min_distdiff = calc_distdiff(lddist, rddist);
354
355        findBetterEdge(lson);
356        findBetterEdge(rson);
357    }
358
359    const ARB_edge& innermost_edge() const { return innermost; }
360};
361
362ARB_edge TreeRoot::find_innermost_edge() {
363    EdgeFinder edgeFinder(get_root_node());
364    return edgeFinder.innermost_edge();
365}
366
367// ------------------------
368//      multifurcation
369
370class RootedTree::LengthCollector {
371    typedef std::map<RootedTree*,GBT_LEN> LengthMap;
372    typedef std::set<RootedTree*>         NodeSet;
373
374    LengthMap eliminatedParentLength;
375    LengthMap addedParentLength;
376
377public:
378    void eliminate_parent_edge(RootedTree *node) {
379        rt_assert(!node->is_root_node());
380        eliminatedParentLength[node] += parentEdge(node).eliminate();
381    }
382
383    void add_parent_length(RootedTree *node, GBT_LEN addLen) {
384        rt_assert(!node->is_root_node());
385        addedParentLength[node] += addLen;
386    }
387
388    void independent_distribution() {
389        // step 2: (see caller)
390        while (!eliminatedParentLength.empty()) { // got eliminated lengths which need to be distributed
391            for (LengthMap::iterator from = eliminatedParentLength.begin(); from != eliminatedParentLength.end(); ++from) {
392                ARB_edge elimEdge = parentEdge(from->first);
393                GBT_LEN  elimLen  = from->second;
394
395                elimEdge.virtually_distribute_length(elimLen, *this);
396            }
397            eliminatedParentLength.clear(); // all distributed!
398
399            // handle special cases where distributed length is negative and results in negative destination branches.
400            // Avoid generating negative dest. branchlengths by
401            // - eliminating the dest. branch
402            // - redistributing the additional (negative) length (may cause additional negative lengths on other dest. branches)
403
404            NodeSet handled;
405            for (LengthMap::iterator to = addedParentLength.begin(); to != addedParentLength.end(); ++to) {
406                ARB_edge affectedEdge     = parentEdge(to->first);
407                GBT_LEN  additionalLen    = to->second;
408                double   effective_length = affectedEdge.length() + additionalLen;
409
410                if (effective_length<=0.0) { // negative or zero
411                    affectedEdge.set_length(effective_length);
412                    eliminate_parent_edge(to->first); // adds entry to eliminatedParentLength and causes another additional loop
413                    handled.insert(to->first);
414                }
415            }
416
417            // remove all redistributed nodes
418            for (NodeSet::iterator del = handled.begin(); del != handled.end(); ++del) {
419                addedParentLength.erase(*del);
420            }
421        }
422
423        // step 3:
424        for (LengthMap::iterator to = addedParentLength.begin(); to != addedParentLength.end(); ++to) {
425            ARB_edge affectedEdge     = parentEdge(to->first);
426            GBT_LEN  additionalLen    = to->second;
427            double   effective_length = affectedEdge.length() + additionalLen;
428
429            affectedEdge.set_length(effective_length);
430        }
431    }
432};
433
434GBT_LEN ARB_edge::adjacent_distance() const {
435    //! return length of edges starting from this->dest()
436
437    if (at_leaf()) return 0.0;
438    return next().length_or_adjacent_distance() + otherNext().length_or_adjacent_distance();
439}
440
441void ARB_edge::virtually_add_or_distribute_length_forward(GBT_LEN len, RootedTree::LengthCollector& collect) const {
442    rt_assert(!is_nan_or_inf(len));
443    if (length() > 0.0) {
444        collect.add_parent_length(son(), len);
445    }
446    else {
447        if (len != 0.0) virtually_distribute_length_forward(len, collect);
448    }
449}
450
451
452void ARB_edge::virtually_distribute_length_forward(GBT_LEN len, RootedTree::LengthCollector& collect) const {
453    /*! distribute length to edges adjacent in edge direction (i.e. edges starting from this->dest())
454     * Split 'len' proportional to adjacent edges lengths.
455     *
456     * Note: length will not be distributed to tree-struction itself (yet), but collected in 'collect'
457     */
458
459    rt_assert(is_normal(len));
460    rt_assert(!at_leaf()); // cannot forward anything (nothing beyond leafs)
461
462    ARB_edge e1 = next();
463    ARB_edge e2 = otherNext();
464
465    GBT_LEN d1 = e1.length_or_adjacent_distance();
466    GBT_LEN d2 = e2.length_or_adjacent_distance();
467
468    len = len/(d1+d2);
469
470    e1.virtually_add_or_distribute_length_forward(len*d1, collect);
471    e2.virtually_add_or_distribute_length_forward(len*d2, collect);
472}
473
474void ARB_edge::virtually_distribute_length(GBT_LEN len, RootedTree::LengthCollector& collect) const {
475    /*! distribute length to all adjacent edges.
476     * Longer edges receive more than shorter ones.
477     *
478     * Edges with length zero will not be changed, instead both edges "beyond"
479     * the edge will be affected (they will be affected equally to direct edges,
480     * because edges at multifurcations are considered to BE direct edges).
481     *
482     * Note: length will not be distributed to tree-struction itself (yet), but collected in 'collect'
483     */
484
485    ARB_edge backEdge = inverse();
486    GBT_LEN len_fwd, len_bwd;
487    {
488        GBT_LEN dist_fwd = adjacent_distance();
489        GBT_LEN dist_bwd = backEdge.adjacent_distance();
490        GBT_LEN lenW     = len/(dist_fwd+dist_bwd);
491        len_fwd          = lenW*dist_fwd;
492        len_bwd          = lenW*dist_bwd;
493
494    }
495
496    if (is_normal(len_fwd)) virtually_distribute_length_forward(len_fwd, collect);
497    if (is_normal(len_bwd)) backEdge.virtually_distribute_length_forward(len_bwd, collect);
498}
499
500void RootedTree::eliminate_and_collect(const multifurc_limits& below, LengthCollector& collect) {
501    /*! eliminate edges specified by 'below' and
502     * store their length in 'collect' for later distribution.
503     */
504    rt_assert(!is_root_node());
505    if (!is_leaf || below.applyAtLeafs) {
506        double value;
507        switch (parse_bootstrap(value)) {
508            case REMARK_NONE:
509                value = 100.0;
510                // fall-through
511            case REMARK_BOOTSTRAP:
512                if (value<below.bootstrap && get_branchlength_unrooted()<below.branchlength) {
513                    collect.eliminate_parent_edge(this);
514                }
515                break;
516
517            case REMARK_OTHER: break;
518        }
519    }
520
521    if (!is_leaf) {
522        get_leftson() ->eliminate_and_collect(below, collect);
523        get_rightson()->eliminate_and_collect(below, collect);
524    }
525}
526
527void ARB_edge::multifurcate() {
528    /*! eliminate edge and distribute length to adjacent edges
529     * - sets its length to zero
530     * - removes remark (bootstrap)
531     * - distributes length to neighbour-branches
532     */
533    RootedTree::LengthCollector collector;
534    collector.eliminate_parent_edge(son());
535    collector.independent_distribution();
536}
537void RootedTree::multifurcate() {
538    /*! eliminate branch from 'this' to 'father' (or brother @ root)
539     * @see ARB_edge::multifurcate()
540     */
541    rt_assert(father); // cannot multifurcate at root; call with son of root to multifurcate root-edge
542    if (father) parentEdge(this).multifurcate();
543}
544
545void RootedTree::set_branchlength_preserving(GBT_LEN new_len) {
546    /*! set branchlength to 'new_len' while preserving overall distance in tree.
547     *
548     * Always works on unrooted tree (i.e. lengths @ root are treated correctly).
549     * Length is preserved as in multifurcate()
550     */
551
552    GBT_LEN old_len = get_branchlength_unrooted();
553    GBT_LEN change  = new_len-old_len;
554
555    char *old_remark = nulldup(get_remark());
556
557    // distribute the negative 'change' to neighbours:
558    set_branchlength_unrooted(-change);
559    multifurcate();
560
561    set_branchlength_unrooted(new_len);
562    use_as_remark(old_remark); // restore remark (was removed by multifurcate())
563}
564
565void RootedTree::multifurcate_whole_tree(const multifurc_limits& below) {
566    /*! multifurcate all branches specified by 'below'
567     * - step 1: eliminate all branches, store eliminated lengths
568     * - step 2: calculate length distribution for all adjacent branches (proportionally to original length of each branch)
569     * - step 3: apply distributed length
570     */
571    RootedTree      *root = get_root_node();
572    LengthCollector  collector;
573
574    // step 1:
575    root->get_leftson()->eliminate_and_collect(below, collector);
576    root->get_rightson()->eliminate_and_collect(below, collector);
577    // root-edge is handled twice by the above calls.
578    // Unproblematic: first call will eliminate root-edge, so second call will do nothing.
579
580    // step 2 and 3:
581    collector.independent_distribution();
582}
583
Note: See TracBrowser for help on using the repository browser.