1 | // ============================================================= // |
---|
2 | // // |
---|
3 | // File : CT_rbtree.cxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // ============================================================= // |
---|
10 | |
---|
11 | // Reconstruct GBT-tree from Ntree |
---|
12 | |
---|
13 | #include "CT_ntree.hxx" |
---|
14 | #include "CT_ctree.hxx" |
---|
15 | #include <arb_sort.h> |
---|
16 | |
---|
17 | |
---|
18 | struct RB_INFO { |
---|
19 | GBT_LEN len; |
---|
20 | TreeNode *node; |
---|
21 | int percent; // branch probability [0..100] |
---|
22 | }; |
---|
23 | |
---|
24 | static int GBT_TREE_order(const TreeNode *t1, const TreeNode *t2) { |
---|
25 | // define a strict order on trees |
---|
26 | |
---|
27 | int cmp = t1->is_leaf() - t2->is_leaf(); // leafs first |
---|
28 | if (!cmp) { |
---|
29 | GBT_LEN l1 = t1->leftlen+t1->rightlen; |
---|
30 | GBT_LEN l2 = t2->leftlen+t2->rightlen; |
---|
31 | |
---|
32 | cmp = double_cmp(l1, l2); // NOW REALLY insert smaller len first |
---|
33 | // (change had no effect on test results) |
---|
34 | if (!cmp) { |
---|
35 | if (t1->is_leaf()) { |
---|
36 | cmp = strcmp(t1->name, t2->name); |
---|
37 | } |
---|
38 | else { |
---|
39 | int cll = GBT_TREE_order(t1->get_leftson(), t2->get_leftson()); |
---|
40 | int clr = GBT_TREE_order(t1->get_leftson(), t2->get_rightson()); |
---|
41 | int crl = GBT_TREE_order(t1->get_rightson(), t2->get_leftson()); |
---|
42 | int crr = GBT_TREE_order(t1->get_rightson(), t2->get_rightson()); |
---|
43 | |
---|
44 | cmp = cll+clr+crl+crr; |
---|
45 | if (!cmp) { |
---|
46 | cmp = cll; |
---|
47 | arb_assert(cmp); // order not strict enough |
---|
48 | } |
---|
49 | } |
---|
50 | } |
---|
51 | } |
---|
52 | return cmp; |
---|
53 | } |
---|
54 | |
---|
55 | static int RB_INFO_order(const void *v1, const void *v2, void *) { |
---|
56 | // defines a strict order on RB_INFOs |
---|
57 | |
---|
58 | const RB_INFO *i1 = (const RB_INFO *)v1; |
---|
59 | const RB_INFO *i2 = (const RB_INFO *)v2; |
---|
60 | |
---|
61 | int cmp = i1->percent - i2->percent; // insert more probable branches first |
---|
62 | |
---|
63 | if (!cmp) { |
---|
64 | cmp = double_cmp(i1->len, i2->len); // NOW REALLY insert smaller len first |
---|
65 | // (change slightly affected test results in "weak" tree-parts) |
---|
66 | if (!cmp) { |
---|
67 | cmp = GBT_TREE_order(i1->node, i2->node); |
---|
68 | } |
---|
69 | } |
---|
70 | return cmp; |
---|
71 | } |
---|
72 | |
---|
73 | RB_INFO *ConsensusTree::rbtree(const NT_NODE *tree, TreeRoot *root) { |
---|
74 | // doing all the work for rb_gettree() :-) |
---|
75 | // convert a Ntree into a GBT-Tree |
---|
76 | |
---|
77 | TreeNode *tnode = root->makeNode(); |
---|
78 | tnode->father = NULp; |
---|
79 | |
---|
80 | RB_INFO *info = ARB_calloc<RB_INFO>(1); |
---|
81 | info->node = tnode; // return-information |
---|
82 | info->percent = int(tree->part->get_weight()*100.0+.5); |
---|
83 | info->len = tree->part->get_len(); |
---|
84 | |
---|
85 | NSONS *nsonp = tree->son_list; |
---|
86 | if (!nsonp) { // if node is leaf |
---|
87 | int idx = tree->part->index(); |
---|
88 | |
---|
89 | tnode->name = ARB_strdup(get_species_name(idx)); |
---|
90 | tnode->markAsLeaf(); |
---|
91 | } |
---|
92 | else { |
---|
93 | arb_assert(!tnode->is_leaf()); |
---|
94 | arb_assert(!tnode->get_remark()); |
---|
95 | if (info->percent < 100) { |
---|
96 | tnode->set_bootstrap(info->percent); |
---|
97 | } |
---|
98 | |
---|
99 | int multifurc = ntree_count_sons(tree); |
---|
100 | RB_INFO *son_info[multifurc]; |
---|
101 | |
---|
102 | { |
---|
103 | int sidx = 0; |
---|
104 | while (nsonp) { |
---|
105 | son_info[sidx++] = rbtree(nsonp->node, root); |
---|
106 | nsonp = nsonp->next; |
---|
107 | } |
---|
108 | |
---|
109 | GB_sort((void**)son_info, 0, sidx, RB_INFO_order, NULp); // bring sons into strict order |
---|
110 | } |
---|
111 | |
---|
112 | while (multifurc>1) { |
---|
113 | int didx = 0; |
---|
114 | for (int sidx1 = 0; sidx1<multifurc; sidx1 += 2) { |
---|
115 | int sidx2 = sidx1+1; |
---|
116 | if (sidx2<multifurc) { |
---|
117 | TreeNode *mf; |
---|
118 | RB_INFO *sinfo; |
---|
119 | |
---|
120 | if (multifurc > 2) { |
---|
121 | mf = root->makeNode(); |
---|
122 | ARB_calloc(sinfo, 1); |
---|
123 | |
---|
124 | mf->father = NULp; |
---|
125 | |
---|
126 | sinfo->percent = 0; // branch never really occurs (artificial multifurcation in binary tree) |
---|
127 | sinfo->len = 0.0; |
---|
128 | sinfo->node = mf; |
---|
129 | } |
---|
130 | else { // last step |
---|
131 | mf = tnode; |
---|
132 | sinfo = info; |
---|
133 | } |
---|
134 | |
---|
135 | mf->leftson = son_info[sidx1]->node; |
---|
136 | mf->leftlen = son_info[sidx1]->len; |
---|
137 | |
---|
138 | mf->rightson = son_info[sidx2]->node; |
---|
139 | mf->rightlen = son_info[sidx2]->len; |
---|
140 | |
---|
141 | mf->leftson->father = mf; |
---|
142 | mf->rightson->father = mf; |
---|
143 | |
---|
144 | freenull(son_info[sidx1]); |
---|
145 | freenull(son_info[sidx2]); |
---|
146 | |
---|
147 | son_info[didx++] = sinfo; |
---|
148 | } |
---|
149 | else { |
---|
150 | son_info[didx++] = son_info[sidx1]; |
---|
151 | } |
---|
152 | } |
---|
153 | multifurc = didx; |
---|
154 | } |
---|
155 | |
---|
156 | arb_assert(multifurc == 1); |
---|
157 | arb_assert(son_info[0] == info); |
---|
158 | arb_assert(!info->node->father); |
---|
159 | } |
---|
160 | return info; |
---|
161 | } |
---|
162 | |
---|
163 | |
---|
164 | SizeAwareTree *ConsensusTree::rb_gettree(const NT_NODE *tree) { |
---|
165 | // reconstruct GBT Tree from Ntree. Ntree is not destructed afterwards! |
---|
166 | RB_INFO *info = rbtree(tree, new SizeAwareRoot); |
---|
167 | SizeAwareTree *satree = DOWNCAST(SizeAwareTree*, info->node); |
---|
168 | satree->announce_tree_constructed(); |
---|
169 | ASSERT_VALID_TREE(satree); |
---|
170 | free(info); |
---|
171 | return satree; |
---|
172 | } |
---|