| 1 | // ============================================================= // |
|---|
| 2 | // // |
|---|
| 3 | // File : CT_rbtree.cxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // ============================================================= // |
|---|
| 10 | |
|---|
| 11 | // Reconstruct GBT-tree from Ntree |
|---|
| 12 | |
|---|
| 13 | #include "CT_ntree.hxx" |
|---|
| 14 | #include "CT_ctree.hxx" |
|---|
| 15 | #include <arb_sort.h> |
|---|
| 16 | |
|---|
| 17 | |
|---|
| 18 | struct RB_INFO { |
|---|
| 19 | GBT_LEN len; |
|---|
| 20 | TreeNode *node; |
|---|
| 21 | int percent; // branch probability [0..100] |
|---|
| 22 | }; |
|---|
| 23 | |
|---|
| 24 | static int GBT_TREE_order(const TreeNode *t1, const TreeNode *t2) { |
|---|
| 25 | // define a strict order on trees |
|---|
| 26 | |
|---|
| 27 | int cmp = t1->is_leaf() - t2->is_leaf(); // leafs first |
|---|
| 28 | if (!cmp) { |
|---|
| 29 | GBT_LEN l1 = t1->leftlen+t1->rightlen; |
|---|
| 30 | GBT_LEN l2 = t2->leftlen+t2->rightlen; |
|---|
| 31 | |
|---|
| 32 | cmp = double_cmp(l1, l2); // NOW REALLY insert smaller len first |
|---|
| 33 | // (change had no effect on test results) |
|---|
| 34 | if (!cmp) { |
|---|
| 35 | if (t1->is_leaf()) { |
|---|
| 36 | cmp = strcmp(t1->name, t2->name); |
|---|
| 37 | } |
|---|
| 38 | else { |
|---|
| 39 | int cll = GBT_TREE_order(t1->get_leftson(), t2->get_leftson()); |
|---|
| 40 | int clr = GBT_TREE_order(t1->get_leftson(), t2->get_rightson()); |
|---|
| 41 | int crl = GBT_TREE_order(t1->get_rightson(), t2->get_leftson()); |
|---|
| 42 | int crr = GBT_TREE_order(t1->get_rightson(), t2->get_rightson()); |
|---|
| 43 | |
|---|
| 44 | cmp = cll+clr+crl+crr; |
|---|
| 45 | if (!cmp) { |
|---|
| 46 | cmp = cll; |
|---|
| 47 | arb_assert(cmp); // order not strict enough |
|---|
| 48 | } |
|---|
| 49 | } |
|---|
| 50 | } |
|---|
| 51 | } |
|---|
| 52 | return cmp; |
|---|
| 53 | } |
|---|
| 54 | |
|---|
| 55 | static int RB_INFO_order(const void *v1, const void *v2, void *) { |
|---|
| 56 | // defines a strict order on RB_INFOs |
|---|
| 57 | |
|---|
| 58 | const RB_INFO *i1 = (const RB_INFO *)v1; |
|---|
| 59 | const RB_INFO *i2 = (const RB_INFO *)v2; |
|---|
| 60 | |
|---|
| 61 | int cmp = i1->percent - i2->percent; // insert more probable branches first |
|---|
| 62 | |
|---|
| 63 | if (!cmp) { |
|---|
| 64 | cmp = double_cmp(i1->len, i2->len); // NOW REALLY insert smaller len first |
|---|
| 65 | // (change slightly affected test results in "weak" tree-parts) |
|---|
| 66 | if (!cmp) { |
|---|
| 67 | cmp = GBT_TREE_order(i1->node, i2->node); |
|---|
| 68 | } |
|---|
| 69 | } |
|---|
| 70 | return cmp; |
|---|
| 71 | } |
|---|
| 72 | |
|---|
| 73 | RB_INFO *ConsensusTree::rbtree(const NT_NODE *tree, TreeRoot *root) { |
|---|
| 74 | // doing all the work for rb_gettree() :-) |
|---|
| 75 | // convert a Ntree into a GBT-Tree |
|---|
| 76 | |
|---|
| 77 | TreeNode *tnode = root->makeNode(); |
|---|
| 78 | tnode->father = NULp; |
|---|
| 79 | |
|---|
| 80 | RB_INFO *info = ARB_calloc<RB_INFO>(1); |
|---|
| 81 | info->node = tnode; // return-information |
|---|
| 82 | info->percent = int(tree->part->get_weight()*100.0+.5); |
|---|
| 83 | info->len = tree->part->get_len(); |
|---|
| 84 | |
|---|
| 85 | NSONS *nsonp = tree->son_list; |
|---|
| 86 | if (!nsonp) { // if node is leaf |
|---|
| 87 | int idx = tree->part->index(); |
|---|
| 88 | |
|---|
| 89 | tnode->name = ARB_strdup(get_species_name(idx)); |
|---|
| 90 | tnode->markAsLeaf(); |
|---|
| 91 | } |
|---|
| 92 | else { |
|---|
| 93 | arb_assert(!tnode->is_leaf()); |
|---|
| 94 | arb_assert(!tnode->get_remark()); |
|---|
| 95 | if (info->percent < 100) { |
|---|
| 96 | tnode->set_bootstrap(info->percent); |
|---|
| 97 | } |
|---|
| 98 | |
|---|
| 99 | int multifurc = ntree_count_sons(tree); |
|---|
| 100 | RB_INFO *son_info[multifurc]; |
|---|
| 101 | |
|---|
| 102 | { |
|---|
| 103 | int sidx = 0; |
|---|
| 104 | while (nsonp) { |
|---|
| 105 | son_info[sidx++] = rbtree(nsonp->node, root); |
|---|
| 106 | nsonp = nsonp->next; |
|---|
| 107 | } |
|---|
| 108 | |
|---|
| 109 | GB_sort((void**)son_info, 0, sidx, RB_INFO_order, NULp); // bring sons into strict order |
|---|
| 110 | } |
|---|
| 111 | |
|---|
| 112 | while (multifurc>1) { |
|---|
| 113 | int didx = 0; |
|---|
| 114 | for (int sidx1 = 0; sidx1<multifurc; sidx1 += 2) { |
|---|
| 115 | int sidx2 = sidx1+1; |
|---|
| 116 | if (sidx2<multifurc) { |
|---|
| 117 | TreeNode *mf; |
|---|
| 118 | RB_INFO *sinfo; |
|---|
| 119 | |
|---|
| 120 | if (multifurc > 2) { |
|---|
| 121 | mf = root->makeNode(); |
|---|
| 122 | ARB_calloc(sinfo, 1); |
|---|
| 123 | |
|---|
| 124 | mf->father = NULp; |
|---|
| 125 | |
|---|
| 126 | sinfo->percent = 0; // branch never really occurs (artificial multifurcation in binary tree) |
|---|
| 127 | sinfo->len = 0.0; |
|---|
| 128 | sinfo->node = mf; |
|---|
| 129 | } |
|---|
| 130 | else { // last step |
|---|
| 131 | mf = tnode; |
|---|
| 132 | sinfo = info; |
|---|
| 133 | } |
|---|
| 134 | |
|---|
| 135 | mf->leftson = son_info[sidx1]->node; |
|---|
| 136 | mf->leftlen = son_info[sidx1]->len; |
|---|
| 137 | |
|---|
| 138 | mf->rightson = son_info[sidx2]->node; |
|---|
| 139 | mf->rightlen = son_info[sidx2]->len; |
|---|
| 140 | |
|---|
| 141 | mf->leftson->father = mf; |
|---|
| 142 | mf->rightson->father = mf; |
|---|
| 143 | |
|---|
| 144 | freenull(son_info[sidx1]); |
|---|
| 145 | freenull(son_info[sidx2]); |
|---|
| 146 | |
|---|
| 147 | son_info[didx++] = sinfo; |
|---|
| 148 | } |
|---|
| 149 | else { |
|---|
| 150 | son_info[didx++] = son_info[sidx1]; |
|---|
| 151 | } |
|---|
| 152 | } |
|---|
| 153 | multifurc = didx; |
|---|
| 154 | } |
|---|
| 155 | |
|---|
| 156 | arb_assert(multifurc == 1); |
|---|
| 157 | arb_assert(son_info[0] == info); |
|---|
| 158 | arb_assert(!info->node->father); |
|---|
| 159 | } |
|---|
| 160 | return info; |
|---|
| 161 | } |
|---|
| 162 | |
|---|
| 163 | |
|---|
| 164 | SizeAwareTree *ConsensusTree::rb_gettree(const NT_NODE *tree) { |
|---|
| 165 | // reconstruct GBT Tree from Ntree. Ntree is not destructed afterwards! |
|---|
| 166 | RB_INFO *info = rbtree(tree, new SizeAwareRoot); |
|---|
| 167 | SizeAwareTree *satree = DOWNCAST(SizeAwareTree*, info->node); |
|---|
| 168 | satree->announce_tree_constructed(); |
|---|
| 169 | ASSERT_VALID_TREE(satree); |
|---|
| 170 | free(info); |
|---|
| 171 | return satree; |
|---|
| 172 | } |
|---|