| 1 | // ============================================================= // |
|---|
| 2 | // // |
|---|
| 3 | // File : CT_rbtree.cxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // ============================================================= // |
|---|
| 10 | |
|---|
| 11 | // Reconstruct GBT-tree from Ntree |
|---|
| 12 | |
|---|
| 13 | #include "CT_ntree.hxx" |
|---|
| 14 | #include "CT_ctree.hxx" |
|---|
| 15 | #include <arb_sort.h> |
|---|
| 16 | |
|---|
| 17 | |
|---|
| 18 | struct RB_INFO { |
|---|
| 19 | GBT_LEN len; |
|---|
| 20 | GBT_TREE *node; |
|---|
| 21 | int percent; // branch probability [0..100] |
|---|
| 22 | }; |
|---|
| 23 | |
|---|
| 24 | |
|---|
| 25 | #define RMSTRLEN 81 |
|---|
| 26 | |
|---|
| 27 | static char *rb_remark(const char *info, int perc, char *txt) { |
|---|
| 28 | // build a remark with the percentage representation of the partition |
|---|
| 29 | char *txt2 = (char *) getmem(RMSTRLEN); |
|---|
| 30 | sprintf(txt2, "%s%i%%", info, perc); |
|---|
| 31 | if (txt) { |
|---|
| 32 | strcat(txt, txt2); |
|---|
| 33 | free(txt2); |
|---|
| 34 | } |
|---|
| 35 | else { |
|---|
| 36 | txt = txt2; |
|---|
| 37 | } |
|---|
| 38 | |
|---|
| 39 | return txt; |
|---|
| 40 | } |
|---|
| 41 | |
|---|
| 42 | static int GBT_TREE_order(const GBT_TREE *t1, const GBT_TREE *t2) { |
|---|
| 43 | // define a strict order on trees |
|---|
| 44 | |
|---|
| 45 | int cmp = t1->is_leaf - t2->is_leaf; // leafs first |
|---|
| 46 | if (!cmp) { |
|---|
| 47 | GBT_LEN l1 = t1->leftlen+t1->rightlen; |
|---|
| 48 | GBT_LEN l2 = t2->leftlen+t2->rightlen; |
|---|
| 49 | |
|---|
| 50 | cmp = double_cmp(l1, l2); // insert smaller len first |
|---|
| 51 | if (!cmp) { |
|---|
| 52 | if (t1->is_leaf) { |
|---|
| 53 | cmp = strcmp(t1->name, t2->name); |
|---|
| 54 | } |
|---|
| 55 | else { |
|---|
| 56 | int cll = GBT_TREE_order(t1->leftson, t2->leftson); |
|---|
| 57 | int clr = GBT_TREE_order(t1->leftson, t2->rightson); |
|---|
| 58 | int crl = GBT_TREE_order(t1->rightson, t2->leftson); |
|---|
| 59 | int crr = GBT_TREE_order(t1->rightson, t2->rightson); |
|---|
| 60 | |
|---|
| 61 | cmp = cll+clr+crl+crr; |
|---|
| 62 | arb_assert(cmp); // order not strict enough |
|---|
| 63 | } |
|---|
| 64 | } |
|---|
| 65 | } |
|---|
| 66 | return cmp; |
|---|
| 67 | } |
|---|
| 68 | |
|---|
| 69 | static int RB_INFO_order(const void *v1, const void *v2, void *) { |
|---|
| 70 | // defines a strict order on RB_INFOs |
|---|
| 71 | |
|---|
| 72 | const RB_INFO *i1 = (const RB_INFO *)v1; |
|---|
| 73 | const RB_INFO *i2 = (const RB_INFO *)v2; |
|---|
| 74 | |
|---|
| 75 | int cmp = i1->percent - i2->percent; // insert more probable branches first |
|---|
| 76 | |
|---|
| 77 | if (!cmp) { |
|---|
| 78 | cmp = double_cmp(i1->len, i2->len); // insert smaller len first |
|---|
| 79 | if (!cmp) { |
|---|
| 80 | cmp = GBT_TREE_order(i1->node, i2->node); |
|---|
| 81 | } |
|---|
| 82 | } |
|---|
| 83 | return cmp; |
|---|
| 84 | } |
|---|
| 85 | |
|---|
| 86 | RB_INFO *ConsensusTree::rbtree(const NT_NODE *tree, GBT_TREE *father) { |
|---|
| 87 | // doing all the work for rb_gettree() :-) |
|---|
| 88 | // convert a Ntree into a GBT-Tree |
|---|
| 89 | |
|---|
| 90 | GBT_TREE *gbtnode = (GBT_TREE *) getmem(sizeof(GBT_TREE)); |
|---|
| 91 | gbtnode->father = father; |
|---|
| 92 | |
|---|
| 93 | RB_INFO *info = (RB_INFO *) getmem(sizeof(RB_INFO)); |
|---|
| 94 | info->node = gbtnode; // return-information |
|---|
| 95 | info->percent = int(tree->part->get_weight()*100.0+.5); |
|---|
| 96 | info->len = tree->part->get_len(); |
|---|
| 97 | |
|---|
| 98 | NSONS *nsonp = tree->son_list; |
|---|
| 99 | if (!nsonp) { // if node is leaf |
|---|
| 100 | int idx = tree->part->index(); |
|---|
| 101 | |
|---|
| 102 | gbtnode->name = strdup(get_species_name(idx)); |
|---|
| 103 | gbtnode->is_leaf = true; |
|---|
| 104 | } |
|---|
| 105 | else { |
|---|
| 106 | gbtnode->is_leaf = false; |
|---|
| 107 | if (info->percent < 100) { |
|---|
| 108 | gbtnode->remark_branch = rb_remark("", info->percent, gbtnode->remark_branch); |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | int multifurc = ntree_count_sons(tree); |
|---|
| 112 | RB_INFO *son_info[multifurc]; |
|---|
| 113 | |
|---|
| 114 | { |
|---|
| 115 | int sidx = 0; |
|---|
| 116 | while (nsonp) { |
|---|
| 117 | son_info[sidx++] = rbtree(nsonp->node, NULL); |
|---|
| 118 | nsonp = nsonp->next; |
|---|
| 119 | } |
|---|
| 120 | |
|---|
| 121 | GB_sort((void**)son_info, 0, sidx, RB_INFO_order, NULL); // bring sons into strict order |
|---|
| 122 | } |
|---|
| 123 | |
|---|
| 124 | while (multifurc>1) { |
|---|
| 125 | int didx = 0; |
|---|
| 126 | for (int sidx1 = 0; sidx1<multifurc; sidx1 += 2) { |
|---|
| 127 | int sidx2 = sidx1+1; |
|---|
| 128 | if (sidx2<multifurc) { |
|---|
| 129 | GBT_TREE *mf; |
|---|
| 130 | RB_INFO *sinfo; |
|---|
| 131 | |
|---|
| 132 | if (multifurc > 2) { |
|---|
| 133 | mf = (GBT_TREE *) getmem(sizeof(GBT_TREE)); |
|---|
| 134 | sinfo = (RB_INFO *) getmem(sizeof(RB_INFO)); |
|---|
| 135 | |
|---|
| 136 | mf->father = NULL; |
|---|
| 137 | |
|---|
| 138 | sinfo->percent = 0; // branch never really occurs (artificial multifurcation in binary tree) |
|---|
| 139 | sinfo->len = 0.0; |
|---|
| 140 | sinfo->node = mf; |
|---|
| 141 | } |
|---|
| 142 | else { // last step |
|---|
| 143 | mf = gbtnode; |
|---|
| 144 | sinfo = info; |
|---|
| 145 | } |
|---|
| 146 | |
|---|
| 147 | mf->leftson = son_info[sidx1]->node; |
|---|
| 148 | mf->leftlen = son_info[sidx1]->len; |
|---|
| 149 | |
|---|
| 150 | mf->rightson = son_info[sidx2]->node; |
|---|
| 151 | mf->rightlen = son_info[sidx2]->len; |
|---|
| 152 | |
|---|
| 153 | mf->leftson->father = mf; |
|---|
| 154 | mf->rightson->father = mf; |
|---|
| 155 | |
|---|
| 156 | freenull(son_info[sidx1]); |
|---|
| 157 | freenull(son_info[sidx2]); |
|---|
| 158 | |
|---|
| 159 | son_info[didx++] = sinfo; |
|---|
| 160 | } |
|---|
| 161 | else { |
|---|
| 162 | son_info[didx++] = son_info[sidx1]; |
|---|
| 163 | } |
|---|
| 164 | } |
|---|
| 165 | multifurc = didx; |
|---|
| 166 | } |
|---|
| 167 | |
|---|
| 168 | arb_assert(multifurc == 1); |
|---|
| 169 | arb_assert(son_info[0] == info); |
|---|
| 170 | arb_assert(info->node->father == father); |
|---|
| 171 | } |
|---|
| 172 | return info; |
|---|
| 173 | } |
|---|
| 174 | |
|---|
| 175 | |
|---|
| 176 | GBT_TREE *ConsensusTree::rb_gettree(const NT_NODE *tree) { |
|---|
| 177 | // reconstruct GBT Tree from Ntree. Ntree is not destructed afterwards! |
|---|
| 178 | RB_INFO *info = rbtree(tree, NULL); |
|---|
| 179 | GBT_TREE *gbttree = info->node; |
|---|
| 180 | free(info); |
|---|
| 181 | return gbttree; |
|---|
| 182 | } |
|---|