source: branches/stable/CONSENSUS_TREE/CT_ctree.cxx

Last change on this file was 18634, checked in by westram, 3 years ago
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.1 KB
Line 
1// ============================================================= //
2//                                                               //
3//   File      : CT_ctree.cxx                                    //
4//   Purpose   : consensus tree                                  //
5//                                                               //
6//   Institute of Microbiology (Technical University Munich)     //
7//   http://www.arb-home.de/                                     //
8//                                                               //
9// ============================================================= //
10
11#include "CT_ctree.hxx"
12#include "CT_hash.hxx"
13#include "CT_ntree.hxx"
14#include <arb_strbuf.h>
15
16SpeciesSpace::SpeciesSpace(const CharPtrArray& names_) :
17    names(names_),
18    Name_hash(NULp),
19    psize(NULp)
20{
21    // names = leafnames (=species names)
22
23    int leaf_count = names.size();
24    Name_hash = GBS_create_hash(leaf_count, GB_MIND_CASE);
25    for (int i=0; i< leaf_count; i++) {
26        GBS_write_hash(Name_hash, names[i], i+1);
27    }
28
29    psize      = new PartitionSize(leaf_count);
30    allSpecies = psize->create_root();
31}
32
33SpeciesSpace::~SpeciesSpace() {
34    delete allSpecies;
35    delete psize;
36
37    if (Name_hash) {
38        GBS_free_hash(Name_hash);
39        Name_hash  = NULp;
40    }
41}
42
43// ---------------------------
44//      DeconstructedTree
45
46DeconstructedTree::DeconstructedTree(const SpeciesSpace& specSpace_) :
47    specSpace(specSpace_),
48    overall_weight(0)
49{
50    registry = new PartRegistry;
51}
52DeconstructedTree::~DeconstructedTree() {
53    delete registry;
54}
55void DeconstructedTree::start_sorted_retrieval() { registry->build_sorted_list(overall_weight); }
56size_t DeconstructedTree::get_part_count() const { return registry->size(); }
57PART *DeconstructedTree::get_part() { return registry->get_part(); }
58const PART *DeconstructedTree::peek_part(int idx) const { return registry->peek_part(idx); }
59
60const PART *DeconstructedTree::part_with_origin(const TreeNode *node) const {
61    arb_assert(node);
62    size_t parts = get_part_count();
63    for (size_t i = 0; i<parts; ++i) {
64        const PART *p = peek_part(i);
65        if (p->get_origin() == node) {
66            return p;
67        }
68    }
69    return NULp;
70}
71const PART *DeconstructedTree::find_part(const TreeNode *node) const {
72    const PART *found = part_with_origin(node);
73    if (!found && node->is_son_of_root()) {
74        found = part_with_origin(node->get_brother());
75    }
76    return found;
77}
78
79const PART *DeconstructedTree::find_innermost_part() const {
80    int         best_dist = INT_MAX;
81    const PART *best_part = NULp;
82
83    size_t parts = get_part_count();
84    for (size_t p = 0; p<parts; ++p) {
85        const PART *part = peek_part(p);
86        int         dist = part->distance_to_tree_center();
87
88        if (dist<best_dist) {
89            best_dist = dist;
90            best_part = part;
91        }
92    }
93
94    arb_assert(best_part);
95    return best_part;
96}
97// -----------------------
98//      ConsensusTree
99
100ConsensusTree::ConsensusTree(const CharPtrArray& names_) :
101    SpeciesSpace(names_),
102    DeconstructedTree(static_cast<const SpeciesSpace&>(*this))
103{
104}
105
106GB_ERROR DeconstructedTree::deconstruct_weighted(const TreeNode *tree, const PART *wholeTree, int leafs, double weight, bool provideProgress, const PART *allSpecies, DeconstructionMode mode) {
107    // inserts a tree into the PartRegistry.
108    arb_assert(GBT_count_leafs(tree) == size_t(leafs));
109
110    overall_weight += weight;
111
112    if (wholeTree->get_members() != leafs) {
113        arb_assert(wholeTree->get_members() < leafs);
114        return "tree contains duplicated species";
115    }
116
117    arb_progress *insertProgress = NULp;
118    if (provideProgress) {
119        long nodeCount = leafs_2_nodes(wholeTree->get_members(), ROOTED);
120        insertProgress = new arb_progress(nodeCount);
121    }
122
123    bool contains_all_species   = wholeTree->equals(allSpecies);
124    bool handle_partial_as_full = mode == DMODE_ROOTSYNC;
125
126    if (contains_all_species || handle_partial_as_full) {
127        deconstruct_full_rootnode(tree, weight, insertProgress);
128    }
129    else {
130        deconstruct_partial_rootnode(tree, weight, wholeTree, insertProgress);
131    }
132
133    GB_ERROR error = NULp;
134    if (provideProgress) {
135        error = insertProgress->error_if_aborted();
136
137        delete insertProgress;
138        insertProgress = NULp;
139    }
140    arb_assert(!insertProgress);
141
142    return error;
143}
144
145GB_ERROR ConsensusTree::insert_tree_weighted(const TreeNode *tree, int leafs, double weight, bool provideProgress) {
146    PART_Ptr wholeTree(create_tree_PART(tree, 0.0)); // weight does not matter here
147    return deconstruct_weighted(tree, &*wholeTree, leafs, weight, provideProgress, get_allSpecies(), DMODE_CONSENSUS_TREE);
148}
149
150SizeAwareTree *ConsensusTree::get_consensus_tree(GB_ERROR& error) {
151    // Get new consensus-tree -> SizeAwareTree
152
153    /* This function is little bit tricky:
154       the root-partition consist of 111....111 so it must have two sons
155       that represent the same partition son1 == ~son2 to do this we must split
156       the fist son-partition in two parts through logical calculation there
157       could only be one son! */
158
159    arb_assert(!error);
160    ntree_init(get_PartitionSize());
161
162    start_sorted_retrieval();
163
164    arb_progress progress(get_part_count()+2);
165    {
166#if defined(DUMP_PART_INSERTION)
167        PART::start_pretty_printing(get_names());
168#endif
169
170        PART *p = get_part();
171        while (p && !progress.aborted()) {
172            insert_ntree(p);
173            ++progress;
174            p = get_part();
175        }
176
177#if defined(DUMP_PART_INSERTION)
178        PART::stop_pretty_printing();
179#endif
180    }
181
182    SizeAwareTree *result_tree = NULp;
183
184    error = progress.error_if_aborted();
185    if (!error) {
186        const NT_NODE *n = ntree_get();
187
188        arb_assert(ntree_count_sons(n) == 2);
189
190#if defined(NTREE_DEBUG_FUNCTIONS)
191        arb_assert(is_well_formed(n));
192#endif
193
194        result_tree = rb_gettree(n);
195
196        ++progress;
197
198        result_tree->get_tree_root()->find_innermost_edge().set_root();
199        result_tree->reorder_tree(BIG_BRANCHES_TO_BOTTOM);
200
201        ++progress;
202    }
203
204    ntree_cleanup();
205    arb_assert(contradicted(result_tree, error));
206    return result_tree;
207}
208
209
210char *ConsensusTreeBuilder::get_tree_remark() const {
211    GBS_strstruct remark(1000);
212    {
213        char *build_info = GBS_global_string_copy("ARB consensus tree build from %zu trees:", get_tree_count());
214        char *dated      = GBS_log_action_to(NULp, build_info, true);
215        remark.cat(dated);
216        free(dated);
217        free(build_info);
218    }
219
220    size_t allCount = get_species_count();
221
222    size_t maxnamelen = 0;
223    for (size_t t = 0; t<get_tree_count(); ++t) {
224        maxnamelen = std::max(maxnamelen, strlen(get_tree_info(t).name()));
225    }
226    for (size_t t = 0; t<get_tree_count(); ++t) {
227        const TreeInfo& tree = get_tree_info(t);
228        remark.cat(" - ");
229        remark.nprintf(maxnamelen, "%-*s", int(maxnamelen), tree.name());
230        if (tree.species_count()<allCount) {
231            double pc = tree.species_count() / double(allCount);
232            remark.nprintf(50, " (%.1f%%; %zu/%zu)", pc*100.0, tree.species_count(), allCount);
233        }
234        remark.put('\n');
235    }
236
237    return remark.release();
238}
239
240
241
Note: See TracBrowser for help on using the repository browser.