source: tags/testbuild/CONSENSUS_TREE/CT_ctree.hxx

Last change on this file was 11401, checked in by westram, 10 years ago
  • reintegrates 'tree' into 'trunk':
    • consensus trees:
      • support for merging partial trees ("worked" before, but results were crap; implements #65)
      • generated trees are automatically re-rooted and -ordered
      • always list source trees in consensus-tree-comment; show info about partial trees
      • fixed progress bar
    • made GBT_TREE a base class of other tree classes (implements #31)
    • save tree properties in properties (not in DB)
    • new functions 'Remove zombies/marked from ALL trees'
    • tree load/save: layout fixes
    • unit tests
      • added tests for basic tree modifications (PARSIMONY)
    • performance:
      • compute_tree updates tree information in one traversal
      • tree generators are now capable to generate any type of tree (w/o needing to copy it once)
    • bugfixes:
      • NNI (of marked species) was also always performed for colored species
      • centered beautify-order is stable now
      • improved 'search optimal root'
  • adds:
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.1 KB
Line 
1// ============================================================= //
2//                                                               //
3//   File      : CT_ctree.hxx                                    //
4//   Purpose   : interface of CONSENSUS_TREE library             //
5//                                                               //
6//   Institute of Microbiology (Technical University Munich)     //
7//   http://www.arb-home.de/                                     //
8//                                                               //
9// ============================================================= //
10
11#ifndef CT_CTREE_HXX
12#define CT_CTREE_HXX
13
14#ifndef ARBTOOLS_H
15#include <arbtools.h>
16#endif
17#ifndef ARBDBT_H
18#include <arbdbt.h>
19#endif
20#ifndef ARB_STRARRAY_H
21#include <arb_strarray.h>
22#endif
23#ifndef _GLIBCXX_MAP
24#include <map>
25#endif
26#ifndef _GLIBCXX_VECTOR
27#include <vector>
28#endif
29#ifndef _GLIBCXX_STRING
30#include <string>
31#endif
32#ifndef ROOTEDTREE_H
33#include <RootedTree.h>
34#endif
35#ifndef ARB_PROGRESS_H
36#include <arb_progress.h>
37#endif
38
39class  PART;
40struct NT_NODE;
41class  PartitionSize;
42class  PartRegistry;
43class  RB_INFO;
44
45// -----------------------
46//      SizeAwareTree
47
48class SizeAwareTree : public RootedTree {
49    // simple size-aware tree
50    unsigned leaf_count;
51public:
52    SizeAwareTree(TreeRoot *root) : RootedTree(root) {}
53    unsigned get_leaf_count() const OVERRIDE {
54        return leaf_count;
55    }
56    void compute_tree() OVERRIDE {
57        if (is_leaf) {
58            leaf_count = 1;
59        }
60        else {
61            get_leftson()->compute_tree();
62            get_rightson()->compute_tree();
63            leaf_count = get_leftson()->get_leaf_count()+get_rightson()->get_leaf_count();
64        }
65    }
66};
67
68struct SizeAwareNodeFactory : public RootedTreeNodeFactory {
69    RootedTree *makeNode(TreeRoot *root) const OVERRIDE { return new SizeAwareTree(root); }
70};
71
72// -----------------------
73//      ConsensusTree
74
75class ConsensusTree : virtual Noncopyable {
76    double               overall_weight;
77    GB_HASH             *Name_hash;
78    PartitionSize       *size;
79    PART                *allSpecies;
80    PartRegistry        *registry;
81    const CharPtrArray&  names;
82
83    arb_progress *insertProgress;
84
85    PART *deconstruct_full_subtree(const GBT_TREE *tree, const GBT_LEN& len, const double& weight);
86    PART *deconstruct_partial_subtree(const GBT_TREE *tree, const GBT_LEN& len, const double& weight, const PART *partialTree);
87
88    void deconstruct_full_rootnode(const GBT_TREE *tree, const double& weight);
89    void deconstruct_partial_rootnode(const GBT_TREE *tree, const double& weight, const PART *partialTree);
90
91    int get_species_index(const char *name) const {
92        int idx = GBS_read_hash(Name_hash, name);
93        arb_assert(idx>0); // given 'name' is unknown
94        return idx-1;
95    }
96
97    const char *get_species_name(int idx) const {
98        return names[idx];
99    }
100
101    struct RB_INFO *rbtree(const NT_NODE *tree, TreeRoot *root);
102    SizeAwareTree  *rb_gettree(const NT_NODE *tree);
103
104    void add_tree_to_PART(const GBT_TREE *tree, PART& part) const;
105    PART *create_tree_PART(const GBT_TREE *tree, const double& weight) const;
106
107    void inc_insert_progress() { if (insertProgress) ++(*insertProgress); }
108
109public:
110    ConsensusTree(const CharPtrArray& names_);
111    ~ConsensusTree();
112
113    __ATTR__USERESULT GB_ERROR insert_tree_weighted(const GBT_TREE *tree, int leafs, double weight, bool provideProgress);
114
115    SizeAwareTree *get_consensus_tree(GB_ERROR& error);
116};
117
118// ------------------------------
119//      ConsensusTreeBuilder
120
121class TreeInfo {
122    std::string Name;
123    size_t      specCount;
124
125public:
126    explicit TreeInfo(const char *Name_, size_t specCount_)
127        : Name(Name_),
128          specCount(specCount_)
129    {
130    }
131
132    const char *name() const { return Name.c_str(); }
133    size_t species_count() const { return specCount; }
134};
135
136class ConsensusTreeBuilder {
137    // wrapper for ConsensusTree
138    // - automatically collects species occurring in added trees (has to be done by caller of ConsensusTree)
139    // - uses much more memory than using ConsensusTree directly, since it stores all added trees
140    //
141    // Not helpful for consensing thousands of trees like bootstrapping does
142
143    typedef std::map<std::string, size_t> OccurCount;
144    typedef std::vector<SizeAwareTree*>   Trees;
145    typedef std::vector<double>           Weights;
146
147    OccurCount species_occurred;
148    Trees      trees;
149    Weights    weights;
150
151    std::vector<TreeInfo> tree_info;
152
153public:
154    ~ConsensusTreeBuilder() {
155        for (size_t i = 0; i<trees.size(); ++i) {
156            delete trees[i];
157        }
158    }
159
160    void add(SizeAwareTree*& tree, const char *treename, double weight) {
161        tree->get_tree_root()->find_innermost_edge().set_root();
162
163        // (currently) reordering trees before deconstructing no longer
164        // affects the resulting consense tree (as performed as unit tests).
165        //
166        // tree->reorder_tree(BIG_BRANCHES_TO_TOP);
167        // tree->reorder_tree(BIG_BRANCHES_TO_BOTTOM);
168        // tree->reorder_tree(BIG_BRANCHES_TO_EDGE);
169
170        trees.push_back(tree);
171        weights.push_back(weight);
172
173        size_t   name_count;
174        GB_CSTR *names = GBT_get_names_of_species_in_tree(tree, &name_count);
175
176        for (size_t n = 0; n<name_count; ++n) {
177            const char *name = names[n];
178            if (species_occurred.find(name) == species_occurred.end()) {
179                species_occurred[name] = 1;
180            }
181            else {
182                species_occurred[name]++;
183            }
184        }
185
186        tree_info.push_back(TreeInfo(treename, name_count));
187
188        free(names);
189        tree = NULL;
190    }
191
192    SizeAwareTree *get(size_t& different_species, GB_ERROR& error) {
193        arb_assert(!error);
194
195        ConstStrArray species_names;
196        for (OccurCount::iterator s = species_occurred.begin(); s != species_occurred.end(); ++s) {
197            species_names.put(s->first.c_str());
198        }
199
200        different_species = species_occurred.size();
201        ConsensusTree ctree(species_names);
202        {
203            arb_progress deconstruct("deconstructing", trees.size());
204
205            for (size_t i = 0; i<trees.size() && !error; ++i) {
206                error = ctree.insert_tree_weighted(trees[i], tree_info[i].species_count(), weights[i], true);
207                ++deconstruct;
208                if (error) {
209                    error = GBS_global_string("Failed to deconstruct '%s' (Reason: %s)", tree_info[i].name(), error);
210                }
211                else {
212                    error = deconstruct.error_if_aborted();
213                }
214            }
215            if (error) deconstruct.done();
216        }
217
218        if (error) return NULL;
219
220#if defined(DEBUG)
221        // if class ConsensusTree does depend on any local data,
222        // instanciating another instance will interfere:
223        ConsensusTree influence(species_names);
224#endif
225
226        arb_progress reconstruct("reconstructing");
227        return ctree.get_consensus_tree(error);
228    }
229
230    char *get_remark() const;
231    size_t species_count() const { return species_occurred.size(); }
232};
233
234
235#else
236#error CT_ctree.hxx included twice
237#endif // CT_CTREE_HXX
Note: See TracBrowser for help on using the repository browser.