1 | // ============================================================= // |
---|
2 | // // |
---|
3 | // File : CT_ctree.hxx // |
---|
4 | // Purpose : interface to consensus tree calculation // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // ============================================================= // |
---|
10 | |
---|
11 | #ifndef CT_CTREE_HXX |
---|
12 | #define CT_CTREE_HXX |
---|
13 | |
---|
14 | #ifndef CT_COMMON_HXX |
---|
15 | #include <CT_common.hxx> |
---|
16 | #endif |
---|
17 | #ifndef ARBTOOLS_H |
---|
18 | #include <arbtools.h> |
---|
19 | #endif |
---|
20 | #ifndef ARBDBT_H |
---|
21 | #include <arbdbt.h> |
---|
22 | #endif |
---|
23 | #ifndef _GLIBCXX_STRING |
---|
24 | #include <string> |
---|
25 | #endif |
---|
26 | #ifndef TRIANGULAR_H |
---|
27 | #include <triangular.h> |
---|
28 | #endif |
---|
29 | |
---|
30 | struct NT_NODE; |
---|
31 | class RB_INFO; |
---|
32 | |
---|
33 | // ----------------------- |
---|
34 | // ConsensusTree |
---|
35 | // |
---|
36 | // (Note: used directly from DIST/DI_matr.cxx) |
---|
37 | |
---|
38 | class ConsensusTree : public SpeciesSpace, public DeconstructedTree { // derived from Noncopyable |
---|
39 | struct RB_INFO *rbtree(const NT_NODE *tree, TreeRoot *root); |
---|
40 | SizeAwareTree *rb_gettree(const NT_NODE *tree); |
---|
41 | |
---|
42 | public: |
---|
43 | ConsensusTree(const CharPtrArray& names_); |
---|
44 | |
---|
45 | __ATTR__USERESULT GB_ERROR insert_tree_weighted(const TreeNode *tree, int leafs, double weight, bool provideProgress); |
---|
46 | |
---|
47 | SizeAwareTree *get_consensus_tree(GB_ERROR& error); |
---|
48 | }; |
---|
49 | |
---|
50 | // ------------------------------ |
---|
51 | // ConsensusTreeBuilder |
---|
52 | |
---|
53 | class ConsensusTreeBuilder : public TreeContainer { |
---|
54 | // wrapper for ConsensusTree |
---|
55 | // - automatically collects species occurring in added trees (has to be done by caller of ConsensusTree) |
---|
56 | // - uses much more memory than using ConsensusTree directly, since it stores all added trees |
---|
57 | // |
---|
58 | // Not helpful for consensing thousands of trees like bootstrapping does |
---|
59 | |
---|
60 | typedef std::vector<double> Weights; |
---|
61 | |
---|
62 | Weights weights; |
---|
63 | |
---|
64 | public: |
---|
65 | void add(SizeAwareTree*& tree, const char *treename, double weight) { |
---|
66 | tree->get_tree_root()->find_innermost_edge().set_root(); // only wanted when building consensus tree |
---|
67 | |
---|
68 | // (currently) reordering trees before deconstructing no longer |
---|
69 | // affects the resulting consense tree (as performed as unit tests). |
---|
70 | // |
---|
71 | // tree->reorder_tree(BIG_BRANCHES_TO_TOP); |
---|
72 | // tree->reorder_tree(BIG_BRANCHES_TO_BOTTOM); |
---|
73 | // tree->reorder_tree(BIG_BRANCHES_TO_EDGE); |
---|
74 | |
---|
75 | TreeContainer::add(tree, treename); |
---|
76 | weights.push_back(weight); |
---|
77 | } |
---|
78 | |
---|
79 | SizeAwareTree *get(size_t& different_species, GB_ERROR& error) { |
---|
80 | arb_assert(!error); |
---|
81 | |
---|
82 | ConstStrArray species_names; |
---|
83 | get_species_names(species_names); |
---|
84 | |
---|
85 | different_species = get_species_count(); |
---|
86 | |
---|
87 | double phase1_fraction; |
---|
88 | { |
---|
89 | size_t source_species_count = 0; // number of species added (all source trees; species counted once for each tree) |
---|
90 | for (size_t i = 0; i<get_tree_count();++i) { |
---|
91 | const TreeInfo& treeInfo = get_tree_info(i); |
---|
92 | source_species_count += treeInfo.species_count(); |
---|
93 | } |
---|
94 | size_t target_edge_count = leafs_2_edges(different_species, UNROOTED); |
---|
95 | |
---|
96 | size_t common_species_count = source_species_count-different_species; |
---|
97 | size_t noncommon_species_count = source_species_count-common_species_count; |
---|
98 | |
---|
99 | double O_deconstruct = 3.0*noncommon_species_count*noncommon_species_count + target_edge_count*target_edge_count; |
---|
100 | double O_reconstruct = (10.0*noncommon_species_count + target_edge_count) * 1.0e7; |
---|
101 | phase1_fraction = O_deconstruct / (O_deconstruct+O_reconstruct); |
---|
102 | } |
---|
103 | |
---|
104 | arb_progress progress(WEIGHTED, phase1_fraction); |
---|
105 | |
---|
106 | ConsensusTree ctree(species_names); |
---|
107 | { |
---|
108 | arb_progress deconstruct("deconstructing", get_tree_count()); |
---|
109 | |
---|
110 | for (size_t i = 0; i<get_tree_count() && !error; ++i) { |
---|
111 | const TreeInfo& treeInfo = get_tree_info(i); |
---|
112 | const SizeAwareTree *tree = get_tree(i); |
---|
113 | |
---|
114 | error = ctree.insert_tree_weighted(tree, treeInfo.species_count(), weights[i], true); |
---|
115 | ++deconstruct; |
---|
116 | if (error) { |
---|
117 | error = GBS_global_string("Failed to deconstruct '%s' (Reason: %s)", treeInfo.name(), error); |
---|
118 | } |
---|
119 | else { |
---|
120 | error = deconstruct.error_if_aborted(); |
---|
121 | } |
---|
122 | } |
---|
123 | if (error) deconstruct.done(); |
---|
124 | ++progress; |
---|
125 | } |
---|
126 | |
---|
127 | if (error) return NULp; |
---|
128 | |
---|
129 | #if defined(DEBUG) |
---|
130 | // if class ConsensusTree does depend on any local data, |
---|
131 | // instanciating another instance will interfere: |
---|
132 | ConsensusTree influence(species_names); |
---|
133 | #endif |
---|
134 | |
---|
135 | { |
---|
136 | arb_progress reconstruct("reconstructing"); |
---|
137 | SizeAwareTree *sat = ctree.get_consensus_tree(error); |
---|
138 | ++progress; |
---|
139 | return sat; |
---|
140 | } |
---|
141 | } |
---|
142 | |
---|
143 | char *get_tree_remark() const; |
---|
144 | }; |
---|
145 | |
---|
146 | |
---|
147 | #else |
---|
148 | #error CT_ctree.hxx included twice |
---|
149 | #endif // CT_CTREE_HXX |
---|