source: tags/arb-7.0.2/CONSENSUS_TREE/CT_hash.cxx

Last change on this file was 18634, checked in by westram, 4 years ago
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 6.6 KB
Line 
1// ============================================================= //
2//                                                               //
3//   File      : CT_hash.cxx                                     //
4//   Purpose   :                                                 //
5//                                                               //
6//   Institute of Microbiology (Technical University Munich)     //
7//   http://www.arb-home.de/                                     //
8//                                                               //
9// ============================================================= //
10
11#include "CT_hash.hxx"
12#include "CT_ntree.hxx"
13#include "CT_ctree.hxx"
14#include <cmath>
15
16#include <arb_sort.h>
17
18PartRegistry::~PartRegistry() {
19    for (PartSet::iterator p = parts.begin(); p != parts.end(); ++p) delete *p;
20    for (PartSet::iterator p = artificial_parts.begin(); p != artificial_parts.end(); ++p) delete *p;
21    for (PartVector::iterator p = sorted.begin(); p != sorted.end(); ++p) delete *p;
22}
23
24PART *PartRegistry::get_part() {
25    /*! each call to get_part() returns the next part from the sorted PART-list.
26     * build_sorted_list() has to be called once before.
27     */
28
29    arb_assert(retrieval_phase()); // call build_sorted_list() before retrieving
30
31    PART *p = NULp;
32    if (retrieved<sorted.size()) {
33        std::swap(p, sorted[retrieved++]);
34    }
35    return p;
36}
37
38const PART *PartRegistry::peek_part(int idx) const {
39    arb_assert(idx>=0 && size_t(idx)<size());
40    arb_assert(retrieval_phase()); // call build_sorted_list() before retrieving
41
42    return sorted[idx];
43}
44
45
46void PartRegistry::put_part_from_complete_tree(PART*& part, const TreeNode *node) {
47    //! insert part in PartRegistry (destroys/reassigns part)
48
49    arb_assert(registration_phase());
50    part->standardize();
51
52    PartSet::iterator found = parts.find(part);
53
54    if (found != parts.end()) {
55        (*found)->addWeightAndLength(part);
56        delete part;
57    }
58    else {
59        part->set_origin(node);
60        parts.insert(part);
61    }
62    part = NULp;
63}
64
65void PartRegistry::put_artificial_part(PART*& part) {
66    arb_assert(registration_phase());
67    part->standardize();
68
69    PartSet::iterator found = artificial_parts.find(part);
70    if (found != artificial_parts.end()) {
71        (*found)->addWeightAndLength(part);
72        delete part;
73    }
74    else {
75        artificial_parts.insert(part);
76    }
77    part = NULp;
78}
79
80void PartRegistry::put_part_from_partial_tree(PART*& part, const PART *partialTree, const TreeNode *node) {
81    //! insert part from partialTree into PartRegistry (destroys/reassigns part)
82    //
83    // Example:
84    //
85    // 'part':                     A---B        (A=part, B=invs)
86    // set of missing species:     C
87    //
88    //                                   A
89    //                                  /
90    // assumed tree:               C---+
91    //                                  \         .
92    //                                   B
93    //
94    // inserted partitions: A---CB
95    //                      AC---B
96    //
97    // Both partitions are inserted here. Since they mutually exclude each other,
98    // only one of them can and will be inserted into the result tree.
99    //
100    // The algorithm assumes that all species missing in the partialTree,
101    // reside on the bigger side of each branch of the partialTree.
102    //
103    // Therefore the partition representing that state is added with normal weight.
104    // The opposite ("missing on smaller side of branch") is added with reduced
105    // probability.
106    //
107    // Note: The PART 'C---AB' might exists at EACH branch in the deconstructed tree
108    // and is inserted once globally (in deconstruct_partial_rootnode).
109
110    arb_assert(registration_phase());
111
112    PART *invs = part->clone();
113    invs->invertInSuperset(partialTree);
114
115    int diff = part->get_members() - invs->get_members();
116    if (diff != 0) {
117        if (diff<0) std::swap(part, invs);
118        // now part contains bigger partition
119
120        // Note: adding 'part' means "add unknown species to smaller side of the partition"
121        int edges_in_part = leafs_2_edges(part->get_members()+1, ROOTED);
122        int edges_in_invs = leafs_2_edges(invs->get_members()+1, ROOTED);
123
124        double part_prob = double(edges_in_invs)/edges_in_part; // probability for one unkown species to reside inside smaller set
125        arb_assert(part_prob>0.0 && part_prob<1.0);
126
127        int    unknownCount     = partialTree->get_nonmembers();
128        double all_in_part_prob = pow(part_prob, unknownCount); // probability ALL unkown species reside inside smaller set
129
130        part->set_faked_weight(all_in_part_prob); // make "unknown in smaller" unlikely
131                                                  // -> reduces chance to be picked and bootstrap IF picked
132    }
133    // else, if both partitions have equal size -> simply add both
134
135    put_part_from_complete_tree(part, node); // inserts 'A---CB' (i.e. missing species added to invs, which is smaller)
136    put_part_from_complete_tree(invs, node); // inserts 'B---CA' (i.e. missing species added to part)
137}
138
139inline bool insertionOrder_less(const PART *p1, const PART *p2) {
140    return p1->insertionOrder_cmp(p2)<0;
141}
142
143void PartRegistry::build_sorted_list(double overall_weight) {
144    //! sort the parts into insertion order.
145
146    arb_assert(!parts.empty()); // nothing has been added
147    arb_assert(registration_phase());
148
149    merge_artificial_parts();
150
151    copy(parts.begin(), parts.end(), std::back_insert_iterator<PartVector>(sorted));
152    sort(sorted.begin(), sorted.end(), insertionOrder_less);
153
154    parts.clear();
155
156    for (PartVector::iterator pi = sorted.begin(); pi != sorted.end(); ++pi) {
157        (*pi)->takeMean(overall_weight);
158    }
159
160    arb_assert(retrieval_phase());
161}
162
163void PartRegistry::merge_artificial_parts() {
164    /*! 'artificial_parts' contains those partitions that did not occur in
165     * one PARTIAL input tree.
166     *
167     * Not-yet-registered artificial_parts will be registered here.
168     * Their weight is faked to zero (aka "never occurs").
169     *
170     * Already registered artificial_parts are only skipped -> their bootstrap
171     * will be set correctly (just like when the branch as such does not exist
172     * in a non-partial tree).
173     */
174
175    for (PartSet::iterator arti = artificial_parts.begin(); arti != artificial_parts.end(); ++arti) {
176        if (parts.find(*arti) == parts.end()) {
177            // partition was not added by any known edge -> add plain artificial edge
178            (*arti)->set_faked_weight(0.0); // never occurs (implicates length=1.0; see get_len())
179            parts.insert(*arti);
180        }
181        else {
182            delete *arti;
183        }
184    }
185    artificial_parts.clear();
186}
187
Note: See TracBrowser for help on using the repository browser.