source: branches/profile/CONSENSUS_TREE/CT_hash.cxx

Last change on this file was 11488, checked in by westram, 10 years ago
  • reintegrates 'tree' into 'trunk'
    • implements #417 (multifurcate tree)
    • tree display
      • adds MULTIFURC MODE
      • reordered modes (synchronizes NTREE and PARSIMONY)
    • branch analysis
      • display number of multifurcations in 'mark long branches'
      • display "in-tree-distance" and "per-species-distance"
    • added function to toggle '100%' bootstraps
    • document bug in GBT_remove_leafs (#452)
  • adds:
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 6.3 KB
Line 
1// ============================================================= //
2//                                                               //
3//   File      : CT_hash.cxx                                     //
4//   Purpose   :                                                 //
5//                                                               //
6//   Institute of Microbiology (Technical University Munich)     //
7//   http://www.arb-home.de/                                     //
8//                                                               //
9// ============================================================= //
10
11#include "CT_hash.hxx"
12#include "CT_ntree.hxx"
13#include "CT_ctree.hxx"
14#include <cmath>
15
16#include <arb_sort.h>
17
18PartRegistry::~PartRegistry() {
19    for (PartSet::iterator p = parts.begin(); p != parts.end(); ++p) delete *p;
20    for (PartSet::iterator p = artificial_parts.begin(); p != artificial_parts.end(); ++p) delete *p;
21    for (PartVector::iterator p = sorted.begin(); p != sorted.end(); ++p) delete *p;
22}
23
24PART *PartRegistry::get_part() {
25    /*! each call to get_part() returns the next part from the sorted PART-list.
26     * build_sorted_list() has to be called once before.
27     */
28
29    arb_assert(retrieval_phase()); // call build_sorted_list() before retrieving
30
31    PART *p = NULL;
32    if (retrieved<sorted.size()) {
33        std::swap(p, sorted[retrieved++]);
34    }
35    return p;
36}
37
38void PartRegistry::put_part_from_complete_tree(PART*& part) {
39    //! insert part in PartRegistry (destroys/reassigns part)
40
41    arb_assert(registration_phase());
42    part->standardize();
43
44    PartSet::iterator found = parts.find(part);
45
46    if (found != parts.end()) {
47        (*found)->addWeightAndLength(part);
48        delete part;
49    }
50    else {
51        parts.insert(part);
52    }
53    part = NULL;
54}
55
56void PartRegistry::put_artificial_part(PART*& part) {
57    arb_assert(registration_phase());
58    part->standardize();
59
60    PartSet::iterator found = artificial_parts.find(part);
61    if (found != artificial_parts.end()) {
62        (*found)->addWeightAndLength(part);
63        delete part;
64    }
65    else {
66        artificial_parts.insert(part);
67    }
68    part = NULL;
69}
70
71void PartRegistry::put_part_from_partial_tree(PART*& part, const PART *partialTree) {
72    //! insert part from partialTree into PartRegistry (destroys/reassigns part)
73    //
74    // Example:
75    //
76    // 'part':                     A---B        (A=part, B=invs)
77    // set of missing species:     C
78    //
79    //                                   A
80    //                                  /
81    // assumed tree:               C---+
82    //                                  \         .
83    //                                   B
84    //
85    // inserted partitions: A---CB
86    //                      AC---B
87    //
88    // Both partitions are inserted here. Since they mutually exclude each other,
89    // only one of them can and will be inserted into the result tree.
90    //
91    // The algorithm assumes that all species missing in the partialTree,
92    // reside on the bigger side of each branch of the partialTree.
93    //
94    // Therefore the partition representing that state is added with normal weight.
95    // The opposite ("missing on smaller side of branch") is added with reduced
96    // probability.
97    //
98    // Note: The PART 'C---AB' might exists at EACH branch in the deconstructed tree
99    // and is inserted once globally (in deconstruct_partial_rootnode).
100
101    arb_assert(registration_phase());
102
103    PART *invs = part->clone();
104    invs->invertInSuperset(partialTree);
105
106    int diff = part->get_members() - invs->get_members();
107    if (diff != 0) {
108        if (diff<0) std::swap(part, invs);
109        // now part contains bigger partition
110
111        // Note: adding 'part' means "add unknown species to smaller side of the partition"
112        int edges_in_part = leafs_2_edges(part->get_members()+1, ROOTED);
113        int edges_in_invs = leafs_2_edges(invs->get_members()+1, ROOTED);
114
115        double part_prob = double(edges_in_invs)/edges_in_part; // probability for one unkown species to reside inside smaller set
116        arb_assert(part_prob>0.0 && part_prob<1.0);
117
118        int    unknownCount     = partialTree->get_nonmembers();
119        double all_in_part_prob = pow(part_prob, unknownCount); // probability ALL unkown species reside inside smaller set
120
121        part->set_faked_weight(all_in_part_prob); // make "unknown in smaller" unlikely
122                                                  // -> reduces chance to be picked and bootstrap IF picked
123    }
124    // else, if both partitions have equal size -> simply add both
125
126    put_part_from_complete_tree(part); // inserts 'A---CB' (i.e. missing species added to invs, which is smaller)
127    put_part_from_complete_tree(invs); // inserts 'B---CA' (i.e. missing species added to part)
128}
129
130inline bool insertionOrder_less(const PART *p1, const PART *p2) {
131    return p1->insertionOrder_cmp(p2)<0;
132}
133
134void PartRegistry::build_sorted_list(double overall_weight) {
135    //! sort the parts into insertion order.
136
137    arb_assert(!parts.empty()); // nothing has been added
138    arb_assert(registration_phase());
139
140    merge_artificial_parts();
141
142    copy(parts.begin(), parts.end(), std::back_insert_iterator<PartVector>(sorted));
143    sort(sorted.begin(), sorted.end(), insertionOrder_less);
144
145    parts.clear();
146
147    for (PartVector::iterator pi = sorted.begin(); pi != sorted.end(); ++pi) {
148        (*pi)->takeMean(overall_weight);
149    }
150
151    arb_assert(retrieval_phase());
152}
153
154void PartRegistry::merge_artificial_parts() {
155    /*! 'artificial_parts' contains those partitions that did not occur in
156     * one PARTIAL input tree.
157     *
158     * Not-yet-registered artificial_parts will be registered here.
159     * Their weight is faked to zero (aka "never occurs").
160     *
161     * Already registered artificial_parts are only skipped -> their bootstrap
162     * will be set correctly (just like when the branch as such does not exist
163     * in a non-partial tree).
164     */
165
166    for (PartSet::iterator arti = artificial_parts.begin(); arti != artificial_parts.end(); ++arti) {
167        if (parts.find(*arti) == parts.end()) {
168            // partition was not added by any known edge -> add plain artificial edge
169            (*arti)->set_faked_weight(0.0); // never occurs (implicates length=1.0; see get_len())
170            parts.insert(*arti);
171        }
172        else {
173            delete *arti;
174        }
175    }
176    artificial_parts.clear();
177}
178
Note: See TracBrowser for help on using the repository browser.