source: branches/profile/DIST/di_clustertree.hxx

Last change on this file was 11401, checked in by westram, 6 years ago
  • reintegrates 'tree' into 'trunk':
    • consensus trees:
      • support for merging partial trees ("worked" before, but results were crap; implements #65)
      • generated trees are automatically re-rooted and -ordered
      • always list source trees in consensus-tree-comment; show info about partial trees
      • fixed progress bar
    • made GBT_TREE a base class of other tree classes (implements #31)
    • save tree properties in properties (not in DB)
    • new functions 'Remove zombies/marked from ALL trees'
    • tree load/save: layout fixes
    • unit tests
      • added tests for basic tree modifications (PARSIMONY)
    • performance:
      • compute_tree updates tree information in one traversal
      • tree generators are now capable to generate any type of tree (w/o needing to copy it once)
    • bugfixes:
      • NNI (of marked species) was also always performed for colored species
      • centered beautify-order is stable now
      • improved 'search optimal root'
  • adds:
File size: 6.9 KB
Line 
1// =============================================================== //
2//                                                                 //
3//   File      : di_clustertree.hxx                                //
4//   Purpose   : Tree structure used for cluster detection         //
5//                                                                 //
6//   Coded by Ralf Westram (coder@reallysoft.de) in October 2009   //
7//   Institute of Microbiology (Technical University Munich)       //
8//   http://www.arb-home.de/                                       //
9//                                                                 //
10// =============================================================== //
11
12#ifndef DI_CLUSTERTREE_HXX
13#define DI_CLUSTERTREE_HXX
14
15#ifndef ARB_TREE_HXX
16#include <ARB_Tree.hxx>
17#endif
18#ifndef AP_SEQUENCE_HXX
19#include <AP_sequence.hxx>
20#endif
21
22#ifndef _GLIBCXX_CLIMITS
23#include <climits>
24#endif
25#ifndef _GLIBCXX_STRING
26#include <string>
27#endif
28#ifndef _GLIBCXX_MAP
29#include <map>
30#endif
31
32
33#define cl_assert(cond) arb_assert(cond)
34
35class AP_sequence;
36class ClusterTree;
37class arb_progress;
38
39enum ClusterState {
40    CS_UNKNOWN       = 0,                           // initial state
41    CS_TOO_SMALL     = 1,                           // cluster is too small
42    CS_MAYBE_CLUSTER = 2,                           // need to test whether this is a cluster
43    CS_NO_CLUSTER    = 4,                           // not a cluster (only worstKnownDistance is known)
44    CS_IS_CLUSTER    = 8,                           // subtree is cluster (all sequence distances are known)
45    CS_SUB_CLUSTER   = 16,                          // like CS_IS_CLUSTER, but father is cluster as well
46};
47
48const float NO_DISTANCE = -1.0;
49
50// ------------------------
51//      ClusterTreeRoot
52
53class ClusterTreeRoot : public ARB_seqtree_root {
54    AP_FLOAT maxDistance;                           // max. allowed distance inside cluster
55    unsigned minClusterSize;                        // min. size of cluster (number of leafs)
56
57public:
58    ClusterTreeRoot(AliView *aliview, AP_sequence *seqTemplate_, AP_FLOAT maxDistance_, size_t minClusterSize_);
59    virtual ~ClusterTreeRoot() OVERRIDE {}
60
61    DEFINE_DOWNCAST_ACCESSORS(ClusterTree, get_root_node, ARB_seqtree_root::get_root_node());
62
63    GB_ERROR find_clusters();
64    unsigned get_minClusterSize() const { return minClusterSize; }
65    AP_FLOAT get_maxDistance() const { return maxDistance; }
66};
67
68// ------------------
69//      LeafPairs
70
71class TwoLeafs {
72    ClusterTree *ct1, *ct2; // ct1<ct2!
73
74public:
75    TwoLeafs(ClusterTree *c1, ClusterTree *c2)
76        : ct1(c1<c2 ? c1 : c2)
77        , ct2(c1<c2 ? c2 : c1)
78    {}
79
80    const ClusterTree *first() const { return ct1; }
81    ClusterTree *first() { return ct1; }
82    const ClusterTree *second() const { return ct2; }
83    ClusterTree *second()  { return ct2; }
84
85    bool operator<(const TwoLeafs& other) const {
86        return ct1 == other.ct1 ? ct2<other.ct2 : ct1<other.ct1;
87    }
88};
89
90class LeafRelation {
91    const TwoLeafs *pair;
92    const AP_FLOAT  value;
93public:
94    LeafRelation(const TwoLeafs& pair_, AP_FLOAT value_)
95        : pair(&pair_)
96        , value(value_)
97    {}
98
99    bool operator<(const LeafRelation& other) const {
100        if (value == other.value) return *pair < *other.pair;
101        return value < other.value;
102    }
103
104    const TwoLeafs& get_pair() const  { return *pair; }
105    AP_FLOAT get_value() const { return value; }
106};
107
108typedef std::map<ClusterTree*, AP_FLOAT> NodeValues;
109typedef std::map<TwoLeafs, AP_FLOAT>     LeafRelations;
110typedef LeafRelations::const_iterator    LeafRelationCIter;
111
112// --------------------
113//      ClusterTree
114
115
116#if defined(DEBUG)
117#define TRACE_DIST_CALC
118#endif // DEBUG
119
120class ClusterTree : public ARB_countedTree { // derived from a Noncopyable
121    ClusterState state;
122
123    unsigned leaf_count;                            // number of leafs in subtree
124    unsigned clus_count;                            // number of clusters at and in subtree
125    unsigned depth;                                 // depth of node ( 1 == root )
126    AP_FLOAT min_bases;                             // min. bases used for comparing two members
127
128    NodeValues    *branchDepths;                    // leaf-depths (distance from this each leaf)
129    LeafRelations *branchDists;                     // distance (branch) between two leafs
130    LeafRelations *sequenceDists;                   // real distance between sequences of two leafs
131
132    TwoLeafs *worstKnownDistance;
133
134    void calc_branch_depths();
135    void calc_branch_dists();
136
137#if defined(TRACE_DIST_CALC)
138    unsigned calculatedDistances;
139#endif // TRACE_DIST_CALC
140
141public:
142    explicit ClusterTree(ClusterTreeRoot *tree_root_)
143        : ARB_countedTree(tree_root_)
144        , state(CS_UNKNOWN)
145        , leaf_count(0)
146        , clus_count(0)
147        , depth(0)
148        , min_bases(-1.0)
149        , branchDepths(NULL)
150        , branchDists(NULL)
151        , sequenceDists(NULL)
152        , worstKnownDistance(NULL)
153    {}
154
155    ~ClusterTree() OVERRIDE {
156        delete worstKnownDistance;
157        delete sequenceDists;
158        delete branchDists;
159        delete branchDepths;
160    }
161    DEFINE_TREE_ACCESSORS(ClusterTreeRoot, ClusterTree);
162
163    unsigned get_cluster_count() const { return clus_count; }
164    unsigned get_leaf_count() const OVERRIDE { return leaf_count; }
165    unsigned get_depth() const { return depth; }
166
167#if defined(TRACE_DIST_CALC)
168    unsigned get_calculated_distances() const { return calculatedDistances; }
169#endif // TRACE_DIST_CALC
170
171    bool knows_seqDists() const { return state & (CS_IS_CLUSTER|CS_SUB_CLUSTER); }
172
173    unsigned possible_relations() const { return (leaf_count*(leaf_count-1)) / 2; }
174    unsigned known_seqDists() const { return knows_seqDists() ? possible_relations() : 0; }
175
176    ClusterTree *get_cluster(size_t num);           // this allows sequentiell access to clusters
177    ClusterState get_state() const { return state; }
178
179    void init_tree() OVERRIDE;
180    void detect_clusters(arb_progress& progress);
181
182    const NodeValues *get_branch_depths() {
183        if (!branchDepths) calc_branch_depths();
184        return branchDepths;
185    }
186
187    const LeafRelations *get_branch_dists() {
188        if (!branchDists) calc_branch_dists();
189        return branchDists;
190    }
191
192    const LeafRelations *get_sequence_dists() const { return sequenceDists; }
193
194    AP_FLOAT get_seqDist(const TwoLeafs& pair);
195    const AP_FLOAT *has_seqDist(const TwoLeafs& pair) const;
196    const ClusterTree *commonFatherWith(const ClusterTree *other) const;
197
198    AP_FLOAT get_min_bases() const { return min_bases; }
199
200    void oblivion(bool forgetDistances); // forget unneeded data
201};
202
203struct ClusterTreeNodeFactory : public RootedTreeNodeFactory {
204    virtual RootedTree *makeNode(TreeRoot *root) const {
205        return new ClusterTree(DOWNCAST(ClusterTreeRoot*, root));
206    }
207};
208
209struct UseAnyTree : public ARB_tree_predicate {
210    bool selects(const ARB_seqtree&) const OVERRIDE { return true; }
211};
212
213#else
214#error di_clustertree.hxx included twice
215#endif // DI_CLUSTERTREE_HXX
Note: See TracBrowser for help on using the repository browser.