source: branches/ali/DIST/di_clustertree.hxx

Last change on this file was 16880, checked in by westram, 7 years ago
  • reintegrates 'multicore' into 'trunk'
    • derive sequence classes able to be combined from new class AP_combinableSeq
      • elim dummy functions from rest of hierarchy
    • implements non-writing combine for add-species (speedup: 25-35%)
    • introduce type Mutations ⇒ uses long everywhere (before mostly, but not always used float)
    • allows use of futures
      • nucleotide combine is able to calculate async (disabled via define atm → NEVER_COMBINE_ASYNC; needs more work on calling algorithm)
  • adds: log:branches/multicore@16769:16879
File size: 6.8 KB
Line 
1// =============================================================== //
2//                                                                 //
3//   File      : di_clustertree.hxx                                //
4//   Purpose   : Tree structure used for cluster detection         //
5//                                                                 //
6//   Coded by Ralf Westram (coder@reallysoft.de) in October 2009   //
7//   Institute of Microbiology (Technical University Munich)       //
8//   http://www.arb-home.de/                                       //
9//                                                                 //
10// =============================================================== //
11
12#ifndef DI_CLUSTERTREE_HXX
13#define DI_CLUSTERTREE_HXX
14
#ifndef ARB_TREE_HXX
#include <ARB_Tree.hxx>
#endif
#ifndef AP_SEQUENCE_HXX
#include <AP_sequence.hxx>
#endif
#ifndef _GLIBCXX_FUNCTIONAL
#include <functional>
#endif
#ifndef _GLIBCXX_MAP
#include <map>
#endif
24
25
// assertion used by the cluster detection code (forwards to arb_assert)
#define cl_assert(cond) arb_assert(cond)

// forward declarations
class arb_progress;
class ClusterTree;
30
// State of a subtree during cluster detection.
// All states except CS_UNKNOWN occupy a bit of their own,
// which allows testing for several states with a single bit mask.
enum ClusterState {
    CS_UNKNOWN       = 0,      // initial state
    CS_TOO_SMALL     = 1 << 0, // cluster is too small
    CS_MAYBE_CLUSTER = 1 << 1, // need to test whether this is a cluster
    CS_NO_CLUSTER    = 1 << 2, // not a cluster (only worstKnownDistance is known)
    CS_IS_CLUSTER    = 1 << 3, // subtree is cluster (all sequence distances are known)
    CS_SUB_CLUSTER   = 1 << 4, // like CS_IS_CLUSTER, but father is cluster as well
};
39
40// ------------------------
41//      ClusterTreeRoot
42
43class ClusterTreeRoot FINAL_TYPE : public ARB_seqtree_root {
44    AP_FLOAT maxDistance;                           // max. allowed distance inside cluster
45    unsigned minClusterSize;                        // min. size of cluster (number of leafs)
46
47public:
48    ClusterTreeRoot(AliView *aliview, AP_sequence *seqTemplate_, AP_FLOAT maxDistance_, size_t minClusterSize_);
49    ~ClusterTreeRoot() OVERRIDE { predelete(); }
50
51    inline TreeNode *makeNode() const             OVERRIDE;
52    inline void destroyNode(TreeNode *node) const OVERRIDE;
53
54    DEFINE_DOWNCAST_ACCESSORS(ClusterTree, get_root_node, ARB_seqtree_root::get_root_node());
55
56    GB_ERROR find_clusters();
57    unsigned get_minClusterSize() const { return minClusterSize; }
58    AP_FLOAT get_maxDistance() const { return maxDistance; }
59};
60
61// ------------------
62//      LeafPairs
63
64class TwoLeafs {
65    ClusterTree *ct1, *ct2; // ct1<ct2!
66
67public:
68    TwoLeafs(ClusterTree *c1, ClusterTree *c2) :
69        ct1(c1<c2 ? c1 : c2),
70        ct2(c1<c2 ? c2 : c1)
71    {}
72
73    const ClusterTree *first() const { return ct1; }
74    const ClusterTree *second() const { return ct2; }
75
76    bool operator<(const TwoLeafs& other) const {
77        return ct1 == other.ct1 ? ct2<other.ct2 : ct1<other.ct1;
78    }
79};
80
81class LeafRelation {
82    const TwoLeafs *pair;
83    const AP_FLOAT  value;
84public:
85    LeafRelation(const TwoLeafs& pair_, AP_FLOAT value_) :
86        pair(&pair_),
87        value(value_)
88    {}
89
90    bool operator<(const LeafRelation& other) const {
91        if (value == other.value) return *pair < *other.pair;
92        return value < other.value;
93    }
94
95    const TwoLeafs& get_pair() const  { return *pair; }
96};
97
98typedef std::map<ClusterTree*, AP_FLOAT> NodeValues;
99typedef std::map<TwoLeafs, AP_FLOAT>     LeafRelations;
100typedef LeafRelations::const_iterator    LeafRelationCIter;
101
102// --------------------
103//      ClusterTree
104
105
// debug builds count the calculated distances
#ifdef DEBUG
#define TRACE_DIST_CALC
#endif // DEBUG
109
110class ClusterTree FINAL_TYPE : public ARB_countedTree { // derived from a Noncopyable
111    ClusterState state;
112
113    unsigned leaf_count;                            // number of leafs in subtree
114    unsigned clus_count;                            // number of clusters at and in subtree
115    unsigned depth;                                 // depth of node ( 1 == root )
116    AP_FLOAT min_bases;                             // min. bases used for comparing two members
117
118    NodeValues    *branchDepths;                    // leaf-depths (distance from this each leaf)
119    LeafRelations *branchDists;                     // distance (branch) between two leafs
120    LeafRelations *sequenceDists;                   // real distance between sequences of two leafs
121
122    TwoLeafs *worstKnownDistance;
123
124    void calc_branch_depths();
125    void calc_branch_dists();
126
127#if defined(TRACE_DIST_CALC)
128    unsigned calculatedDistances;
129#endif // TRACE_DIST_CALC
130
131    unsigned get_depth() const { return depth; }
132    bool knows_seqDists() const { return state & (CS_IS_CLUSTER|CS_SUB_CLUSTER); }
133    unsigned possible_relations() const { return (leaf_count*(leaf_count-1)) / 2; }
134    unsigned known_seqDists() const { return knows_seqDists() ? possible_relations() : 0; }
135
136    const NodeValues *get_branch_depths() {
137        if (!branchDepths) calc_branch_depths();
138        return branchDepths;
139    }
140
141    const LeafRelations *get_branch_dists() {
142        if (!branchDists) calc_branch_dists();
143        return branchDists;
144    }
145
146    AP_FLOAT get_seqDist(const TwoLeafs& pair);
147    const AP_FLOAT *has_seqDist(const TwoLeafs& pair) const;
148    const ClusterTree *commonFatherWith(const ClusterTree *other) const;
149
150    void oblivion(bool forgetDistances); // forget unneeded data
151
152protected:
153    ~ClusterTree() OVERRIDE {
154        delete worstKnownDistance;
155        delete sequenceDists;
156        delete branchDists;
157        delete branchDepths;
158    }
159    friend class ClusterTreeRoot;
160public:
161    explicit ClusterTree(ClusterTreeRoot *tree_root_) :
162        ARB_countedTree(tree_root_),
163        state(CS_UNKNOWN),
164        leaf_count(0),
165        clus_count(0),
166        depth(0),
167        min_bases(-1.0),
168        branchDepths(NULp),
169        branchDists(NULp),
170        sequenceDists(NULp),
171        worstKnownDistance(NULp)
172    {}
173
174    DEFINE_TREE_ACCESSORS(ClusterTreeRoot, ClusterTree);
175    OVERRIDE_SEQ_ACCESSORS(AP_combinableSeq,ARB_countedTree);
176
177    unsigned get_cluster_count() const { return clus_count; }
178    unsigned get_leaf_count() const OVERRIDE { return leaf_count; }
179
180#if defined(TRACE_DIST_CALC)
181    unsigned get_calculated_distances() const { return calculatedDistances; }
182#endif // TRACE_DIST_CALC
183
184    ClusterState get_state() const { return state; }
185
186    void init_tree() OVERRIDE;
187    void detect_clusters(arb_progress& progress);
188
189    const LeafRelations *get_sequence_dists() const { return sequenceDists; }
190
191    AP_FLOAT get_min_bases() const { return min_bases; }
192};
193
194inline TreeNode *ClusterTreeRoot::makeNode() const { return new ClusterTree(const_cast<ClusterTreeRoot*>(this)); }
195inline void ClusterTreeRoot::destroyNode(TreeNode *node) const { delete DOWNCAST(ClusterTree*, node); }
196
197class UseAnyTree : public ARB_tree_predicate {
198    bool selects(const ARB_seqtree&) const OVERRIDE { return true; }
199};
200
201#else
202#error di_clustertree.hxx included twice
203#endif // DI_CLUSTERTREE_HXX
Note: See TracBrowser for help on using the repository browser.