1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : NT_tree_cmp.h // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #ifndef NT_TREE_CMP_H |
---|
12 | #define NT_TREE_CMP_H |
---|
13 | |
---|
14 | #ifndef ARBDB_BASE_H |
---|
15 | #include <arbdb_base.h> |
---|
16 | #endif |
---|
17 | #ifndef NT_LOCAL_H |
---|
18 | #include "NT_local.h" |
---|
19 | #endif |
---|
20 | |
---|
21 | enum GroupTransferMode { |
---|
22 | COMPARE_TOPOLOGY, // compare inner nodes + write differences to tree remarks |
---|
23 | REMOVE_EXISTING_GROUPS, // transfer groups (removes existing groups) |
---|
24 | KEEP_OLD_NAMES, // transfer groups (keep old groups; combines mismatching names) |
---|
25 | }; |
---|
26 | |
---|
27 | enum GroupsToTransfer { |
---|
28 | XFER_ALL_GROUPS, |
---|
29 | XFER_GROUPS_WITH_MARKED, |
---|
30 | }; |
---|
31 | |
---|
32 | class RSpecSet; |
---|
33 | class TSpecSet; |
---|
34 | class GroupMatchScorer; |
---|
35 | |
---|
36 | class RatioLimits { |
---|
37 | double minRatio; |
---|
38 | double maxRatio; |
---|
39 | |
---|
40 | public: |
---|
41 | RatioLimits() : minRatio(0.0), maxRatio(1.0) {} |
---|
42 | RatioLimits(double lower, double upper) : minRatio(lower), maxRatio(upper) {} |
---|
43 | |
---|
44 | bool insideLimits(double ratio) const { return minRatio <= ratio && ratio <= maxRatio; } |
---|
45 | bool isValid() const { return minRatio>=0.0 && minRatio<=maxRatio && maxRatio<=1.0; } |
---|
46 | }; |
---|
47 | |
---|
48 | class GroupPenalty { |
---|
49 | double penalty; // 0 = perfect match; >0 otherwise |
---|
50 | |
---|
51 | double ingroup_ratio; // how many percent of original groups members are members of new group [0..1] |
---|
52 | double outgroup_ratio; // how many percent of new group are no original group members [0..1] |
---|
53 | |
---|
54 | bool keeled; // @@@ shall be used for #451 |
---|
55 | |
---|
56 | // data copied from unregistered set (TSpecSet; i.e. from source tree when moving groups): |
---|
57 | int groupSize; // size of source group (known members) |
---|
58 | int unknown; // unknown members in source group |
---|
59 | |
---|
60 | static double no_match_penalty() { return LONG_MAX; } |
---|
61 | |
---|
62 | #if defined(ASSERTION_USED) |
---|
63 | bool isRatio(double r) const { return r>=0.0 && r<=1.0; } |
---|
64 | #endif |
---|
65 | |
---|
66 | public: |
---|
67 | GroupPenalty() : |
---|
68 | penalty(no_match_penalty()), |
---|
69 | ingroup_ratio(-1), |
---|
70 | outgroup_ratio(-1), |
---|
71 | keeled(false), |
---|
72 | groupSize(-1), |
---|
73 | unknown(-1) |
---|
74 | {} |
---|
75 | GroupPenalty(double penalty_, double ingroup_ratio_, double outgroup_ratio_, int unreg_groupsize) : |
---|
76 | penalty(penalty_), |
---|
77 | ingroup_ratio(ingroup_ratio_), |
---|
78 | outgroup_ratio(outgroup_ratio_), |
---|
79 | keeled(false), |
---|
80 | groupSize(unreg_groupsize), |
---|
81 | unknown(-1) |
---|
82 | { |
---|
83 | nt_assert(isRatio(ingroup_ratio)); |
---|
84 | nt_assert(isRatio(outgroup_ratio)); |
---|
85 | nt_assert(groupSize>0); |
---|
86 | } |
---|
87 | |
---|
88 | bool doesMatch() const { return penalty != no_match_penalty(); } |
---|
89 | bool betterThan(const GroupPenalty& other) const { return penalty < other.penalty; } |
---|
90 | |
---|
91 | double get_penalty() const { return penalty; } |
---|
92 | bool isPerfectMatch() const { return penalty == 0.0; } |
---|
93 | double get_ingroup_ratio() const { return ingroup_ratio; } |
---|
94 | double get_outgroup_ratio() const { return outgroup_ratio; } |
---|
95 | |
---|
96 | int get_groupsize() const { nt_assert(groupSize>0); return groupSize; } |
---|
97 | int get_unknown() const { nt_assert(unknown>=0); return unknown; } |
---|
98 | |
---|
99 | bool shouldHaveBeenKeeled() const { return keeled; } |
---|
100 | |
---|
101 | static GroupPenalty NoMatch() { return GroupPenalty(); } // syntactic sugar |
---|
102 | |
---|
103 | void mark_as_keeled() { keeled = true; } |
---|
104 | void addPenalty(double p) { |
---|
105 | nt_assert(p>=0.0); |
---|
106 | nt_assert(doesMatch()); // avoid "!doesMatch()"-condition gets destroyed |
---|
107 | penalty += p; |
---|
108 | } |
---|
109 | void registerUnfound(const GroupMatchScorer& scorer, const TSpecSet& tset); |
---|
110 | }; |
---|
111 | |
---|
112 | class GroupMatchScorer { |
---|
113 | RatioLimits ingroup; // only need lower limit (upper should always be 100%) |
---|
114 | RatioLimits outgroup; // only need upper limit (lower should always be 0%) |
---|
115 | |
---|
116 | // Pep: per error penalty (=absolute penalty=traditional penalty scoring) |
---|
117 | double ingroupPep; // penalty for each former member of group (when moved out of group, but still member of tree) |
---|
118 | double outgroupPep; // penalty for each former non-member (when moved into group) |
---|
119 | double unfoundPep; // penalty for each unknown species (when moved into group and not member of source tree) |
---|
120 | |
---|
121 | double keelPenalty; // penalty added for using keeled group |
---|
122 | |
---|
123 | // RelPen: ratio penalty (=relative penalty) |
---|
124 | double ingroupInvRelPen; // factor for (1-GroupPenalty::ingroup_ratio), i.e. penalty rel.to ratio of non-included former group members |
---|
125 | double outgroupRelPen; // factor for ( GroupPenalty::outgroup_ratio), i.e. penalty rel.to ratio of former non-member in new group |
---|
126 | |
---|
127 | bool insideLimits(double ingroupRatio, double outgroupRatio) const { |
---|
128 | return |
---|
129 | ingroup.insideLimits(ingroupRatio) && |
---|
130 | outgroup.insideLimits(outgroupRatio); |
---|
131 | } |
---|
132 | GroupPenalty calcPenalty(long removed, long added, long commonSpecies); |
---|
133 | |
---|
134 | public: |
---|
135 | GroupMatchScorer() : |
---|
136 | ingroupPep(1.0), |
---|
137 | outgroupPep(1.0), |
---|
138 | unfoundPep(0.0001), |
---|
139 | keelPenalty(0.01), |
---|
140 | ingroupInvRelPen(0.0), |
---|
141 | outgroupRelPen(0.0) |
---|
142 | {} |
---|
143 | |
---|
144 | GB_ERROR check_validity() const; |
---|
145 | |
---|
146 | // setup: |
---|
147 | void setLimits(const RatioLimits& ingroupLimits, const RatioLimits& outgroupLimits) { |
---|
148 | nt_assert(ingroupLimits.insideLimits(1.0)); // upper should always be 100% |
---|
149 | nt_assert(outgroupLimits.insideLimits(0.0)); // lower should always be 0% |
---|
150 | |
---|
151 | ingroup = ingroupLimits; |
---|
152 | outgroup = outgroupLimits; |
---|
153 | } |
---|
154 | void setPerErrorPenalties(double ingroup_pep, double outgroup_pep, double unfound_pep) { |
---|
155 | ingroupPep = ingroup_pep; |
---|
156 | outgroupPep = outgroup_pep; |
---|
157 | unfoundPep = unfound_pep; |
---|
158 | } |
---|
159 | void setRelativePenalties(double ingroup_inv_relpen, double outgroup_relpen) { |
---|
160 | ingroupInvRelPen = ingroup_inv_relpen; |
---|
161 | outgroupRelPen = outgroup_relpen; |
---|
162 | } |
---|
163 | |
---|
164 | // scoring: |
---|
165 | GroupPenalty matchGroups(const TSpecSet& sourceSet, const RSpecSet& targetSet, long commonSpecies, long overallSpecies); |
---|
166 | double calcUnknownMembersPenalty(const TSpecSet& sourceSet) const; |
---|
167 | }; |
---|
168 | |
---|
169 | GB_ERROR NTREE_move_tree_info(GBDATA *gb_main, const char *tree_source, const char *tree_dest, const char *log_file, GroupTransferMode mode, GroupsToTransfer what, const GroupMatchScorer& scorer, const char *aci); |
---|
170 | |
---|
171 | #else |
---|
172 | #error NT_tree_cmp.h included twice |
---|
173 | #endif // NT_TREE_CMP_H |
---|