source: trunk/NTREE/NT_tree_cmp.h

Last change on this file was 17342, checked in by westram, 6 years ago
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 6.5 KB
Line 
1// =============================================================== //
2//                                                                 //
3//   File      : NT_tree_cmp.h                                     //
4//   Purpose   :                                                   //
5//                                                                 //
6//   Institute of Microbiology (Technical University Munich)       //
7//   http://www.arb-home.de/                                       //
8//                                                                 //
9// =============================================================== //
10
11#ifndef NT_TREE_CMP_H
12#define NT_TREE_CMP_H
13
14#ifndef ARBDB_BASE_H
15#include <arbdb_base.h>
16#endif
17#ifndef NT_LOCAL_H
18#include "NT_local.h"
19#endif
20
21enum GroupTransferMode {
22    COMPARE_TOPOLOGY,       // compare inner nodes + write differences to tree remarks
23    REMOVE_EXISTING_GROUPS, // transfer groups (removes existing groups)
24    KEEP_OLD_NAMES,         // transfer groups (keep old groups; combines mismatching names)
25};
26
27enum GroupsToTransfer {
28    XFER_ALL_GROUPS,
29    XFER_GROUPS_WITH_MARKED,
30};
31
32class RSpecSet;
33class TSpecSet;
34class GroupMatchScorer;
35
36class RatioLimits {
37    double minRatio;
38    double maxRatio;
39
40public:
41    RatioLimits()                           : minRatio(0.0),   maxRatio(1.0)   {}
42    RatioLimits(double lower, double upper) : minRatio(lower), maxRatio(upper) {}
43
44    bool insideLimits(double ratio) const { return minRatio <= ratio && ratio <= maxRatio; }
45    bool isValid() const { return minRatio>=0.0 && minRatio<=maxRatio && maxRatio<=1.0; }
46};
47
48class GroupPenalty {
49    double penalty; // 0 = perfect match; >0 otherwise
50
51    double ingroup_ratio;  // how many percent of original groups members are members of new group [0..1]
52    double outgroup_ratio; // how many percent of new group are no original group members [0..1]
53
54    bool keeled; // @@@ shall be used for #451
55
56    // data copied from unregistered set (TSpecSet; i.e. from source tree when moving groups):
57    int groupSize; // size of source group (known members)
58    int unknown;   // unknown members in source group
59
60    static double no_match_penalty() { return LONG_MAX; }
61
62#if defined(ASSERTION_USED)
63    bool isRatio(double r) const { return r>=0.0 && r<=1.0; }
64#endif
65
66public:
67    GroupPenalty() :
68        penalty(no_match_penalty()),
69        ingroup_ratio(-1),
70        outgroup_ratio(-1),
71        keeled(false),
72        groupSize(-1),
73        unknown(-1)
74    {}
75    GroupPenalty(double penalty_, double ingroup_ratio_, double outgroup_ratio_, int unreg_groupsize) :
76        penalty(penalty_),
77        ingroup_ratio(ingroup_ratio_),
78        outgroup_ratio(outgroup_ratio_),
79        keeled(false),
80        groupSize(unreg_groupsize),
81        unknown(-1)
82    {
83        nt_assert(isRatio(ingroup_ratio));
84        nt_assert(isRatio(outgroup_ratio));
85        nt_assert(groupSize>0);
86    }
87
88    bool doesMatch() const { return penalty != no_match_penalty(); }
89    bool betterThan(const GroupPenalty& other) const { return penalty < other.penalty; }
90
91    double get_penalty() const { return penalty; }
92    bool isPerfectMatch() const { return penalty == 0.0; }
93    double get_ingroup_ratio() const { return ingroup_ratio; }
94    double get_outgroup_ratio() const { return outgroup_ratio; }
95
96    int get_groupsize() const { nt_assert(groupSize>0); return groupSize; }
97    int get_unknown() const { nt_assert(unknown>=0); return unknown; }
98
99    bool shouldHaveBeenKeeled() const { return keeled; }
100
101    static GroupPenalty NoMatch() { return GroupPenalty(); } // syntactic sugar
102
103    void mark_as_keeled() { keeled = true; }
104    void addPenalty(double p) {
105        nt_assert(p>=0.0);
106        nt_assert(doesMatch()); // avoid "!doesMatch()"-condition gets destroyed
107        penalty += p;
108    }
109    void registerUnfound(const GroupMatchScorer& scorer, const TSpecSet& tset);
110};
111
112class GroupMatchScorer {
113    RatioLimits ingroup;  // only need lower limit (upper should always be 100%)
114    RatioLimits outgroup; // only need upper limit (lower should always be 0%)
115
116    // Pep: per error penalty (=absolute penalty=traditional penalty scoring)
117    double ingroupPep;  // penalty for each former member of group (when moved out of group, but still member of tree)
118    double outgroupPep; // penalty for each former non-member (when moved into group)
119    double unfoundPep;  // penalty for each unknown species (when moved into group and not member of source tree)
120
121    double keelPenalty; // penalty added for using keeled group
122
123    // RelPen: ratio penalty (=relative penalty)
124    double ingroupInvRelPen; // factor for (1-GroupPenalty::ingroup_ratio), i.e. penalty rel.to ratio of non-included former group members
125    double outgroupRelPen;   // factor for ( GroupPenalty::outgroup_ratio), i.e. penalty rel.to ratio of former non-member in new group
126
127    bool insideLimits(double ingroupRatio, double outgroupRatio) const {
128        return
129            ingroup.insideLimits(ingroupRatio) &&
130            outgroup.insideLimits(outgroupRatio);
131    }
132    GroupPenalty calcPenalty(long removed, long added, long commonSpecies);
133
134public:
135    GroupMatchScorer() :
136        ingroupPep(1.0),
137        outgroupPep(1.0),
138        unfoundPep(0.0001),
139        keelPenalty(0.01),
140        ingroupInvRelPen(0.0),
141        outgroupRelPen(0.0)
142    {}
143
144    GB_ERROR check_validity() const;
145
146    // setup:
147    void setLimits(const RatioLimits& ingroupLimits, const RatioLimits& outgroupLimits) {
148        nt_assert(ingroupLimits.insideLimits(1.0)); // upper should always be 100%
149        nt_assert(outgroupLimits.insideLimits(0.0)); // lower should always be 0%
150
151        ingroup  = ingroupLimits;
152        outgroup = outgroupLimits;
153    }
154    void setPerErrorPenalties(double ingroup_pep, double outgroup_pep, double unfound_pep) {
155        ingroupPep  = ingroup_pep;
156        outgroupPep = outgroup_pep;
157        unfoundPep  = unfound_pep;
158    }
159    void setRelativePenalties(double ingroup_inv_relpen, double outgroup_relpen) {
160        ingroupInvRelPen = ingroup_inv_relpen;
161        outgroupRelPen   = outgroup_relpen;
162    }
163
164    // scoring:
165    GroupPenalty matchGroups(const TSpecSet& sourceSet, const RSpecSet& targetSet, long commonSpecies, long overallSpecies);
166    double       calcUnknownMembersPenalty(const TSpecSet& sourceSet) const;
167};
168
169GB_ERROR NTREE_move_tree_info(GBDATA *gb_main, const char *tree_source, const char *tree_dest, const char *log_file, GroupTransferMode mode, GroupsToTransfer what, const GroupMatchScorer& scorer, const char *aci);
170
171#else
172#error NT_tree_cmp.h included twice
173#endif // NT_TREE_CMP_H
Note: See TracBrowser for help on using the repository browser.