source: branches/help/NTREE/NT_species_set.h

Last change on this file was 17342, checked in by westram, 6 years ago
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1// =============================================================== //
2//                                                                 //
3//   File      : NT_species_set.h                                  //
4//   Purpose   : species sets used to compare groups between trees //
5//                                                                 //
6//   Institute of Microbiology (Technical University Munich)       //
7//   http://www.arb-home.de/                                       //
8//                                                                 //
9// =============================================================== //
10
11#ifndef NT_SPECIES_SET_H
12#define NT_SPECIES_SET_H
13
14#ifndef NT_TREE_CMP_H
15#include "NT_tree_cmp.h"
16#endif
17#ifndef ARBTOOLS_H
18#include <arbtools.h>
19#endif
20#ifndef AP_TREE_HXX
21#include <AP_Tree.hxx>
22#endif
23
24class RSpecSet;
25class TSpecSet;
26class arb_progress;
27
28// @@@ improve compare logic:
29// - species sets (and bitstrings) should only contain species that occur in both trees
30// - species that occur only in RSpecSet-tree shall be stored in RSpecSet (like done in TSpecSet::unfound_species_count)
31// - a small penalty shall be assigned (as done for TSpecSet)
32
33class SpecSetRegistry : virtual Noncopyable {
34    long species_counter; // number of species added to hash
35    long nspecies;
36    long nsets; // number of RSpecSet added to 'sets'
37
38    RSpecSet **sets;
39    int        set_bits[256];
40
41    GroupMatchScorer  scorer;
42    arb_progress     *progress;
43    GB_HASH          *species_hash; // contains [1..N]
44    unsigned char    *tmp_bitstring;
45
46    int max_nsets() const { return leafs_2_innerNodes(nspecies, ROOTED); }
47
48    void dump_bitstring(const char *tag, unsigned char *bs);
49
50    void add(const char *species_name); // max nspecies
51    void add(RSpecSet *rset);           // max 2 * nspecies
52
53    double search_and_remember_best_match_and_log_errors(const TSpecSet *tset, FILE *log);
54
55#if defined(UNIT_TESTS)
56    friend void TEST_species_sets();
57#endif
58
59public:
60    SpecSetRegistry(long nspecies_, arb_progress *progress_, const GroupMatchScorer& scorer_);
61    ~SpecSetRegistry();
62    void finish(GB_ERROR& error); // call before destruction to retrieve errors
63
64    long bitstring_bytes() const { return (nspecies-1)/8 + 1; }
65    long bitstring_longs() const { return (bitstring_bytes()-1)/sizeof(long) + 1; }
66
67    unsigned char *allocate_bitstring() const { return ARB_calloc<unsigned char>(bitstring_longs()*sizeof(long)); }
68
69    long get_species_index(const char *species_name) const { return GBS_read_hash(species_hash, species_name); }
70    RSpecSet *registerTree(AP_tree *node);
71
72    RSpecSet *search_best_match(const TSpecSet *tset, GroupPenalty& min_penalty);
73    TSpecSet *find_best_matches_info(AP_tree *node, FILE *log, bool compare_node_info);
74    GB_ERROR  write_node_information(FILE *log, bool delete_old_nodes, GroupsToTransfer what, const char *aci);
75
76    void setScorer(const GroupMatchScorer& newScorer) { scorer = newScorer; }
77};
78
79
80class SpecSet : virtual Noncopyable {
81protected:
82    // SpecSet should only be used by derived classes
83
84    int known_members; // number of registered members
85
86    void init(AP_tree *nodei, const SpecSetRegistry& ssr);
87
88    SpecSet(AP_tree *nodei, const SpecSetRegistry& ssr, const char *species_name);           // create from species..
89    SpecSet(AP_tree *nodei, const SpecSetRegistry& ssr, const SpecSet *l, const SpecSet *r); // ..or from two subsets
90    ~SpecSet();
91
92public:
93    // @@@ make member private
94    unsigned char *bitstring;
95    AP_tree       *set_node; // node in tree (from which SpecSet was initialized)
96
97    bool is_leaf_set() const { return set_node && set_node->is_leaf(); } // @@@ might be wrong for zombies
98    int get_known_members() const { return known_members; }
99};
100
101class RSpecSet : public SpecSet { // derived from Noncopyable
102    // set registered in SpecSetRegistry
103    AP_tree      *best_node;  // node in other tree
104    GroupPenalty  best_match; // result of matching 'this' versus TSpecSet of best_node
105
106public:
107    RSpecSet(AP_tree *nodei, const SpecSetRegistry& ssr, const char *species_name);             // create from species..
108    RSpecSet(AP_tree *nodei, const SpecSetRegistry& ssr, const RSpecSet *l, const RSpecSet *r); // ..or from two subsets
109
110    void storeBetterMatch(const GroupPenalty& match, AP_tree *matched_node) {
111        // if 'this' was detected as best match for any TSpecSet of other (not registered) tree,
112        // -> store match in best_match + node of TSpecSet in best_node:
113
114        nt_assert(!best_match.betterThan(match)); // avoid overwriting with worse match
115
116        best_match = match;
117        best_node  = matched_node;
118    }
119
120    int size() const { return known_members; } // only contains known members by definition
121    const GroupPenalty& bestMatch() const { return best_match; }
122    AP_tree* matchedNode() const { return best_node; }
123};
124
125class TSpecSet : public SpecSet { // derived from Noncopyable
126    // set tested against sets in registry
127
128    int unfound_species_count; // species missing in SpecSetRegistry
129public:
130    TSpecSet(AP_tree *nodei, const SpecSetRegistry& ssr, const char *species_name);             // create from species..
131    TSpecSet(AP_tree *nodei, const SpecSetRegistry& ssr, const TSpecSet *l, const TSpecSet *r); // ..or from two subsets
132
133    int size() const { return known_members + unfound_species_count; }
134    int get_unknown_members() const { return unfound_species_count; }
135};
136
137#else
138#error NT_species_set.h included twice
139#endif // NT_SPECIES_SET_H
Note: See TracBrowser for help on using the repository browser.