source: branches/stable/SL/FILTER/AP_filter.hxx

Last change on this file was 16880, checked in by westram, 6 years ago
  • reintegrates 'multicore' into 'trunk'
    • derive sequence classes able to be combined from new class AP_combinableSeq
      • elim dummy functions from rest of hierarchy
    • implements non-writing combine for add-species (speedup: 25-35%)
    • introduce type Mutations ⇒ uses long everywhere (before mostly, but not always used float)
    • allows use of futures
      • nucleotide combine is able to calculate async (disabled via define atm → NEVER_COMBINE_ASYNC; needs more work on calling algorithm)
  • adds: log:branches/multicore@16769:16879
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1// =============================================================== //
2//                                                                 //
3//   File      : AP_filter.hxx                                     //
4//   Purpose   :                                                   //
5//                                                                 //
6//   Institute of Microbiology (Technical University Munich)       //
7//   http://www.arb-home.de/                                       //
8//                                                                 //
9// =============================================================== //
10
11#ifndef AP_FILTER_HXX
12#define AP_FILTER_HXX
13
14#ifndef ARBDB_BASE_H
15#include <arbdb_base.h>
16#endif
17#ifndef ARB_ASSERT_H
18#include <arb_assert.h>
19#endif
20#ifndef ARBTOOLS_H
21#include <arbtools.h>
22#endif
23
24#define af_assert(cond) arb_assert(cond)
25
// Byte-sized unsigned value; used below as element type of the simplify table.
using uchar = unsigned char;
27
// Kind of base simplification an AP_filter applies (see AP_filter::enable_simplify).
enum AWT_FILTER_SIMPLIFY {
    // marker value: AP_filter::get_simplify_table() sets up the table lazily while this is set
    AWT_FILTER_SIMPLIFY_NOT_INITIALIZED = -1,
    // the default (cf. AP_filter::enable_simplify)
    AWT_FILTER_SIMPLIFY_NONE            = 0,
    AWT_FILTER_SIMPLIFY_DNA             = 1,
    AWT_FILTER_SIMPLIFY_PROTEIN         = 2
};
34
// Tag type: selects the AP_filter(AF_Not, const AP_filter&) constructor.
enum AF_Not {
    NOT
};
// Operator argument for the AP_filter constructor that builds one filter from two others.
// NOTE(review): presumably a position-wise logical combination -- confirm in the implementation.
enum AF_Combine {
    AND = 0,
    OR  = 1,
    XOR = 2
};
37
38class AP_filter {
39    bool   *filter_mask;                            // true means "use position"
40    size_t  filter_len;                             // length of 'filter_mask'
41    size_t  real_len;                               // how many 'true's are in 'filter_mask'
42    long    update;                                 // timestamp
43
44    uchar               simplify[256];              // base -> simplified base
45    AWT_FILTER_SIMPLIFY simplify_type;
46
47    size_t *bootstrap; // bootstrap[i] points to random filter positions [0..real_len[
48
49    size_t *filterpos_2_seqpos;                     // filterpos -> sequencepos
50
51#if defined(ASSERTION_USED)
52    mutable bool checked_for_validity;
53#endif
54
55    void calc_filterpos_2_seqpos();
56
57    // ctor-helper-functions:
58    void init(size_t size);
59    void make_permeable(size_t size);
60    void init_from_string(const char *ifilter, const char *zerobases, size_t size);
61
62    size_t bootstrapped_filterpos(size_t bpos) const {
63        af_assert(does_bootstrap());
64        af_assert(bpos<real_len);
65        size_t fpos = bootstrap[bpos];
66        af_assert(fpos<real_len);
67        return fpos;
68    }
69
70public:
71    AP_filter(size_t size); // permeable filter (passes all columns)
72    AP_filter(const char *filter, const char *zerobases, size_t size);
73    AP_filter(const AP_filter& other);
74
75    AP_filter(AF_Not, const AP_filter& other);
76    AP_filter(const AP_filter& f1, AF_Combine comb, const AP_filter& f2);
77
78    ~AP_filter();
79    DECLARE_ASSIGNMENT_OPERATOR(AP_filter);
80
81    long get_timestamp() const { return update; }
82    size_t get_filtered_length() const { return real_len; }
83    size_t get_length() const { return filter_len; }
84
85    bool use_position(size_t pos) const {           // returns true if filter is set for position 'pos'
86        af_assert(checked_for_validity);
87        af_assert(pos<filter_len);
88        return filter_mask[pos];
89    }
90
91    const size_t *get_filterpos_2_seqpos() const {
92        if (!filterpos_2_seqpos) {
93            // this is no modification, it's lazy initialization:
94            const_cast<AP_filter*>(this)->calc_filterpos_2_seqpos();
95        }
96        return filterpos_2_seqpos;
97    }
98
99    void enable_simplify(AWT_FILTER_SIMPLIFY type); // default is AWT_FILTER_SIMPLIFY_NONE
100    const uchar *get_simplify_table() const {
101        if (simplify_type == AWT_FILTER_SIMPLIFY_NOT_INITIALIZED) {
102            // this is no modification, it's lazy initialization:
103            const_cast<AP_filter*>(this)->enable_simplify(AWT_FILTER_SIMPLIFY_NONE);
104        }
105        return simplify;
106    }
107
108    void enable_bootstrap();
109    bool does_bootstrap() const { return bootstrap; }
110
111    size_t bootstrapped_seqpos(size_t bpos) const {
112        size_t fpos   = bootstrapped_filterpos(bpos);
113        size_t spos = (get_filterpos_2_seqpos())[fpos];
114        af_assert(spos<filter_len);
115        return spos;
116    }
117
118    char *to_string() const;                        // convert to 0/1 string
119
120    char *blowup_string(const char *filtered_string, char insert) const;
121    char *filter_string(const char *fulllen_string) const;
122
123    GB_ERROR is_invalid() const {
124        /*! returns error
125         * - if filter is based on an empty alignment (i.e. no alignment is selected)
126         * - if all positions are filtered out (i.e. filtered sequences will be empty)
127         */
128
129#if defined(ASSERTION_USED)
130        checked_for_validity = true;
131#endif
132        if (get_filtered_length()) {
133            af_assert(get_filtered_length()<=get_length());
134            return NULp;
135        }
136        if (get_length()) return "Sequence completely filtered out (no columns left)";
137        return "No alignment selected";
138    }
139#if defined(ASSERTION_USED)
140    bool was_checked_for_validity() const { return checked_for_validity; }
141#endif
142};
143
144
145
146class AP_weights {
147    size_t    len;
148    GB_UINT4 *weights __attribute__((__aligned__(16)));
149
150public:
151
152    AP_weights(const AP_filter *fil); // dummy weights (all columns weighted equal)
153    AP_weights(const GB_UINT4 *w, size_t wlen, const AP_filter *fil);
154    AP_weights(const AP_weights& other);
155    ~AP_weights();
156    DECLARE_ASSIGNMENT_OPERATOR(AP_weights);
157
158    const GB_UINT4* get_weights() const {
159        return weights;
160    }
161
162    GB_UINT4 weight(size_t idx) const {
163        af_assert(idx<len);
164        return is_unweighted() ? 1 : weights[idx];
165    }
166
167    size_t length() const { return len; }
168    bool is_unweighted() const { return !weights; }
169};
170
171long AP_timer();
172
173#else
174#error AP_filter.hxx included twice
175#endif // AP_FILTER_HXX
Note: See TracBrowser for help on using the repository browser.