1 | // ================================================================ // |
---|
2 | // // |
---|
3 | // File : SequenceBuffer.h // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in December 2006 // |
---|
7 | // Institute of Microbiology (Technical University Munich) // |
---|
8 | // http://www.arb-home.de/ // |
---|
9 | // // |
---|
10 | // ================================================================ // |
---|
11 | #ifndef SEQUENCEBUFFER_H |
---|
12 | #define SEQUENCEBUFFER_H |
---|
13 | |
---|
14 | #ifndef TYPES_H |
---|
15 | #include "types.h" |
---|
16 | #endif |
---|
17 | #ifndef SMARTPTR_H |
---|
18 | #include <smartptr.h> |
---|
19 | #endif |
---|
20 | |
---|
/**
 * Per-character tally.
 *
 * Holds one counter for each of the 256 possible 'unsigned char' values
 * plus one separate overall counter.
 *
 * clear() and countChars() are only declared in this header; how they
 * update the counters is defined in the implementation file.
 */
class CharCounter {
    size_t count[256]; // one slot per character code (read via getCount(idx))
    size_t all;        // value reported by the parameterless getCount()

public:
    // A new counter is set up by calling clear().
    CharCounter() { clear(); }

    // --- modifiers (defined out-of-line) ---
    void clear();
    void countChars(const std::string& line);

    // --- accessors ---

    // Returns the overall counter.
    size_t getCount() const { return all; }

    // Returns the counter stored for character code 'idx'.
    // 'idx' is an unsigned char, so it always addresses a valid slot of count[].
    size_t getCount(unsigned char idx) const { return count[idx]; }
};
---|
34 | |
---|
35 | |
---|
// Selects one of the counters kept by BaseCounter (used as index into its count[] array).
// BC_COUNTERS is not a counter itself: it equals the number of enumerators
// preceding it and serves as the size of that array.
enum Base {
    BC_A        = 0,
    BC_C        = 1,
    BC_G        = 2,
    BC_T        = 3,
    BC_OTHER    = 4,
    BC_ALL      = 5,
    BC_COUNTERS = 6
};
---|
37 | |
---|
38 | class BaseCounter { |
---|
39 | std::string source; // where does information originate from |
---|
40 | size_t count[BC_COUNTERS]; // number of occurrences of single bases |
---|
41 | |
---|
42 | SmartPtr<CharCounter> char_count; // character counter (used by addLine) |
---|
43 | |
---|
44 | void catchUpWithLineCounter() const; |
---|
45 | |
---|
46 | public: |
---|
47 | BaseCounter(const std::string& Source) |
---|
48 | : source(Source) |
---|
49 | { |
---|
50 | for (int i = 0; i<BC_COUNTERS; ++i) count[i] = 0; |
---|
51 | } |
---|
52 | |
---|
53 | void addCount(Base base, size_t amount) { count[base] += amount; } |
---|
54 | void checkOverallCounter() const; |
---|
55 | void calcOverallCounter(); |
---|
56 | |
---|
57 | void startLineCounter(); |
---|
58 | void addLine(const std::string& line) { |
---|
59 | gi_assert(!char_count.isNull()); // call startLineCounter before! |
---|
60 | char_count->countChars(line); |
---|
61 | } |
---|
62 | |
---|
63 | void expectEqual(const BaseCounter& other) const; |
---|
64 | size_t getCount(Base base) const { |
---|
65 | catchUpWithLineCounter(); |
---|
66 | return count[base]; |
---|
67 | } |
---|
68 | }; |
---|
69 | |
---|
70 | class SequenceBuffer : virtual Noncopyable { |
---|
71 | stringVector lines; // stores input lines |
---|
72 | BaseCounter baseCounter; |
---|
73 | mutable char *seq; |
---|
74 | |
---|
75 | public: |
---|
76 | SequenceBuffer(size_t expectedSize) : |
---|
77 | baseCounter("sequence data"), |
---|
78 | seq(NULp) |
---|
79 | { |
---|
80 | lines.reserve(expectedSize/60+1); // flatfiles use 60 bases per sequence line |
---|
81 | baseCounter.startLineCounter(); |
---|
82 | } |
---|
83 | ~SequenceBuffer(); |
---|
84 | |
---|
85 | void addLine(const std::string& line) { |
---|
86 | lines.push_back(line); |
---|
87 | baseCounter.addLine(line); |
---|
88 | } |
---|
89 | |
---|
90 | const BaseCounter& getBaseCounter() const { return baseCounter; } |
---|
91 | BaseCounter& getBaseCounter() { return baseCounter; } |
---|
92 | |
---|
93 | const char *getSequence() const; |
---|
94 | }; |
---|
95 | |
---|
96 | |
---|
97 | #else |
---|
98 | #error SequenceBuffer.h included twice |
---|
99 | #endif // SEQUENCEBUFFER_H |
---|
100 | |
---|