| 1 | // ================================================================ // |
|---|
| 2 | // // |
|---|
| 3 | // File : SequenceBuffer.h // |
|---|
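| 4 | // Purpose : Declares CharCounter, BaseCounter and SequenceBuffer (buffers sequence lines, counts characters/bases) // |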
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in December 2006 // |
|---|
| 7 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 8 | // http://www.arb-home.de/ // |
|---|
| 9 | // // |
|---|
| 10 | // ================================================================ // |
|---|
| 11 | #ifndef SEQUENCEBUFFER_H |
|---|
| 12 | #define SEQUENCEBUFFER_H |
|---|
| 13 | |
|---|
| 14 | #ifndef TYPES_H |
|---|
| 15 | #include "types.h" |
|---|
| 16 | #endif |
|---|
| 17 | #ifndef SMARTPTR_H |
|---|
| 18 | #include <smartptr.h> |
|---|
| 19 | #endif |
|---|
| 20 | |
|---|
// Keeps one occurrence counter for each of the 256 possible 'unsigned char'
// values plus a separate overall counter.
//
// countChars() and clear() are only declared in this header; what exactly
// they do with the counters has to be looked up in their definitions.
class CharCounter {
    size_t count[256]; // one slot per character code (indexed by unsigned char)
    size_t all;        // value handed out by the argument-less getCount()

public:
    // Every new instance is passed through clear() before it can be used.
    CharCounter() {
        clear();
    }

    // Presumably resets all counters (definition not visible here).
    void clear();

    // Presumably tallies the characters contained in 'line'
    // (definition not visible here).
    void countChars(const std::string& line);

    // Returns the counter slot belonging to character code 'idx'.
    size_t getCount(unsigned char idx) const {
        return count[idx];
    }

    // Returns the overall counter.
    size_t getCount() const {
        return all;
    }
};
|---|
| 34 | |
|---|
| 35 | |
|---|
// Index into the counter array of BaseCounter.
// Enumerators are consecutive starting at 0, so the last one (BC_COUNTERS)
// equals the number of real counters and is used as array size / loop bound.
enum Base {
    BC_A,        // counter slot for 'A'
    BC_C,        // counter slot for 'C'
    BC_G,        // counter slot for 'G'
    BC_T,        // counter slot for 'T'
    BC_OTHER,    // presumably everything that is none of the four bases above
    BC_ALL,      // presumably the overall counter (see check/calcOverallCounter)
    BC_COUNTERS  // number of counters; not a counter itself
};
|---|
| 37 | |
|---|
| 38 | class BaseCounter { |
|---|
| 39 | std::string source; // where does information originate from |
|---|
| 40 | size_t count[BC_COUNTERS]; // number of occurrences of single bases |
|---|
| 41 | |
|---|
| 42 | SmartPtr<CharCounter> char_count; // character counter (used by addLine) |
|---|
| 43 | |
|---|
| 44 | void catchUpWithLineCounter() const; |
|---|
| 45 | |
|---|
| 46 | public: |
|---|
| 47 | BaseCounter(const std::string& Source) |
|---|
| 48 | : source(Source) |
|---|
| 49 | { |
|---|
| 50 | for (int i = 0; i<BC_COUNTERS; ++i) count[i] = 0; |
|---|
| 51 | } |
|---|
| 52 | |
|---|
| 53 | void addCount(Base base, size_t amount) { count[base] += amount; } |
|---|
| 54 | void checkOverallCounter() const; |
|---|
| 55 | void calcOverallCounter(); |
|---|
| 56 | |
|---|
| 57 | void startLineCounter(); |
|---|
| 58 | void addLine(const std::string& line) { |
|---|
| 59 | gi_assert(!char_count.isNull()); // call startLineCounter before! |
|---|
| 60 | char_count->countChars(line); |
|---|
| 61 | } |
|---|
| 62 | |
|---|
| 63 | void expectEqual(const BaseCounter& other) const; |
|---|
| 64 | size_t getCount(Base base) const { |
|---|
| 65 | catchUpWithLineCounter(); |
|---|
| 66 | return count[base]; |
|---|
| 67 | } |
|---|
| 68 | }; |
|---|
| 69 | |
|---|
| 70 | class SequenceBuffer : virtual Noncopyable { |
|---|
| 71 | stringVector lines; // stores input lines |
|---|
| 72 | BaseCounter baseCounter; |
|---|
| 73 | mutable char *seq; |
|---|
| 74 | |
|---|
| 75 | public: |
|---|
| 76 | SequenceBuffer(size_t expectedSize) : |
|---|
| 77 | baseCounter("sequence data"), |
|---|
| 78 | seq(NULp) |
|---|
| 79 | { |
|---|
| 80 | lines.reserve(expectedSize/60+1); // flatfiles use 60 bases per sequence line |
|---|
| 81 | baseCounter.startLineCounter(); |
|---|
| 82 | } |
|---|
| 83 | ~SequenceBuffer(); |
|---|
| 84 | |
|---|
| 85 | void addLine(const std::string& line) { |
|---|
| 86 | lines.push_back(line); |
|---|
| 87 | baseCounter.addLine(line); |
|---|
| 88 | } |
|---|
| 89 | |
|---|
| 90 | const BaseCounter& getBaseCounter() const { return baseCounter; } |
|---|
| 91 | BaseCounter& getBaseCounter() { return baseCounter; } |
|---|
| 92 | |
|---|
| 93 | const char *getSequence() const; |
|---|
| 94 | }; |
|---|
| 95 | |
|---|
| 96 | |
|---|
| 97 | #else |
|---|
| 98 | #error SequenceBuffer.h included twice |
|---|
| 99 | #endif // SEQUENCEBUFFER_H |
|---|
| 100 | |
|---|