| 1 | // ================================================================ // |
|---|
| 2 | // // |
|---|
| 3 | // File : SequenceBuffer.cxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in December 2006 // |
|---|
| 7 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 8 | // http://www.arb-home.de/ // |
|---|
| 9 | // // |
|---|
| 10 | // ================================================================ // |
|---|
| 11 | #include "SequenceBuffer.h" |
|---|
| 12 | |
|---|
| 13 | using namespace std; |
|---|
| 14 | |
|---|
| 15 | void CharCounter::clear() { |
|---|
| 16 | for (int i = 0; i<256; i++) count[i] = 0; |
|---|
| 17 | all = 0; |
|---|
| 18 | } |
|---|
| 19 | |
|---|
| 20 | void CharCounter::countChars(const string& line) { |
|---|
| 21 | string::const_iterator e = line.end(); |
|---|
| 22 | for (string::const_iterator i = line.begin(); i != e; ++i) { |
|---|
| 23 | ++count[static_cast<unsigned char>(*i)]; |
|---|
| 24 | ++all; |
|---|
| 25 | } |
|---|
| 26 | } |
|---|
| 27 | |
|---|
| 28 | // -------------------------------------------------------------------------------- |
|---|
| 29 | |
|---|
| 30 | void BaseCounter::calcOverallCounter() { |
|---|
| 31 | gi_assert(char_count.isNull()); // may not be used for line counted BaseCounter |
|---|
| 32 | gi_assert(count[BC_ALL] == 0); // already have a value for overall counter |
|---|
| 33 | |
|---|
| 34 | size_t all = 0; |
|---|
| 35 | for (int i = 0; i<BC_ALL; ++i) all += count[i]; // LOOP_VECTORIZED[!<810] |
|---|
| 36 | |
|---|
| 37 | if (count[BC_ALL] == 0) { // got no value for all counter -> use calculated value |
|---|
| 38 | count[BC_ALL] = all; |
|---|
| 39 | } |
|---|
| 40 | } |
|---|
| 41 | |
|---|
| 42 | void BaseCounter::checkOverallCounter() const { |
|---|
| 43 | catchUpWithLineCounter(); |
|---|
| 44 | |
|---|
| 45 | size_t all = 0; |
|---|
| 46 | |
|---|
| 47 | for (int i = 0; i<BC_ALL; ++i) all += count[i]; // LOOP_VECTORIZED[!<810] |
|---|
| 48 | |
|---|
| 49 | gi_assert(count[BC_ALL]>0); // forgot to call calcOverallCounter ? |
|---|
| 50 | if (count[BC_ALL] != all) { |
|---|
| 51 | throw GBS_global_string("Overall bp (=%zu) does not match sum (=%zu) of single bases (Occurrence: '%s')", |
|---|
| 52 | count[BC_ALL], all, source.c_str()); |
|---|
| 53 | } |
|---|
| 54 | } |
|---|
| 55 | |
|---|
| 56 | void BaseCounter::catchUpWithLineCounter() const { |
|---|
| 57 | if (!char_count.isNull()) { |
|---|
| 58 | CharCounter& cc = const_cast<CharCounter&>(*char_count); |
|---|
| 59 | size_t all = cc.getCount(); // all bases |
|---|
| 60 | |
|---|
| 61 | if (all>0) { // anything counted ? |
|---|
| 62 | size_t normal = 0; // normal bases |
|---|
| 63 | size_t *mucount = const_cast<size_t*>(count); |
|---|
| 64 | |
|---|
| 65 | static const unsigned char normalChars[] = "aAcCgGtTuU"; |
|---|
| 66 | static Base normalBase[] = { BC_A, BC_A, BC_C, BC_C, BC_G, BC_G, BC_T, BC_T, BC_T, BC_T }; |
|---|
| 67 | |
|---|
| 68 | // count standard bases |
|---|
| 69 | for (unsigned i = 0; normalChars[i]; ++i) { |
|---|
| 70 | size_t bp = cc.getCount(normalChars[i]); |
|---|
| 71 | mucount[normalBase[i]] += bp; |
|---|
| 72 | normal += bp; |
|---|
| 73 | } |
|---|
| 74 | |
|---|
| 75 | mucount[BC_ALL] += all; |
|---|
| 76 | mucount[BC_OTHER] += all-normal; |
|---|
| 77 | |
|---|
| 78 | cc.clear(); // reset counter |
|---|
| 79 | } |
|---|
| 80 | } |
|---|
| 81 | } |
|---|
| 82 | |
|---|
| 83 | void BaseCounter::startLineCounter() { |
|---|
| 84 | gi_assert(char_count.isNull()); |
|---|
| 85 | char_count = new CharCounter; |
|---|
| 86 | } |
|---|
| 87 | |
|---|
| 88 | |
|---|
| 89 | void BaseCounter::expectEqual(const BaseCounter& other) const { |
|---|
| 90 | // expect counters to be equal or throw error |
|---|
| 91 | |
|---|
| 92 | catchUpWithLineCounter(); |
|---|
| 93 | other.catchUpWithLineCounter(); |
|---|
| 94 | |
|---|
| 95 | checkOverallCounter(); |
|---|
| 96 | other.checkOverallCounter(); |
|---|
| 97 | |
|---|
| 98 | for (int i = 0; i<BC_COUNTERS; ++i) { |
|---|
| 99 | if (count[i] != other.count[i]) { // mismatch in counter |
|---|
| 100 | string error = "Base counter mismatch"; |
|---|
| 101 | error = error+"\n "+source+"<>"+other.source; |
|---|
| 102 | |
|---|
| 103 | for (; i<BC_COUNTERS; ++i) { |
|---|
| 104 | if (count[i] != other.count[i]) { // mismatch in counter |
|---|
| 105 | string whichCounter; |
|---|
| 106 | |
|---|
| 107 | if (i == BC_OTHER) whichCounter = "other"; |
|---|
| 108 | else if (i == BC_ALL) whichCounter = "overall"; |
|---|
| 109 | else whichCounter = "ACGT"[i]; |
|---|
| 110 | |
|---|
| 111 | error = error+"\n "+whichCounter+": "+GBS_global_string("%zu <> %zu", count[i], other.count[i]); |
|---|
| 112 | } |
|---|
| 113 | } |
|---|
| 114 | |
|---|
| 115 | throw error; |
|---|
| 116 | } |
|---|
| 117 | } |
|---|
| 118 | } |
|---|
| 119 | |
|---|
| 120 | // -------------------------------------------------------------------------------- |
|---|
| 121 | |
|---|
| 122 | SequenceBuffer::~SequenceBuffer() { |
|---|
| 123 | delete [] seq; |
|---|
| 124 | } |
|---|
| 125 | |
|---|
| 126 | const char *SequenceBuffer::getSequence() const { |
|---|
| 127 | gi_assert(!seq); // don't call twice! |
|---|
| 128 | if (!seq) { |
|---|
| 129 | size_t len = baseCounter.getCount(BC_ALL); |
|---|
| 130 | seq = new char[len+1]; |
|---|
| 131 | |
|---|
| 132 | char *sp = seq; |
|---|
| 133 | stringVectorCIter e = lines.end(); |
|---|
| 134 | |
|---|
| 135 | for (stringVectorCIter i = lines.begin(); i != e; ++i) { |
|---|
| 136 | size_t ilen = i->length(); |
|---|
| 137 | memcpy(sp, i->c_str(), ilen); |
|---|
| 138 | sp += ilen; |
|---|
| 139 | } |
|---|
| 140 | |
|---|
| 141 | #if defined(DEBUG) |
|---|
| 142 | size_t stored = sp-seq; |
|---|
| 143 | gi_assert(stored == len); |
|---|
| 144 | #endif // DEBUG |
|---|
| 145 | |
|---|
| 146 | seq[len] = 0; |
|---|
| 147 | } |
|---|
| 148 | return seq; |
|---|
| 149 | } |
|---|
| 150 | |
|---|
| 151 | |
|---|