1 | // ================================================================ // |
---|
2 | // // |
---|
3 | // File : SequenceBuffer.cxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in December 2006 // |
---|
7 | // Institute of Microbiology (Technical University Munich) // |
---|
8 | // http://www.arb-home.de/ // |
---|
9 | // // |
---|
10 | // ================================================================ // |
---|
11 | #include "SequenceBuffer.h" |
---|
12 | |
---|
13 | using namespace std; |
---|
14 | |
---|
15 | void CharCounter::clear() { |
---|
16 | for (int i = 0; i<256; i++) count[i] = 0; |
---|
17 | all = 0; |
---|
18 | } |
---|
19 | |
---|
20 | void CharCounter::countChars(const string& line) { |
---|
21 | string::const_iterator e = line.end(); |
---|
22 | for (string::const_iterator i = line.begin(); i != e; ++i) { |
---|
23 | ++count[static_cast<unsigned char>(*i)]; |
---|
24 | ++all; |
---|
25 | } |
---|
26 | } |
---|
27 | |
---|
28 | // -------------------------------------------------------------------------------- |
---|
29 | |
---|
30 | void BaseCounter::calcOverallCounter() { |
---|
31 | gi_assert(char_count.isNull()); // may not be used for line counted BaseCounter |
---|
32 | gi_assert(count[BC_ALL] == 0); // already have a value for overall counter |
---|
33 | |
---|
34 | size_t all = 0; |
---|
35 | for (int i = 0; i<BC_ALL; ++i) all += count[i]; |
---|
36 | |
---|
37 | if (count[BC_ALL] == 0) { // got no value for all counter -> use calculated value |
---|
38 | count[BC_ALL] = all; |
---|
39 | } |
---|
40 | } |
---|
41 | |
---|
42 | void BaseCounter::checkOverallCounter() const { |
---|
43 | catchUpWithLineCounter(); |
---|
44 | |
---|
45 | size_t all = 0; |
---|
46 | |
---|
47 | for (int i = 0; i<BC_ALL; ++i) all += count[i]; |
---|
48 | |
---|
49 | gi_assert(count[BC_ALL]>0); // forgot to call calcOverallCounter ? |
---|
50 | if (count[BC_ALL] != all) { |
---|
51 | throw GBS_global_string("Overall bp (=%zu) does not match sum (=%zu) of single bases (Occurrence: '%s')", |
---|
52 | count[BC_ALL], all, source.c_str()); |
---|
53 | } |
---|
54 | } |
---|
55 | |
---|
56 | void BaseCounter::catchUpWithLineCounter() const { |
---|
57 | if (!char_count.isNull()) { |
---|
58 | CharCounter& cc = const_cast<CharCounter&>(*char_count); |
---|
59 | size_t all = cc.getCount(); // all bases |
---|
60 | |
---|
61 | if (all>0) { // anything counted ? |
---|
62 | size_t normal = 0; // normal bases |
---|
63 | size_t *mucount = const_cast<size_t*>(count); |
---|
64 | |
---|
65 | static const unsigned char normalChars[] = "aAcCgGtTuU"; |
---|
66 | static Base normalBase[] = { BC_A, BC_A, BC_C, BC_C, BC_G, BC_G, BC_T, BC_T, BC_T, BC_T }; |
---|
67 | |
---|
68 | // count standard bases |
---|
69 | for (unsigned i = 0; normalChars[i]; ++i) { |
---|
70 | size_t bp = cc.getCount(normalChars[i]); |
---|
71 | mucount[normalBase[i]] += bp; |
---|
72 | normal += bp; |
---|
73 | } |
---|
74 | |
---|
75 | mucount[BC_ALL] += all; |
---|
76 | mucount[BC_OTHER] += all-normal; |
---|
77 | |
---|
78 | cc.clear(); // reset counter |
---|
79 | } |
---|
80 | } |
---|
81 | } |
---|
82 | |
---|
83 | void BaseCounter::startLineCounter() { |
---|
84 | gi_assert(char_count.isNull()); |
---|
85 | char_count = new CharCounter; |
---|
86 | } |
---|
87 | |
---|
88 | |
---|
89 | void BaseCounter::expectEqual(const BaseCounter& other) const { |
---|
90 | // expect counters to be equal or throw error |
---|
91 | |
---|
92 | catchUpWithLineCounter(); |
---|
93 | other.catchUpWithLineCounter(); |
---|
94 | |
---|
95 | checkOverallCounter(); |
---|
96 | other.checkOverallCounter(); |
---|
97 | |
---|
98 | for (int i = 0; i<BC_COUNTERS; ++i) { |
---|
99 | if (count[i] != other.count[i]) { // mismatch in counter |
---|
100 | string error = "Base counter mismatch"; |
---|
101 | error = error+"\n "+source+"<>"+other.source; |
---|
102 | |
---|
103 | for (; i<BC_COUNTERS; ++i) { |
---|
104 | if (count[i] != other.count[i]) { // mismatch in counter |
---|
105 | string whichCounter; |
---|
106 | |
---|
107 | if (i == BC_OTHER) whichCounter = "other"; |
---|
108 | else if (i == BC_ALL) whichCounter = "overall"; |
---|
109 | else whichCounter = "ACGT"[i]; |
---|
110 | |
---|
111 | error = error+"\n "+whichCounter+": "+GBS_global_string("%zu <> %zu", count[i], other.count[i]); |
---|
112 | } |
---|
113 | } |
---|
114 | |
---|
115 | throw error; |
---|
116 | } |
---|
117 | } |
---|
118 | } |
---|
119 | |
---|
120 | // -------------------------------------------------------------------------------- |
---|
121 | |
---|
122 | SequenceBuffer::~SequenceBuffer() { |
---|
123 | delete [] seq; |
---|
124 | } |
---|
125 | |
---|
126 | const char *SequenceBuffer::getSequence() const { |
---|
127 | gi_assert(!seq); // don't call twice! |
---|
128 | if (!seq) { |
---|
129 | size_t len = baseCounter.getCount(BC_ALL); |
---|
130 | seq = new char[len+1]; |
---|
131 | |
---|
132 | char *sp = seq; |
---|
133 | stringVectorCIter e = lines.end(); |
---|
134 | |
---|
135 | for (stringVectorCIter i = lines.begin(); i != e; ++i) { |
---|
136 | size_t ilen = i->length(); |
---|
137 | memcpy(sp, i->c_str(), ilen); |
---|
138 | sp += ilen; |
---|
139 | } |
---|
140 | |
---|
141 | #if defined(DEBUG) |
---|
142 | size_t stored = sp-seq; |
---|
143 | gi_assert(stored == len); |
---|
144 | #endif // DEBUG |
---|
145 | |
---|
146 | seq[len] = 0; |
---|
147 | } |
---|
148 | return seq; |
---|
149 | } |
---|
150 | |
---|
151 | |
---|