1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : insdel.cxx // |
---|
4 | // Purpose : insert/delete columns // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | // AISC_MKPT_PROMOTE:#ifndef ARBDB_BASE_H |
---|
12 | // AISC_MKPT_PROMOTE:#include <arbdb_base.h> |
---|
13 | // AISC_MKPT_PROMOTE:#endif |
---|
14 | |
---|
15 | #include "insdel.h" |
---|
16 | #include <RangeList.h> |
---|
17 | |
---|
18 | #include <arbdbt.h> |
---|
19 | #include <adGene.h> |
---|
20 | #include <arb_progress.h> |
---|
21 | #include <arb_defs.h> |
---|
22 | #include <arb_diff.h> |
---|
23 | #include <algorithm> |
---|
24 | |
---|
25 | using namespace std; |
---|
26 | |
---|
27 | #define id_assert(cond) arb_assert(cond) |
---|
28 | |
---|
29 | // -------------------------------------------------------------------------------- |
---|
30 | // helper to hold any kind of unit (char, int, float) |
---|
31 | |
---|
32 | class UnitPtr { |
---|
33 | const void *ptr; |
---|
34 | public: |
---|
35 | UnitPtr() : ptr(NULL) {} |
---|
36 | UnitPtr(const void *ptr_) |
---|
37 | : ptr(ptr_) |
---|
38 | { |
---|
39 | id_assert(ptr); |
---|
40 | } |
---|
41 | |
---|
42 | void set_pointer(const void *ptr_) { |
---|
43 | id_assert(!ptr); |
---|
44 | ptr = ptr_; |
---|
45 | } |
---|
46 | const void *get_pointer() const { return ptr; } |
---|
47 | const void *expect_pointer() const { id_assert(ptr); return ptr; } |
---|
48 | }; |
---|
49 | struct UnitPair { |
---|
50 | UnitPtr left, right; |
---|
51 | }; |
---|
52 | |
---|
// Three-way comparison: returns -1 if t1<t2, 1 if t1>t2, and 0 otherwise.
// Only operator< and operator> of T are used.
template <typename T>
inline int compare_type(const T& t1, const T& t2) {
    if (t1<t2) return -1;
    if (t1>t2) return 1;
    return 0;
}
---|
57 | |
---|
58 | // -------------------------------------------------------------------------------- |
---|
59 | |
---|
class AliData;                        // defined below
typedef SmartPtr<AliData> AliDataPtr; // smart-pointer handle used to pass AliData objects around
---|
62 | |
---|
63 | // -------------------------------------------------------------------------------- |
---|
64 | |
---|
65 | class AliData { |
---|
66 | size_t size; |
---|
67 | static GB_ERROR op_error; |
---|
68 | |
---|
69 | public: |
---|
70 | AliData(size_t size_) : size(size_) {} |
---|
71 | virtual ~AliData() {} |
---|
72 | |
---|
73 | virtual size_t unitsize() const = 0; |
---|
74 | virtual bool has_slice() const = 0; |
---|
75 | |
---|
76 | enum memop { |
---|
77 | COPY_TO, // always returns 0 |
---|
78 | COMPARE_WITH, // returns compare value |
---|
79 | CHECK_DELETE, // return 0 if ok to delete, otherwise op_error is set |
---|
80 | }; |
---|
81 | |
---|
82 | void clear_error() const { op_error = NULL; } |
---|
83 | void set_error(GB_ERROR error) const { |
---|
84 | id_assert(error); |
---|
85 | id_assert(!op_error); |
---|
86 | op_error = error; |
---|
87 | } |
---|
88 | |
---|
89 | virtual int operate_on_mem(void *mem, size_t start, size_t count, memop op) const = 0; |
---|
90 | virtual int cmp_data(size_t start, const AliData& other, size_t ostart, size_t count) const = 0; |
---|
91 | |
---|
92 | void copyPartTo(void *mem, size_t start, size_t count) const { operate_on_mem(mem, start, count, COPY_TO); } |
---|
93 | int cmpPartWith(const void *mem, size_t start, size_t count) const { |
---|
94 | id_assert(is_valid_part(start, count)); |
---|
95 | return operate_on_mem(const_cast<void*>(mem), start, count, COMPARE_WITH); // COMPARE_WITH does not modify |
---|
96 | } |
---|
97 | GB_ERROR check_delete_allowed(size_t start, size_t count) const { |
---|
98 | op_error = NULL; |
---|
99 | id_assert(start <= size); |
---|
100 | IF_ASSERTION_USED(int forbidden =) operate_on_mem(NULL, start, std::min(count, size-start), CHECK_DELETE); |
---|
101 | id_assert(correlated(forbidden, op_error)); |
---|
102 | return op_error; |
---|
103 | } |
---|
104 | |
---|
105 | virtual UnitPtr unit_left_of(size_t pos) const = 0; |
---|
106 | virtual UnitPtr unit_right_of(size_t pos) const = 0; |
---|
107 | |
---|
108 | virtual AliDataPtr create_gap(size_t gapsize, const UnitPair& gapinfo) const = 0; |
---|
109 | virtual AliDataPtr slice_down(size_t start, size_t count) const = 0; |
---|
110 | |
---|
111 | size_t elems() const { return size; } |
---|
112 | size_t memsize() const { return unitsize()*elems(); } |
---|
113 | void copyTo(void *mem) const { copyPartTo(mem, 0, elems()); } |
---|
114 | bool empty() const { return !elems(); } |
---|
115 | |
---|
116 | int cmp_whole_data(const AliData& other) const { |
---|
117 | int cmp = cmp_data(0, other, 0, std::min(elems(), other.elems())); |
---|
118 | if (cmp == 0) { // prefixes are equal |
---|
119 | return compare_type(elems(), other.elems()); |
---|
120 | } |
---|
121 | return cmp; |
---|
122 | } |
---|
123 | |
---|
124 | bool equals(const AliData& other) const { |
---|
125 | if (&other == this) return true; |
---|
126 | if (elems() != other.elems()) return false; |
---|
127 | |
---|
128 | return cmp_whole_data(other) == 0; |
---|
129 | } |
---|
130 | bool differs_from(const AliData& other) const { return !equals(other); } |
---|
131 | |
---|
132 | bool is_valid_pos(size_t pos) const { return pos < elems(); } |
---|
133 | bool is_valid_between(size_t pos) const { return pos <= elems(); } // pos == 0 -> before first base; pos == elems() -> after last base |
---|
134 | |
---|
135 | bool is_valid_part(size_t start, size_t count) const { |
---|
136 | return is_valid_between(start) && is_valid_between(start+count); |
---|
137 | } |
---|
138 | }; |
---|
139 | |
---|
140 | GB_ERROR AliData::op_error = NULL; |
---|
141 | |
---|
142 | // -------------------------------------------------------------------------------- |
---|
143 | |
---|
144 | class AliDataSlice : public AliData { |
---|
145 | AliDataPtr from; |
---|
146 | size_t offset; |
---|
147 | |
---|
148 | static int fix_amount(AliDataPtr from, size_t offset, size_t amount) { |
---|
149 | if (amount) { |
---|
150 | size_t from_size = from->elems(); |
---|
151 | if (offset>from_size) { |
---|
152 | amount = 0; |
---|
153 | } |
---|
154 | else { |
---|
155 | size_t last_pos = offset+amount-1; |
---|
156 | size_t last_from = from->elems()-1; |
---|
157 | |
---|
158 | if (last_pos > last_from) { |
---|
159 | id_assert(last_from >= offset); |
---|
160 | amount = last_from-offset+1; |
---|
161 | } |
---|
162 | } |
---|
163 | } |
---|
164 | return amount; |
---|
165 | } |
---|
166 | |
---|
167 | AliDataSlice(AliDataPtr from_, size_t offset_, size_t amount_) |
---|
168 | : AliData(fix_amount(from_, offset_, amount_)), |
---|
169 | from(from_), |
---|
170 | offset(offset_) |
---|
171 | { |
---|
172 | id_assert(!from->has_slice()); // do not double-slice |
---|
173 | } |
---|
174 | |
---|
175 | public: |
---|
176 | static AliDataPtr make(AliDataPtr from, size_t offset, size_t amount) { |
---|
177 | return (offset == 0 && amount >= from->elems()) |
---|
178 | ? from |
---|
179 | : (from->has_slice() |
---|
180 | ? from->slice_down(offset, amount) |
---|
181 | : new AliDataSlice(from, offset, amount)); |
---|
182 | } |
---|
183 | |
---|
184 | size_t unitsize() const OVERRIDE { return from->unitsize(); } |
---|
185 | bool has_slice() const OVERRIDE { return true; } |
---|
186 | |
---|
187 | AliDataPtr create_gap(size_t gapsize, const UnitPair& gapinfo) const OVERRIDE { |
---|
188 | return from->create_gap(gapsize, gapinfo); |
---|
189 | } |
---|
190 | AliDataPtr slice_down(size_t start, size_t count) const OVERRIDE { |
---|
191 | return new AliDataSlice(from, offset+start, std::min(count, elems())); |
---|
192 | } |
---|
193 | int operate_on_mem(void *mem, size_t start, size_t count, memop op) const OVERRIDE { |
---|
194 | id_assert(is_valid_part(start, count)); |
---|
195 | return from->operate_on_mem(mem, start+offset, count, op); |
---|
196 | } |
---|
197 | UnitPtr unit_left_of(size_t pos) const OVERRIDE { |
---|
198 | id_assert(is_valid_between(pos)); |
---|
199 | return from->unit_left_of(pos+offset); |
---|
200 | } |
---|
201 | UnitPtr unit_right_of(size_t pos) const OVERRIDE { |
---|
202 | id_assert(is_valid_between(pos)); |
---|
203 | return from->unit_right_of(pos+offset); |
---|
204 | } |
---|
205 | int cmp_data(size_t start, const AliData& other, size_t ostart, size_t count) const OVERRIDE { |
---|
206 | id_assert(is_valid_part(start, count)); |
---|
207 | id_assert(other.is_valid_part(ostart, count)); |
---|
208 | |
---|
209 | return from->cmp_data(start+offset, other, ostart, count); |
---|
210 | } |
---|
211 | }; |
---|
212 | |
---|
213 | class ComposedAliData : public AliData { |
---|
214 | AliDataPtr left, right; |
---|
215 | bool hasSlice; |
---|
216 | |
---|
217 | ComposedAliData(AliDataPtr l, AliDataPtr r) |
---|
218 | : AliData(l->elems()+r->elems()), |
---|
219 | left(l), |
---|
220 | right(r), |
---|
221 | hasSlice(left->has_slice() || right->has_slice()) |
---|
222 | { |
---|
223 | id_assert(l->unitsize() == r->unitsize()); |
---|
224 | id_assert(l->elems()); |
---|
225 | id_assert(r->elems()); |
---|
226 | } |
---|
227 | friend AliDataPtr concat(AliDataPtr left, AliDataPtr right); // for above ctor |
---|
228 | |
---|
229 | void *inc_by_units(void *mem, size_t units) const { return reinterpret_cast<char*>(mem)+units*unitsize(); } |
---|
230 | |
---|
231 | public: |
---|
232 | size_t unitsize() const OVERRIDE { return left->unitsize(); } |
---|
233 | bool has_slice() const OVERRIDE { return hasSlice; } |
---|
234 | |
---|
235 | AliDataPtr create_gap(size_t gapsize, const UnitPair& gapinfo) const OVERRIDE { |
---|
236 | return left->create_gap(gapsize, gapinfo); |
---|
237 | } |
---|
238 | AliDataPtr slice_down(size_t start, size_t count) const OVERRIDE { |
---|
239 | size_t left_elems = left->elems(); |
---|
240 | |
---|
241 | if (left_elems <= start) { // left is before slice |
---|
242 | return AliDataSlice::make(right, start-left_elems, count); |
---|
243 | } |
---|
244 | |
---|
245 | size_t pos_behind = start+count; |
---|
246 | if (left_elems >= pos_behind) { // right is behind slice |
---|
247 | return AliDataSlice::make(left, start, min(count, left_elems)); |
---|
248 | } |
---|
249 | |
---|
250 | size_t take_left = left_elems-start; |
---|
251 | size_t take_right = count-take_left; |
---|
252 | |
---|
253 | return new ComposedAliData( |
---|
254 | AliDataSlice::make(left, start, take_left), |
---|
255 | AliDataSlice::make(right, 0, take_right) |
---|
256 | ); |
---|
257 | } |
---|
258 | int operate_on_mem(void *mem, size_t start, size_t count, memop op) const OVERRIDE { |
---|
259 | size_t left_elems = left->elems(); |
---|
260 | size_t take_left = 0; |
---|
261 | int res = 0; |
---|
262 | if (start<left_elems) { |
---|
263 | take_left = min(count, left_elems-start); |
---|
264 | res = left->operate_on_mem(mem, start, take_left, op); |
---|
265 | } |
---|
266 | |
---|
267 | if (res == 0) { |
---|
268 | size_t take_right = count-take_left; |
---|
269 | if (take_right) { |
---|
270 | size_t rstart = start>left_elems ? start-left_elems : 0; |
---|
271 | id_assert(right->is_valid_part(rstart, take_right)); |
---|
272 | res = right->operate_on_mem(inc_by_units(mem, take_left), rstart, take_right, op); |
---|
273 | } |
---|
274 | } |
---|
275 | return res; |
---|
276 | } |
---|
277 | int cmp_data(size_t start, const AliData& other, size_t ostart, size_t count) const OVERRIDE { |
---|
278 | size_t left_elems = left->elems(); |
---|
279 | size_t take_left = 0; |
---|
280 | int cmp = 0; |
---|
281 | if (start<left_elems) { |
---|
282 | take_left = min(count, left_elems-start); |
---|
283 | cmp = left->cmp_data(start, other, ostart, take_left); |
---|
284 | } |
---|
285 | |
---|
286 | if (cmp == 0) { |
---|
287 | size_t take_right = count-take_left; |
---|
288 | if (take_right) { |
---|
289 | size_t rstart = start>left_elems ? start-left_elems : 0; |
---|
290 | size_t rostart = ostart+take_left; |
---|
291 | |
---|
292 | id_assert(is_valid_part(rstart, take_right)); |
---|
293 | id_assert(other.is_valid_part(rostart, take_right)); |
---|
294 | |
---|
295 | cmp = right->cmp_data(rstart, other, rostart, take_right); |
---|
296 | } |
---|
297 | } |
---|
298 | return cmp; |
---|
299 | } |
---|
300 | |
---|
301 | UnitPtr unit_left_of(size_t pos) const OVERRIDE { |
---|
302 | id_assert(is_valid_between(pos)); |
---|
303 | if (left->elems() == pos) { // split between left and right |
---|
304 | id_assert(pos >= 1); |
---|
305 | return left->unit_right_of(pos-1); |
---|
306 | } |
---|
307 | else if (left->elems() < pos) { // split inside or behind 'right' |
---|
308 | return right->unit_left_of(pos-left->elems()); |
---|
309 | } |
---|
310 | else { // split inside or frontof 'left' |
---|
311 | return left->unit_left_of(pos); |
---|
312 | } |
---|
313 | } |
---|
314 | UnitPtr unit_right_of(size_t pos) const OVERRIDE { |
---|
315 | id_assert(is_valid_between(pos)); |
---|
316 | if (left->elems() == pos) { // split between left and right |
---|
317 | id_assert(pos >= 1); |
---|
318 | return right->unit_left_of(0); |
---|
319 | } |
---|
320 | else if (left->elems() < pos) { // split inside or behind 'right' |
---|
321 | return right->unit_right_of(pos-left->elems()); |
---|
322 | } |
---|
323 | else { // split inside or frontof 'left' |
---|
324 | return left->unit_right_of(pos); |
---|
325 | } |
---|
326 | } |
---|
327 | }; |
---|
328 | |
---|
329 | // -------------------------------------------------------------------------------- |
---|
330 | |
---|
331 | class Deletable { // define characters allowed to delete (only applicable to TypedAliData<char>) |
---|
332 | bool deletable[256]; |
---|
333 | |
---|
334 | void init(bool val) { |
---|
335 | for (int i = 0; i<256; ++i) { |
---|
336 | deletable[i] = val; |
---|
337 | } |
---|
338 | } |
---|
339 | |
---|
340 | public: |
---|
341 | enum DeleteWhat { NOTHING, ANYTHING }; |
---|
342 | explicit Deletable(DeleteWhat what) { |
---|
343 | switch (what) { |
---|
344 | case ANYTHING: init(true); break; |
---|
345 | case NOTHING: init(false); break; |
---|
346 | } |
---|
347 | } |
---|
348 | explicit Deletable(const char *allowed) { |
---|
349 | init(false); |
---|
350 | for (int i = 0; allowed[i]; ++i) { |
---|
351 | deletable[safeCharIndex(allowed[i])] = true; |
---|
352 | } |
---|
353 | } |
---|
354 | |
---|
355 | GB_ERROR get_delete_error(const char *data, size_t start, size_t count) const { |
---|
356 | GB_ERROR error = NULL; |
---|
357 | id_assert(count > 0); |
---|
358 | size_t end = start+count-1; |
---|
359 | for (size_t col = start; col <= end && !error; ++col) { |
---|
360 | if (!deletable[safeCharIndex(data[col])]) { |
---|
361 | error = GBS_global_string("You tried to delete '%c' at position %zu -> Operation aborted", data[col], col); |
---|
362 | } |
---|
363 | } |
---|
364 | return error; |
---|
365 | } |
---|
366 | }; |
---|
367 | |
---|
368 | // -------------------------------------------------------------------------------- |
---|
369 | |
---|
370 | template<typename T> |
---|
371 | class TypedAliData : public AliData { |
---|
372 | T gap; |
---|
373 | |
---|
374 | protected: |
---|
375 | static const T *typed_ptr(const UnitPtr& uptr) { return (const T*)uptr.get_pointer(); } |
---|
376 | const T* std_gap_ptr() const { return ⪆ } |
---|
377 | |
---|
378 | public: |
---|
379 | TypedAliData(size_t size_, T gap_) |
---|
380 | : AliData(size_), |
---|
381 | gap(gap_) |
---|
382 | {} |
---|
383 | |
---|
384 | const T& std_gap() const { return gap; } |
---|
385 | |
---|
386 | size_t unitsize() const OVERRIDE { return sizeof(T); } |
---|
387 | bool has_slice() const OVERRIDE { return false; } |
---|
388 | |
---|
389 | virtual UnitPtr at_ptr(size_t pos) const = 0; |
---|
390 | AliDataPtr create_gap(size_t gapsize, const UnitPair& /*gapinfo*/) const OVERRIDE; |
---|
391 | __ATTR__NORETURN AliDataPtr slice_down(size_t /*start*/, size_t /*count*/) const OVERRIDE { |
---|
392 | GBK_terminate("logic error: slice_down called for explicit TypedAliData"); |
---|
393 | } |
---|
394 | UnitPtr unit_left_of(size_t pos) const OVERRIDE { |
---|
395 | id_assert(is_valid_between(pos)); |
---|
396 | return at_ptr(pos-1); |
---|
397 | } |
---|
398 | UnitPtr unit_right_of(size_t pos) const OVERRIDE { |
---|
399 | id_assert(is_valid_between(pos)); |
---|
400 | return at_ptr(pos); |
---|
401 | } |
---|
402 | }; |
---|
403 | |
---|
404 | template<typename T> |
---|
405 | struct SpecificGap : public TypedAliData<T> { |
---|
406 | typedef TypedAliData<T> BaseType; |
---|
407 | |
---|
408 | SpecificGap(size_t gapsize, const T& gap_) |
---|
409 | : BaseType(gapsize, gap_) |
---|
410 | {} |
---|
411 | int operate_on_mem(void *mem, size_t IF_ASSERTION_USED(start), size_t count, AliData::memop op) const OVERRIDE { |
---|
412 | id_assert(BaseType::is_valid_part(start, count)); |
---|
413 | switch (op) { |
---|
414 | case AliData::COPY_TO: { |
---|
415 | T *typedMem = (T*)mem; |
---|
416 | for (size_t a = 0; a<count; ++a) { |
---|
417 | typedMem[a] = BaseType::std_gap(); |
---|
418 | } |
---|
419 | break; |
---|
420 | } |
---|
421 | case AliData::COMPARE_WITH: { |
---|
422 | const T *typedMem = (const T*)mem; |
---|
423 | for (size_t a = 0; a<count; ++a) { |
---|
424 | int cmp = compare_type(BaseType::std_gap(), typedMem[a]); |
---|
425 | if (cmp) return cmp; |
---|
426 | } |
---|
427 | break; |
---|
428 | } |
---|
429 | case AliData::CHECK_DELETE: { |
---|
430 | break; // deleting an inserted gap is always permitted |
---|
431 | } |
---|
432 | } |
---|
433 | return 0; |
---|
434 | } |
---|
435 | int cmp_data(size_t start, const AliData& other, size_t ostart, size_t count) const OVERRIDE { |
---|
436 | const SpecificGap<T> *other_is_gap = dynamic_cast<const SpecificGap<T>*>(&other); |
---|
437 | if (other_is_gap) { |
---|
438 | return compare_type(BaseType::std_gap(), other_is_gap->std_gap()); |
---|
439 | } |
---|
440 | return -other.cmp_data(ostart, *this, start, count); |
---|
441 | } |
---|
442 | UnitPtr at_ptr(size_t pos) const OVERRIDE { |
---|
443 | if (pos<BaseType::elems()) return UnitPtr(BaseType::std_gap_ptr()); |
---|
444 | return UnitPtr(); |
---|
445 | } |
---|
446 | }; |
---|
447 | |
---|
448 | template <typename T> |
---|
449 | AliDataPtr TypedAliData<T>::create_gap(size_t gapsize, const UnitPair& /*gapinfo*/) const { |
---|
450 | return new SpecificGap<T>(gapsize, std_gap()); |
---|
451 | } |
---|
452 | |
---|
// Remembers the original alignment size and whether data may exceed the alignment size.
class SizeAwarable {
    bool   allows_oversize;
    size_t org_ali_size;

public:
    SizeAwarable(bool allows_oversize_, size_t ali_size_)
        : allows_oversize(allows_oversize_),
          org_ali_size(ali_size_)
    {}

    // Returns the size permitted for data of size 'term_size' in an alignment of size 'new_ali_size':
    // 'new_ali_size', plus (only if oversize is allowed) the amount by which
    // 'term_size' exceeds the original alignment size.
    size_t get_allowed_size(size_t term_size, size_t new_ali_size) const {
        if (!allows_oversize || term_size <= org_ali_size) {
            return new_ali_size;
        }
        return new_ali_size + (term_size-org_ali_size);
    }
};
---|
471 | inline SizeAwarable dontAllowOversize(size_t ali_size) { return SizeAwarable(false, ali_size); } |
---|
472 | |
---|
473 | template<typename T> |
---|
474 | inline GB_ERROR check_delete_allowed(const T *, size_t, size_t , const Deletable& ) { |
---|
475 | return NULL; // for non-char deleting is always allowed |
---|
476 | } |
---|
477 | template<> |
---|
478 | inline GB_ERROR check_delete_allowed(const char *data, size_t start, size_t count, const Deletable& deletable) { |
---|
479 | return deletable.get_delete_error(data, start, count); |
---|
480 | } |
---|
481 | |
---|
482 | template<typename T> |
---|
483 | class SpecificAliData : public TypedAliData<T>, public SizeAwarable, virtual Noncopyable { |
---|
484 | const T *data; |
---|
485 | Deletable deletable; |
---|
486 | |
---|
487 | public: |
---|
488 | typedef TypedAliData<T> BaseType; |
---|
489 | |
---|
490 | SpecificAliData(const T *static_data, size_t elements, const T& gap_, const SizeAwarable& sizeAware, const Deletable& deletable_) |
---|
491 | : BaseType(elements, gap_), |
---|
492 | SizeAwarable(sizeAware), |
---|
493 | data(static_data), |
---|
494 | deletable(deletable_) |
---|
495 | {} |
---|
496 | |
---|
497 | int operate_on_mem(void *mem, size_t start, size_t count, AliData::memop op) const OVERRIDE { |
---|
498 | if (count>0) { |
---|
499 | id_assert(BaseType::is_valid_part(start, count)); |
---|
500 | switch (op) { |
---|
501 | case AliData::COPY_TO: { |
---|
502 | size_t msize = BaseType::unitsize()*count; |
---|
503 | id_assert(msize>0); |
---|
504 | memcpy(mem, data+start, msize); |
---|
505 | break; |
---|
506 | } |
---|
507 | case AliData::COMPARE_WITH: { |
---|
508 | const T *typedMem = (const T*)mem; |
---|
509 | for (size_t a = 0; a<count; ++a) { |
---|
510 | int cmp = compare_type(data[start+a], typedMem[a]); |
---|
511 | if (cmp) return cmp; |
---|
512 | } |
---|
513 | break; |
---|
514 | } |
---|
515 | case AliData::CHECK_DELETE: { |
---|
516 | const T *typedMem = (const T*)data; |
---|
517 | GB_ERROR error = check_delete_allowed<T>(typedMem, start, count, deletable); |
---|
518 | if (error) { |
---|
519 | BaseType::set_error(error); |
---|
520 | return 1; |
---|
521 | } |
---|
522 | break; |
---|
523 | } |
---|
524 | } |
---|
525 | } |
---|
526 | return 0; |
---|
527 | } |
---|
528 | int cmp_data(size_t start, const AliData& other, size_t ostart, size_t count) const OVERRIDE { |
---|
529 | id_assert(BaseType::is_valid_part(start, count)); |
---|
530 | id_assert(other.is_valid_part(ostart, count)); |
---|
531 | |
---|
532 | // if (&other == this && start == ostart) return true; // @@@ why does this fail tests? |
---|
533 | return -other.cmpPartWith(data+start, ostart, count); |
---|
534 | } |
---|
535 | UnitPtr at_ptr(size_t pos) const OVERRIDE { |
---|
536 | if (pos<BaseType::elems()) return UnitPtr(&data[pos]); |
---|
537 | return UnitPtr(); |
---|
538 | } |
---|
539 | const T *get_data() const { return data; } |
---|
540 | }; |
---|
541 | |
---|
542 | class SequenceAliData : public SpecificAliData<char> { |
---|
543 | char dot; |
---|
544 | |
---|
545 | char preferred_gap(const char *s1, const char *s2) const { |
---|
546 | if (s1 && s2) { |
---|
547 | if (*s1 == std_gap() || *s2 == std_gap()) { |
---|
548 | return std_gap(); |
---|
549 | } |
---|
550 | if (*s1 == dot || *s2 == dot) { |
---|
551 | return dot; |
---|
552 | } |
---|
553 | return std_gap(); |
---|
554 | } |
---|
555 | else if (s1) { |
---|
556 | id_assert(!s2); |
---|
557 | return *s1 == std_gap() ? std_gap() : dot; |
---|
558 | } |
---|
559 | else if (s2) { |
---|
560 | id_assert(!s1); |
---|
561 | return *s2 == std_gap() ? std_gap() : dot; |
---|
562 | } |
---|
563 | else { |
---|
564 | id_assert(!s1 && !s2); |
---|
565 | return dot; |
---|
566 | } |
---|
567 | } |
---|
568 | |
---|
569 | public: |
---|
570 | SequenceAliData(const char* static_data, size_t elements, char stdgap, char dotgap, const SizeAwarable& sizeAware, const Deletable& deletable_) |
---|
571 | : SpecificAliData<char>(static_data, elements, stdgap, sizeAware, deletable_), |
---|
572 | dot(dotgap) |
---|
573 | {} |
---|
574 | |
---|
575 | AliDataPtr create_gap(size_t gapsize, const UnitPair& gapinfo) const OVERRIDE { |
---|
576 | char use = preferred_gap(typed_ptr(gapinfo.left), typed_ptr(gapinfo.right)); |
---|
577 | return new SpecificGap<char>(gapsize, use); |
---|
578 | } |
---|
579 | }; |
---|
580 | |
---|
581 | // -------------------------------------------------------------------------------- |
---|
582 | // @@@ move things below into a class ? |
---|
583 | |
---|
584 | inline AliDataPtr concat(AliDataPtr left, AliDataPtr right) { |
---|
585 | return left->empty() ? right : (right->empty() ? left : new ComposedAliData(left, right)); |
---|
586 | } |
---|
587 | inline AliDataPtr concat(AliDataPtr left, AliDataPtr mid, AliDataPtr right) { |
---|
588 | return concat(left, concat(mid, right)); |
---|
589 | } |
---|
590 | |
---|
591 | inline AliDataPtr partof(AliDataPtr data, size_t pos, size_t amount) { return AliDataSlice::make(data, pos, amount); } |
---|
592 | inline AliDataPtr before(AliDataPtr data, size_t pos) { return partof(data, 0, pos); } |
---|
593 | inline AliDataPtr after(AliDataPtr data, size_t pos) { return partof(data, pos+1, data->elems()-pos-1); } |
---|
594 | |
---|
595 | inline AliDataPtr delete_from(AliDataPtr from, size_t pos, size_t amount, GB_ERROR& error) { |
---|
596 | error = from->check_delete_allowed(pos, amount); |
---|
597 | return concat(before(from, pos), after(from, pos+amount-1)); |
---|
598 | } |
---|
599 | inline AliDataPtr insert_at(AliDataPtr dest, size_t pos, AliDataPtr src) { |
---|
600 | return concat(before(dest, pos), src, after(dest, pos-1)); |
---|
601 | } |
---|
602 | |
---|
603 | inline AliDataPtr insert_gap(AliDataPtr data, size_t pos, size_t count) { |
---|
604 | UnitPair gapinfo; |
---|
605 | |
---|
606 | id_assert(data->unitsize() <= sizeof(gapinfo.left)); |
---|
607 | |
---|
608 | gapinfo.left = data->unit_left_of(pos); // @@@ do not perform ALWAYS (put into an object and lazy eval) |
---|
609 | gapinfo.right = data->unit_right_of(pos); |
---|
610 | |
---|
611 | AliDataPtr gap = data->create_gap(count, gapinfo); |
---|
612 | return insert_at(data, pos, gap); |
---|
613 | } |
---|
614 | |
---|
615 | inline AliDataPtr format(AliDataPtr data, const size_t wanted_len, GB_ERROR& error) { |
---|
616 | size_t curr_len = data->elems(); |
---|
617 | if (curr_len < wanted_len) { |
---|
618 | data = insert_gap(data, curr_len, wanted_len-curr_len); |
---|
619 | } |
---|
620 | else if (curr_len > wanted_len) { |
---|
621 | data = delete_from(data, wanted_len, curr_len-wanted_len, error); |
---|
622 | } |
---|
623 | id_assert(data->elems() == wanted_len); |
---|
624 | return data; |
---|
625 | } |
---|
626 | |
---|
627 | |
---|
628 | template<typename T> inline AliDataPtr makeAliData(T*& allocated_data, size_t elems, const T& gap) { |
---|
629 | return new SpecificAliData<T>(allocated_data, elems, gap, dontAllowOversize(elems), Deletable(Deletable::ANYTHING)); |
---|
630 | } |
---|
631 | inline AliDataPtr makeAliSeqData(char*& allocated_data, size_t elems, char gap, char dot) { |
---|
632 | return new SequenceAliData(allocated_data, elems, gap, dot, dontAllowOversize(elems), Deletable(Deletable::ANYTHING)); |
---|
633 | } |
---|
634 | |
---|
635 | // -------------------------------------------------------------------------------- |
---|
636 | |
---|
637 | #ifdef UNIT_TESTS |
---|
638 | #ifndef TEST_UNIT_H |
---|
639 | #include <test_unit.h> |
---|
640 | #endif |
---|
641 | |
---|
// Returns (a reference to a function-static pointer to) a malloc'ed copy of
// 'elements' units of 'elemsize' bytes each.
// Asserts that the static pointer of this instantiation is still NULL when called.
template<typename T>
inline T*& copyof(const T* const_data, size_t elemsize, size_t elements) {
    static T *copy = NULL;
    id_assert(!copy);

    const size_t memsize = elemsize*elements;

    copy = (T*)malloc(memsize);
    id_assert(copy);
    memcpy(copy, const_data, memsize);

    return copy;
}
---|
653 | |
---|
// COPYOF: malloc'ed copy of a complete array (via copyof()).
// SIZEOF: element size of the array multiplied by ARRAY_ELEMS(array) - presumably its size in bytes.
#define COPYOF(typedarray) copyof(typedarray, sizeof(*(typedarray)), ARRAY_ELEMS(typedarray))
#define SIZEOF(typedarray) (sizeof(*(typedarray))*ARRAY_ELEMS(typedarray))
---|
656 | |
---|
// Expects that 'd1' and 'd2' have the same memsize() and that their copies
// (written into one common malloc'ed buffer) contain identical bytes.
// Note: both arguments are evaluated twice.
#define TEST_EXPECT_COPIES_EQUAL(d1,d2) do{                     \
        size_t  s1    = (d1)->memsize();                        \
        size_t  s2    = (d2)->memsize();                        \
        TEST_EXPECT_EQUAL(s1, s2);                              \
        void   *copy1 = malloc(s1+s2);                          \
        void   *copy2 = reinterpret_cast<char*>(copy1)+s1;      \
        (d1)->copyTo(copy1);                                    \
        (d2)->copyTo(copy2);                                    \
        TEST_EXPECT_MEM_EQUAL(copy1, copy2, s1);                \
        free(copy1);                                            \
    }while(0)

// Expects that 'adp' occupies 'asize' bytes and that its copy is bytewise equal to 'typedarray'.
#define TEST_EXPECT_COPY_EQUALS_ARRAY(adp,typedarray,asize) do{ \
        size_t  size    = (adp)->memsize();                     \
        TEST_EXPECT_EQUAL(size, asize);                         \
        void   *ad_copy = malloc(size);                         \
        (adp)->copyTo(ad_copy);                                 \
        TEST_EXPECT_MEM_EQUAL(ad_copy, typedarray, size);       \
        free(ad_copy);                                          \
    }while(0)

// Copies 'adp' into a buffer, appends a 0-byte and compares the result with 'str'
// using TEST_EXPECT_EQUAL.
#define TEST_EXPECT_COPY_EQUALS_STRING(adp,str) do{             \
        size_t  size    = (adp)->memsize();                     \
        char   *ad_copy = (char*)malloc(size+1);                \
        (adp)->copyTo(ad_copy);                                 \
        ad_copy[size]   = 0;                                    \
        TEST_EXPECT_EQUAL(ad_copy, str);                        \
        free(ad_copy);                                          \
    }while(0)
---|
686 | |
---|
687 | #if defined(ENABLE_CRASH_TESTS) && defined(ASSERTION_USED) |
---|
688 | static void illegal_alidata_composition() { |
---|
689 | const int ELEMS = 5; |
---|
690 | |
---|
691 | int *i = (int*)malloc(sizeof(int)*ELEMS); |
---|
692 | char *c = (char*)malloc(sizeof(char)*ELEMS); |
---|
693 | |
---|
694 | concat(makeAliData(i, ELEMS, 0), makeAliData(c, ELEMS, '-')); |
---|
695 | } |
---|
696 | #endif |
---|
697 | |
---|
698 | template <typename T> |
---|
699 | inline T *makeCopy(AliDataPtr d) { |
---|
700 | TEST_EXPECT_EQUAL(d->unitsize(), sizeof(T)); |
---|
701 | size_t size = d->memsize(); |
---|
702 | T *copy = (T*)malloc(size); |
---|
703 | d->copyTo(copy); |
---|
704 | return copy; |
---|
705 | } |
---|
706 | |
---|
707 | template <typename T> |
---|
708 | static arb_test::match_expectation compare_works(AliDataPtr d1, AliDataPtr d2, int expected_cmp) { |
---|
709 | int brute_force_compare = 0; |
---|
710 | { |
---|
711 | int minSize = std::min(d1->elems(), d2->elems()); |
---|
712 | |
---|
713 | T *copy1 = makeCopy<T>(d1); |
---|
714 | T *copy2 = makeCopy<T>(d2); |
---|
715 | |
---|
716 | for (int i = 0; i < minSize && brute_force_compare == 0; ++i) { // compare inclusive terminal zero-element |
---|
717 | brute_force_compare = compare_type(copy1[i], copy2[i]); |
---|
718 | } |
---|
719 | |
---|
720 | if (brute_force_compare == 0) { |
---|
721 | brute_force_compare = compare_type(d1->elems(), d2->elems()); |
---|
722 | } |
---|
723 | |
---|
724 | free(copy2); |
---|
725 | free(copy1); |
---|
726 | } |
---|
727 | |
---|
728 | int smart_forward_compare = d1->cmp_whole_data(*d2); |
---|
729 | int smart_backward_compare = d2->cmp_whole_data(*d1); |
---|
730 | |
---|
731 | using namespace arb_test; |
---|
732 | expectation_group expected; |
---|
733 | |
---|
734 | expected.add(that(brute_force_compare).is_equal_to(expected_cmp)); |
---|
735 | expected.add(that(smart_forward_compare).is_equal_to(expected_cmp)); |
---|
736 | expected.add(that(smart_backward_compare).is_equal_to(-expected_cmp)); |
---|
737 | |
---|
738 | return all().ofgroup(expected); |
---|
739 | } |
---|
740 | |
---|
// Checks compare_works() for char data.
#define TEST_COMPARE_WORKS(d1,d2,expected) TEST_EXPECTATION(compare_works<char>(d1,d2,expected))

// Checks compare_works() with the unit type selected by 'tid' (0 = char, 1 = GB_UINT4, 2 = float).
// Any other 'tid' checks nothing.
#define TEST_COMPARE_WORKS_ALL_TYPES(tid,d1,d2,expected)                                \
    switch (tid) {                                                                      \
        case 0: TEST_EXPECTATION(compare_works<char>(d1,d2,expected)); break;           \
        case 1: TEST_EXPECTATION(compare_works<GB_UINT4>(d1,d2,expected)); break;       \
        case 2: TEST_EXPECTATION(compare_works<float>(d1,d2,expected)); break;          \
    }
---|
749 | |
---|
// Unit test for AliData.
// Creates AliData from char-, int- and float-arrays and checks (for each of these types)
// the composition helpers concat(), before(), after(), partof(), delete_from(), insert_at()
// and insert_gap() as well as the comparison of AliData.
void TEST_AliData() {
#define SEQDATA "CGCAC-C-GG-C-GG.A.-C------GG-.C..UCAGU"
    char chr_src[] = SEQDATA; // also contains trailing 0-byte!
    GB_CUINT4 int_src[] = { 0x01, 0x1213, 0x242526, 0x37383930, 0xffffffff };
    float flt_src[] = { 0.0, 0.5, 1.0, -5.0, 20.1 };

    // one AliData per tested element type (the array index is used as type-id 't' below)
    AliDataPtr type[] = {
        makeAliSeqData(COPYOF(chr_src), ARRAY_ELEMS(chr_src)-1, '-', '.'),
        makeAliData(COPYOF(int_src), ARRAY_ELEMS(int_src), 0U),
        makeAliData(COPYOF(flt_src), ARRAY_ELEMS(flt_src), 0.0F)
    };
    // copies of the AliData have to reproduce the source arrays
    TEST_EXPECT_COPY_EQUALS_ARRAY(type[0], chr_src, SIZEOF(chr_src)-1);
    TEST_EXPECT_COPY_EQUALS_STRING(type[0], chr_src);
    TEST_EXPECT_COPY_EQUALS_ARRAY(type[1], int_src, SIZEOF(int_src));
    TEST_EXPECT_COPY_EQUALS_ARRAY(type[2], flt_src, SIZEOF(flt_src));

    for (size_t t = 0; t<ARRAY_ELEMS(type); ++t) {
        AliDataPtr data = type[t];
        AliDataPtr dup = concat(data, data);
        TEST_EXPECT_EQUAL(dup->elems(), 2*data->elems());

        // split 'data' into 3 parts: 'start' (3 elements), 'mid' (1 element) and 'end' (rest)
        AliDataPtr start = before(data, 3);
        TEST_EXPECT_EQUAL(start->elems(), 3U);

        AliDataPtr end = after(data, 3);
        TEST_EXPECT_EQUAL(end->elems(), data->elems()-4);

        AliDataPtr mid = partof(data, 3, 1);
        TEST_EXPECT_COPIES_EQUAL(concat(start, mid, end), data); // the parts recompose to the whole

        // deleting the element at position 3 equals 'start' + 'end'
        GB_ERROR error = NULL;
        AliDataPtr del = delete_from(data, 3, 1, error);
        TEST_EXPECT_NO_ERROR(error);
        TEST_EXPECT_EQUAL(del->elems(), data->elems()-1);
        TEST_EXPECT_COPIES_EQUAL(concat(start, end), del);

        AliDataPtr empty = before(data, 0);
        TEST_EXPECT_EQUAL(empty->elems(), 0U);

        // concatenating with empty data does not change anything
        TEST_EXPECT_COPIES_EQUAL(data, concat(data, empty));
        TEST_EXPECT_COPIES_EQUAL(data, concat(empty, data));
        TEST_EXPECT_COPIES_EQUAL(empty, concat(empty, empty));

        // deleting more elements than available behind position 3 leaves only 'start'
        AliDataPtr del_rest = delete_from(data, 3, 999, error);
        TEST_EXPECT_NO_ERROR(error);
        TEST_EXPECT_COPIES_EQUAL(start, del_rest);

        // insert_at() reverts delete_from() (and vice versa)
        AliDataPtr ins = insert_at(del, 3, mid);
        TEST_EXPECT_COPIES_EQUAL(data, ins);
        TEST_EXPECT_COPIES_EQUAL(del, delete_from(ins, 3, 1, error));
        TEST_EXPECT_NO_ERROR(error);

        TEST_EXPECT_COPIES_EQUAL(insert_at(del, 3, empty), del);
        TEST_EXPECT_COPIES_EQUAL(insert_at(del, 777, empty), del); // append via insert_at
        TEST_EXPECT_COPIES_EQUAL(insert_at(start, 777, end), del); // append via insert_at

        // inserting gaps
        AliDataPtr ins_gap = insert_gap(del, 4, 5);
        TEST_EXPECT_EQUAL(ins_gap->elems(), del->elems()+5);

        AliDataPtr gap_iseq = partof(ins_gap, 4, 5); // = the gap inserted above

        TEST_EXPECT_COPIES_EQUAL(ins_gap, insert_gap(ins_gap, 7, 0)); // insert empty gap

        AliDataPtr start_gap1 = insert_gap(ins_gap, 0, 1); // insert gap at start
        AliDataPtr start_gap3 = insert_gap(ins_gap, 0, 3); // insert gap at start

        AliDataPtr gap_iempty = insert_gap(empty, 0, 5);
        TEST_EXPECT_EQUAL(gap_iempty->elems(), 5U);

        AliDataPtr gap_in_gap = insert_gap(gap_iempty, 3, 2);
        TEST_EXPECT_EQUAL(gap_in_gap->elems(), 7U);

        AliDataPtr end_gap1 = insert_gap(mid, 1, 1);
        TEST_EXPECT_EQUAL(end_gap1->elems(), 2U);

        if (t == 0) {
            // the following expectations are spelled out as strings and are only checked for the char data

            AliDataPtr end_gap2 = insert_gap(end, 34, 2); // position 34 is directly behind the 34 elements of 'end'

            TEST_EXPECT_COPY_EQUALS_STRING(start, "CGC");
            TEST_EXPECT_COPY_EQUALS_STRING(end, "C-C-GG-C-GG.A.-C------GG-.C..UCAGU");
            TEST_EXPECT_COPY_EQUALS_STRING(end_gap2, "C-C-GG-C-GG.A.-C------GG-.C..UCAGU..");
            TEST_EXPECT_COPY_EQUALS_STRING(mid, "A");
            TEST_EXPECT_COPY_EQUALS_STRING(end_gap1, "A-"); // '-' is ok, since before there was a C behind (but correct would be '.')
            TEST_EXPECT_COPY_EQUALS_STRING(del, "CGCC-C-GG-C-GG.A.-C------GG-.C..UCAGU");
            TEST_EXPECT_COPY_EQUALS_STRING(del_rest, "CGC");
            TEST_EXPECT_COPY_EQUALS_STRING(ins, "CGCAC-C-GG-C-GG.A.-C------GG-.C..UCAGU");
            TEST_EXPECT_COPY_EQUALS_STRING(gap_iseq, "-----"); // inserted between bases
            TEST_EXPECT_COPY_EQUALS_STRING(gap_iempty, "....."); // inserted in empty sequence
            TEST_EXPECT_COPY_EQUALS_STRING(gap_in_gap, "......."); // inserted gap in gap
            TEST_EXPECT_COPY_EQUALS_STRING(ins_gap, "CGCC------C-GG-C-GG.A.-C------GG-.C..UCAGU");
            TEST_EXPECT_COPY_EQUALS_STRING(start_gap1, ".CGCC------C-GG-C-GG.A.-C------GG-.C..UCAGU");
            TEST_EXPECT_COPY_EQUALS_STRING(start_gap3, "...CGCC------C-GG-C-GG.A.-C------GG-.C..UCAGU");

            // the gap-character used for an inserted gap depends on the neighbouring characters
            AliDataPtr bef_dot = insert_gap(ins, 15, 2);
            AliDataPtr aft_dot = insert_gap(ins, 16, 2);
            AliDataPtr bet_dots = insert_gap(ins, 32, 2);
            AliDataPtr bet_dashes = insert_gap(ins, 23, 2);
            AliDataPtr bet_dashdot = insert_gap(ins, 29, 2);
            AliDataPtr bet_dotdash = insert_gap(ins, 18, 2);

            TEST_EXPECT_COPY_EQUALS_STRING(ins, "CGCAC-C-GG-C-GG.A.-C------GG-.C..UCAGU");
            TEST_EXPECT_COPY_EQUALS_STRING(bef_dot, "CGCAC-C-GG-C-GG...A.-C------GG-.C..UCAGU");
            TEST_EXPECT_COPY_EQUALS_STRING(aft_dot, "CGCAC-C-GG-C-GG...A.-C------GG-.C..UCAGU");
            TEST_EXPECT_COPY_EQUALS_STRING(bet_dots, "CGCAC-C-GG-C-GG.A.-C------GG-.C....UCAGU");
            TEST_EXPECT_COPY_EQUALS_STRING(bet_dashes, "CGCAC-C-GG-C-GG.A.-C--------GG-.C..UCAGU");
            TEST_EXPECT_COPY_EQUALS_STRING(bet_dashdot,"CGCAC-C-GG-C-GG.A.-C------GG---.C..UCAGU");
            TEST_EXPECT_COPY_EQUALS_STRING(bet_dotdash,"CGCAC-C-GG-C-GG.A.---C------GG-.C..UCAGU");

            {
                // test comparability of AliData

                AliDataPtr same_as_start_gap1 = after(start_gap3, 1);

                TEST_COMPARE_WORKS(start_gap1, same_as_start_gap1, 0);

                TEST_EXPECT(start_gap1->differs_from(*start_gap3));
                // TEST_EXPECT_EQUAL(strcmp(".CGCC------C-GG-C-GG.A.-C------GG-.C..UCAGU", // start_gap1
                // "...CGCC------C-GG-C-GG.A.-C------GG-.C..UCAGU"), 1); // start_gap3

                TEST_EXPECT_EQUAL(start_gap1->cmp_whole_data(*start_gap3), 1);
                TEST_EXPECT_EQUAL(start_gap3->cmp_whole_data(*start_gap1), -1);

                TEST_COMPARE_WORKS(end, end_gap2, -1);
            }
        }

        {
            // test comparability of AliData (for all types)

            TEST_COMPARE_WORKS_ALL_TYPES(t, start_gap1, start_gap3, 1);
            TEST_COMPARE_WORKS_ALL_TYPES(t, gap_iempty, gap_in_gap, -1);
            TEST_COMPARE_WORKS_ALL_TYPES(t, del, ins, 1);
            TEST_COMPARE_WORKS_ALL_TYPES(t, partof(ins_gap, 0, 17), partof(start_gap3, 3, 17), 0);
            TEST_COMPARE_WORKS_ALL_TYPES(t, start_gap3, start_gap3, 0);
        }
    }

    TEST_FAILS_INSIDE_VALGRIND(TEST_EXPECT_CODE_ASSERTION_FAILS(illegal_alidata_composition)); // composing different unitsizes shall fail
}
---|
889 | |
---|
890 | #endif // UNIT_TESTS |
---|
891 | |
---|
892 | // -------------------------------------------------------------------------------- |
---|
893 | |
---|
// Kind of database item a processed terminal entry belongs to.
// The numeric values are used as index into targetTypeName[].
enum TerminalType {
    IDT_SPECIES   = 0,
    IDT_SAI       = 1,
    IDT_SECSTRUCT = 2,
};
---|
899 | |
---|
900 | static GB_CSTR targetTypeName[] = { |
---|
901 | "Species", |
---|
902 | "SAI", |
---|
903 | "SeceditStruct", |
---|
904 | }; |
---|
905 | |
---|
906 | class Alignment { |
---|
907 | SmartCharPtr name; // name of alignment |
---|
908 | size_t len; // length of alignment |
---|
909 | public: |
---|
910 | Alignment(const char *name_, size_t len_) : name(strdup(name_)), len(len_) {} |
---|
911 | |
---|
912 | const char *get_name() const { return &*name; } |
---|
913 | size_t get_len() const { return len; } |
---|
914 | }; |
---|
915 | |
---|
916 | // -------------------------------------------------------------------------------- |
---|
917 | |
---|
918 | class AliApplicable { // something that can be appied to the whole alignment |
---|
919 | virtual GB_ERROR apply_to_terminal(GBDATA *gb_data, TerminalType term_type, const char *item_name, const Alignment& ali) const = 0; |
---|
920 | |
---|
921 | GB_ERROR apply_recursive(GBDATA *gb_data, TerminalType term_type, const char *item_name, const Alignment& ali) const; |
---|
922 | GB_ERROR apply_to_childs_named(GBDATA *gb_item_data, const char *item_field, TerminalType term_type, const Alignment& ali) const; |
---|
923 | GB_ERROR apply_to_secstructs(GBDATA *gb_secstructs, const Alignment& ali) const; |
---|
924 | |
---|
925 | public: |
---|
926 | AliApplicable() {} |
---|
927 | virtual ~AliApplicable() {} |
---|
928 | |
---|
929 | GB_ERROR apply_to_alignment(GBDATA *gb_main, const Alignment& ali) const; |
---|
930 | }; |
---|
931 | |
---|
932 | GB_ERROR AliApplicable::apply_recursive(GBDATA *gb_data, TerminalType term_type, const char *item_name, const Alignment& ali) const { |
---|
933 | GB_ERROR error = 0; |
---|
934 | GB_TYPES type = GB_read_type(gb_data); |
---|
935 | |
---|
936 | if (type == GB_DB) { |
---|
937 | GBDATA *gb_child; |
---|
938 | for (gb_child = GB_child(gb_data); gb_child && !error; gb_child = GB_nextChild(gb_child)) { |
---|
939 | error = apply_recursive(gb_child, term_type, item_name, ali); |
---|
940 | } |
---|
941 | } |
---|
942 | else { |
---|
943 | error = apply_to_terminal(gb_data, term_type, item_name, ali); |
---|
944 | } |
---|
945 | |
---|
946 | return error; |
---|
947 | } |
---|
948 | GB_ERROR AliApplicable::apply_to_childs_named(GBDATA *gb_item_data, const char *item_field, TerminalType term_type, const Alignment& ali) const { |
---|
949 | GBDATA *gb_item; |
---|
950 | GB_ERROR error = 0; |
---|
951 | long item_count = GB_number_of_subentries(gb_item_data); |
---|
952 | |
---|
953 | if (item_count) { |
---|
954 | for (gb_item = GB_entry(gb_item_data, item_field); |
---|
955 | gb_item && !error; |
---|
956 | gb_item = GB_nextEntry(gb_item)) |
---|
957 | { |
---|
958 | GBDATA *gb_ali = GB_entry(gb_item, ali.get_name()); |
---|
959 | if (gb_ali) { |
---|
960 | const char *item_name = GBT_read_name(gb_item); |
---|
961 | error = apply_recursive(gb_ali, term_type, item_name, ali); |
---|
962 | if (error) error = GBS_global_string("%s '%s': %s", targetTypeName[term_type], item_name, error); |
---|
963 | } |
---|
964 | } |
---|
965 | } |
---|
966 | return error; |
---|
967 | } |
---|
968 | GB_ERROR AliApplicable::apply_to_secstructs(GBDATA *gb_secstructs, const Alignment& ali) const { |
---|
969 | GB_ERROR error = 0; |
---|
970 | GBDATA *gb_ali = GB_entry(gb_secstructs, ali.get_name()); |
---|
971 | |
---|
972 | if (gb_ali) { |
---|
973 | long item_count = GB_number_of_subentries(gb_ali)-1; |
---|
974 | if (item_count<1) item_count = 1; |
---|
975 | |
---|
976 | GBDATA *gb_item; |
---|
977 | for (gb_item = GB_entry(gb_ali, "struct"); |
---|
978 | gb_item && !error; |
---|
979 | gb_item = GB_nextEntry(gb_item)) |
---|
980 | { |
---|
981 | GBDATA *gb_ref = GB_entry(gb_item, "ref"); |
---|
982 | if (gb_ref) { |
---|
983 | error = apply_recursive(gb_ref, IDT_SECSTRUCT, "ref", ali); |
---|
984 | if (error) { |
---|
985 | const char *item_name = GBT_read_name(gb_item); |
---|
986 | error = GBS_global_string("%s '%s': %s", targetTypeName[IDT_SECSTRUCT], item_name, error); |
---|
987 | } |
---|
988 | } |
---|
989 | } |
---|
990 | } |
---|
991 | return error; |
---|
992 | } |
---|
993 | |
---|
994 | GB_ERROR AliApplicable::apply_to_alignment(GBDATA *gb_main, const Alignment& ali) const { |
---|
995 | GB_ERROR error = apply_to_childs_named(GBT_find_or_create(gb_main, "extended_data", 7), "extended", IDT_SAI, ali); |
---|
996 | if (!error) error = apply_to_secstructs(GB_search(gb_main, "secedit/structs", GB_CREATE_CONTAINER), ali); |
---|
997 | if (!error) error = apply_to_childs_named(GBT_find_or_create(gb_main, "species_data", 7), "species", IDT_SPECIES, ali); |
---|
998 | return error; |
---|
999 | } |
---|
1000 | |
---|
1001 | // -------------------------------------------------------------------------------- |
---|
1002 | |
---|
1003 | class AliEntryCounter : public AliApplicable { |
---|
1004 | mutable size_t count; |
---|
1005 | GB_ERROR apply_to_terminal(GBDATA *, TerminalType, const char *, const Alignment&) const OVERRIDE { count++; return NULL; } |
---|
1006 | public: |
---|
1007 | AliEntryCounter() : count(0) {} |
---|
1008 | size_t get_entry_count() const { return count; } |
---|
1009 | }; |
---|
1010 | |
---|
1011 | // -------------------------------------------------------------------------------- |
---|
1012 | |
---|
1013 | struct AliEditCommand { |
---|
1014 | virtual ~AliEditCommand() {} |
---|
1015 | virtual AliDataPtr apply(AliDataPtr to, GB_ERROR& error) const = 0; |
---|
1016 | virtual GB_ERROR check_applicable_to(const Alignment& ali, size_t& resulting_ali_length) const = 0; |
---|
1017 | }; |
---|
1018 | |
---|
1019 | class AliInsertCommand : public AliEditCommand { |
---|
1020 | size_t pos; // inserts in front of pos |
---|
1021 | size_t amount; |
---|
1022 | public: |
---|
1023 | AliInsertCommand(size_t pos_, size_t amount_) : pos(pos_), amount(amount_) {} |
---|
1024 | AliDataPtr apply(AliDataPtr to, GB_ERROR& /*error*/) const OVERRIDE { return insert_gap(to, pos, amount); } |
---|
1025 | GB_ERROR check_applicable_to(const Alignment& ali, size_t& resulting_ali_length) const OVERRIDE { |
---|
1026 | size_t len = ali.get_len(); |
---|
1027 | if (pos>len) { |
---|
1028 | return GBS_global_string("Can't insert at position %zu (exceeds length %zu of alignment '%s')", |
---|
1029 | pos, len, ali.get_name()); |
---|
1030 | } |
---|
1031 | resulting_ali_length = len+amount; |
---|
1032 | return NULL; |
---|
1033 | } |
---|
1034 | }; |
---|
1035 | |
---|
1036 | class AliDeleteCommand : public AliEditCommand { |
---|
1037 | size_t pos; |
---|
1038 | size_t amount; |
---|
1039 | public: |
---|
1040 | AliDeleteCommand(size_t pos_, size_t amount_) |
---|
1041 | : pos(pos_), |
---|
1042 | amount(amount_) |
---|
1043 | {} |
---|
1044 | AliDataPtr apply(AliDataPtr to, GB_ERROR& error) const OVERRIDE { return delete_from(to, pos, amount, error); } |
---|
1045 | GB_ERROR check_applicable_to(const Alignment& ali, size_t& resulting_ali_length) const OVERRIDE { |
---|
1046 | size_t len = ali.get_len(); |
---|
1047 | size_t end_pos = pos+amount-1; |
---|
1048 | if (end_pos >= len) { |
---|
1049 | return GBS_global_string("Can't delete positions %zu-%zu (exceeds max. position %zu of alignment '%s')", |
---|
1050 | pos, end_pos, len-1, ali.get_name()); |
---|
1051 | } |
---|
1052 | resulting_ali_length = len-amount; |
---|
1053 | return NULL; |
---|
1054 | } |
---|
1055 | }; |
---|
1056 | |
---|
1057 | class AliFormatCommand : public AliEditCommand { |
---|
1058 | size_t wanted_len; |
---|
1059 | |
---|
1060 | public: |
---|
1061 | AliFormatCommand(size_t wanted_len_) : wanted_len(wanted_len_) {} |
---|
1062 | AliDataPtr apply(AliDataPtr to, GB_ERROR& error) const OVERRIDE { |
---|
1063 | SizeAwarable *knows_size = dynamic_cast<SizeAwarable*>(&*to); |
---|
1064 | |
---|
1065 | id_assert(knows_size); // format can only be applied to SpecificAliData |
---|
1066 | // i.e. AliFormatCommand has to be the FIRST of a series of applied commands! |
---|
1067 | |
---|
1068 | int allowed_size = knows_size->get_allowed_size(to->elems(), wanted_len); |
---|
1069 | return format(to, allowed_size, error); |
---|
1070 | } |
---|
1071 | GB_ERROR check_applicable_to(const Alignment& IF_ASSERTION_USED(ali), size_t& resulting_ali_length) const OVERRIDE { |
---|
1072 | id_assert(ali.get_len() == wanted_len); |
---|
1073 | resulting_ali_length = wanted_len; |
---|
1074 | return NULL; |
---|
1075 | } |
---|
1076 | }; |
---|
1077 | |
---|
1078 | class AliAutoFormatCommand : public AliEditCommand { |
---|
1079 | mutable SmartPtr<AliFormatCommand> cmd; |
---|
1080 | public: |
---|
1081 | AliDataPtr apply(AliDataPtr to, GB_ERROR& error) const OVERRIDE { |
---|
1082 | return cmd->apply(to, error); |
---|
1083 | } |
---|
1084 | GB_ERROR check_applicable_to(const Alignment& ali, size_t& resulting_ali_length) const OVERRIDE { |
---|
1085 | cmd = new AliFormatCommand(ali.get_len()); // late decision on length to format |
---|
1086 | return cmd->check_applicable_to(ali, resulting_ali_length); |
---|
1087 | } |
---|
1088 | }; |
---|
1089 | |
---|
1090 | class AliCompositeCommand : public AliEditCommand, virtual Noncopyable { |
---|
1091 | AliEditCommand *first; |
---|
1092 | AliEditCommand *second; |
---|
1093 | public: |
---|
1094 | AliCompositeCommand(AliEditCommand *cmd1_, AliEditCommand *cmd2_) // takes ownership of commands |
---|
1095 | : first(cmd1_), |
---|
1096 | second(cmd2_) |
---|
1097 | {} |
---|
1098 | ~AliCompositeCommand() OVERRIDE { delete second; delete first; } |
---|
1099 | AliDataPtr apply(AliDataPtr to, GB_ERROR& error) const OVERRIDE { |
---|
1100 | AliDataPtr tmp = first->apply(to, error); |
---|
1101 | if (!error) tmp = second->apply(tmp, error); |
---|
1102 | return tmp; |
---|
1103 | } |
---|
1104 | GB_ERROR check_applicable_to(const Alignment& ali, size_t& resulting_ali_length) const OVERRIDE { |
---|
1105 | GB_ERROR error = first->check_applicable_to(ali, resulting_ali_length); |
---|
1106 | if (!error) { |
---|
1107 | Alignment tmp_ali(ali.get_name(), resulting_ali_length); |
---|
1108 | error = second->check_applicable_to(tmp_ali, resulting_ali_length); |
---|
1109 | } |
---|
1110 | return error; |
---|
1111 | } |
---|
1112 | }; |
---|
1113 | |
---|
1114 | // -------------------------------------------------------------------------------- |
---|
1115 | |
---|
1116 | class AliEditor : public AliApplicable { |
---|
1117 | const AliEditCommand& cmd; |
---|
1118 | Deletable deletable; |
---|
1119 | |
---|
1120 | mutable arb_progress progress; |
---|
1121 | mutable size_t modified_counter; |
---|
1122 | |
---|
1123 | GB_ERROR apply_to_terminal(GBDATA *gb_data, TerminalType term_type, const char *item_name, const Alignment& ali) const OVERRIDE; |
---|
1124 | |
---|
1125 | bool shall_edit(GBDATA *gb_data, TerminalType term_type) const { |
---|
1126 | // defines whether specific DB-elements shall be edited by any AliEditor |
---|
1127 | // (true for all data, that contains alignment position specific data) |
---|
1128 | |
---|
1129 | const char *key = GB_read_key_pntr(gb_data); |
---|
1130 | bool shall = key[0] != '_'; // general case: don't apply to keys starting with '_' |
---|
1131 | if (!shall) shall = term_type == IDT_SAI && strcmp(key, "_REF") == 0; // exception (SAI:_REF needs editing) |
---|
1132 | return shall; |
---|
1133 | } |
---|
1134 | |
---|
1135 | public: |
---|
1136 | AliEditor(const AliEditCommand& cmd_, const Deletable& deletable_, const char *progress_title, size_t progress_count) |
---|
1137 | : cmd(cmd_), |
---|
1138 | deletable(deletable_), |
---|
1139 | progress(progress_title, progress_count), |
---|
1140 | modified_counter(0) |
---|
1141 | { |
---|
1142 | } |
---|
1143 | ~AliEditor() OVERRIDE { |
---|
1144 | progress.done(); |
---|
1145 | } |
---|
1146 | |
---|
1147 | const AliEditCommand& edit_command() const { return cmd; } |
---|
1148 | }; |
---|
1149 | |
---|
1150 | // -------------------------------------------------------------------------------- |
---|
1151 | |
---|
// Buffer used to hand modified data over to the database (see provide_insDelBuffer).
static char   *insDelBuffer      = NULL; // NULL while no buffer is allocated
static size_t  insDelBuffer_size = 0;    // allocated size (only meaningful while insDelBuffer != NULL)
---|
1154 | |
---|
1155 | inline void free_insDelBuffer() { |
---|
1156 | freenull(insDelBuffer); |
---|
1157 | } |
---|
1158 | inline char *provide_insDelBuffer(size_t neededSpace) { |
---|
1159 | if (insDelBuffer && insDelBuffer_size<neededSpace) free_insDelBuffer(); |
---|
1160 | if (!insDelBuffer) { |
---|
1161 | insDelBuffer_size = neededSpace+10; |
---|
1162 | insDelBuffer = (char*)malloc(insDelBuffer_size); |
---|
1163 | } |
---|
1164 | return insDelBuffer; |
---|
1165 | } |
---|
1166 | |
---|
1167 | inline GB_CSTR alidata2buffer(const AliData& data) { // @@@ DRY vs copying code (above in this file) |
---|
1168 | char *buffer = provide_insDelBuffer(data.memsize()+1); |
---|
1169 | |
---|
1170 | data.copyTo(buffer); |
---|
1171 | buffer[data.memsize()] = 0; // only needed for strings but does not harm otherwise |
---|
1172 | |
---|
1173 | return buffer; |
---|
1174 | } |
---|
1175 | |
---|
1176 | // -------------------------------------------------------------------------------- |
---|
1177 | |
---|
1178 | class EditedTerminal : virtual Noncopyable { |
---|
1179 | GBDATA *gb_data; |
---|
1180 | GB_TYPES type; |
---|
1181 | const char *item_name; // name of SAI/species etc |
---|
1182 | AliDataPtr data; |
---|
1183 | Deletable deletable; |
---|
1184 | GB_ERROR error; |
---|
1185 | |
---|
1186 | bool has_key(const char *expected_key) const { |
---|
1187 | return strcmp(GB_read_key_pntr(gb_data), expected_key) == 0; |
---|
1188 | } |
---|
1189 | bool has_name(const char *expected_name) const { |
---|
1190 | return strcmp(item_name, expected_name) == 0; |
---|
1191 | } |
---|
1192 | |
---|
1193 | bool is_ref(TerminalType term_type) const { |
---|
1194 | return |
---|
1195 | type == GB_STRING && |
---|
1196 | ((term_type == IDT_SECSTRUCT && has_key("ref")) || |
---|
1197 | (term_type == IDT_SAI && has_key("_REF"))); |
---|
1198 | } |
---|
1199 | bool is_helix(TerminalType term_type) const { |
---|
1200 | return |
---|
1201 | type == GB_STRING && |
---|
1202 | term_type == IDT_SAI && |
---|
1203 | (has_name("HELIX") || has_name("HELIX_NR")) && |
---|
1204 | has_key("data"); |
---|
1205 | } |
---|
1206 | |
---|
1207 | bool does_allow_oversize(TerminalType term_type) const { return is_ref(term_type); } |
---|
1208 | char get_std_string_gaptype(TerminalType term_type) const { |
---|
1209 | bool prefers_dots = is_ref(term_type) || is_helix(term_type); |
---|
1210 | return prefers_dots ? '.' : '-'; |
---|
1211 | } |
---|
1212 | |
---|
1213 | public: |
---|
1214 | EditedTerminal(GBDATA *gb_data_, GB_TYPES type_, const char *item_name_, size_t size_, TerminalType term_type, const Alignment& ali, const Deletable& deletable_) |
---|
1215 | : gb_data(gb_data_), |
---|
1216 | type(type_), |
---|
1217 | item_name(item_name_), |
---|
1218 | deletable(deletable_), |
---|
1219 | error(NULL) |
---|
1220 | { |
---|
1221 | SizeAwarable oversizable(does_allow_oversize(term_type), ali.get_len()); |
---|
1222 | |
---|
1223 | // @@@ DRY cases |
---|
1224 | switch(type) { |
---|
1225 | case GB_STRING: { |
---|
1226 | const char *s = GB_read_char_pntr(gb_data); |
---|
1227 | if (!s) error = GB_await_error(); |
---|
1228 | else { |
---|
1229 | char stdgap = get_std_string_gaptype(term_type); |
---|
1230 | if (stdgap == '.') data = new SpecificAliData<char>(s, size_, '.', oversizable, deletable); |
---|
1231 | else data = new SequenceAliData(s, size_, stdgap, '.', oversizable, deletable); |
---|
1232 | } |
---|
1233 | break; |
---|
1234 | } |
---|
1235 | case GB_BITS: { |
---|
1236 | const char *b = GB_read_bits_pntr(gb_data, '-', '+'); |
---|
1237 | if (!b) error = GB_await_error(); |
---|
1238 | else data = new SpecificAliData<char>(b, size_, '-', oversizable, deletable); |
---|
1239 | break; |
---|
1240 | } |
---|
1241 | case GB_BYTES: { |
---|
1242 | const char *b = GB_read_bytes_pntr(gb_data); |
---|
1243 | if (!b) error = GB_await_error(); |
---|
1244 | else data = new SpecificAliData<char>(b, size_, 0, oversizable, deletable); |
---|
1245 | break; |
---|
1246 | } |
---|
1247 | case GB_INTS: { |
---|
1248 | const GB_UINT4 *ui = GB_read_ints_pntr(gb_data); |
---|
1249 | if (!ui) error = GB_await_error(); |
---|
1250 | else data = new SpecificAliData<GB_UINT4>(ui, size_, 0, oversizable, deletable); |
---|
1251 | break; |
---|
1252 | } |
---|
1253 | case GB_FLOATS: { |
---|
1254 | const float *f = GB_read_floats_pntr(gb_data); |
---|
1255 | if (!f) error = GB_await_error(); |
---|
1256 | else data = new SpecificAliData<float>(f, size_, 0.0, oversizable, deletable); |
---|
1257 | break; |
---|
1258 | } |
---|
1259 | |
---|
1260 | default: |
---|
1261 | error = GBS_global_string("Unhandled type '%i'", type); |
---|
1262 | id_assert(0); |
---|
1263 | break; |
---|
1264 | } |
---|
1265 | |
---|
1266 | id_assert(implicated(!error, size_ == data->elems())); |
---|
1267 | } |
---|
1268 | |
---|
1269 | GB_ERROR apply(const AliEditCommand& cmd, bool& did_modify) { |
---|
1270 | did_modify = false; |
---|
1271 | if (!error) { |
---|
1272 | AliDataPtr modified_data = cmd.apply(data, error); |
---|
1273 | |
---|
1274 | if (!error && modified_data->differs_from(*data)) { |
---|
1275 | GB_CSTR modified = alidata2buffer(*modified_data); |
---|
1276 | size_t modified_elems = modified_data->elems(); |
---|
1277 | |
---|
1278 | switch (type) { |
---|
1279 | case GB_STRING: { |
---|
1280 | id_assert(strlen(modified) == modified_elems); |
---|
1281 | error = GB_write_string(gb_data, modified); |
---|
1282 | break; |
---|
1283 | } |
---|
1284 | case GB_BITS: error = GB_write_bits (gb_data, modified, modified_elems, "-"); break; |
---|
1285 | case GB_BYTES: error = GB_write_bytes (gb_data, modified, modified_elems); break; |
---|
1286 | case GB_INTS: error = GB_write_ints (gb_data, (GB_UINT4*)modified, modified_elems); break; |
---|
1287 | case GB_FLOATS: error = GB_write_floats(gb_data, (float*)modified, modified_elems); break; |
---|
1288 | |
---|
1289 | default: id_assert(0); break; |
---|
1290 | } |
---|
1291 | |
---|
1292 | if (!error) did_modify = true; |
---|
1293 | } |
---|
1294 | } |
---|
1295 | return error; |
---|
1296 | } |
---|
1297 | }; |
---|
1298 | |
---|
1299 | GB_ERROR AliEditor::apply_to_terminal(GBDATA *gb_data, TerminalType term_type, const char *item_name, const Alignment& ali) const { |
---|
1300 | GB_TYPES gbtype = GB_read_type(gb_data); |
---|
1301 | GB_ERROR error = NULL; |
---|
1302 | if (gbtype >= GB_BITS && gbtype != GB_LINK) { |
---|
1303 | if (shall_edit(gb_data, term_type)) { |
---|
1304 | EditedTerminal edited(gb_data, gbtype, item_name, GB_read_count(gb_data), term_type, ali, deletable); |
---|
1305 | |
---|
1306 | bool terminal_was_modified; |
---|
1307 | error = edited.apply(edit_command(), terminal_was_modified); |
---|
1308 | if (terminal_was_modified) { |
---|
1309 | progress.subtitle(GBS_global_string("modified: %zu", ++modified_counter)); |
---|
1310 | } |
---|
1311 | } |
---|
1312 | } |
---|
1313 | progress.inc_and_check_user_abort(error); |
---|
1314 | return error; |
---|
1315 | } |
---|
1316 | |
---|
1317 | // -------------------------------------------------------------------------------- |
---|
1318 | |
---|
1319 | static size_t countAffectedEntries(GBDATA *Main, const Alignment& ali) { |
---|
1320 | AliEntryCounter counter; |
---|
1321 | counter.apply_to_alignment(Main, ali); |
---|
1322 | return counter.get_entry_count(); |
---|
1323 | } |
---|
1324 | |
---|
1325 | static GB_ERROR apply_command_to_alignment(const AliEditCommand& cmd, const char *cmd_description, GBDATA *Main, const char *alignment_name, const char *deletable_chars) { |
---|
1326 | // applies 'cmd' to one or all alignments |
---|
1327 | // (if 'alignment_name' is NULL, all alignments are affected - probably useless case) |
---|
1328 | // |
---|
1329 | // 'deletable_chars' is either |
---|
1330 | // - NULL -> nothing may be deleted |
---|
1331 | // - "%" -> anything may be deleted |
---|
1332 | // - or a string containing all deletable characters |
---|
1333 | |
---|
1334 | Deletable deletable = |
---|
1335 | deletable_chars |
---|
1336 | ? ( strchr(deletable_chars, '%') |
---|
1337 | ? Deletable(Deletable::ANYTHING) |
---|
1338 | : Deletable(deletable_chars)) |
---|
1339 | : Deletable(Deletable::NOTHING); |
---|
1340 | |
---|
1341 | GB_ERROR error = 0; |
---|
1342 | GBDATA *gb_presets = GBT_get_presets(Main); |
---|
1343 | |
---|
1344 | for (GBDATA *gb_ali = GB_entry(gb_presets, "alignment"); |
---|
1345 | gb_ali && !error; |
---|
1346 | gb_ali = GB_nextEntry(gb_ali)) |
---|
1347 | { |
---|
1348 | GBDATA *gb_name = GB_find_string(gb_ali, "alignment_name", alignment_name, GB_IGNORE_CASE, SEARCH_CHILD); |
---|
1349 | |
---|
1350 | if (gb_name) { |
---|
1351 | GBDATA *gb_len = GB_entry(gb_ali, "alignment_len"); |
---|
1352 | Alignment ali(GB_read_char_pntr(gb_name), GB_read_int(gb_len)); |
---|
1353 | |
---|
1354 | size_t resulting_ali_length; |
---|
1355 | error = cmd.check_applicable_to(ali, resulting_ali_length); |
---|
1356 | |
---|
1357 | if (!error) error = AliEditor(cmd, deletable, cmd_description, countAffectedEntries(Main, ali)).apply_to_alignment(Main, ali); |
---|
1358 | if (!error) error = GB_write_int(gb_len, resulting_ali_length); |
---|
1359 | } |
---|
1360 | } |
---|
1361 | |
---|
1362 | free_insDelBuffer(); |
---|
1363 | |
---|
1364 | if (!error) GB_disable_quicksave(Main, "a lot of sequences changed"); // @@@ only disable if a reasonable amount of sequences has changed! |
---|
1365 | |
---|
1366 | return error; |
---|
1367 | } |
---|
1368 | |
---|
1369 | static GB_ERROR format_to_alilen(GBDATA *Main, const char *alignment_name) { // @@@ inline |
---|
1370 | AliAutoFormatCommand fcmd; |
---|
1371 | return apply_command_to_alignment(fcmd, "Formatting alignment", Main, alignment_name, "-."); |
---|
1372 | } |
---|
1373 | |
---|
1374 | GB_ERROR ARB_format_alignment(GBDATA *Main, const char *alignment_name) { |
---|
1375 | GB_ERROR err = 0; |
---|
1376 | |
---|
1377 | if (strcmp(alignment_name, GENOM_ALIGNMENT) != 0) { // NEVER EVER format 'ali_genom' |
---|
1378 | err = GBT_check_data(Main, alignment_name); // detect max. length |
---|
1379 | if (!err) err = format_to_alilen(Main, alignment_name); // format sequences in alignment |
---|
1380 | if (!err) err = GBT_check_data(Main, alignment_name); // sets state to "formatted" |
---|
1381 | } |
---|
1382 | else { |
---|
1383 | err = "It's forbidden to format '" GENOM_ALIGNMENT "'!"; |
---|
1384 | } |
---|
1385 | return err; |
---|
1386 | } |
---|
1387 | |
---|
1388 | GB_ERROR ARB_insdel_columns(GBDATA *Main, const char *alignment_name, long pos, long count, const char *deletable_chars) { |
---|
1389 | /* if count > 0 insert 'count' characters at pos |
---|
1390 | * if count < 0 delete pos to pos+|count| |
---|
1391 | * |
---|
1392 | * Note: deleting is only performed, if found characters in deleted range are listed in 'deletable_chars' |
---|
1393 | * otherwise function returns with an error. |
---|
1394 | * (if 'deletable_chars' contains a '%', any character will be deleted) |
---|
1395 | * |
---|
1396 | * This affects all species' and SAIs having data in given 'alignment_name' and |
---|
1397 | * modifies several data entries found there |
---|
1398 | * (see shall_edit() for details which fields are affected). |
---|
1399 | */ |
---|
1400 | |
---|
1401 | GB_ERROR error = 0; |
---|
1402 | |
---|
1403 | if (pos<0) { |
---|
1404 | error = GBS_global_string("Illegal sequence position %li", pos); |
---|
1405 | } |
---|
1406 | else { |
---|
1407 | const char *description = NULL; |
---|
1408 | |
---|
1409 | SmartPtr<AliEditCommand> idcmd; |
---|
1410 | if (count<0) { |
---|
1411 | idcmd = new AliDeleteCommand(pos, -count); |
---|
1412 | description = "Deleting columns"; |
---|
1413 | } |
---|
1414 | else { |
---|
1415 | idcmd = new AliInsertCommand(pos, count); |
---|
1416 | description = "Inserting columns"; |
---|
1417 | } |
---|
1418 | |
---|
1419 | error = apply_command_to_alignment(*idcmd, description, Main, alignment_name, deletable_chars); |
---|
1420 | } |
---|
1421 | return error; |
---|
1422 | } |
---|
1423 | |
---|
1424 | // AISC_MKPT_PROMOTE:class RangeList; |
---|
1425 | // AISC_MKPT_PROMOTE:enum UseRange { RANGES, SINGLE_COLUMNS }; |
---|
1426 | // AISC_MKPT_PROMOTE:enum InsertWhere { INFRONTOF, BEHIND }; |
---|
1427 | |
---|
1428 | GB_ERROR ARB_delete_columns_using_SAI(GBDATA *Main, const char *alignment_name, const RangeList& ranges, const char *deletable_chars) { |
---|
1429 | // Deletes all columns defined by 'ranges' |
---|
1430 | // from all members (SAIs, seqs, ..) of alignment named 'alignment_name'. |
---|
1431 | |
---|
1432 | GB_ERROR error; |
---|
1433 | if (ranges.empty()) { |
---|
1434 | error = "Done with deleting nothing :)"; |
---|
1435 | } |
---|
1436 | else { |
---|
1437 | AliEditCommand *cmd = new AliAutoFormatCommand; // @@@ use SmartPtr (here and in AliCompositeCommand) |
---|
1438 | for (RangeList::reverse_iterator r = ranges.rbegin(); r != ranges.rend(); ++r) { |
---|
1439 | cmd = new AliCompositeCommand(cmd, new AliDeleteCommand(r->start(), r->size())); |
---|
1440 | } |
---|
1441 | error = apply_command_to_alignment(*cmd, "Deleting columns using SAI", Main, alignment_name, deletable_chars); |
---|
1442 | delete cmd; |
---|
1443 | } |
---|
1444 | return error; |
---|
1445 | } |
---|
1446 | |
---|
1447 | GB_ERROR ARB_insert_columns_using_SAI(GBDATA *Main, const char *alignment_name, const RangeList& ranges, UseRange units, InsertWhere where, size_t amount) { |
---|
1448 | // Insert 'amount' columns into all members of the alignment named 'alignment_name'. |
---|
1449 | // |
---|
1450 | // If units is |
---|
1451 | // - RANGES, each range |
---|
1452 | // - SINGLE_COLUMNS, each column of each range |
---|
1453 | // is handled as a unit. |
---|
1454 | // |
---|
1455 | // InsertWhere specifies whether the insertion happens INFRONTOF or BEHIND |
---|
1456 | |
---|
1457 | GB_ERROR error; |
---|
1458 | if (!amount || ranges.empty()) { |
---|
1459 | error = "Done with inserting no gaps :)"; |
---|
1460 | } |
---|
1461 | else { |
---|
1462 | AliEditCommand *cmd = new AliAutoFormatCommand; // @@@ use SmartPtr (here and in AliCompositeCommand) |
---|
1463 | for (RangeList::reverse_iterator r = ranges.rbegin(); r != ranges.rend(); ++r) { |
---|
1464 | switch (units) { |
---|
1465 | case RANGES: { |
---|
1466 | int pos = 0; |
---|
1467 | switch (where) { |
---|
1468 | case INFRONTOF: pos = r->start(); break; |
---|
1469 | case BEHIND: pos = r->end()+1; break; |
---|
1470 | } |
---|
1471 | cmd = new AliCompositeCommand(cmd, new AliInsertCommand(pos, amount)); |
---|
1472 | break; |
---|
1473 | } |
---|
1474 | case SINGLE_COLUMNS: { |
---|
1475 | for (int pos = r->end(); pos >= r->start(); --pos) { |
---|
1476 | cmd = new AliCompositeCommand(cmd, new AliInsertCommand(where == INFRONTOF ? pos : pos+1, amount)); |
---|
1477 | } |
---|
1478 | break; |
---|
1479 | } |
---|
1480 | } |
---|
1481 | } |
---|
1482 | error = apply_command_to_alignment(*cmd, "Inserting columns using SAI", Main, alignment_name, NULL); |
---|
1483 | delete cmd; |
---|
1484 | } |
---|
1485 | return error; |
---|
1486 | } |
---|
1487 | |
---|
1488 | // -------------------------------------------------------------------------------- |
---|
1489 | |
---|
1490 | #ifdef UNIT_TESTS |
---|
1491 | #ifndef TEST_UNIT_H |
---|
1492 | #include <test_unit.h> |
---|
1493 | #endif |
---|
1494 | #include <arb_unit_test.h> |
---|
1495 | |
---|
// Helper macros for TEST_format_insert_delete.
//
// Most of them expand to several statements which declare locals
// (cmd, str_len, data, error, mod and - for APPLY_CMD - res).
// The TEST_... wrappers below put each use into its own do { } while(0) scope.

// Wraps 'str' into a SequenceAliData and applies 'cmd' to it.
// Declares: str_len, data, error (set by cmd.apply) and mod (the result of cmd.apply).
#define PLAIN_APPLY_CMD(str,cmd) \
    size_t str_len = strlen(str); \
    AliDataPtr data = new SequenceAliData(str, str_len, '-', '.', dontAllowOversize(str_len), Deletable("-.")); \
    GB_ERROR error = NULL; \
    AliDataPtr mod = cmd.apply(data, error)

// Like PLAIN_APPLY_CMD, but expects that no error occurred.
// Declares 'res': NULL if the result does not differ from the input,
// otherwise the result converted by alidata2buffer().
#define APPLY_CMD(str,cmd) \
    PLAIN_APPLY_CMD(str, cmd); \
    TEST_EXPECT_NO_ERROR(error); \
    GB_CSTR res = mod->differs_from(*data) ? alidata2buffer(*mod) : NULL

// Applies an AliFormatCommand(wanted_len) to 'str'
#define DO_FORMAT(str,wanted_len) \
    AliFormatCommand cmd(wanted_len); \
    APPLY_CMD(str, cmd)

// Applies an AliInsertCommand(pos, amount) to 'str'
#define DO_INSERT(str,pos,amount) \
    AliInsertCommand cmd(pos, amount); \
    APPLY_CMD(str, cmd)

// Applies a composite of AliFormatCommand(wanted_len) and AliInsertCommand(pos, amount) to 'str'
#define DO_FORMAT_AND_INSERT(str,wanted_len,pos,amount) \
    AliCompositeCommand cmd(new AliFormatCommand(wanted_len), \
                            new AliInsertCommand(pos,amount)); \
    APPLY_CMD(str, cmd)

// Applies an AliDeleteCommand(pos, amount) to 'str'
#define DO_DELETE(str,pos,amount) \
    AliDeleteCommand cmd(pos, amount); \
    APPLY_CMD(str, cmd)

// The following wrappers run one command and compare 'res' against 'expected'
// (the __BROKEN variants use TEST_EXPECT_EQUAL__BROKEN instead of TEST_EXPECT_EQUAL).

#define TEST_FORMAT(str,wanted_alilen,expected) do { DO_FORMAT(str,wanted_alilen); TEST_EXPECT_EQUAL(res, expected); } while(0)
#define TEST_FORMAT__BROKEN(str,wanted_alilen,expected) do { DO_FORMAT(str,wanted_alilen); TEST_EXPECT_EQUAL__BROKEN(res, expected); } while(0)

#define TEST_INSERT(str,pos,amount,expected) do { DO_INSERT(str,pos,amount); TEST_EXPECT_EQUAL(res, expected); } while(0)
#define TEST_INSERT__BROKEN(str,pos,amount,expected) do { DO_INSERT(str,pos,amount); TEST_EXPECT_EQUAL__BROKEN(res, expected); } while(0)

#define TEST_DELETE(str,pos,amount,expected) do { DO_DELETE(str,pos,amount); TEST_EXPECT_EQUAL(res, expected); } while(0)
#define TEST_DELETE__BROKEN(str,pos,amount,expected) do { DO_DELETE(str,pos,amount); TEST_EXPECT_EQUAL__BROKEN(res, expected); } while(0)

#define TEST_FORMAT_AND_INSERT(str,wanted_alilen,pos,amount,expected) do { DO_FORMAT_AND_INSERT(str,wanted_alilen,pos,amount); TEST_EXPECT_EQUAL(res, expected); } while(0)
#define TEST_FORMAT_AND_INSERT__BROKEN(str,wanted_alilen,pos,amount,expected) do { DO_FORMAT_AND_INSERT(str,wanted_alilen,pos,amount); TEST_EXPECT_EQUAL__BROKEN(res, expected); } while(0)

// Expects that formatting 'str' to 'wanted_alilen' fails with an error containing 'exp_err'
#define TEST_FORMAT_ERROR(str,wanted_alilen,exp_err) do { \
        AliFormatCommand cmd(wanted_alilen); \
        PLAIN_APPLY_CMD(str, cmd); \
        TEST_EXPECT_ERROR_CONTAINS(error, exp_err); \
    } while(0)

// Expects that deleting 'amount' columns at 'pos' from 'str' fails with an error containing 'exp_err'
#define TEST_DELETE_ERROR(str,pos,amount,exp_err) do { \
        AliDeleteCommand cmd(pos, amount); \
        PLAIN_APPLY_CMD(str, cmd); \
        TEST_EXPECT_ERROR_CONTAINS(error, exp_err); \
    } while(0)
---|
1547 | |
---|
1548 | |
---|
1549 | // -------------------------------------------------------------------------------- |
---|
1550 | |
---|
void TEST_format_insert_delete() {
    // this test is a bit weird.
    //
    // originally it was used to test the function gbt_insert_delete, which is gone now.
    // now it tests AliFormatCommand, AliInsertCommand, AliDeleteCommand and AliCompositeCommand (but quite implicit).
    //
    // Each TEST_... macro applies one command to a plain sequence string
    // and compares the result with the expected string.

    // expected result if the command leaves the data unchanged (APPLY_CMD yields NULL in that case)
    const char *UNMODIFIED = NULL;

    // format to a longer length: expected results are padded with '.'
    TEST_FORMAT("xxx", 5, "xxx..");
    TEST_FORMAT(".x.", 5, ".x...");
    TEST_FORMAT(".x..", 5, ".x...");
    TEST_FORMAT(".x...", 5, UNMODIFIED);

    // format to a shorter length: works if only gaps get cut off, fails otherwise
    TEST_FORMAT("xxx--", 3, "xxx");
    TEST_FORMAT("xxx..", 3, "xxx");
    TEST_FORMAT_ERROR("xxxxx", 3, "You tried to delete 'x' at position 3 -> Operation aborted");
    TEST_FORMAT_ERROR("xxx", 0, "You tried to delete 'x' at position 0 -> Operation aborted");

    // insert/delete in the middle
    TEST_INSERT("abcde", 3, 0, UNMODIFIED);
    TEST_INSERT("abcde", 3, 1, "abc-de");
    TEST_INSERT("abcde", 3, 2, "abc--de");

    TEST_DELETE("abcde", 3, 0, UNMODIFIED);
    TEST_DELETE("abc-de", 3, 1, "abcde");
    TEST_DELETE("abc--de", 3, 2, "abcde");
    TEST_DELETE_ERROR("abc-xde", 3, 2, "You tried to delete 'x' at position 4 -> Operation aborted");

    // insert/delete at end
    TEST_INSERT("abcde", 5, 1, "abcde.");
    TEST_INSERT("abcde", 5, 4, "abcde....");

    TEST_DELETE("abcde-", 5, 1, "abcde");
    TEST_DELETE("abcde----", 5, 4, "abcde");

    // insert/delete at start
    TEST_INSERT("abcde", 0, 1, ".abcde");
    TEST_INSERT("abcde", 0, 4, "....abcde");

    TEST_DELETE("-abcde", 0, 1, "abcde");
    TEST_DELETE("----abcde", 0, 4, "abcde");

    // insert behind end
    // (data is formatted to length 10 first, then columns are inserted at position 8)
    TEST_FORMAT_AND_INSERT("abcde", 10, 8, 1, "abcde......");
    TEST_FORMAT_AND_INSERT("abcde", 10, 8, 4, "abcde.........");

    // insert/delete all
    TEST_INSERT("", 0, 3, "...");
    TEST_DELETE("---", 0, 3, "");

    // NOTE(review): presumably releases the buffer used by alidata2buffer() - confirm
    free_insDelBuffer();
}
---|
1603 | |
---|
1604 | // ------------------------------ |
---|
1605 | |
---|
// Sequence data of the test species
// (passed to TEST_CREATE_DB by TEST_insert_delete_DB; the meaning of the first field is defined by test_alignment_data).
struct arb_unit_test::test_alignment_data TADinsdel[] = {
    { 1, "MtnK1722", "...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUCACCUCC....." },
    { 1, "MhnFormi", "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU....." },
    { 1, "MhnT1916", "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU----" },
};

// Data of the test SAIs (inserted via TEST_DB_INSERT_SAI by add_some_SAIs)
struct arb_unit_test::test_alignment_data EXTinsdel[] = {
    { 0, "ECOLI", "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCCCACCUGA...." },
    { 0, "HELIX", ".....[<[.........[..[..[<<.[..].>>]....]..]....].>......]" },
    { 0, "HELIX_NR", ".....1.1.........25.25.34..34.34..34...25.25...1........1" },
};

// content written to the "_REF" entry of SAI "HELIX" and to "secedit/structs/ali_mini/struct/ref"
#define HELIX_REF ".....x..x........x...x.x....x.x....x...x...x...x.........x"
// content written to the "_STRUCT" entry of SAI "HELIX"
#define HELIX_STRUCT "VERSION=3\nLOOP={etc.pp\n}\n"
---|
1620 | |
---|
1621 | static const char *read_item_entry(GBDATA *gb_item, const char *ali_name, const char *entry_name) { |
---|
1622 | const char *result = NULL; |
---|
1623 | if (gb_item) { |
---|
1624 | GBDATA *gb_ali = GB_find(gb_item, ali_name, SEARCH_CHILD); |
---|
1625 | if (gb_ali) { |
---|
1626 | GBDATA *gb_entry = GB_entry(gb_ali, entry_name); |
---|
1627 | if (gb_entry) { |
---|
1628 | result = GB_read_char_pntr(gb_entry); |
---|
1629 | } |
---|
1630 | } |
---|
1631 | } |
---|
1632 | if (!result) TEST_EXPECT_NO_ERROR(GB_await_error()); |
---|
1633 | return result; |
---|
1634 | } |
---|
1635 | static char *ints2string(const GB_UINT4 *ints, size_t count) { |
---|
1636 | char *str = (char*)malloc(count+1); |
---|
1637 | for (size_t c = 0; c<count; ++c) { |
---|
1638 | str[c] = (ints[c]<10) ? ints[c]+'0' : '?'; |
---|
1639 | } |
---|
1640 | str[count] = 0; |
---|
1641 | return str; |
---|
1642 | } |
---|
1643 | static GB_UINT4 *string2ints(const char *str, size_t count) { |
---|
1644 | GB_UINT4 *ints = (GB_UINT4*)malloc(sizeof(GB_UINT4)*count); |
---|
1645 | for (size_t c = 0; c<count; ++c) { |
---|
1646 | ints[c] = int(str[c]-'0'); |
---|
1647 | } |
---|
1648 | return ints; |
---|
1649 | } |
---|
static char *floats2string(const float *floats, size_t count) {
    // Converts 'count' floats into a 0-terminated, malloc'ed string (caller has to free it).
    // Each float f is encoded as the character char(f*64.0+0.5)+' '+1
    // (i.e. 0.0 is shown as '!'); string2floats() performs the reverse conversion.

    char *encoded = (char*)malloc(count+1);
    char *out     = encoded;
    for (const float *f = floats; f != floats+count; ++f) {
        *out++ = char(*f*64.0+0.5)+' '+1;
    }
    *out = 0;
    return encoded;
}
---|
static float *string2floats(const char *str, size_t count) {
    // Converts the first 'count' characters of 'str' into a malloc'ed array of floats
    // (caller has to free it). Each character c is decoded as float(c-' '-1)/64.0,
    // which reverts the encoding done by floats2string().

    float  *decoded = (float*)malloc(sizeof(float)*count);
    size_t  idx     = 0;
    while (idx<count) {
        decoded[idx] = float(str[idx]-' '-1)/64.0;
        ++idx;
    }
    return decoded;
}
---|
1665 | |
---|
1666 | static GBDATA *get_ali_entry(GBDATA *gb_item, const char *ali_name, const char *entry_name) { |
---|
1667 | GBDATA *gb_entry = NULL; |
---|
1668 | if (gb_item) { |
---|
1669 | GBDATA *gb_ali = GB_find(gb_item, ali_name, SEARCH_CHILD); |
---|
1670 | if (gb_ali) gb_entry = GB_entry(gb_ali, entry_name); |
---|
1671 | } |
---|
1672 | return gb_entry; |
---|
1673 | } |
---|
1674 | |
---|
1675 | static char *read_item_ints_entry_as_string(GBDATA *gb_item, const char *ali_name, const char *entry_name) { |
---|
1676 | char *result = NULL; |
---|
1677 | GBDATA *gb_entry = get_ali_entry(gb_item, ali_name, entry_name); |
---|
1678 | if (gb_entry) { |
---|
1679 | GB_UINT4 *ints = GB_read_ints(gb_entry); |
---|
1680 | result = ints2string(ints, GB_read_count(gb_entry)); |
---|
1681 | free(ints); |
---|
1682 | } |
---|
1683 | if (!result) TEST_EXPECT_NO_ERROR(GB_await_error()); |
---|
1684 | return result; |
---|
1685 | } |
---|
1686 | static char *read_item_floats_entry_as_string(GBDATA *gb_item, const char *ali_name, const char *entry_name) { |
---|
1687 | char *result = NULL; |
---|
1688 | GBDATA *gb_entry = get_ali_entry(gb_item, ali_name, entry_name); |
---|
1689 | if (gb_entry) { |
---|
1690 | float *floats = GB_read_floats(gb_entry); |
---|
1691 | result = floats2string(floats, GB_read_count(gb_entry)); |
---|
1692 | free(floats); |
---|
1693 | } |
---|
1694 | if (!result) TEST_EXPECT_NO_ERROR(GB_await_error()); |
---|
1695 | return result; |
---|
1696 | } |
---|
1697 | |
---|
// Macros checking database content.
// All of them expect 'gb_main' and 'ali_name' to be defined in the calling scope.

// Expects that string entry 'ename' of the item found by find(gb_main, name) equals 'expected'
#define TEST_ITEM_HAS_ENTRY(find,name,ename,expected) \
    TEST_EXPECT_EQUAL(read_item_entry(find(gb_main, name), ali_name, ename), expected)

// Same for an INTS entry (compared in its ints2string() representation).
// The string is held by a temporary SmartCharPtr (presumably to free it after the comparison).
#define TEST_ITEM_HAS_INTSENTRY(find,name,ename,expected) \
    TEST_EXPECT_EQUAL(&*SmartCharPtr(read_item_ints_entry_as_string(find(gb_main, name), ali_name, ename)), expected)

// Same for a FLOATS entry (compared in its floats2string() representation)
#define TEST_ITEM_HAS_FLOATSENTRY(find,name,ename,expected) \
    TEST_EXPECT_EQUAL(&*SmartCharPtr(read_item_floats_entry_as_string(find(gb_main, name), ali_name, ename)), expected)

// Checks the "data" entry of an item
#define TEST_ITEM_HAS_DATA(find,name,expected) TEST_ITEM_HAS_ENTRY(find,name,"data",expected)

// Shortcuts for species and SAIs; 'ad' is an element of TADinsdel or EXTinsdel (only its name is used)
#define TEST_SPECIES_HAS_DATA(ad,sd) TEST_ITEM_HAS_DATA(GBT_find_species,ad.name,sd)
#define TEST_SAI_HAS_DATA(ad,sd) TEST_ITEM_HAS_DATA(GBT_find_SAI,ad.name,sd)
#define TEST_SAI_HAS_ENTRY(ad,ename,sd) TEST_ITEM_HAS_ENTRY(GBT_find_SAI,ad.name,ename,sd)

// Check the INTS entry "NN" resp. the FLOATS entry "FF" of a species
#define TEST_SPECIES_HAS_INTS(ad,id) TEST_ITEM_HAS_INTSENTRY(GBT_find_species,ad.name,"NN",id)
#define TEST_SPECIES_HAS_FLOATS(ad,fd) TEST_ITEM_HAS_FLOATSENTRY(GBT_find_species,ad.name,"FF",fd)

// Checks the complete test data in one go:
// - sd0..sd2: expected "data" of the three species in TADinsdel
// - ed0..ed2: expected "data" of the three SAIs in EXTinsdel
// - ref:      expected content of "_REF" of EXTinsdel[1] and of "secedit/structs/ali_mini/struct/ref"
// - ints:     expected "NN" entry of TADinsdel[0]
// - floats:   expected "FF" entry of TADinsdel[0]
// - struct:   expected content of "_STRUCT" of EXTinsdel[1]
#define TEST_DATA(sd0,sd1,sd2,ed0,ed1,ed2,ref,ints,floats,struct) do { \
        TEST_SPECIES_HAS_DATA(TADinsdel[0], sd0); \
        TEST_SPECIES_HAS_DATA(TADinsdel[1], sd1); \
        TEST_SPECIES_HAS_DATA(TADinsdel[2], sd2); \
        TEST_SAI_HAS_DATA(EXTinsdel[0], ed0); \
        TEST_SAI_HAS_DATA(EXTinsdel[1], ed1); \
        TEST_SAI_HAS_DATA(EXTinsdel[2], ed2); \
        TEST_SAI_HAS_ENTRY(EXTinsdel[1], "_REF", ref); \
        GBDATA *gb_ref = GB_search(gb_main, "secedit/structs/ali_mini/struct/ref", GB_FIND); \
        TEST_EXPECT_EQUAL(GB_read_char_pntr(gb_ref), ref); \
        TEST_SPECIES_HAS_INTS(TADinsdel[0], ints); \
        TEST_SPECIES_HAS_FLOATS(TADinsdel[0], floats); \
        TEST_SAI_HAS_ENTRY(EXTinsdel[1], "_STRUCT", struct); \
    } while(0)
---|
1730 | |
---|
1731 | static int get_alignment_aligned(GBDATA *gb_main, const char *aliname) { // former GBT_get_alignment_aligned |
---|
1732 | GBDATA *gb_alignment = GBT_get_alignment(gb_main, aliname); |
---|
1733 | return gb_alignment ? *GBT_read_int(gb_alignment, "aligned") : -1; |
---|
1734 | } |
---|
1735 | |
---|
// Expects that the alignment 'ali_name' has length 'len' and that its 'aligned' field equals 'aligned'.
// Uses 'gb_main' and 'ali_name' from the calling scope.
#define TEST_ALI_LEN_ALIGNED(len,aligned) do { \
        TEST_EXPECT_EQUAL(GBT_get_alignment_len(gb_main, ali_name), len); \
        TEST_EXPECT_EQUAL(get_alignment_aligned(gb_main, ali_name), aligned); \
    } while(0)
---|
1740 | |
---|
1741 | static ARB_ERROR add_some_SAIs(GBDATA *gb_main, const char *ali_name) { |
---|
1742 | ARB_ERROR error; |
---|
1743 | GB_transaction ta(gb_main); |
---|
1744 | TEST_DB_INSERT_SAI(gb_main, error, ali_name, EXTinsdel); |
---|
1745 | |
---|
1746 | // add secondary structure to "HELIX" |
---|
1747 | GBDATA *gb_helix = GBT_find_SAI(gb_main, "HELIX"); |
---|
1748 | if (!gb_helix) error = GB_await_error(); |
---|
1749 | else { |
---|
1750 | GBDATA *gb_struct = GBT_add_data(gb_helix, ali_name, "_STRUCT", GB_STRING); |
---|
1751 | if (!gb_struct) error = GB_await_error(); |
---|
1752 | else error = GB_write_string(gb_struct, HELIX_STRUCT); |
---|
1753 | |
---|
1754 | GBDATA *gb_struct_ref = GBT_add_data(gb_helix, ali_name, "_REF", GB_STRING); |
---|
1755 | if (!gb_struct_ref) error = GB_await_error(); |
---|
1756 | else error = GB_write_string(gb_struct_ref, HELIX_REF); |
---|
1757 | } |
---|
1758 | |
---|
1759 | // add stored secondary structure |
---|
1760 | GBDATA *gb_ref = GB_search(gb_main, "secedit/structs/ali_mini/struct/ref", GB_STRING); |
---|
1761 | if (!gb_ref) error = GB_await_error(); |
---|
1762 | else error = GB_write_string(gb_ref, HELIX_REF); |
---|
1763 | |
---|
1764 | // create one INTS and one FLOATS entry for first species |
---|
1765 | GBDATA *gb_spec = GBT_find_species(gb_main, TADinsdel[0].name); |
---|
1766 | { |
---|
1767 | GBDATA *gb_ints = GBT_add_data(gb_spec, ali_name, "NN", GB_INTS); |
---|
1768 | const char *intsAsStr = "9346740960354855652100942568200611650200211394358998513"; |
---|
1769 | size_t len = strlen(intsAsStr); |
---|
1770 | GB_UINT4 *ints = string2ints(intsAsStr, len); |
---|
1771 | { |
---|
1772 | char *asStr = ints2string(ints, len); |
---|
1773 | TEST_EXPECT_EQUAL(intsAsStr, asStr); |
---|
1774 | free(asStr); |
---|
1775 | } |
---|
1776 | error = GB_write_ints(gb_ints, ints, len); |
---|
1777 | free(ints); |
---|
1778 | } |
---|
1779 | { |
---|
1780 | GBDATA *gb_ints = GBT_add_data(gb_spec, ali_name, "FF", GB_FLOATS); |
---|
1781 | const char *floatsAsStr = "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uCLDoHlWV59DW"; |
---|
1782 | size_t len = strlen(floatsAsStr); |
---|
1783 | float *floats = string2floats(floatsAsStr, len); |
---|
1784 | { |
---|
1785 | char *asStr = floats2string(floats, len); |
---|
1786 | TEST_EXPECT_EQUAL(floatsAsStr, asStr); |
---|
1787 | free(asStr); |
---|
1788 | } |
---|
1789 | error = GB_write_floats(gb_ints, floats, len); |
---|
1790 | free(floats); |
---|
1791 | } |
---|
1792 | return error; |
---|
1793 | } |
---|
1794 | |
---|
1795 | void TEST_insert_delete_DB() { |
---|
1796 | GB_shell shell; |
---|
1797 | ARB_ERROR error; |
---|
1798 | const char *ali_name = "ali_mini"; |
---|
1799 | GBDATA *gb_main = TEST_CREATE_DB(error, ali_name, TADinsdel, false); |
---|
1800 | |
---|
1801 | arb_suppress_progress noProgress; |
---|
1802 | |
---|
1803 | if (!error) error = add_some_SAIs(gb_main, ali_name); |
---|
1804 | if (!error) { |
---|
1805 | GB_transaction ta(gb_main); |
---|
1806 | |
---|
1807 | for (int pass = 1; pass <= 2; ++pass) { |
---|
1808 | if (pass == 1) TEST_ALI_LEN_ALIGNED(56, 1); |
---|
1809 | if (pass == 2) TEST_ALI_LEN_ALIGNED(57, 0); // was marked as "not aligned" |
---|
1810 | |
---|
1811 | TEST_DATA("...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUCACCUCC.....", |
---|
1812 | "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU.....", |
---|
1813 | "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU----", |
---|
1814 | "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCCCACCUGA....", |
---|
1815 | ".....[<[.........[..[..[<<.[..].>>]....]..]....].>......]", |
---|
1816 | ".....1.1.........25.25.34..34.34..34...25.25...1........1", |
---|
1817 | ".....x..x........x...x.x....x.x....x...x...x...x.........x", |
---|
1818 | "9346740960354855652100942568200611650200211394358998513", // a INTS entry |
---|
1819 | "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uCLDoHlWV59DW", // a FLOATS entry |
---|
1820 | HELIX_STRUCT); |
---|
1821 | |
---|
1822 | if (pass == 1) TEST_EXPECT_NO_ERROR(GBT_check_data(gb_main, ali_name)); |
---|
1823 | } |
---|
1824 | |
---|
1825 | TEST_EXPECT_NO_ERROR(ARB_format_alignment(gb_main, ali_name)); |
---|
1826 | TEST_ALI_LEN_ALIGNED(57, 1); |
---|
1827 | TEST_DATA("...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUCACCUCC......", |
---|
1828 | "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU.....", |
---|
1829 | "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU-----", // @@@ <- should convert '-' to '.' |
---|
1830 | "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCCCACCUGA.....", |
---|
1831 | ".....[<[.........[..[..[<<.[..].>>]....]..]....].>......]", |
---|
1832 | ".....1.1.........25.25.34..34.34..34...25.25...1........1", |
---|
1833 | ".....x..x........x...x.x....x.x....x...x...x...x.........x", |
---|
1834 | "934674096035485565210094256820061165020021139435899851300", |
---|
1835 | "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uCLDoHlWV59DW!", |
---|
1836 | HELIX_STRUCT); |
---|
1837 | |
---|
1838 | // text-editor column -> alignment column |
---|
1839 | #define COL(col) ((col)-19) |
---|
1840 | |
---|
1841 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(64), 2, "")); // insert in middle |
---|
1842 | TEST_ALI_LEN_ALIGNED(59, 1); |
---|
1843 | TEST_DATA("...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC......", |
---|
1844 | "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.....", |
---|
1845 | "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-----", |
---|
1846 | "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.....", |
---|
1847 | ".....[<[.........[..[..[<<.[..].>>]....]..]......].>......]", |
---|
1848 | ".....1.1.........25.25.34..34.34..34...25.25.....1........1", |
---|
1849 | ".....x..x........x...x.x....x.x....x...x...x.....x.........x", |
---|
1850 | "93467409603548556521009425682006116502002113900435899851300", |
---|
1851 | "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59DW!", |
---|
1852 | HELIX_STRUCT); |
---|
1853 | |
---|
1854 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(75), 2, "")); // insert near end |
---|
1855 | TEST_ALI_LEN_ALIGNED(61, 1); |
---|
1856 | TEST_DATA("...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........", |
---|
1857 | "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......", |
---|
1858 | "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------", |
---|
1859 | "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......", |
---|
1860 | ".....[<[.........[..[..[<<.[..].>>]....]..]......].>........]", |
---|
1861 | ".....1.1.........25.25.34..34.34..34...25.25.....1..........1", |
---|
1862 | ".....x..x........x...x.x....x.x....x...x...x.....x...........x", |
---|
1863 | "9346740960354855652100942568200611650200211390043589985100300", |
---|
1864 | "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
1865 | HELIX_STRUCT); |
---|
1866 | |
---|
1867 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(20), 2, "")); // insert near start |
---|
1868 | TEST_ALI_LEN_ALIGNED(63, 1); |
---|
1869 | TEST_DATA(".....G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........", |
---|
1870 | "-----A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......", |
---|
1871 | ".....A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------", |
---|
1872 | "-----U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......", |
---|
1873 | ".......[<[.........[..[..[<<.[..].>>]....]..]......].>........]", |
---|
1874 | ".......1.1.........25.25.34..34.34..34...25.25.....1..........1", |
---|
1875 | ".......x..x........x...x.x....x.x....x...x...x.....x...........x", |
---|
1876 | "900346740960354855652100942568200611650200211390043589985100300", |
---|
1877 | "O!!Du8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
1878 | HELIX_STRUCT); |
---|
1879 | |
---|
1880 | |
---|
1881 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(26), 2, "")); // insert at left helix start |
---|
1882 | TEST_ALI_LEN_ALIGNED(65, 1); |
---|
1883 | TEST_DATA(".....G---GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........", |
---|
1884 | "-----A---CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......", |
---|
1885 | ".....A---CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------", |
---|
1886 | "-----U---GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......", |
---|
1887 | ".........[<[.........[..[..[<<.[..].>>]....]..]......].>........]", |
---|
1888 | ".........1.1.........25.25.34..34.34..34...25.25.....1..........1", |
---|
1889 | ".........x..x........x...x.x....x.x....x...x...x.....x...........x", |
---|
1890 | "90034670040960354855652100942568200611650200211390043589985100300", |
---|
1891 | "O!!Du8E!!Jh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
1892 | HELIX_STRUCT); |
---|
1893 | |
---|
1894 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(29), 2, "")); // insert behind left helix start |
---|
1895 | TEST_ALI_LEN_ALIGNED(67, 1); |
---|
1896 | TEST_DATA(".....G---G--GC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........", |
---|
1897 | "-----A---C--GA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......", |
---|
1898 | ".....A---C--GA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------", |
---|
1899 | "-----U---G--CC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......", |
---|
1900 | ".........[..<[.........[..[..[<<.[..].>>]....]..]......].>........]", |
---|
1901 | ".........1...1.........25.25.34..34.34..34...25.25.....1..........1", |
---|
1902 | ".........x....x........x...x.x....x.x....x...x...x.....x...........x", |
---|
1903 | "9003467004000960354855652100942568200611650200211390043589985100300", |
---|
1904 | "O!!Du8E!!J!!h60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
1905 | HELIX_STRUCT); |
---|
1906 | |
---|
1907 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(32), 2, "")); // insert at left helix end |
---|
1908 | TEST_ALI_LEN_ALIGNED(69, 1); |
---|
1909 | TEST_DATA(".....G---G--G--C-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........", |
---|
1910 | "-----A---C--G--A-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......", |
---|
1911 | ".....A---C--G--A-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------", |
---|
1912 | "-----U---G--C--C-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......", |
---|
1913 | ".........[..<..[.........[..[..[<<.[..].>>]....]..]......].>........]", |
---|
1914 | ".........1.....1.........25.25.34..34.34..34...25.25.....1..........1", |
---|
1915 | ".........x......x........x...x.x....x.x....x...x...x.....x...........x", |
---|
1916 | "900346700400000960354855652100942568200611650200211390043589985100300", |
---|
1917 | "O!!Du8E!!J!!h!!60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
1918 | HELIX_STRUCT); |
---|
1919 | |
---|
1920 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(35), 2, "")); // insert behind left helix end |
---|
1921 | TEST_ALI_LEN_ALIGNED(71, 1); |
---|
1922 | TEST_DATA(".....G---G--G--C---C-G...--A--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCC........", |
---|
1923 | "-----A---C--G--A---U-C-----C--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU.......", |
---|
1924 | ".....A---C--G--A---A-C.....G--G--GAA-CCUG-CGGC-UGG--AUC--ACCUCCU-------", |
---|
1925 | "-----U---G--C--C---U-G-----G--C--CCU-UAGC-GCGG-UGG--UCC--CACCUGA.......", |
---|
1926 | ".........[..<..[...........[..[..[<<.[..].>>]....]..]......].>........]", |
---|
1927 | ".........1.....1...........25.25.34..34.34..34...25.25.....1..........1", |
---|
1928 | ".........x........x........x...x.x....x.x....x...x...x.....x...........x", // @@@ _REF gets destroyed here! (see #159) |
---|
1929 | // ^ ^ |
---|
1930 | "90034670040000090060354855652100942568200611650200211390043589985100300", |
---|
1931 | "O!!Du8E!!J!!h!!6!!0e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
1932 | HELIX_STRUCT); |
---|
1933 | |
---|
1934 | |
---|
1935 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(59), 2, "")); // insert at right helix start |
---|
1936 | TEST_ALI_LEN_ALIGNED(73, 1); |
---|
1937 | TEST_DATA(".....G---G--G--C---C-G...--A--G--GAA-CCU--G-CGGC-UGG--AUC--ACCUCC........", |
---|
1938 | "-----A---C--G--A---U-C-----C--G--GAA-CCU--G-CGGC-UGG--AUC--ACCUCCU.......", |
---|
1939 | ".....A---C--G--A---A-C.....G--G--GAA-CCU--G-CGGC-UGG--AUC--ACCUCCU-------", |
---|
1940 | "-----U---G--C--C---U-G-----G--C--CCU-UAG--C-GCGG-UGG--UCC--CACCUGA.......", |
---|
1941 | ".........[..<..[...........[..[..[<<.[....].>>]....]..]......].>........]", |
---|
1942 | ".........1.....1...........25.25.34..34...34..34...25.25.....1..........1", |
---|
1943 | ".........x........x........x...x.x....x...x....x...x...x.....x...........x", |
---|
1944 | "9003467004000009006035485565210094256820000611650200211390043589985100300", |
---|
1945 | "O!!Du8E!!J!!h!!6!!0e1XYLgxvzrqmeMiMAjB5E!!JxT6JPiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
1946 | HELIX_STRUCT); |
---|
1947 | |
---|
1948 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(62), 2, "")); // insert behind right helix start |
---|
1949 | TEST_ALI_LEN_ALIGNED(75, 1); |
---|
1950 | TEST_DATA(".....G---G--G--C---C-G...--A--G--GAA-CCU--G---CGGC-UGG--AUC--ACCUCC........", |
---|
1951 | "-----A---C--G--A---U-C-----C--G--GAA-CCU--G---CGGC-UGG--AUC--ACCUCCU.......", |
---|
1952 | ".....A---C--G--A---A-C.....G--G--GAA-CCU--G---CGGC-UGG--AUC--ACCUCCU-------", |
---|
1953 | "-----U---G--C--C---U-G-----G--C--CCU-UAG--C---GCGG-UGG--UCC--CACCUGA.......", |
---|
1954 | ".........[..<..[...........[..[..[<<.[....]...>>]....]..]......].>........]", |
---|
1955 | ".........1.....1...........25.25.34..34...3..4..34...25.25.....1..........1", // @@@ <- helix nr destroyed |
---|
1956 | // ^^^^ |
---|
1957 | ".........x........x........x...x.x....x...x......x...x...x.....x...........x", |
---|
1958 | "900346700400000900603548556521009425682000000611650200211390043589985100300", |
---|
1959 | "O!!Du8E!!J!!h!!6!!0e1XYLgxvzrqmeMiMAjB5E!!J!!xT6JPiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
1960 | HELIX_STRUCT); |
---|
1961 | |
---|
1962 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(67), 2, "")); // insert at right helix end |
---|
1963 | TEST_ALI_LEN_ALIGNED(77, 1); |
---|
1964 | TEST_DATA(".....G---G--G--C---C-G...--A--G--GAA-CCU--G---CG--GC-UGG--AUC--ACCUCC........", |
---|
1965 | "-----A---C--G--A---U-C-----C--G--GAA-CCU--G---CG--GC-UGG--AUC--ACCUCCU.......", |
---|
1966 | ".....A---C--G--A---A-C.....G--G--GAA-CCU--G---CG--GC-UGG--AUC--ACCUCCU-------", |
---|
1967 | "-----U---G--C--C---U-G-----G--C--CCU-UAG--C---GC--GG-UGG--UCC--CACCUGA.......", |
---|
1968 | ".........[..<..[...........[..[..[<<.[....]...>>..]....]..]......].>........]", |
---|
1969 | ".........1.....1...........25.25.34..34...3..4....34...25.25.....1..........1", |
---|
1970 | ".........x........x........x...x.x....x...x........x...x...x.....x...........x", |
---|
1971 | "90034670040000090060354855652100942568200000061100650200211390043589985100300", |
---|
1972 | "O!!Du8E!!J!!h!!6!!0e1XYLgxvzrqmeMiMAjB5E!!J!!xT6!!JPiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
1973 | HELIX_STRUCT); |
---|
1974 | |
---|
1975 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(70), 2, "")); // insert behind right helix end |
---|
1976 | TEST_ALI_LEN_ALIGNED(79, 1); |
---|
1977 | TEST_DATA(".....G---G--G--C---C-G...--A--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCC........", |
---|
1978 | "-----A---C--G--A---U-C-----C--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU.......", |
---|
1979 | ".....A---C--G--A---A-C.....G--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU-------", |
---|
1980 | "-----U---G--C--C---U-G-----G--C--CCU-UAG--C---GC--G--G-UGG--UCC--CACCUGA.......", |
---|
1981 | ".........[..<..[...........[..[..[<<.[....]...>>..]......]..]......].>........]", |
---|
1982 | ".........1.....1...........25.25.34..34...3..4....3..4...25.25.....1..........1", // @@@ <- helix nr destroyed |
---|
1983 | ".........x........x........x...x.x....x...x..........x...x...x.....x...........x", // @@@ _REF gets destroyed here! (see #159) |
---|
1984 | "9003467004000009006035485565210094256820000006110060050200211390043589985100300", |
---|
1985 | "O!!Du8E!!J!!h!!6!!0e1XYLgxvzrqmeMiMAjB5E!!J!!xT6!!J!!PiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
1986 | HELIX_STRUCT); |
---|
1987 | |
---|
1988 | |
---|
1989 | |
---|
1990 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(44), 2, "")); // insert at gap border (between different gap types) |
---|
1991 | TEST_ALI_LEN_ALIGNED(81, 1); |
---|
1992 | TEST_DATA(".....G---G--G--C---C-G...----A--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCC........", // now prefers '-' here |
---|
1993 | "-----A---C--G--A---U-C-------C--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU.......", |
---|
1994 | ".....A---C--G--A---A-C.......G--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU-------", |
---|
1995 | "-----U---G--C--C---U-G-------G--C--CCU-UAG--C---GC--G--G-UGG--UCC--CACCUGA.......", |
---|
1996 | ".........[..<..[.............[..[..[<<.[....]...>>..]......]..]......].>........]", |
---|
1997 | ".........1.....1.............25.25.34..34...3..4....3..4...25.25.....1..........1", |
---|
1998 | ".........x........x..........x...x.x....x...x..........x...x...x.....x...........x", |
---|
1999 | "900346700400000900603548500565210094256820000006110060050200211390043589985100300", |
---|
2000 | "O!!Du8E!!J!!h!!6!!0e1XYLg!!xvzrqmeMiMAjB5E!!J!!xT6!!J!!PiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
2001 | HELIX_STRUCT); |
---|
2002 | |
---|
2003 | |
---|
2004 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(42), -6, "-.")); // delete gaps |
---|
2005 | TEST_ALI_LEN_ALIGNED(75, 1); |
---|
2006 | TEST_DATA(".....G---G--G--C---C-G.A--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCC........", |
---|
2007 | "-----A---C--G--A---U-C-C--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU.......", |
---|
2008 | ".....A---C--G--A---A-C.G--G--GAA-CCU--G---CG--G--C-UGG--AUC--ACCUCCU-------", |
---|
2009 | "-----U---G--C--C---U-G-G--C--CCU-UAG--C---GC--G--G-UGG--UCC--CACCUGA.......", |
---|
2010 | ".........[..<..[.......[..[..[<<.[....]...>>..]......]..]......].>........]", |
---|
2011 | ".........1.....1.......25.25.34..34...3..4....3..4...25.25.....1..........1", |
---|
2012 | ".........x........x....x...x.x....x...x..........x...x...x.....x...........x", |
---|
2013 | "900346700400000900603545210094256820000006110060050200211390043589985100300", |
---|
2014 | "O!!Du8E!!J!!h!!6!!0e1XYzrqmeMiMAjB5E!!J!!xT6!!J!!PiCvQrq4uC!!LDoHlWV59!!DW!", |
---|
2015 | HELIX_STRUCT); |
---|
2016 | |
---|
2017 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(74), -1, "-.")); // delete gap inside helix destroying helix nrs |
---|
2018 | TEST_ALI_LEN_ALIGNED(74, 1); |
---|
2019 | TEST_DATA(".....G---G--G--C---C-G.A--G--GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCC........", |
---|
2020 | "-----A---C--G--A---U-C-C--G--GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCCU.......", |
---|
2021 | ".....A---C--G--A---A-C.G--G--GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCCU-------", |
---|
2022 | "-----U---G--C--C---U-G-G--C--CCU-UAG--C---GC--G--G-UGG-UCC--CACCUGA.......", |
---|
2023 | ".........[..<..[.......[..[..[<<.[....]...>>..]......].]......].>........]", |
---|
2024 | ".........1.....1.......25.25.34..34...3..4....3..4...2525.....1..........1", // @@@ helix nr destroyed ('25.25' -> '2525') |
---|
2025 | ".........x........x....x...x.x....x...x..........x...x..x.....x...........x", |
---|
2026 | "90034670040000090060354521009425682000000611006005020021390043589985100300", |
---|
2027 | "O!!Du8E!!J!!h!!6!!0e1XYzrqmeMiMAjB5E!!J!!xT6!!J!!PiCvQr4uC!!LDoHlWV59!!DW!", |
---|
2028 | HELIX_STRUCT); |
---|
2029 | |
---|
2030 | |
---|
2031 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(47), -1, "-.")); // delete gap between helices destroying helix nrs |
---|
2032 | TEST_ALI_LEN_ALIGNED(73, 1); |
---|
2033 | TEST_DATA(".....G---G--G--C---C-G.A--G-GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCC........", |
---|
2034 | "-----A---C--G--A---U-C-C--G-GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCCU.......", |
---|
2035 | ".....A---C--G--A---A-C.G--G-GAA-CCU--G---CG--G--C-UGG-AUC--ACCUCCU-------", |
---|
2036 | "-----U---G--C--C---U-G-G--C-CCU-UAG--C---GC--G--G-UGG-UCC--CACCUGA.......", |
---|
2037 | ".........[..<..[.......[..[.[<<.[....]...>>..]......].]......].>........]", |
---|
2038 | ".........1.....1.......25.2534..34...3..4....3..4...2525.....1..........1", // @@@ helix nr destroyed ('25.34' -> '2534') |
---|
2039 | ".........x........x....x...xx....x...x..........x...x..x.....x...........x", |
---|
2040 | "9003467004000009006035452100425682000000611006005020021390043589985100300", |
---|
2041 | "O!!Du8E!!J!!h!!6!!0e1XYzrqmeiMAjB5E!!J!!xT6!!J!!PiCvQr4uC!!LDoHlWV59!!DW!", |
---|
2042 | HELIX_STRUCT); |
---|
2043 | |
---|
2044 | |
---|
2045 | TEST_EXPECT_NO_ERROR(ARB_insdel_columns(gb_main, ali_name, COL(72), -5, "%")); // delete anything |
---|
2046 | TEST_ALI_LEN_ALIGNED(68, 1); |
---|
2047 | TEST_DATA(".....G---G--G--C---C-G.A--G-GAA-CCU--G---CG--G--C-UGG-ACCUCC........", |
---|
2048 | "-----A---C--G--A---U-C-C--G-GAA-CCU--G---CG--G--C-UGG-ACCUCCU.......", |
---|
2049 | ".....A---C--G--A---A-C.G--G-GAA-CCU--G---CG--G--C-UGG-ACCUCCU-------", |
---|
2050 | "-----U---G--C--C---U-G-G--C-CCU-UAG--C---GC--G--G-UGG-CACCUGA.......", |
---|
2051 | ".........[..<..[.......[..[.[<<.[....]...>>..]......]...].>........]", |
---|
2052 | ".........1.....1.......25.2534..34...3..4....3..4...2...1..........1", |
---|
2053 | ".........x........x....x...xx....x...x..........x...x...x...........x", |
---|
2054 | "90034670040000090060354521004256820000006110060050200043589985100300", |
---|
2055 | "O!!Du8E!!J!!h!!6!!0e1XYzrqmeiMAjB5E!!J!!xT6!!J!!PiCvQ!LDoHlWV59!!DW!", |
---|
2056 | HELIX_STRUCT); |
---|
2057 | |
---|
2058 | } |
---|
2059 | |
---|
2060 | if (!error) { |
---|
2061 | { |
---|
2062 | GB_transaction ta(gb_main); |
---|
2063 | TEST_EXPECT_EQUAL(ARB_insdel_columns(gb_main, ali_name, COL(35), -3, "-."), // illegal delete |
---|
2064 | "SAI 'HELIX': You tried to delete 'x' at position 18 -> Operation aborted"); |
---|
2065 | ta.close("xxx"); |
---|
2066 | } |
---|
2067 | { |
---|
2068 | GB_transaction ta(gb_main); |
---|
2069 | TEST_EXPECT_EQUAL(ARB_insdel_columns(gb_main, ali_name, COL(57), -3, "-."), // illegal delete |
---|
2070 | "SAI 'HELIX_NR': You tried to delete '4' at position 40 -> Operation aborted"); |
---|
2071 | ta.close("xxx"); |
---|
2072 | } |
---|
2073 | { |
---|
2074 | GB_transaction ta(gb_main); |
---|
2075 | TEST_EXPECT_EQUAL(ARB_insdel_columns(gb_main, ali_name, 4711, 3, "-."), // illegal insert |
---|
2076 | "Can't insert at position 4711 (exceeds length 68 of alignment 'ali_mini')"); |
---|
2077 | ta.close("xxx"); |
---|
2078 | } |
---|
2079 | { |
---|
2080 | GB_transaction ta(gb_main); |
---|
2081 | TEST_EXPECT_EQUAL(ARB_insdel_columns(gb_main, ali_name, 66, -3, "-."), // illegal delete |
---|
2082 | "Can't delete positions 66-68 (exceeds max. position 67 of alignment 'ali_mini')"); |
---|
2083 | ta.close("xxx"); |
---|
2084 | } |
---|
2085 | { |
---|
2086 | GB_transaction ta(gb_main); |
---|
2087 | TEST_EXPECT_EQUAL(ARB_insdel_columns(gb_main, ali_name, -1, 3, "-."), // illegal insert |
---|
2088 | "Illegal sequence position -1"); |
---|
2089 | ta.close("xxx"); |
---|
2090 | } |
---|
2091 | } |
---|
2092 | if (!error) { |
---|
2093 | GB_transaction ta(gb_main); |
---|
2094 | TEST_DATA(".....G---G--G--C---C-G.A--G-GAA-CCU--G---CG--G--C-UGG-ACCUCC........", |
---|
2095 | "-----A---C--G--A---U-C-C--G-GAA-CCU--G---CG--G--C-UGG-ACCUCCU.......", |
---|
2096 | ".....A---C--G--A---A-C.G--G-GAA-CCU--G---CG--G--C-UGG-ACCUCCU-------", |
---|
2097 | "-----U---G--C--C---U-G-G--C-CCU-UAG--C---GC--G--G-UGG-CACCUGA.......", |
---|
2098 | ".........[..<..[.......[..[.[<<.[....]...>>..]......]...].>........]", |
---|
2099 | ".........1.....1.......25.2534..34...3..4....3..4...2...1..........1", |
---|
2100 | ".........x........x....x...xx....x...x..........x...x...x...........x", |
---|
2101 | "90034670040000090060354521004256820000006110060050200043589985100300", |
---|
2102 | "O!!Du8E!!J!!h!!6!!0e1XYzrqmeiMAjB5E!!J!!xT6!!J!!PiCvQ!LDoHlWV59!!DW!", |
---|
2103 | HELIX_STRUCT); |
---|
2104 | } |
---|
2105 | |
---|
2106 | GB_close(gb_main); |
---|
2107 | TEST_EXPECT_NO_ERROR(error.deliver()); |
---|
2108 | } |
---|
2109 | |
---|
// Unit test: delete and insert alignment columns selected via RangeLists
// (ARB_delete_columns_using_SAI() / ARB_insert_columns_using_SAI()).
//
// Steps: set up the test database for alignment 'ali_mini' (TEST_CREATE_DB), add SAIs (add_some_SAIs()),
// format the alignment, then perform one deletion followed by four insertions.
// After every step the expected alignment length (tracked in alilen_exp) and the expected data (TEST_DATA) are checked.
// Each RangeList is built by build_RangeList_from_string() from a marker string; "x" is passed as second argument
// (presumably the set of marker characters, i.e. columns showing 'x' get selected - the counts used below agree with that).
// An error collected in 'error' skips the remaining steps and is reported at the end via error.deliver().
2110 | void TEST_insert_delete_DB_using_SAI() { |
---|
2111 | GB_shell shell; |
---|
2112 | ARB_ERROR error; |
---|
2113 | const char *ali_name = "ali_mini"; |
---|
2114 | GBDATA *gb_main = TEST_CREATE_DB(error, ali_name, TADinsdel, false); |
---|
2115 | |
---|
2116 | arb_suppress_progress noProgress; |
---|
2117 | |
---|
2118 | if (!error) error = add_some_SAIs(gb_main, ali_name); |
---|
2119 | if (!error) { |
---|
2120 | GB_transaction ta(gb_main); |
---|
2121 | |
---|
2122 | // test here is just a duplicate from TEST_insert_delete_DB() - just here to show the data (i.e. the state all following steps start from: length 57) |
---|
2123 | TEST_EXPECT_NO_ERROR(ARB_format_alignment(gb_main, ali_name)); |
---|
2124 | int alilen_exp = 57; |
---|
2125 | TEST_ALI_LEN_ALIGNED(alilen_exp, 1); |
---|
2126 | TEST_DATA("...G-GGC-C-G...--A--G--GAA-CCUG-CGGC-UGG--AUCACCUCC......", |
---|
2127 | "---A-CGA-U-C-----C--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU.....", |
---|
2128 | "...A-CGA-A-C.....G--G--GAA-CCUG-CGGC-UGG--AUCACCUCCU-----", |
---|
2129 | "---U-GCC-U-G-----G--C--CCU-UAGC-GCGG-UGG--UCCCACCUGA.....", |
---|
2130 | ".....[<[.........[..[..[<<.[..].>>]....]..]....].>......]", |
---|
2131 | ".....1.1.........25.25.34..34.34..34...25.25...1........1", |
---|
2132 | ".....x..x........x...x.x....x.x....x...x...x...x.........x", |
---|
2133 | "934674096035485565210094256820061165020021139435899851300", |
---|
2134 | "ODu8EJh60e1XYLgxvzrqmeMiMAjB5EJxT6JPiCvQrq4uCLDoHlWV59DW!", |
---|
2135 | HELIX_STRUCT); |
---|
2136 | |
---|
// delete the marked columns: 14 columns are marked with 'x', so the expected length shrinks by 14.
// NOTE(review): ".-" is presumably the set of characters that are allowed to get deleted - confirm against ARB_delete_columns_using_SAI().
2137 | RangeList delRanges = build_RangeList_from_string( |
---|
2138 | /* */ "xxx-------x-x-xxx---------x---------x---------------xxxx-", |
---|
2139 | "x", false); |
---|
2140 | TEST_EXPECT_NO_ERROR(ARB_delete_columns_using_SAI(gb_main, ali_name, delRanges, ".-")); |
---|
2141 | alilen_exp -= 14; |
---|
2142 | TEST_ALI_LEN_ALIGNED(alilen_exp, 1); |
---|
2143 | TEST_DATA("G-GGC-CG.A--G--GAACCUG-CGGCUGG--AUCACCUCC..", |
---|
2144 | "A-CGA-UC-C--G--GAACCUG-CGGCUGG--AUCACCUCCU.", |
---|
2145 | "A-CGA-AC.G--G--GAACCUG-CGGCUGG--AUCACCUCCU-", |
---|
2146 | "U-GCC-UG-G--C--CCUUAGC-GCGGUGG--UCCCACCUGA.", |
---|
2147 | "..[<[....[..[..[<<[..].>>]...]..]....].>..]", |
---|
2148 | "..1.1....25.25.34.34.34..34..25.25...1....1", |
---|
2149 | "..x..x...x...x.x...x.x....x..x...x...x.....x", |
---|
2150 | "6740960585210094258200611652002113943589980", |
---|
2151 | "8EJh60eXLzrqmeMiMAB5EJxT6JPCvQrq4uCLDoHlWV!", |
---|
2152 | HELIX_STRUCT); |
---|
2153 | |
---|
2154 | // insert INFRONTOF each range (3 ranges are marked; 2 columns get inserted per range => length grows by 3*2) |
---|
2155 | RangeList insRanges = build_RangeList_from_string( |
---|
2156 | /* */ "---xx---xxxxxxxx---------xxxx--------------", |
---|
2157 | "x", false); |
---|
2158 | TEST_EXPECT_NO_ERROR(ARB_insert_columns_using_SAI(gb_main, ali_name, insRanges, RANGES, INFRONTOF, 2)); |
---|
2159 | alilen_exp += 3*2; |
---|
2160 | TEST_ALI_LEN_ALIGNED(alilen_exp, 1); |
---|
2161 | TEST_DATA("G-G--GC-CG...A--G--GAACCUG-CG--GCUGG--AUCACCUCC..", |
---|
2162 | "A-C--GA-UC---C--G--GAACCUG-CG--GCUGG--AUCACCUCCU.", |
---|
2163 | "A-C--GA-AC...G--G--GAACCUG-CG--GCUGG--AUCACCUCCU-", |
---|
2164 | "U-G--CC-UG---G--C--CCUUAGC-GC--GGUGG--UCCCACCUGA.", |
---|
2165 | "..[..<[......[..[..[<<[..].>>..]...]..]....].>..]", |
---|
2166 | "..1...1......25.25.34.34.34....34..25.25...1....1", |
---|
2167 | "..x....x.....x...x.x...x.x......x..x...x...x.....x", |
---|
2168 | "6740009605008521009425820061100652002113943589980", |
---|
2169 | "8EJ!!h60eX!!LzrqmeMiMAB5EJxT6!!JPCvQrq4uCLDoHlWV!", |
---|
2170 | HELIX_STRUCT); |
---|
2171 | |
---|
2172 | // insert BEHIND each range (4 ranges are marked; 4 columns get inserted per range => length grows by 4*4) |
---|
2173 | insRanges = build_RangeList_from_string( |
---|
2174 | /* */ "-----------xx-x------------xxxxxx---------------x", |
---|
2175 | "x", false); |
---|
2176 | TEST_EXPECT_NO_ERROR(ARB_insert_columns_using_SAI(gb_main, ali_name, insRanges, RANGES, BEHIND, 4)); |
---|
2177 | alilen_exp += 4*4; |
---|
2178 | TEST_ALI_LEN_ALIGNED(alilen_exp, 1); |
---|
2179 | TEST_DATA("G-G--GC-CG.......A------G--GAACCUG-CG--GC----UGG--AUCACCUCC......", |
---|
2180 | "A-C--GA-UC-------C------G--GAACCUG-CG--GC----UGG--AUCACCUCCU.....", |
---|
2181 | "A-C--GA-AC.......G------G--GAACCUG-CG--GC----UGG--AUCACCUCCU-----", |
---|
2182 | "U-G--CC-UG-------G------C--CCUUAGC-GC--GG----UGG--UCCCACCUGA.....", |
---|
2183 | "..[..<[..........[......[..[<<[..].>>..].......]..]....].>..]....", |
---|
2184 | "..1...1..........25.....25.34.34.34....34......25.25...1....1....", |
---|
2185 | "..x....x.........x.......x.x...x.x......x......x...x...x.........x", // @@@ ref gets destroyed here |
---|
2186 | "67400096050080000520000100942582006110065000020021139435899800000", |
---|
2187 | "8EJ!!h60eX!!L!!!!zr!!!!qmeMiMAB5EJxT6!!JP!!!!CvQrq4uCLDoHlWV!!!!!", |
---|
2188 | HELIX_STRUCT); |
---|
2189 | |
---|
2190 | // insert INFRONTOF each column (SINGLE_COLUMNS: 11 columns are marked; 1 column gets inserted per marked column => length grows by 11*1) |
---|
2191 | insRanges = build_RangeList_from_string( |
---|
2192 | /* */ "x----xx--------------------------------------xxx----xxxx--------x", |
---|
2193 | "x", false); |
---|
2194 | TEST_EXPECT_NO_ERROR(ARB_insert_columns_using_SAI(gb_main, ali_name, insRanges, SINGLE_COLUMNS, INFRONTOF, 1)); |
---|
2195 | alilen_exp += 11*1; |
---|
2196 | TEST_ALI_LEN_ALIGNED(alilen_exp, 1); |
---|
2197 | TEST_DATA(".G-G---G-C-CG.......A------G--GAACCUG-CG--GC-----U-G-G--AU-C-A-C-CUCC.......", |
---|
2198 | ".A-C---G-A-UC-------C------G--GAACCUG-CG--GC-----U-G-G--AU-C-A-C-CUCCU......", |
---|
2199 | ".A-C---G-A-AC.......G------G--GAACCUG-CG--GC-----U-G-G--AU-C-A-C-CUCCU------", |
---|
2200 | ".U-G---C-C-UG-------G------C--CCUUAGC-GC--GG-----U-G-G--UC-C-C-A-CCUGA......", |
---|
2201 | "...[...<.[..........[......[..[<<[..].>>..]..........]..]........].>..].....", |
---|
2202 | "...1.....1..........25.....25.34.34.34....34.........25.25.......1....1.....", |
---|
2203 | "...x......x.........x.......x.x...x.x......x.........x...x.......x..........x", |
---|
2204 | "0674000009605008000052000010094258200611006500000200002113090403058998000000", |
---|
2205 | "!8EJ!!!h!60eX!!L!!!!zr!!!!qmeMiMAB5EJxT6!!JP!!!!!C!v!Qrq4u!C!L!D!oHlWV!!!!!!", |
---|
2206 | HELIX_STRUCT); |
---|
2207 | |
---|
2208 | // insert BEHIND each column (SINGLE_COLUMNS: 13 columns are marked; 2 columns get inserted per marked column => length grows by 13*2) |
---|
2209 | insRanges = build_RangeList_from_string( |
---|
2210 | /* */ "------------------------------xxxxxxx----------------------------xxxxxx-----", |
---|
2211 | "x", false); |
---|
2212 | TEST_EXPECT_NO_ERROR(ARB_insert_columns_using_SAI(gb_main, ali_name, insRanges, SINGLE_COLUMNS, BEHIND, 2)); |
---|
2213 | alilen_exp += 13*2; |
---|
2214 | TEST_ALI_LEN_ALIGNED(alilen_exp, 1); |
---|
2215 | TEST_DATA(".G-G---G-C-CG.......A------G--G--A--A--C--C--U--G---CG--GC-----U-G-G--AU-C-A-C-C--U--C--C.............", |
---|
2216 | ".A-C---G-A-UC-------C------G--G--A--A--C--C--U--G---CG--GC-----U-G-G--AU-C-A-C-C--U--C--C--U..........", |
---|
2217 | ".A-C---G-A-AC.......G------G--G--A--A--C--C--U--G---CG--GC-----U-G-G--AU-C-A-C-C--U--C--C--U----------", |
---|
2218 | ".U-G---C-C-UG-------G------C--C--C--U--U--A--G--C---GC--GG-----U-G-G--UC-C-C-A-C--C--U--G--A..........", |
---|
2219 | "...[...<.[..........[......[..[..<..<..[........]...>>..]..........]..]........].....>........].......", |
---|
2220 | "...1.....1..........25.....25.3..4.....3..4.....3..4....34.........25.25.......1..............1.......", // @@@ helix nrs destroyed |
---|
2221 | "...x......x.........x.......x.x...........x.....x........x.........x...x.......x......................x", // @@@ ref destroyed further |
---|
2222 | "067400000960500800005200001009400200500800200000000611006500000200002113090403050080090090080000000000", |
---|
2223 | "!8EJ!!!h!60eX!!L!!!!zr!!!!qmeMi!!M!!A!!B!!5!!E!!J!!xT6!!JP!!!!!C!v!Qrq4u!C!L!D!o!!H!!l!!W!!V!!!!!!!!!!", |
---|
2224 | HELIX_STRUCT); |
---|
2225 | } |
---|
2226 | |
---|
2227 | GB_close(gb_main); |
---|
2228 | TEST_EXPECT_NO_ERROR(error.deliver()); |
---|
2229 | } |
---|
2230 | |
---|
2231 | #endif // UNIT_TESTS |
---|
2232 | |
---|