// =============================================================== //
//                                                                 //
//   File      : adcolumns.cxx                                     //
//   Purpose   : insert/delete columns                             //
//                                                                 //
//   Institute of Microbiology (Technical University Munich)       //
//   http://www.arb-home.de/                                       //
//                                                                 //
// =============================================================== //
10 | |
---|
11 | #include <arbdbt.h> |
---|
12 | #include <adGene.h> |
---|
13 | #include <arb_progress.h> |
---|
14 | |
---|
15 | #include "gb_local.h" |
---|
16 | |
---|
17 | static char *insDelBuffer = 0; |
---|
18 | static size_t insDelBuffer_size; |
---|
19 | |
---|
20 | inline void free_insDelBuffer() { |
---|
21 | freenull(insDelBuffer); |
---|
22 | } |
---|
23 | |
---|
24 | static const char *gbt_insert_delete(const char *source, long srclen, long destlen, long *newlenPtr, long pos, long nchar, long mod, char insert_what, char insert_tail, int extraByte) { |
---|
25 | /* removes elems from or inserts elems into an array |
---|
26 | * |
---|
27 | * srclen len of source |
---|
28 | * destlen if != 0, then cut or append characters to get this len, otherwise keep srclen |
---|
29 | * newlenPtr the resulting len |
---|
30 | * pos where to insert/delete |
---|
31 | * nchar and how many items |
---|
32 | * mod size of an item |
---|
33 | * insert_what insert this character (mod times) |
---|
34 | * insert_tail append this character (if destlen>srclen) |
---|
35 | * extraByte 0 or 1. append extra zero byte at end? use 1 for strings! |
---|
36 | * |
---|
37 | * resulting array has destlen+nchar elements |
---|
38 | * |
---|
39 | * 1. array size is corrected to 'destlen' (by appending/cutting tail) |
---|
40 | * 2. part is deleted inserted |
---|
41 | */ |
---|
42 | |
---|
43 | const char *result; |
---|
44 | |
---|
45 | pos *= mod; |
---|
46 | nchar *= mod; |
---|
47 | srclen *= mod; |
---|
48 | destlen *= mod; |
---|
49 | |
---|
50 | if (!destlen) destlen = srclen; // if no destlen is set then keep srclen |
---|
51 | if ((nchar<0) && (pos-nchar>destlen)) nchar = pos-destlen; // clip maximum characters to delete at end of array |
---|
52 | |
---|
53 | if (destlen == srclen && (pos>srclen || nchar == 0)) { // length stays same and clip-range is empty or behind end of sequence |
---|
54 | /* before 26.2.09 the complete data was copied in this case - but nevertheless NULL(=failure) was returned. |
---|
55 | * I guess this was some test accessing complete data w/o writing anything back to DB, |
---|
56 | * but AFAIK it was not used anywhere --ralf |
---|
57 | */ |
---|
58 | result = NULL; |
---|
59 | } |
---|
60 | else { |
---|
61 | long newlen = destlen+nchar; // length of result (w/o trailing zero-byte) |
---|
62 | if (newlen == 0) { |
---|
63 | result = ""; |
---|
64 | } |
---|
65 | else { |
---|
66 | size_t neededSpace = newlen+extraByte; |
---|
67 | |
---|
68 | if (insDelBuffer && insDelBuffer_size<neededSpace) freenull(insDelBuffer); |
---|
69 | if (!insDelBuffer) { |
---|
70 | insDelBuffer_size = neededSpace; |
---|
71 | insDelBuffer = (char*)malloc(neededSpace); |
---|
72 | } |
---|
73 | |
---|
74 | char *dest = insDelBuffer; |
---|
75 | gb_assert(dest); |
---|
76 | |
---|
77 | if (pos>srclen) { // insert/delete happens inside appended range |
---|
78 | insert_what = insert_tail; |
---|
79 | pos = srclen; // insert/delete directly after source, to avoid illegal access below |
---|
80 | } |
---|
81 | |
---|
82 | gb_assert(pos >= 0); |
---|
83 | if (pos>0) { // copy part left of pos |
---|
84 | memcpy(dest, source, (size_t)pos); |
---|
85 | dest += pos; |
---|
86 | source += pos; srclen -= pos; |
---|
87 | } |
---|
88 | |
---|
89 | if (nchar>0) { // insert |
---|
90 | memset(dest, insert_what, (size_t)nchar); |
---|
91 | dest += nchar; |
---|
92 | } |
---|
93 | else if (nchar<0) { // delete |
---|
94 | source += -nchar; srclen -= -nchar; |
---|
95 | } |
---|
96 | |
---|
97 | if (srclen>0) { // copy rest of source |
---|
98 | memcpy(dest, source, (size_t)srclen); |
---|
99 | dest += srclen; |
---|
100 | source += srclen; srclen = 0; |
---|
101 | } |
---|
102 | |
---|
103 | long rest = newlen-(dest-insDelBuffer); |
---|
104 | gb_assert(rest >= 0); |
---|
105 | |
---|
106 | if (rest>0) { // append tail |
---|
107 | memset(dest, insert_tail, rest); |
---|
108 | dest += rest; |
---|
109 | } |
---|
110 | |
---|
111 | if (extraByte) dest[0] = 0; // append zero byte (used for strings) |
---|
112 | |
---|
113 | result = insDelBuffer; |
---|
114 | } |
---|
115 | *newlenPtr = newlen/mod; // report result length |
---|
116 | } |
---|
117 | return result; |
---|
118 | } |
---|
119 | |
---|
// Kind of item an insert/delete operation is applied to.
// The values are used as index into 'targetType[]'.
enum insDelTarget {
    IDT_SPECIES   = 0,
    IDT_SAI       = 1,
    IDT_SECSTRUCT = 2,
};
125 | |
---|
126 | static GB_CSTR targetType[] = { |
---|
127 | "Species", |
---|
128 | "SAI", |
---|
129 | "SeceditStruct", |
---|
130 | }; |
---|
131 | |
---|
132 | static bool insdel_shall_be_applied_to(GBDATA *gb_data, enum insDelTarget target) { |
---|
133 | bool apply = true; |
---|
134 | const char *key = GB_read_key_pntr(gb_data); |
---|
135 | |
---|
136 | if (key[0] == '_') { // don't apply to keys starting with '_' |
---|
137 | switch (target) { |
---|
138 | case IDT_SECSTRUCT: |
---|
139 | case IDT_SPECIES: |
---|
140 | apply = false; |
---|
141 | break; |
---|
142 | |
---|
143 | case IDT_SAI: |
---|
144 | if (strcmp(key, "_REF") != 0) { // despite key is _REF |
---|
145 | apply = false; |
---|
146 | } |
---|
147 | break; |
---|
148 | } |
---|
149 | } |
---|
150 | |
---|
151 | return apply; |
---|
152 | } |
---|
153 | |
---|
// Parameters of one insert/delete (or format) operation;
// passed unchanged to all fields of all affected items.
struct insDel_params {
    char       *ali_name;     // alignment name
    long        ali_len;      // length the alignment shall have (before insert/delete is applied)
    long        pos;          // position where insert/delete starts
    long        nchar;        // element count (>0 = insert, <0 = delete, 0 = only adjust length)
    const char *delete_chars; // table with 256 entries: entry == 0 -> character may be deleted
};
161 | |
---|
162 | |
---|
163 | |
---|
164 | static GB_ERROR gbt_insert_character_gbd(GBDATA *gb_data, enum insDelTarget target, const insDel_params *params) { |
---|
165 | GB_ERROR error = 0; |
---|
166 | GB_TYPES type = GB_read_type(gb_data); |
---|
167 | |
---|
168 | if (type == GB_DB) { |
---|
169 | GBDATA *gb_child; |
---|
170 | for (gb_child = GB_child(gb_data); gb_child && !error; gb_child = GB_nextChild(gb_child)) { |
---|
171 | error = gbt_insert_character_gbd(gb_child, target, params); |
---|
172 | } |
---|
173 | } |
---|
174 | else { |
---|
175 | gb_assert(params->pos >= 0); |
---|
176 | if (type >= GB_BITS && type != GB_LINK) { |
---|
177 | long size = GB_read_count(gb_data); |
---|
178 | |
---|
179 | if (params->ali_len != size || params->nchar != 0) { // nothing would change |
---|
180 | if (insdel_shall_be_applied_to(gb_data, target)) { |
---|
181 | GB_CSTR source = 0; |
---|
182 | long mod = sizeof(char); |
---|
183 | char insert_what = 0; |
---|
184 | char insert_tail = 0; |
---|
185 | char extraByte = 0; |
---|
186 | long pos = params->pos; |
---|
187 | long nchar = params->nchar; |
---|
188 | |
---|
189 | switch (type) { |
---|
190 | case GB_STRING: { |
---|
191 | source = GB_read_char_pntr(gb_data); |
---|
192 | extraByte = 1; |
---|
193 | insert_what = '-'; |
---|
194 | insert_tail = '.'; |
---|
195 | |
---|
196 | if (source) { |
---|
197 | if (nchar > 0) { // insert |
---|
198 | if (pos<size) { // otherwise insert pos is behind (old and too short) sequence -> dots are inserted at tail |
---|
199 | if ((pos>0 && source[pos-1] == '.') || source[pos] == '.') { // dot at insert position? |
---|
200 | insert_what = '.'; // insert dots |
---|
201 | } |
---|
202 | } |
---|
203 | } |
---|
204 | else { // delete |
---|
205 | long after = pos+(-nchar); // position after deleted part |
---|
206 | long p; |
---|
207 | GB_CSTR delete_chars = params->delete_chars; |
---|
208 | |
---|
209 | if (after>size) after = size; |
---|
210 | for (p = pos; p<after; p++) { |
---|
211 | if (delete_chars[((const unsigned char *)source)[p]]) { |
---|
212 | error = GBS_global_string("You tried to delete '%c' at position %li -> Operation aborted", source[p], p); |
---|
213 | } |
---|
214 | } |
---|
215 | } |
---|
216 | } |
---|
217 | |
---|
218 | break; |
---|
219 | } |
---|
220 | case GB_BITS: source = GB_read_bits_pntr(gb_data, '-', '+'); insert_what = '-'; insert_tail = '-'; break; |
---|
221 | case GB_BYTES: source = GB_read_bytes_pntr(gb_data); break; |
---|
222 | case GB_INTS: source = (GB_CSTR)GB_read_ints_pntr(gb_data); mod = sizeof(GB_UINT4); break; |
---|
223 | case GB_FLOATS: source = (GB_CSTR)GB_read_floats_pntr(gb_data); mod = sizeof(float); break; |
---|
224 | |
---|
225 | default: |
---|
226 | error = GBS_global_string("Unhandled type '%i'", type); |
---|
227 | GB_internal_error(error); |
---|
228 | break; |
---|
229 | } |
---|
230 | |
---|
231 | if (!error) { |
---|
232 | if (!source) error = GB_await_error(); |
---|
233 | else { |
---|
234 | long modified_len; |
---|
235 | GB_CSTR modified = gbt_insert_delete(source, size, params->ali_len, &modified_len, pos, nchar, mod, insert_what, insert_tail, extraByte); |
---|
236 | |
---|
237 | if (modified) { |
---|
238 | gb_assert(modified_len == (params->ali_len+params->nchar)); |
---|
239 | |
---|
240 | switch (type) { |
---|
241 | case GB_STRING: error = GB_write_string(gb_data, modified); break; |
---|
242 | case GB_BITS: error = GB_write_bits (gb_data, modified, modified_len, "-"); break; |
---|
243 | case GB_BYTES: error = GB_write_bytes (gb_data, modified, modified_len); break; |
---|
244 | case GB_INTS: error = GB_write_ints (gb_data, (GB_UINT4*)modified, modified_len); break; |
---|
245 | case GB_FLOATS: error = GB_write_floats(gb_data, (float*)modified, modified_len); break; |
---|
246 | |
---|
247 | default: gb_assert(0); break; |
---|
248 | } |
---|
249 | } |
---|
250 | } |
---|
251 | } |
---|
252 | } |
---|
253 | } |
---|
254 | } |
---|
255 | } |
---|
256 | |
---|
257 | return error; |
---|
258 | } |
---|
259 | |
---|
260 | static GB_ERROR gbt_insert_character_item(GBDATA *gb_item, enum insDelTarget item_type, const insDel_params *params) { |
---|
261 | GB_ERROR error = 0; |
---|
262 | GBDATA *gb_ali = GB_entry(gb_item, params->ali_name); |
---|
263 | |
---|
264 | if (gb_ali) { |
---|
265 | error = gbt_insert_character_gbd(gb_ali, item_type, params); |
---|
266 | if (error) { |
---|
267 | const char *item_name = GBT_read_name(gb_item); |
---|
268 | error = GBS_global_string("%s '%s': %s", targetType[item_type], item_name, error); |
---|
269 | } |
---|
270 | } |
---|
271 | |
---|
272 | return error; |
---|
273 | } |
---|
274 | |
---|
275 | static GB_ERROR gbt_insert_character(GBDATA *gb_item_data, const char *item_field, enum insDelTarget item_type, const insDel_params *params) { |
---|
276 | GBDATA *gb_item; |
---|
277 | GB_ERROR error = 0; |
---|
278 | long item_count = GB_number_of_subentries(gb_item_data); |
---|
279 | arb_progress progress(item_field, item_count); |
---|
280 | |
---|
281 | for (gb_item = GB_entry(gb_item_data, item_field); |
---|
282 | gb_item && !error; |
---|
283 | gb_item = GB_nextEntry(gb_item)) |
---|
284 | { |
---|
285 | error = gbt_insert_character_item(gb_item, item_type, params); |
---|
286 | progress.inc_and_check_user_abort(error); |
---|
287 | } |
---|
288 | return error; |
---|
289 | } |
---|
290 | |
---|
291 | static GB_ERROR gbt_insert_character_secstructs(GBDATA *gb_secstructs, const insDel_params *params) { |
---|
292 | GB_ERROR error = 0; |
---|
293 | GBDATA *gb_ali = GB_entry(gb_secstructs, params->ali_name); |
---|
294 | |
---|
295 | if (gb_ali) { |
---|
296 | long item_count = GB_number_of_subentries(gb_ali)-1; |
---|
297 | |
---|
298 | if (item_count<1) item_count = 1; |
---|
299 | arb_progress progress("secstructs", item_count); |
---|
300 | |
---|
301 | GBDATA *gb_item; |
---|
302 | for (gb_item = GB_entry(gb_ali, "struct"); |
---|
303 | gb_item && !error; |
---|
304 | gb_item = GB_nextEntry(gb_item)) |
---|
305 | { |
---|
306 | GBDATA *gb_ref = GB_entry(gb_item, "ref"); |
---|
307 | if (gb_ref) { |
---|
308 | error = gbt_insert_character_gbd(gb_ref, IDT_SECSTRUCT, params); |
---|
309 | if (error) { |
---|
310 | const char *item_name = GBT_read_name(gb_item); |
---|
311 | error = GBS_global_string("%s '%s': %s", targetType[IDT_SECSTRUCT], item_name, error); |
---|
312 | } |
---|
313 | } |
---|
314 | progress.inc_and_check_user_abort(error); |
---|
315 | } |
---|
316 | } |
---|
317 | return error; |
---|
318 | } |
---|
319 | |
---|
320 | static GB_ERROR GBT_check_lengths(GBDATA *Main, const char *alignment_name) { |
---|
321 | GB_ERROR error = 0; |
---|
322 | GBDATA *gb_presets = GBT_find_or_create(Main, "presets", 7); |
---|
323 | GBDATA *gb_species_data = GBT_find_or_create(Main, "species_data", 7); |
---|
324 | GBDATA *gb_extended_data = GBT_find_or_create(Main, "extended_data", 7); |
---|
325 | GBDATA *gb_secstructs = GB_search(Main, "secedit/structs", GB_CREATE_CONTAINER); |
---|
326 | GBDATA *gb_ali; |
---|
327 | |
---|
328 | insDel_params params = { 0, 0, 0, 0, 0 }; |
---|
329 | |
---|
330 | for (gb_ali = GB_entry(gb_presets, "alignment"); |
---|
331 | gb_ali && !error; |
---|
332 | gb_ali = GB_nextEntry(gb_ali)) |
---|
333 | { |
---|
334 | GBDATA *gb_name = GB_find_string(gb_ali, "alignment_name", alignment_name, GB_IGNORE_CASE, SEARCH_CHILD); |
---|
335 | |
---|
336 | if (gb_name) { |
---|
337 | arb_progress progress("Formatting alignment", 3); // SAI, species and secstructs |
---|
338 | GBDATA *gb_len = GB_entry(gb_ali, "alignment_len"); |
---|
339 | |
---|
340 | params.ali_name = GB_read_string(gb_name); |
---|
341 | params.ali_len = GB_read_int(gb_len); |
---|
342 | |
---|
343 | error = gbt_insert_character(gb_extended_data, "extended", IDT_SAI, ¶ms); |
---|
344 | if (!error) error = gbt_insert_character(gb_species_data, "species", IDT_SPECIES, ¶ms); |
---|
345 | if (!error) error = gbt_insert_character_secstructs(gb_secstructs, ¶ms); |
---|
346 | |
---|
347 | freenull(params.ali_name); |
---|
348 | } |
---|
349 | } |
---|
350 | free_insDelBuffer(); |
---|
351 | return error; |
---|
352 | } |
---|
353 | |
---|
354 | GB_ERROR GBT_format_alignment(GBDATA *Main, const char *alignment_name) { |
---|
355 | GB_ERROR err = 0; |
---|
356 | |
---|
357 | if (strcmp(alignment_name, GENOM_ALIGNMENT) != 0) { // NEVER EVER format 'ali_genom' |
---|
358 | err = GBT_check_data(Main, alignment_name); // detect max. length |
---|
359 | if (!err) err = GBT_check_lengths(Main, alignment_name); // format sequences in alignment |
---|
360 | if (!err) err = GBT_check_data(Main, alignment_name); // sets state to "formatted" |
---|
361 | } |
---|
362 | else { |
---|
363 | err = "It's forbidden to format '" GENOM_ALIGNMENT "'!"; |
---|
364 | } |
---|
365 | return err; |
---|
366 | } |
---|
367 | |
---|
368 | |
---|
369 | GB_ERROR GBT_insert_character(GBDATA *Main, const char *alignment_name, long pos, long count, const char *char_delete) |
---|
370 | { |
---|
371 | /* if count > 0 insert 'count' characters at pos |
---|
372 | * if count < 0 delete pos to pos+|count| |
---|
373 | * |
---|
374 | * Note: deleting is only performed, if found characters in deleted range are listed in 'char_delete' |
---|
375 | * otherwise function returns with error |
---|
376 | * |
---|
377 | * This affects all species' and SAIs having data in given 'alignment_name' and |
---|
378 | * modifies several data entries found there |
---|
379 | * (see insdel_shall_be_applied_to for details which fields are affected). |
---|
380 | */ |
---|
381 | |
---|
382 | GB_ERROR error = 0; |
---|
383 | |
---|
384 | if (pos<0) { |
---|
385 | error = GB_export_error("Illegal sequence position"); |
---|
386 | } |
---|
387 | else { |
---|
388 | GBDATA *gb_ali; |
---|
389 | GBDATA *gb_presets = GBT_find_or_create(Main, "presets", 7); |
---|
390 | GBDATA *gb_species_data = GBT_find_or_create(Main, "species_data", 7); |
---|
391 | GBDATA *gb_extended_data = GBT_find_or_create(Main, "extended_data", 7); |
---|
392 | GBDATA *gb_secstructs = GB_search(Main, "secedit/structs", GB_CREATE_CONTAINER); |
---|
393 | char char_delete_list[256]; |
---|
394 | |
---|
395 | if (strchr(char_delete, '%')) { |
---|
396 | memset(char_delete_list, 0, 256); |
---|
397 | } |
---|
398 | else { |
---|
399 | int ch; |
---|
400 | for (ch = 0; ch<256; ch++) { |
---|
401 | if (char_delete) { |
---|
402 | if (strchr(char_delete, ch)) char_delete_list[ch] = 0; |
---|
403 | else char_delete_list[ch] = 1; |
---|
404 | } |
---|
405 | else { |
---|
406 | char_delete_list[ch] = 0; |
---|
407 | } |
---|
408 | } |
---|
409 | } |
---|
410 | |
---|
411 | for (gb_ali = GB_entry(gb_presets, "alignment"); |
---|
412 | gb_ali && !error; |
---|
413 | gb_ali = GB_nextEntry(gb_ali)) |
---|
414 | { |
---|
415 | GBDATA *gb_name = GB_find_string(gb_ali, "alignment_name", alignment_name, GB_IGNORE_CASE, SEARCH_CHILD); |
---|
416 | |
---|
417 | if (gb_name) { |
---|
418 | GBDATA *gb_len = GB_entry(gb_ali, "alignment_len"); |
---|
419 | long len = GB_read_int(gb_len); |
---|
420 | char *use = GB_read_string(gb_name); |
---|
421 | |
---|
422 | if (pos > len) { |
---|
423 | error = GBS_global_string("Can't insert at position %li (exceeds length %li of alignment '%s')", pos, len, use); |
---|
424 | } |
---|
425 | else { |
---|
426 | if (count < 0 && pos-count > len) count = pos - len; |
---|
427 | error = GB_write_int(gb_len, len + count); |
---|
428 | } |
---|
429 | |
---|
430 | if (!error) { |
---|
431 | insDel_params params = { use, len, pos, count, char_delete_list }; |
---|
432 | |
---|
433 | error = gbt_insert_character(gb_species_data, "species", IDT_SPECIES, ¶ms); |
---|
434 | if (!error) error = gbt_insert_character(gb_extended_data, "extended", IDT_SAI, ¶ms); |
---|
435 | if (!error) error = gbt_insert_character_secstructs(gb_secstructs, ¶ms); |
---|
436 | } |
---|
437 | free(use); |
---|
438 | } |
---|
439 | } |
---|
440 | |
---|
441 | free_insDelBuffer(); |
---|
442 | |
---|
443 | if (!error) GB_disable_quicksave(Main, "a lot of sequences changed"); |
---|
444 | } |
---|
445 | return error; |
---|
446 | } |
---|