1 | /* ============================================================ */ |
---|
2 | /* */ |
---|
3 | /* File : adcolumns.c */ |
---|
4 | /* Purpose : insert/delete columns */ |
---|
5 | /* */ |
---|
6 | /* Institute of Microbiology (Technical University Munich) */ |
---|
7 | /* www.arb-home.de */ |
---|
8 | /* */ |
---|
9 | /* ============================================================ */ |
---|
10 | |
---|
11 | #include <stdlib.h> |
---|
12 | #include <string.h> |
---|
13 | |
---|
14 | #include <adlocal.h> |
---|
15 | #include <arbdbt.h> |
---|
16 | #include <adGene.h> |
---|
17 | |
---|
18 | |
---|
19 | /* ----------------------- */ |
---|
20 | /* insert/delete */ |
---|
21 | |
---|
/* Scratch buffer shared by all insert/delete operations of this file.
 * It is (re)allocated on demand by gbt_insert_delete() and released by free_insDelBuffer().
 */
static size_t  insDelBuffer_size;   /* allocated size in bytes (set whenever the buffer gets allocated) */
static char   *insDelBuffer = NULL; /* NULL while no buffer is allocated */
---|
24 | |
---|
25 | static void free_insDelBuffer() { |
---|
26 | freeset(insDelBuffer, 0); |
---|
27 | } |
---|
28 | |
---|
29 | static const char *gbt_insert_delete(const char *source, long srclen, long destlen, long *newlenPtr, long pos, long nchar, long mod, char insert_what, char insert_tail, int extraByte) { |
---|
30 | /* removes elems from or inserts elems into an array |
---|
31 | * |
---|
32 | * srclen len of source |
---|
33 | * destlen if != 0, then cut or append characters to get this len, otherwise keep srclen |
---|
34 | * newlenPtr the resulting len |
---|
35 | * pos where to insert/delete |
---|
36 | * nchar and how many items |
---|
37 | * mod size of an item |
---|
38 | * insert_what insert this character (mod times) |
---|
39 | * insert_tail append this character (if destlen>srclen) |
---|
40 | * extraByte 0 or 1. append extra zero byte at end? use 1 for strings! |
---|
41 | * |
---|
42 | * resulting array has destlen+nchar elements |
---|
43 | * |
---|
44 | * 1. array size is corrected to 'destlen' (by appending/cutting tail) |
---|
45 | * 2. part is deleted inserted |
---|
46 | */ |
---|
47 | |
---|
48 | const char *result; |
---|
49 | |
---|
50 | pos *= mod; |
---|
51 | nchar *= mod; |
---|
52 | srclen *= mod; |
---|
53 | destlen *= mod; |
---|
54 | |
---|
55 | if (!destlen) destlen = srclen; /* if no destlen is set then keep srclen */ |
---|
56 | if ((nchar<0) && (pos-nchar>destlen)) nchar = pos-destlen; /* clip maximum characters to delete at end of array */ |
---|
57 | |
---|
58 | if (destlen == srclen && (pos>srclen || nchar == 0)) { /* length stays same and clip-range is empty or behind end of sequence */ |
---|
59 | /* before 26.2.09 the complete data was copied in this case - but nevertheless NULL(=failure) was returned. |
---|
60 | * I guess this was some test accessing complete data w/o writing anything back to DB, |
---|
61 | * but AFAIK it was not used anywhere --ralf |
---|
62 | */ |
---|
63 | result = NULL; |
---|
64 | } |
---|
65 | else { |
---|
66 | long newlen = destlen+nchar; /* length of result (w/o trailing zero-byte) */ |
---|
67 | if (newlen == 0) { |
---|
68 | result = ""; |
---|
69 | } |
---|
70 | else { |
---|
71 | size_t neededSpace = newlen+extraByte; |
---|
72 | |
---|
73 | if (insDelBuffer && insDelBuffer_size<neededSpace) freeset(insDelBuffer, 0); |
---|
74 | if (!insDelBuffer) { |
---|
75 | insDelBuffer_size = neededSpace; |
---|
76 | insDelBuffer = (char*)malloc(neededSpace); |
---|
77 | } |
---|
78 | |
---|
79 | char *dest = insDelBuffer; |
---|
80 | gb_assert(dest); |
---|
81 | |
---|
82 | if (pos>srclen) { /* insert/delete happens inside appended range */ |
---|
83 | insert_what = insert_tail; |
---|
84 | pos = srclen; /* insert/delete directly after source, to avoid illegal access below */ |
---|
85 | } |
---|
86 | |
---|
87 | gb_assert(pos >= 0); |
---|
88 | if (pos>0) { /* copy part left of pos */ |
---|
89 | memcpy(dest, source, (size_t)pos); |
---|
90 | dest += pos; |
---|
91 | source += pos; srclen -= pos; |
---|
92 | } |
---|
93 | |
---|
94 | if (nchar>0) { /* insert */ |
---|
95 | memset(dest, insert_what, (size_t)nchar); |
---|
96 | dest += nchar; |
---|
97 | } |
---|
98 | else if (nchar<0) { /* delete */ |
---|
99 | source += -nchar; srclen -= -nchar; |
---|
100 | } |
---|
101 | |
---|
102 | if (srclen>0) { /* copy rest of source */ |
---|
103 | memcpy(dest, source, (size_t)srclen); |
---|
104 | dest += srclen; |
---|
105 | source += srclen; srclen = 0; |
---|
106 | } |
---|
107 | |
---|
108 | long rest = newlen-(dest-insDelBuffer); |
---|
109 | gb_assert(rest >= 0); |
---|
110 | |
---|
111 | if (rest>0) { /* append tail */ |
---|
112 | memset(dest, insert_tail, rest); |
---|
113 | dest += rest; |
---|
114 | } |
---|
115 | |
---|
116 | if (extraByte) dest[0] = 0; /* append zero byte (used for strings) */ |
---|
117 | |
---|
118 | result = insDelBuffer; |
---|
119 | } |
---|
120 | *newlenPtr = newlen/mod; /* report result length */ |
---|
121 | } |
---|
122 | return result; |
---|
123 | } |
---|
124 | |
---|
/* Kind of database item an insert/delete operation is applied to.
 * The values are also used as index into targetType[].
 */
enum insDelTarget {
    IDT_SPECIES   = 0,
    IDT_SAI       = 1,
    IDT_SECSTRUCT = 2
};
---|
130 | |
---|
131 | static GB_CSTR targetType[] = { |
---|
132 | "Species", |
---|
133 | "SAI", |
---|
134 | "SeceditStruct", |
---|
135 | }; |
---|
136 | |
---|
137 | static GB_BOOL insdel_shall_be_applied_to(GBDATA *gb_data, enum insDelTarget target) { |
---|
138 | GB_BOOL apply = GB_TRUE; |
---|
139 | const char *key = GB_read_key_pntr(gb_data); |
---|
140 | |
---|
141 | if (key[0] == '_') { // dont apply to keys starting with '_' |
---|
142 | switch (target) { |
---|
143 | case IDT_SECSTRUCT: |
---|
144 | case IDT_SPECIES: |
---|
145 | apply = GB_FALSE; |
---|
146 | break; |
---|
147 | |
---|
148 | case IDT_SAI: |
---|
149 | if (strcmp(key, "_REF") != 0) { // despite key is _REF |
---|
150 | apply = GB_FALSE; |
---|
151 | } |
---|
152 | break; |
---|
153 | } |
---|
154 | } |
---|
155 | |
---|
156 | return apply; |
---|
157 | } |
---|
158 | |
---|
/* Parameters of one insert/delete operation on an alignment.
 * Note: the struct gets initialized positionally, so the member order matters.
 */
struct insDel_params {
    char       *ali_name;     /* name of alignment */
    long        ali_len;      /* wanted length of alignment */
    long        pos;          /* start position of insert/delete */
    long        nchar;        /* number of elements to insert (>0) or to delete (<0) */
    const char *delete_chars; /* characters allowed to delete (array with 256 entries, value == 0 means deletion allowed) */
};
---|
166 | |
---|
167 | |
---|
168 | |
---|
169 | static GB_ERROR gbt_insert_character_gbd(GBDATA *gb_data, enum insDelTarget target, const struct insDel_params *params) { |
---|
170 | GB_ERROR error = 0; |
---|
171 | GB_TYPES type = GB_read_type(gb_data); |
---|
172 | |
---|
173 | if (type == GB_DB) { |
---|
174 | GBDATA *gb_child; |
---|
175 | for (gb_child = GB_child(gb_data); gb_child && !error; gb_child = GB_nextChild(gb_child)) { |
---|
176 | error = gbt_insert_character_gbd(gb_child, target, params); |
---|
177 | } |
---|
178 | } |
---|
179 | else { |
---|
180 | ad_assert(params->pos >= 0); |
---|
181 | if (type >= GB_BITS && type != GB_LINK) { |
---|
182 | long size = GB_read_count(gb_data); |
---|
183 | |
---|
184 | if (params->ali_len != size || params->nchar != 0) { /* nothing would change */ |
---|
185 | if (insdel_shall_be_applied_to(gb_data, target)) { |
---|
186 | GB_CSTR source = 0; |
---|
187 | long mod = sizeof(char); |
---|
188 | char insert_what = 0; |
---|
189 | char insert_tail = 0; |
---|
190 | char extraByte = 0; |
---|
191 | long pos = params->pos; |
---|
192 | long nchar = params->nchar; |
---|
193 | |
---|
194 | switch (type) { |
---|
195 | case GB_STRING: { |
---|
196 | source = GB_read_char_pntr(gb_data); |
---|
197 | extraByte = 1; |
---|
198 | insert_what = '-'; |
---|
199 | insert_tail = '.'; |
---|
200 | |
---|
201 | if (source) { |
---|
202 | if (nchar > 0) { /* insert */ |
---|
203 | if (pos<size) { /* otherwise insert pos is behind (old and too short) sequence -> dots are inserted at tail */ |
---|
204 | if ((pos>0 && source[pos-1] == '.') || source[pos] == '.') { /* dot at insert position? */ |
---|
205 | insert_what = '.'; /* insert dots */ |
---|
206 | } |
---|
207 | } |
---|
208 | } |
---|
209 | else { /* delete */ |
---|
210 | long after = pos+(-nchar); /* position after deleted part */ |
---|
211 | long p; |
---|
212 | GB_CSTR delete_chars = params->delete_chars; |
---|
213 | |
---|
214 | if (after>size) after = size; |
---|
215 | for (p = pos; p<after; p++){ |
---|
216 | if (delete_chars[((const unsigned char *)source)[p]]) { |
---|
217 | error = GBS_global_string("You tried to delete '%c' at position %li -> Operation aborted", source[p], p); |
---|
218 | } |
---|
219 | } |
---|
220 | } |
---|
221 | } |
---|
222 | |
---|
223 | break; |
---|
224 | } |
---|
225 | case GB_BITS: source = GB_read_bits_pntr(gb_data, '-', '+'); insert_what = '-'; insert_tail = '-'; break; |
---|
226 | case GB_BYTES: source = GB_read_bytes_pntr(gb_data); break; |
---|
227 | case GB_INTS: source = (GB_CSTR)GB_read_ints_pntr(gb_data); mod = sizeof(GB_UINT4); break; |
---|
228 | case GB_FLOATS: source = (GB_CSTR)GB_read_floats_pntr(gb_data); mod = sizeof(float); break; |
---|
229 | |
---|
230 | default : |
---|
231 | error = GBS_global_string("Unhandled type '%i'", type); |
---|
232 | GB_internal_error(error); |
---|
233 | break; |
---|
234 | } |
---|
235 | |
---|
236 | if (!error) { |
---|
237 | if (!source) error = GB_await_error(); |
---|
238 | else { |
---|
239 | long modified_len; |
---|
240 | GB_CSTR modified = gbt_insert_delete(source, size, params->ali_len, &modified_len, pos, nchar, mod, insert_what, insert_tail, extraByte); |
---|
241 | |
---|
242 | if (modified) { |
---|
243 | gb_assert(modified_len == (params->ali_len+params->nchar)); |
---|
244 | |
---|
245 | switch (type) { |
---|
246 | case GB_STRING: error = GB_write_string(gb_data, modified); break; |
---|
247 | case GB_BITS: error = GB_write_bits (gb_data, modified, modified_len, "-"); break; |
---|
248 | case GB_BYTES: error = GB_write_bytes (gb_data, modified, modified_len); break; |
---|
249 | case GB_INTS: error = GB_write_ints (gb_data, (GB_UINT4*)modified, modified_len); break; |
---|
250 | case GB_FLOATS: error = GB_write_floats(gb_data, (float*)modified, modified_len); break; |
---|
251 | |
---|
252 | default: gb_assert(0); break; |
---|
253 | } |
---|
254 | } |
---|
255 | } |
---|
256 | } |
---|
257 | } |
---|
258 | } |
---|
259 | } |
---|
260 | } |
---|
261 | |
---|
262 | return error; |
---|
263 | } |
---|
264 | |
---|
265 | static GB_ERROR gbt_insert_character_item(GBDATA *gb_item, enum insDelTarget item_type, const struct insDel_params *params) { |
---|
266 | GB_ERROR error = 0; |
---|
267 | GBDATA *gb_ali = GB_entry(gb_item, params->ali_name); |
---|
268 | |
---|
269 | if (gb_ali) { |
---|
270 | error = gbt_insert_character_gbd(gb_ali, item_type, params); |
---|
271 | if (error) { |
---|
272 | const char *item_name = GBT_read_name(gb_item); |
---|
273 | error = GBS_global_string("%s '%s': %s", targetType[item_type], item_name, error); |
---|
274 | } |
---|
275 | } |
---|
276 | |
---|
277 | return error; |
---|
278 | } |
---|
279 | |
---|
280 | static GB_ERROR gbt_insert_character(GBDATA *gb_item_data, const char *item_field, enum insDelTarget item_type, const struct insDel_params *params) { |
---|
281 | GBDATA *gb_item; |
---|
282 | GB_ERROR error = 0; |
---|
283 | long item_count = GB_number_of_subentries(gb_item_data); |
---|
284 | long count = 0; |
---|
285 | |
---|
286 | for (gb_item = GB_entry(gb_item_data, item_field); |
---|
287 | gb_item && !error; |
---|
288 | gb_item = GB_nextEntry(gb_item)) |
---|
289 | { |
---|
290 | error = gbt_insert_character_item(gb_item, item_type, params); |
---|
291 | count++; |
---|
292 | GB_status((double)count/item_count); |
---|
293 | } |
---|
294 | return error; |
---|
295 | } |
---|
296 | |
---|
297 | static GB_ERROR gbt_insert_character_secstructs(GBDATA *gb_secstructs, const struct insDel_params *params) { |
---|
298 | GB_ERROR error = 0; |
---|
299 | GBDATA *gb_ali = GB_entry(gb_secstructs, params->ali_name); |
---|
300 | if (gb_ali) { |
---|
301 | long item_count = GB_number_of_subentries(gb_ali)-1; |
---|
302 | long count = 0; |
---|
303 | GBDATA *gb_item; |
---|
304 | |
---|
305 | if (item_count<1) item_count = 1; |
---|
306 | |
---|
307 | for (gb_item = GB_entry(gb_ali, "struct"); |
---|
308 | gb_item && !error; |
---|
309 | gb_item = GB_nextEntry(gb_item)) |
---|
310 | { |
---|
311 | GBDATA *gb_ref = GB_entry(gb_item, "ref"); |
---|
312 | if (gb_ref) { |
---|
313 | error = gbt_insert_character_gbd(gb_ref, IDT_SECSTRUCT, params); |
---|
314 | if (error) { |
---|
315 | const char *item_name = GBT_read_name(gb_item); |
---|
316 | error = GBS_global_string("%s '%s': %s", targetType[IDT_SECSTRUCT], item_name, error); |
---|
317 | } |
---|
318 | } |
---|
319 | count++; |
---|
320 | GB_status((double)count/item_count); |
---|
321 | } |
---|
322 | } |
---|
323 | return error; |
---|
324 | } |
---|
325 | |
---|
326 | static GB_ERROR GBT_check_lengths(GBDATA *Main, const char *alignment_name) { |
---|
327 | GB_ERROR error = 0; |
---|
328 | GBDATA *gb_presets = GBT_find_or_create(Main,"presets",7); |
---|
329 | GBDATA *gb_species_data = GBT_find_or_create(Main,"species_data",7); |
---|
330 | GBDATA *gb_extended_data = GBT_find_or_create(Main,"extended_data",7); |
---|
331 | GBDATA *gb_secstructs = GB_search(Main,"secedit/structs", GB_CREATE_CONTAINER); |
---|
332 | GBDATA *gb_ali; |
---|
333 | |
---|
334 | struct insDel_params params = { 0, 0, 0, 0, 0 }; |
---|
335 | |
---|
336 | for (gb_ali = GB_entry(gb_presets,"alignment"); |
---|
337 | gb_ali && !error; |
---|
338 | gb_ali = GB_nextEntry(gb_ali)) |
---|
339 | { |
---|
340 | GBDATA *gb_name = GB_find_string(gb_ali,"alignment_name",alignment_name,GB_IGNORE_CASE,down_level); |
---|
341 | |
---|
342 | if (gb_name) { |
---|
343 | GBDATA *gb_len = GB_entry(gb_ali,"alignment_len"); |
---|
344 | |
---|
345 | params.ali_name = GB_read_string(gb_name); |
---|
346 | params.ali_len = GB_read_int(gb_len); |
---|
347 | |
---|
348 | error = gbt_insert_character(gb_extended_data, "extended", IDT_SAI, ¶ms); |
---|
349 | if (!error) error = gbt_insert_character(gb_species_data, "species", IDT_SPECIES, ¶ms); |
---|
350 | if (!error) error = gbt_insert_character_secstructs(gb_secstructs, ¶ms); |
---|
351 | |
---|
352 | freeset(params.ali_name, 0); |
---|
353 | } |
---|
354 | } |
---|
355 | free_insDelBuffer(); |
---|
356 | return error; |
---|
357 | } |
---|
358 | |
---|
359 | GB_ERROR GBT_format_alignment(GBDATA *Main, const char *alignment_name) { |
---|
360 | GB_ERROR err = 0; |
---|
361 | |
---|
362 | if (strcmp(alignment_name, GENOM_ALIGNMENT) != 0) { // NEVER EVER format 'ali_genom' |
---|
363 | err = GBT_check_data(Main, alignment_name); // detect max. length |
---|
364 | if (!err) err = GBT_check_lengths(Main, alignment_name); // format sequences in alignment |
---|
365 | if (!err) err = GBT_check_data(Main, alignment_name); // sets state to "formatted" |
---|
366 | } |
---|
367 | else { |
---|
368 | err = "It's forbidden to format '" GENOM_ALIGNMENT "'!"; |
---|
369 | } |
---|
370 | return err; |
---|
371 | } |
---|
372 | |
---|
373 | |
---|
374 | GB_ERROR GBT_insert_character(GBDATA *Main, char *alignment_name, long pos, long count, char *char_delete) |
---|
375 | { |
---|
376 | /* if count > 0 insert 'count' characters at pos |
---|
377 | * if count < 0 delete pos to pos+|count| |
---|
378 | * |
---|
379 | * Note: deleting is only performed, if found characters in deleted range are listed in 'char_delete' |
---|
380 | * otherwise function returns with error |
---|
381 | * |
---|
382 | * This affects all species' and SAIs having data in given 'alignment_name' and |
---|
383 | * modifies several data entries found there |
---|
384 | * (see insdel_shall_be_applied_to for details which fields are affected). |
---|
385 | */ |
---|
386 | |
---|
387 | GB_ERROR error = 0; |
---|
388 | |
---|
389 | if (pos<0) { |
---|
390 | error = GB_export_error("Illegal sequence position"); |
---|
391 | } |
---|
392 | else { |
---|
393 | GBDATA *gb_ali; |
---|
394 | GBDATA *gb_presets = GBT_find_or_create(Main,"presets",7); |
---|
395 | GBDATA *gb_species_data = GBT_find_or_create(Main,"species_data",7); |
---|
396 | GBDATA *gb_extended_data = GBT_find_or_create(Main,"extended_data",7); |
---|
397 | GBDATA *gb_secstructs = GB_search(Main,"secedit/structs", GB_CREATE_CONTAINER); |
---|
398 | char char_delete_list[256]; |
---|
399 | |
---|
400 | if (strchr(char_delete,'%') ) { |
---|
401 | memset(char_delete_list,0,256); |
---|
402 | } |
---|
403 | else { |
---|
404 | int ch; |
---|
405 | for (ch = 0;ch<256; ch++) { |
---|
406 | if (char_delete) { |
---|
407 | if (strchr(char_delete,ch)) char_delete_list[ch] = 0; |
---|
408 | else char_delete_list[ch] = 1; |
---|
409 | } |
---|
410 | else { |
---|
411 | char_delete_list[ch] = 0; |
---|
412 | } |
---|
413 | } |
---|
414 | } |
---|
415 | |
---|
416 | for (gb_ali = GB_entry(gb_presets, "alignment"); |
---|
417 | gb_ali && !error; |
---|
418 | gb_ali = GB_nextEntry(gb_ali)) |
---|
419 | { |
---|
420 | GBDATA *gb_name = GB_find_string(gb_ali, "alignment_name", alignment_name, GB_IGNORE_CASE, down_level); |
---|
421 | |
---|
422 | if (gb_name) { |
---|
423 | GBDATA *gb_len = GB_entry(gb_ali, "alignment_len"); |
---|
424 | long len = GB_read_int(gb_len); |
---|
425 | char *use = GB_read_string(gb_name); |
---|
426 | |
---|
427 | if (pos > len) { |
---|
428 | error = GBS_global_string("Can't insert at position %li (exceeds length %li of alignment '%s')", pos, len, use); |
---|
429 | } |
---|
430 | else { |
---|
431 | if (count < 0 && pos-count > len) count = pos - len; |
---|
432 | error = GB_write_int(gb_len, len + count); |
---|
433 | } |
---|
434 | |
---|
435 | if (!error) { |
---|
436 | struct insDel_params params = { use, len, pos, count, char_delete_list }; |
---|
437 | |
---|
438 | error = gbt_insert_character(gb_species_data, "species", IDT_SPECIES, ¶ms); |
---|
439 | if (!error) error = gbt_insert_character(gb_extended_data, "extended", IDT_SAI, ¶ms); |
---|
440 | if (!error) error = gbt_insert_character_secstructs(gb_secstructs, ¶ms); |
---|
441 | } |
---|
442 | free(use); |
---|
443 | } |
---|
444 | } |
---|
445 | |
---|
446 | free_insDelBuffer(); |
---|
447 | |
---|
448 | if (!error) GB_disable_quicksave(Main,"a lot of sequences changed"); |
---|
449 | } |
---|
450 | return error; |
---|
451 | } |
---|