1 | // ======================================================================================= |
---|
2 | /* */ |
---|
3 | // File : NT_concatenate.cxx |
---|
4 | // Purpose : 1.Concatenation of sequences or alignments |
---|
5 | // 2.Merging the fields of similar species and creating a new species |
---|
6 | // Author : Yadhu Kumar (yadhu@mikro.biologie.tu-muenchen.de) |
---|
7 | // web site : http://www.arb-home.de/ |
---|
8 | /* */ |
---|
9 | // Copyright Department of Microbiology (Technical University Munich) |
---|
10 | /* */ |
---|
11 | // ======================================================================================= |
---|
12 | |
---|
13 | #include "NT_local.h" |
---|
14 | |
---|
15 | #include <items.h> |
---|
16 | #include <item_sel_list.h> |
---|
17 | #include <awt_sel_boxes.hxx> |
---|
18 | #include <AW_rename.hxx> |
---|
19 | #include <aw_question.hxx> |
---|
20 | #include <aw_awar.hxx> |
---|
21 | #include <aw_msg.hxx> |
---|
22 | #include <aw_root.hxx> |
---|
23 | #include <arb_progress.h> |
---|
24 | #include <arb_strbuf.h> |
---|
25 | #include <arb_strarray.h> |
---|
26 | #include <awt_modules.hxx> |
---|
27 | |
---|
28 | using namespace std; |
---|
29 | |
---|
// --- AWAR names used by the concatenate / merge dialogs (all below "tmp/concat/") ---

// sequence type selected in the option menu ("dna", "rna" or "ami")
#define AWAR_CON_SEQUENCE_TYPE       "tmp/concat/sequence_type"
// name of the alignment that receives the concatenated data
#define AWAR_CON_NEW_ALIGNMENT_NAME  "tmp/concat/new_alignment_name"
// text inserted between two concatenated alignments
#define AWAR_CON_ALIGNMENT_SEPARATOR "tmp/concat/alignment_separator"
// awar bound to the selection list of database alignments
#define AWAR_CON_DB_ALIGNS           "tmp/concat/database_alignments"
// species field whose content decides which species are "similar"
#define AWAR_CON_MERGE_FIELD         "tmp/concat/merge_field"
// name of the field that stores the number of merged species
#define AWAR_CON_STORE_SIM_SP_NO     "tmp/concat/store_sim_sp_no"

// option value for the merge window: concatenate alignments after merging
#define MERGE_SIMILAR_CONCATENATE_ALIGNMENTS 1

// direction constants (not referenced in the visible part of this file)
#define MOVE_DOWN 0
#define MOVE_UP   1
---|
40 | |
---|
41 | struct SpeciesConcatenateList { |
---|
42 | GBDATA *species; |
---|
43 | char *species_name; |
---|
44 | |
---|
45 | SpeciesConcatenateList *next; |
---|
46 | }; |
---|
47 | |
---|
48 | // --------------------------creating and initializing AWARS---------------------------------------- |
---|
49 | void NT_createConcatenationAwars(AW_root *aw_root, AW_default aw_def) { |
---|
50 | aw_root->awar_string(AWAR_CON_SEQUENCE_TYPE, "ami", aw_def); |
---|
51 | aw_root->awar_string(AWAR_CON_NEW_ALIGNMENT_NAME, "ali_concat", aw_def); |
---|
52 | aw_root->awar_string(AWAR_CON_ALIGNMENT_SEPARATOR, "XXX", aw_def); |
---|
53 | aw_root->awar_string(AWAR_CON_MERGE_FIELD, "full_name", aw_def); |
---|
54 | aw_root->awar_string(AWAR_CON_STORE_SIM_SP_NO, "merged_species", aw_def); |
---|
55 | aw_root->awar_string(AWAR_CON_DB_ALIGNS, "", aw_def); |
---|
56 | } |
---|
57 | |
---|
58 | // ------------------------Selecting alignments from the database for concatenation---------------------- |
---|
59 | |
---|
60 | inline char *get_alitype_eval(AW_root *aw_root) { |
---|
61 | return GBS_global_string_copy("%s=", aw_root->awar(AWAR_CON_SEQUENCE_TYPE)->read_char_pntr()); |
---|
62 | } |
---|
63 | |
---|
64 | static void alitype_changed_cb(AW_root *aw_root, AW_CL cl_db_sel) { |
---|
65 | AW_DB_selection *db_sel = (AW_DB_selection*)cl_db_sel; |
---|
66 | char *ali_type = get_alitype_eval(aw_root); |
---|
67 | awt_reconfigure_ALI_selection_list(db_sel, ali_type); |
---|
68 | free(ali_type); |
---|
69 | } |
---|
70 | |
---|
71 | static AW_DB_selection* createSelectionList(GBDATA *gb_main, AW_window *aws, const char *awarName) { |
---|
72 | |
---|
73 | #ifdef DEBUG |
---|
74 | static bool ran=false; |
---|
75 | nt_assert(!ran); |
---|
76 | ran=true; // prevents calling this function for the second time |
---|
77 | #endif |
---|
78 | |
---|
79 | AW_root *aw_root = aws->get_root(); |
---|
80 | char *ali_type = get_alitype_eval(aw_root); |
---|
81 | AW_DB_selection *db_sel = awt_create_ALI_selection_list(gb_main, aws, awarName, ali_type); |
---|
82 | |
---|
83 | free(ali_type); |
---|
84 | return db_sel; |
---|
85 | } |
---|
86 | |
---|
87 | // ---------- Create SAI to display alignments that were concatenated -------------- |
---|
88 | |
---|
89 | static GB_ERROR create_concatInfo_SAI(GBDATA *gb_main, const char *new_ali_name, const char *ali_separator, const StrArray& ali_names) { |
---|
90 | GB_ERROR error = NULL; |
---|
91 | GBDATA *gb_extended = GBT_find_or_create_SAI(gb_main, "ConcatInfo"); |
---|
92 | |
---|
93 | if (!gb_extended) error = GB_await_error(); |
---|
94 | else { |
---|
95 | GBDATA *gb_data = GBT_add_data(gb_extended, new_ali_name, "data", GB_STRING); |
---|
96 | |
---|
97 | if (!gb_data) { |
---|
98 | error = GB_await_error(); |
---|
99 | } |
---|
100 | else { |
---|
101 | int new_ali_length = GBT_get_alignment_len(gb_main, new_ali_name); |
---|
102 | int sep_len = strlen(ali_separator); |
---|
103 | |
---|
104 | char *info = (char*)malloc(new_ali_length+1); |
---|
105 | memset(info, '=', new_ali_length); |
---|
106 | |
---|
107 | int offset = 0; |
---|
108 | int last_ali_idx = ali_names.size()-1; |
---|
109 | |
---|
110 | for (int a = 0; a <= last_ali_idx; ++a) { |
---|
111 | const char *ali = ali_names[a]; |
---|
112 | int ali_len = GBT_get_alignment_len(gb_main, ali); |
---|
113 | int ali_str_len = strlen(ali); |
---|
114 | |
---|
115 | char *my_info = info+offset; |
---|
116 | |
---|
117 | int half_ali_len = ali_len/2; |
---|
118 | for (int i = 0; i<5; ++i) { |
---|
119 | if (i<half_ali_len) { |
---|
120 | my_info[i] = '<'; |
---|
121 | my_info[ali_len-i-1] = '>'; |
---|
122 | } |
---|
123 | } |
---|
124 | |
---|
125 | if (ali_str_len<ali_len) { |
---|
126 | int namepos = half_ali_len - ali_str_len/2; |
---|
127 | memcpy(my_info+namepos, ali, ali_str_len); |
---|
128 | } |
---|
129 | |
---|
130 | offset += ali_len; |
---|
131 | if (a != last_ali_idx) { |
---|
132 | memcpy(info+offset, ali_separator, sep_len); |
---|
133 | offset += sep_len; |
---|
134 | } |
---|
135 | } |
---|
136 | |
---|
137 | nt_assert(offset == new_ali_length); // wrong alignment length! |
---|
138 | info[new_ali_length] = 0; |
---|
139 | |
---|
140 | if (!error) error = GB_write_string(gb_data, info); |
---|
141 | free(info); |
---|
142 | } |
---|
143 | } |
---|
144 | return error; |
---|
145 | } |
---|
146 | |
---|
147 | // ---------------------------------------- Concatenation function ---------------------------------- |
---|
148 | static void concatenateAlignments(AW_window *aws, AW_CL cl_selected_alis) { |
---|
149 | nt_assert(cl_selected_alis); |
---|
150 | AW_selection *selected_alis = (AW_selection*)cl_selected_alis; |
---|
151 | |
---|
152 | GB_push_transaction(GLOBAL.gb_main); |
---|
153 | long marked_species = GBT_count_marked_species(GLOBAL.gb_main); |
---|
154 | arb_progress progress("Concatenating alignments", marked_species); |
---|
155 | AW_root *aw_root = aws->get_root(); |
---|
156 | |
---|
157 | char *new_ali_name = aw_root->awar(AWAR_CON_NEW_ALIGNMENT_NAME)->read_string(); |
---|
158 | GB_ERROR error = GBT_check_alignment_name(new_ali_name); |
---|
159 | |
---|
160 | StrArray ali_names; |
---|
161 | selected_alis->get_values(ali_names); |
---|
162 | |
---|
163 | size_t ali_count = ali_names.size(); |
---|
164 | if (!error && ali_count<2) error = "Not enough alignments selected for concatenation (need at least 2)"; |
---|
165 | |
---|
166 | if (!error) { |
---|
167 | int found[ali_count], missing[ali_count]; |
---|
168 | for (size_t j = 0; j<ali_count; j++) { found[j] = 0; missing[j] = 0; } // initializing found and missing alis |
---|
169 | |
---|
170 | char *ali_separator = aw_root->awar(AWAR_CON_ALIGNMENT_SEPARATOR)->read_string(); |
---|
171 | int sep_len = strlen(ali_separator); |
---|
172 | |
---|
173 | long new_alignment_len = 0; |
---|
174 | for (size_t a = 0; a<ali_count; ++a) { |
---|
175 | new_alignment_len += GBT_get_alignment_len(GLOBAL.gb_main, ali_names[a]) + (a ? sep_len : 0); |
---|
176 | } |
---|
177 | |
---|
178 | GBDATA *gb_presets = GBT_get_presets(GLOBAL.gb_main); |
---|
179 | GBDATA *gb_alignment_exists = GB_find_string(gb_presets, "alignment_name", new_ali_name, GB_IGNORE_CASE, SEARCH_GRANDCHILD); |
---|
180 | GBDATA *gb_new_alignment = 0; |
---|
181 | char *seq_type = aw_root->awar(AWAR_CON_SEQUENCE_TYPE)->read_string(); |
---|
182 | |
---|
183 | if (gb_alignment_exists) { // check wheather new alignment exists or not, if yes prompt user to overwrite the existing alignment; if no create an empty alignment |
---|
184 | bool overwrite = aw_ask_sure("concat_ali_overwrite", GBS_global_string("Existing data in alignment \"%s\" may be overwritten. Do you want to continue?", new_ali_name)); |
---|
185 | if (!overwrite) { |
---|
186 | error = "Alignment exists - aborted"; |
---|
187 | } |
---|
188 | else { |
---|
189 | gb_new_alignment = GBT_get_alignment(GLOBAL.gb_main, new_ali_name); |
---|
190 | if (!gb_new_alignment) error = GB_await_error(); |
---|
191 | } |
---|
192 | } |
---|
193 | else { |
---|
194 | gb_new_alignment = GBT_create_alignment(GLOBAL.gb_main, new_ali_name, new_alignment_len, 0, 0, seq_type); |
---|
195 | if (!gb_new_alignment) error = GB_await_error(); |
---|
196 | } |
---|
197 | |
---|
198 | if (!error) { |
---|
199 | AW_repeated_question ask_about_missing_alignment; |
---|
200 | |
---|
201 | for (GBDATA *gb_species = GBT_first_marked_species(GLOBAL.gb_main); |
---|
202 | gb_species && !error; |
---|
203 | gb_species = GBT_next_marked_species(gb_species)) |
---|
204 | { |
---|
205 | GBS_strstruct *str_seq = GBS_stropen(new_alignment_len+1); // create output stream |
---|
206 | int ali_len = 0; |
---|
207 | int ali_ctr = 0; |
---|
208 | |
---|
209 | for (size_t a = 0; a<ali_count; ++a) { |
---|
210 | if (a) GBS_strcat(str_seq, ali_separator); |
---|
211 | GBDATA *gb_seq_data = GBT_find_sequence(gb_species, ali_names[a]); |
---|
212 | if (gb_seq_data) { |
---|
213 | const char *str_data = GB_read_char_pntr(gb_seq_data); |
---|
214 | GBS_strcat(str_seq, str_data); |
---|
215 | ++found[ali_ctr]; |
---|
216 | } |
---|
217 | else { |
---|
218 | char *speciesName = GB_read_string(GB_entry(gb_species, "full_name")); |
---|
219 | char *question = GBS_global_string_copy("\"%s\" alignment doesn't exist in \"%s\"!", ali_names[a], speciesName); |
---|
220 | int skip_ali = ask_about_missing_alignment.get_answer("insert_gaps_for_missing_ali", question, "Insert Gaps for Missing Alignment,Skip Missing Alignment", "all", true); |
---|
221 | if (!skip_ali) { |
---|
222 | ali_len = GBT_get_alignment_len(GLOBAL.gb_main, ali_names[a]); |
---|
223 | GBS_chrncat(str_seq, '.', ali_len); |
---|
224 | } |
---|
225 | ++missing[ali_ctr]; |
---|
226 | free(question); |
---|
227 | free(speciesName); |
---|
228 | } |
---|
229 | } |
---|
230 | |
---|
231 | { |
---|
232 | char *concatenated_ali_seq_data = GBS_strclose(str_seq); |
---|
233 | GBDATA *gb_data = GBT_add_data(gb_species, new_ali_name, "data", GB_STRING); |
---|
234 | GB_write_string(gb_data, concatenated_ali_seq_data); |
---|
235 | free(concatenated_ali_seq_data); |
---|
236 | } |
---|
237 | progress.inc_and_check_user_abort(error); |
---|
238 | } |
---|
239 | |
---|
240 | if (!error) { |
---|
241 | // ............. print missing alignments........... |
---|
242 | aw_message(GBS_global_string("Concatenation of Alignments was performed for %ld species.", marked_species)); |
---|
243 | for (size_t a = 0; a<ali_count; ++a) { |
---|
244 | aw_message(GBS_global_string("%s : Found in %d species & Missing in %d species.", ali_names[a], found[a], missing[a])); |
---|
245 | } |
---|
246 | } |
---|
247 | |
---|
248 | if (!error) error = create_concatInfo_SAI(GLOBAL.gb_main, new_ali_name, ali_separator, ali_names); |
---|
249 | } |
---|
250 | |
---|
251 | free(seq_type); |
---|
252 | free(ali_separator); |
---|
253 | } |
---|
254 | |
---|
255 | if (!error) { |
---|
256 | char *nfield = GBS_global_string_copy("%s/data", new_ali_name); |
---|
257 | error = GBT_add_new_changekey(GLOBAL.gb_main, nfield, GB_STRING); |
---|
258 | free(nfield); |
---|
259 | } |
---|
260 | else { |
---|
261 | progress.done(); |
---|
262 | } |
---|
263 | GB_end_transaction_show_error(GLOBAL.gb_main, error, aw_message); |
---|
264 | free(new_ali_name); |
---|
265 | } |
---|
266 | |
---|
267 | static void addSpeciesToConcatenateList(SpeciesConcatenateList **sclp, GB_CSTR species_name) { |
---|
268 | |
---|
269 | GBDATA *gb_species_data = GBT_get_species_data(GLOBAL.gb_main); |
---|
270 | GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, species_name); |
---|
271 | |
---|
272 | if (gb_species) { |
---|
273 | SpeciesConcatenateList *scl = new SpeciesConcatenateList; |
---|
274 | |
---|
275 | scl->species = gb_species; |
---|
276 | scl->species_name = strdup(species_name); |
---|
277 | scl->next = *sclp; |
---|
278 | *sclp = scl; |
---|
279 | } |
---|
280 | } |
---|
281 | |
---|
282 | static void freeSpeciesConcatenateList(SpeciesConcatenateList *scl) { |
---|
283 | while (scl) { |
---|
284 | SpeciesConcatenateList *next = scl->next; |
---|
285 | free(scl->species_name); |
---|
286 | delete scl; |
---|
287 | scl = next; |
---|
288 | } |
---|
289 | } |
---|
290 | |
---|
291 | static GB_ERROR checkAndMergeFields(GBDATA *gb_new_species, GB_ERROR error, SpeciesConcatenateList *scl) { |
---|
292 | |
---|
293 | char *doneFields = strdup(";name;"); // all fields which are already merged |
---|
294 | int doneLen = strlen(doneFields); |
---|
295 | SpeciesConcatenateList *sl = scl; |
---|
296 | int sl_length = 0; while (scl) { sl_length++; scl=scl->next; } // counting no. of similar species stored in the list |
---|
297 | int *fieldStat = new int[sl_length]; // 0 = not used yet ; -1 = doesn't have field ; 1..n = field content (same number means same content) |
---|
298 | |
---|
299 | while (sl && !error) { // with all species do.. |
---|
300 | char *newFields = GB_get_subfields(sl->species); |
---|
301 | char *fieldStart = newFields; // points to ; before next field |
---|
302 | |
---|
303 | while (fieldStart[1] && !error) { // with all subfields of the species do.. |
---|
304 | char *fieldEnd = strchr(fieldStart+1, ';'); |
---|
305 | nt_assert(fieldEnd); |
---|
306 | char behind = fieldEnd[1]; fieldEnd[1] = 0; |
---|
307 | |
---|
308 | if (strstr(doneFields, fieldStart)==0) { // field is not merged yet |
---|
309 | char *fieldName = fieldStart+1; |
---|
310 | int fieldLen = int(fieldEnd-fieldName); |
---|
311 | |
---|
312 | nt_assert(fieldEnd[0]==';'); |
---|
313 | fieldEnd[0] = 0; |
---|
314 | |
---|
315 | GBDATA *gb_field = GB_search(sl->species, fieldName, GB_FIND); // field does to exist (it was found before) |
---|
316 | GB_TYPES type = GB_read_type(gb_field); |
---|
317 | |
---|
318 | if (type==GB_STRING) { // we only merge string fields |
---|
319 | int i; int doneSpecies = 0; int nextStat = 1; |
---|
320 | |
---|
321 | for (i=0; i<sl_length; i++) { fieldStat[i] = 0; } // clear field status |
---|
322 | |
---|
323 | while (doneSpecies<sl_length) { // since all species in list were handled |
---|
324 | SpeciesConcatenateList *sl2 = sl; |
---|
325 | i = 0; |
---|
326 | |
---|
327 | while (sl2) { |
---|
328 | if (fieldStat[i]==0) { |
---|
329 | gb_field = GB_search(sl2->species, fieldName, GB_FIND); |
---|
330 | if (gb_field) { |
---|
331 | char *content = GB_read_as_string(gb_field); |
---|
332 | SpeciesConcatenateList *sl3 = sl2->next; |
---|
333 | fieldStat[i] = nextStat; |
---|
334 | int j = i+1; doneSpecies++; |
---|
335 | |
---|
336 | while (sl3) { |
---|
337 | if (fieldStat[j]==0) { |
---|
338 | gb_field = GB_search(sl3->species, fieldName, GB_FIND); |
---|
339 | if (gb_field) { |
---|
340 | char *content2 = GB_read_as_string(gb_field); |
---|
341 | if (strcmp(content, content2)==0) { // if contents are the same, they get the same status |
---|
342 | fieldStat[j] = nextStat; |
---|
343 | doneSpecies++; |
---|
344 | } |
---|
345 | free(content2); |
---|
346 | } |
---|
347 | else { |
---|
348 | fieldStat[j] = -1; |
---|
349 | doneSpecies++; |
---|
350 | } |
---|
351 | } |
---|
352 | sl3 = sl3->next; j++; |
---|
353 | } |
---|
354 | free(content); nextStat++; |
---|
355 | } |
---|
356 | else { |
---|
357 | fieldStat[i] = -1; // field does not exist here |
---|
358 | doneSpecies++; |
---|
359 | } |
---|
360 | } |
---|
361 | sl2 = sl2->next; i++; |
---|
362 | } |
---|
363 | if (!sl2) break; |
---|
364 | } |
---|
365 | nt_assert(nextStat!=1); // this would mean that none of the species contained the field |
---|
366 | { |
---|
367 | char *new_content = 0; |
---|
368 | int new_content_len = 0; // @@@ useless (0 where used; unused otherwise) |
---|
369 | |
---|
370 | if (nextStat==2) { // all species contain same field content or do not have the field |
---|
371 | SpeciesConcatenateList *sl2 = sl; |
---|
372 | while (sl2) { |
---|
373 | gb_field = GB_search(sl2->species, fieldName, GB_FIND); |
---|
374 | if (gb_field) { |
---|
375 | new_content = GB_read_as_string(gb_field); |
---|
376 | new_content_len = strlen(new_content); |
---|
377 | break; |
---|
378 | } |
---|
379 | sl2 = sl2->next; |
---|
380 | } |
---|
381 | } |
---|
382 | else { // different field contents |
---|
383 | int actualStat; |
---|
384 | for (actualStat=1; actualStat<nextStat; actualStat++) { |
---|
385 | int names_len = 1; // open bracket |
---|
386 | SpeciesConcatenateList *sl2 = sl; |
---|
387 | char *content = 0; i = 0; |
---|
388 | |
---|
389 | while (sl2) { |
---|
390 | if (fieldStat[i]==actualStat) { |
---|
391 | names_len += strlen(sl2->species_name)+1; |
---|
392 | if (!content) { |
---|
393 | gb_field = GB_search(sl2->species, fieldName, GB_FIND); |
---|
394 | nt_assert(gb_field); |
---|
395 | content = GB_read_as_string(gb_field); |
---|
396 | } |
---|
397 | } |
---|
398 | sl2 = sl2->next; i++; |
---|
399 | } |
---|
400 | nt_assert(content); |
---|
401 | int add_len = names_len+1+strlen(content); |
---|
402 | char *whole = (char*)malloc(new_content_len+1+add_len+1); |
---|
403 | nt_assert(whole); |
---|
404 | char *add = new_content ? whole+sprintf(whole, "%s ", new_content) : whole; |
---|
405 | sl2 = sl; i = 0; |
---|
406 | int first = 1; |
---|
407 | while (sl2) { |
---|
408 | if (fieldStat[i]==actualStat) { |
---|
409 | add += sprintf(add, "%c%s", first ? '{' : ';', sl2->species_name); |
---|
410 | first = 0; |
---|
411 | } |
---|
412 | sl2 = sl2->next; i++; |
---|
413 | } |
---|
414 | add += sprintf(add, "} %s", content); |
---|
415 | |
---|
416 | free(content); |
---|
417 | freeset(new_content, whole); |
---|
418 | new_content_len = strlen(new_content); // cppcheck-suppress deallocuse |
---|
419 | } |
---|
420 | } |
---|
421 | |
---|
422 | if (new_content) { |
---|
423 | error = GBT_write_string(gb_new_species, fieldName, new_content); |
---|
424 | free(new_content); |
---|
425 | } |
---|
426 | } |
---|
427 | } |
---|
428 | |
---|
429 | // mark field as done: |
---|
430 | char *new_doneFields = (char*)malloc(doneLen+fieldLen+1+1); |
---|
431 | sprintf(new_doneFields, "%s%s;", doneFields, fieldName); |
---|
432 | doneLen += fieldLen+1; |
---|
433 | freeset(doneFields, new_doneFields); |
---|
434 | fieldEnd[0] = ';'; |
---|
435 | } |
---|
436 | fieldEnd[1] = behind; |
---|
437 | fieldStart = fieldEnd; |
---|
438 | } |
---|
439 | free(newFields); |
---|
440 | sl = sl->next; |
---|
441 | } |
---|
442 | free(doneFields); |
---|
443 | delete [] fieldStat; |
---|
444 | |
---|
445 | return error; |
---|
446 | } |
---|
447 | |
---|
448 | static GBDATA *concatenateFieldsCreateNewSpecies(AW_window *, GBDATA *gb_species, SpeciesConcatenateList *scl) { |
---|
449 | GB_push_transaction(GLOBAL.gb_main); |
---|
450 | |
---|
451 | GB_ERROR error = 0; |
---|
452 | GBDATA *gb_species_data = GBT_get_species_data(GLOBAL.gb_main); |
---|
453 | |
---|
454 | // data needed for name generation |
---|
455 | char *full_name = 0; |
---|
456 | char *acc = 0; |
---|
457 | |
---|
458 | // --------------------getting the species related data -------------------- |
---|
459 | |
---|
460 | GBDATA *gb_new_species = 0; |
---|
461 | |
---|
462 | if (!error) { |
---|
463 | // copy species to create a new species |
---|
464 | gb_new_species = GB_create_container(gb_species_data, "species"); |
---|
465 | error = gb_new_species ? GB_copy(gb_new_species, gb_species) : GB_await_error(); |
---|
466 | |
---|
467 | if (!error) { // write dummy-name (real name written below) |
---|
468 | error = GBT_write_string(gb_new_species, "name", "$currcat$"); |
---|
469 | } |
---|
470 | } |
---|
471 | |
---|
472 | if (!error) { // copy full name |
---|
473 | full_name = GBT_read_string(gb_species, "full_name"); |
---|
474 | if (!full_name) error = GB_await_error(); |
---|
475 | else error = GBT_write_string(gb_new_species, "full_name", full_name); |
---|
476 | } |
---|
477 | |
---|
478 | if (!error) { |
---|
479 | ConstStrArray ali_names; |
---|
480 | GBT_get_alignment_names(ali_names, GLOBAL.gb_main); |
---|
481 | |
---|
482 | long id = 0; |
---|
483 | for (SpeciesConcatenateList *speciesList = scl; speciesList; speciesList = speciesList->next) { |
---|
484 | for (int no_of_alignments = 0; ali_names[no_of_alignments]!=0; no_of_alignments++) { |
---|
485 | GBDATA *gb_seq_data = GBT_find_sequence(speciesList->species, ali_names[no_of_alignments]); |
---|
486 | if (gb_seq_data) { |
---|
487 | const char *seq_data = GB_read_char_pntr(gb_seq_data); |
---|
488 | GBDATA *gb_data = GBT_add_data(gb_new_species, ali_names[no_of_alignments], "data", GB_STRING); |
---|
489 | error = GB_write_string(gb_data, seq_data); |
---|
490 | if (!error) id += GBS_checksum(seq_data, 1, ".-"); // creating checksum of the each aligned sequence to generate new accession number |
---|
491 | } |
---|
492 | if (error) error = GB_export_errorf("Can't create alignment '%s'", ali_names[no_of_alignments]); |
---|
493 | } |
---|
494 | } |
---|
495 | |
---|
496 | if (!error) { |
---|
497 | acc = GBS_global_string_copy("ARB_%lX", id); // create new accession number |
---|
498 | error = GBT_write_string(gb_new_species, "acc", acc); |
---|
499 | } |
---|
500 | } |
---|
501 | |
---|
502 | if (!error) error = checkAndMergeFields(gb_new_species, error, scl); |
---|
503 | |
---|
504 | // now generate new name |
---|
505 | if (!error) { |
---|
506 | char *new_species_name = 0; |
---|
507 | |
---|
508 | const char *add_field = AW_get_nameserver_addid(GLOBAL.gb_main); |
---|
509 | GBDATA *gb_addid = add_field[0] ? GB_entry(gb_new_species, add_field) : 0; |
---|
510 | char *addid = 0; |
---|
511 | if (gb_addid) addid = GB_read_as_string(gb_addid); |
---|
512 | |
---|
513 | error = AWTC_generate_one_name(GLOBAL.gb_main, full_name, acc, addid, new_species_name); |
---|
514 | if (!error) { // name was created |
---|
515 | if (GBT_find_species_rel_species_data(gb_species_data, new_species_name) != 0) { |
---|
516 | // if the name is not unique -> create unique name |
---|
517 | UniqueNameDetector und(gb_species_data); |
---|
518 | freeset(new_species_name, AWTC_makeUniqueShortName(new_species_name, und)); |
---|
519 | if (!new_species_name) error = GB_await_error(); |
---|
520 | } |
---|
521 | } |
---|
522 | |
---|
523 | if (!error) error = GBT_write_string(gb_new_species, "name", new_species_name); // insert new 'name' |
---|
524 | |
---|
525 | free(new_species_name); |
---|
526 | free(addid); |
---|
527 | } |
---|
528 | |
---|
529 | error = GB_end_transaction(GLOBAL.gb_main, error); |
---|
530 | if (error) { |
---|
531 | gb_new_species = 0; |
---|
532 | aw_message(error); |
---|
533 | } |
---|
534 | |
---|
535 | free(acc); |
---|
536 | free(full_name); |
---|
537 | |
---|
538 | return gb_new_species; |
---|
539 | } |
---|
540 | |
---|
541 | static GB_ERROR checkAndCreateNewField(GBDATA *gb_main, char *new_field_name) { |
---|
542 | GB_ERROR error = GB_check_key(new_field_name); |
---|
543 | |
---|
544 | if (error) return error; |
---|
545 | else { |
---|
546 | error = GBT_add_new_changekey(gb_main, new_field_name, GB_STRING); |
---|
547 | if (error) { |
---|
548 | bool overwrite = aw_ask_sure("merge_similar_overwrite_field", |
---|
549 | GBS_global_string("\"%s\" field exists! Do you want to overwrite the existing field?", new_field_name)); |
---|
550 | if (!overwrite) return error; |
---|
551 | } |
---|
552 | } |
---|
553 | return 0; |
---|
554 | } |
---|
555 | |
---|
556 | static void mergeSimilarSpecies(AW_window *aws, AW_CL cl_mergeSimilarConcatenateAlignments, AW_CL cl_selected_alis) { |
---|
557 | GB_ERROR error = NULL; |
---|
558 | arb_progress wrapper; |
---|
559 | { |
---|
560 | AW_root *aw_root = aws->get_root(); |
---|
561 | char *merge_field_name = aw_root->awar(AWAR_CON_MERGE_FIELD)->read_string(); |
---|
562 | char *new_field_name = aw_root->awar(AWAR_CON_STORE_SIM_SP_NO)->read_string(); |
---|
563 | |
---|
564 | SpeciesConcatenateList *scl = 0; // to build list of similar species |
---|
565 | SpeciesConcatenateList *newSpeciesList = 0; // new SpeciesConcatenateList |
---|
566 | |
---|
567 | GB_begin_transaction(GLOBAL.gb_main); // open database for transaction |
---|
568 | |
---|
569 | error = checkAndCreateNewField(GLOBAL.gb_main, new_field_name); |
---|
570 | |
---|
571 | PersistentNameServerConnection stayAlive; |
---|
572 | arb_progress progress("Merging similar species", GBT_count_marked_species(GLOBAL.gb_main)); |
---|
573 | progress.auto_subtitles("Species"); |
---|
574 | |
---|
575 | for (GBDATA * gb_species = GBT_first_marked_species(GLOBAL.gb_main); |
---|
576 | gb_species && !error; |
---|
577 | gb_species = GBT_next_marked_species(gb_species)) |
---|
578 | { |
---|
579 | GBDATA *gb_species_field = GB_entry(gb_species, merge_field_name); |
---|
580 | const char *name = GBT_read_name(gb_species); |
---|
581 | |
---|
582 | if (!gb_species_field) { |
---|
583 | // exit if species doesn't have any data in the selected field |
---|
584 | error = GBS_global_string("Species '%s' does not contain data in selected field '%s'", name, merge_field_name); |
---|
585 | } |
---|
586 | else { |
---|
587 | char *gb_species_field_content = GB_read_string(gb_species_field); |
---|
588 | int similar_species = 0; |
---|
589 | |
---|
590 | for (GBDATA * gb_species_next = GBT_next_marked_species(gb_species); |
---|
591 | gb_species_next && !error; |
---|
592 | gb_species_next = GBT_next_marked_species(gb_species_next)) |
---|
593 | { |
---|
594 | GBDATA *gb_next_species_field = GB_entry(gb_species_next, merge_field_name); |
---|
595 | const char *next_name = GBT_read_name(gb_species_next); |
---|
596 | |
---|
597 | if (!gb_next_species_field) { |
---|
598 | // exit if species doesn't have any data in the selected field |
---|
599 | error = GBS_global_string("Species '%s' does not contain data in selected field '%s'", next_name, merge_field_name); |
---|
600 | } |
---|
601 | else { |
---|
602 | char *gb_next_species_field_content = GB_read_string(gb_next_species_field); |
---|
603 | |
---|
604 | if (strcmp(gb_species_field_content, gb_next_species_field_content) == 0) { |
---|
605 | addSpeciesToConcatenateList(&scl, next_name); |
---|
606 | GB_write_flag(gb_species_next, 0); |
---|
607 | ++similar_species; |
---|
608 | ++progress; |
---|
609 | } |
---|
610 | free(gb_next_species_field_content); |
---|
611 | } |
---|
612 | } |
---|
613 | |
---|
614 | if (similar_species > 0 && !error) { |
---|
615 | addSpeciesToConcatenateList(&scl, name); |
---|
616 | GB_write_flag(gb_species, 0); |
---|
617 | |
---|
618 | GBDATA *new_species_created = concatenateFieldsCreateNewSpecies(aws, gb_species, scl); |
---|
619 | |
---|
620 | nt_assert(new_species_created); |
---|
621 | if (new_species_created) { // create a list of newly created species |
---|
622 | addSpeciesToConcatenateList(&newSpeciesList, GBT_read_name(new_species_created)); |
---|
623 | } |
---|
624 | |
---|
625 | error = GBT_write_int(new_species_created, new_field_name, ++similar_species); |
---|
626 | } |
---|
627 | |
---|
628 | freeSpeciesConcatenateList(scl); scl = 0; |
---|
629 | free(gb_species_field_content); |
---|
630 | } |
---|
631 | |
---|
632 | progress.inc_and_check_user_abort(error); |
---|
633 | } |
---|
634 | |
---|
635 | if (!error) { |
---|
636 | GBT_mark_all(GLOBAL.gb_main, 0); // unmark all species in the database |
---|
637 | int newSpeciesCount = 0; |
---|
638 | |
---|
639 | for (; newSpeciesList; newSpeciesList = newSpeciesList->next) { // mark only newly created species |
---|
640 | GB_write_flag(newSpeciesList->species, 1); |
---|
641 | newSpeciesCount++; |
---|
642 | } |
---|
643 | aw_message(GBS_global_string("%i new species were created by taking \"%s\" as a criterion!", newSpeciesCount, merge_field_name)); |
---|
644 | freeSpeciesConcatenateList(newSpeciesList); |
---|
645 | } |
---|
646 | |
---|
647 | free(merge_field_name); |
---|
648 | free(new_field_name); |
---|
649 | |
---|
650 | GB_end_transaction_show_error(GLOBAL.gb_main, error, aw_message); |
---|
651 | } |
---|
652 | // Concatenate alignments of the merged species if cl_mergeSimilarConcatenateAlignments = MERGE_SIMILAR_CONCATENATE_ALIGNMENTS |
---|
653 | if (cl_mergeSimilarConcatenateAlignments && !error) concatenateAlignments(aws, cl_selected_alis); |
---|
654 | } |
---|
655 | |
---|
656 | |
---|
657 | |
---|
658 | static AW_window *createMergeSimilarSpeciesWindow(AW_root *aw_root, AW_CL option, AW_CL cl_subsel) { |
---|
659 | AW_window_simple *aws = new AW_window_simple; |
---|
660 | |
---|
661 | { |
---|
662 | char *window_id = GBS_global_string_copy("MERGE_SPECIES_%i", int(option)); |
---|
663 | aws->init(aw_root, window_id, "MERGE SPECIES WINDOW"); |
---|
664 | free(window_id); |
---|
665 | } |
---|
666 | aws->load_xfig("merge_species.fig"); |
---|
667 | |
---|
668 | aws->callback(makeHelpCallback("merge_species.hlp")); |
---|
669 | aws->at("help"); |
---|
670 | aws->create_button("HELP", "HELP", "H"); |
---|
671 | |
---|
672 | aws->at("field_select"); |
---|
673 | aws->auto_space(0, 0); |
---|
674 | aws->callback(AW_POPDOWN); |
---|
675 | create_selection_list_on_itemfields(GLOBAL.gb_main, aws, AWAR_CON_MERGE_FIELD, true, FIELD_FILTER_NDS, "field_select", 0, SPECIES_get_selector(), 20, 30, SelectedFields(SF_PSEUDO|SF_HIDDEN), "sel_merge_field"); |
---|
676 | |
---|
677 | aws->at("store_sp_no"); |
---|
678 | aws->label_length(20); |
---|
679 | aws->create_input_field(AWAR_CON_STORE_SIM_SP_NO, 20); |
---|
680 | |
---|
681 | aws->at("merge"); |
---|
682 | aws->callback(mergeSimilarSpecies, option, cl_subsel); |
---|
683 | aws->create_button("MERGE_SIMILAR_SPECIES", "MERGE SIMILAR SPECIES", "M"); |
---|
684 | |
---|
685 | aws->at("close"); |
---|
686 | aws->callback(AW_POPDOWN); |
---|
687 | aws->create_button("CLOSE", "CLOSE", "C"); |
---|
688 | |
---|
689 | return (AW_window *)aws; |
---|
690 | } |
---|
691 | |
---|
692 | AW_window *NT_createMergeSimilarSpeciesWindow(AW_root *aw_root) { |
---|
693 | static AW_window *aw = 0; |
---|
694 | if (!aw) aw = createMergeSimilarSpeciesWindow(aw_root, 0, 0); |
---|
695 | return aw; |
---|
696 | } |
---|
697 | |
---|
698 | static AW_window *NT_createMergeSimilarSpeciesAndConcatenateWindow(AW_root *aw_root, AW_CL cl_subsel) { |
---|
699 | static AW_window *aw = 0; |
---|
700 | if (!aw) aw = createMergeSimilarSpeciesWindow(aw_root, MERGE_SIMILAR_CONCATENATE_ALIGNMENTS, cl_subsel); |
---|
701 | return aw; |
---|
702 | } |
---|
703 | |
---|
704 | // ----------------------------Creating concatenation window----------------------------------------- |
---|
705 | AW_window *NT_createConcatenationWindow(AW_root *aw_root) { |
---|
706 | AW_window_simple *aws = new AW_window_simple; |
---|
707 | |
---|
708 | aws->init(aw_root, "CONCAT_ALIGNMENTS", "Concatenate Alignments"); |
---|
709 | aws->load_xfig("concatenate.fig"); |
---|
710 | |
---|
711 | aws->button_length(8); |
---|
712 | |
---|
713 | aws->callback(makeHelpCallback("concatenate.hlp")); |
---|
714 | aws->at("help"); |
---|
715 | aws->create_button("HELP", "HELP", "H"); |
---|
716 | |
---|
717 | aws->at("close"); |
---|
718 | aws->callback((AW_CB0)AW_POPDOWN); |
---|
719 | aws->create_button("CLOSE", "CLOSE", "C"); |
---|
720 | |
---|
721 | aws->at("dbAligns"); |
---|
722 | AW_DB_selection *all_alis = createSelectionList(GLOBAL.gb_main, aws, AWAR_CON_DB_ALIGNS); |
---|
723 | AW_selection *sel_alis = awt_create_subset_selection_list(aws, all_alis->get_sellist(), "concatAligns", "collect", "sort"); |
---|
724 | |
---|
725 | aws->at("type"); |
---|
726 | aws->create_option_menu(AWAR_CON_SEQUENCE_TYPE, true); |
---|
727 | aws->insert_option("DNA", "d", "dna"); |
---|
728 | aws->insert_option("RNA", "r", "rna"); |
---|
729 | aws->insert_default_option("PROTEIN", "p", "ami"); |
---|
730 | aws->update_option_menu(); |
---|
731 | aw_root->awar(AWAR_CON_SEQUENCE_TYPE)->add_callback(alitype_changed_cb, (AW_CL)all_alis); |
---|
732 | |
---|
733 | aws->button_length(0); |
---|
734 | |
---|
735 | aws->at("aliName"); |
---|
736 | aws->label_length(15); |
---|
737 | aws->create_input_field(AWAR_CON_NEW_ALIGNMENT_NAME, 25); |
---|
738 | |
---|
739 | aws->at("aliSeparator"); |
---|
740 | aws->label_length(5); |
---|
741 | aws->create_input_field(AWAR_CON_ALIGNMENT_SEPARATOR, 10); |
---|
742 | |
---|
743 | aws->button_length(22); |
---|
744 | aws->auto_space(5, 5); |
---|
745 | aws->at("go"); |
---|
746 | |
---|
747 | aws->callback(concatenateAlignments, (AW_CL)sel_alis); |
---|
748 | aws->create_button("CONCATENATE", "CONCATENATE", "A"); |
---|
749 | |
---|
750 | aws->callback(NT_createMergeSimilarSpeciesWindow); |
---|
751 | aws->create_button("MERGE_SPECIES", "MERGE SIMILAR SPECIES", "M"); |
---|
752 | |
---|
753 | aws->callback(AW_POPUP, (AW_CL)NT_createMergeSimilarSpeciesAndConcatenateWindow, (AW_CL)sel_alis); |
---|
754 | aws->create_button("MERGE_CONCATENATE", "MERGE & CONCATENATE", "S"); |
---|
755 | |
---|
756 | aws->show(); |
---|
757 | return aws; |
---|
758 | } |
---|
759 | // ------------------------------------------------------------------------------------------------------- |
---|