1 | #include "GDE_extglob.h" |
---|
2 | #include "GDE_awars.h" |
---|
3 | |
---|
4 | #include <awt_filter.hxx> |
---|
5 | #include <aw_window.hxx> |
---|
6 | #include <aw_root.hxx> |
---|
7 | #include <aw_question.hxx> |
---|
8 | #include <aw_awar.hxx> |
---|
9 | #include <aw_msg.hxx> |
---|
10 | #include <aw_file.hxx> |
---|
11 | #include <AP_filter.hxx> |
---|
12 | #include <arb_progress.h> |
---|
13 | #include <arb_strbuf.h> |
---|
14 | #include <arb_global_defs.h> |
---|
15 | |
---|
16 | #include <set> |
---|
17 | #include <string> |
---|
18 | |
---|
19 | #include <unistd.h> |
---|
20 | |
---|
21 | using namespace std; |
---|
22 | |
---|
23 | extern adfiltercbstruct *agde_filter; |
---|
24 | |
---|
25 | /* |
---|
26 | ReplaceArgs(): |
---|
27 | Replace all command line arguments with the appropriate values |
---|
28 | stored for the chosen menu item. |
---|
29 | |
---|
30 | Copyright (c) 1989-1990, University of Illinois board of trustees. All |
---|
31 | rights reserved. Written by Steven Smith at the Center for Prokaryote Genome |
---|
32 | Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. |
---|
33 | Carl Woese. |
---|
34 | |
---|
35 | Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. |
---|
36 | All rights reserved. |
---|
37 | |
---|
38 | */ |
---|
39 | |
---|
40 | |
---|
41 | static char *ReplaceArgs(AW_root *awr, char *Action, GmenuItem *gmenuitem, int number) { |
---|
42 | /* |
---|
43 | * The basic idea is to replace all of the symbols in the method |
---|
44 | * string with the values picked in the dialog box. The method |
---|
45 | * is the general command line structure. All arguments have two |
---|
46 | * parts : a label and a value. Values are the |
---|
47 | * associated arguments that some flags require. All symbols that |
---|
48 | * require argvalue replacement should have a '$' infront of the symbol |
---|
49 | * name in the itemmethod definition. |
---|
50 | * |
---|
51 | * If '$symbol' is prefixed by '!' ARB_GDE does a label replacement, i.e. insert |
---|
52 | * the value visible in GUI. Only works for argchoice arguments! |
---|
53 | * This is intended for informational use (e.g. to write used settings |
---|
54 | * into the comment of a generated tree). |
---|
55 | * |
---|
56 | * An example command line replacement would be: |
---|
57 | * |
---|
58 | * itemmethod=> "lpr -P $arg1 $arg2" |
---|
59 | * |
---|
60 | * arglabel arg1=> "To printer?" |
---|
61 | * argvalue arg1=> "lw" |
---|
62 | * |
---|
63 | * arglabel arg2=> "File name?" |
---|
64 | * argvalue arg2=> "foobar" |
---|
65 | * |
---|
66 | * final command line: |
---|
67 | * |
---|
68 | * lpr -P lw foobar |
---|
69 | * |
---|
70 | */ |
---|
71 | |
---|
72 | char *textvalue = NULp; |
---|
73 | const char *labelvalue = NULp; |
---|
74 | |
---|
75 | const GmenuItemArg& currArg = gmenuitem->arg[number]; |
---|
76 | |
---|
77 | const char *symbol = currArg.symbol; |
---|
78 | int type = currArg.type; |
---|
79 | |
---|
80 | if (type == SLIDER) { |
---|
81 | char *awarname = GDE_makeawarname(gmenuitem, number); |
---|
82 | textvalue = awr->awar(awarname)->read_as_string(); |
---|
83 | free(awarname); |
---|
84 | } |
---|
85 | else if (type == FILE_SELECTOR) { |
---|
86 | char *awar_base = GDE_maketmpawarname(gmenuitem, number); |
---|
87 | textvalue = AW_get_selected_fullname(awr, awar_base); |
---|
88 | free(awar_base); |
---|
89 | } |
---|
90 | else if (type == CHOOSER || |
---|
91 | type == CHOICE_TREE || |
---|
92 | type == CHOICE_SAI || |
---|
93 | type == CHOICE_MENU || |
---|
94 | type == CHOICE_LIST || |
---|
95 | type == CHOICE_WEIGHTS || |
---|
96 | type == TEXTFIELD) |
---|
97 | { |
---|
98 | char *awarname = GDE_makeawarname(gmenuitem, number); |
---|
99 | textvalue = awr->awar(awarname)->read_string(); |
---|
100 | |
---|
101 | if (currArg.choice) { |
---|
102 | for (int c = 0; c<currArg.numchoices && !labelvalue; ++c) { |
---|
103 | GargChoice& choice = currArg.choice[c]; |
---|
104 | if (choice.method) { |
---|
105 | if (strcmp(choice.method, textvalue) == 0) { |
---|
106 | labelvalue = choice.label; |
---|
107 | } |
---|
108 | } |
---|
109 | } |
---|
110 | } |
---|
111 | } |
---|
112 | |
---|
113 | if (!textvalue) ARB_calloc(textvalue, 1); |
---|
114 | if (!symbol) symbol = ""; |
---|
115 | |
---|
116 | set<string>warned_about; |
---|
117 | int conversion_warning = 0; |
---|
118 | |
---|
119 | const int symLen = strlen(symbol); |
---|
120 | int actlen = strlen(Action); |
---|
121 | |
---|
122 | for (int i, j = 0; (i=Find2(Action+j, symbol)) != -1;) { |
---|
123 | i += j; |
---|
124 | ++j; |
---|
125 | if (i>0 && Action[i-1] == '$') { |
---|
126 | const char *replaceBy = textvalue; |
---|
127 | int skip = 1; // skip '$' |
---|
128 | |
---|
129 | if (i>1 && Action[i-2] == '!') { // use label (if available) |
---|
130 | if (labelvalue) { |
---|
131 | replaceBy = labelvalue; |
---|
132 | skip = 2; // skip '!$' |
---|
133 | } |
---|
134 | else { |
---|
135 | aw_message(GBS_global_string("[ARB_GDE]: Cannot access label of '%s'\n", symbol)); |
---|
136 | return NULp; // @@@ ignores resources (should only occur during development) |
---|
137 | } |
---|
138 | } |
---|
139 | |
---|
140 | int repLen = strlen(replaceBy); |
---|
141 | int remLen = skip+symLen; |
---|
142 | |
---|
143 | GBS_strstruct temp(actlen-remLen+repLen+1); |
---|
144 | |
---|
145 | temp.ncat(Action, i-skip); |
---|
146 | temp.ncat(replaceBy, repLen); |
---|
147 | temp.cat(Action+i+symLen); |
---|
148 | |
---|
149 | actlen = temp.get_position(); |
---|
150 | freeset(Action, temp.release()); |
---|
151 | } |
---|
152 | else { |
---|
153 | if (warned_about.find(symbol) == warned_about.end()) { |
---|
154 | fprintf(stderr, |
---|
155 | "old arb version converted '%s' to '%s' (now only '$%s' is converted)\n", |
---|
156 | symbol, textvalue, symbol); |
---|
157 | conversion_warning++; |
---|
158 | warned_about.insert(symbol); |
---|
159 | } |
---|
160 | } |
---|
161 | } |
---|
162 | |
---|
163 | if (conversion_warning) { |
---|
164 | fprintf(stderr, |
---|
165 | "Conversion warnings occurred in Action:\n'%s'\n", |
---|
166 | Action); |
---|
167 | } |
---|
168 | |
---|
169 | free(textvalue); |
---|
170 | return Action; |
---|
171 | } |
---|
172 | |
---|
173 | static void ReplaceString(char*& Action, const char *olds, const char *news) { |
---|
174 | size_t oldslen = strlen(olds); |
---|
175 | size_t newslen = strlen(news); |
---|
176 | size_t actlen = strlen(Action); |
---|
177 | |
---|
178 | int i; |
---|
179 | for (; (i=Find2(Action, olds)) != -1;) { |
---|
180 | GBS_strstruct temp(actlen-oldslen+newslen+1); |
---|
181 | |
---|
182 | temp.ncat(Action, i); |
---|
183 | temp.ncat(news, newslen); |
---|
184 | temp.cat(Action+i+oldslen); |
---|
185 | |
---|
186 | actlen = temp.get_position(); |
---|
187 | freeset(Action, temp.release()); |
---|
188 | } |
---|
189 | } |
---|
190 | |
---|
191 | static void ReplaceFile(char*& Action, GfileFormat file) { |
---|
192 | ReplaceString(Action, file.symbol, file.name); |
---|
193 | } |
---|
194 | |
---|
195 | static void GDE_freesequ(NA_Sequence *sequ) { |
---|
196 | if (sequ) { |
---|
197 | freenull(sequ->comments); |
---|
198 | freenull(sequ->baggage); |
---|
199 | freenull(sequ->sequence); |
---|
200 | } |
---|
201 | } |
---|
202 | |
---|
203 | NA_Alignment::NA_Alignment(GBDATA *gb_main_) : |
---|
204 | id(NULp), |
---|
205 | description(NULp), |
---|
206 | authority(NULp), |
---|
207 | numelements(0), |
---|
208 | maxnumelements(0), |
---|
209 | maxlen(0), |
---|
210 | rel_offset(0), |
---|
211 | element(NULp), |
---|
212 | numgroups(0), |
---|
213 | group(NULp), |
---|
214 | format(0), |
---|
215 | gb_main(gb_main_) |
---|
216 | { |
---|
217 | GB_transaction ta(gb_main); |
---|
218 | alignment_name = GBT_get_default_alignment(gb_main); |
---|
219 | alignment_type = GBT_get_alignment_type(gb_main, alignment_name); |
---|
220 | } |
---|
221 | |
---|
222 | NA_Alignment::~NA_Alignment() { |
---|
223 | free(id); |
---|
224 | free(description); |
---|
225 | free(authority); |
---|
226 | free(alignment_name); |
---|
227 | |
---|
228 | for (unsigned long i=0; i<numelements; i++) { |
---|
229 | GDE_freesequ(element+i); |
---|
230 | } |
---|
231 | } |
---|
232 | |
---|
233 | static GB_ERROR write_sequence_autoinc_alisize(GBDATA *gb_data, long& ali_len, const char *sequence, int seq_len) { |
---|
234 | /* writes sequence data. |
---|
235 | * Specials things done: |
---|
236 | * - cuts content beyond 'ali_len' if nothing relevant there |
---|
237 | * - increments alignment length (stored in DB and parameter) |
---|
238 | */ |
---|
239 | |
---|
240 | GB_ERROR error = NULp; |
---|
241 | int part_len = seq_len; // size that will be written |
---|
242 | if (seq_len > ali_len) { // sequence longer than alignment |
---|
243 | // check whether it can be cutoff w/o loosing anything relevant |
---|
244 | int oversize = seq_len-ali_len; |
---|
245 | int irrelevant = strspn(sequence+ali_len, "-.nN"); // @@@ this has to be different for AA! |
---|
246 | int relevant_oversize = oversize-irrelevant; |
---|
247 | |
---|
248 | part_len = ali_len+relevant_oversize; |
---|
249 | |
---|
250 | if (relevant_oversize) { // got some relevant data behind alignment length -> increase alignment length |
---|
251 | int new_ali_len = part_len; |
---|
252 | GBDATA *gb_main = GB_get_root(gb_data); |
---|
253 | const char *ali_name = GB_read_key_pntr(GB_get_father(gb_data)); |
---|
254 | |
---|
255 | gde_assert(GBT_get_alignment_len(gb_main, ali_name) == ali_len); |
---|
256 | |
---|
257 | error = GBT_set_alignment_len(gb_main, ali_name, new_ali_len); |
---|
258 | ali_len = new_ali_len; |
---|
259 | } |
---|
260 | } |
---|
261 | |
---|
262 | if (!error) { |
---|
263 | if (part_len<seq_len) { |
---|
264 | char *seq_part = ARB_strndup(sequence, part_len); |
---|
265 | error = GB_write_string(gb_data, seq_part); |
---|
266 | free(seq_part); |
---|
267 | } |
---|
268 | else { |
---|
269 | gde_assert(part_len == seq_len); |
---|
270 | error = GB_write_string(gb_data, sequence); |
---|
271 | } |
---|
272 | } |
---|
273 | |
---|
274 | return error; |
---|
275 | } |
---|
276 | |
---|
277 | inline bool isgap(char c) { return GAP::is_std_gap(c); } |
---|
inline bool isTU(char c) {
    // true only for the uppercase bases 'T' and 'U'
    switch (c) {
        case 'T':
        case 'U':
            return true;
        default:
            return false;
    }
}
---|
279 | |
---|
280 | inline char eatgaps(const char *seq, int& index) { |
---|
281 | /*! increments index forward to next base (or EOS) |
---|
282 | * @return first gap char seen or 0 |
---|
283 | */ |
---|
284 | if (isgap(seq[index])) { |
---|
285 | char gap = seq[index++]; |
---|
286 | while (isgap(seq[index])) ++index; |
---|
287 | return gap; |
---|
288 | } |
---|
289 | return 0; |
---|
290 | } |
---|
291 | |
---|
292 | static char *fix_aligned_data(const char *old_seq, const char *new_seq, GB_alignment_type ali_type) { |
---|
293 | char *fixed = ARB_strdup(new_seq); |
---|
294 | |
---|
295 | int o = 0; |
---|
296 | int n = 0; |
---|
297 | int f = 0; |
---|
298 | |
---|
299 | bool fixTU = ali_type == GB_AT_RNA || ali_type == GB_AT_DNA; |
---|
300 | char TU = ali_type == GB_AT_RNA ? 'U' : 'T'; |
---|
301 | char tu = tolower(TU); |
---|
302 | |
---|
303 | while (old_seq[o]) { |
---|
304 | char og = eatgaps(old_seq, o); |
---|
305 | char ng = eatgaps(new_seq, n); |
---|
306 | |
---|
307 | if (og && ng && og != ng) memset(fixed+f, og, n-f); |
---|
308 | f = n; |
---|
309 | |
---|
310 | char oc = old_seq[o++]; |
---|
311 | char nc = new_seq[n++]; |
---|
312 | if (!nc) break; |
---|
313 | |
---|
314 | char oC = toupper(oc); |
---|
315 | char nC = toupper(nc); |
---|
316 | |
---|
317 | if (fixTU && isTU(nC) && isTU(oC)) fixed[f] = (oc == oC) ? TU : tu; |
---|
318 | else if (oc != nc && oC == nC) fixed[f] = oc; |
---|
319 | |
---|
320 | f++; |
---|
321 | } |
---|
322 | |
---|
323 | return fixed; |
---|
324 | } |
---|
325 | |
---|
326 | static void export_to_DB(NA_Alignment& dataset, size_t oldnumelements, bool aligned_data) { |
---|
327 | /*! (re-)import data into arb DB |
---|
328 | * @param dataset normally has been read from file (which was created by external tool) |
---|
329 | * @param oldnumelements start index into dataset |
---|
330 | * @param aligned_data if true => only import sequences; expect checksums did not change; repair some minor, unwanted changes (case, T<>U, gaptype) |
---|
331 | */ |
---|
332 | if (dataset.numelements == oldnumelements) return; |
---|
333 | gde_assert(dataset.numelements > oldnumelements); // otherwise this is a noop |
---|
334 | |
---|
335 | GBDATA *gb_main = db_access.gb_main; |
---|
336 | GB_ERROR error = GB_begin_transaction(gb_main); |
---|
337 | const char *ali_name = dataset.alignment_name; |
---|
338 | long maxalignlen = GBT_get_alignment_len(gb_main, ali_name); |
---|
339 | |
---|
340 | if (maxalignlen <= 0 && !error) { |
---|
341 | error = GB_await_error(); |
---|
342 | } |
---|
343 | |
---|
344 | long lotyp = 0; |
---|
345 | if (!error) { |
---|
346 | GB_alignment_type at = GBT_get_alignment_type(gb_main, ali_name); |
---|
347 | |
---|
348 | switch (at) { |
---|
349 | case GB_AT_DNA: lotyp = DNA; break; |
---|
350 | case GB_AT_RNA: lotyp = RNA; break; |
---|
351 | case GB_AT_AA: lotyp = PROTEIN; break; |
---|
352 | case GB_AT_UNKNOWN: lotyp = DNA; break; |
---|
353 | } |
---|
354 | } |
---|
355 | |
---|
356 | unsigned long i; |
---|
357 | const long oldalignlen = maxalignlen; |
---|
358 | bool auto_format = false; |
---|
359 | |
---|
360 | AW_repeated_question overwrite_question; |
---|
361 | AW_repeated_question checksum_change_question; |
---|
362 | |
---|
363 | arb_progress progress("importing", dataset.numelements-oldnumelements+1); // +1 avoids zero-progress |
---|
364 | for (i = oldnumelements; !error && i < dataset.numelements; i++) { |
---|
365 | NA_Sequence *sequ = dataset.element+i; |
---|
366 | int seqtyp, issame = 0; |
---|
367 | |
---|
368 | seqtyp = sequ->elementtype; |
---|
369 | if ((seqtyp == lotyp) || ((seqtyp == DNA) && (lotyp == RNA)) || ((seqtyp == RNA) && (lotyp == DNA))) { |
---|
370 | issame = 1; |
---|
371 | } |
---|
372 | else { |
---|
373 | aw_message(GBS_global_string("Warning: sequence type of species '%s' changed", sequ->short_name)); |
---|
374 | } |
---|
375 | |
---|
376 | if (sequ->tmatrix) { |
---|
377 | for (long j = 0; j < sequ->seqlen; j++) { |
---|
378 | sequ->sequence[j] = (char)sequ->tmatrix[sequ->sequence[j]]; |
---|
379 | } |
---|
380 | sequ->sequence[sequ->seqlen] = 0; |
---|
381 | } |
---|
382 | |
---|
383 | char *savename = GBS_string_2_key(sequ->short_name); |
---|
384 | |
---|
385 | sequ->gb_species = NULp; |
---|
386 | |
---|
387 | const char *new_seq = (const char *)sequ->sequence; |
---|
388 | int new_seq_len = sequ->seqlen; |
---|
389 | |
---|
390 | gde_assert(new_seq[new_seq_len] == 0); |
---|
391 | gde_assert((int)strlen(new_seq) == new_seq_len); |
---|
392 | |
---|
393 | if (!issame) { // save as extended |
---|
394 | GBDATA *gb_extended = GBT_find_or_create_SAI(gb_main, savename); |
---|
395 | |
---|
396 | if (!gb_extended) error = GB_await_error(); |
---|
397 | else { |
---|
398 | sequ->gb_species = gb_extended; |
---|
399 | GBDATA *gb_data = GBT_add_data(gb_extended, ali_name, "data", GB_STRING); |
---|
400 | |
---|
401 | if (!gb_data) error = GB_await_error(); |
---|
402 | else { |
---|
403 | error = write_sequence_autoinc_alisize(gb_data, maxalignlen, new_seq, new_seq_len); |
---|
404 | if (new_seq_len<maxalignlen) auto_format = true; |
---|
405 | } |
---|
406 | } |
---|
407 | } |
---|
408 | else { // save as sequence |
---|
409 | GBDATA *gb_species_data = GBT_get_species_data(gb_main); |
---|
410 | if (!gb_species_data) error = GB_await_error(); |
---|
411 | else { |
---|
412 | GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, savename); |
---|
413 | bool fix_data_changes = false; |
---|
414 | |
---|
415 | GB_topSecurityLevel unsecured(gb_main); |
---|
416 | |
---|
417 | if (gb_species) { // new element that already exists !!!! |
---|
418 | enum ReplaceMode { REPLACE_SPEC = 0, REIMPORT_SEQ = 1, SKIP_IMPORT = 2 } replace_mode; |
---|
419 | |
---|
420 | if (aligned_data) { |
---|
421 | replace_mode = REIMPORT_SEQ; |
---|
422 | } |
---|
423 | else { |
---|
424 | const char *question = |
---|
425 | GBS_global_string("You are (re-)importing a species '%s'.\n" |
---|
426 | "That species already exists in your database!\n" |
---|
427 | "\n" |
---|
428 | "Possible actions:\n" |
---|
429 | "\n" |
---|
430 | " - overwrite existing species (all fields)\n" |
---|
431 | " - overwrite the sequence (does not change other fields)\n" |
---|
432 | " - skip import of the species\n", |
---|
433 | savename); |
---|
434 | |
---|
435 | replace_mode = (ReplaceMode)overwrite_question.get_answer("GDE_overwrite", question, "Overwrite species,Overwrite sequence only,Skip entry", "all", false); |
---|
436 | } |
---|
437 | |
---|
438 | switch (replace_mode) { |
---|
439 | case SKIP_IMPORT: |
---|
440 | gb_species = NULp; |
---|
441 | break; |
---|
442 | case REPLACE_SPEC: |
---|
443 | error = GB_delete(gb_species); |
---|
444 | gb_species = NULp; |
---|
445 | if (error) break; |
---|
446 | // fall-through |
---|
447 | case REIMPORT_SEQ: |
---|
448 | gb_species = GBT_find_or_create_species_rel_species_data(gb_species_data, savename, true); |
---|
449 | if (!gb_species) error = GB_await_error(); |
---|
450 | break; |
---|
451 | } |
---|
452 | |
---|
453 | fix_data_changes = replace_mode == REIMPORT_SEQ; |
---|
454 | } |
---|
455 | else { |
---|
456 | if (aligned_data) { |
---|
457 | aw_message(GBS_global_string("Warning: new species '%s' has been created (unexpected; possible naming problems)", savename)); |
---|
458 | } |
---|
459 | gb_species = GBT_find_or_create_species_rel_species_data(gb_species_data, savename, true); |
---|
460 | if (!gb_species) error = GB_await_error(); |
---|
461 | } |
---|
462 | |
---|
463 | if (gb_species) { |
---|
464 | gde_assert(!error); |
---|
465 | sequ->gb_species = gb_species; |
---|
466 | |
---|
467 | GBDATA *gb_data = GBT_add_data(gb_species, ali_name, "data", GB_STRING); // does only add if not already existing |
---|
468 | if (!gb_data) error = GB_await_error(); |
---|
469 | else { |
---|
470 | GBDATA *gb_old_data = GBT_find_sequence(gb_species, ali_name); |
---|
471 | bool writeSequence = true; |
---|
472 | if (gb_old_data) { // we already have data -> compare checksums |
---|
473 | const char *old_seq = GB_read_char_pntr(gb_old_data); |
---|
474 | |
---|
475 | long old_checksum = 0; |
---|
476 | long new_checksum = 0; |
---|
477 | bool calcStdChecksum = true; |
---|
478 | if (fix_data_changes) { |
---|
479 | char *new_seq_fixed = fix_aligned_data(old_seq, new_seq, dataset.alignment_type); // apply some fixes to (realigned) data |
---|
480 | |
---|
481 | switch (dataset.alignment_type) { |
---|
482 | case GB_AT_DNA: |
---|
483 | case GB_AT_RNA: { |
---|
484 | char *old_TU = GBS_string_eval(old_seq, ":T=U:t=u"); |
---|
485 | char *new_TU = GBS_string_eval(new_seq_fixed, ":T=U:t=u"); |
---|
486 | |
---|
487 | old_checksum = GBS_checksum(old_TU, 1, "-."); |
---|
488 | new_checksum = GBS_checksum(new_TU, 1, "-."); |
---|
489 | |
---|
490 | free(new_TU); |
---|
491 | free(old_TU); |
---|
492 | break; |
---|
493 | } |
---|
494 | case GB_AT_AA: |
---|
495 | case GB_AT_UNKNOWN: |
---|
496 | old_checksum = GBS_checksum(old_seq, 1, "-."); |
---|
497 | new_checksum = GBS_checksum(new_seq_fixed, 1, "-."); |
---|
498 | break; |
---|
499 | } |
---|
500 | |
---|
501 | if (new_checksum == old_checksum) { // fix succeeded |
---|
502 | free(sequ->sequence); |
---|
503 | sequ->sequence = (NA_Base*)new_seq_fixed; |
---|
504 | new_seq = new_seq_fixed; |
---|
505 | calcStdChecksum = false; |
---|
506 | } |
---|
507 | else { |
---|
508 | fprintf(stderr, "Checksum changed for '%s':\nold='%s'\nfix='%s' (failed)\nnew='%s'\n", savename, old_seq, new_seq_fixed, new_seq); |
---|
509 | free(new_seq_fixed); |
---|
510 | } |
---|
511 | } |
---|
512 | if (calcStdChecksum) { |
---|
513 | old_checksum = GBS_checksum(old_seq, 1, "-."); |
---|
514 | new_checksum = GBS_checksum(new_seq, 1, "-."); |
---|
515 | } |
---|
516 | |
---|
517 | if (old_checksum != new_checksum) { |
---|
518 | if (!fix_data_changes) { // already dumped above |
---|
519 | fprintf(stderr, "Checksum changed for '%s':\nold='%s'\nnew='%s'\n", savename, old_seq, new_seq); |
---|
520 | } |
---|
521 | |
---|
522 | char *question = GBS_global_string_copy("Warning: Sequence checksum of '%s' has changed!\n" |
---|
523 | "This should NOT happen if you aligned sequences!\n" |
---|
524 | "(see console for changes to sequence)", savename); |
---|
525 | |
---|
526 | const char *questionID = aligned_data ? "GDE_accept_aligner_seqchange" : "GDE_accept_seqchange"; |
---|
527 | |
---|
528 | enum ChangeMode { |
---|
529 | ACCEPT_CHANGE = 0, |
---|
530 | REJECT_CHANGE = 1, |
---|
531 | } change_mode = (ChangeMode)checksum_change_question.get_answer(questionID, question, "Accept change,Reject", "all", false); |
---|
532 | |
---|
533 | if (change_mode == REJECT_CHANGE) writeSequence = false; |
---|
534 | |
---|
535 | aw_message(GBS_global_string("Warning: Sequence checksum for '%s' has changed (%s)", |
---|
536 | savename, writeSequence ? "accepted" : "rejected")); |
---|
537 | free(question); |
---|
538 | } |
---|
539 | } |
---|
540 | if (writeSequence) { |
---|
541 | error = write_sequence_autoinc_alisize(gb_data, maxalignlen, new_seq, new_seq_len); |
---|
542 | if (new_seq_len<maxalignlen) auto_format = true; |
---|
543 | } |
---|
544 | } |
---|
545 | } |
---|
546 | } |
---|
547 | } |
---|
548 | free(savename); |
---|
549 | progress.inc_and_check_user_abort(error); |
---|
550 | } |
---|
551 | |
---|
552 | if (!auto_format) auto_format = oldalignlen != maxalignlen; |
---|
553 | |
---|
554 | if (auto_format) { |
---|
555 | if (db_access.format_ali) { |
---|
556 | GB_topSecurityLevel unsecured(gb_main); |
---|
557 | error = db_access.format_ali(gb_main, ali_name); |
---|
558 | } |
---|
559 | } |
---|
560 | |
---|
561 | progress.done(); |
---|
562 | |
---|
563 | GB_end_transaction_show_error(db_access.gb_main, error, aw_message); |
---|
564 | } |
---|
565 | |
---|
566 | static char *preCreateTempfile(const char *name) { |
---|
567 | // creates a tempfile and returns heapcopy of fullpath |
---|
568 | // exits in case of error |
---|
569 | char *fullname = GB_create_tempfile(name); |
---|
570 | |
---|
571 | if (!fullname) aw_message(GBS_global_string("[ARB_GDE]: %s", GB_await_error())); |
---|
572 | return fullname; |
---|
573 | } |
---|
574 | |
---|
575 | void GDE_startaction_cb(AW_window *aw, GmenuItem *gmenuitem) { |
---|
576 | gde_assert(!GB_have_error()); |
---|
577 | |
---|
578 | AW_root *aw_root = aw->get_root(); |
---|
579 | GmenuItem *current_item = gmenuitem; |
---|
580 | |
---|
581 | GapCompression compress = static_cast<GapCompression>(aw_root->awar(AWAR_GDE_COMPRESSION)->read_int()); |
---|
582 | arb_progress progress(current_item->label); |
---|
583 | NA_Alignment DataSet(db_access.gb_main); |
---|
584 | int stop = 0; |
---|
585 | |
---|
586 | if (current_item->numinputs>0) { |
---|
587 | TypeInfo typeinfo = UNKNOWN_TYPEINFO; |
---|
588 | { |
---|
589 | for (int j=0; j<current_item->numinputs; j++) { |
---|
590 | if (j == 0) { typeinfo = current_item->input[j].typeinfo; } |
---|
591 | else if (current_item->input[j].typeinfo != typeinfo) { |
---|
592 | aw_message("'intyped' must be same for all inputs (config error in GDE menu file)"); |
---|
593 | stop = 1; |
---|
594 | } |
---|
595 | } |
---|
596 | } |
---|
597 | gde_assert(typeinfo != UNKNOWN_TYPEINFO); |
---|
598 | |
---|
599 | if (!stop) { |
---|
600 | AP_filter *filter2 = awt_get_filter(agde_filter); |
---|
601 | gde_assert(gmenuitem->seqtype != '-'); // inputs w/o seqtype? impossible! |
---|
602 | { |
---|
603 | GB_ERROR error = awt_invalid_filter(filter2); |
---|
604 | if (error) { |
---|
605 | aw_message(error); |
---|
606 | stop = 1; |
---|
607 | } |
---|
608 | } |
---|
609 | |
---|
610 | if (!stop) { |
---|
611 | GB_transaction ta(DataSet.gb_main); |
---|
612 | progress.subtitle("reading database"); |
---|
613 | |
---|
614 | long cutoff_stop_codon = aw_root->awar(AWAR_GDE_CUTOFF_STOPCODON)->read_int(); |
---|
615 | bool marked = (aw_root->awar(AWAR_GDE_SPECIES)->read_int() != 0); |
---|
616 | |
---|
617 | if (db_access.get_sequences) { |
---|
618 | stop = ReadArbdb2(DataSet, filter2, compress, cutoff_stop_codon, typeinfo); |
---|
619 | } |
---|
620 | else { |
---|
621 | stop = ReadArbdb(DataSet, marked, filter2, compress, cutoff_stop_codon, typeinfo); |
---|
622 | } |
---|
623 | } |
---|
624 | delete filter2; |
---|
625 | } |
---|
626 | |
---|
627 | if (!stop && DataSet.numelements==0) { |
---|
628 | aw_message("no sequences selected"); |
---|
629 | stop = 1; |
---|
630 | } |
---|
631 | } |
---|
632 | |
---|
633 | if (!stop) { |
---|
634 | int select_mode = (current_item->numinputs>0) ? ALL : NONE; |
---|
635 | int pid = getpid(); |
---|
636 | |
---|
637 | static int fileindx = 0; |
---|
638 | for (int j=0; j<current_item->numinputs; j++) { |
---|
639 | GfileFormat& gfile = current_item->input[j]; |
---|
640 | |
---|
641 | char buffer[GBUFSIZ]; |
---|
642 | sprintf(buffer, "gde%d_%d", pid, fileindx++); |
---|
643 | gfile.name = preCreateTempfile(buffer); |
---|
644 | |
---|
645 | switch (gfile.format) { |
---|
646 | case GENBANK: WriteGen (DataSet, gfile.name, select_mode); break; |
---|
647 | case NA_FLAT: WriteNA_Flat(DataSet, gfile.name, select_mode); break; |
---|
648 | case GDE: WriteGDE (DataSet, gfile.name, select_mode); break; |
---|
649 | default: break; |
---|
650 | } |
---|
651 | } |
---|
652 | |
---|
653 | for (int j=0; j<current_item->numoutputs; j++) { |
---|
654 | char buffer[GBUFSIZ]; |
---|
655 | sprintf(buffer, "gde%d_%d", pid, fileindx++); |
---|
656 | current_item->output[j].name = preCreateTempfile(buffer); |
---|
657 | } |
---|
658 | |
---|
659 | // Create the command line for external the function call |
---|
660 | char *Action = ARB_strdup(current_item->method); |
---|
661 | |
---|
662 | while (1) { |
---|
663 | char *oldAction = ARB_strdup(Action); |
---|
664 | |
---|
665 | for (int j=0; j<current_item->numargs; j++) Action = ReplaceArgs(aw_root, Action, gmenuitem, j); |
---|
666 | bool changed = strcmp(oldAction, Action) != 0; |
---|
667 | free(oldAction); |
---|
668 | |
---|
669 | if (!changed) break; |
---|
670 | } |
---|
671 | |
---|
672 | for (int j=0; j<current_item->numinputs; j++) ReplaceFile(Action, current_item->input[j]); |
---|
673 | for (int j=0; j<current_item->numoutputs; j++) ReplaceFile(Action, current_item->output[j]); |
---|
674 | |
---|
675 | if (Find(Action, "$FILTER") == true) { |
---|
676 | char *filter_name = AWT_get_combined_filter_name(aw_root, AWAR_PREFIX_GDE_TEMP); |
---|
677 | ReplaceString(Action, "$FILTER", filter_name); |
---|
678 | free(filter_name); |
---|
679 | } |
---|
680 | |
---|
681 | // call and go... |
---|
682 | progress.subtitle("calling external program"); |
---|
683 | aw_message_if(GBK_system(Action)); |
---|
684 | free(Action); |
---|
685 | |
---|
686 | size_t oldnumelements = DataSet.numelements; |
---|
687 | |
---|
688 | for (int j=0; j<current_item->numoutputs; j++) { |
---|
689 | switch (current_item->output[j].format) { |
---|
690 | case GENBANK: |
---|
691 | case NA_FLAT: |
---|
692 | case GDE: |
---|
693 | LoadData(current_item->output[j].name, DataSet); |
---|
694 | break; |
---|
695 | default: |
---|
696 | gde_assert(0); |
---|
697 | break; |
---|
698 | } |
---|
699 | } |
---|
700 | for (int j=0; j<current_item->numoutputs; j++) { |
---|
701 | if (!current_item->output[j].save) { |
---|
702 | unlink(current_item->output[j].name); |
---|
703 | } |
---|
704 | } |
---|
705 | |
---|
706 | for (int j=0; j<current_item->numinputs; j++) { |
---|
707 | if (!current_item->input[j].save) { |
---|
708 | unlink(current_item->input[j].name); |
---|
709 | } |
---|
710 | } |
---|
711 | |
---|
712 | export_to_DB(DataSet, oldnumelements, current_item->aligned); |
---|
713 | } |
---|
714 | |
---|
715 | gde_assert(!GB_have_error()); |
---|
716 | } |
---|
717 | |
---|
718 | // -------------------------------------------------------------------------------- |
---|
719 | |
---|
720 | #ifdef UNIT_TESTS |
---|
721 | #ifndef TEST_UNIT_H |
---|
722 | #include <test_unit.h> |
---|
723 | #endif |
---|
724 | |
---|
725 | static arb_test::match_expectation fixed_as(GB_alignment_type ali_type, const char *old, const char *expected_fix, const char *aligned) { |
---|
726 | using namespace arb_test; |
---|
727 | char *fixed = fix_aligned_data(old, aligned, ali_type); |
---|
728 | match_expectation e = that(fixed).is_equal_to(expected_fix); |
---|
729 | free(fixed); |
---|
730 | return e; |
---|
731 | } |
---|
732 | |
---|
733 | #define TEST_FIX_ALIGNED(t,o,f,a) TEST_EXPECTATION(fixed_as(t,o,f,a)) |
---|
734 | #define TEST_FIX_ALIGNED__BROKEN(t,o,fw,fg,a) TEST_EXPECTATION__BROKEN(fixed_as(t,o,fw,a), fixed_as(t,o,fg,a)) |
---|
735 | |
---|
736 | void TEST_fix_aligned_data() { |
---|
737 | TEST_FIX_ALIGNED(GB_AT_RNA, |
---|
738 | "...A---CG..G--U.....", // old |
---|
739 | "..AC--G..GU...", // fixed: gaps corrected; T->U |
---|
740 | "--AC--G--GT---"); // aligned |
---|
741 | |
---|
742 | TEST_FIX_ALIGNED(GB_AT_RNA, |
---|
743 | "A---CG..G--U", // old (no gaps at border) |
---|
744 | "--AC--G..GU---", // fixed: gaps corrected; T->U |
---|
745 | "--AC--G--GT---"); // aligned |
---|
746 | |
---|
747 | TEST_FIX_ALIGNED(GB_AT_RNA, |
---|
748 | "...A---CG..G--U.....", // old |
---|
749 | "AC--G..GU", // fixed: gaps corrected; T->U |
---|
750 | "AC--G--GT"); // aligned (no gaps at border) |
---|
751 | |
---|
752 | TEST_FIX_ALIGNED(GB_AT_RNA, |
---|
753 | "A---CG..G--U", // old |
---|
754 | "AC-----GT", // not fixed |
---|
755 | "AC-----GT"); // aligned (bases changed!) |
---|
756 | |
---|
757 | TEST_FIX_ALIGNED(GB_AT_DNA, |
---|
758 | "A---cTUu..G--t", // old |
---|
759 | "AcT--Tt..Gt", // fixed: case restored; U's convert to T's |
---|
760 | "ACT--UT--GU"); // aligned |
---|
761 | |
---|
762 | TEST_FIX_ALIGNED(GB_AT_RNA, |
---|
763 | "A---cTUu..G--t", // old |
---|
764 | "AcU--Uu..Gu", // fixed: case restored; T's convert to U's |
---|
765 | "ACT--UT--GU"); // aligned |
---|
766 | } |
---|
767 | |
---|
768 | #endif // UNIT_TESTS |
---|
769 | |
---|
770 | // -------------------------------------------------------------------------------- |
---|