1 | #include <stdio.h> |
---|
2 | #include <stdlib.h> |
---|
3 | #include <unistd.h> |
---|
4 | #include <string.h> |
---|
5 | // #include <malloc.h> |
---|
6 | #include <arbdb.h> |
---|
7 | #include <arbdbt.h> |
---|
8 | #include <aw_root.hxx> |
---|
9 | #include <aw_device.hxx> |
---|
10 | #include <aw_window.hxx> |
---|
11 | #include <aw_global.hxx> |
---|
12 | #include <awt.hxx> |
---|
13 | #include <awt_tree.hxx> |
---|
14 | #include <inline.h> |
---|
15 | |
---|
16 | #include "awti_export.hxx" |
---|
17 | #include "awti_exp_local.hxx" |
---|
18 | #include "awt_sel_boxes.hxx" |
---|
19 | #include "aw_awars.hxx" |
---|
20 | |
---|
21 | #include "xml.hxx" |
---|
22 | |
---|
23 | #define awte_assert(cond) arb_assert(cond) |
---|
24 | |
---|
25 | using std::string; |
---|
26 | |
---|
27 | // --------------------------------- |
---|
28 | // internal export commands |
---|
29 | |
---|
// Export modes.
// The "real" (internally implemented) formats come first: check_internal()
// converts the index of a matching entry of internal_export_commands[]
// directly into a value of this enum.
enum AWTI_EXPORT_CMD {
    AWTI_EXPORT_XML = 0,    // real formats

    AWTI_EXPORT_INVALID,    // result of check_internal() for unknown commands
    AWTI_EXPORT_USING_FORM  // default mode (has to be last entry in enum)
};
---|
37 | |
---|
// NULL-terminated list of command names accepted behind 'INTERNAL' in an
// export format file. The position of a name is cast to AWTI_EXPORT_CMD
// by check_internal().
static const char *internal_export_commands[] = { "xml_write", NULL };
---|
42 | |
---|
43 | AWTI_EXPORT_CMD check_internal(const char *command) { |
---|
44 | AWTI_EXPORT_CMD cmd = AWTI_EXPORT_INVALID; |
---|
45 | for (int i = 0; internal_export_commands[i]; ++i) { |
---|
46 | if (strcmp(command, internal_export_commands[i]) == 0) { |
---|
47 | cmd = static_cast<AWTI_EXPORT_CMD>(i); |
---|
48 | } |
---|
49 | } |
---|
50 | return cmd; |
---|
51 | } |
---|
52 | |
---|
53 | // ---------------------- |
---|
54 | // export_format |
---|
55 | |
---|
56 | struct export_format { |
---|
57 | char *system; |
---|
58 | // char *internal_command; |
---|
59 | char *new_format; |
---|
60 | char *suffix; |
---|
61 | char *form; // transformed export expression (part behind 'BEGIN') |
---|
62 | |
---|
63 | enum AWTI_EXPORT_CMD export_mode; |
---|
64 | |
---|
65 | export_format(); |
---|
66 | ~export_format(); |
---|
67 | }; |
---|
68 | |
---|
69 | export_format::export_format(void){ |
---|
70 | memset((char *)this,0,sizeof(export_format)); |
---|
71 | } |
---|
72 | |
---|
73 | export_format::~export_format(void) { |
---|
74 | free(system); |
---|
75 | free(new_format); |
---|
76 | free(suffix); |
---|
77 | free(form); |
---|
78 | } |
---|
79 | |
---|
80 | static GB_ERROR awtc_read_export_format(export_format *efo, const char *file, bool load_complete_form){ |
---|
81 | GB_ERROR error = 0; |
---|
82 | |
---|
83 | if (!file || !file[0]) { |
---|
84 | error = "No export format selected"; |
---|
85 | } |
---|
86 | else { |
---|
87 | char *fullfile = AWT_unfold_path(file,"ARBHOME"); |
---|
88 | FILE *in = fopen(fullfile,"r"); |
---|
89 | |
---|
90 | if (!in) error = GB_export_IO_error("reading export form", fullfile); |
---|
91 | else { |
---|
92 | char *s1, *s2; |
---|
93 | size_t linenumber = 0; |
---|
94 | |
---|
95 | efo->export_mode = AWTI_EXPORT_USING_FORM; // default mode |
---|
96 | |
---|
97 | while (!error && awtc_read_string_pair(in, s1, s2, linenumber)) { |
---|
98 | if (!strcmp(s1, "SYSTEM")) { reassign(efo->system, s2); } |
---|
99 | else if (!strcmp(s1, "PRE_FORMAT")) { reassign(efo->new_format, s2); } |
---|
100 | else if (!strcmp(s1, "SUFFIX")) { reassign(efo->suffix, s2); } |
---|
101 | else if (!strcmp(s1, "INTERNAL")) { |
---|
102 | efo->export_mode = check_internal(s2); |
---|
103 | if (efo->export_mode == AWTI_EXPORT_INVALID) { |
---|
104 | error = GBS_global_string("Unknown INTERNAL command '%s'", s2); |
---|
105 | } |
---|
106 | } |
---|
107 | else if (!strcmp(s1, "BEGIN")) { |
---|
108 | if (efo->export_mode != AWTI_EXPORT_USING_FORM) { |
---|
109 | error = "'BEGIN' not allowed when 'INTERNAL' is used"; |
---|
110 | } |
---|
111 | else { |
---|
112 | break; |
---|
113 | } |
---|
114 | } |
---|
115 | else { |
---|
116 | error = GBS_global_string("Unknown command '%s'", s1); |
---|
117 | } |
---|
118 | |
---|
119 | // add error location |
---|
120 | if (error) error = GBS_global_string("%s in line #%zu", error, linenumber); |
---|
121 | |
---|
122 | free(s2); |
---|
123 | free(s1); |
---|
124 | } |
---|
125 | |
---|
126 | if (!error && load_complete_form && efo->export_mode == AWTI_EXPORT_USING_FORM) { |
---|
127 | // now 'in' points to line behind 'BEGIN' |
---|
128 | char *form = GB_read_fp(in); // read rest of file |
---|
129 | |
---|
130 | // Join lines that end with \ with next line. |
---|
131 | // Replace ' = ' and ':' by '\=' and '\:' |
---|
132 | efo->form = GBS_string_eval(form, "\\\\\n=:\\==\\\\\\=:*=\\*\\=*1:\\:=\\\\\\:", 0); |
---|
133 | if (!efo->form) error = GB_failedTo_error("evaluate part below 'BEGIN'", NULL, GB_await_error()); |
---|
134 | free(form); |
---|
135 | } |
---|
136 | |
---|
137 | // some checks for incompatible commands |
---|
138 | if (!error) { |
---|
139 | if (efo->system && !efo->new_format) error = "Missing 'PRE_FORMAT' (needed by 'SYSTEM')"; |
---|
140 | else if (efo->new_format && !efo->system) error = "Missing 'SYSTEM' (needed by 'PRE_FORMAT')"; |
---|
141 | else if (efo->export_mode != AWTI_EXPORT_USING_FORM) { |
---|
142 | if (efo->system) error = "'SYSTEM' is not allowed together with 'INTERNAL'"; |
---|
143 | if (efo->new_format) error = "'PRE_FORMAT' is not allowed together with 'INTERNAL'"; |
---|
144 | } |
---|
145 | } |
---|
146 | |
---|
147 | error = GB_failedTo_error("read export format", fullfile, error); |
---|
148 | fclose(in); |
---|
149 | } |
---|
150 | free(fullfile); |
---|
151 | } |
---|
152 | |
---|
153 | return error; |
---|
154 | } |
---|
155 | |
---|
156 | // ---------------------------------------- |
---|
157 | // export sequence helper class |
---|
158 | |
---|
159 | typedef GBDATA *(*FindSpeciesFunction)(GBDATA *); |
---|
160 | |
---|
161 | class export_sequence_data { |
---|
162 | GBDATA *last_species_read; |
---|
163 | char *seq; |
---|
164 | size_t len; |
---|
165 | char *error; |
---|
166 | |
---|
167 | GBDATA *gb_main; |
---|
168 | char *ali; |
---|
169 | FindSpeciesFunction find_first, find_next; |
---|
170 | size_t species_count; |
---|
171 | AP_filter *filter; |
---|
172 | bool cut_stop_codon; |
---|
173 | int compress; // 0 = no;1 = vertical gaps; 2 = all gaps; |
---|
174 | |
---|
175 | size_t max_ali_len; // length of alignment |
---|
176 | int *export_column; // list of exported seq data positions |
---|
177 | int columns; // how many columns get exported |
---|
178 | |
---|
179 | GBDATA *single_species; // if != NULL -> first/next only return that species (used to export to multiple files) |
---|
180 | |
---|
181 | public: |
---|
182 | |
---|
183 | export_sequence_data(GBDATA *Gb_Main, bool only_marked, AP_filter* Filter, bool CutStopCodon, int Compress) |
---|
184 | : last_species_read(0) |
---|
185 | , seq(0) |
---|
186 | , len(0), error(0) |
---|
187 | , gb_main(Gb_Main), species_count(size_t(-1)) |
---|
188 | , filter(Filter) |
---|
189 | , cut_stop_codon(CutStopCodon) |
---|
190 | , compress(Compress) |
---|
191 | , export_column(0) |
---|
192 | , columns(0) |
---|
193 | , single_species(0) |
---|
194 | { |
---|
195 | ali = GBT_get_default_alignment(gb_main); |
---|
196 | max_ali_len = GBT_get_alignment_len(gb_main, ali); |
---|
197 | |
---|
198 | if (cut_stop_codon) { |
---|
199 | GB_alignment_type ali_type = GBT_get_alignment_type(gb_main, ali); |
---|
200 | if (ali_type != GB_AT_AA) { |
---|
201 | aw_message("Cutting stop codon makes no sense - ignored"); |
---|
202 | cut_stop_codon = false; |
---|
203 | } |
---|
204 | } |
---|
205 | awte_assert(filter); |
---|
206 | |
---|
207 | if (only_marked) { |
---|
208 | find_first = GBT_first_marked_species; |
---|
209 | find_next = GBT_next_marked_species; |
---|
210 | } |
---|
211 | else { |
---|
212 | find_first = GBT_first_species; |
---|
213 | find_next = GBT_next_species; |
---|
214 | } |
---|
215 | |
---|
216 | if (size_t(filter->filter_len) < max_ali_len) { |
---|
217 | aw_message(GBS_global_string("Warning: Your filter is shorter than the alignment (%li<%zu)", |
---|
218 | filter->filter_len, max_ali_len)); |
---|
219 | max_ali_len = filter->filter_len; |
---|
220 | } |
---|
221 | } |
---|
222 | |
---|
223 | ~export_sequence_data() { |
---|
224 | delete [] export_column; |
---|
225 | delete [] seq; |
---|
226 | free(error); |
---|
227 | free(ali); |
---|
228 | } |
---|
229 | |
---|
230 | const char *getAlignment() const { return ali; } |
---|
231 | |
---|
232 | void set_single_mode(GBDATA *gb_species) { single_species = gb_species; } |
---|
233 | bool in_single_mode() const { return single_species; } |
---|
234 | |
---|
235 | GBDATA *first_species() const { return single_species ? single_species : find_first(gb_main); } |
---|
236 | GBDATA *next_species(GBDATA *gb_prev) const { return single_species ? NULL : find_next(gb_prev); } |
---|
237 | |
---|
238 | const unsigned char *get_seq_data(GBDATA *gb_species, size_t& slen, GB_ERROR& error) const; |
---|
239 | static bool isGap(char c) { return c == '-' || c == '.'; } |
---|
240 | |
---|
241 | size_t count_species() { |
---|
242 | awte_assert(!in_single_mode()); |
---|
243 | if (species_count == size_t(-1)) { |
---|
244 | species_count = 0; |
---|
245 | for (GBDATA *gb_species = find_first(gb_main); gb_species; gb_species = find_next(gb_species)) { |
---|
246 | species_count++; |
---|
247 | } |
---|
248 | } |
---|
249 | return species_count; |
---|
250 | } |
---|
251 | |
---|
252 | GB_ERROR detectVerticalGaps(); |
---|
253 | const char *get_export_sequence(GBDATA *gb_species, size_t& seq_len, GB_ERROR& error); |
---|
254 | }; |
---|
255 | |
---|
256 | const unsigned char *export_sequence_data::get_seq_data(GBDATA *gb_species, size_t& slen, GB_ERROR& err) const { |
---|
257 | const char *data = 0; |
---|
258 | GBDATA *gb_seq = GBT_read_sequence(gb_species, ali); |
---|
259 | |
---|
260 | if (!gb_seq) { |
---|
261 | err = GBS_global_string_copy("No data in alignment '%s' of species '%s'", ali, GBT_read_name(gb_species)); |
---|
262 | slen = 0; |
---|
263 | } |
---|
264 | else { |
---|
265 | data = GB_read_char_pntr(gb_seq); |
---|
266 | slen = GB_read_count(gb_seq); |
---|
267 | err = 0; |
---|
268 | } |
---|
269 | return (const unsigned char *)data; |
---|
270 | } |
---|
271 | |
---|
272 | |
---|
273 | GB_ERROR export_sequence_data::detectVerticalGaps() { |
---|
274 | GB_ERROR err = 0; |
---|
275 | |
---|
276 | awte_assert(!in_single_mode()); |
---|
277 | |
---|
278 | filter->calc_filter_2_seq(); |
---|
279 | if (compress == 1) { // compress vertical gaps! |
---|
280 | int gap_columns = filter->real_len; |
---|
281 | int *gap_column = new int[gap_columns+1]; |
---|
282 | memcpy(gap_column, filter->filterpos_2_seqpos, gap_columns*sizeof(*gap_column)); |
---|
283 | gap_column[gap_columns] = max_ali_len; |
---|
284 | |
---|
285 | size_t spec_count = count_species(); |
---|
286 | size_t stat_update = spec_count/1000; |
---|
287 | |
---|
288 | if (stat_update == 0) stat_update = 1; |
---|
289 | |
---|
290 | size_t count = 0; |
---|
291 | size_t next_stat = count+stat_update; |
---|
292 | |
---|
293 | aw_status("Calculating vertical gaps"); |
---|
294 | aw_status(0.0); |
---|
295 | |
---|
296 | for (GBDATA *gb_species = first_species(); |
---|
297 | gb_species && !err; |
---|
298 | gb_species = next_species(gb_species)) |
---|
299 | { |
---|
300 | size_t slen; |
---|
301 | const unsigned char *sdata = get_seq_data(gb_species, slen, err); |
---|
302 | |
---|
303 | if (!err) { |
---|
304 | int j = 0; |
---|
305 | int i; |
---|
306 | for (i = 0; i<gap_columns; ++i) { |
---|
307 | if (isGap(sdata[gap_column[i]])) { |
---|
308 | gap_column[j++] = gap_column[i]; // keep gap column |
---|
309 | } |
---|
310 | // otherwise it's overwritten |
---|
311 | } |
---|
312 | |
---|
313 | int skipped_columns = i-j; |
---|
314 | gap_columns -= skipped_columns; |
---|
315 | awte_assert(gap_columns >= 0); |
---|
316 | } |
---|
317 | ++count; |
---|
318 | if (count >= next_stat) { |
---|
319 | if (aw_status(count/double(spec_count))) err = "User abort"; |
---|
320 | next_stat = count+stat_update; |
---|
321 | } |
---|
322 | } |
---|
323 | |
---|
324 | aw_status(1.0); |
---|
325 | |
---|
326 | if (!err) { |
---|
327 | columns = filter->real_len - gap_columns; |
---|
328 | export_column = new int[columns]; |
---|
329 | |
---|
330 | int gpos = 0; // index into array of vertical gaps |
---|
331 | int epos = 0; // index into array of exported columns |
---|
332 | int flen = filter->real_len; |
---|
333 | int a; |
---|
334 | for (a = 0; a<flen && gpos<gap_columns; ++a) { |
---|
335 | int fpos = filter->filterpos_2_seqpos[a]; |
---|
336 | if (fpos == gap_column[gpos]) { // only gaps here -> skip column |
---|
337 | gpos++; |
---|
338 | } |
---|
339 | else { // not only gaps -> use column |
---|
340 | awte_assert(fpos<gap_column[gpos]); |
---|
341 | awte_assert(epos < columns); // got more columns than expected |
---|
342 | export_column[epos++] = fpos; |
---|
343 | } |
---|
344 | } |
---|
345 | for (; a<flen; ++a) { |
---|
346 | export_column[epos++] = filter->filterpos_2_seqpos[a]; |
---|
347 | } |
---|
348 | |
---|
349 | awte_assert(epos == columns); |
---|
350 | } |
---|
351 | |
---|
352 | delete [] gap_column; |
---|
353 | delete [] filter->filterpos_2_seqpos; |
---|
354 | filter->filterpos_2_seqpos = 0; |
---|
355 | } |
---|
356 | else { // compress all or none (simply use filter) |
---|
357 | export_column = filter->filterpos_2_seqpos; |
---|
358 | filter->filterpos_2_seqpos = 0; |
---|
359 | columns = filter->real_len; |
---|
360 | } |
---|
361 | |
---|
362 | seq = new char[columns+1]; |
---|
363 | |
---|
364 | return err; |
---|
365 | } |
---|
366 | |
---|
367 | const char *export_sequence_data::get_export_sequence(GBDATA *gb_species, size_t& seq_len, GB_ERROR& err) { |
---|
368 | if (gb_species != last_species_read) { |
---|
369 | freeset(error, 0); |
---|
370 | |
---|
371 | // read + filter a new species |
---|
372 | GB_ERROR curr_error; |
---|
373 | const unsigned char *data = get_seq_data(gb_species, len, curr_error); |
---|
374 | |
---|
375 | if (curr_error) { |
---|
376 | error = strdup(curr_error); |
---|
377 | } |
---|
378 | else { |
---|
379 | int i; |
---|
380 | const uchar *simplify = filter->simplify; |
---|
381 | |
---|
382 | if (cut_stop_codon) { |
---|
383 | const unsigned char *stop_codon = (const unsigned char *)memchr(data, '*', len); |
---|
384 | if (stop_codon) { |
---|
385 | len = stop_codon-data; |
---|
386 | } |
---|
387 | } |
---|
388 | |
---|
389 | if (compress == 2) { // compress all gaps |
---|
390 | int j = 0; |
---|
391 | for (i = 0; i<columns; ++i) { |
---|
392 | size_t seq_pos = export_column[i]; |
---|
393 | if (seq_pos<len) { |
---|
394 | unsigned char c = data[seq_pos]; |
---|
395 | if (!isGap(c)) { |
---|
396 | seq[j++] = simplify[c]; |
---|
397 | } |
---|
398 | } |
---|
399 | } |
---|
400 | seq[j] = 0; |
---|
401 | len = j; |
---|
402 | } |
---|
403 | else { // compress vertical or compress none (simply use filter in both cases) |
---|
404 | for (i = 0; i<columns; ++i) { |
---|
405 | size_t seq_pos = export_column[i]; |
---|
406 | if (seq_pos<len) { |
---|
407 | seq[i] = simplify[data[seq_pos]]; |
---|
408 | } |
---|
409 | else { |
---|
410 | seq[i] = simplify['.']; |
---|
411 | } |
---|
412 | } |
---|
413 | seq[i] = 0; |
---|
414 | len = columns; |
---|
415 | } |
---|
416 | } |
---|
417 | } |
---|
418 | |
---|
419 | err = error; |
---|
420 | if (error) { |
---|
421 | seq_len = 0; |
---|
422 | return 0; |
---|
423 | } |
---|
424 | |
---|
425 | seq_len = len; |
---|
426 | return seq; |
---|
427 | } |
---|
428 | |
---|
429 | // ---------------------------------------- |
---|
// exported_sequence is temporarily hooked into ACI (it provides the result of command 'export_sequence'),
// which is the sequence filtered and compressed according to the settings in the export window
---|
432 | |
---|
433 | static export_sequence_data *esd = 0; |
---|
434 | |
---|
435 | extern "C" const char *exported_sequence(GBDATA *gb_species, size_t *seq_len, GB_ERROR *error) { |
---|
436 | awte_assert(esd); |
---|
437 | return esd->get_export_sequence(gb_species, *seq_len, *error); |
---|
438 | } |
---|
439 | |
---|
440 | static GB_ERROR AWTI_XML_recursive(GBDATA *gbd) { |
---|
441 | GB_ERROR error = 0; |
---|
442 | const char *key_name = GB_read_key_pntr(gbd); |
---|
443 | XML_Tag *tag = 0; |
---|
444 | bool descend = true; |
---|
445 | |
---|
446 | if (strncmp(key_name, "ali_", 4) == 0) |
---|
447 | { |
---|
448 | awte_assert(esd); |
---|
449 | descend = false; // do not descend into alignments |
---|
450 | if (strcmp(esd->getAlignment(), key_name) == 0) { // the wanted alignment |
---|
451 | |
---|
452 | tag = new XML_Tag("ALIGNMENT"); |
---|
453 | tag->add_attribute("name", key_name+4); |
---|
454 | |
---|
455 | GBDATA *gb_species = GB_get_father(gbd); |
---|
456 | size_t len; |
---|
457 | const char *seq = exported_sequence(gb_species, &len, &error); |
---|
458 | |
---|
459 | if (seq) { |
---|
460 | XML_Tag dtag("data"); |
---|
461 | { XML_Text seqText(seq); } |
---|
462 | } |
---|
463 | } |
---|
464 | } |
---|
465 | else { |
---|
466 | tag = new XML_Tag(key_name); |
---|
467 | |
---|
468 | const char *name = GBT_read_char_pntr(gbd, "name"); |
---|
469 | if (name) tag->add_attribute("name", name); |
---|
470 | } |
---|
471 | |
---|
472 | if (descend) { |
---|
473 | switch (GB_read_type(gbd)) { |
---|
474 | case GB_DB: { |
---|
475 | for (GBDATA *gb_child = GB_child(gbd); gb_child && !error; gb_child = GB_nextChild(gb_child)) { |
---|
476 | const char *sub_key_name = GB_read_key_pntr(gb_child); |
---|
477 | |
---|
478 | if (strcmp(sub_key_name, "name") != 0) { // do not recurse for "name" (is handled above) |
---|
479 | error = AWTI_XML_recursive(gb_child); |
---|
480 | } |
---|
481 | } |
---|
482 | break; |
---|
483 | } |
---|
484 | default: { |
---|
485 | char *content = GB_read_as_string(gbd); |
---|
486 | if (content) { |
---|
487 | XML_Text text(content); |
---|
488 | } |
---|
489 | else { |
---|
490 | tag->add_attribute("error", "unsavable"); |
---|
491 | } |
---|
492 | } |
---|
493 | } |
---|
494 | } |
---|
495 | |
---|
496 | delete tag; |
---|
497 | return error; |
---|
498 | } |
---|
499 | |
---|
500 | static GB_ERROR export_species_using_form(FILE *out, GBDATA *gb_species, const char *form) { |
---|
501 | GB_ERROR error = NULL; |
---|
502 | char *pars = GBS_string_eval(" ", form, gb_species); |
---|
503 | if (!pars) error = GB_await_error(); |
---|
504 | else { |
---|
505 | char *p; |
---|
506 | char *o = pars; |
---|
507 | while ( (p = GBS_find_string(o,"$$DELETE_LINE$$",0)) ) { |
---|
508 | char *l,*r; |
---|
509 | for (l = p; l>o; l--) if (*l=='\n') break; |
---|
510 | r = strchr(p,'\n'); if (!r) r = p +strlen(p); |
---|
511 | fwrite(o,1,l-o,out); |
---|
512 | o = r; |
---|
513 | } |
---|
514 | fputs(o, out); |
---|
515 | free(pars); |
---|
516 | } |
---|
517 | return error; |
---|
518 | } |
---|
519 | |
---|
520 | static GB_ERROR AWTI_export_format(AW_root *aw_root, const char *formname, const char *outname, char **resulting_outname) { |
---|
521 | // Exports sequences specified by 'esd' (module global variable) |
---|
522 | // to format specified by 'formname'. |
---|
523 | // |
---|
524 | // if 'outname' == NULL -> export species to temporary file, otherwise to 'outname'. |
---|
525 | // Full path of generated file is returned in 'resulting_outname' |
---|
526 | |
---|
527 | static int export_depth = 0; |
---|
528 | static int export_depth_max = 0; |
---|
529 | export_depth++; |
---|
530 | |
---|
531 | *resulting_outname = 0; |
---|
532 | |
---|
533 | export_format efo; |
---|
534 | GB_ERROR error = awtc_read_export_format(&efo, formname, true); |
---|
535 | |
---|
536 | if (!error) { |
---|
537 | if (!outname) { // if no 'outname' is given -> export to temporary file |
---|
538 | *resulting_outname = GB_create_tempfile(GB_unique_filename("exported", efo.suffix)); |
---|
539 | if (!*resulting_outname) error = GB_await_error(); |
---|
540 | } |
---|
541 | else *resulting_outname = strdup(outname); |
---|
542 | } |
---|
543 | if (!error) { |
---|
544 | if (efo.new_format) { |
---|
545 | // Export data using format 'new_format'. |
---|
546 | // Afterwards convert to wanted format using 'system'. |
---|
547 | |
---|
548 | awte_assert(efo.system); |
---|
549 | |
---|
550 | char *intermediate_export; |
---|
551 | error = AWTI_export_format(aw_root, efo.new_format, NULL, &intermediate_export); |
---|
552 | if (!error) { |
---|
553 | awte_assert(GB_is_privatefile(intermediate_export, GB_FALSE)); |
---|
554 | |
---|
555 | aw_status(GBS_global_string("Converting to %s", efo.suffix)); |
---|
556 | |
---|
557 | char *srt = GBS_global_string_copy("$<=%s:$>=%s", intermediate_export, *resulting_outname); |
---|
558 | char *sys = GBS_string_eval(efo.system, srt, 0); |
---|
559 | |
---|
560 | aw_status(GBS_global_string("exec '%s'", efo.system)); |
---|
561 | error = GB_system(sys); |
---|
562 | |
---|
563 | GB_unlink_or_warn(intermediate_export, &error); |
---|
564 | aw_status(1 - double(export_depth-1)/export_depth_max); |
---|
565 | |
---|
566 | free(sys); |
---|
567 | free(srt); |
---|
568 | } |
---|
569 | free(intermediate_export); |
---|
570 | } |
---|
571 | else { |
---|
572 | FILE *out = fopen(*resulting_outname, "wt"); |
---|
573 | if (!out) error = GB_export_IO_error("writing", *resulting_outname); |
---|
574 | else { |
---|
575 | XML_Document *xml = 0; |
---|
576 | |
---|
577 | aw_status("Saving data"); |
---|
578 | export_depth_max = export_depth; |
---|
579 | |
---|
580 | if (efo.export_mode == AWTI_EXPORT_XML) { |
---|
581 | xml = new XML_Document("ARB_SEQ_EXPORT", "arb_seq_export.dtd", out); |
---|
582 | { |
---|
583 | char *db_name = aw_root->awar(AWAR_DB_NAME)->read_string(); |
---|
584 | xml->add_attribute("database", db_name); |
---|
585 | free(db_name); |
---|
586 | } |
---|
587 | xml->add_attribute("export_date", GB_date_string()); |
---|
588 | { |
---|
589 | char *fulldtd = AWT_unfold_path("lib/dtd", "ARBHOME"); |
---|
590 | XML_Comment rem(GBS_global_string("There's a basic version of ARB_seq_export.dtd in %s\n" |
---|
591 | "but you might need to expand it by yourself,\n" |
---|
592 | "because the ARB-database may contain any kind of fields.", |
---|
593 | fulldtd)); |
---|
594 | free(fulldtd); |
---|
595 | } |
---|
596 | } |
---|
597 | |
---|
598 | int allCount = 0; |
---|
599 | for (GBDATA *gb_species = esd->first_species(); |
---|
600 | gb_species && !error; |
---|
601 | gb_species = esd->next_species(gb_species)) |
---|
602 | { |
---|
603 | allCount++; |
---|
604 | } |
---|
605 | |
---|
606 | int count = 0; |
---|
607 | for (GBDATA *gb_species = esd->first_species(); |
---|
608 | gb_species && !error; |
---|
609 | gb_species = esd->next_species(gb_species)) |
---|
610 | { |
---|
611 | aw_status(GBS_global_string("Saving species %i/%i", ++count, allCount)); |
---|
612 | switch (efo.export_mode) { |
---|
613 | case AWTI_EXPORT_USING_FORM: |
---|
614 | error = export_species_using_form(out, gb_species, efo.form); |
---|
615 | break; |
---|
616 | |
---|
617 | case AWTI_EXPORT_XML: |
---|
618 | error = AWTI_XML_recursive(gb_species); |
---|
619 | break; |
---|
620 | |
---|
621 | case AWTI_EXPORT_INVALID: |
---|
622 | gb_assert(0); |
---|
623 | break; |
---|
624 | } |
---|
625 | aw_status(double(count)/allCount); |
---|
626 | } |
---|
627 | |
---|
628 | delete xml; |
---|
629 | fclose(out); |
---|
630 | } |
---|
631 | } |
---|
632 | } |
---|
633 | |
---|
634 | if (error) { |
---|
635 | if (*resulting_outname) { |
---|
636 | GB_unlink_or_warn(*resulting_outname, NULL); |
---|
637 | freeset(*resulting_outname, 0); |
---|
638 | } |
---|
639 | } |
---|
640 | |
---|
641 | export_depth--; |
---|
642 | |
---|
643 | return error; |
---|
644 | } |
---|
645 | |
---|
646 | static GB_ERROR AWTI_export_format_multiple(AW_root *aw_root, const char *formname, const char *outname, bool multiple, char **resulting_outname) { |
---|
647 | GB_ERROR error = 0; |
---|
648 | |
---|
649 | aw_status(0.); |
---|
650 | |
---|
651 | if (multiple) { |
---|
652 | char *path, *name, *suffix; |
---|
653 | GB_split_full_path(outname, &path, NULL, &name, &suffix); |
---|
654 | |
---|
655 | *resulting_outname = NULL; |
---|
656 | |
---|
657 | size_t species_count = esd->count_species(); |
---|
658 | size_t count = 0; |
---|
659 | |
---|
660 | for (GBDATA *gb_species = esd->first_species(); |
---|
661 | gb_species && !error; |
---|
662 | gb_species = esd->next_species(gb_species)) |
---|
663 | { |
---|
664 | const char *species_name = GBT_read_char_pntr(gb_species, "name"); |
---|
665 | if (!species_name) error = "Can't export unnamed species"; |
---|
666 | else { |
---|
667 | const char *fname = GB_append_suffix(GBS_global_string("%s_%s", name, species_name), suffix); |
---|
668 | aw_status(fname); |
---|
669 | |
---|
670 | char *oname = strdup(GB_concat_path(path, fname)); |
---|
671 | char *res_oname; |
---|
672 | |
---|
673 | esd->set_single_mode(gb_species); // means: only export 'gb_species' |
---|
674 | error = AWTI_export_format(aw_root, formname, oname, &res_oname); |
---|
675 | esd->set_single_mode(NULL); |
---|
676 | |
---|
677 | aw_status(++count/double(species_count)); |
---|
678 | |
---|
679 | if (!*resulting_outname || // not set yet |
---|
680 | (res_oname && strcmp(*resulting_outname, res_oname)>0)) // or smaller than set one |
---|
681 | { |
---|
682 | reassign(*resulting_outname, res_oname); |
---|
683 | } |
---|
684 | |
---|
685 | free(res_oname); |
---|
686 | free(oname); |
---|
687 | } |
---|
688 | } |
---|
689 | } |
---|
690 | else { |
---|
691 | error = AWTI_export_format(aw_root, formname, outname, resulting_outname); |
---|
692 | } |
---|
693 | |
---|
694 | return error; |
---|
695 | } |
---|
696 | |
---|
697 | // ---------------------------------------- |
---|
698 | |
---|
699 | void AWTC_export_go_cb(AW_window *aww, AW_CL cl_gb_main, AW_CL res_from_awt_create_select_filter) { |
---|
700 | GBDATA *gb_main = (GBDATA*)cl_gb_main; |
---|
701 | GB_transaction dummy(gb_main); |
---|
702 | adfiltercbstruct *acbs = (adfiltercbstruct*)res_from_awt_create_select_filter; |
---|
703 | |
---|
704 | aw_openstatus("Exporting data"); |
---|
705 | |
---|
706 | AW_root *awr = aww->get_root(); |
---|
707 | char *formname = awr->awar(AWAR_EXPORT_FORM"/file_name")->read_string(); |
---|
708 | int multiple = awr->awar(AWAR_EXPORT_MULTIPLE_FILES)->read_int(); |
---|
709 | int marked_only = awr->awar(AWAR_EXPORT_MARKED)->read_int(); |
---|
710 | int cut_stop_codon = awr->awar(AWAR_EXPORT_CUTSTOP)->read_int(); |
---|
711 | int compress = awr->awar(AWAR_EXPORT_COMPRESS)->read_int(); |
---|
712 | GB_ERROR error = 0; |
---|
713 | |
---|
714 | char *outname = awr->awar(AWAR_EXPORT_FILE"/file_name")->read_string(); |
---|
715 | char *real_outname = 0; // with suffix (name of first file if multiple) |
---|
716 | |
---|
717 | AP_filter *filter = awt_get_filter(awr, acbs); |
---|
718 | esd = new export_sequence_data(gb_main, marked_only, filter, cut_stop_codon, compress); |
---|
719 | GB_set_export_sequence_hook(exported_sequence); |
---|
720 | |
---|
721 | error = esd->detectVerticalGaps(); |
---|
722 | if (!error) { |
---|
723 | error = AWTI_export_format_multiple(awr, formname, outname, multiple, &real_outname); |
---|
724 | } |
---|
725 | GB_set_export_sequence_hook(0); |
---|
726 | delete esd; |
---|
727 | esd = 0; |
---|
728 | |
---|
729 | aw_closestatus(); |
---|
730 | if (error) aw_message(error); |
---|
731 | |
---|
732 | if (real_outname) awr->awar(AWAR_EXPORT_FILE"/file_name")->write_string(real_outname); |
---|
733 | |
---|
734 | awt_refresh_selection_box(awr, AWAR_EXPORT_FILE); |
---|
735 | |
---|
736 | free(real_outname); |
---|
737 | free(outname); |
---|
738 | free(formname); |
---|
739 | |
---|
740 | } |
---|
741 | |
---|
742 | void AWTC_create_export_awars(AW_root *awr, AW_default def) { |
---|
743 | aw_create_selection_box_awars(awr, AWAR_EXPORT_FORM, GB_path_in_ARBLIB("export", NULL), ".eft", "*", AW_ROOT_DEFAULT, true); |
---|
744 | aw_create_selection_box_awars(awr, AWAR_EXPORT_FILE, "", "", "noname"); |
---|
745 | |
---|
746 | awr->awar_string(AWAR_EXPORT_ALI,"16s",def); |
---|
747 | awr->awar_int(AWAR_EXPORT_MULTIPLE_FILES, 0, def); |
---|
748 | |
---|
749 | awr->awar_int(AWAR_EXPORT_MARKED, 1, def); // marked only |
---|
750 | awr->awar_int(AWAR_EXPORT_COMPRESS, 1, def); // vertical gaps |
---|
751 | awr->awar_string(AWAR_EXPORT_FILTER_NAME, "none", def); // no default filter |
---|
752 | awr->awar_string(AWAR_EXPORT_FILTER_FILTER, "", def); |
---|
753 | AW_awar *awar_ali = awr->awar_string(AWAR_EXPORT_FILTER_ALI, "", def); |
---|
754 | awar_ali->map("presets/use"); // map to default alignment |
---|
755 | |
---|
756 | awr->awar_int(AWAR_EXPORT_CUTSTOP, 0, def); // dont cut stop-codon |
---|
757 | } |
---|
758 | |
---|
759 | static char *get_format_default_suffix(const char *formname, GB_ERROR& error) { |
---|
760 | export_format efs; |
---|
761 | error = awtc_read_export_format(&efs, formname, false); |
---|
762 | |
---|
763 | if (!error && efs.suffix) return strdup(efs.suffix); |
---|
764 | return NULL; |
---|
765 | } |
---|
766 | |
---|
767 | static void export_form_changed_cb(AW_root *aw_root) { |
---|
768 | // called when selected export format changes |
---|
769 | // -> automatically correct filename suffix |
---|
770 | // -> restrict view to suffix |
---|
771 | |
---|
772 | static char *previous_suffix = 0; |
---|
773 | |
---|
774 | GB_ERROR error = 0; |
---|
775 | AW_awar *awar_form = aw_root->awar(AWAR_EXPORT_FORM"/file_name"); |
---|
776 | char *current_format = awar_form->read_string(); |
---|
777 | |
---|
778 | if (current_format) { |
---|
779 | if (GB_is_regularfile(current_format)) { |
---|
780 | char *current_suffix = get_format_default_suffix(current_format, error); |
---|
781 | if (!error) { |
---|
782 | // Note: current_suffix may be NULL.. is that ok? |
---|
783 | |
---|
784 | // modify export filename and view |
---|
785 | |
---|
786 | AW_awar *awar_filter = aw_root->awar(AWAR_EXPORT_FILE"/filter"); |
---|
787 | AW_awar *awar_export = aw_root->awar(AWAR_EXPORT_FILE"/file_name"); |
---|
788 | |
---|
789 | awar_filter->write_string(""); |
---|
790 | |
---|
791 | char *exportname = awar_export->read_string(); |
---|
792 | |
---|
793 | { |
---|
794 | char *path, *nameOnly, *suffix; |
---|
795 | GB_split_full_path(exportname, &path, NULL, &nameOnly, &suffix); |
---|
796 | |
---|
797 | if (suffix) { |
---|
798 | if (previous_suffix && ARB_stricmp(suffix, previous_suffix) == 0) freedup(suffix, current_suffix); // remove old suffix |
---|
799 | else freedup(suffix, GB_append_suffix(suffix, current_suffix)); // don't know existing suffix -> append |
---|
800 | } |
---|
801 | else suffix = strdup(current_suffix); |
---|
802 | |
---|
803 | const char *new_exportname = GB_concat_path(path, GB_append_suffix(nameOnly, suffix)); |
---|
804 | if (new_exportname) awar_export->write_string(new_exportname); |
---|
805 | |
---|
806 | free(suffix); |
---|
807 | free(nameOnly); |
---|
808 | free(path); |
---|
809 | } |
---|
810 | |
---|
811 | free(exportname); |
---|
812 | |
---|
813 | awar_filter->write_string(current_suffix); |
---|
814 | |
---|
815 | // remember last applied suffix |
---|
816 | reassign(previous_suffix, current_suffix); |
---|
817 | } |
---|
818 | |
---|
819 | free(current_suffix); |
---|
820 | } |
---|
821 | free(current_format); |
---|
822 | } |
---|
823 | |
---|
824 | if (error) aw_message(error); |
---|
825 | } |
---|
826 | |
---|
827 | AW_window *open_AWTC_export_window(AW_root *awr,GBDATA *gb_main) |
---|
828 | { |
---|
829 | static AW_window_simple *aws = 0; |
---|
830 | if (aws) return aws; |
---|
831 | |
---|
832 | AWTC_create_export_awars(awr, AW_ROOT_DEFAULT); |
---|
833 | |
---|
834 | aws = new AW_window_simple; |
---|
835 | |
---|
836 | aws->init( awr, "ARB_EXPORT", "ARB EXPORT"); |
---|
837 | aws->load_xfig("awt/export_db.fig"); |
---|
838 | |
---|
839 | aws->at("close"); |
---|
840 | aws->callback(AW_POPDOWN); |
---|
841 | aws->create_button("CLOSE", "CLOSE","C"); |
---|
842 | |
---|
843 | aws->at("help"); |
---|
844 | aws->callback(AW_POPUP_HELP,(AW_CL)"arb_export.hlp"); |
---|
845 | aws->create_button("HELP", "HELP","H"); |
---|
846 | |
---|
847 | awt_create_selection_box(aws,AWAR_EXPORT_FILE,"f" ); |
---|
848 | |
---|
849 | awt_create_selection_box(aws,AWAR_EXPORT_FORM,"","ARBHOME", false ); |
---|
850 | |
---|
851 | aws->get_root()->awar(AWAR_EXPORT_FORM"/file_name")->add_callback(export_form_changed_cb); |
---|
852 | |
---|
853 | aws->at("allmarked"); |
---|
854 | aws->create_option_menu(AWAR_EXPORT_MARKED); |
---|
855 | aws->insert_option("all", "a", 0); |
---|
856 | aws->insert_option("marked", "m", 1); |
---|
857 | aws->update_option_menu(); |
---|
858 | |
---|
859 | aws->at("compress"); |
---|
860 | aws->create_option_menu(AWAR_EXPORT_COMPRESS); |
---|
861 | aws->insert_option("no", "n", 0); |
---|
862 | aws->insert_option("vertical gaps", "v", 1); |
---|
863 | aws->insert_option("all gaps", "a", 2); |
---|
864 | aws->update_option_menu(); |
---|
865 | |
---|
866 | aws->at("seqfilter"); |
---|
867 | adfiltercbstruct *filtercd = awt_create_select_filter(aws->get_root(), gb_main, AWAR_EXPORT_FILTER_NAME); |
---|
868 | aws->callback(AW_POPUP, (AW_CL)awt_create_select_filter_win, (AW_CL)filtercd); |
---|
869 | aws->create_button("SELECT_FILTER", AWAR_EXPORT_FILTER_NAME); |
---|
870 | |
---|
871 | aws->at("cutstop"); |
---|
872 | aws->create_toggle(AWAR_EXPORT_CUTSTOP); |
---|
873 | |
---|
874 | aws->at("multiple"); |
---|
875 | aws->create_toggle(AWAR_EXPORT_MULTIPLE_FILES); |
---|
876 | |
---|
877 | aws->at("go"); |
---|
878 | aws->highlight(); |
---|
879 | aws->callback(AWTC_export_go_cb,(AW_CL)gb_main, (AW_CL)filtercd); |
---|
880 | aws->create_button("GO", "GO","G"); |
---|
881 | |
---|
882 | return aws; |
---|
883 | } |
---|