| 1 | #include "GDE_extglob.h" |
|---|
| 2 | #include "GDE_awars.h" |
|---|
| 3 | |
|---|
| 4 | #include <awt_filter.hxx> |
|---|
| 5 | #include <aw_window.hxx> |
|---|
| 6 | #include <aw_root.hxx> |
|---|
| 7 | #include <aw_question.hxx> |
|---|
| 8 | #include <aw_awar.hxx> |
|---|
| 9 | #include <aw_msg.hxx> |
|---|
| 10 | #include <aw_file.hxx> |
|---|
| 11 | #include <AP_filter.hxx> |
|---|
| 12 | #include <arb_progress.h> |
|---|
| 13 | #include <arb_strbuf.h> |
|---|
| 14 | #include <arb_global_defs.h> |
|---|
| 15 | |
|---|
| 16 | #include <set> |
|---|
| 17 | #include <string> |
|---|
| 18 | |
|---|
| 19 | #include <unistd.h> |
|---|
| 20 | #include <macros.hxx> |
|---|
| 21 | #include <xcmd.hxx> |
|---|
| 22 | |
|---|
| 23 | using namespace std; |
|---|
| 24 | |
|---|
| 25 | extern adfiltercbstruct *agde_filter; |
|---|
| 26 | |
|---|
| 27 | /* |
|---|
| 28 | ReplaceArgs(): |
|---|
| 29 | Replace all command line arguments with the appropriate values |
|---|
| 30 | stored for the chosen menu item. |
|---|
| 31 | |
|---|
| 32 | Copyright (c) 1989-1990, University of Illinois board of trustees. All |
|---|
| 33 | rights reserved. Written by Steven Smith at the Center for Prokaryote Genome |
|---|
| 34 | Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. |
|---|
| 35 | Carl Woese. |
|---|
| 36 | |
|---|
| 37 | Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. |
|---|
| 38 | All rights reserved. |
|---|
| 39 | |
|---|
| 40 | */ |
|---|
| 41 | |
|---|
| 42 | |
|---|
| 43 | static char *ReplaceArgs(AW_root *awr, char *Action, GmenuItem *gmenuitem, int number) { |
|---|
| 44 | /* |
|---|
| 45 | * The basic idea is to replace all of the symbols in the method |
|---|
| 46 | * string with the values picked in the dialog box. The method |
|---|
| 47 | * is the general command line structure. All arguments have two |
|---|
| 48 | * parts : a label and a value. Values are the |
|---|
| 49 | * associated arguments that some flags require. All symbols that |
|---|
| 50 | * require argvalue replacement should have a '$' infront of the symbol |
|---|
| 51 | * name in the itemmethod definition. |
|---|
| 52 | * |
|---|
| 53 | * If '$symbol' is prefixed by '!' ARB_GDE does a label replacement, i.e. insert |
|---|
| 54 | * the value visible in GUI. Only works for argchoice arguments! |
|---|
| 55 | * This is intended for informational use (e.g. to write used settings |
|---|
| 56 | * into the comment of a generated tree). |
|---|
| 57 | * |
|---|
| 58 | * An example command line replacement would be: |
|---|
| 59 | * |
|---|
| 60 | * itemmethod=> "lpr -P $arg1 $arg2" |
|---|
| 61 | * |
|---|
| 62 | * arglabel arg1=> "To printer?" |
|---|
| 63 | * argvalue arg1=> "lw" |
|---|
| 64 | * |
|---|
| 65 | * arglabel arg2=> "File name?" |
|---|
| 66 | * argvalue arg2=> "foobar" |
|---|
| 67 | * |
|---|
| 68 | * final command line: |
|---|
| 69 | * |
|---|
| 70 | * lpr -P lw foobar |
|---|
| 71 | * |
|---|
| 72 | */ |
|---|
| 73 | |
|---|
| 74 | char *textvalue = NULp; |
|---|
| 75 | const char *labelvalue = NULp; |
|---|
| 76 | |
|---|
| 77 | const GmenuItemArg& currArg = gmenuitem->arg[number]; |
|---|
| 78 | |
|---|
| 79 | const char *symbol = currArg.symbol; |
|---|
| 80 | int type = currArg.type; |
|---|
| 81 | |
|---|
| 82 | if (type == SLIDER) { |
|---|
| 83 | char *awarname = GDE_makeawarname(gmenuitem, number); |
|---|
| 84 | textvalue = awr->awar(awarname)->read_as_string(); |
|---|
| 85 | free(awarname); |
|---|
| 86 | } |
|---|
| 87 | else if (type == FILE_SELECTOR) { |
|---|
| 88 | char *awar_base = GDE_maketmpawarname(gmenuitem, number); |
|---|
| 89 | textvalue = AW_get_selected_fullname(awr, awar_base); |
|---|
| 90 | free(awar_base); |
|---|
| 91 | } |
|---|
| 92 | else if (type == CHOOSER || |
|---|
| 93 | type == CHOICE_TREE || |
|---|
| 94 | type == CHOICE_SAI || |
|---|
| 95 | type == CHOICE_MENU || |
|---|
| 96 | type == CHOICE_LIST || |
|---|
| 97 | type == CHOICE_WEIGHTS || |
|---|
| 98 | type == TEXTFIELD) |
|---|
| 99 | { |
|---|
| 100 | char *awarname = GDE_makeawarname(gmenuitem, number); |
|---|
| 101 | textvalue = awr->awar(awarname)->read_string(); |
|---|
| 102 | |
|---|
| 103 | if (currArg.choice) { |
|---|
| 104 | for (int c = 0; c<currArg.numchoices && !labelvalue; ++c) { |
|---|
| 105 | GargChoice& choice = currArg.choice[c]; |
|---|
| 106 | if (choice.method) { |
|---|
| 107 | if (strcmp(choice.method, textvalue) == 0) { |
|---|
| 108 | labelvalue = choice.label; |
|---|
| 109 | } |
|---|
| 110 | } |
|---|
| 111 | } |
|---|
| 112 | } |
|---|
| 113 | } |
|---|
| 114 | |
|---|
| 115 | if (!textvalue) ARB_calloc(textvalue, 1); |
|---|
| 116 | if (!symbol) symbol = ""; |
|---|
| 117 | |
|---|
| 118 | set<string>warned_about; |
|---|
| 119 | int conversion_warning = 0; |
|---|
| 120 | |
|---|
| 121 | const int symLen = strlen(symbol); |
|---|
| 122 | int actlen = strlen(Action); |
|---|
| 123 | |
|---|
| 124 | for (int i, j = 0; (i=Find2(Action+j, symbol)) != -1;) { |
|---|
| 125 | i += j; |
|---|
| 126 | ++j; |
|---|
| 127 | if (i>0 && Action[i-1] == '$') { |
|---|
| 128 | const char *replaceBy = textvalue; |
|---|
| 129 | int skip = 1; // skip '$' |
|---|
| 130 | |
|---|
| 131 | if (i>1 && Action[i-2] == '!') { // use label (if available) |
|---|
| 132 | if (labelvalue) { |
|---|
| 133 | replaceBy = labelvalue; |
|---|
| 134 | skip = 2; // skip '!$' |
|---|
| 135 | } |
|---|
| 136 | else { |
|---|
| 137 | aw_message(GBS_global_string("[ARB_GDE]: Cannot access label of '%s'\n", symbol)); |
|---|
| 138 | return NULp; // @@@ ignores resources (should only occur during development) |
|---|
| 139 | } |
|---|
| 140 | } |
|---|
| 141 | |
|---|
| 142 | int repLen = strlen(replaceBy); |
|---|
| 143 | int remLen = skip+symLen; |
|---|
| 144 | |
|---|
| 145 | GBS_strstruct temp(actlen-remLen+repLen+1); |
|---|
| 146 | |
|---|
| 147 | temp.ncat(Action, i-skip); |
|---|
| 148 | temp.ncat(replaceBy, repLen); |
|---|
| 149 | temp.cat(Action+i+symLen); |
|---|
| 150 | |
|---|
| 151 | actlen = temp.get_position(); |
|---|
| 152 | freeset(Action, temp.release()); |
|---|
| 153 | } |
|---|
| 154 | else { |
|---|
| 155 | if (warned_about.find(symbol) == warned_about.end()) { |
|---|
| 156 | fprintf(stderr, |
|---|
| 157 | "old arb version converted '%s' to '%s' (now only '$%s' is converted)\n", |
|---|
| 158 | symbol, textvalue, symbol); |
|---|
| 159 | conversion_warning++; |
|---|
| 160 | warned_about.insert(symbol); |
|---|
| 161 | } |
|---|
| 162 | } |
|---|
| 163 | } |
|---|
| 164 | |
|---|
| 165 | if (conversion_warning) { |
|---|
| 166 | fprintf(stderr, |
|---|
| 167 | "Conversion warnings occurred in Action:\n'%s'\n", |
|---|
| 168 | Action); |
|---|
| 169 | } |
|---|
| 170 | |
|---|
| 171 | free(textvalue); |
|---|
| 172 | return Action; |
|---|
| 173 | } |
|---|
| 174 | |
|---|
| 175 | static void ReplaceString(char*& Action, const char *olds, const char *news) { |
|---|
| 176 | size_t oldslen = strlen(olds); |
|---|
| 177 | size_t newslen = strlen(news); |
|---|
| 178 | size_t actlen = strlen(Action); |
|---|
| 179 | |
|---|
| 180 | int i; |
|---|
| 181 | for (; (i=Find2(Action, olds)) != -1;) { |
|---|
| 182 | GBS_strstruct temp(actlen-oldslen+newslen+1); |
|---|
| 183 | |
|---|
| 184 | temp.ncat(Action, i); |
|---|
| 185 | temp.ncat(news, newslen); |
|---|
| 186 | temp.cat(Action+i+oldslen); |
|---|
| 187 | |
|---|
| 188 | actlen = temp.get_position(); |
|---|
| 189 | freeset(Action, temp.release()); |
|---|
| 190 | } |
|---|
| 191 | } |
|---|
| 192 | |
|---|
| 193 | static void ReplaceFile(char*& Action, GfileFormat file) { |
|---|
| 194 | ReplaceString(Action, file.symbol, file.name); |
|---|
| 195 | } |
|---|
| 196 | |
|---|
static bool FindAndRemoveAsyncAndSign(char*& Action) {
    // Removes a trailing '&' from the configured command.
    //
    // Only the last non-whitespace character is examined. If it is '&' the
    // string is truncated there (whitespace behind the '&' gets lost as well).
    //
    // returns true if an '&' has been removed.

    bool  hadAndSign = false;
    char *andSign    = Action+strlen(Action); // start at terminating zero byte

    while (andSign>Action) {
        --andSign;
        // cast is required: passing a negative char (non-ASCII byte) to isspace is undefined
        if (!isspace(static_cast<unsigned char>(andSign[0]))) {
            // looking at last non-space char
            if (andSign[0] == '&') {
                hadAndSign = true;
                andSign[0] = 0; // truncate
            }
            break;
        }
    }
    return hadAndSign;
}
|---|
| 216 | |
|---|
| 217 | static void GDE_freesequ(NA_Sequence *sequ) { |
|---|
| 218 | if (sequ) { |
|---|
| 219 | freenull(sequ->comments); |
|---|
| 220 | freenull(sequ->baggage); |
|---|
| 221 | freenull(sequ->sequence); |
|---|
| 222 | } |
|---|
| 223 | } |
|---|
| 224 | |
|---|
// Creates an empty alignment container (no elements, no groups) bound to database 'gb_main_'.
// Name and type of the default alignment are read from the database inside a transaction.
// 'alignment_name' is released with free() by the destructor.
// NOTE(review): the result of GBT_get_default_alignment() is passed on unchecked - confirm it cannot be NULp here.
NA_Alignment::NA_Alignment(GBDATA *gb_main_) :
    id(NULp),
    description(NULp),
    authority(NULp),
    numelements(0),
    maxnumelements(0),
    maxlen(0),
    rel_offset(0),
    element(NULp),
    numgroups(0),
    group(NULp),
    format(0),
    gb_main(gb_main_)
{
    GB_transaction ta(gb_main);
    alignment_name = GBT_get_default_alignment(gb_main);
    alignment_type = GBT_get_alignment_type(gb_main, alignment_name);
}
|---|
| 243 | |
|---|
| 244 | NA_Alignment::~NA_Alignment() { |
|---|
| 245 | free(id); |
|---|
| 246 | free(description); |
|---|
| 247 | free(authority); |
|---|
| 248 | free(alignment_name); |
|---|
| 249 | |
|---|
| 250 | for (unsigned long i=0; i<numelements; i++) { |
|---|
| 251 | GDE_freesequ(element+i); |
|---|
| 252 | } |
|---|
| 253 | } |
|---|
| 254 | |
|---|
| 255 | static GB_ERROR write_sequence_autoinc_alisize(GBDATA *gb_data, long& ali_len, const char *sequence, int seq_len) { |
|---|
| 256 | /* writes sequence data. |
|---|
| 257 | * Specials things done: |
|---|
| 258 | * - cuts content beyond 'ali_len' if nothing relevant there |
|---|
| 259 | * - increments alignment length (stored in DB and parameter) |
|---|
| 260 | */ |
|---|
| 261 | |
|---|
| 262 | GB_ERROR error = NULp; |
|---|
| 263 | int part_len = seq_len; // size that will be written |
|---|
| 264 | if (seq_len > ali_len) { // sequence longer than alignment |
|---|
| 265 | // check whether it can be cutoff w/o loosing anything relevant |
|---|
| 266 | int oversize = seq_len-ali_len; |
|---|
| 267 | int irrelevant = strspn(sequence+ali_len, "-.nN"); // @@@ this has to be different for AA! |
|---|
| 268 | int relevant_oversize = oversize-irrelevant; |
|---|
| 269 | |
|---|
| 270 | part_len = ali_len+relevant_oversize; |
|---|
| 271 | |
|---|
| 272 | if (relevant_oversize) { // got some relevant data behind alignment length -> increase alignment length |
|---|
| 273 | int new_ali_len = part_len; |
|---|
| 274 | GBDATA *gb_main = GB_get_root(gb_data); |
|---|
| 275 | const char *ali_name = GB_read_key_pntr(GB_get_father(gb_data)); |
|---|
| 276 | |
|---|
| 277 | gde_assert(GBT_get_alignment_len(gb_main, ali_name) == ali_len); |
|---|
| 278 | |
|---|
| 279 | error = GBT_set_alignment_len(gb_main, ali_name, new_ali_len); |
|---|
| 280 | ali_len = new_ali_len; |
|---|
| 281 | } |
|---|
| 282 | } |
|---|
| 283 | |
|---|
| 284 | if (!error) { |
|---|
| 285 | if (part_len<seq_len) { |
|---|
| 286 | char *seq_part = ARB_strndup(sequence, part_len); |
|---|
| 287 | error = GB_write_string(gb_data, seq_part); |
|---|
| 288 | free(seq_part); |
|---|
| 289 | } |
|---|
| 290 | else { |
|---|
| 291 | gde_assert(part_len == seq_len); |
|---|
| 292 | error = GB_write_string(gb_data, sequence); |
|---|
| 293 | } |
|---|
| 294 | } |
|---|
| 295 | |
|---|
| 296 | return error; |
|---|
| 297 | } |
|---|
| 298 | |
|---|
// true if 'c' is accepted by GAP::is_std_gap()
inline bool isgap(char c) { return GAP::is_std_gap(c); }
|---|
inline bool isTU(char c) {
    // true for uppercase 'T' or 'U' only (callers pass uppercased characters)
    switch (c) {
        case 'T':
        case 'U':
            return true;
        default:
            return false;
    }
}
|---|
| 301 | |
|---|
| 302 | inline char eatgaps(const char *seq, int& index) { |
|---|
| 303 | /*! increments index forward to next base (or EOS) |
|---|
| 304 | * @return first gap char seen or 0 |
|---|
| 305 | */ |
|---|
| 306 | if (isgap(seq[index])) { |
|---|
| 307 | char gap = seq[index++]; |
|---|
| 308 | while (isgap(seq[index])) ++index; |
|---|
| 309 | return gap; |
|---|
| 310 | } |
|---|
| 311 | return 0; |
|---|
| 312 | } |
|---|
| 313 | |
|---|
| 314 | static char *fix_aligned_data(const char *old_seq, const char *new_seq, GB_alignment_type ali_type) { |
|---|
| 315 | char *fixed = ARB_strdup(new_seq); |
|---|
| 316 | |
|---|
| 317 | int o = 0; |
|---|
| 318 | int n = 0; |
|---|
| 319 | int f = 0; |
|---|
| 320 | |
|---|
| 321 | bool fixTU = ali_type == GB_AT_RNA || ali_type == GB_AT_DNA; |
|---|
| 322 | char TU = ali_type == GB_AT_RNA ? 'U' : 'T'; |
|---|
| 323 | char tu = tolower(TU); |
|---|
| 324 | |
|---|
| 325 | while (old_seq[o]) { |
|---|
| 326 | char og = eatgaps(old_seq, o); |
|---|
| 327 | char ng = eatgaps(new_seq, n); |
|---|
| 328 | |
|---|
| 329 | if (og && ng && og != ng) memset(fixed+f, og, n-f); |
|---|
| 330 | f = n; |
|---|
| 331 | |
|---|
| 332 | char oc = old_seq[o++]; |
|---|
| 333 | char nc = new_seq[n++]; |
|---|
| 334 | if (!nc) break; |
|---|
| 335 | |
|---|
| 336 | char oC = toupper(oc); |
|---|
| 337 | char nC = toupper(nc); |
|---|
| 338 | |
|---|
| 339 | if (fixTU && isTU(nC) && isTU(oC)) fixed[f] = (oc == oC) ? TU : tu; |
|---|
| 340 | else if (oc != nc && oC == nC) fixed[f] = oc; |
|---|
| 341 | |
|---|
| 342 | f++; |
|---|
| 343 | } |
|---|
| 344 | |
|---|
| 345 | return fixed; |
|---|
| 346 | } |
|---|
| 347 | |
|---|
static void export_to_DB(NA_Alignment& dataset, size_t oldnumelements, bool aligned_data) {
    /*! (re-)import data into arb DB
     * @param dataset normally has been read from file (which was created by external tool)
     * @param oldnumelements start index into dataset (elements below that index are not imported)
     * @param aligned_data if true => only import sequences; expect checksums did not change; repair some minor, unwanted changes (case, T<>U, gaptype)
     *
     * Everything happens inside one database transaction, which is closed by
     * GB_end_transaction_show_error() (errors are reported via aw_message).
     */
    if (dataset.numelements == oldnumelements) return;
    gde_assert(dataset.numelements > oldnumelements); // otherwise this is a noop

    GBDATA     *gb_main     = db_access.gb_main;
    GB_ERROR    error       = GB_begin_transaction(gb_main);
    const char *ali_name    = dataset.alignment_name;
    long        maxalignlen = GBT_get_alignment_len(gb_main, ali_name);

    if (maxalignlen <= 0 && !error) {
        error = GB_await_error();
    }

    // translate alignment type of the DB into the sequence type constants used by the elements
    long lotyp = 0;
    if (!error) {
        GB_alignment_type at = GBT_get_alignment_type(gb_main, ali_name);

        switch (at) {
            case GB_AT_DNA:     lotyp = DNA;     break;
            case GB_AT_RNA:     lotyp = RNA;     break;
            case GB_AT_AA:      lotyp = PROTEIN; break;
            case GB_AT_UNKNOWN: lotyp = DNA;     break;
        }
    }

    unsigned long i;
    const long    oldalignlen = maxalignlen; // used below to detect whether the alignment length was changed
    bool          auto_format = false;       // set if the alignment shall be formatted after import

    // questions remember "all"-answers, i.e. the user may answer once for all elements
    AW_repeated_question overwrite_question;
    AW_repeated_question checksum_change_question;

    arb_progress progress("importing", dataset.numelements-oldnumelements+1); // +1 avoids zero-progress
    for (i = oldnumelements; !error && i < dataset.numelements; i++) {
        NA_Sequence *sequ = dataset.element+i;
        int seqtyp, issame = 0;

        // DNA and RNA are treated as compatible types
        seqtyp = sequ->elementtype;
        if ((seqtyp == lotyp) || ((seqtyp == DNA) && (lotyp == RNA)) || ((seqtyp == RNA) && (lotyp == DNA))) {
            issame = 1;
        }
        else {
            aw_message(GBS_global_string("Warning: sequence type of species '%s' changed", sequ->short_name));
        }

        // apply translation matrix (if any) to the sequence data in place
        if (sequ->tmatrix) {
            for (long j = 0; j < sequ->seqlen; j++) {
                sequ->sequence[j] = (char)sequ->tmatrix[sequ->sequence[j]];
            }
            sequ->sequence[sequ->seqlen] = 0;
        }

        char *savename = GBS_string_2_key(sequ->short_name);

        sequ->gb_species = NULp;

        const char *new_seq     = (const char *)sequ->sequence;
        int         new_seq_len = sequ->seqlen;

        gde_assert(new_seq[new_seq_len] == 0);
        gde_assert((int)strlen(new_seq) == new_seq_len);

        if (!issame) {          // save as extended
            GBDATA *gb_extended = GBT_find_or_create_SAI(gb_main, savename);

            if (!gb_extended) error = GB_await_error();
            else {
                sequ->gb_species = gb_extended;
                GBDATA *gb_data  = GBT_add_data(gb_extended, ali_name, "data", GB_STRING);

                if (!gb_data) error = GB_await_error();
                else {
                    error = write_sequence_autoinc_alisize(gb_data, maxalignlen, new_seq, new_seq_len);
                    if (new_seq_len<maxalignlen) auto_format = true; // sequence shorter than alignment
                }
            }
        }
        else {                  // save as sequence
            GBDATA *gb_species_data = GBT_get_species_data(gb_main);
            if (!gb_species_data) error = GB_await_error();
            else {
                GBDATA *gb_species       = GBT_find_species_rel_species_data(gb_species_data, savename);
                bool    fix_data_changes = false; // true => try to repair minor changes done by the external tool

                GB_topSecurityLevel unsecured(gb_main);

                if (gb_species) {   // new element that already exists !!!!
                    // Note: the enum values have to match the order of the buttons passed to get_answer() below
                    enum ReplaceMode { REPLACE_SPEC = 0, REIMPORT_SEQ = 1, SKIP_IMPORT  = 2 } replace_mode;

                    if (aligned_data) {
                        replace_mode = REIMPORT_SEQ;
                    }
                    else {
                        const char *question =
                            GBS_global_string("You are (re-)importing a species '%s'.\n"
                                              "That species already exists in your database!\n"
                                              "\n"
                                              "Possible actions:\n"
                                              "\n"
                                              "       - overwrite existing species (all fields)\n"
                                              "       - overwrite the sequence (does not change other fields)\n"
                                              "       - skip import of the species\n",
                                              savename);

                        replace_mode = (ReplaceMode)overwrite_question.get_answer("GDE_overwrite", question, "Overwrite species,Overwrite sequence only,Skip entry", "all", false);
                    }

                    switch (replace_mode) {
                        case SKIP_IMPORT:
                            gb_species = NULp; // nothing is written for this element
                            break;
                        case REPLACE_SPEC:
                            error      = GB_delete(gb_species);
                            gb_species = NULp;
                            if (error) break;
                            // fall-through
                        case REIMPORT_SEQ:
                            gb_species = GBT_find_or_create_species_rel_species_data(gb_species_data, savename, true);
                            if (!gb_species) error = GB_await_error();
                            break;
                    }

                    fix_data_changes = replace_mode == REIMPORT_SEQ;
                }
                else {
                    if (aligned_data) {
                        aw_message(GBS_global_string("Warning: new species '%s' has been created (unexpected; possible naming problems)", savename));
                    }
                    gb_species = GBT_find_or_create_species_rel_species_data(gb_species_data, savename, true);
                    if (!gb_species) error = GB_await_error();
                }

                if (gb_species) {
                    gde_assert(!error);
                    sequ->gb_species = gb_species;

                    GBDATA *gb_data = GBT_add_data(gb_species, ali_name, "data", GB_STRING); // does only add if not already existing
                    if (!gb_data) error = GB_await_error();
                    else {
                        GBDATA *gb_old_data   = GBT_find_sequence(gb_species, ali_name);
                        bool    writeSequence = true;
                        if (gb_old_data) {          // we already have data -> compare checksums
                            const char *old_seq = GB_read_char_pntr(gb_old_data);

                            long old_checksum    = 0;
                            long new_checksum    = 0;
                            bool calcStdChecksum = true; // false => checksums were already calculated for the fixed sequence
                            if (fix_data_changes) {
                                char *new_seq_fixed = fix_aligned_data(old_seq, new_seq, dataset.alignment_type);  // apply some fixes to (realigned) data

                                switch (dataset.alignment_type) {
                                    case GB_AT_DNA:
                                    case GB_AT_RNA: {
                                        // compare checksums of copies in which T/t has been replaced by U/u
                                        char *old_TU = GBS_string_eval(old_seq,       ":T=U:t=u");
                                        char *new_TU = GBS_string_eval(new_seq_fixed, ":T=U:t=u");

                                        old_checksum = GBS_checksum(old_TU, 1, "-.");
                                        new_checksum = GBS_checksum(new_TU, 1, "-.");

                                        free(new_TU);
                                        free(old_TU);
                                        break;
                                    }
                                    case GB_AT_AA:
                                    case GB_AT_UNKNOWN:
                                        old_checksum = GBS_checksum(old_seq,       1, "-.");
                                        new_checksum = GBS_checksum(new_seq_fixed, 1, "-.");
                                        break;
                                }

                                if (new_checksum == old_checksum) { // fix succeeded
                                    // the element takes ownership of the fixed sequence
                                    free(sequ->sequence);
                                    sequ->sequence  = (NA_Base*)new_seq_fixed;
                                    new_seq         = new_seq_fixed;
                                    calcStdChecksum = false;
                                }
                                else {
                                    fprintf(stderr, "Checksum changed for '%s':\nold='%s'\nfix='%s' (failed)\nnew='%s'\n", savename, old_seq, new_seq_fixed, new_seq);
                                    free(new_seq_fixed);
                                }
                            }
                            if (calcStdChecksum) {
                                old_checksum = GBS_checksum(old_seq, 1, "-.");
                                new_checksum = GBS_checksum(new_seq, 1, "-.");
                            }

                            if (old_checksum != new_checksum) {
                                if (!fix_data_changes) { // already dumped above
                                    fprintf(stderr, "Checksum changed for '%s':\nold='%s'\nnew='%s'\n", savename, old_seq, new_seq);
                                }

                                char *question = GBS_global_string_copy("Warning: Sequence checksum of '%s' has changed!\n"
                                                                        "This should NOT happen if you aligned sequences!\n"
                                                                        "(see console for changes to sequence)", savename);

                                const char *questionID = aligned_data ? "GDE_accept_aligner_seqchange" : "GDE_accept_seqchange";

                                // Note: the enum values have to match the order of the buttons passed to get_answer()
                                enum ChangeMode {
                                    ACCEPT_CHANGE = 0,
                                    REJECT_CHANGE = 1,
                                } change_mode = (ChangeMode)checksum_change_question.get_answer(questionID, question, "Accept change,Reject", "all", false);

                                if (change_mode == REJECT_CHANGE) writeSequence = false;

                                aw_message(GBS_global_string("Warning: Sequence checksum for '%s' has changed (%s)",
                                                             savename, writeSequence ? "accepted" : "rejected"));
                                free(question);
                            }
                        }
                        if (writeSequence) {
                            error = write_sequence_autoinc_alisize(gb_data, maxalignlen, new_seq, new_seq_len);
                            if (new_seq_len<maxalignlen) auto_format = true; // sequence shorter than alignment
                        }
                    }
                }
            }
        }
        free(savename);
        progress.inc_and_check_user_abort(error);
    }

    // also format if the alignment length was changed during import
    if (!auto_format) auto_format = oldalignlen != maxalignlen;

    if (auto_format) {
        if (db_access.format_ali) {
            GB_topSecurityLevel unsecured(gb_main);
            error = db_access.format_ali(gb_main, ali_name); // NOTE(review): overwrites any error set by the import loop - confirm this is intended
        }
    }

    progress.done();

    GB_end_transaction_show_error(db_access.gb_main, error, aw_message);
}
|---|
| 587 | |
|---|
static char *preCreateTempfile(const char *name) {
    // creates a tempfile via GB_create_tempfile() and returns its result
    // (documented here before as heapcopy of the fullpath).
    //
    // In case of error the error is reported via aw_message and NULp is returned
    // (this function does NOT exit; callers receive NULp).
    char *fullname = GB_create_tempfile(name);

    if (!fullname) aw_message(GBS_global_string("[ARB_GDE]: %s", GB_await_error()));
    return fullname;
}
|---|
| 596 | |
|---|
| 597 | static const char *jobLabel(const char *itemLabel) { |
|---|
| 598 | char *jlab = GB_command_interpreter(itemLabel, "/[^a-zA-Z0-9]//", db_access.gb_main); |
|---|
| 599 | if (!jlab) { |
|---|
| 600 | fprintf(stderr, "error generating jobLabel: %s\n", GB_await_error()); |
|---|
| 601 | jlab = strdup("someJob"); |
|---|
| 602 | } |
|---|
| 603 | RETURN_LOCAL_ALLOC(jlab); |
|---|
| 604 | } |
|---|
| 605 | |
|---|
// Callback that runs the external command configured for menu item 'gmenuitem':
// 1. reads the sequences from the database (only if the item defines inputs)
// 2. writes them into temporary input files and pre-creates temporary output files
// 3. builds the command line from the item's method (replacing argument symbols,
//    file symbols, $FILTER and $AGDE_JOBID) and executes it via ARB_system
// 4. loads the output files, removes temporary files not marked as 'save'
//    and exports newly loaded elements into the database.
void GDE_startaction_cb(AW_window *aw, GmenuItem *gmenuitem) {
    gde_assert(!GB_have_error());

    AW_root   *aw_root      = aw->get_root();
    GmenuItem *current_item = gmenuitem;

    GapCompression  compress = static_cast<GapCompression>(aw_root->awar(AWAR_GDE_COMPRESSION)->read_int());
    arb_progress    progress(current_item->label);
    NA_Alignment    DataSet(db_access.gb_main);
    int             stop     = 0; // != 0 => do not run the command

    if (current_item->numinputs>0) {
        // all inputs have to use the same typeinfo
        TypeInfo typeinfo = UNKNOWN_TYPEINFO;
        {
            for (int j=0; j<current_item->numinputs; j++) {
                if (j == 0) { typeinfo = current_item->input[j].typeinfo; }
                else if (current_item->input[j].typeinfo != typeinfo) {
                    aw_message("'intyped' must be same for all inputs (config error in GDE menu file)");
                    stop = 1;
                }
            }
        }
        gde_assert(typeinfo != UNKNOWN_TYPEINFO);

        if (!stop) {
            AP_filter *filter2 = awt_get_filter(agde_filter);
            gde_assert(gmenuitem->seqtype != '-'); // inputs w/o seqtype? impossible!
            {
                GB_ERROR error = awt_invalid_filter(filter2);
                if (error) {
                    aw_message(error);
                    stop = 1;
                }
            }

            if (!stop) {
                GB_transaction ta(DataSet.gb_main);
                progress.subtitle("reading database");

                long cutoff_stop_codon = aw_root->awar(AWAR_GDE_CUTOFF_STOPCODON)->read_int();
                bool marked            = (aw_root->awar(AWAR_GDE_SPECIES)->read_int() != 0);

                // 'marked' is only passed to ReadArbdb (not to ReadArbdb2)
                if (db_access.get_sequences) {
                    stop = ReadArbdb2(DataSet, filter2, compress, cutoff_stop_codon, typeinfo);
                }
                else {
                    stop = ReadArbdb(DataSet, marked, filter2, compress, cutoff_stop_codon, typeinfo);
                }
            }
            delete filter2;
        }

        if (!stop && DataSet.numelements==0) {
            aw_message("no sequences selected");
            stop = 1;
        }
    }

    if (!stop) {
        int select_mode = (current_item->numinputs>0) ? ALL : NONE;
        int pid         = getpid();

        // tempfile names consist of process id and a counter which is shared by all calls
        static int fileindx = 0;
        for (int j=0; j<current_item->numinputs; j++) {
            GfileFormat& gfile = current_item->input[j];

            char buffer[GBUFSIZ];
            sprintf(buffer, "gde%d_%d", pid, fileindx++);
            // NOTE(review): the result is stored w/o releasing a previously stored name and
            // is used below even if preCreateTempfile returned NULp - confirm handling elsewhere
            gfile.name = preCreateTempfile(buffer);

            switch (gfile.format) {
                case GENBANK: WriteGen    (DataSet, gfile.name, select_mode); break;
                case NA_FLAT: WriteNA_Flat(DataSet, gfile.name, select_mode); break;
                case GDE:     WriteGDE    (DataSet, gfile.name, select_mode); break;
                default: break;
            }
        }

        for (int j=0; j<current_item->numoutputs; j++) {
            char buffer[GBUFSIZ];
            sprintf(buffer, "gde%d_%d", pid, fileindx++);
            current_item->output[j].name = preCreateTempfile(buffer);
        }

        {
            // Create the command line for external the function call
            char *Action = ARB_strdup(current_item->method);

            // replace argument symbols repeatedly until the command does not change any longer
            // NOTE(review): ReplaceArgs may return NULp (label not accessible); that result is
            // passed to strcmp/ReplaceArgs unchecked - confirm this cannot happen with released menus
            while (1) {
                char *oldAction = ARB_strdup(Action);

                for (int j=0; j<current_item->numargs; j++) Action = ReplaceArgs(aw_root, Action, gmenuitem, j);
                bool changed = strcmp(oldAction, Action) != 0;
                free(oldAction);

                if (!changed) break;
            }

            for (int j=0; j<current_item->numinputs; j++) ReplaceFile(Action, current_item->input[j]);
            for (int j=0; j<current_item->numoutputs; j++) ReplaceFile(Action, current_item->output[j]);

            if (Find(Action, "$FILTER") == true) {
                char *filter_name = AWT_get_combined_filter_name(aw_root, AWAR_PREFIX_GDE_TEMP);
                ReplaceString(Action, "$FILTER", filter_name);
                free(filter_name);
            }

            // each executed job gets its own ID (label of menu item + counter shared by all calls)
            static int  jobCounter = 1;
            char       *jobID      = GBS_global_string_copy("agde_%s_%i", jobLabel(current_item->label), jobCounter++);

            if (Find(Action, "$AGDE_JOBID") == true) {
                ReplaceString(Action, "$AGDE_JOBID", jobID);
            }

            // a trailing '&' in the configured command selects the execution type used below
            bool configuredAsync = FindAndRemoveAsyncAndSign(Action);

            // call and go...
            progress.subtitle("calling external program");
            fprintf(stderr, "---------------------------------------- [executing %s]\n", jobID);
            {
                // if playing back macro
                // => set timeout for arb_wait (called from RUN_IN_WINDOW)
                // => macro will continue automatically after command finishes.
                bool playback = is_executing_macro(aw_root);
                if (playback) GB_setenv("ARB_WAIT_TIMEOUT", "7");

                // Note: ARB_system here neither opens terminal window nor waits for keypress.
                //       When required, both happens via code defined in ../GDEHELP/ARB_GDEmenus.source@RUN_IN_WINDOW
                XCMD_TYPE xcmdtype = configuredAsync ? XCMD_SERVSYNC_HIDDEN : XCMD_SYNC_HIDDEN;
                aw_message_if(ARB_system(Action, XCmdType(xcmdtype, db_access.gb_main)));

                if (playback) GB_setenv("ARB_WAIT_TIMEOUT", "");
            }
            fprintf(stderr, "---------------------------------------- [done with %s]\n", jobID);

            free(jobID);
            free(Action);
        }

        // elements loaded from output files are appended behind the elements read from the database
        size_t oldnumelements = DataSet.numelements;

        for (int j=0; j<current_item->numoutputs; j++) {
            switch (current_item->output[j].format) {
                case GENBANK:
                case NA_FLAT:
                case GDE:
                    LoadData(current_item->output[j].name, DataSet);
                    break;
                default:
                    gde_assert(0);
                    break;
            }
        }
        // remove temporary files (unless they are flagged with 'save')
        for (int j=0; j<current_item->numoutputs; j++) {
            if (!current_item->output[j].save) {
                unlink(current_item->output[j].name);
            }
        }

        for (int j=0; j<current_item->numinputs; j++) {
            if (!current_item->input[j].save) {
                unlink(current_item->input[j].name);
            }
        }

        export_to_DB(DataSet, oldnumelements, current_item->aligned);
    }

    gde_assert(!GB_have_error());
}
|---|
| 776 | |
|---|
| 777 | // -------------------------------------------------------------------------------- |
|---|
| 778 | |
|---|
| 779 | #ifdef UNIT_TESTS |
|---|
| 780 | #ifndef TEST_UNIT_H |
|---|
| 781 | #include <test_unit.h> |
|---|
| 782 | #endif |
|---|
| 783 | |
|---|
| 784 | static arb_test::match_expectation fixed_as(GB_alignment_type ali_type, const char *old, const char *expected_fix, const char *aligned) { |
|---|
| 785 | using namespace arb_test; |
|---|
| 786 | char *fixed = fix_aligned_data(old, aligned, ali_type); |
|---|
| 787 | match_expectation e = that(fixed).is_equal_to(expected_fix); |
|---|
| 788 | free(fixed); |
|---|
| 789 | return e; |
|---|
| 790 | } |
|---|
| 791 | |
|---|
// Parameters: t = alignment type, o = old sequence, f = expected fixed sequence, a = aligned sequence.
// The __BROKEN variant takes two expected results (fw and fg) and forwards both
// expectations to TEST_EXPECTATION__BROKEN.
#define TEST_FIX_ALIGNED(t,o,f,a)             TEST_EXPECTATION(fixed_as(t,o,f,a))
#define TEST_FIX_ALIGNED__BROKEN(t,o,fw,fg,a) TEST_EXPECTATION__BROKEN(fixed_as(t,o,fw,a), fixed_as(t,o,fg,a))
|---|
| 794 | |
|---|
// Unit test for fix_aligned_data(): each case passes the old sequence, the expected
// (fixed) result and the sequence as delivered by an aligner.
void TEST_fix_aligned_data() {
    // gap types are restored from old sequence (leading and trailing gaps included)
    TEST_FIX_ALIGNED(GB_AT_RNA,
                     "...A---CG..G--U.....", // old
                     "..AC--G..GU...",       // fixed: gaps corrected; T->U
                     "--AC--G--GT---");      // aligned

    // old sequence has no gaps at its borders -> border gaps of aligned sequence are kept
    TEST_FIX_ALIGNED(GB_AT_RNA,
                     "A---CG..G--U",         // old (no gaps at border)
                     "--AC--G..GU---",       // fixed: gaps corrected; T->U
                     "--AC--G--GT---");      // aligned

    TEST_FIX_ALIGNED(GB_AT_RNA,
                     "...A---CG..G--U.....", // old
                     "AC--G..GU",            // fixed: gaps corrected; T->U
                     "AC--G--GT");           // aligned (no gaps at border)

    // sequences with different bases are returned unchanged
    TEST_FIX_ALIGNED(GB_AT_RNA,
                     "A---CG..G--U",         // old
                     "AC-----GT",            // not fixed
                     "AC-----GT");           // aligned (bases changed!)

    // T/U is written according to the alignment type; case is taken from old sequence
    TEST_FIX_ALIGNED(GB_AT_DNA,
                     "A---cTUu..G--t",       // old
                     "AcT--Tt..Gt",          // fixed: case restored; U's convert to T's
                     "ACT--UT--GU");         // aligned

    TEST_FIX_ALIGNED(GB_AT_RNA,
                     "A---cTUu..G--t",       // old
                     "AcU--Uu..Gu",          // fixed: case restored; T's convert to U's
                     "ACT--UT--GU");         // aligned
}
|---|
| 826 | |
|---|
| 827 | #endif // UNIT_TESTS |
|---|
| 828 | |
|---|
| 829 | // -------------------------------------------------------------------------------- |
|---|