| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : NT_dbrepair.cxx // |
|---|
| 4 | // Purpose : repair database bugs // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in May 2008 // |
|---|
| 7 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 8 | // http://www.arb-home.de/ // |
|---|
| 9 | // // |
|---|
| 10 | // =============================================================== // |
|---|
| 11 | |
|---|
| 12 | #include "NT_local.h" |
|---|
| 13 | |
|---|
| 14 | #include <arbdbt.h> |
|---|
| 15 | #include <adGene.h> |
|---|
| 16 | |
|---|
| 17 | #include <items.h> |
|---|
| 18 | #include <GEN.hxx> |
|---|
| 19 | #include <EXP.hxx> |
|---|
| 20 | #include <aw_color_groups.hxx> |
|---|
| 21 | #include <aw_msg.hxx> |
|---|
| 22 | #include <arb_progress.h> |
|---|
| 23 | #include <aw_question.hxx> |
|---|
| 24 | |
|---|
| 25 | #include <arb_str.h> |
|---|
| 26 | #include <arb_strarray.h> |
|---|
| 27 | |
|---|
| 28 | #include <map> |
|---|
| 29 | #include <set> |
|---|
| 30 | #include <string> |
|---|
| 31 | #include <vector> |
|---|
| 32 | |
|---|
| 33 | using namespace std; |
|---|
| 34 | |
|---|
| 35 | #if defined(WARN_TODO) |
|---|
| 36 | #warning the whole fix mechanism should be part of some lower-level-library |
|---|
| 37 | // meanwhile DB checks are only performed by ARB_NTREE |
|---|
| 38 | // ItemSelector should go to same library as this module |
|---|
| 39 | #endif |
|---|
| 40 | |
|---|
| 41 | // -------------------------------------------------------------------------------- |
|---|
| 42 | // CheckedConsistencies provides an easy way to automatically correct flaws in the database |
|---|
| 43 | // by calling a check routine exactly once. |
|---|
| 44 | // |
|---|
| 45 | // For an example see nt_check_database_consistency() |
|---|
| 46 | // |
|---|
| 47 | // Note: this causes problems if the DB is loaded with an older ARB version and some already |
|---|
| 48 | // fixed flaws are put into the DB again. |
|---|
| 49 | // see http://bugs.arb-home.de/ticket/143 |
|---|
| 50 | |
|---|
| 51 | typedef GB_ERROR (*item_check_fun)(GBDATA *gb_item, ItemSelector& sel); |
|---|
| 52 | |
|---|
| 53 | typedef map<string, item_check_fun> item_check_map; |
|---|
| 54 | typedef item_check_map::const_iterator item_check_iter; |
|---|
| 55 | |
|---|
| 56 | class CheckedConsistencies : virtual Noncopyable { |
|---|
| 57 | GBDATA *gb_main; |
|---|
| 58 | size_t species_count; |
|---|
| 59 | size_t sai_count; |
|---|
| 60 | set<string> consistencies; |
|---|
| 61 | item_check_map item_checks; |
|---|
| 62 | |
|---|
| 63 | GB_ERROR perform_selected_item_checks(ItemSelector& sel); |
|---|
| 64 | |
|---|
| 65 | public: |
|---|
| 66 | |
|---|
| 67 | CheckedConsistencies(GBDATA *gb_main_) : gb_main(gb_main_) { |
|---|
| 68 | GB_transaction ta(gb_main); |
|---|
| 69 | GBDATA *gb_checks = GB_search(gb_main, "checks", GB_CREATE_CONTAINER); |
|---|
| 70 | |
|---|
| 71 | for (GBDATA *gb_check = GB_entry(gb_checks, "check"); gb_check; gb_check = GB_nextEntry(gb_check)) { |
|---|
| 72 | consistencies.insert(GB_read_char_pntr(gb_check)); |
|---|
| 73 | } |
|---|
| 74 | |
|---|
| 75 | species_count = GBT_get_species_count(gb_main); |
|---|
| 76 | sai_count = GBT_get_SAI_count(gb_main); |
|---|
| 77 | } |
|---|
| 78 | |
|---|
| 79 | bool was_performed(const string& check_name) const { |
|---|
| 80 | return consistencies.find(check_name) != consistencies.end(); |
|---|
| 81 | } |
|---|
| 82 | |
|---|
| 83 | GB_ERROR register_as_performed(const string& check_name) { |
|---|
| 84 | GB_ERROR error = 0; |
|---|
| 85 | if (was_performed(check_name)) { |
|---|
| 86 | printf("check '%s' already has been registered before. Duplicated check name?\n", check_name.c_str()); |
|---|
| 87 | } |
|---|
| 88 | else { |
|---|
| 89 | GB_transaction ta(gb_main); |
|---|
| 90 | |
|---|
| 91 | GBDATA *gb_checks = GB_search(gb_main, "checks", GB_CREATE_CONTAINER); |
|---|
| 92 | GBDATA *gb_check = GB_create(gb_checks, "check", GB_STRING); |
|---|
| 93 | |
|---|
| 94 | if (!gb_check) error = GB_await_error(); |
|---|
| 95 | else error = GB_write_string(gb_check, check_name.c_str()); |
|---|
| 96 | |
|---|
| 97 | if (!error) consistencies.insert(check_name); |
|---|
| 98 | } |
|---|
| 99 | return error; |
|---|
| 100 | } |
|---|
| 101 | |
|---|
| 102 | void perform_check(const string& check_name, |
|---|
| 103 | GB_ERROR (*do_check)(GBDATA *gb_main, size_t species, size_t sais), |
|---|
| 104 | GB_ERROR& error) |
|---|
| 105 | { |
|---|
| 106 | if (!error && !was_performed(check_name)) { |
|---|
| 107 | arb_progress progress(check_name.c_str()); |
|---|
| 108 | error = do_check(gb_main, species_count, sai_count); |
|---|
| 109 | if (!error) register_as_performed(check_name); |
|---|
| 110 | } |
|---|
| 111 | } |
|---|
| 112 | |
|---|
| 113 | void register_item_check(const string& check_name, item_check_fun item_check) { |
|---|
| 114 | if (!was_performed(check_name)) { |
|---|
| 115 | item_checks[check_name] = item_check; |
|---|
| 116 | } |
|---|
| 117 | } |
|---|
| 118 | |
|---|
| 119 | void perform_item_checks(GB_ERROR& error); |
|---|
| 120 | |
|---|
| 121 | GB_ERROR forgetDoneChecks() { |
|---|
| 122 | GB_ERROR error = 0; |
|---|
| 123 | GB_transaction ta(gb_main); |
|---|
| 124 | |
|---|
| 125 | GBDATA *gb_checks = GB_search(gb_main, "checks", GB_CREATE_CONTAINER); |
|---|
| 126 | for (GBDATA *gb_check = GB_entry(gb_checks, "check"); gb_check && !error; gb_check = GB_nextEntry(gb_check)) { |
|---|
| 127 | char *check_name = GB_read_string(gb_check); |
|---|
| 128 | |
|---|
| 129 | #if defined(DEBUG) |
|---|
| 130 | printf("Deleting check '%s'\n", check_name); |
|---|
| 131 | #endif // DEBUG |
|---|
| 132 | error = GB_delete(gb_check); |
|---|
| 133 | consistencies.erase(check_name); |
|---|
| 134 | free(check_name); |
|---|
| 135 | } |
|---|
| 136 | return error; |
|---|
| 137 | } |
|---|
| 138 | }; |
|---|
| 139 | |
|---|
| 140 | GB_ERROR CheckedConsistencies::perform_selected_item_checks(ItemSelector& sel) { |
|---|
| 141 | GB_ERROR error = NULL; |
|---|
| 142 | item_check_iter end = item_checks.end(); |
|---|
| 143 | |
|---|
| 144 | for (GBDATA *gb_cont = sel.get_first_item_container(gb_main, NULL, QUERY_ALL_ITEMS); |
|---|
| 145 | gb_cont && !error; |
|---|
| 146 | gb_cont = sel.get_next_item_container(gb_cont, QUERY_ALL_ITEMS)) |
|---|
| 147 | { |
|---|
| 148 | for (GBDATA *gb_item = sel.get_first_item(gb_cont, QUERY_ALL_ITEMS); |
|---|
| 149 | gb_item && !error; |
|---|
| 150 | gb_item = sel.get_next_item(gb_item, QUERY_ALL_ITEMS)) |
|---|
| 151 | { |
|---|
| 152 | for (item_check_iter chk = item_checks.begin(); chk != end && !error; ++chk) { |
|---|
| 153 | error = chk->second(gb_item, sel); |
|---|
| 154 | } |
|---|
| 155 | } |
|---|
| 156 | } |
|---|
| 157 | |
|---|
| 158 | return error; |
|---|
| 159 | } |
|---|
| 160 | |
|---|
| 161 | void CheckedConsistencies::perform_item_checks(GB_ERROR& error) { |
|---|
| 162 | if (!item_checks.empty()) { |
|---|
| 163 | if (!error) { |
|---|
| 164 | GB_transaction ta(gb_main); |
|---|
| 165 | bool is_genome_db = GEN_is_genome_db(gb_main, -1); |
|---|
| 166 | |
|---|
| 167 | error = perform_selected_item_checks(SPECIES_get_selector()); |
|---|
| 168 | if (!error && is_genome_db) { |
|---|
| 169 | error = perform_selected_item_checks(GEN_get_selector()); |
|---|
| 170 | if (!error) error = perform_selected_item_checks(EXP_get_selector()); |
|---|
| 171 | } |
|---|
| 172 | |
|---|
| 173 | error = ta.close(error); |
|---|
| 174 | } |
|---|
| 175 | |
|---|
| 176 | if (!error) { |
|---|
| 177 | item_check_iter end = item_checks.end(); |
|---|
| 178 | for (item_check_iter chk = item_checks.begin(); chk != end && !error; ++chk) { |
|---|
| 179 | error = register_as_performed(chk->first); |
|---|
| 180 | } |
|---|
| 181 | |
|---|
| 182 | if (!error) item_checks.clear(); |
|---|
| 183 | } |
|---|
| 184 | } |
|---|
| 185 | } |
|---|
| 186 | |
|---|
| 187 | // -------------------------------------------------------------------------------- |
|---|
| 188 | |
|---|
| 189 | static GB_ERROR NT_fix_gene_data(GBDATA *gb_main, size_t species_count, size_t /* sai_count */) { |
|---|
| 190 | GB_transaction ta(gb_main); |
|---|
| 191 | arb_progress progress(species_count); |
|---|
| 192 | |
|---|
| 193 | size_t deleted_gene_datas = 0; |
|---|
| 194 | size_t generated_gene_datas = 0; |
|---|
| 195 | GB_ERROR error = 0; |
|---|
| 196 | |
|---|
| 197 | for (GBDATA *gb_species = GBT_first_species(gb_main); |
|---|
| 198 | gb_species && !error; |
|---|
| 199 | gb_species = GBT_next_species(gb_species)) |
|---|
| 200 | { |
|---|
| 201 | bool is_organism = (GB_entry(gb_species, GENOM_ALIGNMENT) != 0); // same test as GEN_is_organism, but w/o genome-db-assertion |
|---|
| 202 | GBDATA *gb_gene_data = GEN_find_gene_data(gb_species); |
|---|
| 203 | |
|---|
| 204 | if (is_organism && !gb_gene_data) { |
|---|
| 205 | gb_gene_data = GEN_findOrCreate_gene_data(gb_species); // @@@ check result & handle error |
|---|
| 206 | generated_gene_datas++; |
|---|
| 207 | } |
|---|
| 208 | else if (!is_organism && gb_gene_data) { |
|---|
| 209 | GBDATA *gb_child = GB_child(gb_gene_data); |
|---|
| 210 | if (!gb_child) { |
|---|
| 211 | error = GB_delete(gb_gene_data); |
|---|
| 212 | if (!error) deleted_gene_datas++; |
|---|
| 213 | } |
|---|
| 214 | else { |
|---|
| 215 | error = GBS_global_string("Non-empty 'gene_data' found for species '%s',\n" |
|---|
| 216 | "which has no alignment '" GENOM_ALIGNMENT "',\n" |
|---|
| 217 | "i.e. which is not regarded as full-genome organism.\n" |
|---|
| 218 | "This causes problems - please fix!", |
|---|
| 219 | GBT_read_name(gb_species)); |
|---|
| 220 | } |
|---|
| 221 | } |
|---|
| 222 | |
|---|
| 223 | progress.inc_and_check_user_abort(error); |
|---|
| 224 | } |
|---|
| 225 | |
|---|
| 226 | if (!error) { |
|---|
| 227 | if (deleted_gene_datas) { |
|---|
| 228 | aw_message(GBS_global_string("Deleted %zu useless empty 'gene_data' entries.", deleted_gene_datas)); |
|---|
| 229 | } |
|---|
| 230 | if (generated_gene_datas) { |
|---|
| 231 | aw_message(GBS_global_string("Re-created %zu missing 'gene_data' entries.\nThese organisms have no genes yet!", generated_gene_datas)); |
|---|
| 232 | } |
|---|
| 233 | } |
|---|
| 234 | return ta.close(error); |
|---|
| 235 | } |
|---|
| 236 | |
|---|
| 237 | // -------------------------------------------------------------------------------- |
|---|
| 238 | |
|---|
| 239 | static GBDATA *expectField(GBDATA *gb_gene, const char *field, GB_ERROR& data_error) { |
|---|
| 240 | GBDATA *gb_field = 0; |
|---|
| 241 | if (!data_error) { |
|---|
| 242 | gb_field = GB_entry(gb_gene, field); |
|---|
| 243 | if (!gb_field) data_error = GBS_global_string("Expected field '%s' missing", field); |
|---|
| 244 | } |
|---|
| 245 | return gb_field; |
|---|
| 246 | } |
|---|
| 247 | |
|---|
| 248 | static GBDATA *disexpectField(GBDATA *gb_gene, const char *field, GB_ERROR& data_error) { |
|---|
| 249 | GBDATA *gb_field = 0; |
|---|
| 250 | if (!data_error) { |
|---|
| 251 | gb_field = GB_entry(gb_gene, field); |
|---|
| 252 | if (gb_field) data_error = GBS_global_string("Unexpected field '%s' exists (wrong value in pos_joined?)", field); |
|---|
| 253 | } |
|---|
| 254 | GBS_reuse_buffer(field); |
|---|
| 255 | return gb_field; |
|---|
| 256 | } |
|---|
| 257 | |
|---|
| 258 | static GB_ERROR NT_convert_gene_locations(GBDATA *gb_main, size_t species_count, size_t /* sai_count */) { |
|---|
| 259 | GB_transaction ta(gb_main); |
|---|
| 260 | GB_ERROR error = 0; |
|---|
| 261 | long fixed_genes = 0; |
|---|
| 262 | long skipped_genes = 0; |
|---|
| 263 | long genes = 0; |
|---|
| 264 | |
|---|
| 265 | typedef vector<GBDATA*> GBvec; |
|---|
| 266 | GBvec toDelete; |
|---|
| 267 | |
|---|
| 268 | arb_progress progress(species_count); |
|---|
| 269 | |
|---|
| 270 | for (GBDATA *gb_organism = GEN_first_organism(gb_main); |
|---|
| 271 | gb_organism && !error; |
|---|
| 272 | gb_organism = GEN_next_organism(gb_organism)) |
|---|
| 273 | { |
|---|
| 274 | GBDATA *gb_gene_data = GEN_find_gene_data(gb_organism); |
|---|
| 275 | nt_assert(gb_gene_data); |
|---|
| 276 | if (gb_gene_data) { |
|---|
| 277 | for (GBDATA *gb_gene = GEN_first_gene_rel_gene_data(gb_gene_data); |
|---|
| 278 | gb_gene && !error; |
|---|
| 279 | gb_gene = GEN_next_gene(gb_gene)) |
|---|
| 280 | { |
|---|
| 281 | genes++; |
|---|
| 282 | |
|---|
| 283 | int parts = 1; |
|---|
| 284 | { |
|---|
| 285 | GBDATA *gb_pos_joined = GB_entry(gb_gene, "pos_joined"); |
|---|
| 286 | if (gb_pos_joined) parts = GB_read_int(gb_pos_joined); // its a joined gene |
|---|
| 287 | } |
|---|
| 288 | |
|---|
| 289 | GBDATA *gb_pos_start = GB_entry(gb_gene, "pos_start"); // test for new format |
|---|
| 290 | if (!gb_pos_start) { |
|---|
| 291 | GBDATA *gb_pos_begin = GB_entry(gb_gene, "pos_begin"); // test for old format |
|---|
| 292 | if (!gb_pos_begin) { |
|---|
| 293 | error = "Neither 'pos_begin' nor 'pos_start' found - format of gene location is unknown"; |
|---|
| 294 | } |
|---|
| 295 | } |
|---|
| 296 | |
|---|
| 297 | if (!gb_pos_start && !error) { // assume old format |
|---|
| 298 | // parts<-1 would be valid in new format, but here we have old format |
|---|
| 299 | if (parts<1) error = GBS_global_string("Illegal value in 'pos_joined' (%i)", parts); |
|---|
| 300 | |
|---|
| 301 | GB_ERROR data_error = 0; // error in this gene -> don't convert |
|---|
| 302 | GEN_position *pos = GEN_new_position(parts, false); // all were joinable (no information about it was stored) |
|---|
| 303 | |
|---|
| 304 | // parse old gene information into 'pos' |
|---|
| 305 | // |
|---|
| 306 | // old-format was: |
|---|
| 307 | // Start-Positions: pos_begin, pos_begin2, pos_begin3, ... |
|---|
| 308 | // End-Positions: pos_end, pos_end2, pos_end3, ... |
|---|
| 309 | // Joined?: pos_joined (always >= 1) |
|---|
| 310 | // Complement: complement (one entry for all parts) |
|---|
| 311 | // Certainty: pos_uncertain (maybe pos_uncertain1 etc.) |
|---|
| 312 | |
|---|
| 313 | int complement = 0; |
|---|
| 314 | { |
|---|
| 315 | GBDATA *gb_complement = GB_entry(gb_gene, "complement"); |
|---|
| 316 | if (gb_complement) { |
|---|
| 317 | complement = GB_read_byte(gb_complement); |
|---|
| 318 | toDelete.push_back(gb_complement); |
|---|
| 319 | } |
|---|
| 320 | } |
|---|
| 321 | |
|---|
| 322 | bool has_uncertain_fields = false; |
|---|
| 323 | for (int p = 1; p <= parts && !error && !data_error; ++p) { |
|---|
| 324 | GBDATA *gb_pos_begin = 0; |
|---|
| 325 | GBDATA *gb_pos_end = 0; |
|---|
| 326 | const char *pos_uncertain_field = 0; |
|---|
| 327 | |
|---|
| 328 | if (p == 1) { |
|---|
| 329 | gb_pos_begin = expectField(gb_gene, "pos_begin", data_error); |
|---|
| 330 | gb_pos_end = expectField(gb_gene, "pos_end", data_error); |
|---|
| 331 | |
|---|
| 332 | pos_uncertain_field = "pos_uncertain"; |
|---|
| 333 | } |
|---|
| 334 | else { |
|---|
| 335 | const char *pos_begin_field = GBS_global_string("pos_begin%i", p); |
|---|
| 336 | const char *pos_end_field = GBS_global_string("pos_end%i", p); |
|---|
| 337 | |
|---|
| 338 | gb_pos_begin = expectField(gb_gene, pos_begin_field, data_error); |
|---|
| 339 | gb_pos_end = expectField(gb_gene, pos_end_field, data_error); |
|---|
| 340 | |
|---|
| 341 | GBS_reuse_buffer(pos_end_field); |
|---|
| 342 | GBS_reuse_buffer(pos_begin_field); |
|---|
| 343 | |
|---|
| 344 | if (!data_error) pos_uncertain_field = GBS_global_string("pos_uncertain%i", p); |
|---|
| 345 | } |
|---|
| 346 | |
|---|
| 347 | int pospos = complement ? (parts-p) : (p-1); |
|---|
| 348 | |
|---|
| 349 | if (!data_error) { |
|---|
| 350 | GBDATA *gb_pos_uncertain = GB_entry(gb_gene, pos_uncertain_field); |
|---|
| 351 | |
|---|
| 352 | if (!gb_pos_uncertain) { |
|---|
| 353 | if (has_uncertain_fields) data_error = GBS_global_string("Expected field '%s' missing", pos_uncertain_field); |
|---|
| 354 | } |
|---|
| 355 | else { |
|---|
| 356 | if (p == 1) has_uncertain_fields = true; |
|---|
| 357 | else { |
|---|
| 358 | if (!has_uncertain_fields) { |
|---|
| 359 | data_error = GBS_global_string("Found '%s' as first certainty-information", pos_uncertain_field); |
|---|
| 360 | } |
|---|
| 361 | } |
|---|
| 362 | } |
|---|
| 363 | |
|---|
| 364 | if (!data_error) { |
|---|
| 365 | int begin = GB_read_int(gb_pos_begin); |
|---|
| 366 | int end = GB_read_int(gb_pos_end); |
|---|
| 367 | |
|---|
| 368 | pos->start_pos[pospos] = begin; |
|---|
| 369 | pos->stop_pos[pospos] = end; |
|---|
| 370 | pos->complement[pospos] = complement; // set all complement entries to same value (old format only had one complement entry) |
|---|
| 371 | |
|---|
| 372 | if (gb_pos_uncertain) { |
|---|
| 373 | const char *uncertain = GB_read_char_pntr(gb_pos_uncertain); |
|---|
| 374 | |
|---|
| 375 | if (!uncertain) error = GB_await_error(); |
|---|
| 376 | else { |
|---|
| 377 | if (!pos->start_uncertain) GEN_use_uncertainties(pos); |
|---|
| 378 | |
|---|
| 379 | if (strlen(uncertain) != 2) { |
|---|
| 380 | data_error = "wrong length"; |
|---|
| 381 | } |
|---|
| 382 | else { |
|---|
| 383 | for (int up = 0; up<2; up++) { |
|---|
| 384 | if (strchr("<=>", uncertain[up]) == 0) { |
|---|
| 385 | data_error = GBS_global_string("illegal character '%c'", uncertain[up]); |
|---|
| 386 | } |
|---|
| 387 | else { |
|---|
| 388 | (up == 0 ? pos->start_uncertain[pospos] : pos->stop_uncertain[pospos]) = uncertain[up]; |
|---|
| 389 | } |
|---|
| 390 | } |
|---|
| 391 | } |
|---|
| 392 | |
|---|
| 393 | |
|---|
| 394 | toDelete.push_back(gb_pos_uncertain); |
|---|
| 395 | } |
|---|
| 396 | } |
|---|
| 397 | |
|---|
| 398 | toDelete.push_back(gb_pos_begin); |
|---|
| 399 | toDelete.push_back(gb_pos_end); |
|---|
| 400 | } |
|---|
| 401 | } |
|---|
| 402 | } |
|---|
| 403 | |
|---|
| 404 | for (int p = parts+1; p <= parts+4 && !error && !data_error; ++p) { |
|---|
| 405 | disexpectField(gb_gene, GBS_global_string("pos_begin%i", p), data_error); |
|---|
| 406 | disexpectField(gb_gene, GBS_global_string("pos_end%i", p), data_error); |
|---|
| 407 | disexpectField(gb_gene, GBS_global_string("complement%i", p), data_error); |
|---|
| 408 | disexpectField(gb_gene, GBS_global_string("pos_uncertain%i", p), data_error); |
|---|
| 409 | } |
|---|
| 410 | |
|---|
| 411 | // now save new position data |
|---|
| 412 | |
|---|
| 413 | if (data_error) { |
|---|
| 414 | skipped_genes++; |
|---|
| 415 | } |
|---|
| 416 | else if (!error) { |
|---|
| 417 | error = GEN_write_position(gb_gene, pos, 0); |
|---|
| 418 | |
|---|
| 419 | if (!error) { |
|---|
| 420 | // delete old-format entries |
|---|
| 421 | GBvec::const_iterator end = toDelete.end(); |
|---|
| 422 | for (GBvec::const_iterator i = toDelete.begin(); i != end && !error; ++i) { |
|---|
| 423 | GBDATA *gb_del = *i; |
|---|
| 424 | error = GB_delete(gb_del); |
|---|
| 425 | } |
|---|
| 426 | |
|---|
| 427 | if (!error) fixed_genes++; |
|---|
| 428 | } |
|---|
| 429 | } |
|---|
| 430 | |
|---|
| 431 | toDelete.clear(); |
|---|
| 432 | GEN_free_position(pos); |
|---|
| 433 | |
|---|
| 434 | if (data_error || error) { |
|---|
| 435 | char *gene_id = GEN_global_gene_identifier(gb_gene, gb_organism); |
|---|
| 436 | if (error) { |
|---|
| 437 | error = GBS_global_string("Gene '%s': %s", gene_id, error); |
|---|
| 438 | } |
|---|
| 439 | else { |
|---|
| 440 | aw_message(GBS_global_string("Gene '%s' was not converted, fix data manually!\nReason: %s", gene_id, data_error)); |
|---|
| 441 | } |
|---|
| 442 | free(gene_id); |
|---|
| 443 | } |
|---|
| 444 | } |
|---|
| 445 | } |
|---|
| 446 | } |
|---|
| 447 | |
|---|
| 448 | progress.inc_and_check_user_abort(error); |
|---|
| 449 | } |
|---|
| 450 | |
|---|
| 451 | if (!error) { |
|---|
| 452 | if (fixed_genes>0) aw_message(GBS_global_string("Fixed location entries of %li genes.", fixed_genes)); |
|---|
| 453 | if (skipped_genes>0) { |
|---|
| 454 | aw_message(GBS_global_string("Didn't fix location entries of %li genes (see warnings).", skipped_genes)); |
|---|
| 455 | error = "Not all gene locations were fixed.\nFix manually, save DB and restart ARB with that DB.\nMake sure you have a backup copy of the original DB!"; |
|---|
| 456 | } |
|---|
| 457 | |
|---|
| 458 | if (fixed_genes || skipped_genes) { |
|---|
| 459 | long already_fixed_genes = genes-(fixed_genes+skipped_genes); |
|---|
| 460 | if (already_fixed_genes>0) aw_message(GBS_global_string("Location entries of %li genes already were in new format.", already_fixed_genes)); |
|---|
| 461 | } |
|---|
| 462 | } |
|---|
| 463 | |
|---|
| 464 | return error; |
|---|
| 465 | } |
|---|
| 466 | |
|---|
| 467 | |
|---|
| 468 | // -------------------------------------------------------------------------------- |
|---|
| 469 | |
|---|
| 470 | static GB_ERROR NT_del_mark_move_REF(GBDATA *gb_main, size_t species_count, size_t sai_count) { |
|---|
| 471 | GB_transaction ta(gb_main); |
|---|
| 472 | GB_ERROR error = 0; |
|---|
| 473 | size_t all = species_count+sai_count; |
|---|
| 474 | size_t removed = 0; |
|---|
| 475 | |
|---|
| 476 | // delete 'mark' entries from all alignments of species/SAIs |
|---|
| 477 | |
|---|
| 478 | arb_progress progress(all); |
|---|
| 479 | ConstStrArray ali_names; |
|---|
| 480 | GBT_get_alignment_names(ali_names, gb_main); |
|---|
| 481 | |
|---|
| 482 | for (int pass = 0; pass < 2 && !error; ++pass) { |
|---|
| 483 | for (GBDATA *gb_item = (pass == 0) ? GBT_first_species(gb_main) : GBT_first_SAI(gb_main); |
|---|
| 484 | gb_item && !error; |
|---|
| 485 | gb_item = (pass == 0) ? GBT_next_species(gb_item) : GBT_next_SAI(gb_item)) |
|---|
| 486 | { |
|---|
| 487 | for (int ali = 0; ali_names[ali] && !error; ++ali) { |
|---|
| 488 | GBDATA *gb_ali = GB_entry(gb_item, ali_names[ali]); |
|---|
| 489 | if (gb_ali) { |
|---|
| 490 | GBDATA *gb_mark = GB_entry(gb_ali, "mark"); |
|---|
| 491 | if (gb_mark) { |
|---|
| 492 | error = GB_delete(gb_mark); |
|---|
| 493 | removed++; |
|---|
| 494 | } |
|---|
| 495 | } |
|---|
| 496 | } |
|---|
| 497 | |
|---|
| 498 | progress.inc_and_check_user_abort(error); |
|---|
| 499 | } |
|---|
| 500 | } |
|---|
| 501 | |
|---|
| 502 | { |
|---|
| 503 | char *helix_name = GBT_get_default_helix(gb_main); |
|---|
| 504 | GBDATA *gb_helix = GBT_find_SAI(gb_main, helix_name); |
|---|
| 505 | |
|---|
| 506 | if (gb_helix) { |
|---|
| 507 | for (int ali = 0; ali_names[ali] && !error; ++ali) { |
|---|
| 508 | GBDATA *gb_ali = GB_entry(gb_helix, ali_names[ali]); |
|---|
| 509 | GBDATA *gb_old_ref = GB_entry(gb_ali, "REF"); |
|---|
| 510 | GBDATA *gb_new_ref = GB_entry(gb_ali, "_REF"); |
|---|
| 511 | |
|---|
| 512 | if (gb_old_ref) { |
|---|
| 513 | if (gb_new_ref) { |
|---|
| 514 | error = GBS_global_string("SAI:%s has 'REF' and '_REF' in '%s' (data corrupt?!)", |
|---|
| 515 | helix_name, ali_names[ali]); |
|---|
| 516 | } |
|---|
| 517 | else { // move info from REF -> _REF |
|---|
| 518 | char *content = GB_read_string(gb_old_ref); |
|---|
| 519 | if (!content) error = GB_await_error(); |
|---|
| 520 | else { |
|---|
| 521 | gb_new_ref = GB_create(gb_ali, "_REF", GB_STRING); |
|---|
| 522 | if (!gb_new_ref) error = GB_await_error(); |
|---|
| 523 | else { |
|---|
| 524 | error = GB_write_string(gb_new_ref, content); |
|---|
| 525 | if (!error) error = GB_delete(gb_old_ref); |
|---|
| 526 | } |
|---|
| 527 | free(content); |
|---|
| 528 | } |
|---|
| 529 | } |
|---|
| 530 | } |
|---|
| 531 | } |
|---|
| 532 | } |
|---|
| 533 | |
|---|
| 534 | free(helix_name); |
|---|
| 535 | } |
|---|
| 536 | |
|---|
| 537 | if (!error) { |
|---|
| 538 | if (removed) { |
|---|
| 539 | aw_message(GBS_global_string("Deleted %zu useless 'mark' entries.", removed)); |
|---|
| 540 | } |
|---|
| 541 | } |
|---|
| 542 | |
|---|
| 543 | return ta.close(error); |
|---|
| 544 | } |
|---|
| 545 | |
|---|
| 546 | // -------------------------------------------------------------------------------- |
|---|
| 547 | |
|---|
| 548 | static bool testDictionaryCompression(GBDATA *gbd, GBQUARK key_quark, bool testUse) { |
|---|
| 549 | // returns true, if |
|---|
| 550 | // testUse == true and ANY entries below 'gbd' with quark 'key_quark' uses dictionary compression |
|---|
| 551 | // testUse == false and ALL entries below 'gbd' with quark 'key_quark' can be decompressed w/o errors |
|---|
| 552 | |
|---|
| 553 | nt_assert(GB_read_type(gbd) == GB_DB); |
|---|
| 554 | |
|---|
| 555 | for (GBDATA *gb_sub = GB_child(gbd); gb_sub; gb_sub = GB_nextChild(gb_sub)) { |
|---|
| 556 | switch (GB_read_type(gb_sub)) { |
|---|
| 557 | case GB_DB: |
|---|
| 558 | // return false if any compression failed or return true if any uses dict-compression |
|---|
| 559 | if (testDictionaryCompression(gb_sub, key_quark, testUse) == testUse) return testUse; |
|---|
| 560 | break; |
|---|
| 561 | |
|---|
| 562 | case GB_STRING: |
|---|
| 563 | case GB_LINK: |
|---|
| 564 | if (GB_get_quark(gb_sub) == key_quark && GB_is_dictionary_compressed(gb_sub)) { |
|---|
| 565 | if (testUse) return true; |
|---|
| 566 | |
|---|
| 567 | const char *decompressed = GB_read_char_pntr(gb_sub); |
|---|
| 568 | if (!decompressed) return false; |
|---|
| 569 | } |
|---|
| 570 | break; |
|---|
| 571 | |
|---|
| 572 | default: |
|---|
| 573 | break; |
|---|
| 574 | } |
|---|
| 575 | } |
|---|
| 576 | |
|---|
| 577 | return !testUse; |
|---|
| 578 | } |
|---|
| 579 | |
|---|
| 580 | class Dict; |
|---|
| 581 | typedef SmartPtr<Dict> DictPtr; |
|---|
| 582 | |
|---|
| 583 | |
|---|
| 584 | class KeyInfo : virtual Noncopyable { |
|---|
| 585 | string name; // keyname |
|---|
| 586 | DictPtr original; |
|---|
| 587 | |
|---|
| 588 | bool compressionTested; |
|---|
| 589 | bool compressed; |
|---|
| 590 | |
|---|
| 591 | void init() { |
|---|
| 592 | compressionTested = false; |
|---|
| 593 | compressed = false; |
|---|
| 594 | } |
|---|
| 595 | |
|---|
| 596 | public: |
|---|
| 597 | KeyInfo(const char *Name) : name(Name) { init(); } |
|---|
| 598 | KeyInfo(const char *Name, DictPtr originalDict) : name(Name), original(originalDict) { init(); } |
|---|
| 599 | |
|---|
| 600 | void testCompressed(GBDATA *gb_main) { |
|---|
| 601 | nt_assert(!compressionTested); |
|---|
| 602 | compressed = testDictionaryCompression(gb_main, GB_find_or_create_quark(gb_main, name.c_str()), true); |
|---|
| 603 | compressionTested = true; |
|---|
| 604 | } |
|---|
| 605 | |
|---|
| 606 | const string& getName() const { return name; } |
|---|
| 607 | |
|---|
| 608 | bool isCompressed() const { |
|---|
| 609 | nt_assert(compressionTested); |
|---|
| 610 | return compressed; |
|---|
| 611 | } |
|---|
| 612 | }; |
|---|
| 613 | |
|---|
| 614 | |
|---|
| 615 | class Dict : virtual Noncopyable { |
|---|
| 616 | string group; // lowercase keyname |
|---|
| 617 | string orgkey; |
|---|
| 618 | DictData *data; |
|---|
| 619 | |
|---|
| 620 | map<string, bool> decompressWorks; // key -> bool |
|---|
| 621 | |
|---|
| 622 | public: |
|---|
| 623 | static GBDATA *gb_main; |
|---|
| 624 | |
|---|
| 625 | Dict(const char *Group, const char *OrgKey, DictData *Data) : group(Group), orgkey(OrgKey), data(Data) {} |
|---|
| 626 | |
|---|
| 627 | const string& getGroup() const { return group; } |
|---|
| 628 | const string& getOriginalKey() const { return orgkey; } |
|---|
| 629 | |
|---|
| 630 | bool mayBeUsedWith(const string& key) const { return strcasecmp(group.c_str(), key.c_str()) == 0; } |
|---|
| 631 | |
|---|
| 632 | GB_ERROR assignToKey(const string& key) const { return GB_set_dictionary(gb_main, key.c_str(), data); } |
|---|
| 633 | GB_ERROR unassignFromKey(const string& key) const { return GB_set_dictionary(gb_main, key.c_str(), NULL); } |
|---|
| 634 | |
|---|
| 635 | bool canDecompress(const string& key) { |
|---|
| 636 | nt_assert(mayBeUsedWith(key)); |
|---|
| 637 | if (decompressWorks.find(key) == decompressWorks.end()) { |
|---|
| 638 | bool works = false; |
|---|
| 639 | GB_ERROR error = assignToKey(key); |
|---|
| 640 | |
|---|
| 641 | if (!error) works = testDictionaryCompression(gb_main, GB_find_or_create_quark(gb_main, key.c_str()), false); |
|---|
| 642 | decompressWorks[key] = works; |
|---|
| 643 | |
|---|
| 644 | GB_ERROR err2 = unassignFromKey(key); |
|---|
| 645 | if (err2) { |
|---|
| 646 | aw_message(GBS_global_string("Error while removing @dictionary from key '%s': %s", key.c_str(), err2)); |
|---|
| 647 | } |
|---|
| 648 | } |
|---|
| 649 | return decompressWorks[key]; |
|---|
| 650 | } |
|---|
| 651 | }; |
|---|
| 652 | GBDATA *Dict::gb_main = NULL; |
|---|
| 653 | |
|---|
| 654 | |
|---|
| 655 | typedef map<string, int> KeyCounter; // groupname -> occur count |
|---|
| 656 | typedef SmartPtr<KeyInfo> KeyInfoPtr; |
|---|
| 657 | typedef map<string, KeyInfoPtr> Keys; // keyname -> info |
|---|
| 658 | typedef map<string, DictPtr> DictMap; |
|---|
| 659 | typedef vector<DictPtr> Dicts; |
|---|
| 660 | typedef set<string> StringSet; |
|---|
| 661 | |
|---|
| 662 | #define STATUS_PREFIX "Dictionary: " |
|---|
| 663 | |
|---|
// true if 'key' can be found in 'container'
// (works for every container providing find() and end(), e.g. set or map)
template<typename CONT, typename KEY>
bool contains(const CONT& container, const KEY& key) {
    const typename CONT::const_iterator hit = container.find(key);
    return hit != container.end();
}
|---|
| 668 | |
|---|
| 669 | static GB_ERROR findAffectedKeys(GBDATA *gb_key_data, KeyCounter& kcount, Keys& keys, Dicts& dicts) { |
|---|
| 670 | GB_ERROR error = 0; |
|---|
| 671 | GBDATA *gb_main = GB_get_root(gb_key_data); |
|---|
| 672 | |
|---|
| 673 | for (int pass = 1; pass <= 2; ++pass) { |
|---|
| 674 | for (GBDATA *gb_key = GB_entry(gb_key_data, "@key"); !error && gb_key; gb_key = GB_nextEntry(gb_key)) { |
|---|
| 675 | GBDATA *gb_name = GB_entry(gb_key, "@name"); |
|---|
| 676 | const char *keyName = GB_read_char_pntr(gb_name); |
|---|
| 677 | |
|---|
| 678 | if (!keyName) { |
|---|
| 679 | error = GBS_global_string("@key w/o @name (%s)", GB_await_error()); |
|---|
| 680 | } |
|---|
| 681 | else { |
|---|
| 682 | char *keyGroup = strdup(keyName); |
|---|
| 683 | ARB_strlower(keyGroup); |
|---|
| 684 | |
|---|
| 685 | switch (pass) { |
|---|
| 686 | case 1: |
|---|
| 687 | kcount[keyGroup]++; |
|---|
| 688 | break; |
|---|
| 689 | case 2: |
|---|
| 690 | if (kcount[keyGroup]>1) { |
|---|
| 691 | GBDATA *gb_dictionary = GB_entry(gb_key, "@dictionary"); |
|---|
| 692 | if (gb_dictionary) { |
|---|
| 693 | DictPtr dict = new Dict(keyGroup, keyName, GB_get_dictionary(gb_main, keyName)); |
|---|
| 694 | keys[keyName] = new KeyInfo(keyName, dict); |
|---|
| 695 | dicts.push_back(dict); |
|---|
| 696 | } |
|---|
| 697 | else keys[keyName] = new KeyInfo(keyName); |
|---|
| 698 | } |
|---|
| 699 | else kcount.erase(keyGroup); |
|---|
| 700 | break; |
|---|
| 701 | } |
|---|
| 702 | free(keyGroup); |
|---|
| 703 | } |
|---|
| 704 | } |
|---|
| 705 | } |
|---|
| 706 | return error; |
|---|
| 707 | } |
|---|
| 708 | |
|---|
| 709 | static GB_ERROR deleteDataOfKey(GBDATA *gbd, GBQUARK key_quark, StringSet& deletedData, long& deleted, long& notDeleted) { |
|---|
| 710 | GB_ERROR error = 0; |
|---|
| 711 | for (GBDATA *gb_sub = GB_child(gbd); gb_sub; gb_sub = GB_nextChild(gb_sub)) { |
|---|
| 712 | switch (GB_read_type(gb_sub)) { |
|---|
| 713 | case GB_DB: |
|---|
| 714 | error = deleteDataOfKey(gb_sub, key_quark, deletedData, deleted, notDeleted); |
|---|
| 715 | break; |
|---|
| 716 | |
|---|
| 717 | case GB_STRING: |
|---|
| 718 | case GB_LINK: |
|---|
| 719 | if (GB_get_quark(gb_sub) == key_quark) { |
|---|
| 720 | if (GB_is_dictionary_compressed(gb_sub)) { |
|---|
| 721 | string path(GB_get_db_path(gb_sub)); |
|---|
| 722 | error = GB_delete(gb_sub); |
|---|
| 723 | if (!error) { |
|---|
| 724 | deletedData.insert(path); |
|---|
| 725 | deleted++; |
|---|
| 726 | } |
|---|
| 727 | } |
|---|
| 728 | else { |
|---|
| 729 | notDeleted++; |
|---|
| 730 | } |
|---|
| 731 | } |
|---|
| 732 | break; |
|---|
| 733 | default: |
|---|
| 734 | break; |
|---|
| 735 | } |
|---|
| 736 | } |
|---|
| 737 | return error; |
|---|
| 738 | } |
|---|
| 739 | |
|---|
| 740 | static char *readFirstCompressedDataOf(GBDATA *gbd, GBQUARK key_quark) { |
|---|
| 741 | char *data = 0; |
|---|
| 742 | for (GBDATA *gb_sub = GB_child(gbd); !data && gb_sub; gb_sub = GB_nextChild(gb_sub)) { |
|---|
| 743 | switch (GB_read_type(gb_sub)) { |
|---|
| 744 | case GB_DB: |
|---|
| 745 | data = readFirstCompressedDataOf(gb_sub, key_quark); |
|---|
| 746 | break; |
|---|
| 747 | |
|---|
| 748 | case GB_STRING: |
|---|
| 749 | case GB_LINK: |
|---|
| 750 | if (GB_get_quark(gb_sub) == key_quark) { |
|---|
| 751 | if (GB_is_dictionary_compressed(gb_sub)) { |
|---|
| 752 | data = GB_read_as_string(gb_sub); |
|---|
| 753 | } |
|---|
| 754 | } |
|---|
| 755 | break; |
|---|
| 756 | default: |
|---|
| 757 | break; |
|---|
| 758 | } |
|---|
| 759 | } |
|---|
| 760 | return data; |
|---|
| 761 | } |
|---|
| 762 | |
|---|
| 763 | |
|---|
| 764 | static GB_ERROR NT_fix_dict_compress(GBDATA *gb_main, size_t, size_t) { |
|---|
| 765 | GB_transaction ta(gb_main); |
|---|
| 766 | GBDATA *gb_key_data = GB_search(gb_main, GB_SYSTEM_FOLDER "/" GB_SYSTEM_KEY_DATA, GB_FIND); |
|---|
| 767 | GB_ERROR error = 0; |
|---|
| 768 | |
|---|
| 769 | Dict::gb_main = gb_main; |
|---|
| 770 | |
|---|
| 771 | if (!gb_key_data) { |
|---|
| 772 | error = "No " GB_SYSTEM_KEY_DATA " found.. DB corrupted?"; |
|---|
| 773 | } |
|---|
| 774 | else { |
|---|
| 775 | KeyCounter kcount; // strlwr(keyname) -> count |
|---|
| 776 | Keys keys; |
|---|
| 777 | Dicts dicts; |
|---|
| 778 | |
|---|
| 779 | error = findAffectedKeys(gb_key_data, kcount, keys, dicts); |
|---|
| 780 | |
|---|
| 781 | // count affected keys |
|---|
| 782 | int affectedKeys = 0; |
|---|
| 783 | for (KeyCounter::iterator kci = kcount.begin(); kci != kcount.end(); ++kci) { |
|---|
| 784 | affectedKeys += kci->second; |
|---|
| 785 | } |
|---|
| 786 | |
|---|
| 787 | if (!error && affectedKeys>0) { |
|---|
| 788 | // check which keys are compressed |
|---|
| 789 | |
|---|
| 790 | { |
|---|
| 791 | arb_progress progress(STATUS_PREFIX "search compressed data", affectedKeys); |
|---|
| 792 | |
|---|
| 793 | for (Keys::iterator ki = keys.begin(); ki != keys.end(); ++ki) { |
|---|
| 794 | KeyInfoPtr k = ki->second; |
|---|
| 795 | k->testCompressed(gb_main); |
|---|
| 796 | ++progress; |
|---|
| 797 | } |
|---|
| 798 | } |
|---|
| 799 | |
|---|
| 800 | // test which key/dict combinations work |
|---|
| 801 | int combinations = 0; // possible key/dict combinations |
|---|
| 802 | |
|---|
| 803 | DictMap use; // keyname -> dictionary (which dictionary to use) |
|---|
| 804 | StringSet multiDecompressible; // keys which can be decompressed with multiple dictionaries |
|---|
| 805 | |
|---|
| 806 | for (int pass = 1; pass <= 2; ++pass) { |
|---|
| 807 | arb_progress *progress = NULL; |
|---|
| 808 | if (pass == 2 && combinations) progress = new arb_progress(STATUS_PREFIX "test compression", combinations); |
|---|
| 809 | |
|---|
| 810 | for (Dicts::iterator di = dicts.begin(); di != dicts.end(); ++di) { |
|---|
| 811 | DictPtr d = *di; |
|---|
| 812 | |
|---|
| 813 | for (Keys::iterator ki = keys.begin(); ki != keys.end(); ++ki) { |
|---|
| 814 | KeyInfoPtr k = ki->second; |
|---|
| 815 | const string& keyname = k->getName(); |
|---|
| 816 | |
|---|
| 817 | if (k->isCompressed() && d->mayBeUsedWith(keyname)) { |
|---|
| 818 | switch (pass) { |
|---|
| 819 | case 1: |
|---|
| 820 | combinations++; |
|---|
| 821 | break; |
|---|
| 822 | case 2: |
|---|
| 823 | if (d->canDecompress(keyname)) { |
|---|
| 824 | if (!contains(use, keyname)) { // first dictionary working with keyname |
|---|
| 825 | use[keyname] = d; |
|---|
| 826 | } |
|---|
| 827 | else { // already have another dictionary working with keyname |
|---|
| 828 | multiDecompressible.insert(keyname); |
|---|
| 829 | } |
|---|
| 830 | } |
|---|
| 831 | ++(*progress); |
|---|
| 832 | break; |
|---|
| 833 | } |
|---|
| 834 | } |
|---|
| 835 | } |
|---|
| 836 | } |
|---|
| 837 | delete progress; |
|---|
| 838 | } |
|---|
| 839 | |
|---|
| 840 | StringSet notDecompressible; // keys which can be decompressed with none of the dictionaries |
|---|
| 841 | for (Keys::iterator ki = keys.begin(); ki != keys.end(); ++ki) { |
|---|
| 842 | KeyInfoPtr k = ki->second; |
|---|
| 843 | const string& keyname = k->getName(); |
|---|
| 844 | |
|---|
| 845 | if (k->isCompressed()) { |
|---|
| 846 | if (!contains(use, keyname)) notDecompressible.insert(keyname); |
|---|
| 847 | if (contains(multiDecompressible, keyname)) use.erase(keyname); |
|---|
| 848 | } |
|---|
| 849 | } |
|---|
| 850 | |
|---|
| 851 | bool dataLost = false; |
|---|
| 852 | int reassigned = 0; |
|---|
| 853 | |
|---|
| 854 | if (!notDecompressible.empty()) { |
|---|
| 855 | // bad .. found undecompressible data |
|---|
| 856 | int nd_count = notDecompressible.size(); |
|---|
| 857 | aw_message(GBS_global_string("Detected corrupted dictionary compression\n" |
|---|
| 858 | "Data of %i DB-keys is lost and will be deleted", nd_count)); |
|---|
| 859 | |
|---|
| 860 | arb_progress progress(STATUS_PREFIX "deleting corrupt data", nd_count); |
|---|
| 861 | |
|---|
| 862 | StringSet deletedData; |
|---|
| 863 | long deleted = 0; |
|---|
| 864 | long notDeleted = 0; |
|---|
| 865 | |
|---|
| 866 | for (StringSet::iterator ki = notDecompressible.begin(); !error && ki != notDecompressible.end(); ++ki) { |
|---|
| 867 | const string& keyname = *ki; |
|---|
| 868 | |
|---|
| 869 | error = deleteDataOfKey(gb_main, GB_find_or_create_quark(gb_main, keyname.c_str()), deletedData, deleted, notDeleted); |
|---|
| 870 | ++progress; |
|---|
| 871 | } |
|---|
| 872 | |
|---|
| 873 | if (!error) { |
|---|
| 874 | nt_assert(deleted); // at least 1 db-entry should have been deleted |
|---|
| 875 | |
|---|
| 876 | aw_message(GBS_global_string("Deleted %li of %li affected DB-entries", deleted, deleted+notDeleted)); |
|---|
| 877 | aw_message("see console for a list of affected keys"); |
|---|
| 878 | |
|---|
| 879 | printf("Deleted keys:\n"); |
|---|
| 880 | for (StringSet::iterator di = deletedData.begin(); di != deletedData.end(); ++di) { |
|---|
| 881 | printf("* %s\n", di->c_str()); |
|---|
| 882 | } |
|---|
| 883 | } |
|---|
| 884 | } |
|---|
| 885 | |
|---|
| 886 | if (!error && !multiDecompressible.empty()) { |
|---|
| 887 | for (StringSet::iterator ki = multiDecompressible.begin(); !error && ki != multiDecompressible.end(); ++ki) { |
|---|
| 888 | const string& keyname = *ki; |
|---|
| 889 | int possible = 0; |
|---|
| 890 | vector<DictPtr> possibleDicts; |
|---|
| 891 | |
|---|
| 892 | printf("--------------------------------------------------------------------------------\n"); |
|---|
| 893 | |
|---|
| 894 | for (Dicts::iterator di = dicts.begin(); !error && di != dicts.end(); ++di) { |
|---|
| 895 | DictPtr d = *di; |
|---|
| 896 | if (d->mayBeUsedWith(keyname) && d->canDecompress(keyname)) { |
|---|
| 897 | error = d->assignToKey(keyname); |
|---|
| 898 | if (!error) { |
|---|
| 899 | char *data = readFirstCompressedDataOf(gb_main, GB_find_or_create_quark(gb_main, keyname.c_str())); |
|---|
| 900 | |
|---|
| 901 | nt_assert(data); |
|---|
| 902 | possible++; |
|---|
| 903 | printf("possibility %i = '%s'\n", possible, data); |
|---|
| 904 | free(data); |
|---|
| 905 | |
|---|
| 906 | possibleDicts.push_back(d); |
|---|
| 907 | |
|---|
| 908 | error = d->unassignFromKey(keyname); |
|---|
| 909 | } |
|---|
| 910 | } |
|---|
| 911 | } |
|---|
| 912 | |
|---|
| 913 | if (!error) { |
|---|
| 914 | nt_assert(possible>0); |
|---|
| 915 | |
|---|
| 916 | int selected; |
|---|
| 917 | if (possible>1) { |
|---|
| 918 | char *question = GBS_global_string_copy("%i possibilities to decompress field '%s' have been detected\n" |
|---|
| 919 | "and example data was dumped to the console.\n" |
|---|
| 920 | "Please examine output and decide which is the correct possibility!", |
|---|
| 921 | possible, keyname.c_str()); |
|---|
| 922 | |
|---|
| 923 | const char *buttons = "Abort"; |
|---|
| 924 | for (int p = 1; p <= possible; ++p) buttons = GBS_global_string("%s,%i", buttons, p); |
|---|
| 925 | selected = aw_question("dict_decompress_bug", question, buttons, false, NULL); |
|---|
| 926 | free(question); |
|---|
| 927 | } |
|---|
| 928 | else { |
|---|
| 929 | selected = 1; |
|---|
| 930 | } |
|---|
| 931 | |
|---|
| 932 | if (!selected) { |
|---|
| 933 | error = "Aborted by user"; |
|---|
| 934 | } |
|---|
| 935 | else { |
|---|
| 936 | use[keyname] = possibleDicts[selected-1]; |
|---|
| 937 | } |
|---|
| 938 | } |
|---|
| 939 | } |
|---|
| 940 | } |
|---|
| 941 | |
|---|
| 942 | // now all redundancies should be eliminated and we can assign dictionaries to affected keys |
|---|
| 943 | if (!error) { |
|---|
| 944 | for (Keys::iterator ki = keys.begin(); !error && ki != keys.end(); ++ki) { |
|---|
| 945 | KeyInfoPtr k = ki->second; |
|---|
| 946 | const string& keyname = k->getName(); |
|---|
| 947 | |
|---|
| 948 | if (k->isCompressed()) { |
|---|
| 949 | if (!contains(use, keyname)) { |
|---|
| 950 | error = GBS_global_string("No dictionary detected for key '%s'", keyname.c_str()); |
|---|
| 951 | } |
|---|
| 952 | else { |
|---|
| 953 | DictPtr d = use[keyname]; |
|---|
| 954 | |
|---|
| 955 | if (d->getOriginalKey() != keyname) { |
|---|
| 956 | d->assignToKey(keyname); // set the dictionary |
|---|
| 957 | aw_message(GBS_global_string("Assigning '%s'-dictionary to '%s'", |
|---|
| 958 | d->getOriginalKey().c_str(), keyname.c_str())); |
|---|
| 959 | reassigned++; |
|---|
| 960 | } |
|---|
| 961 | } |
|---|
| 962 | } |
|---|
| 963 | } |
|---|
| 964 | } |
|---|
| 965 | |
|---|
| 966 | if (dataLost||reassigned) { |
|---|
| 967 | aw_message(dataLost |
|---|
| 968 | ? "We apologize for the data-loss." |
|---|
| 969 | : "No conflicts detected in compressed data."); |
|---|
| 970 | aw_message("Dictionaries fixed.\n" |
|---|
| 971 | "Please save your database with a new name."); |
|---|
| 972 | } |
|---|
| 973 | } |
|---|
| 974 | } |
|---|
| 975 | |
|---|
| 976 | Dict::gb_main = NULL; |
|---|
| 977 | return ta.close(error); |
|---|
| 978 | } |
|---|
| 979 | |
|---|
| 980 | // -------------------------------------------------------------------------------- |
|---|
| 981 | |
|---|
| 982 | static GB_ERROR remove_dup_colors(GBDATA *gb_item, ItemSelector& IF_DEBUG(sel)) { |
|---|
| 983 | // Databases out there may contain multiple 'ARB_color' entries. |
|---|
| 984 | // Due to some already fixed bug - maybe introduced in r5309 and fixed in r5825 |
|---|
| 985 | |
|---|
| 986 | GBDATA *gb_color = GB_entry(gb_item, AW_COLOR_GROUP_ENTRY); |
|---|
| 987 | GB_ERROR error = NULL; |
|---|
| 988 | |
|---|
| 989 | #if defined(DEBUG) |
|---|
| 990 | int del_count = 0; |
|---|
| 991 | #endif // DEBUG |
|---|
| 992 | |
|---|
| 993 | if (gb_color) { |
|---|
| 994 | GB_push_my_security(gb_color); |
|---|
| 995 | while (!error) { |
|---|
| 996 | GBDATA *gb_next_color = GB_nextEntry(gb_color); |
|---|
| 997 | if (!gb_next_color) break; |
|---|
| 998 | |
|---|
| 999 | error = GB_delete(gb_next_color); |
|---|
| 1000 | #if defined(DEBUG) |
|---|
| 1001 | if (!error) del_count++; |
|---|
| 1002 | #endif // DEBUG |
|---|
| 1003 | } |
|---|
| 1004 | GB_pop_my_security(gb_color); |
|---|
| 1005 | } |
|---|
| 1006 | |
|---|
| 1007 | #if defined(DEBUG) |
|---|
| 1008 | if (del_count) fprintf(stderr, |
|---|
| 1009 | "- deleted %i duplicated '" AW_COLOR_GROUP_ENTRY "' from %s '%s'\n", |
|---|
| 1010 | del_count, |
|---|
| 1011 | sel.item_name, |
|---|
| 1012 | sel.generate_item_id(GB_get_root(gb_item), gb_item)); |
|---|
| 1013 | #endif // DEBUG |
|---|
| 1014 | |
|---|
| 1015 | return error; |
|---|
| 1016 | } |
|---|
| 1017 | |
|---|
| 1018 | // -------------------------------------------------------------------------------- |
|---|
| 1019 | |
|---|
| 1020 | GB_ERROR NT_repair_DB(GBDATA *gb_main) { |
|---|
| 1021 | // status is already open and will be closed by caller! |
|---|
| 1022 | |
|---|
| 1023 | CheckedConsistencies check(gb_main); |
|---|
| 1024 | GB_ERROR err = 0; |
|---|
| 1025 | bool is_genome_db; |
|---|
| 1026 | { |
|---|
| 1027 | GB_transaction ta(gb_main); |
|---|
| 1028 | is_genome_db = GEN_is_genome_db(gb_main, -1); |
|---|
| 1029 | } |
|---|
| 1030 | |
|---|
| 1031 | check.perform_check("fix gene_data", NT_fix_gene_data, err); |
|---|
| 1032 | check.perform_check("fix_dict_compress", NT_fix_dict_compress, err); // do this before NT_del_mark_move_REF (cause 'REF' is affected) |
|---|
| 1033 | check.perform_check("del_mark_move_REF", NT_del_mark_move_REF, err); |
|---|
| 1034 | |
|---|
| 1035 | if (is_genome_db) { |
|---|
| 1036 | check.perform_check("convert_gene_locations", NT_convert_gene_locations, err); |
|---|
| 1037 | } |
|---|
| 1038 | |
|---|
| 1039 | check.register_item_check("duplicated_item_colors", remove_dup_colors); |
|---|
| 1040 | check.perform_item_checks(err); |
|---|
| 1041 | |
|---|
| 1042 | return err; |
|---|
| 1043 | } |
|---|
| 1044 | |
|---|
| 1045 | void NT_rerepair_DB(AW_window*, AW_CL cl_gbmain, AW_CL) { |
|---|
| 1046 | // re-perform all DB checks |
|---|
| 1047 | GBDATA *gb_main = reinterpret_cast<GBDATA*>(cl_gbmain); |
|---|
| 1048 | GB_ERROR err = 0; |
|---|
| 1049 | { |
|---|
| 1050 | CheckedConsistencies check(gb_main); |
|---|
| 1051 | err = check.forgetDoneChecks(); |
|---|
| 1052 | } |
|---|
| 1053 | if (!err) { |
|---|
| 1054 | arb_progress progress("DB-Repair"); |
|---|
| 1055 | err = NT_repair_DB(gb_main); |
|---|
| 1056 | } |
|---|
| 1057 | |
|---|
| 1058 | if (err) aw_message(err); |
|---|
| 1059 | } |
|---|
| 1060 | |
|---|
| 1061 | |
|---|