| 1 | // ================================================================= // |
|---|
| 2 | // // |
|---|
| 3 | // File : ptclean.cxx // |
|---|
| 4 | // Purpose : prepare db for ptserver/ptpan // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in September 2011 // |
|---|
| 7 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 8 | // http://www.arb-home.de/ // |
|---|
| 9 | // // |
|---|
| 10 | // ================================================================= // |
|---|
| 11 | |
|---|
| 12 | #include "ptclean.h" |
|---|
| 13 | #include <arbdbt.h> |
|---|
| 14 | #include <arb_progress.h> |
|---|
| 15 | #include <arb_diff.h> |
|---|
| 16 | |
|---|
| 17 | #define pt_assert(cond) arb_assert(cond) |
|---|
| 18 | |
|---|
| 19 | class EntryRemover : virtual Noncopyable { |
|---|
| 20 | // deletes all entries from DB, which are of no use for PTSERVER |
|---|
| 21 | |
|---|
| 22 | GBDATA *gb_main; |
|---|
| 23 | GB_transaction ta; |
|---|
| 24 | char *ali_name; |
|---|
| 25 | |
|---|
| 26 | mutable arb_progress progress; |
|---|
| 27 | |
|---|
| 28 | enum Need { |
|---|
| 29 | NONE, |
|---|
| 30 | ALL, |
|---|
| 31 | SOME_OF_ROOT, |
|---|
| 32 | SOME_OF_SPECIES_DATA, |
|---|
| 33 | SOME_OF_EXTENDED_DATA, |
|---|
| 34 | SOME_OF_SPECIES, |
|---|
| 35 | SOME_OF_EXTENDED, |
|---|
| 36 | SOME_OF_ALI_CONTAINER, // below species or SAI |
|---|
| 37 | }; |
|---|
| 38 | |
|---|
| 39 | Need data_needed(GBDATA *gbd, const char *keyname, Need from) const { |
|---|
| 40 | switch (from) { |
|---|
| 41 | case SOME_OF_ROOT: |
|---|
| 42 | if (strcmp(keyname, GB_SYSTEM_FOLDER)== 0) return ALL; |
|---|
| 43 | if (strcmp(keyname, "genom_db") == 0) return ALL; |
|---|
| 44 | if (strcmp(keyname, "gene_map") == 0) return ALL; |
|---|
| 45 | if (strcmp(keyname, "species_data") == 0) return SOME_OF_SPECIES_DATA; |
|---|
| 46 | if (strcmp(keyname, "extended_data") == 0) return SOME_OF_EXTENDED_DATA; |
|---|
| 47 | if (strcmp(keyname, "presets") == 0) return ALL; |
|---|
| 48 | break; |
|---|
| 49 | |
|---|
| 50 | case SOME_OF_SPECIES_DATA: |
|---|
| 51 | if (strcmp(keyname, "species") == 0) { |
|---|
| 52 | progress.inc(); |
|---|
| 53 | return SOME_OF_SPECIES; |
|---|
| 54 | } |
|---|
| 55 | break; |
|---|
| 56 | |
|---|
| 57 | case SOME_OF_EXTENDED_DATA: |
|---|
| 58 | if (strcmp(keyname, "extended") == 0) { |
|---|
| 59 | const char *sainame = GBT_get_name_or_description(gbd); |
|---|
| 60 | if (strcmp(sainame, "ECOLI") == 0) return SOME_OF_EXTENDED; |
|---|
| 61 | if (strcmp(sainame, "HELIX") == 0) return SOME_OF_EXTENDED; |
|---|
| 62 | if (strcmp(sainame, "HELIX_NR") == 0) return SOME_OF_EXTENDED; |
|---|
| 63 | } |
|---|
| 64 | break; |
|---|
| 65 | |
|---|
| 66 | case SOME_OF_SPECIES: |
|---|
| 67 | if (strcmp(keyname, "abspos") == 0) return ALL; |
|---|
| 68 | if (strcmp(keyname, "start") == 0) return ALL; |
|---|
| 69 | if (strcmp(keyname, "stop") == 0) return ALL; |
|---|
| 70 | FALLTHROUGH; |
|---|
| 71 | |
|---|
| 72 | case SOME_OF_EXTENDED: |
|---|
| 73 | if (strcmp(keyname, "name") == 0) return ALL; |
|---|
| 74 | if (strcmp(keyname, "acc") == 0) return ALL; |
|---|
| 75 | if (strcmp(keyname, "full_name") == 0) return ALL; |
|---|
| 76 | if (strcmp(keyname, ali_name) == 0) return SOME_OF_ALI_CONTAINER; |
|---|
| 77 | break; |
|---|
| 78 | |
|---|
| 79 | case SOME_OF_ALI_CONTAINER: |
|---|
| 80 | if (strcmp(keyname, "data") == 0) return ALL; |
|---|
| 81 | break; |
|---|
| 82 | |
|---|
| 83 | case NONE: pt_assert(0); break; |
|---|
| 84 | case ALL: pt_assert(0); break; |
|---|
| 85 | } |
|---|
| 86 | |
|---|
| 87 | return NONE; |
|---|
| 88 | } |
|---|
| 89 | |
|---|
| 90 | GB_ERROR del_child(GBDATA *gb_entry, const char *keyname, Need from) { |
|---|
| 91 | GB_ERROR error = NULp; |
|---|
| 92 | Need need = data_needed(gb_entry, keyname, from); |
|---|
| 93 | switch (need) { |
|---|
| 94 | case NONE: error = GB_delete(gb_entry); break; |
|---|
| 95 | case ALL: break; |
|---|
| 96 | default: error = del_subentries(gb_entry, need); break; |
|---|
| 97 | } |
|---|
| 98 | return error; |
|---|
| 99 | } |
|---|
| 100 | GB_ERROR del_subentries(GBDATA *gb_father, Need from) { |
|---|
| 101 | GB_ERROR error = NULp; |
|---|
| 102 | if (!GB_is_temporary(gb_father)) { |
|---|
| 103 | GBDATA *gb_next_child = NULp; |
|---|
| 104 | for (GBDATA *gb_child = GB_child(gb_father); gb_child && !error; gb_child = gb_next_child) { |
|---|
| 105 | gb_next_child = GB_nextChild(gb_child); |
|---|
| 106 | const char *key = GB_read_key_pntr(gb_child); |
|---|
| 107 | error = del_child(gb_child, key, from); |
|---|
| 108 | } |
|---|
| 109 | } |
|---|
| 110 | return error; |
|---|
| 111 | } |
|---|
| 112 | |
|---|
| 113 | public: |
|---|
| 114 | EntryRemover(GBDATA *gb_main_) |
|---|
| 115 | : gb_main(gb_main_), |
|---|
| 116 | ta(gb_main), |
|---|
| 117 | ali_name(GBT_get_default_alignment(gb_main)), |
|---|
| 118 | progress("Remove unused database entries", GBT_get_species_count(gb_main)) |
|---|
| 119 | {} |
|---|
| 120 | ~EntryRemover() { |
|---|
| 121 | free(ali_name); |
|---|
| 122 | progress.done(); |
|---|
| 123 | } |
|---|
| 124 | |
|---|
| 125 | GB_ERROR del_unwanted_entries() { |
|---|
| 126 | GB_ERROR error; |
|---|
| 127 | if (!ali_name) error = GB_await_error(); // (should) handle error from GBT_get_default_alignment called in ctor |
|---|
| 128 | else error = del_subentries(gb_main, SOME_OF_ROOT); |
|---|
| 129 | return error; |
|---|
| 130 | } |
|---|
| 131 | }; |
|---|
| 132 | |
|---|
| 133 | inline GB_ERROR clean_ptserver_database(GBDATA *gb_main) { |
|---|
| 134 | return EntryRemover(gb_main).del_unwanted_entries(); |
|---|
| 135 | } |
|---|
| 136 | |
|---|
| 137 | GB_ERROR cleanup_ptserver_database(GBDATA *gb_main) { |
|---|
| 138 | GB_ERROR error = GB_request_undo_type(gb_main, GB_UNDO_NONE); |
|---|
| 139 | if (!error) { |
|---|
| 140 | GB_topSecurityLevel unsecured(gb_main); |
|---|
| 141 | error = clean_ptserver_database(gb_main); |
|---|
| 142 | } |
|---|
| 143 | return error; |
|---|
| 144 | } |
|---|
| 145 | // -------------------------------------------------------------------------------- |
|---|
| 146 | |
|---|
| 147 | #ifdef UNIT_TESTS |
|---|
| 148 | #ifndef TEST_UNIT_H |
|---|
| 149 | #include <test_unit.h> |
|---|
| 150 | #endif |
|---|
| 151 | #include <arb_file.h> |
|---|
| 152 | |
|---|
| 153 | void TEST_SLOW_ptclean() { |
|---|
| 154 | GB_shell shell; |
|---|
| 155 | GBDATA *gb_main = GB_open("TEST_pt_src.arb", "rw"); |
|---|
| 156 | const char *saveas = "TEST_pt_cleaned.arb"; |
|---|
| 157 | |
|---|
| 158 | TEST_REJECT_NULL(gb_main); |
|---|
| 159 | TEST_EXPECT_NO_ERROR(cleanup_ptserver_database(gb_main)); |
|---|
| 160 | TEST_EXPECT_NO_ERROR(GB_save_as(gb_main, saveas, "a")); |
|---|
| 161 | GB_close(gb_main); |
|---|
| 162 | |
|---|
| 163 | // #define TEST_AUTO_UPDATE |
|---|
| 164 | #if defined(TEST_AUTO_UPDATE) |
|---|
| 165 | TEST_COPY_FILE("TEST_pt_cleaned.arb", "TEST_pt_cleaned_expected.arb"); |
|---|
| 166 | #else |
|---|
| 167 | TEST_EXPECT_TEXTFILES_EQUAL("TEST_pt_cleaned.arb", "TEST_pt_cleaned_expected.arb"); |
|---|
| 168 | #endif |
|---|
| 169 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(saveas)); |
|---|
| 170 | } |
|---|
| 171 | TEST_PUBLISH(TEST_SLOW_ptclean); |
|---|
| 172 | |
|---|
| 173 | #endif // UNIT_TESTS |
|---|
| 174 | |
|---|
| 175 | // -------------------------------------------------------------------------------- |
|---|
| 176 | |
|---|