| 1 | // ========================================================= // |
|---|
| 2 | // // |
|---|
| 3 | // File : ED4_detect_bad_ali.cxx // |
|---|
| 4 | // Purpose : detect alignment errors in helix regions // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in May 23 // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // ========================================================= // |
|---|
| 10 | |
|---|
| 11 | #include "ed4_detect_bad_ali.hxx" |
|---|
| 12 | #include "ed4_class.hxx" |
|---|
| 13 | |
|---|
| 14 | #include <AW_helix.hxx> |
|---|
| 15 | |
|---|
| 16 | #include <aw_awar_defs.hxx> |
|---|
| 17 | #include <aw_awar.hxx> |
|---|
| 18 | #include <aw_msg.hxx> |
|---|
| 19 | #include <aw_select.hxx> |
|---|
| 20 | #include <aw_root.hxx> |
|---|
| 21 | |
|---|
| 22 | #include <arbdbt.h> |
|---|
| 23 | #include <arb_defs.h> |
|---|
| 24 | |
|---|
| 25 | #include <string> |
|---|
| 26 | #include <map> |
|---|
| 27 | #include <vector> |
|---|
| 28 | #include <items.h> |
|---|
| 29 | #include <item_sel_list.h> |
|---|
| 30 | |
|---|
| 31 | using namespace std; |
|---|
| 32 | |
|---|
| 33 | const long NOT_CALCULATED = -1; |
|---|
| 34 | |
|---|
| 35 | typedef map<string, long> BadPositionsForSpecies; |
|---|
| 36 | typedef const BadPositionsForSpecies BadPositionsForSpeciesConst; |
|---|
| 37 | |
|---|
| 38 | typedef map<size_t, size_t> PairedPositions; |
|---|
| 39 | |
|---|
| 40 | class HelixAlignmentQuality : virtual Noncopyable { |
|---|
| 41 | GBDATA *gb_main; |
|---|
| 42 | const AW_helix& helix; |
|---|
| 43 | |
|---|
| 44 | string ali; |
|---|
| 45 | string error; |
|---|
| 46 | |
|---|
| 47 | char symbol2count[256]; |
|---|
| 48 | |
|---|
| 49 | BadPositionsForSpecies bad_positions; // list of examined species |
|---|
| 50 | PairedPositions paired_positions; // list of paring positions of all involved helices |
|---|
| 51 | |
|---|
| 52 | void note_error(GB_ERROR err) { |
|---|
| 53 | if (err && !has_error()) error = err; |
|---|
| 54 | } |
|---|
| 55 | |
|---|
| 56 | void init_bad_positions(const char *species_list) { |
|---|
| 57 | bad_positions.clear(); |
|---|
| 58 | if (!has_error()) { |
|---|
| 59 | ConstStrArray species; |
|---|
| 60 | GBT_split_string(species, species_list, ";", SPLIT_DROPEMPTY); |
|---|
| 61 | |
|---|
| 62 | for (size_t i = 0; i<species.size(); ++i) { |
|---|
| 63 | bad_positions[species[i]] = NOT_CALCULATED; |
|---|
| 64 | } |
|---|
| 65 | if (bad_positions.empty()) { |
|---|
| 66 | note_error("No species found"); |
|---|
| 67 | } |
|---|
| 68 | } |
|---|
| 69 | } |
|---|
| 70 | void add_pair_position(size_t pos, size_t pair_pos) { |
|---|
| 71 | if (pos>pair_pos) swap(pos, pair_pos); |
|---|
| 72 | arb_assert(pos<pair_pos); |
|---|
| 73 | if (paired_positions.find(pos) != paired_positions.end()) { |
|---|
| 74 | note_error(GBS_global_string("Duplicate entry for helix position %i", info2bio(pos))); |
|---|
| 75 | } |
|---|
| 76 | else { |
|---|
| 77 | paired_positions[pos] = pair_pos; |
|---|
| 78 | } |
|---|
| 79 | } |
|---|
| 80 | void init_paired_positions(const CharPtrArray& helices) { |
|---|
| 81 | paired_positions.clear(); |
|---|
| 82 | if (!has_error()) { |
|---|
| 83 | if (helices.size() == 1 && strcmp(helices[0], "*") == 0) { |
|---|
| 84 | // use all helices |
|---|
| 85 | for (long pos = helix.first_pair_position(); pos != -1; pos = helix.next_pair_position(pos)) { |
|---|
| 86 | const BI_helix_entry& entry = helix.entry(pos); |
|---|
| 87 | if (entry.helix_nr[0] == '-') { // only add left helices |
|---|
| 88 | add_pair_position(pos, entry.pair_pos); |
|---|
| 89 | } |
|---|
| 90 | } |
|---|
| 91 | } |
|---|
| 92 | else { |
|---|
| 93 | for (size_t i = 0; i<helices.size() && !has_error(); ++i) { |
|---|
| 94 | long firstPos = helix.first_position(helices[i]); |
|---|
| 95 | if (firstPos == -1) { |
|---|
| 96 | note_error(GBS_global_string("Invalid helix number '%s'", helices[i])); |
|---|
| 97 | } |
|---|
| 98 | else { |
|---|
| 99 | long lastPos = helix.last_position(helices[i]); |
|---|
| 100 | arb_assert(lastPos != -1); // otherwise BI_helix is corrupted |
|---|
| 101 | |
|---|
| 102 | for (long pos = firstPos; pos<=lastPos && pos>0 && !has_error(); ) { |
|---|
| 103 | const BI_helix_entry& entry = helix.entry(pos); |
|---|
| 104 | add_pair_position(pos, entry.pair_pos); |
|---|
| 105 | pos = entry.next_pair_pos; |
|---|
| 106 | } |
|---|
| 107 | } |
|---|
| 108 | } |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | if (paired_positions.empty()) { |
|---|
| 112 | note_error("List of helix positions to examine is empty"); |
|---|
| 113 | } |
|---|
| 114 | } |
|---|
| 115 | } |
|---|
| 116 | void init_bad_symbol_table(const char *bad_symbols) { |
|---|
| 117 | memset(symbol2count, 0, 256); |
|---|
| 118 | int count = 0;; |
|---|
| 119 | for (int i = 0; bad_symbols[i]; ++i) { |
|---|
| 120 | if (bad_symbols[i] != ' ') { // ignore space |
|---|
| 121 | symbol2count[safeCharIndex(bad_symbols[i])] = 1; |
|---|
| 122 | ++count; |
|---|
| 123 | } |
|---|
| 124 | } |
|---|
| 125 | if (!count) { |
|---|
| 126 | note_error("no 'bad' helix symbol defined"); |
|---|
| 127 | } |
|---|
| 128 | } |
|---|
| 129 | |
|---|
| 130 | long calculate_bad_positions(const string& species) { |
|---|
| 131 | long bad = -1; |
|---|
| 132 | |
|---|
| 133 | { |
|---|
| 134 | GB_transaction ta(gb_main); |
|---|
| 135 | GBDATA *gb_species = GBT_find_species(gb_main, species.c_str()); |
|---|
| 136 | |
|---|
| 137 | if (!gb_species) { |
|---|
| 138 | note_error(GBS_global_string("no such species: '%s'", species.c_str())); |
|---|
| 139 | } |
|---|
| 140 | else { |
|---|
| 141 | GBDATA *gb_seq = GBT_find_sequence(gb_species, ali.c_str()); |
|---|
| 142 | if (!gb_seq) { |
|---|
| 143 | note_error(GBS_global_string("species '%s' has no data in alignment '%s'", |
|---|
| 144 | species.c_str(), ali.c_str())); |
|---|
| 145 | } |
|---|
| 146 | else { |
|---|
| 147 | GB_CSTR sequence = GB_read_char_pntr(gb_seq); |
|---|
| 148 | const size_t seq_len = GB_read_string_count(gb_seq); |
|---|
| 149 | |
|---|
| 150 | bad = 0; |
|---|
| 151 | for (PairedPositions::const_iterator pp = paired_positions.begin(); pp != paired_positions.end(); ++pp) { |
|---|
| 152 | const size_t p1 = pp->first; |
|---|
| 153 | const size_t p2 = pp->second; |
|---|
| 154 | |
|---|
| 155 | const char b1 = p1<seq_len ? sequence[p1] : '.'; |
|---|
| 156 | const char b2 = p2<seq_len ? sequence[p2] : '.'; |
|---|
| 157 | |
|---|
| 158 | const char sym = helix.get_symbol(b1, b2); |
|---|
| 159 | bad += symbol2count[safeCharIndex(sym)]; |
|---|
| 160 | } |
|---|
| 161 | } |
|---|
| 162 | } |
|---|
| 163 | } |
|---|
| 164 | |
|---|
| 165 | return bad; |
|---|
| 166 | } |
|---|
| 167 | |
|---|
| 168 | void update_bad_position_counts() { |
|---|
| 169 | for (BadPositionsForSpecies::iterator bp = bad_positions.begin(); bp != bad_positions.end(); ++bp) { |
|---|
| 170 | if (bp->second == NOT_CALCULATED) { |
|---|
| 171 | bp->second = calculate_bad_positions(bp->first); |
|---|
| 172 | } |
|---|
| 173 | } |
|---|
| 174 | } |
|---|
| 175 | |
|---|
| 176 | public: |
|---|
| 177 | HelixAlignmentQuality(GBDATA *gb_main_, const AW_helix& helix_, const char *ali_, |
|---|
| 178 | const char *species_list, // @@@ pass ConstStrArray instead? |
|---|
| 179 | const CharPtrArray& helices, // contains helix numbers to use (as text) or one entry containing a '*' (meaning: all) |
|---|
| 180 | const char *bad_symbols) |
|---|
| 181 | : gb_main(gb_main_), |
|---|
| 182 | helix(helix_), |
|---|
| 183 | ali(ali_) |
|---|
| 184 | { |
|---|
| 185 | note_error(helix.get_error()); |
|---|
| 186 | init_bad_positions(species_list); |
|---|
| 187 | init_paired_positions(helices); |
|---|
| 188 | init_bad_symbol_table(bad_symbols); |
|---|
| 189 | } |
|---|
| 190 | |
|---|
| 191 | bool has_error() const { return !error.empty(); } |
|---|
| 192 | GB_ERROR get_error() const { return has_error() ? error.c_str() : NULp; } |
|---|
| 193 | |
|---|
| 194 | size_t size() const { arb_assert(!has_error()); return bad_positions.size(); } |
|---|
| 195 | size_t positions() const { arb_assert(!has_error()); return paired_positions.size(); } |
|---|
| 196 | |
|---|
| 197 | BadPositionsForSpeciesConst results() { |
|---|
| 198 | arb_assert(!has_error()); |
|---|
| 199 | update_bad_position_counts(); |
|---|
| 200 | return bad_positions; |
|---|
| 201 | } |
|---|
| 202 | }; |
|---|
| 203 | |
|---|
| 204 | // -------------------------------------------------------------------------------- |
|---|
| 205 | |
|---|
| 206 | class BadPositionsOrder { |
|---|
| 207 | BadPositionsForSpeciesConst bad_pos; |
|---|
| 208 | |
|---|
| 209 | public: |
|---|
| 210 | BadPositionsOrder(BadPositionsForSpeciesConst bad_pos_) : |
|---|
| 211 | bad_pos(bad_pos_) |
|---|
| 212 | {} |
|---|
| 213 | |
|---|
| 214 | bool operator()(const string& s1, const string& s2) const { |
|---|
| 215 | long b1 = bad_pos.find(s1)->second; |
|---|
| 216 | long b2 = bad_pos.find(s2)->second; |
|---|
| 217 | long cmp = b2 - b1; |
|---|
| 218 | if (!cmp) cmp = s1.compare(s2); |
|---|
| 219 | return cmp<0; |
|---|
| 220 | } |
|---|
| 221 | }; |
|---|
| 222 | |
|---|
| 223 | typedef vector<string> StringVector; |
|---|
| 224 | |
|---|
| 225 | static void getSpeciesSortedByBadPositions(BadPositionsForSpeciesConst& bad_pos, StringVector& result) { |
|---|
| 226 | result.clear(); |
|---|
| 227 | |
|---|
| 228 | for (BadPositionsForSpeciesConst::const_iterator s = bad_pos.begin(); s != bad_pos.end(); ++s) { |
|---|
| 229 | long badCount = long(s->second); |
|---|
| 230 | if (badCount>0) result.push_back(s->first); |
|---|
| 231 | } |
|---|
| 232 | |
|---|
| 233 | sort(result.begin(), result.end(), BadPositionsOrder(bad_pos)); |
|---|
| 234 | } |
|---|
| 235 | |
|---|
| 236 | static void parse_helix_list(StrArray& result_helices, const char *helix_list) { |
|---|
| 237 | ConstStrArray list_entries; |
|---|
| 238 | GBT_split_string(list_entries, helix_list, ','); |
|---|
| 239 | |
|---|
| 240 | for (size_t i = 0; i<list_entries.size(); ++i) { |
|---|
| 241 | const char *entry = list_entries[i]; |
|---|
| 242 | if (strcmp(entry, "*") == 0) { |
|---|
| 243 | result_helices.erase(); |
|---|
| 244 | result_helices.put(strdup("*")); |
|---|
| 245 | return; |
|---|
| 246 | } |
|---|
| 247 | else { |
|---|
| 248 | const char *minus = strchr(entry, '-'); |
|---|
| 249 | if (minus) { |
|---|
| 250 | int h1 = atoi(entry); |
|---|
| 251 | int h2 = atoi(minus+1); |
|---|
| 252 | |
|---|
| 253 | if (h1>h2) swap(h1, h2); |
|---|
| 254 | |
|---|
| 255 | for (int h = h1; h<=h2; ++h) { |
|---|
| 256 | result_helices.put(GBS_global_string_copy("%i", h)); |
|---|
| 257 | } |
|---|
| 258 | } |
|---|
| 259 | else { |
|---|
| 260 | result_helices.put(strdup(entry)); |
|---|
| 261 | } |
|---|
| 262 | } |
|---|
| 263 | } |
|---|
| 264 | } |
|---|
| 265 | |
|---|
| 266 | #define StrArray_FROM_HELIXLIST(varname, helixlist) StrArray varname; parse_helix_list(varname, helixlist) |
|---|
| 267 | |
|---|
| 268 | // -------------------------------------------------------------------------------- |
|---|
| 269 | |
|---|
| 270 | #ifdef UNIT_TESTS |
|---|
| 271 | #ifndef TEST_UNIT_H |
|---|
| 272 | #include <test_unit.h> |
|---|
| 273 | #endif |
|---|
| 274 | #include <arb_strbuf.h> |
|---|
| 275 | |
|---|
| 276 | static char *BadPositionsToString(BadPositionsForSpeciesConst& bad_pos) { |
|---|
| 277 | StringVector sortedSpecies; |
|---|
| 278 | getSpeciesSortedByBadPositions(bad_pos, sortedSpecies); |
|---|
| 279 | |
|---|
| 280 | GBS_strstruct result(sortedSpecies.size()*(8+1+3+1)+1); |
|---|
| 281 | |
|---|
| 282 | for (StringVector::const_iterator s = sortedSpecies.begin(); s != sortedSpecies.end(); ++s) { |
|---|
| 283 | const string& species = *s; |
|---|
| 284 | long badCount = bad_pos.find(species)->second; |
|---|
| 285 | result.ncat(species.c_str(), species.length()); |
|---|
| 286 | result.put('='); |
|---|
| 287 | result.putlong(badCount); |
|---|
| 288 | result.put(','); |
|---|
| 289 | } |
|---|
| 290 | |
|---|
| 291 | result.cut_tail(1); |
|---|
| 292 | return result.release(); |
|---|
| 293 | } |
|---|
| 294 | |
|---|
| 295 | |
|---|
| 296 | |
|---|
| 297 | void TEST_count_bad_ali_positions() { |
|---|
| 298 | GB_shell shell; |
|---|
| 299 | { |
|---|
| 300 | AW_root aw_root("min_ascii.arb", UNITTEST_MOCK); |
|---|
| 301 | GBDATA *gb_main = GB_open("TEST_trees.arb", "r"); |
|---|
| 302 | |
|---|
| 303 | char *ali = GBT_get_default_alignment(gb_main); |
|---|
| 304 | |
|---|
| 305 | AW_helix helix(&aw_root); |
|---|
| 306 | helix.init(gb_main, ali); |
|---|
| 307 | |
|---|
| 308 | char *marked_species; |
|---|
| 309 | char *all_species; |
|---|
| 310 | { |
|---|
| 311 | GB_transaction ta(gb_main); |
|---|
| 312 | |
|---|
| 313 | marked_species = GBT_store_marked_species(gb_main, false); |
|---|
| 314 | GBT_mark_all(gb_main, 1); |
|---|
| 315 | all_species = GBT_store_marked_species(gb_main, false); |
|---|
| 316 | } |
|---|
| 317 | |
|---|
| 318 | StrArray_FROM_HELIXLIST(helices_none, ""); |
|---|
| 319 | StrArray_FROM_HELIXLIST(helices_all, "*"); |
|---|
| 320 | StrArray_FROM_HELIXLIST(helices_1to5, "1-5"); |
|---|
| 321 | StrArray_FROM_HELIXLIST(helices_4to5, "4-5"); |
|---|
| 322 | StrArray_FROM_HELIXLIST(helices_1, "1"); |
|---|
| 323 | StrArray_FROM_HELIXLIST(helices_51, "5,1"); |
|---|
| 324 | StrArray_FROM_HELIXLIST(helices_514, "5,1,4"); |
|---|
| 325 | StrArray_FROM_HELIXLIST(helices_515, "5,1,5"); |
|---|
| 326 | |
|---|
| 327 | { |
|---|
| 328 | HelixAlignmentQuality marked_quality(gb_main, helix, ali, marked_species, helices_1, "#"); |
|---|
| 329 | HelixAlignmentQuality all_quality (gb_main, helix, ali, all_species, helices_51, "#"); |
|---|
| 330 | HelixAlignmentQuality all2_quality (gb_main, helix, ali, all_species, helices_514, "#*+"); |
|---|
| 331 | HelixAlignmentQuality all3_quality (gb_main, helix, ali, all_species, helices_all, "#*+"); |
|---|
| 332 | HelixAlignmentQuality all4_quality (gb_main, helix, ali, all_species, helices_4to5, "#*+"); |
|---|
| 333 | |
|---|
| 334 | TEST_EXPECT_NO_ERROR(marked_quality.get_error()); |
|---|
| 335 | TEST_EXPECT_NO_ERROR(all_quality.get_error()); |
|---|
| 336 | TEST_EXPECT_NO_ERROR(all2_quality.get_error()); |
|---|
| 337 | TEST_EXPECT_NO_ERROR(all3_quality.get_error()); |
|---|
| 338 | TEST_EXPECT_NO_ERROR(all4_quality.get_error()); |
|---|
| 339 | |
|---|
| 340 | // test number of examined species: |
|---|
| 341 | TEST_EXPECT_EQUAL(marked_quality.size(), 6); |
|---|
| 342 | TEST_EXPECT_EQUAL(all_quality.size(), 15); |
|---|
| 343 | TEST_EXPECT_EQUAL(all2_quality.size(), 15); |
|---|
| 344 | TEST_EXPECT_EQUAL(all3_quality.size(), 15); |
|---|
| 345 | TEST_EXPECT_EQUAL(all4_quality.size(), 15); |
|---|
| 346 | |
|---|
| 347 | TEST_EXPECT_EQUAL(marked_quality.positions(), 10); |
|---|
| 348 | TEST_EXPECT_EQUAL(all_quality.positions(), 18); |
|---|
| 349 | TEST_EXPECT_EQUAL(all2_quality.positions(), 20); |
|---|
| 350 | TEST_EXPECT_EQUAL(all3_quality.positions(), 20); |
|---|
| 351 | TEST_EXPECT_EQUAL(all4_quality.positions(), 10); |
|---|
| 352 | |
|---|
| 353 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(BadPositionsToString(marked_quality.results()), "CorGluta=3,CorAquat=1,CytAquat=1"); |
|---|
| 354 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(BadPositionsToString(all_quality.results()), "CloTyro3=7,CytAquat=7,CloInnoc=3,CorGluta=3,CloBifer=1,CloCarni=1,CloPaste=1,CloTyro2=1,CloTyro4=1,CloTyrob=1,CorAquat=1"); |
|---|
| 355 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(BadPositionsToString(all2_quality.results()), "CloTyro3=10,CytAquat=8,CloTyro2=7,CloTyro4=7,CloTyrob=7,CorGluta=4,CloInnoc=3,CloBifer=2,CloButy2=1,CloButyr=1,CloCarni=1,CloPaste=1,CorAquat=1"); |
|---|
| 356 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(BadPositionsToString(all3_quality.results()), "CloTyro3=10,CytAquat=8,CloTyro2=7,CloTyro4=7,CloTyrob=7,CorGluta=4,CloInnoc=3,CloBifer=2,CloButy2=1,CloButyr=1,CloCarni=1,CloPaste=1,CorAquat=1"); |
|---|
| 357 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(BadPositionsToString(all4_quality.results()), "CytAquat=6,CloTyro3=5,CloInnoc=2,CloTyro2=2,CloTyro4=2,CloTyrob=2,CloBifer=1"); |
|---|
| 358 | } |
|---|
| 359 | |
|---|
| 360 | // test errors: |
|---|
| 361 | { |
|---|
| 362 | HelixAlignmentQuality err1(gb_main, helix, ali, "", helices_51, "#"); |
|---|
| 363 | HelixAlignmentQuality err2(gb_main, helix, ali, marked_species, helices_none, "#"); |
|---|
| 364 | HelixAlignmentQuality err3(gb_main, helix, ali, marked_species, helices_51, " "); // define no bad helix symbol (spaces are ignored) |
|---|
| 365 | HelixAlignmentQuality err4(gb_main, helix, ali, marked_species, helices_515, "#"); |
|---|
| 366 | HelixAlignmentQuality err5(gb_main, helix, ali, "fakeSpec", helices_51, "#"); |
|---|
| 367 | HelixAlignmentQuality err6(gb_main, helix, ali, marked_species, helices_1to5, "#"); |
|---|
| 368 | |
|---|
| 369 | TEST_EXPECT_ERROR_CONTAINS(err1.get_error(), "No species found"); |
|---|
| 370 | TEST_EXPECT_ERROR_CONTAINS(err2.get_error(), "Invalid helix number ''"); |
|---|
| 371 | TEST_EXPECT_ERROR_CONTAINS(err3.get_error(), "no 'bad' helix symbol defined"); |
|---|
| 372 | TEST_EXPECT_ERROR_CONTAINS(err4.get_error(), "Duplicate entry for helix position 65"); |
|---|
| 373 | |
|---|
| 374 | TEST_EXPECT_NO_ERROR(err5.get_error()); // no error yet |
|---|
| 375 | TEST_EXPECT_EQUAL(err5.size(), 1); // number of species |
|---|
| 376 | TEST_EXPECT_EQUAL(err5.positions(), 18); |
|---|
| 377 | err5.results(); // triggers error |
|---|
| 378 | TEST_EXPECT_ERROR_CONTAINS(err5.get_error(), "no such species: 'fakeSpec'"); |
|---|
| 379 | |
|---|
| 380 | TEST_EXPECT_ERROR_CONTAINS(err6.get_error(), "Invalid helix number '2'"); |
|---|
| 381 | } |
|---|
| 382 | |
|---|
| 383 | free(all_species); |
|---|
| 384 | free(marked_species); |
|---|
| 385 | free(ali); |
|---|
| 386 | |
|---|
| 387 | GB_close(gb_main); |
|---|
| 388 | } |
|---|
| 389 | } |
|---|
| 390 | |
|---|
| 391 | #endif // UNIT_TESTS |
|---|
| 392 | |
|---|
| 393 | // -------------------------------------------------------------------------------- |
|---|
| 394 | |
|---|
| 395 | #define AWAR_BADALI_BASE "badali/" |
|---|
| 396 | #define AWAR_BADALI_HELIXLIST AWAR_BADALI_BASE "helixlist" // helix-numbers (comma-separated) |
|---|
| 397 | #define AWAR_BADALI_SYMBOLS AWAR_BADALI_BASE "symbols" // helix-symbols which count as "bad" |
|---|
| 398 | |
|---|
| 399 | #define AWAR_BADALI_TEMP "tmp/" AWAR_BADALI_BASE |
|---|
| 400 | #define AWAR_BADALI_SPECIES AWAR_BADALI_TEMP "selected" // selected species |
|---|
| 401 | #define AWAR_BADALI_FIELD AWAR_BADALI_TEMP "field" // name of field to store bad positions |
|---|
| 402 | |
|---|
| 403 | void ED4_create_detect_bad_alignment_awars(AW_root *aw_root, AW_default aw_def) { |
|---|
| 404 | aw_root->awar_string(AWAR_BADALI_HELIXLIST, "", aw_def)->set_srt(" =,:,,=,"); |
|---|
| 405 | aw_root->awar_string(AWAR_BADALI_SYMBOLS, "#", aw_def); |
|---|
| 406 | aw_root->awar_string(AWAR_BADALI_SPECIES, "", aw_def); |
|---|
| 407 | aw_root->awar_string(AWAR_BADALI_FIELD, "", aw_def); |
|---|
| 408 | } |
|---|
| 409 | |
|---|
| 410 | static ARB_ERROR add_species_to_list_cb(ED4_base *base, StrArray *species) { |
|---|
| 411 | ARB_ERROR error = NULp; |
|---|
| 412 | |
|---|
| 413 | if (base->is_species_manager()) { |
|---|
| 414 | ED4_species_manager *species_manager = base->to_species_manager(); |
|---|
| 415 | ED4_species_name_terminal *species_name_terminal = species_manager->search_spec_child_rek(LEV_SPECIES_NAME)->to_species_name_terminal(); |
|---|
| 416 | |
|---|
| 417 | if (species_name_terminal->get_species_pointer() && !species_manager->is_SAI_manager()) { |
|---|
| 418 | char *species_name = GB_read_as_string(species_name_terminal->get_species_pointer()); |
|---|
| 419 | species->put(species_name); |
|---|
| 420 | } |
|---|
| 421 | } |
|---|
| 422 | |
|---|
| 423 | return error; |
|---|
| 424 | } |
|---|
| 425 | |
|---|
| 426 | static char *create_list_of_loaded_species() { |
|---|
| 427 | GB_transaction ta(ED4_ROOT->get_gb_main()); |
|---|
| 428 | StrArray species; |
|---|
| 429 | ARB_ERROR error = ED4_ROOT->root_group_man->route_down_hierarchy(makeED4_route_cb(add_species_to_list_cb, &species)); |
|---|
| 430 | aw_message_if(error.deliver()); |
|---|
| 431 | return GBT_join_strings(species, ';'); |
|---|
| 432 | } |
|---|
| 433 | |
|---|
| 434 | static void calc_and_update_alignment_errors_cb(AW_window *aww, AW_selection_list *sellst) { |
|---|
| 435 | // Performs the following jobs: |
|---|
| 436 | // - calculate alignment errors. |
|---|
| 437 | // - update the list of worst helix alignments (in GUI). |
|---|
| 438 | // - update/write contents of database field (alignment error count; optional) |
|---|
| 439 | |
|---|
| 440 | AW_root *aw_root = aww->get_root(); |
|---|
| 441 | GBDATA *gb_main = ED4_ROOT->get_gb_main(); |
|---|
| 442 | const char *target_field; |
|---|
| 443 | |
|---|
| 444 | { |
|---|
| 445 | GB_transaction ta(gb_main); |
|---|
| 446 | |
|---|
| 447 | target_field = prepare_and_get_selected_itemfield(aw_root, AWAR_BADALI_FIELD, gb_main, SPECIES_get_selector(), FIF_ALLOW_NONE); |
|---|
| 448 | if (!target_field) { |
|---|
| 449 | if (GB_have_error()) { |
|---|
| 450 | aw_message(GBS_global_string("Invalid target field selected: %s", GB_await_error())); |
|---|
| 451 | return; |
|---|
| 452 | } |
|---|
| 453 | } |
|---|
| 454 | } |
|---|
| 455 | |
|---|
| 456 | char *species_list = create_list_of_loaded_species(); |
|---|
| 457 | char *bad_symbols = aw_root->awar(AWAR_BADALI_SYMBOLS)->read_string(); |
|---|
| 458 | |
|---|
| 459 | StrArray_FROM_HELIXLIST(helices, aw_root->awar(AWAR_BADALI_HELIXLIST)->read_char_pntr()); |
|---|
| 460 | |
|---|
| 461 | // @@@ better show progress (next command may need much time) |
|---|
| 462 | HelixAlignmentQuality quality(gb_main, |
|---|
| 463 | *ED4_ROOT->helix, |
|---|
| 464 | ED4_ROOT->get_alignment_name(), |
|---|
| 465 | species_list, |
|---|
| 466 | helices, |
|---|
| 467 | bad_symbols); |
|---|
| 468 | |
|---|
| 469 | sellst->clear(); |
|---|
| 470 | if (!quality.has_error()) { |
|---|
| 471 | BadPositionsForSpeciesConst& bad_pos = quality.results(); |
|---|
| 472 | if (!quality.has_error()) { |
|---|
| 473 | const bool writeToField = target_field; |
|---|
| 474 | if (writeToField) { |
|---|
| 475 | GB_transaction ta(gb_main); |
|---|
| 476 | GB_ERROR error = NULp; |
|---|
| 477 | GBDATA *gb_species_data = GBT_get_species_data(gb_main); |
|---|
| 478 | |
|---|
| 479 | for (BadPositionsForSpeciesConst::const_iterator bp = bad_pos.begin(); bp != bad_pos.end() && !error; ++bp) { // this loop includes zero counts |
|---|
| 480 | const char* species_name = bp->first.c_str(); |
|---|
| 481 | long bad_positions = bp->second; |
|---|
| 482 | |
|---|
| 483 | GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, species_name); |
|---|
| 484 | if (!gb_species) { |
|---|
| 485 | error = GBS_global_string("Could not find species '%s' (Reason: %s)", species_name, GB_await_error()); |
|---|
| 486 | } |
|---|
| 487 | else { |
|---|
| 488 | GBDATA *gb_field = GBT_searchOrCreate_itemfield_according_to_changekey(gb_species, target_field, SPECIES_get_selector().change_key_path); |
|---|
| 489 | if (!gb_field) error = GB_await_error(); |
|---|
| 490 | if (!error) error = GB_write_lossless_int(gb_field, bad_positions); |
|---|
| 491 | } |
|---|
| 492 | } |
|---|
| 493 | |
|---|
| 494 | aw_message_if(error); |
|---|
| 495 | } |
|---|
| 496 | |
|---|
| 497 | StringVector order; |
|---|
| 498 | getSpeciesSortedByBadPositions(bad_pos, order); |
|---|
| 499 | |
|---|
| 500 | for (StringVector::const_iterator species = order.begin(); species != order.end(); ++species) { // this loop excludes zero counts |
|---|
| 501 | const char *species_name = species->c_str(); |
|---|
| 502 | size_t bad = bad_pos.find(*species)->second; |
|---|
| 503 | |
|---|
| 504 | sellst->insert(GBS_global_string("%-8s | %zu", species_name, bad), species_name); |
|---|
| 505 | } |
|---|
| 506 | sellst->insert_default("<acceptable helix alignment>", ""); |
|---|
| 507 | } |
|---|
| 508 | } |
|---|
| 509 | |
|---|
| 510 | if (quality.has_error()) { |
|---|
| 511 | sellst->clear(); |
|---|
| 512 | sellst->insert_default(GBS_global_string("<Error: %s>", quality.get_error()), ""); |
|---|
| 513 | } |
|---|
| 514 | |
|---|
| 515 | sellst->update(); |
|---|
| 516 | |
|---|
| 517 | free(bad_symbols); |
|---|
| 518 | free(species_list); |
|---|
| 519 | } |
|---|
| 520 | |
|---|
| 521 | enum JumpWhy { BY_SELECTION, BY_BUTTON }; |
|---|
| 522 | |
|---|
| 523 | static void jump_to_next_helix_cb(AW_window *, JumpWhy called) { |
|---|
| 524 | ED4_MostRecentWinContext context; |
|---|
| 525 | |
|---|
| 526 | ED4_window *win = current_ed4w(); |
|---|
| 527 | AW_root *aw_root = win->aww->get_root(); |
|---|
| 528 | |
|---|
| 529 | StrArray_FROM_HELIXLIST(helices, aw_root->awar(AWAR_BADALI_HELIXLIST)->read_char_pntr()); |
|---|
| 530 | |
|---|
| 531 | AW_awar *awar_helixnr = aw_root->awar(win->awar_path_for_helixNr); |
|---|
| 532 | int current_helixnr = atoi(awar_helixnr->read_char_pntr()); |
|---|
| 533 | int wanted_helixnr = 0; |
|---|
| 534 | int preference = current_helixnr>0 ? 1 : -1; |
|---|
| 535 | |
|---|
| 536 | if (current_helixnr == 0) { |
|---|
| 537 | wanted_helixnr = atoi(helices[0]); |
|---|
| 538 | } |
|---|
| 539 | else { |
|---|
| 540 | if (called == BY_SELECTION) { |
|---|
| 541 | wanted_helixnr = abs(current_helixnr); |
|---|
| 542 | } |
|---|
| 543 | else { // if called == BY_BUTTON => select new helix |
|---|
| 544 | wanted_helixnr = atoi(helices[0]); |
|---|
| 545 | for (size_t i = 0; i<helices.size()-1; ++i) { |
|---|
| 546 | if (abs(current_helixnr) == abs(atoi(helices[i]))) { |
|---|
| 547 | wanted_helixnr = atoi(helices[i+1]); |
|---|
| 548 | } |
|---|
| 549 | } |
|---|
| 550 | } |
|---|
| 551 | } |
|---|
| 552 | |
|---|
| 553 | wanted_helixnr *= preference; // choose left or right helix |
|---|
| 554 | |
|---|
| 555 | if (wanted_helixnr != 0) { |
|---|
| 556 | if (wanted_helixnr != current_helixnr || called == BY_BUTTON) { // avoid triggering callback w/o real change |
|---|
| 557 | awar_helixnr->write_string(GBS_global_string("%i", wanted_helixnr)); |
|---|
| 558 | // callback is NOT bound to AWAR (but to inputfield) -> call it manually: |
|---|
| 559 | ED4_set_helixnr(win->aww, win->awar_path_for_helixNr); |
|---|
| 560 | } |
|---|
| 561 | } |
|---|
| 562 | else { |
|---|
| 563 | aw_message("No helix to jump to."); |
|---|
| 564 | } |
|---|
| 565 | } |
|---|
| 566 | |
|---|
| 567 | enum SelectedAwar { SELECTED_SPECIES, SELECTED_BAD_ALI }; |
|---|
| 568 | |
|---|
| 569 | static void selected_changed_cb(AW_root *aw_root, SelectedAwar whatChanged) { |
|---|
| 570 | static bool in_callback = false; |
|---|
| 571 | |
|---|
| 572 | if (!in_callback) { |
|---|
| 573 | LocallyModify<bool> flag(in_callback, true); |
|---|
| 574 | |
|---|
| 575 | const char *source_awar_name = AWAR_BADALI_SPECIES; |
|---|
| 576 | const char *dest_awar_name = AWAR_SPECIES_NAME; |
|---|
| 577 | |
|---|
| 578 | if (whatChanged == SELECTED_SPECIES) { |
|---|
| 579 | swap(source_awar_name, dest_awar_name); |
|---|
| 580 | } |
|---|
| 581 | |
|---|
| 582 | const char *selected_species = aw_root->awar(source_awar_name)->read_char_pntr(); |
|---|
| 583 | if (selected_species[0]) { // if nothing selected => do not change other AWAR |
|---|
| 584 | aw_root->awar(dest_awar_name)->write_string(selected_species); |
|---|
| 585 | if (SELECTED_BAD_ALI) { |
|---|
| 586 | jump_to_next_helix_cb(NULp, BY_SELECTION); |
|---|
| 587 | } |
|---|
| 588 | } |
|---|
| 589 | } |
|---|
| 590 | } |
|---|
| 591 | |
|---|
| 592 | void ED4_popup_detect_bad_alignment_window(AW_window *editor_window, const WindowCallback *helixSettings_cb) { |
|---|
| 593 | static AW_window_simple *aws = NULp; |
|---|
| 594 | |
|---|
| 595 | ED4_LocalWinContext uses(editor_window); |
|---|
| 596 | |
|---|
| 597 | if (!aws) { |
|---|
| 598 | AW_root *aw_root = editor_window->get_root(); |
|---|
| 599 | |
|---|
| 600 | aws = new AW_window_simple; |
|---|
| 601 | |
|---|
| 602 | aws->init(aw_root, "DETECT_BAD_ALI", "Detect bad helix alignment"); |
|---|
| 603 | aws->load_xfig("edit4/detect_bad_ali.fig"); |
|---|
| 604 | |
|---|
| 605 | aws->button_length(10); |
|---|
| 606 | |
|---|
| 607 | aws->at("close"); |
|---|
| 608 | aws->callback(AW_POPDOWN); |
|---|
| 609 | aws->create_button("CLOSE", "CLOSE", "C"); |
|---|
| 610 | |
|---|
| 611 | aws->at("help"); |
|---|
| 612 | aws->callback(makeHelpCallback("bad_alignment.hlp")); |
|---|
| 613 | aws->create_button("HELP", "HELP", "H"); |
|---|
| 614 | |
|---|
| 615 | aws->at("list"); |
|---|
| 616 | AW_selection_list *sellst = aws->create_selection_list(AWAR_BADALI_SPECIES); |
|---|
| 617 | sellst->insert_default("<not calculated>", ""); |
|---|
| 618 | sellst->update(); |
|---|
| 619 | |
|---|
| 620 | aws->at("config"); |
|---|
| 621 | aws->auto_space(5, 5); |
|---|
| 622 | aws->label_length(20); |
|---|
| 623 | |
|---|
| 624 | aws->label("Helices to examine\n" |
|---|
| 625 | "(comma-separated list)"); |
|---|
| 626 | aws->create_input_field(AWAR_BADALI_HELIXLIST, 15); |
|---|
| 627 | |
|---|
| 628 | aws->callback(makeWindowCallback(jump_to_next_helix_cb, BY_BUTTON)); |
|---|
| 629 | aws->create_autosize_button("JUMP", "Jump", "J"); |
|---|
| 630 | |
|---|
| 631 | aws->at_newline(); |
|---|
| 632 | |
|---|
| 633 | aws->label("Detected symbols"); |
|---|
| 634 | aws->create_input_field(AWAR_BADALI_SYMBOLS, 5); |
|---|
| 635 | |
|---|
| 636 | aws->callback(*helixSettings_cb); |
|---|
| 637 | aws->create_autosize_button("HELIX_SETTINGS", "Helix settings", "H"); |
|---|
| 638 | |
|---|
| 639 | aws->at_newline(); |
|---|
| 640 | aws->label("Write to database field"); |
|---|
| 641 | create_itemfield_selection_button(aws, FieldSelDef(AWAR_BADALI_FIELD, ED4_ROOT->get_gb_main(), SPECIES_get_selector(), FIELD_FILTER_INT_WRITEABLE, "target field", SF_ALLOW_NEW), NULp); |
|---|
| 642 | |
|---|
| 643 | aws->at("calc"); |
|---|
| 644 | aws->callback(makeWindowCallback(calc_and_update_alignment_errors_cb, sellst)); |
|---|
| 645 | aws->create_button("CALCULATE", "Calculate", "C"); |
|---|
| 646 | |
|---|
| 647 | aw_root->awar(AWAR_BADALI_SPECIES)->add_callback(makeRootCallback(selected_changed_cb, SELECTED_BAD_ALI)); |
|---|
| 648 | aw_root->awar(AWAR_SPECIES_NAME)->add_callback(makeRootCallback(selected_changed_cb, SELECTED_SPECIES)); |
|---|
| 649 | } |
|---|
| 650 | |
|---|
| 651 | e4_assert(aws); |
|---|
| 652 | aws->activate(); |
|---|
| 653 | |
|---|
| 654 | } |
|---|
| 655 | |
|---|
| 656 | |
|---|