| 1 | // ============================================================ // |
|---|
| 2 | // // |
|---|
| 3 | // File : FilteredExport.cxx // |
|---|
| 4 | // Purpose : encapsulate SAI-filtered fasta exporter // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in June 2017 // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // ============================================================ // |
|---|
| 10 | |
|---|
| 11 | #include "FilteredExport.h" |
|---|
| 12 | #include <arbdbt.h> |
|---|
| 13 | #include <gb_aci.h> |
|---|
| 14 | #include <arb_progress.h> |
|---|
| 15 | #include <algorithm> |
|---|
| 16 | |
|---|
| 17 | AP_filter *FilterDefinition::make_filter(GBDATA *gb_main, const char *aliName, size_t aliSize) const { |
|---|
| 18 | /*! generate defined filter |
|---|
| 19 | * @param aliName name of alignment to filter |
|---|
| 20 | * @return generated filter or NULp (error is exported in that case) |
|---|
| 21 | */ |
|---|
| 22 | GB_ERROR error = NULp; |
|---|
| 23 | AP_filter *filter = NULp; |
|---|
| 24 | |
|---|
| 25 | { |
|---|
| 26 | GBDATA *gb_sai = GBT_expect_SAI(gb_main, sai_name.c_str()); |
|---|
| 27 | if (!gb_sai) error = GB_await_error(); |
|---|
| 28 | else { |
|---|
| 29 | GBDATA *gb_data = GBT_find_sequence(gb_sai, aliName); |
|---|
| 30 | if (!gb_data) { |
|---|
| 31 | error = GBS_global_string("SAI '%s' has no data in alignment '%s'", sai_name.c_str(), aliName); |
|---|
| 32 | } |
|---|
| 33 | else { |
|---|
| 34 | #if defined(ASSERTION_USED) |
|---|
| 35 | long datasize = GB_read_count(gb_data); // may be less than ali-length! |
|---|
| 36 | arb_assert(datasize == long(aliSize)); // @@@ write a test failing this assertion (BLOCK and PASS need to handle this differently) |
|---|
| 37 | #endif |
|---|
| 38 | |
|---|
| 39 | char *sai_data = GB_read_as_string(gb_data); // @@@ NOT_ALL_SAI_HAVE_DATA |
|---|
| 40 | if (!sai_data) error = GB_await_error(); |
|---|
| 41 | else { |
|---|
| 42 | bool blockChars = (type == BLOCK) != inverse; |
|---|
| 43 | CharRangeTable crt(characters.c_str()); |
|---|
| 44 | if (blockChars) { |
|---|
| 45 | filter = new AP_filter(sai_data, crt.expandedRange(), aliSize); // blocks characters |
|---|
| 46 | } |
|---|
| 47 | else { |
|---|
| 48 | AP_filter inv_filt(sai_data, crt.expandedRange(), aliSize); |
|---|
| 49 | filter = new AP_filter(NOT, inv_filt); |
|---|
| 50 | } |
|---|
| 51 | free(sai_data); |
|---|
| 52 | } |
|---|
| 53 | } |
|---|
| 54 | } |
|---|
| 55 | } |
|---|
| 56 | |
|---|
| 57 | arb_assert(contradicted(filter, error)); |
|---|
| 58 | if (error) GB_export_error(error); |
|---|
| 59 | return filter; |
|---|
| 60 | } |
|---|
| 61 | |
|---|
| 62 | FilteredExport::FilteredExport(GBDATA *gb_main_, const char *aliname_, size_t alisize_) : |
|---|
| 63 | gb_main(gb_main_), |
|---|
| 64 | aliname(nulldup(aliname_)), |
|---|
| 65 | alisize(alisize_), |
|---|
| 66 | accept_missing_data(false), |
|---|
| 67 | header_ACI(strdup("readdb(name)")), |
|---|
| 68 | sequence_ACI(NULp), |
|---|
| 69 | count_table(NULp), |
|---|
| 70 | minCount(0), |
|---|
| 71 | filter(alisize), |
|---|
| 72 | filter_added(false) |
|---|
| 73 | {} |
|---|
| 74 | |
|---|
| 75 | FilteredExport::~FilteredExport() { |
|---|
| 76 | free(header_ACI); |
|---|
| 77 | free(sequence_ACI); |
|---|
| 78 | free(aliname); |
|---|
| 79 | } |
|---|
| 80 | |
|---|
| 81 | GB_ERROR FilteredExport::add_SAI_filter(const FilterDefinition& filterDef) { |
|---|
| 82 | AP_filter *newFilter = filterDef.make_filter(gb_main, aliname, alisize); |
|---|
| 83 | if (!newFilter) { |
|---|
| 84 | return GB_await_error(); |
|---|
| 85 | } |
|---|
| 86 | |
|---|
| 87 | if (!filter_added) { |
|---|
| 88 | filter = *newFilter; |
|---|
| 89 | filter_added = true; |
|---|
| 90 | } |
|---|
| 91 | else { |
|---|
| 92 | switch (filterDef.get_type()) { |
|---|
| 93 | case PASS: filter = AP_filter(filter, OR, *newFilter); break; |
|---|
| 94 | case BLOCK: filter = AP_filter(filter, AND, *newFilter); break; |
|---|
| 95 | } |
|---|
| 96 | } |
|---|
| 97 | delete newFilter; |
|---|
| 98 | return NULp; |
|---|
| 99 | } |
|---|
| 100 | |
|---|
| 101 | int FilteredExport::count_bases(const char *seq) const { |
|---|
| 102 | int count = 0; |
|---|
| 103 | for (int p = 0; seq[p]; ++p) { |
|---|
| 104 | count += count_table.isSet(seq[p]); |
|---|
| 105 | } |
|---|
| 106 | return count; |
|---|
| 107 | } |
|---|
| 108 | |
|---|
| 109 | char *FilteredExport::get_filtered_sequence(GBDATA *gb_species, const char*& reason) const { |
|---|
| 110 | /* returns filtered sequence or |
|---|
| 111 | * NULp (which means "do not export") |
|---|
| 112 | * - if an error occurred (error is exported only in that case!), |
|---|
| 113 | * - if species had no data in alignment (and accept_missing_data is true) or |
|---|
| 114 | * - if filtered sequence does not have min. required base count. |
|---|
| 115 | * If NULp returned and no error exported -> 'reason' gets set! |
|---|
| 116 | */ |
|---|
| 117 | arb_assert(gb_species); |
|---|
| 118 | arb_assert(!GB_have_error()); |
|---|
| 119 | |
|---|
| 120 | reason = NULp; |
|---|
| 121 | |
|---|
| 122 | char *seq = NULp; |
|---|
| 123 | GBDATA *gb_data = GBT_find_sequence(gb_species, aliname); |
|---|
| 124 | |
|---|
| 125 | if (!gb_data) { |
|---|
| 126 | if (GB_have_error()) { |
|---|
| 127 | GB_export_error(GB_failedTo_error("read sequence of ", GBT_get_name_or_description(gb_species), GB_await_error())); |
|---|
| 128 | } |
|---|
| 129 | else { |
|---|
| 130 | if (accept_missing_data) { |
|---|
| 131 | reason = "has no data"; |
|---|
| 132 | } |
|---|
| 133 | else { |
|---|
| 134 | GB_export_errorf("species '%s' has no data in '%s'", GBT_get_name_or_description(gb_species), aliname); |
|---|
| 135 | } |
|---|
| 136 | } |
|---|
| 137 | } |
|---|
| 138 | else { |
|---|
| 139 | GB_ERROR error = filter.is_invalid(); |
|---|
| 140 | if (error) GB_export_error(error); |
|---|
| 141 | else { |
|---|
| 142 | const char *cseq = GB_read_char_pntr(gb_data); |
|---|
| 143 | seq = filter.filter_string(cseq); |
|---|
| 144 | |
|---|
| 145 | // check min. requirements: |
|---|
| 146 | if (minCount>0) { // otherwise check would always succeed |
|---|
| 147 | int count = count_bases(seq); |
|---|
| 148 | if (count<minCount) { // too few bases -> do not export |
|---|
| 149 | freenull(seq); |
|---|
| 150 | reason = "not enough base-characters left"; |
|---|
| 151 | } |
|---|
| 152 | } |
|---|
| 153 | } |
|---|
| 154 | } |
|---|
| 155 | |
|---|
| 156 | if (seq && sequence_ACI) { |
|---|
| 157 | char *seq_postprocessed = GB_command_interpreter_in_env(seq, sequence_ACI, GBL_simple_call_env(gb_species)); |
|---|
| 158 | if (seq_postprocessed) { |
|---|
| 159 | freeset(seq, seq_postprocessed); |
|---|
| 160 | } |
|---|
| 161 | else { |
|---|
| 162 | char *error = strdup(GB_await_error()); |
|---|
| 163 | GB_export_errorf("Failed to post-process sequence data of species '%s' (Reason: %s)", GBT_get_name_or_description(gb_species), error); |
|---|
| 164 | free(error); |
|---|
| 165 | freenull(seq); |
|---|
| 166 | } |
|---|
| 167 | } |
|---|
| 168 | |
|---|
| 169 | arb_assert(contradicted(seq, GB_have_error() || reason)); |
|---|
| 170 | arb_assert(implicated(!seq, contradicted(GB_have_error(), reason))); |
|---|
| 171 | |
|---|
| 172 | return seq; |
|---|
| 173 | } |
|---|
| 174 | |
|---|
| 175 | char *FilteredExport::get_fasta_header(GBDATA *gb_species) const { |
|---|
| 176 | return GB_command_interpreter_in_env("", header_ACI, GBL_simple_call_env(gb_species)); |
|---|
| 177 | } |
|---|
| 178 | |
|---|
| 179 | GB_ERROR FilteredExport::write_fasta(FILE *out) { |
|---|
| 180 | GB_ERROR error = NULp; |
|---|
| 181 | int exported = 0; |
|---|
| 182 | int skipped = 0; |
|---|
| 183 | |
|---|
| 184 | { |
|---|
| 185 | arb_progress progress("Write sequence data", GBT_get_species_count(gb_main)); |
|---|
| 186 | |
|---|
| 187 | for (GBDATA *gb_species = GBT_first_species(gb_main); |
|---|
| 188 | gb_species && !error; |
|---|
| 189 | gb_species = GBT_next_species(gb_species)) |
|---|
| 190 | { |
|---|
| 191 | const char *reason; |
|---|
| 192 | char *filt_seq = get_filtered_sequence(gb_species, reason); |
|---|
| 193 | |
|---|
| 194 | if (filt_seq) { |
|---|
| 195 | ++exported; // count exported species |
|---|
| 196 | fputc('>', out); |
|---|
| 197 | { |
|---|
| 198 | char *header = get_fasta_header(gb_species); |
|---|
| 199 | if (header) { |
|---|
| 200 | fputs(header, out); |
|---|
| 201 | free(header); |
|---|
| 202 | } |
|---|
| 203 | else { |
|---|
| 204 | error = GB_await_error(); |
|---|
| 205 | } |
|---|
| 206 | } |
|---|
| 207 | fputc('\n', out); |
|---|
| 208 | |
|---|
| 209 | fputs(filt_seq, out); |
|---|
| 210 | fputc('\n', out); |
|---|
| 211 | |
|---|
| 212 | free(filt_seq); |
|---|
| 213 | } |
|---|
| 214 | else { |
|---|
| 215 | if (reason) { |
|---|
| 216 | ++skipped; // count skipped |
|---|
| 217 | fprintf(stderr, "Skipped species '%s' (Reason: %s)\n", GBT_get_name_or_description(gb_species), reason); |
|---|
| 218 | } |
|---|
| 219 | else { |
|---|
| 220 | error = GB_await_error(); |
|---|
| 221 | } |
|---|
| 222 | } |
|---|
| 223 | ++progress; |
|---|
| 224 | } |
|---|
| 225 | |
|---|
| 226 | if (error) progress.done(); |
|---|
| 227 | } |
|---|
| 228 | |
|---|
| 229 | if (!error) { |
|---|
| 230 | if (exported) { |
|---|
| 231 | fprintf(stderr, "Summary: %i species exported", exported); |
|---|
| 232 | if (skipped) fprintf(stderr, ", %i species skipped", skipped); |
|---|
| 233 | fputc('\n', stderr); |
|---|
| 234 | } |
|---|
| 235 | else { |
|---|
| 236 | fprintf(stderr, "Summary: all %i species skipped (warning: generated empty file)\n", skipped); |
|---|
| 237 | } |
|---|
| 238 | } |
|---|
| 239 | |
|---|
| 240 | return error; |
|---|
| 241 | } |
|---|
| 242 | |
|---|
| 243 | // -------------------------------------------------------------------------------- |
|---|
| 244 | |
|---|
| 245 | #ifdef UNIT_TESTS |
|---|
| 246 | #ifndef TEST_UNIT_H |
|---|
| 247 | #include <test_unit.h> |
|---|
| 248 | #endif |
|---|
| 249 | |
|---|
| 250 | #define TEST_EXPECT_CRT_DEFINES(arg,expected) TEST_EXPECT_EQUAL(CharRangeTable(arg).expandedRange(), expected) |
|---|
| 251 | |
|---|
| 252 | #define ABC "ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
|---|
| 253 | #define abc "abcdefghijklmnopqrstuvwxyz" |
|---|
| 254 | |
|---|
| 255 | void TEST_CharRangeTable() { |
|---|
| 256 | TEST_EXPECT_CRT_DEFINES("abc", "abc"); |
|---|
| 257 | TEST_EXPECT_CRT_DEFINES("cba", "abc"); |
|---|
| 258 | TEST_EXPECT_CRT_DEFINES("c-a", "-ac"); // unwanted reverse range does not expand! |
|---|
| 259 | TEST_EXPECT_CRT_DEFINES("a-c", "abc"); |
|---|
| 260 | |
|---|
| 261 | TEST_EXPECT_CRT_DEFINES("a-db-e", "abcde"); |
|---|
| 262 | TEST_EXPECT_CRT_DEFINES("a-de-b", "-abcde"); |
|---|
| 263 | |
|---|
| 264 | TEST_EXPECT_CRT_DEFINES("-ab", "-ab"); |
|---|
| 265 | TEST_EXPECT_CRT_DEFINES("a-b", "ab"); |
|---|
| 266 | TEST_EXPECT_CRT_DEFINES("ab-", "-ab"); |
|---|
| 267 | |
|---|
| 268 | TEST_EXPECT_CRT_DEFINES("a-ac-c", "ac"); |
|---|
| 269 | |
|---|
| 270 | TEST_EXPECT_CRT_DEFINES("a-zA-Z", ABC abc); |
|---|
| 271 | |
|---|
| 272 | // dangerous ranges are not expanded: |
|---|
| 273 | TEST_EXPECT_CRT_DEFINES(".-=", "-.="); |
|---|
| 274 | TEST_EXPECT_CRT_DEFINES("=-.", "-.="); |
|---|
| 275 | |
|---|
| 276 | TEST_EXPECT_CRT_DEFINES("a-Z", "-Za"); |
|---|
| 277 | TEST_EXPECT_CRT_DEFINES("A-z", "-Az"); |
|---|
| 278 | } |
|---|
| 279 | |
|---|
| 280 | #undef abc |
|---|
| 281 | #undef ABC |
|---|
| 282 | |
|---|
| 283 | #define TEST_EXPECT_SEQWITHLENGTH__NOERROREXPORTED(create_seqcopy, expected_length) do { \ |
|---|
| 284 | char *seqcopy; \ |
|---|
| 285 | TEST_EXPECT_RESULT__NOERROREXPORTED(seqcopy = create_seqcopy); \ |
|---|
| 286 | TEST_EXPECT_EQUAL(strlen(seqcopy), expected_length); \ |
|---|
| 287 | free(seqcopy); \ |
|---|
| 288 | } while(0) |
|---|
| 289 | |
|---|
| 290 | void TEST_FilteredExport() { |
|---|
| 291 | // see also ../../TOOLS/arb_test.cxx@SAI_FILTERED_EXPORT_TESTS |
|---|
| 292 | |
|---|
| 293 | GB_shell shell; |
|---|
| 294 | GBDATA *gb_main = GB_open("TEST_prot_tiny.arb", "r"); // ../../UNIT_TESTER/run/TEST_prot_tiny.arb |
|---|
| 295 | |
|---|
| 296 | // only "CytLyti6" has data in 'ali_dna_incomplete' |
|---|
| 297 | |
|---|
| 298 | { |
|---|
| 299 | char *ali_name = NULp; |
|---|
| 300 | size_t ali_size = 0; |
|---|
| 301 | |
|---|
| 302 | { |
|---|
| 303 | GB_transaction ta(gb_main); |
|---|
| 304 | ali_name = GBT_get_default_alignment(gb_main); |
|---|
| 305 | TEST_REJECT_NULL(ali_name); |
|---|
| 306 | ali_size = GBT_get_alignment_len(gb_main, ali_name); |
|---|
| 307 | TEST_REJECT(ali_size<=0); |
|---|
| 308 | } |
|---|
| 309 | |
|---|
| 310 | { |
|---|
| 311 | FilteredExport exporter(gb_main, ali_name, ali_size); |
|---|
| 312 | FilteredExport exporter_incomplete(gb_main, "ali_dna_incomplete", ali_size); |
|---|
| 313 | |
|---|
| 314 | { |
|---|
| 315 | GB_transaction ta(gb_main); |
|---|
| 316 | |
|---|
| 317 | GBDATA *gb_spec1 = GBT_find_species(gb_main, "StrRamo3"); // ~ 60 AA |
|---|
| 318 | GBDATA *gb_spec2 = GBT_find_species(gb_main, "MucRacem"); // more AA |
|---|
| 319 | GBDATA *gb_spec3 = GBT_find_species(gb_main, "BctFra12"); |
|---|
| 320 | GBDATA *gb_spec4 = GBT_find_species(gb_main, "CytLyti6"); |
|---|
| 321 | |
|---|
| 322 | TEST_REJECT_NULL(gb_spec1); |
|---|
| 323 | TEST_REJECT_NULL(gb_spec2); |
|---|
| 324 | TEST_REJECT_NULL(gb_spec3); |
|---|
| 325 | TEST_REJECT_NULL(gb_spec4); |
|---|
| 326 | |
|---|
| 327 | const char *reason; |
|---|
| 328 | TEST_EXPECT_SEQWITHLENGTH__NOERROREXPORTED(exporter.get_filtered_sequence(gb_spec1, reason), ali_size); |
|---|
| 329 | TEST_EXPECT_SEQWITHLENGTH__NOERROREXPORTED(exporter.get_filtered_sequence(gb_spec2, reason), ali_size); |
|---|
| 330 | |
|---|
| 331 | TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS(exporter_incomplete.get_filtered_sequence(gb_spec3, reason), "has no data in 'ali_dna_incomplete'"); |
|---|
| 332 | TEST_EXPECT_NULL(reason); |
|---|
| 333 | |
|---|
| 334 | exporter_incomplete.do_accept_missing_data(); // changes state of exporter_incomplete! |
|---|
| 335 | |
|---|
| 336 | TEST_EXPECT_NORESULT__NOERROREXPORTED(exporter_incomplete.get_filtered_sequence(gb_spec3, reason)); |
|---|
| 337 | TEST_EXPECT_EQUAL(reason, "has no data"); |
|---|
| 338 | |
|---|
| 339 | TEST_EXPECT_SEQWITHLENGTH__NOERROREXPORTED(exporter.get_filtered_sequence(gb_spec4, reason), ali_size); |
|---|
| 340 | |
|---|
| 341 | // test header-generation: |
|---|
| 342 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(exporter.get_fasta_header(gb_spec1), "StrRamo3"); |
|---|
| 343 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(exporter.get_fasta_header(gb_spec2), "MucRacem"); |
|---|
| 344 | |
|---|
| 345 | exporter.set_header_ACI("readdb(name);\",\";readdb(full_name)"); // use real ACI for header |
|---|
| 346 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(exporter.get_fasta_header(gb_spec3), "BctFra12,Bacteroides fragilis"); |
|---|
| 347 | |
|---|
| 348 | exporter.set_header_ACI("readdb(name);\",\";readdb(ali_dna);\",\";readdb(nosuchfield);\",\";readdb(full_name)"); // wrong accepted use (try to read a container and a unknown field) |
|---|
| 349 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(exporter.get_fasta_header(gb_spec4), "CytLyti6,,,Cytophaga lytica"); // both produce empty output |
|---|
| 350 | |
|---|
| 351 | exporter.set_header_ACI("readdb(name);\",\";bugme"); // wrong rejected use (unknown command) |
|---|
| 352 | TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS(exporter.get_fasta_header(gb_spec4), "Unknown command 'bugme'"); // aborts with error |
|---|
| 353 | |
|---|
| 354 | // test sequences are skipped if to few bases remain: |
|---|
| 355 | exporter.set_required_baseCount("ACGT", 185); // (bit more than 3*60) |
|---|
| 356 | TEST_EXPECT_NORESULT__NOERROREXPORTED(exporter.get_filtered_sequence(gb_spec1, reason)); |
|---|
| 357 | TEST_EXPECT_EQUAL(reason, "not enough base-characters left"); |
|---|
| 358 | exporter.reset_required_baseCount(); |
|---|
| 359 | |
|---|
| 360 | // test FilterDefinition |
|---|
| 361 | FilterDefinition pvp_blck_variab("POS_VAR_BY_PARSIMONY", BLOCK, true, "-=.0123"); |
|---|
| 362 | FilterDefinition pvp_pass_variab("POS_VAR_BY_PARSIMONY", PASS, true, "-=.0123"); |
|---|
| 363 | FilterDefinition pvp_blck_cnsrvd("POS_VAR_BY_PARSIMONY", BLOCK, false, "-=.012345"); |
|---|
| 364 | FilterDefinition pvp_pass_cnsrvd("POS_VAR_BY_PARSIMONY", PASS , false, "-=.012345"); |
|---|
| 365 | |
|---|
| 366 | { |
|---|
| 367 | AP_filter *filt_blck_variab; |
|---|
| 368 | AP_filter *filt_pass_variab; |
|---|
| 369 | AP_filter *filt_blck_cnsrvd; |
|---|
| 370 | AP_filter *filt_pass_cnsrvd; |
|---|
| 371 | |
|---|
| 372 | TEST_EXPECT_RESULT__NOERROREXPORTED(filt_blck_variab = pvp_blck_variab.make_filter(gb_main, ali_name, ali_size)); |
|---|
| 373 | TEST_EXPECT_RESULT__NOERROREXPORTED(filt_pass_variab = pvp_pass_variab.make_filter(gb_main, ali_name, ali_size)); |
|---|
| 374 | TEST_EXPECT_RESULT__NOERROREXPORTED(filt_blck_cnsrvd = pvp_blck_cnsrvd.make_filter(gb_main, ali_name, ali_size)); |
|---|
| 375 | TEST_EXPECT_RESULT__NOERROREXPORTED(filt_pass_cnsrvd = pvp_pass_cnsrvd.make_filter(gb_main, ali_name, ali_size)); |
|---|
| 376 | |
|---|
| 377 | TEST_EXPECT_EQUAL(filt_blck_variab->get_length(), ali_size); |
|---|
| 378 | TEST_EXPECT_EQUAL(filt_pass_variab->get_length(), ali_size); |
|---|
| 379 | TEST_EXPECT_EQUAL(filt_blck_cnsrvd->get_length(), ali_size); |
|---|
| 380 | TEST_EXPECT_EQUAL(filt_pass_cnsrvd->get_length(), ali_size); |
|---|
| 381 | |
|---|
| 382 | TEST_EXPECT_EQUAL(filt_blck_variab->get_filtered_length(), 135); |
|---|
| 383 | TEST_EXPECT_EQUAL(filt_pass_variab->get_filtered_length(), ali_size-135); |
|---|
| 384 | TEST_EXPECT_EQUAL(filt_blck_cnsrvd->get_filtered_length(), ali_size-45); |
|---|
| 385 | TEST_EXPECT_EQUAL(filt_pass_cnsrvd->get_filtered_length(), 45); |
|---|
| 386 | |
|---|
| 387 | delete filt_pass_cnsrvd; |
|---|
| 388 | delete filt_blck_cnsrvd; |
|---|
| 389 | delete filt_pass_variab; |
|---|
| 390 | delete filt_blck_variab; |
|---|
| 391 | } |
|---|
| 392 | |
|---|
| 393 | // test get_filtered_sequence with real filters: |
|---|
| 394 | TEST_EXPECT_NO_ERROR(exporter.add_SAI_filter(pvp_blck_variab)); |
|---|
| 395 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(exporter.get_filtered_sequence(gb_spec1, reason), "TCCACGCACCAAGTCCT--GGACTTTG--GACCGGGTCCCTGACG-ATG-CCGGGGACG---GACTGG--TC--GGGCCGCT-ACGG---CGCTCGGCCGCG-------GCCG-CTGGG----CCGCCGT....."); |
|---|
| 396 | |
|---|
| 397 | exporter.clear_SAI_filters(); |
|---|
| 398 | |
|---|
| 399 | TEST_EXPECT_NO_ERROR(exporter.add_SAI_filter(pvp_pass_cnsrvd)); |
|---|
| 400 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(exporter.get_filtered_sequence(gb_spec1, reason), "TCACCAAGTTGGGTCGACCGGGAATGGGCCAGCCCGCGCTGGCCC"); |
|---|
| 401 | |
|---|
| 402 | exporter.clear_SAI_filters(); |
|---|
| 403 | |
|---|
| 404 | TEST_EXPECT_NO_ERROR(exporter.add_SAI_filter(pvp_blck_cnsrvd)); |
|---|
| 405 | TEST_EXPECT_NO_ERROR(exporter.add_SAI_filter(pvp_blck_variab)); |
|---|
| 406 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(exporter.get_filtered_sequence(gb_spec1, reason), "CGCACCC--ACTTTG--GACCGGCCTCG-ATG-GCG---GCG--TC--GCGT-CG---CGTCGG-------GCG-CG----GCGT....."); |
|---|
| 407 | |
|---|
| 408 | // test sequence post-processing |
|---|
| 409 | exporter.set_sequence_ACI(":.=-"); // convert dots to hyphens (using SRT) |
|---|
| 410 | TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(exporter.get_filtered_sequence(gb_spec1, reason), "CGCACCC--ACTTTG--GACCGGCCTCG-ATG-GCG---GCG--TC--GCGT-CG---CGTCGG-------GCG-CG----GCGT-----"); |
|---|
| 411 | |
|---|
| 412 | exporter.set_sequence_ACI(":bad"); // malformed expression |
|---|
| 413 | TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS(exporter.get_filtered_sequence(gb_spec1, reason), "SRT ERROR: no '=' found in command 'bad'"); |
|---|
| 414 | } |
|---|
| 415 | } |
|---|
| 416 | free(ali_name); |
|---|
| 417 | } |
|---|
| 418 | GB_close(gb_main); |
|---|
| 419 | } |
|---|
| 420 | |
|---|
| 421 | #endif // UNIT_TESTS |
|---|
| 422 | |
|---|
| 423 | // -------------------------------------------------------------------------------- |
|---|
| 424 | |
|---|
| 425 | |
|---|