| 1 | // ============================================================= // |
|---|
| 2 | // // |
|---|
| 3 | // File : pvp.cxx // |
|---|
| 4 | // Purpose : calculate positional variability (parsimony) // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // ============================================================= // |
|---|
| 10 | |
|---|
| 11 | #include "pvp.h" |
|---|
| 12 | #include "AP_pos_var.h" |
|---|
| 13 | |
|---|
| 14 | #include <TreeNode.h> |
|---|
| 15 | #include <arb_progress.h> |
|---|
| 16 | |
|---|
| 17 | GB_ERROR PVP_calculate(GBDATA *gb_main, const char *ali_name, const char *tree_name, const char *target_SAI_name) { |
|---|
| 18 | GB_ERROR error = NULp; |
|---|
| 19 | GB_transaction ta(gb_main); |
|---|
| 20 | long ali_len = GBT_get_alignment_len(gb_main, ali_name); |
|---|
| 21 | |
|---|
| 22 | if (ali_len <= 0) { |
|---|
| 23 | error = GB_await_error(); |
|---|
| 24 | } |
|---|
| 25 | else { |
|---|
| 26 | // @@@ we would like to have only one weighted progress (#789) below! |
|---|
| 27 | // * in the 1st section it should increment about 10-20% |
|---|
| 28 | // * in the 2nd section it should increment about 80-90% |
|---|
| 29 | // both shall be driven by child progresses! |
|---|
| 30 | |
|---|
| 31 | arb_progress *progress = new arb_progress("Calculating positional variability"); |
|---|
| 32 | progress->subtitle("Loading Tree"); |
|---|
| 33 | |
|---|
| 34 | // get tree |
|---|
| 35 | TreeNode *tree; |
|---|
| 36 | { |
|---|
| 37 | tree = GBT_read_tree(gb_main, tree_name, new SimpleRoot); |
|---|
| 38 | if (!tree) { |
|---|
| 39 | error = GB_await_error(); |
|---|
| 40 | } |
|---|
| 41 | else { |
|---|
| 42 | GBT_link_tree(tree, gb_main, true, NULp, NULp); |
|---|
| 43 | } |
|---|
| 44 | } |
|---|
| 45 | |
|---|
| 46 | if (!error) { |
|---|
| 47 | delete progress; |
|---|
| 48 | progress = new arb_progress("Calculating positional variability"); |
|---|
| 49 | progress->subtitle("Counting mutations"); |
|---|
| 50 | |
|---|
| 51 | GB_alignment_type at = GBT_get_alignment_type(gb_main, ali_name); |
|---|
| 52 | bool isNUC = at==GB_AT_DNA || at==GB_AT_RNA; |
|---|
| 53 | |
|---|
| 54 | AP_pos_var pvp(gb_main, ali_name, ali_len, isNUC, tree_name); |
|---|
| 55 | |
|---|
| 56 | error = pvp.delete_aliEntry_from_SAI(target_SAI_name); |
|---|
| 57 | if (!error) error = pvp.retrieve(tree); |
|---|
| 58 | if (!error) error = pvp.save_aliEntry_to_SAI(target_SAI_name); |
|---|
| 59 | } |
|---|
| 60 | |
|---|
| 61 | delete progress; |
|---|
| 62 | |
|---|
| 63 | destroy(tree); |
|---|
| 64 | } |
|---|
| 65 | |
|---|
| 66 | ta.close(error); |
|---|
| 67 | |
|---|
| 68 | return error; |
|---|
| 69 | } |
|---|
| 70 | |
|---|
| 71 | // -------------------------------------------------------------------------------- |
|---|
| 72 | |
|---|
| 73 | #ifdef UNIT_TESTS |
|---|
| 74 | #ifndef TEST_UNIT_H |
|---|
| 75 | #include <test_unit.h> |
|---|
| 76 | #endif |
|---|
| 77 | |
|---|
| 78 | static arb_test::match_expectation saidata_equal(GBDATA *gb_main, const char *sainame, const char *aliname, const char *expected_data) { |
|---|
| 79 | using namespace arb_test; |
|---|
| 80 | |
|---|
| 81 | GB_transaction ta(gb_main); |
|---|
| 82 | |
|---|
| 83 | GBDATA *gb_sai = GBT_find_SAI(gb_main, sainame); |
|---|
| 84 | GBDATA *gb_saiali = GB_entry(gb_sai, aliname); |
|---|
| 85 | GBDATA *gb_saidata = GB_entry(gb_saiali, "data"); |
|---|
| 86 | |
|---|
| 87 | const char *saidata = GB_read_char_pntr(gb_saidata); |
|---|
| 88 | |
|---|
| 89 | return that(saidata).is_equal_to(expected_data); |
|---|
| 90 | } |
|---|
| 91 | static arb_test::match_expectation saitype_equal(GBDATA *gb_main, const char *sainame, const char *aliname, const char *expected_type) { |
|---|
| 92 | using namespace arb_test; |
|---|
| 93 | |
|---|
| 94 | GB_transaction ta(gb_main); |
|---|
| 95 | |
|---|
| 96 | GBDATA *gb_sai = GBT_find_SAI(gb_main, sainame); |
|---|
| 97 | GBDATA *gb_saiali = GB_entry(gb_sai, aliname); |
|---|
| 98 | GBDATA *gb_saitype = GB_entry(gb_saiali, "_TYPE"); |
|---|
| 99 | |
|---|
| 100 | const char *saitype = GB_read_char_pntr(gb_saitype); |
|---|
| 101 | |
|---|
| 102 | return that(saitype).is_equal_to(expected_type); |
|---|
| 103 | } |
|---|
| 104 | |
|---|
| 105 | #define TEST_EXPECT_SAIDATA_EQUAL(expected) TEST_EXPECTATION(saidata_equal(gb_main,SAI_name,aliname,expected)) |
|---|
| 106 | #define TEST_EXPECT_SAITYPE_EQUAL(expected) TEST_EXPECTATION(saitype_equal(gb_main,SAI_name,aliname,expected)) |
|---|
| 107 | |
|---|
| 108 | void TEST_pvp() { |
|---|
| 109 | GB_shell shell; |
|---|
| 110 | const char *SAI_name = "POS_VAR_BY_PARSIMONY"; |
|---|
| 111 | |
|---|
| 112 | { // nuc data: |
|---|
| 113 | GBDATA *gb_main = GB_open("TEST_nuc.arb", "rw"); |
|---|
| 114 | const char *aliname = "ali_16s"; |
|---|
| 115 | TEST_EXPECT_NO_ERROR(PVP_calculate(gb_main, aliname, "tree_nuc", SAI_name)); |
|---|
| 116 | |
|---|
| 117 | TEST_EXPECT_SAITYPE_EQUAL("PVP: Positional Variability by Parsimony: tree 'tree_nuc' ntaxa 12"); |
|---|
| 118 | TEST_EXPECT_SAIDATA_EQUAL(".----222----7------32-7-7----774-----77-----747-77-7-47-----------73--433-.3134--1-41-4-4011222337--" |
|---|
| 119 | "....-632320124--11141-4-..24312434--3----7-77---7----774-4-4-34--44-77-732777424--7-----3247------44" |
|---|
| 120 | "33----------2--73063442221633737777326110244-3----73--2--4-773--4-7--3774777137-7774437274-3372--7--" |
|---|
| 121 | "--7------47-7---24--73--------2-4--7374---74-734------43-777-47-7---3---3----------7-3---3----------" |
|---|
| 122 | "-----7-----4-7----7--4-4------3--3---3----4-4---4-47-------77---4-----447-7--4-7-----377--7-7434373-" |
|---|
| 123 | "-77--5412243422747--7-441244321337--777--3434333-77---777---------7------------7-----------7---77--4" |
|---|
| 124 | "-7----477----4---7-----------4-774-4---7--742237-7-777-7---4---327447----4----43477423-----447-7--77" |
|---|
| 125 | "21234--7---7477-77777-727777---7-3--4---77------7------7---4--4--4-4---7---7------------47727--47777" |
|---|
| 126 | "4477-----7--4-7747-------47---7-7-7--------------------------74--7----------43-7--7477-774734-6----7" |
|---|
| 127 | "47---6-42-447-774-4--------77-----7-34-------7-------747------77-7---------4------------4-----------" |
|---|
| 128 | "7-----------------------4--4---7-.---7--------74437-------7-3424-732233427--7-72333127-6.--7777---6-" |
|---|
| 129 | "-424347-------7-------47--------------77----77---------------7------------------77437-77------4-743-" |
|---|
| 130 | "77-3-77---7----7-3377-------7-7-74--7-7---------7-7---4-------74---7----7------7-3447----7----------" |
|---|
| 131 | "--------2443----7777-33--2-4444-7--44437---37---7746----34447-47---7----7--4--4----------4--4--7----" |
|---|
| 132 | "44-----7---------7-4777-----3-77774-7------------7----47---------------------7--7-47---47-774777744-" |
|---|
| 133 | "--34-7-44227----3...7712.1----123-7742-4-777774-7774----4-3--7--------------------77--447--744--77-7" |
|---|
| 134 | "---------------3-..."); |
|---|
| 135 | |
|---|
| 136 | // tests reported errors: |
|---|
| 137 | TEST_EXPECT_ERROR_CONTAINS(PVP_calculate(gb_main, "ali_61s", "tree_nuc", SAI_name), "alignment 'ali_61s' not found"); |
|---|
| 138 | TEST_EXPECT_ERROR_CONTAINS(PVP_calculate(gb_main, "ali_16s", "tree_miss", SAI_name), "tree not found"); |
|---|
| 139 | |
|---|
| 140 | GB_close(gb_main); |
|---|
| 141 | } |
|---|
| 142 | |
|---|
| 143 | { // protein data: |
|---|
| 144 | GBDATA *gb_main = GB_open("TEST_prot.arb", "rw"); |
|---|
| 145 | const char *aliname = "ali_tuf_pro"; |
|---|
| 146 | TEST_EXPECT_NO_ERROR(PVP_calculate(gb_main, aliname, "tree_prot", SAI_name)); |
|---|
| 147 | |
|---|
| 148 | TEST_EXPECT_SAITYPE_EQUAL("PVP: Positional Variability by Parsimony: tree 'tree_prot' ntaxa 11"); |
|---|
| 149 | TEST_EXPECT_SAIDATA_EQUAL("-----4-634363-4-3-4666----6--6-6-6666466666664666466666-43-34366633334446-4666-364-6--66666664-44646" |
|---|
| 150 | "666-4---4-66---6--66-4-66-6666444-6666466666--4--4--6466633346-62--3-3436343243266464433446433434662" |
|---|
| 151 | "2-4634-663466644346314-23364666646243343-6366-34632-0632-46646-666-43-6-6---46-46-3-3432-32634464334" |
|---|
| 152 | "443636666-66461612636----64664642421634-3-33444443632334-1-3-64-44664646634-433366666623624426246444" |
|---|
| 153 | "4436330334446-63434344622344666320664664--666643--66-446341446--6------6--466----------"); |
|---|
| 154 | |
|---|
| 155 | GB_close(gb_main); |
|---|
| 156 | } |
|---|
| 157 | } |
|---|
| 158 | |
|---|
| 159 | #endif // UNIT_TESTS |
|---|
| 160 | |
|---|
| 161 | // -------------------------------------------------------------------------------- |
|---|
| 162 | |
|---|
| 163 | |
|---|
| 164 | |
|---|