| 1 | // ============================================================= // |
|---|
| 2 | // // |
|---|
| 3 | // File : arb_consensus_tree.cxx // |
|---|
| 4 | // Purpose : build consensus tree with the same library // |
|---|
| 5 | // as ARB-NJ // |
|---|
| 6 | // // |
|---|
| 7 | // Coded by Ralf Westram (coder@reallysoft.de) in March 2012 // |
|---|
| 8 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 9 | // http://www.arb-home.de/ // |
|---|
| 10 | // // |
|---|
| 11 | // ============================================================= // |
|---|
| 12 | |
|---|
| 13 | #include <CT_ctree.hxx> |
|---|
| 14 | #include <TreeRead.h> |
|---|
| 15 | #include <TreeWrite.h> |
|---|
| 16 | #include <arb_str.h> |
|---|
| 17 | #include <arb_diff.h> |
|---|
| 18 | #include <arb_defs.h> |
|---|
| 19 | |
|---|
| 20 | using namespace std; |
|---|
| 21 | |
|---|
| 22 | static TreeNode *build_consensus_tree(const CharPtrArray& input_trees, GB_ERROR& error, size_t& different_species, double weight, char *&comment) { |
|---|
| 23 | // read all input trees, generate and return consensus tree |
|---|
| 24 | // (Note: the 'weight' used here doesn't matter atm, since all trees are added with the same weight) |
|---|
| 25 | |
|---|
| 26 | arb_assert(!error); |
|---|
| 27 | error = NULp; |
|---|
| 28 | comment = NULp; |
|---|
| 29 | |
|---|
| 30 | TreeNode *consense_tree = NULp; |
|---|
| 31 | if (input_trees.empty()) { |
|---|
| 32 | error = "no trees given"; |
|---|
| 33 | } |
|---|
| 34 | else { |
|---|
| 35 | ConsensusTreeBuilder tree_builder; |
|---|
| 36 | |
|---|
| 37 | for (size_t i = 0; !error && i<input_trees.size(); ++i) { |
|---|
| 38 | char *warnings = NULp; |
|---|
| 39 | |
|---|
| 40 | TreeRoot *root = new SizeAwareRoot; // will be deleted when tree gets deleted |
|---|
| 41 | SizeAwareTree *tree = DOWNCAST(SizeAwareTree*, TREE_load(input_trees[i], root, NULp, true, &warnings)); |
|---|
| 42 | if (!tree) { |
|---|
| 43 | error = GBS_global_string("Failed to load tree '%s' (Reason: %s)", input_trees[i], GB_await_error()); |
|---|
| 44 | } |
|---|
| 45 | else { |
|---|
| 46 | if (warnings) { |
|---|
| 47 | GB_warningf("while loading tree '%s':\n%s", input_trees[i], warnings); |
|---|
| 48 | free(warnings); |
|---|
| 49 | } |
|---|
| 50 | tree_builder.add(tree, input_trees[i], weight); |
|---|
| 51 | } |
|---|
| 52 | } |
|---|
| 53 | |
|---|
| 54 | if (!error) consense_tree = tree_builder.get(different_species, error); |
|---|
| 55 | if (!error) comment = tree_builder.get_tree_remark(); |
|---|
| 56 | } |
|---|
| 57 | arb_assert(contradicted(consense_tree, error)); |
|---|
| 58 | return consense_tree; |
|---|
| 59 | } |
|---|
| 60 | |
|---|
| 61 | static char *create_tree_name(const char *savename) { |
|---|
| 62 | // create a DB treename (using savename as hint) |
|---|
| 63 | char *tree_name; |
|---|
| 64 | { |
|---|
| 65 | // as default use part behind '/' and remove file extension |
|---|
| 66 | const char *lslash = strrchr(savename, '/'); |
|---|
| 67 | if (lslash) savename = lslash+1; |
|---|
| 68 | |
|---|
| 69 | const char *ldot = strrchr(savename, '.'); |
|---|
| 70 | |
|---|
| 71 | tree_name = ldot ? ARB_strpartdup(savename, ldot-1) : ARB_strdup(savename); |
|---|
| 72 | if (tree_name[0] == 0) freedup(tree_name, "tree_consensus"); |
|---|
| 73 | } |
|---|
| 74 | |
|---|
| 75 | // make sure tree name starts with 'tree_' |
|---|
| 76 | if (!ARB_strBeginsWith(tree_name, "tree_")) { |
|---|
| 77 | freeset(tree_name, GBS_global_string_copy("tree_%s", tree_name)); |
|---|
| 78 | } |
|---|
| 79 | return tree_name; |
|---|
| 80 | } |
|---|
| 81 | |
|---|
| 82 | static GB_ERROR save_tree_as_newick(TreeNode *tree, const char *savename, const char *comment) { |
|---|
| 83 | // save a tree to a newick file |
|---|
| 84 | |
|---|
| 85 | // since ARB only saves trees out of a database, |
|---|
| 86 | // i use a hack here: |
|---|
| 87 | // - create temp DB |
|---|
| 88 | // - save tree there |
|---|
| 89 | // - save to newick as usual |
|---|
| 90 | |
|---|
| 91 | GB_shell shell; |
|---|
| 92 | GBDATA *gb_main = GB_open("", "crw"); |
|---|
| 93 | GB_ERROR error = NULp; |
|---|
| 94 | |
|---|
| 95 | if (!gb_main) { |
|---|
| 96 | error = GB_await_error(); |
|---|
| 97 | } |
|---|
| 98 | else { |
|---|
| 99 | char *db_tree_name = create_tree_name(savename); |
|---|
| 100 | |
|---|
| 101 | { |
|---|
| 102 | GB_transaction ta(gb_main); |
|---|
| 103 | error = GBT_write_tree_with_remark(gb_main, db_tree_name, tree, comment); |
|---|
| 104 | } |
|---|
| 105 | if (!error) { |
|---|
| 106 | Node_ID_Labeler labels_use_IDs; |
|---|
| 107 | error = TREE_write_Newick(gb_main, db_tree_name, labels_use_IDs, true, true, true, true, TREE_SINGLE_QUOTES, savename); |
|---|
| 108 | } |
|---|
| 109 | |
|---|
| 110 | free(db_tree_name); |
|---|
| 111 | GB_close(gb_main); |
|---|
| 112 | } |
|---|
| 113 | |
|---|
| 114 | if (error) { |
|---|
| 115 | error = GBS_global_string("Failed to save tree to '%s' (Reason: %s)", savename, error); |
|---|
| 116 | } |
|---|
| 117 | |
|---|
| 118 | return error; |
|---|
| 119 | } |
|---|
| 120 | |
|---|
| 121 | int ARB_main(int argc, char *argv[]) { |
|---|
| 122 | GB_ERROR error = NULp; |
|---|
| 123 | |
|---|
| 124 | if (argc<2) { |
|---|
| 125 | printf("Usage: arb_consensus_tree [options] [tree]+\n" |
|---|
| 126 | "Purpose: Create a consensus tree out of multiple trees\n" |
|---|
| 127 | " options:\n" |
|---|
| 128 | " -w outfile write consensus tree to outfile\n"); |
|---|
| 129 | |
|---|
| 130 | // @@@ wanted options |
|---|
| 131 | // - do not add relative frequency of used subtrees as bootstrap values |
|---|
| 132 | // - multifurcate branches with bootstrap value below XXX |
|---|
| 133 | // - eliminate branches with bootstrap value below YYY |
|---|
| 134 | // - ... ? |
|---|
| 135 | } |
|---|
| 136 | else { |
|---|
| 137 | char *savename = NULp; |
|---|
| 138 | |
|---|
| 139 | ConstStrArray input_tree_names; |
|---|
| 140 | |
|---|
| 141 | for (int a = 1; a<argc; ++a) { |
|---|
| 142 | const char *arg = argv[a]; |
|---|
| 143 | if (arg[0] == '-') { |
|---|
| 144 | switch (arg[1]) { |
|---|
| 145 | case 'w': savename = ARB_strdup(argv[++a]); break; |
|---|
| 146 | default : error = GBS_global_string("Unknown switch '-%c'", arg[1]); break; |
|---|
| 147 | } |
|---|
| 148 | } |
|---|
| 149 | else { |
|---|
| 150 | input_tree_names.put(argv[a]); |
|---|
| 151 | } |
|---|
| 152 | } |
|---|
| 153 | |
|---|
| 154 | if (!error && input_tree_names.empty()) error = "no input trees specified"; |
|---|
| 155 | |
|---|
| 156 | if (!error) { |
|---|
| 157 | size_t species_count; |
|---|
| 158 | char *comment; |
|---|
| 159 | TreeNode *cons_tree = build_consensus_tree(input_tree_names, error, species_count, 1.0, comment); |
|---|
| 160 | |
|---|
| 161 | if (!cons_tree) { |
|---|
| 162 | error = GBS_global_string("Failed to build consensus tree (Reason: %s)", error); |
|---|
| 163 | } |
|---|
| 164 | else { |
|---|
| 165 | size_t leafs = GBT_count_leafs(cons_tree); |
|---|
| 166 | double percent = size_t((leafs*1000)/species_count)/10.0; |
|---|
| 167 | |
|---|
| 168 | printf("Generated tree contains %.1f%% of species (%zu of %zu found in input trees)\n", |
|---|
| 169 | percent, leafs, species_count); |
|---|
| 170 | |
|---|
| 171 | if (savename) { |
|---|
| 172 | error = save_tree_as_newick(cons_tree, savename, comment); |
|---|
| 173 | } |
|---|
| 174 | else { |
|---|
| 175 | printf("successfully created consensus tree\n" |
|---|
| 176 | "(no savename specified -> tree not saved)\n"); |
|---|
| 177 | } |
|---|
| 178 | UNCOVERED(); |
|---|
| 179 | destroy(cons_tree); |
|---|
| 180 | } |
|---|
| 181 | free(comment); |
|---|
| 182 | } |
|---|
| 183 | free(savename); |
|---|
| 184 | } |
|---|
| 185 | |
|---|
| 186 | if (error) { |
|---|
| 187 | printf("Error in arb_consensus_tree: %s\n", error); |
|---|
| 188 | } |
|---|
| 189 | |
|---|
| 190 | return error ? EXIT_FAILURE : EXIT_SUCCESS; |
|---|
| 191 | } |
|---|
| 192 | |
|---|
| 193 | // -------------------------------------------------------------------------------- |
|---|
| 194 | |
|---|
| 195 | #ifdef UNIT_TESTS |
|---|
| 196 | #ifndef TEST_UNIT_H |
|---|
| 197 | #include <test_unit.h> |
|---|
| 198 | #endif |
|---|
| 199 | |
|---|
| 200 | #include "command_output.h" |
|---|
| 201 | |
|---|
| 202 | // #define TEST_AUTO_UPDATE // uncomment to update expected trees (if more than date differs) |
|---|
| 203 | |
|---|
| 204 | static char *custom_tree_name(int dir, const char *name) { return GBS_global_string_copy("consense/%i/%s.tree", dir, name); } |
|---|
| 205 | static char *custom_numbered_tree_name(int dir, const char *name, int treeNr) { return GBS_global_string_copy("consense/%i/%s_%i.tree", dir, name, treeNr); } |
|---|
| 206 | |
|---|
| 207 | static void add_inputnames(StrArray& to, int dir, const char *basename, int first_tree, int last_tree) { |
|---|
| 208 | for (int t = first_tree; t <= last_tree; ++t) { |
|---|
| 209 | to.put(custom_numbered_tree_name(dir, basename, t)); |
|---|
| 210 | } |
|---|
| 211 | } |
|---|
| 212 | |
|---|
| 213 | static double calc_intree_distance(TreeNode *tree) { |
|---|
| 214 | if (tree->is_leaf()) return 0.0; |
|---|
| 215 | return |
|---|
| 216 | tree->leftlen + |
|---|
| 217 | tree->rightlen + |
|---|
| 218 | calc_intree_distance(tree->get_leftson()) + |
|---|
| 219 | calc_intree_distance(tree->get_rightson()); |
|---|
| 220 | } |
|---|
| 221 | |
|---|
| 222 | #define LENSUM_EPSILON .000001 |
|---|
| 223 | |
|---|
| 224 | static arb_test::match_expectation consense_tree_generated(TreeNode *tree, GB_ERROR error, size_t species_count, size_t expected_species_count, double expected_intree_distance) { |
|---|
| 225 | using namespace arb_test; |
|---|
| 226 | expectation_group expected; |
|---|
| 227 | |
|---|
| 228 | expected.add(that(error).is_equal_to_NULL()); |
|---|
| 229 | expected.add(that(tree).does_differ_from_NULL()); |
|---|
| 230 | |
|---|
| 231 | if (tree) { |
|---|
| 232 | expected.add(that(species_count).is_equal_to(expected_species_count)); |
|---|
| 233 | expected.add(that(GBT_count_leafs(tree)).is_equal_to(expected_species_count)); |
|---|
| 234 | expected.add(that(calc_intree_distance(tree)).fulfills(epsilon_similar(LENSUM_EPSILON), expected_intree_distance)); |
|---|
| 235 | } |
|---|
| 236 | |
|---|
| 237 | return all().ofgroup(expected); |
|---|
| 238 | } |
|---|
| 239 | |
|---|
| 240 | static arb_test::match_expectation build_expected_consensus_tree(const int treedir, const char *basename, int first_tree, int last_tree, double weight, const char *outbasename, size_t expected_species_count, double expected_intree_distance) { |
|---|
| 241 | using namespace arb_test; |
|---|
| 242 | expectation_group expected; |
|---|
| 243 | arb_suppress_progress hideProgress; |
|---|
| 244 | |
|---|
| 245 | GB_ERROR error = NULp; |
|---|
| 246 | StrArray input_tree_names; |
|---|
| 247 | add_inputnames(input_tree_names, treedir, basename, first_tree, last_tree); |
|---|
| 248 | |
|---|
| 249 | size_t species_count; |
|---|
| 250 | char *comment; |
|---|
| 251 | TreeNode *tree = build_consensus_tree(input_tree_names, error, species_count, weight, comment); |
|---|
| 252 | expected.add(consense_tree_generated(tree, error, species_count, expected_species_count, expected_intree_distance)); |
|---|
| 253 | |
|---|
| 254 | char *saveas = custom_tree_name(treedir, outbasename); |
|---|
| 255 | error = save_tree_as_newick(tree, saveas, comment); |
|---|
| 256 | expected.add(that(error).is_equal_to_NULL()); |
|---|
| 257 | |
|---|
| 258 | if (!error) { |
|---|
| 259 | char *expected_save = custom_tree_name(treedir, GBS_global_string("%s_expected", outbasename)); |
|---|
| 260 | bool exported_as_expected = arb_test::textfiles_have_difflines_ignoreDates(saveas, expected_save, 0); |
|---|
| 261 | |
|---|
| 262 | #if defined(TEST_AUTO_UPDATE) |
|---|
| 263 | if (!exported_as_expected) { |
|---|
| 264 | TEST_COPY_FILE(saveas, expected_save); |
|---|
| 265 | } |
|---|
| 266 | #else // !defined(TEST_AUTO_UPDATE) |
|---|
| 267 | expected.add(that(exported_as_expected).is_equal_to(true)); |
|---|
| 268 | #endif |
|---|
| 269 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(saveas)); |
|---|
| 270 | free(expected_save); |
|---|
| 271 | } |
|---|
| 272 | |
|---|
| 273 | free(saveas); |
|---|
| 274 | free(comment); |
|---|
| 275 | destroy(tree); |
|---|
| 276 | |
|---|
| 277 | return all().ofgroup(expected); |
|---|
| 278 | } |
|---|
| 279 | |
|---|
void TEST_consensus_tree_1() {
    // consensus of consense/1/bootstrapped_1.tree .. bootstrapped_5.tree (weight 0.7):
    // expects 22 species and a branch length sum of 0.925779
    TEST_EXPECTATION(build_expected_consensus_tree(1, "bootstrapped", 1, 5, 0.7, "consense1", 22, 0.925779));
    // ../UNIT_TESTER/run/consense/1/consense1.tree
}
|---|
void TEST_consensus_tree_1_single() {
    // "consensus" of a single input tree (consense/1/bootstrapped_1.tree, weight 0.01):
    // expects 22 species and a branch length sum of 0.924610
    TEST_EXPECTATION(build_expected_consensus_tree(1, "bootstrapped", 1, 1, 0.01, "consense1_single", 22, 0.924610));
    // ../UNIT_TESTER/run/consense/1/consense1_single.tree
}
|---|
| 288 | |
|---|
void TEST_consensus_tree_2() {
    // consensus of consense/2/bootstrapped_1.tree .. bootstrapped_4.tree (weight 2.5):
    // expects 59 species and a branch length sum of 2.849827
    TEST_EXPECTATION(build_expected_consensus_tree(2, "bootstrapped", 1, 4, 2.5, "consense2", 59, 2.849827));
    // ../UNIT_TESTER/run/consense/2/consense2.tree
}
|---|
| 293 | |
|---|
void TEST_consensus_tree_3() {
    // consensus of consense/3/bootstrapped_1.tree .. bootstrapped_3.tree (weight 137.772):
    // expects 128 species and a branch length sum of 2.170685
    TEST_EXPECTATION(build_expected_consensus_tree(3, "bootstrapped", 1, 3, 137.772, "consense3", 128, 2.170685));
    // ../UNIT_TESTER/run/consense/3/consense3.tree
}
|---|
| 298 | |
|---|
void TEST_consensus_tree_from_disjunct_trees() {
    // consensus of consense/4/disjunct_1.tree and disjunct_2.tree:
    // expects 15 species and a branch length sum of 2.034290
    TEST_EXPECTATION(build_expected_consensus_tree(4, "disjunct", 1, 2, 137.772, "disjunct_merged", 15, 2.034290));
    // ../UNIT_TESTER/run/consense/4/disjunct_merged.tree
}
|---|
| 303 | |
|---|
void TEST_consensus_tree_from_partly_overlapping_trees() {
    // tree_disjunct_3 contains 7 species
    // (3 from upper subtree (tree_disjunct_1) and 4 from lower subtree (tree_disjunct_2))

    // consensus of consense/4/disjunct_1.tree .. disjunct_3.tree:
    // expects 15 species and a branch length sum of 2.596455
    TEST_EXPECTATION(build_expected_consensus_tree(4, "disjunct", 1, 3, 137.772, "overlap_merged", 15, 2.596455));
    // ../UNIT_TESTER/run/consense/4/overlap_merged.tree
}
|---|
| 311 | |
|---|
void TEST_consensus_tree_from_minimal_overlapping_trees() {
    // tree_disjunct_0 only contains 2 species (1 from upper and 1 from lower subtree).

    // consensus of consense/4/disjunct_0.tree .. disjunct_2.tree:
    // expects 15 species and a branch length sum of 2.750745
    TEST_EXPECTATION(build_expected_consensus_tree(4, "disjunct", 0, 2, 137.772, "overlap_mini_merged", 15, 2.750745));
    // ../UNIT_TESTER/run/consense/4/overlap_mini_merged.tree
}
|---|
| 317 | |
|---|
void TEST_consensus_tree_described_in_arbhelp() {
    // see ../HELP_SOURCE/source/consense_algo.hlp

    // consensus of consense/5/help_1.tree and help_2.tree (weight 2.0):
    // expects 6 species and a branch length sum of 1.050000
    TEST_EXPECTATION(build_expected_consensus_tree(5, "help", 1, 2, 2.0, "help_merged", 6, 1.050000));
    // ../UNIT_TESTER/run/consense/5/help_merged.tree
}
|---|
| 323 | |
|---|
void TEST_consensus_tree_from_trees_overlapping_by_twothirds() {
    // These 3 trees were copied from an existing tree.
    // From each copy one third of all species has been removed
    // (removed sets were disjunct)

    // consensus of consense/6/overlap_two_thirds_1.tree .. overlap_two_thirds_3.tree (weight 19.2):
    // expects 15 species and a branch length sum of 3.561680
    TEST_EXPECTATION(build_expected_consensus_tree(6, "overlap_two_thirds", 1, 3, 19.2, "overlap_twothirds_merged", 15, 3.561680));
    // ../UNIT_TESTER/run/consense/6/overlap_twothirds_merged.tree
}
|---|
| 331 | |
|---|
void TEST_consensus_tree_from_mostly_overlapping_trees() {
    // the 3 trees were copied from tree_disjunct_source.
    // from each tree 2 (different) species were deleted.

    // consensus of consense/7/disjunct_del2_1.tree .. disjunct_del2_3.tree:
    // expects 15 species and a branch length sum of 1.820057
    TEST_EXPECTATION(build_expected_consensus_tree(7, "disjunct_del2", 1, 3, 137.772, "overlap_mostly", 15, 1.820057));
    // ../UNIT_TESTER/run/consense/7/overlap_mostly.tree
}
|---|
| 338 | |
|---|
void TEST_consensus_tree_from_mostly_overlapping_trees_2() {
    // the 3 trees were copied from tree_disjunct1
    // from each tree 1 (different) species was deleted.

    // consensus of consense/8/overlap2_1.tree .. overlap2_3.tree:
    // expects 8 species and a branch length sum of 0.529109
    TEST_EXPECTATION(build_expected_consensus_tree(8, "overlap2", 1, 3, 137.772, "overlap2_mostly", 8, 0.529109));
    // ../UNIT_TESTER/run/consense/8/overlap2_mostly.tree
}
TEST_PUBLISH(TEST_consensus_tree_from_mostly_overlapping_trees_2);
|---|
| 346 | |
|---|
| 347 | |
|---|
| 348 | #define REPEATED_TESTS |
|---|
| 349 | |
|---|
| 350 | #if defined(REPEATED_TESTS) |
|---|
void TEST_consensus_tree_generation_is_deterministic() {
    // Runs several of the consensus tests defined above a second time
    // (in reverse order of their definition).
    // Each of them compares against fixed expected results, i.e. this
    // fails if a repeated generation produces a different tree.
    TEST_consensus_tree_described_in_arbhelp();
    TEST_consensus_tree_from_minimal_overlapping_trees();
    TEST_consensus_tree_from_partly_overlapping_trees();
    TEST_consensus_tree_from_disjunct_trees();
    TEST_consensus_tree_3();
    TEST_consensus_tree_2();
    TEST_consensus_tree_1_single();
    TEST_consensus_tree_1();
}
|---|
| 361 | |
|---|
void TEST_arb_consensus_tree() {
    // Tests the command line tool 'arb_consensus_tree' itself
    // (the functions above are tested directly by the other tests).

    // error cases ('|| true' hides the failure exit code from the shell)
    TEST_STDOUT_CONTAINS("(arb_consensus_tree -x || true)", "Unknown switch '-x'");
    TEST_STDOUT_CONTAINS("(arb_consensus_tree -w sth || true)", "no input trees specified");

    // build consensus tree from the 5 trees in consense/1
    // and compare the saved result with the expected tree
    {
        char *saveas = custom_tree_name(1, "consense1");
        char *expected = custom_tree_name(1, "consense1_expected");

        // NOTE(review): the 2nd and 3rd argument presumably are the expected
        // stdout (not checked) and stderr content - confirm against TEST_OUTPUT_CONTAINS
        TEST_OUTPUT_CONTAINS("arb_consensus_tree"
                             " -w consense/1/consense1.tree"
                             " consense/1/bootstrapped_1.tree"
                             " consense/1/bootstrapped_2.tree"
                             " consense/1/bootstrapped_3.tree"
                             " consense/1/bootstrapped_4.tree"
                             " consense/1/bootstrapped_5.tree",
                             (const char *)NULp,
                             "Created new database \"\"");

        TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(saveas, expected, 0);
        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(saveas)); // remove generated file

        free(expected);
        free(saveas);
    }

    // same for the 4 trees in consense/2
    {
        char *saveas = custom_tree_name(2, "consense2");
        char *expected = custom_tree_name(2, "consense2_expected");

        TEST_OUTPUT_CONTAINS("arb_consensus_tree"
                             " -w consense/2/consense2.tree"
                             " consense/2/bootstrapped_1.tree"
                             " consense/2/bootstrapped_2.tree"
                             " consense/2/bootstrapped_3.tree"
                             " consense/2/bootstrapped_4.tree",
                             (const char *)NULp,
                             "Created new database \"\"");

        TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(saveas, expected, 0);
        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(saveas)); // remove generated file

        free(expected);
        free(saveas);
    }
}
|---|
| 407 | #endif // REPEATED_TESTS |
|---|
| 408 | |
|---|
| 409 | // #define TREEIO_AUTO_UPDATE // uncomment to auto-update expected test-results |
|---|
| 410 | // #define TREEIO_AUTO_UPDATE_IF_EXPORT_DIFFERS // uncomment to auto-update expected test-results |
|---|
| 411 | // #define TREEIO_AUTO_UPDATE_IF_REEXPORT_DIFFERS // uncomment to auto-update expected test-results |
|---|
| 412 | |
|---|
| 413 | static const char *findFirstNameContaining(TreeNode *tree, const char *part) { |
|---|
| 414 | const char *found = NULp; |
|---|
| 415 | if (tree->name && strstr(tree->name, part)) { |
|---|
| 416 | found = tree->name; |
|---|
| 417 | } |
|---|
| 418 | else if (!tree->is_leaf()) { |
|---|
| 419 | found = findFirstNameContaining(tree->get_leftson(), part); |
|---|
| 420 | if (!found) found = findFirstNameContaining(tree->get_rightson(), part); |
|---|
| 421 | } |
|---|
| 422 | return found; |
|---|
| 423 | } |
|---|
| 424 | |
|---|
void TEST_SLOW_treeIO_stable() {
    // Tests that newick export and import of trees is stable:
    // for each combination of export parameters the tree 'treename' is
    // - exported and compared with an expected file,
    // - reloaded from the expected file and stored in the DB,
    // - exported again and compared with the expected file (after removing
    //   the comments added by loading/saving).
    //
    // The TREEIO_AUTO_UPDATE* defines (see above) switch from comparing
    // to updating the expected files.

    const char *dbname = "trees/bootstrap_groups.arb";
    const char *treename = "tree_bootstrap_and_groups";
    const char *savename = "bg";

    GB_shell shell;
    GBDATA *gb_main = GB_open(dbname, "rw");

    TEST_REJECT_NULL(gb_main);

    char *outfile = GBS_global_string_copy("trees/%s.tree", savename);

    // iterate over all combinations of the export flags
    for (int save_branchlengths = 0; save_branchlengths <= 1; ++save_branchlengths) {
        for (int save_bootstraps = 0; save_bootstraps <= 1; ++save_bootstraps) {
            for (int save_groupnames = 0; save_groupnames <= 1; ++save_groupnames) {
                // the different quoting modes are only tested if bootstraps AND groupnames are saved
                bool quoting_occurs = save_bootstraps && save_groupnames;
                for (int pretty = 0; pretty <= 1; ++pretty) {

                    for (int quoting = TREE_DISALLOW_QUOTES; quoting <= (quoting_occurs ? TREE_DOUBLE_QUOTES : TREE_DISALLOW_QUOTES); ++quoting) {
                        TREE_node_quoting quoteMode = TREE_node_quoting(quoting);

                        // paramID identifies the parameter combination (used in name of expected file)
                        char *paramID = GBS_global_string_copy("%s_%s%s%s_%i",
                                                               pretty ? "p" : "s",
                                                               save_bootstraps ? "Bs" : "",
                                                               save_groupnames ? "Grp" : "",
                                                               save_branchlengths ? "Len" : "",
                                                               quoteMode);

                        TEST_ANNOTATE(GBS_global_string("for paramID='%s'", paramID));

                        // export tree from DB
                        {
                            Node_ID_Labeler labels_use_IDs;
                            GB_ERROR export_error = TREE_write_Newick(gb_main, treename, labels_use_IDs, save_branchlengths, save_bootstraps, save_groupnames, pretty, quoteMode, outfile);
                            TEST_EXPECT_NULL(export_error);
                        }

                        char *expectedfile = GBS_global_string_copy("trees/%s_exp_%s.tree", savename, paramID);

#if defined(TREEIO_AUTO_UPDATE)
                        system(GBS_global_string("cp %s %s", outfile, expectedfile));
#else // !defined(TREEIO_AUTO_UPDATE)
                        bool exported_as_expected = arb_test::textfiles_have_difflines_ignoreDates(expectedfile, outfile, 0);
#if defined(TREEIO_AUTO_UPDATE_IF_EXPORT_DIFFERS)
                        if (!exported_as_expected) {
                            system(GBS_global_string("cp %s %s", outfile, expectedfile));
                        }
#else // !defined(TREEIO_AUTO_UPDATE_IF_EXPORT_DIFFERS)
                        TEST_EXPECT(exported_as_expected);
#endif

                        // reimport exported tree
                        const char *reloaded_treename = "tree_reloaded";
                        {
                            char *comment = NULp;
                            TreeNode *tree = TREE_load(expectedfile, new SimpleRoot, &comment, true, NULp);
                            GB_ERROR load_error = tree ? NULp : GB_await_error();

                            TEST_EXPECTATION(all().of(that(tree).does_differ_from_NULL(),
                                                      that(load_error).is_equal_to_NULL()));
                            // store tree in DB
                            {
                                GB_transaction ta(gb_main);
                                GB_ERROR store_error = GBT_write_tree_with_remark(gb_main, reloaded_treename, tree, comment);
                                TEST_EXPECT_NULL(store_error);
                            }
                            free(comment);

                            // check that special names survived export+import
                            if (save_groupnames) {
                                const char *quotedGroup = findFirstNameContaining(tree, "quoted");
                                const char *underscoreGroup = findFirstNameContaining(tree, "bs100");
                                TEST_EXPECT_EQUAL(quotedGroup, "quoted");
                                TEST_EXPECT_EQUAL(underscoreGroup, "__bs100");
                            }
                            const char *capsLeaf = findFirstNameContaining(tree, "Caps");
                            TEST_EXPECT_EQUAL(capsLeaf, "_MhuCaps");

                            destroy(tree);
                        }

                        // export again
                        {
                            Node_ID_Labeler labels_use_IDs;
                            GB_ERROR reexport_error = TREE_write_Newick(gb_main, reloaded_treename, labels_use_IDs, save_branchlengths, save_bootstraps, save_groupnames, pretty, quoteMode, outfile);
                            TEST_EXPECT_NULL(reexport_error);
                        }

                        // eliminate comments added by loading/saving
                        char *outfile2 = GBS_global_string_copy("trees/%s2.tree", savename);
                        {
                            char *cmd = GBS_global_string_copy("cat %s"
                                                               " | grep -v 'Loaded from trees/.*_exp_'"
                                                               " | grep -v 'tree_reloaded saved to'"
                                                               " > %s", outfile, outfile2);
                            TEST_EXPECT_NO_ERROR(GBK_system(cmd));
                            free(cmd);
                        }

                        // the filtered re-export has to be identical to the expected file
                        bool reexported_as_expected = arb_test::textfiles_have_difflines(expectedfile, outfile2, 0);

#if defined(TREEIO_AUTO_UPDATE_IF_REEXPORT_DIFFERS)
                        if (!reexported_as_expected) {
                            system(GBS_global_string("cp %s %s", outfile2, expectedfile));
                        }
#else // !defined(TREEIO_AUTO_UPDATE_IF_REEXPORT_DIFFERS)
                        TEST_EXPECT(reexported_as_expected);
#endif

                        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(unlink(outfile2));
                        free(outfile2);
#endif
                        free(expectedfile);
                        free(paramID);
                    }
                }
            }
        }
    }
    TEST_ANNOTATE(NULp);

    // remove the export file (reused by all iterations)
    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(unlink(outfile));
    free(outfile);

    GB_close(gb_main);
}
|---|
| 549 | |
|---|
void TEST_CONSENSUS_TREE_functionality() {
    // functionality wanted in TreeNode (for use in library CONSENSUS_TREE)
    //
    // Loads a tree from file and tests swap_sons, reorder_tree, rotate_subtree,
    // set_root and the detection of the innermost edge by comparing the
    // resulting topology (in newick format) with the expected topology.

    char *comment = NULp;

    SizeAwareTree *tree = DOWNCAST(SizeAwareTree*, TREE_load("trees/bg_exp_p_GrpLen_0.tree",
                                                             new SizeAwareRoot,
                                                             &comment, false, NULp));
    // -> ../UNIT_TESTER/run/trees/bg_exp_p_GrpLen_0.tree

    // The following macros define the expected newick strings of the subtrees:
    // ORG_xxx = subtree as loaded
    // TOP_xxx = subtree expected after reorder_tree(BIG_BRANCHES_TO_TOP)
    // BOT_xxx = subtree expected after reorder_tree(BIG_BRANCHES_TO_BOTTOM)
    // EDG_xxx = subtree expected after reorder_tree(BIG_BRANCHES_TO_EDGE)
    // The digits name the subtree (e.g. xxx_111 consists of xxx_1111 and xxx_1112).

#define ORG_1111 "(AticSea6,(RblAerol,RblMesop))"
#define TOP_1111 "((RblAerol,RblMesop),AticSea6)"
#define BOT_1111 ORG_1111

#define ORG_11121 "((DnrShiba,RsbElon4),MmbAlkal)"
#define TOP_11121 ORG_11121
#define BOT_11121 "(MmbAlkal,(DnrShiba,RsbElon4))"

#define ORG_11122 "((MabPelag,MabSalin),PaoMaris)"
#define TOP_11122 ORG_11122
#define BOT_11122 "(PaoMaris,(MabPelag,MabSalin))"

#define ORG_1112 "(" ORG_11121 "," ORG_11122 ")"
#define TOP_1112 "(" TOP_11121 "," TOP_11122 ")"
#define BOT_1112 "(" BOT_11121 "," BOT_11122 ")"
#define EDG_1112 "(" TOP_11121 "," BOT_11122 ")"

#define ORG_111 "(" ORG_1111 "," ORG_1112 ")"
#define TOP_111 "(" TOP_1112 "," TOP_1111 ")"
#define BOT_111 "(" BOT_1111 "," BOT_1112 ")"
#define EDG_111 "(" EDG_1112 "," BOT_1111 ")"

#define ORG_112 "(OnlGran2,RsnAnta2)"
#define TOP_112 ORG_112
#define BOT_112 ORG_112

#define ORG_11 "(" ORG_111 "," ORG_112 ")"
#define TOP_11 "(" TOP_111 "," TOP_112 ")"
#define BOT_11 "(" BOT_112 "," BOT_111 ")"
#define EDG_11 "(" EDG_111 "," BOT_112 ")"

#define ORG_12 "(_MhuCaps,ThtNivea)"
#define TOP_12 "(ThtNivea,_MhuCaps)"
#define BOT_12 TOP_12

#define ORG_1 "(" ORG_11 "," ORG_12 ")"
#define TOP_1 "(" TOP_11 "," TOP_12 ")"
#define BOT_1 "(" BOT_12 "," BOT_11 ")"
#define EDG_1 "(" EDG_11 "," BOT_12 ")"

#define ORG_2 "((LbnMarin,LbnzAlb4),LbnAlexa)"
#define TOP_2 ORG_2
#define BOT_2 "(LbnAlexa,(LbnMarin,LbnzAlb4))"

    // test swap_sons
    TEST_EXPECT_VALID_TREE(tree);
    TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" ORG_1 "," ORG_2 ");");
    tree->swap_sons();
    TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" ORG_2 "," ORG_1 ");");

    // test reorder_tree
    TEST_EXPECT_VALID_TREE(tree);
    TreeOrder order[] = { BIG_BRANCHES_TO_TOP, BIG_BRANCHES_TO_BOTTOM, BIG_BRANCHES_TO_EDGE };

    // the result of reordering to 'to_order' is expected to be independent
    // of the previous order ('from_order') and of a previous rotation
    for (size_t o1 = 0; o1<ARRAY_ELEMS(order); ++o1) {
        TreeOrder to_order = order[o1];
        for (size_t o2 = 0; o2<ARRAY_ELEMS(order); ++o2) {
            TreeOrder from_order = order[o2];

            for (int rotate = 0; rotate<=1; ++rotate) {
                tree->reorder_tree(from_order);
                if (rotate) tree->rotate_subtree();
                tree->reorder_tree(to_order);

                switch (to_order) {
                    case BIG_BRANCHES_TO_TOP: TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" TOP_1 "," TOP_2 ");"); break;
                    case BIG_BRANCHES_TO_EDGE: TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" EDG_1 "," BOT_2 ");"); break;
                    case BIG_BRANCHES_TO_BOTTOM: TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" BOT_2 "," BOT_1 ");"); break;
                    default: TEST_REJECT(true); break;
                }

            }
        }
    }

    // test rotate_subtree
    // (rotating twice is expected to restore the original order)
    TEST_EXPECT_VALID_TREE(tree);
    tree->reorder_tree(BIG_BRANCHES_TO_TOP);
    tree->rotate_subtree(); TEST_EXPECT_NEWICK(nSIMPLE, tree, "((LbnAlexa,(LbnzAlb4,LbnMarin)),((_MhuCaps,ThtNivea),((RsnAnta2,OnlGran2),((AticSea6,(RblMesop,RblAerol)),((PaoMaris,(MabSalin,MabPelag)),(MmbAlkal,(RsbElon4,DnrShiba)))))));");
    tree->rotate_subtree(); TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" TOP_1 "," TOP_2 ");");


    // test set_root
    TEST_EXPECT_VALID_TREE(tree);
    TreeNode *AticSea6Grandpa = tree->findLeafNamed("AticSea6")->get_father()->get_father();
    TEST_REJECT_NULL(AticSea6Grandpa);
    TEST_EXPECT_VALID_TREE(AticSea6Grandpa);

    AticSea6Grandpa->set_root();
    TEST_EXPECT_NEWICK(nSIMPLE, tree,
                       "((" ORG_1112 "," TOP_1111 ")," // AticSea6 is direct son of TOP_1111
                       "((" ORG_2 "," TOP_12 ")," ORG_112 "));");

    // test auto-detection of "best" root
    TEST_EXPECT_VALID_TREE(tree);
    tree->get_tree_root()->find_innermost_edge().set_root();
    TEST_EXPECT_NEWICK(nLENGTH, tree,
                       "((((LbnMarin:0.019,LbnzAlb4:0.003):0.016,LbnAlexa:0.032):0.122,(ThtNivea:0.230,_MhuCaps:0.194):0.427):0.076,"
                       "(((((DnrShiba:0.076,RsbElon4:0.053):0.034,MmbAlkal:0.069):0.016,((MabPelag:0.001,MabSalin:0.009):0.095,PaoMaris:0.092):0.036):0.030,((RblAerol:0.085,RblMesop:0.042):0.238,AticSea6:0.111):0.018):0.036,(OnlGran2:0.057,RsnAnta2:0.060):0.021):0.076);");

    TEST_EXPECT_VALID_TREE(tree);
    destroy(tree);
    free(comment);
}
|---|
| 664 | |
|---|
| 665 | #endif // UNIT_TESTS |
|---|
| 666 | |
|---|
| 667 | // -------------------------------------------------------------------------------- |
|---|