1 | // ============================================================= // |
---|
2 | // // |
---|
3 | // File : arb_consensus_tree.cxx // |
---|
4 | // Purpose : build consensus tree with the same library // |
---|
5 | // as ARB-NJ // |
---|
6 | // // |
---|
7 | // Coded by Ralf Westram (coder@reallysoft.de) in March 2012 // |
---|
8 | // Institute of Microbiology (Technical University Munich) // |
---|
9 | // http://www.arb-home.de/ // |
---|
10 | // // |
---|
11 | // ============================================================= // |
---|
12 | |
---|
13 | #include <CT_ctree.hxx> |
---|
14 | #include <TreeRead.h> |
---|
15 | #include <TreeWrite.h> |
---|
16 | #include <arb_str.h> |
---|
17 | #include <arb_diff.h> |
---|
18 | #include <arb_defs.h> |
---|
19 | |
---|
20 | using namespace std; |
---|
21 | |
---|
22 | static TreeNode *build_consensus_tree(const CharPtrArray& input_trees, GB_ERROR& error, size_t& different_species, double weight, char *&comment) { |
---|
23 | // read all input trees, generate and return consensus tree |
---|
24 | // (Note: the 'weight' used here doesn't matter atm, since all trees are added with the same weight) |
---|
25 | |
---|
26 | arb_assert(!error); |
---|
27 | error = NULp; |
---|
28 | comment = NULp; |
---|
29 | |
---|
30 | TreeNode *consense_tree = NULp; |
---|
31 | if (input_trees.empty()) { |
---|
32 | error = "no trees given"; |
---|
33 | } |
---|
34 | else { |
---|
35 | ConsensusTreeBuilder tree_builder; |
---|
36 | |
---|
37 | for (size_t i = 0; !error && i<input_trees.size(); ++i) { |
---|
38 | char *warnings = NULp; |
---|
39 | |
---|
40 | TreeRoot *root = new SizeAwareRoot; // will be deleted when tree gets deleted |
---|
41 | SizeAwareTree *tree = DOWNCAST(SizeAwareTree*, TREE_load(input_trees[i], root, NULp, true, &warnings)); |
---|
42 | if (!tree) { |
---|
43 | error = GBS_global_string("Failed to load tree '%s' (Reason: %s)", input_trees[i], GB_await_error()); |
---|
44 | } |
---|
45 | else { |
---|
46 | if (warnings) { |
---|
47 | GB_warningf("while loading tree '%s':\n%s", input_trees[i], warnings); |
---|
48 | free(warnings); |
---|
49 | } |
---|
50 | tree_builder.add(tree, input_trees[i], weight); |
---|
51 | } |
---|
52 | } |
---|
53 | |
---|
54 | if (!error) consense_tree = tree_builder.get(different_species, error); |
---|
55 | if (!error) comment = tree_builder.get_tree_remark(); |
---|
56 | } |
---|
57 | arb_assert(contradicted(consense_tree, error)); |
---|
58 | return consense_tree; |
---|
59 | } |
---|
60 | |
---|
61 | static char *create_tree_name(const char *savename) { |
---|
62 | // create a DB treename (using savename as hint) |
---|
63 | char *tree_name; |
---|
64 | { |
---|
65 | // as default use part behind '/' and remove file extension |
---|
66 | const char *lslash = strrchr(savename, '/'); |
---|
67 | if (lslash) savename = lslash+1; |
---|
68 | |
---|
69 | const char *ldot = strrchr(savename, '.'); |
---|
70 | |
---|
71 | tree_name = ldot ? ARB_strpartdup(savename, ldot-1) : ARB_strdup(savename); |
---|
72 | if (tree_name[0] == 0) freedup(tree_name, "tree_consensus"); |
---|
73 | } |
---|
74 | |
---|
75 | // make sure tree name starts with 'tree_' |
---|
76 | if (!ARB_strBeginsWith(tree_name, "tree_")) { |
---|
77 | freeset(tree_name, GBS_global_string_copy("tree_%s", tree_name)); |
---|
78 | } |
---|
79 | return tree_name; |
---|
80 | } |
---|
81 | |
---|
82 | static GB_ERROR save_tree_as_newick(TreeNode *tree, const char *savename, const char *comment) { |
---|
83 | // save a tree to a newick file |
---|
84 | |
---|
85 | // since ARB only saves trees out of a database, |
---|
86 | // i use a hack here: |
---|
87 | // - create temp DB |
---|
88 | // - save tree there |
---|
89 | // - save to newick as usual |
---|
90 | |
---|
91 | GB_shell shell; |
---|
92 | GBDATA *gb_main = GB_open("", "crw"); |
---|
93 | GB_ERROR error = NULp; |
---|
94 | |
---|
95 | if (!gb_main) { |
---|
96 | error = GB_await_error(); |
---|
97 | } |
---|
98 | else { |
---|
99 | char *db_tree_name = create_tree_name(savename); |
---|
100 | |
---|
101 | { |
---|
102 | GB_transaction ta(gb_main); |
---|
103 | error = GBT_write_tree_with_remark(gb_main, db_tree_name, tree, comment); |
---|
104 | } |
---|
105 | if (!error) { |
---|
106 | Node_ID_Labeler labels_use_IDs; |
---|
107 | error = TREE_write_Newick(gb_main, db_tree_name, labels_use_IDs, true, true, true, true, TREE_SINGLE_QUOTES, savename); |
---|
108 | } |
---|
109 | |
---|
110 | free(db_tree_name); |
---|
111 | GB_close(gb_main); |
---|
112 | } |
---|
113 | |
---|
114 | if (error) { |
---|
115 | error = GBS_global_string("Failed to save tree to '%s' (Reason: %s)", savename, error); |
---|
116 | } |
---|
117 | |
---|
118 | return error; |
---|
119 | } |
---|
120 | |
---|
121 | int ARB_main(int argc, char *argv[]) { |
---|
122 | GB_ERROR error = NULp; |
---|
123 | |
---|
124 | if (argc<2) { |
---|
125 | printf("Usage: arb_consensus_tree [options] [tree]+\n" |
---|
126 | "Purpose: Create a consensus tree out of multiple trees\n" |
---|
127 | " options:\n" |
---|
128 | " -w outfile write consensus tree to outfile\n"); |
---|
129 | |
---|
130 | // @@@ wanted options |
---|
131 | // - do not add relative frequency of used subtrees as bootstrap values |
---|
132 | // - multifurcate branches with bootstrap value below XXX |
---|
133 | // - eliminate branches with bootstrap value below YYY |
---|
134 | // - ... ? |
---|
135 | } |
---|
136 | else { |
---|
137 | char *savename = NULp; |
---|
138 | |
---|
139 | ConstStrArray input_tree_names; |
---|
140 | |
---|
141 | for (int a = 1; a<argc; ++a) { |
---|
142 | const char *arg = argv[a]; |
---|
143 | if (arg[0] == '-') { |
---|
144 | switch (arg[1]) { |
---|
145 | case 'w': savename = ARB_strdup(argv[++a]); break; |
---|
146 | default : error = GBS_global_string("Unknown switch '-%c'", arg[1]); break; |
---|
147 | } |
---|
148 | } |
---|
149 | else { |
---|
150 | input_tree_names.put(argv[a]); |
---|
151 | } |
---|
152 | } |
---|
153 | |
---|
154 | if (!error && input_tree_names.empty()) error = "no input trees specified"; |
---|
155 | |
---|
156 | if (!error) { |
---|
157 | size_t species_count; |
---|
158 | char *comment; |
---|
159 | TreeNode *cons_tree = build_consensus_tree(input_tree_names, error, species_count, 1.0, comment); |
---|
160 | |
---|
161 | if (!cons_tree) { |
---|
162 | error = GBS_global_string("Failed to build consensus tree (Reason: %s)", error); |
---|
163 | } |
---|
164 | else { |
---|
165 | size_t leafs = GBT_count_leafs(cons_tree); |
---|
166 | double percent = size_t((leafs*1000)/species_count)/10.0; |
---|
167 | |
---|
168 | printf("Generated tree contains %.1f%% of species (%zu of %zu found in input trees)\n", |
---|
169 | percent, leafs, species_count); |
---|
170 | |
---|
171 | if (savename) { |
---|
172 | error = save_tree_as_newick(cons_tree, savename, comment); |
---|
173 | } |
---|
174 | else { |
---|
175 | printf("successfully created consensus tree\n" |
---|
176 | "(no savename specified -> tree not saved)\n"); |
---|
177 | } |
---|
178 | UNCOVERED(); |
---|
179 | destroy(cons_tree); |
---|
180 | } |
---|
181 | free(comment); |
---|
182 | } |
---|
183 | free(savename); |
---|
184 | } |
---|
185 | |
---|
186 | if (error) { |
---|
187 | printf("Error in arb_consensus_tree: %s\n", error); |
---|
188 | } |
---|
189 | |
---|
190 | return error ? EXIT_FAILURE : EXIT_SUCCESS; |
---|
191 | } |
---|
192 | |
---|
193 | // -------------------------------------------------------------------------------- |
---|
194 | |
---|
195 | #ifdef UNIT_TESTS |
---|
196 | #ifndef TEST_UNIT_H |
---|
197 | #include <test_unit.h> |
---|
198 | #endif |
---|
199 | |
---|
200 | #include "command_output.h" |
---|
201 | |
---|
202 | // #define TEST_AUTO_UPDATE // uncomment to update expected trees (if more than date differs) |
---|
203 | |
---|
204 | static char *custom_tree_name(int dir, const char *name) { return GBS_global_string_copy("consense/%i/%s.tree", dir, name); } |
---|
205 | static char *custom_numbered_tree_name(int dir, const char *name, int treeNr) { return GBS_global_string_copy("consense/%i/%s_%i.tree", dir, name, treeNr); } |
---|
206 | |
---|
207 | static void add_inputnames(StrArray& to, int dir, const char *basename, int first_tree, int last_tree) { |
---|
208 | for (int t = first_tree; t <= last_tree; ++t) { |
---|
209 | to.put(custom_numbered_tree_name(dir, basename, t)); |
---|
210 | } |
---|
211 | } |
---|
212 | |
---|
213 | static double calc_intree_distance(TreeNode *tree) { |
---|
214 | if (tree->is_leaf()) return 0.0; |
---|
215 | return |
---|
216 | tree->leftlen + |
---|
217 | tree->rightlen + |
---|
218 | calc_intree_distance(tree->get_leftson()) + |
---|
219 | calc_intree_distance(tree->get_rightson()); |
---|
220 | } |
---|
221 | |
---|
222 | #define LENSUM_EPSILON .000001 |
---|
223 | |
---|
224 | static arb_test::match_expectation consense_tree_generated(TreeNode *tree, GB_ERROR error, size_t species_count, size_t expected_species_count, double expected_intree_distance) { |
---|
225 | using namespace arb_test; |
---|
226 | expectation_group expected; |
---|
227 | |
---|
228 | expected.add(that(error).is_equal_to_NULL()); |
---|
229 | expected.add(that(tree).does_differ_from_NULL()); |
---|
230 | |
---|
231 | if (tree) { |
---|
232 | expected.add(that(species_count).is_equal_to(expected_species_count)); |
---|
233 | expected.add(that(GBT_count_leafs(tree)).is_equal_to(expected_species_count)); |
---|
234 | expected.add(that(calc_intree_distance(tree)).fulfills(epsilon_similar(LENSUM_EPSILON), expected_intree_distance)); |
---|
235 | } |
---|
236 | |
---|
237 | return all().ofgroup(expected); |
---|
238 | } |
---|
239 | |
---|
240 | static arb_test::match_expectation build_expected_consensus_tree(const int treedir, const char *basename, int first_tree, int last_tree, double weight, const char *outbasename, size_t expected_species_count, double expected_intree_distance) { |
---|
241 | using namespace arb_test; |
---|
242 | expectation_group expected; |
---|
243 | arb_suppress_progress hideProgress; |
---|
244 | |
---|
245 | GB_ERROR error = NULp; |
---|
246 | StrArray input_tree_names; |
---|
247 | add_inputnames(input_tree_names, treedir, basename, first_tree, last_tree); |
---|
248 | |
---|
249 | size_t species_count; |
---|
250 | char *comment; |
---|
251 | TreeNode *tree = build_consensus_tree(input_tree_names, error, species_count, weight, comment); |
---|
252 | expected.add(consense_tree_generated(tree, error, species_count, expected_species_count, expected_intree_distance)); |
---|
253 | |
---|
254 | char *saveas = custom_tree_name(treedir, outbasename); |
---|
255 | error = save_tree_as_newick(tree, saveas, comment); |
---|
256 | expected.add(that(error).is_equal_to_NULL()); |
---|
257 | |
---|
258 | if (!error) { |
---|
259 | char *expected_save = custom_tree_name(treedir, GBS_global_string("%s_expected", outbasename)); |
---|
260 | bool exported_as_expected = arb_test::textfiles_have_difflines_ignoreDates(saveas, expected_save, 0); |
---|
261 | |
---|
262 | #if defined(TEST_AUTO_UPDATE) |
---|
263 | if (!exported_as_expected) { |
---|
264 | TEST_COPY_FILE(saveas, expected_save); |
---|
265 | } |
---|
266 | #else // !defined(TEST_AUTO_UPDATE) |
---|
267 | expected.add(that(exported_as_expected).is_equal_to(true)); |
---|
268 | #endif |
---|
269 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(saveas)); |
---|
270 | free(expected_save); |
---|
271 | } |
---|
272 | |
---|
273 | free(saveas); |
---|
274 | free(comment); |
---|
275 | destroy(tree); |
---|
276 | |
---|
277 | return all().ofgroup(expected); |
---|
278 | } |
---|
279 | |
---|
280 | void TEST_consensus_tree_1() { |
---|
281 | TEST_EXPECTATION(build_expected_consensus_tree(1, "bootstrapped", 1, 5, 0.7, "consense1", 22, 0.925779)); |
---|
282 | // ../UNIT_TESTER/run/consense/1/consense1.tree |
---|
283 | } |
---|
284 | void TEST_consensus_tree_1_single() { |
---|
285 | TEST_EXPECTATION(build_expected_consensus_tree(1, "bootstrapped", 1, 1, 0.01, "consense1_single", 22, 0.924610)); |
---|
286 | // ../UNIT_TESTER/run/consense/1/consense1_single.tree |
---|
287 | } |
---|
288 | |
---|
289 | void TEST_consensus_tree_2() { |
---|
290 | TEST_EXPECTATION(build_expected_consensus_tree(2, "bootstrapped", 1, 4, 2.5, "consense2", 59, 2.849827)); |
---|
291 | // ../UNIT_TESTER/run/consense/2/consense2.tree |
---|
292 | } |
---|
293 | |
---|
294 | void TEST_consensus_tree_3() { |
---|
295 | TEST_EXPECTATION(build_expected_consensus_tree(3, "bootstrapped", 1, 3, 137.772, "consense3", 128, 2.170685)); |
---|
296 | // ../UNIT_TESTER/run/consense/3/consense3.tree |
---|
297 | } |
---|
298 | |
---|
299 | void TEST_consensus_tree_from_disjunct_trees() { |
---|
300 | TEST_EXPECTATION(build_expected_consensus_tree(4, "disjunct", 1, 2, 137.772, "disjunct_merged", 15, 2.034290)); |
---|
301 | // ../UNIT_TESTER/run/consense/4/disjunct_merged.tree |
---|
302 | } |
---|
303 | |
---|
304 | void TEST_consensus_tree_from_partly_overlapping_trees() { |
---|
305 | // tree_disjunct_3 contains 7 species |
---|
306 | // (3 from upper subtree (tree_disjunct_1) and 4 from lower subtree (tree_disjunct_2)) |
---|
307 | |
---|
308 | TEST_EXPECTATION(build_expected_consensus_tree(4, "disjunct", 1, 3, 137.772, "overlap_merged", 15, 2.596455)); |
---|
309 | // ../UNIT_TESTER/run/consense/4/overlap_merged.tree |
---|
310 | } |
---|
311 | |
---|
312 | void TEST_consensus_tree_from_minimal_overlapping_trees() { |
---|
313 | // tree_disjunct_0 only contains 2 species (1 from upper and 1 from lower subtree). |
---|
314 | TEST_EXPECTATION(build_expected_consensus_tree(4, "disjunct", 0, 2, 137.772, "overlap_mini_merged", 15, 2.750745)); |
---|
315 | // ../UNIT_TESTER/run/consense/4/overlap_mini_merged.tree |
---|
316 | } |
---|
317 | |
---|
318 | void TEST_consensus_tree_described_in_arbhelp() { |
---|
319 | // see ../HELP_SOURCE/source/consense_algo.hlp |
---|
320 | TEST_EXPECTATION(build_expected_consensus_tree(5, "help", 1, 2, 2.0, "help_merged", 6, 1.050000)); |
---|
321 | // ../UNIT_TESTER/run/consense/5/help_merged.tree |
---|
322 | } |
---|
323 | |
---|
324 | void TEST_consensus_tree_from_trees_overlapping_by_twothirds() { |
---|
325 | // These 3 trees where copied from an existing tree. |
---|
326 | // From each copy one third of all species has been removed |
---|
327 | // (removed sets were disjunct) |
---|
328 | TEST_EXPECTATION(build_expected_consensus_tree(6, "overlap_two_thirds", 1, 3, 19.2, "overlap_twothirds_merged", 15, 3.561680)); |
---|
329 | // ../UNIT_TESTER/run/consense/6/overlap_twothirds_merged.tree |
---|
330 | } |
---|
331 | |
---|
332 | void TEST_consensus_tree_from_mostly_overlapping_trees() { |
---|
333 | // the 3 trees were copied from tree_disjunct_source. |
---|
334 | // from each tree 2 (different) species were deleted. |
---|
335 | TEST_EXPECTATION(build_expected_consensus_tree(7, "disjunct_del2", 1, 3, 137.772, "overlap_mostly", 15, 1.820057)); |
---|
336 | // ../UNIT_TESTER/run/consense/7/overlap_mostly.tree |
---|
337 | } |
---|
338 | |
---|
339 | void TEST_consensus_tree_from_mostly_overlapping_trees_2() { |
---|
340 | // the 3 trees were copied from tree_disjunct1 |
---|
341 | // from each tree 1 (different) species was deleted. |
---|
342 | TEST_EXPECTATION(build_expected_consensus_tree(8, "overlap2", 1, 3, 137.772, "overlap2_mostly", 8, 0.529109)); |
---|
343 | // ../UNIT_TESTER/run/consense/8/overlap2_mostly.tree |
---|
344 | } |
---|
345 | TEST_PUBLISH(TEST_consensus_tree_from_mostly_overlapping_trees_2); |
---|
346 | |
---|
347 | |
---|
348 | #define REPEATED_TESTS |
---|
349 | |
---|
350 | #if defined(REPEATED_TESTS) |
---|
351 | void TEST_consensus_tree_generation_is_deterministic() { |
---|
352 | TEST_consensus_tree_described_in_arbhelp(); |
---|
353 | TEST_consensus_tree_from_minimal_overlapping_trees(); |
---|
354 | TEST_consensus_tree_from_partly_overlapping_trees(); |
---|
355 | TEST_consensus_tree_from_disjunct_trees(); |
---|
356 | TEST_consensus_tree_3(); |
---|
357 | TEST_consensus_tree_2(); |
---|
358 | TEST_consensus_tree_1_single(); |
---|
359 | TEST_consensus_tree_1(); |
---|
360 | } |
---|
361 | |
---|
362 | void TEST_arb_consensus_tree() { |
---|
363 | TEST_STDOUT_CONTAINS("(arb_consensus_tree -x || true)", "Unknown switch '-x'"); |
---|
364 | TEST_STDOUT_CONTAINS("(arb_consensus_tree -w sth || true)", "no input trees specified"); |
---|
365 | |
---|
366 | { |
---|
367 | char *saveas = custom_tree_name(1, "consense1"); |
---|
368 | char *expected = custom_tree_name(1, "consense1_expected"); |
---|
369 | |
---|
370 | TEST_OUTPUT_CONTAINS("arb_consensus_tree" |
---|
371 | " -w consense/1/consense1.tree" |
---|
372 | " consense/1/bootstrapped_1.tree" |
---|
373 | " consense/1/bootstrapped_2.tree" |
---|
374 | " consense/1/bootstrapped_3.tree" |
---|
375 | " consense/1/bootstrapped_4.tree" |
---|
376 | " consense/1/bootstrapped_5.tree", |
---|
377 | (const char *)NULp, |
---|
378 | "Created new database \"\""); |
---|
379 | |
---|
380 | TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(saveas, expected, 0); |
---|
381 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(saveas)); |
---|
382 | |
---|
383 | free(expected); |
---|
384 | free(saveas); |
---|
385 | } |
---|
386 | |
---|
387 | { |
---|
388 | char *saveas = custom_tree_name(2, "consense2"); |
---|
389 | char *expected = custom_tree_name(2, "consense2_expected"); |
---|
390 | |
---|
391 | TEST_OUTPUT_CONTAINS("arb_consensus_tree" |
---|
392 | " -w consense/2/consense2.tree" |
---|
393 | " consense/2/bootstrapped_1.tree" |
---|
394 | " consense/2/bootstrapped_2.tree" |
---|
395 | " consense/2/bootstrapped_3.tree" |
---|
396 | " consense/2/bootstrapped_4.tree", |
---|
397 | (const char *)NULp, |
---|
398 | "Created new database \"\""); |
---|
399 | |
---|
400 | TEST_EXPECT_TEXTFILE_DIFFLINES_IGNORE_DATES(saveas, expected, 0); |
---|
401 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(saveas)); |
---|
402 | |
---|
403 | free(expected); |
---|
404 | free(saveas); |
---|
405 | } |
---|
406 | } |
---|
407 | #endif // REPEATED_TESTS |
---|
408 | |
---|
409 | // #define TREEIO_AUTO_UPDATE // uncomment to auto-update expected test-results |
---|
410 | // #define TREEIO_AUTO_UPDATE_IF_EXPORT_DIFFERS // uncomment to auto-update expected test-results |
---|
411 | // #define TREEIO_AUTO_UPDATE_IF_REEXPORT_DIFFERS // uncomment to auto-update expected test-results |
---|
412 | |
---|
413 | static const char *findFirstNameContaining(TreeNode *tree, const char *part) { |
---|
414 | const char *found = NULp; |
---|
415 | if (tree->name && strstr(tree->name, part)) { |
---|
416 | found = tree->name; |
---|
417 | } |
---|
418 | else if (!tree->is_leaf()) { |
---|
419 | found = findFirstNameContaining(tree->get_leftson(), part); |
---|
420 | if (!found) found = findFirstNameContaining(tree->get_rightson(), part); |
---|
421 | } |
---|
422 | return found; |
---|
423 | } |
---|
424 | |
---|
425 | void TEST_SLOW_treeIO_stable() { |
---|
426 | const char *dbname = "trees/bootstrap_groups.arb"; |
---|
427 | const char *treename = "tree_bootstrap_and_groups"; |
---|
428 | const char *savename = "bg"; |
---|
429 | |
---|
430 | GB_shell shell; |
---|
431 | GBDATA *gb_main = GB_open(dbname, "rw"); |
---|
432 | |
---|
433 | TEST_REJECT_NULL(gb_main); |
---|
434 | |
---|
435 | char *outfile = GBS_global_string_copy("trees/%s.tree", savename); |
---|
436 | |
---|
437 | for (int save_branchlengths = 0; save_branchlengths <= 1; ++save_branchlengths) { |
---|
438 | for (int save_bootstraps = 0; save_bootstraps <= 1; ++save_bootstraps) { |
---|
439 | for (int save_groupnames = 0; save_groupnames <= 1; ++save_groupnames) { |
---|
440 | bool quoting_occurs = save_bootstraps && save_groupnames; |
---|
441 | for (int pretty = 0; pretty <= 1; ++pretty) { |
---|
442 | |
---|
443 | for (int quoting = TREE_DISALLOW_QUOTES; quoting <= (quoting_occurs ? TREE_DOUBLE_QUOTES : TREE_DISALLOW_QUOTES); ++quoting) { |
---|
444 | TREE_node_quoting quoteMode = TREE_node_quoting(quoting); |
---|
445 | |
---|
446 | char *paramID = GBS_global_string_copy("%s_%s%s%s_%i", |
---|
447 | pretty ? "p" : "s", |
---|
448 | save_bootstraps ? "Bs" : "", |
---|
449 | save_groupnames ? "Grp" : "", |
---|
450 | save_branchlengths ? "Len" : "", |
---|
451 | quoteMode); |
---|
452 | |
---|
453 | TEST_ANNOTATE(GBS_global_string("for paramID='%s'", paramID)); |
---|
454 | |
---|
455 | { |
---|
456 | Node_ID_Labeler labels_use_IDs; |
---|
457 | GB_ERROR export_error = TREE_write_Newick(gb_main, treename, labels_use_IDs, save_branchlengths, save_bootstraps, save_groupnames, pretty, quoteMode, outfile); |
---|
458 | TEST_EXPECT_NULL(export_error); |
---|
459 | } |
---|
460 | |
---|
461 | char *expectedfile = GBS_global_string_copy("trees/%s_exp_%s.tree", savename, paramID); |
---|
462 | |
---|
463 | #if defined(TREEIO_AUTO_UPDATE) |
---|
464 | system(GBS_global_string("cp %s %s", outfile, expectedfile)); |
---|
465 | #else // !defined(TREEIO_AUTO_UPDATE) |
---|
466 | bool exported_as_expected = arb_test::textfiles_have_difflines_ignoreDates(expectedfile, outfile, 0); |
---|
467 | #if defined(TREEIO_AUTO_UPDATE_IF_EXPORT_DIFFERS) |
---|
468 | if (!exported_as_expected) { |
---|
469 | system(GBS_global_string("cp %s %s", outfile, expectedfile)); |
---|
470 | } |
---|
471 | #else // !defined(TREEIO_AUTO_UPDATE_IF_EXPORT_DIFFERS) |
---|
472 | TEST_EXPECT(exported_as_expected); |
---|
473 | #endif |
---|
474 | |
---|
475 | // reimport exported tree |
---|
476 | const char *reloaded_treename = "tree_reloaded"; |
---|
477 | { |
---|
478 | char *comment = NULp; |
---|
479 | TreeNode *tree = TREE_load(expectedfile, new SimpleRoot, &comment, true, NULp); |
---|
480 | GB_ERROR load_error = tree ? NULp : GB_await_error(); |
---|
481 | |
---|
482 | TEST_EXPECTATION(all().of(that(tree).does_differ_from_NULL(), |
---|
483 | that(load_error).is_equal_to_NULL())); |
---|
484 | // store tree in DB |
---|
485 | { |
---|
486 | GB_transaction ta(gb_main); |
---|
487 | GB_ERROR store_error = GBT_write_tree_with_remark(gb_main, reloaded_treename, tree, comment); |
---|
488 | TEST_EXPECT_NULL(store_error); |
---|
489 | } |
---|
490 | free(comment); |
---|
491 | |
---|
492 | if (save_groupnames) { |
---|
493 | const char *quotedGroup = findFirstNameContaining(tree, "quoted"); |
---|
494 | const char *underscoreGroup = findFirstNameContaining(tree, "bs100"); |
---|
495 | TEST_EXPECT_EQUAL(quotedGroup, "quoted"); |
---|
496 | TEST_EXPECT_EQUAL(underscoreGroup, "__bs100"); |
---|
497 | } |
---|
498 | const char *capsLeaf = findFirstNameContaining(tree, "Caps"); |
---|
499 | TEST_EXPECT_EQUAL(capsLeaf, "_MhuCaps"); |
---|
500 | |
---|
501 | destroy(tree); |
---|
502 | } |
---|
503 | |
---|
504 | // export again |
---|
505 | { |
---|
506 | Node_ID_Labeler labels_use_IDs; |
---|
507 | GB_ERROR reexport_error = TREE_write_Newick(gb_main, reloaded_treename, labels_use_IDs, save_branchlengths, save_bootstraps, save_groupnames, pretty, quoteMode, outfile); |
---|
508 | TEST_EXPECT_NULL(reexport_error); |
---|
509 | } |
---|
510 | |
---|
511 | // eliminate comments added by loading/saving |
---|
512 | char *outfile2 = GBS_global_string_copy("trees/%s2.tree", savename); |
---|
513 | { |
---|
514 | char *cmd = GBS_global_string_copy("cat %s" |
---|
515 | " | grep -v 'Loaded from trees/.*_exp_'" |
---|
516 | " | grep -v 'tree_reloaded saved to'" |
---|
517 | " > %s", outfile, outfile2); |
---|
518 | TEST_EXPECT_NO_ERROR(GBK_system(cmd)); |
---|
519 | free(cmd); |
---|
520 | } |
---|
521 | |
---|
522 | bool reexported_as_expected = arb_test::textfiles_have_difflines(expectedfile, outfile2, 0); |
---|
523 | |
---|
524 | #if defined(TREEIO_AUTO_UPDATE_IF_REEXPORT_DIFFERS) |
---|
525 | if (!reexported_as_expected) { |
---|
526 | system(GBS_global_string("cp %s %s", outfile2, expectedfile)); |
---|
527 | } |
---|
528 | #else // !defined(TREEIO_AUTO_UPDATE_IF_REEXPORT_DIFFERS) |
---|
529 | TEST_EXPECT(reexported_as_expected); |
---|
530 | #endif |
---|
531 | |
---|
532 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(unlink(outfile2)); |
---|
533 | free(outfile2); |
---|
534 | #endif |
---|
535 | free(expectedfile); |
---|
536 | free(paramID); |
---|
537 | } |
---|
538 | } |
---|
539 | } |
---|
540 | } |
---|
541 | } |
---|
542 | TEST_ANNOTATE(NULp); |
---|
543 | |
---|
544 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(unlink(outfile)); |
---|
545 | free(outfile); |
---|
546 | |
---|
547 | GB_close(gb_main); |
---|
548 | } |
---|
549 | |
---|
550 | void TEST_CONSENSUS_TREE_functionality() { |
---|
551 | // functionality wanted in TreeNode (for use in library CONSENSUS_TREE) |
---|
552 | |
---|
553 | char *comment = NULp; |
---|
554 | |
---|
555 | SizeAwareTree *tree = DOWNCAST(SizeAwareTree*, TREE_load("trees/bg_exp_p_GrpLen_0.tree", |
---|
556 | new SizeAwareRoot, |
---|
557 | &comment, false, NULp)); |
---|
558 | // -> ../UNIT_TESTER/run/trees/bg_exp_p_GrpLen_0.tree |
---|
559 | |
---|
560 | #define ORG_1111 "(AticSea6,(RblAerol,RblMesop))" |
---|
561 | #define TOP_1111 "((RblAerol,RblMesop),AticSea6)" |
---|
562 | #define BOT_1111 ORG_1111 |
---|
563 | |
---|
564 | #define ORG_11121 "((DnrShiba,RsbElon4),MmbAlkal)" |
---|
565 | #define TOP_11121 ORG_11121 |
---|
566 | #define BOT_11121 "(MmbAlkal,(DnrShiba,RsbElon4))" |
---|
567 | |
---|
568 | #define ORG_11122 "((MabPelag,MabSalin),PaoMaris)" |
---|
569 | #define TOP_11122 ORG_11122 |
---|
570 | #define BOT_11122 "(PaoMaris,(MabPelag,MabSalin))" |
---|
571 | |
---|
572 | #define ORG_1112 "(" ORG_11121 "," ORG_11122 ")" |
---|
573 | #define TOP_1112 "(" TOP_11121 "," TOP_11122 ")" |
---|
574 | #define BOT_1112 "(" BOT_11121 "," BOT_11122 ")" |
---|
575 | #define EDG_1112 "(" TOP_11121 "," BOT_11122 ")" |
---|
576 | |
---|
577 | #define ORG_111 "(" ORG_1111 "," ORG_1112 ")" |
---|
578 | #define TOP_111 "(" TOP_1112 "," TOP_1111 ")" |
---|
579 | #define BOT_111 "(" BOT_1111 "," BOT_1112 ")" |
---|
580 | #define EDG_111 "(" EDG_1112 "," BOT_1111 ")" |
---|
581 | |
---|
582 | #define ORG_112 "(OnlGran2,RsnAnta2)" |
---|
583 | #define TOP_112 ORG_112 |
---|
584 | #define BOT_112 ORG_112 |
---|
585 | |
---|
586 | #define ORG_11 "(" ORG_111 "," ORG_112 ")" |
---|
587 | #define TOP_11 "(" TOP_111 "," TOP_112 ")" |
---|
588 | #define BOT_11 "(" BOT_112 "," BOT_111 ")" |
---|
589 | #define EDG_11 "(" EDG_111 "," BOT_112 ")" |
---|
590 | |
---|
591 | #define ORG_12 "(_MhuCaps,ThtNivea)" |
---|
592 | #define TOP_12 "(ThtNivea,_MhuCaps)" |
---|
593 | #define BOT_12 TOP_12 |
---|
594 | |
---|
595 | #define ORG_1 "(" ORG_11 "," ORG_12 ")" |
---|
596 | #define TOP_1 "(" TOP_11 "," TOP_12 ")" |
---|
597 | #define BOT_1 "(" BOT_12 "," BOT_11 ")" |
---|
598 | #define EDG_1 "(" EDG_11 "," BOT_12 ")" |
---|
599 | |
---|
600 | #define ORG_2 "((LbnMarin,LbnzAlb4),LbnAlexa)" |
---|
601 | #define TOP_2 ORG_2 |
---|
602 | #define BOT_2 "(LbnAlexa,(LbnMarin,LbnzAlb4))" |
---|
603 | |
---|
604 | // test swap_sons |
---|
605 | TEST_EXPECT_VALID_TREE(tree); |
---|
606 | TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" ORG_1 "," ORG_2 ");"); |
---|
607 | tree->swap_sons(); |
---|
608 | TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" ORG_2 "," ORG_1 ");"); |
---|
609 | |
---|
610 | // test reorder_tree |
---|
611 | TEST_EXPECT_VALID_TREE(tree); |
---|
612 | TreeOrder order[] = { BIG_BRANCHES_TO_TOP, BIG_BRANCHES_TO_BOTTOM, BIG_BRANCHES_TO_EDGE }; |
---|
613 | |
---|
614 | for (size_t o1 = 0; o1<ARRAY_ELEMS(order); ++o1) { |
---|
615 | TreeOrder to_order = order[o1]; |
---|
616 | for (size_t o2 = 0; o2<ARRAY_ELEMS(order); ++o2) { |
---|
617 | TreeOrder from_order = order[o2]; |
---|
618 | |
---|
619 | for (int rotate = 0; rotate<=1; ++rotate) { |
---|
620 | tree->reorder_tree(from_order); |
---|
621 | if (rotate) tree->rotate_subtree(); |
---|
622 | tree->reorder_tree(to_order); |
---|
623 | |
---|
624 | switch (to_order) { |
---|
625 | case BIG_BRANCHES_TO_TOP: TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" TOP_1 "," TOP_2 ");"); break; |
---|
626 | case BIG_BRANCHES_TO_EDGE: TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" EDG_1 "," BOT_2 ");"); break; |
---|
627 | case BIG_BRANCHES_TO_BOTTOM: TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" BOT_2 "," BOT_1 ");"); break; |
---|
628 | default: TEST_REJECT(true); break; |
---|
629 | } |
---|
630 | |
---|
631 | } |
---|
632 | } |
---|
633 | } |
---|
634 | |
---|
635 | // test rotate_subtree |
---|
636 | TEST_EXPECT_VALID_TREE(tree); |
---|
637 | tree->reorder_tree(BIG_BRANCHES_TO_TOP); |
---|
638 | tree->rotate_subtree(); TEST_EXPECT_NEWICK(nSIMPLE, tree, "((LbnAlexa,(LbnzAlb4,LbnMarin)),((_MhuCaps,ThtNivea),((RsnAnta2,OnlGran2),((AticSea6,(RblMesop,RblAerol)),((PaoMaris,(MabSalin,MabPelag)),(MmbAlkal,(RsbElon4,DnrShiba)))))));"); |
---|
639 | tree->rotate_subtree(); TEST_EXPECT_NEWICK(nSIMPLE, tree, "(" TOP_1 "," TOP_2 ");"); |
---|
640 | |
---|
641 | |
---|
642 | // test set_root |
---|
643 | TEST_EXPECT_VALID_TREE(tree); |
---|
644 | TreeNode *AticSea6Grandpa = tree->findLeafNamed("AticSea6")->get_father()->get_father(); |
---|
645 | TEST_REJECT_NULL(AticSea6Grandpa); |
---|
646 | TEST_EXPECT_VALID_TREE(AticSea6Grandpa); |
---|
647 | |
---|
648 | AticSea6Grandpa->set_root(); |
---|
649 | TEST_EXPECT_NEWICK(nSIMPLE, tree, |
---|
650 | "((" ORG_1112 "," TOP_1111 ")," // AticSea6 is direct son of TOP_1111 |
---|
651 | "((" ORG_2 "," TOP_12 ")," ORG_112 "));"); |
---|
652 | |
---|
653 | // test auto-detection of "best" root |
---|
654 | TEST_EXPECT_VALID_TREE(tree); |
---|
655 | tree->get_tree_root()->find_innermost_edge().set_root(); |
---|
656 | TEST_EXPECT_NEWICK(nLENGTH, tree, |
---|
657 | "((((LbnMarin:0.019,LbnzAlb4:0.003):0.016,LbnAlexa:0.032):0.122,(ThtNivea:0.230,_MhuCaps:0.194):0.427):0.076," |
---|
658 | "(((((DnrShiba:0.076,RsbElon4:0.053):0.034,MmbAlkal:0.069):0.016,((MabPelag:0.001,MabSalin:0.009):0.095,PaoMaris:0.092):0.036):0.030,((RblAerol:0.085,RblMesop:0.042):0.238,AticSea6:0.111):0.018):0.036,(OnlGran2:0.057,RsnAnta2:0.060):0.021):0.076);"); |
---|
659 | |
---|
660 | TEST_EXPECT_VALID_TREE(tree); |
---|
661 | destroy(tree); |
---|
662 | free(comment); |
---|
663 | } |
---|
664 | |
---|
665 | #endif // UNIT_TESTS |
---|
666 | |
---|
667 | // -------------------------------------------------------------------------------- |
---|