| 1 | // ============================================================= // |
|---|
| 2 | // // |
|---|
| 3 | // File : arb_read_tree.cxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // ============================================================= // |
|---|
| 10 | |
|---|
| 11 | #include <TreeRead.h> |
|---|
| 12 | #include <TreeNode.h> |
|---|
| 13 | #include <arb_strbuf.h> |
|---|
| 14 | #include <arb_defs.h> |
|---|
| 15 | #include <ctime> |
|---|
| 16 | |
|---|
| 17 | |
|---|
| 18 | static void add_bootstrap(TreeNode *node, double hundred) { |
|---|
| 19 | // add_bootstrap interprets the length of the branches as bootstrap value |
|---|
| 20 | // (this is needed by Phylip DNAPARS/PROTPARS with bootstrapping) |
|---|
| 21 | // |
|---|
| 22 | // 'hundred' specifies which value represents 100% |
|---|
| 23 | |
|---|
| 24 | // covered by unittest in ./arb_test.cxx@TEST_SLOW_arb_read_tree |
|---|
| 25 | if (node->is_leaf()) { |
|---|
| 26 | node->remove_remark(); |
|---|
| 27 | return; |
|---|
| 28 | } |
|---|
| 29 | |
|---|
| 30 | node->leftlen /= hundred; |
|---|
| 31 | node->rightlen /= hundred; |
|---|
| 32 | |
|---|
| 33 | double left_bs = node->leftlen * 100.0; |
|---|
| 34 | double right_bs = node->rightlen * 100.0; |
|---|
| 35 | |
|---|
| 36 | #if defined(DEBUG) && 0 |
|---|
| 37 | fprintf(stderr, "node->leftlen = %f left_bs = %f\n", node->leftlen, left_bs); |
|---|
| 38 | fprintf(stderr, "node->rightlen = %f right_bs = %f\n", node->rightlen, right_bs); |
|---|
| 39 | #endif // DEBUG |
|---|
| 40 | |
|---|
| 41 | if (!node->leftson->is_leaf()) { |
|---|
| 42 | node->leftson ->set_bootstrap(left_bs); |
|---|
| 43 | node->leftlen = DEFAULT_BRANCH_LENGTH; // reset branchlengths |
|---|
| 44 | add_bootstrap(node->get_leftson(), hundred); |
|---|
| 45 | } |
|---|
| 46 | |
|---|
| 47 | if (!node->rightson->is_leaf()) { |
|---|
| 48 | node->rightson->set_bootstrap(right_bs); |
|---|
| 49 | node->rightlen = DEFAULT_BRANCH_LENGTH; // @@@ also do for leafs? (does not seem to affect test results; why?) |
|---|
| 50 | add_bootstrap(node->get_rightson(), hundred); |
|---|
| 51 | } |
|---|
| 52 | } |
|---|
| 53 | |
|---|
| 54 | static GBDATA *gb_msg_main = NULp; |
|---|
| 55 | |
|---|
| 56 | static void show_message(const char *msg) { |
|---|
| 57 | if (gb_msg_main) { |
|---|
| 58 | GBT_message(gb_msg_main, msg); |
|---|
| 59 | } |
|---|
| 60 | else { |
|---|
| 61 | fflush(stdout); |
|---|
| 62 | printf("arb_read_tree: %s\n", msg); |
|---|
| 63 | } |
|---|
| 64 | } |
|---|
| 65 | static void show_error(GB_ERROR error) { |
|---|
| 66 | if (error) show_message(GBS_global_string("Error running arb_read_tree (%s)", error)); |
|---|
| 67 | } |
|---|
| 68 | |
|---|
| 69 | static void error_with_usage(GB_ERROR error) { |
|---|
| 70 | fputs("Usage: arb_read_tree [options] tree_name treefile [comment]\n" |
|---|
| 71 | "Available options:\n" |
|---|
| 72 | " -db database savename specify database and savename (default is 'running ARB')\n" |
|---|
| 73 | " -scale factor scale branchlengths by 'factor'\n" |
|---|
| 74 | " -consense numberOfTrees reinterpret branchlengths as consense values\n" |
|---|
| 75 | " -commentFromFile file read tree comment from 'file'\n" |
|---|
| 76 | , stdout); |
|---|
| 77 | |
|---|
| 78 | show_error(error); |
|---|
| 79 | } |
|---|
| 80 | |
|---|
| 81 | struct parameters { |
|---|
| 82 | const char *dbname; |
|---|
| 83 | const char *dbsavename; |
|---|
| 84 | const char *tree_name; |
|---|
| 85 | const char *treefilename; |
|---|
| 86 | const char *comment; |
|---|
| 87 | const char *commentFile; |
|---|
| 88 | |
|---|
| 89 | bool scale; |
|---|
| 90 | double scale_factor; |
|---|
| 91 | |
|---|
| 92 | bool consense; |
|---|
| 93 | int calculated_trees; |
|---|
| 94 | |
|---|
| 95 | parameters() |
|---|
| 96 | : dbname(":"), |
|---|
| 97 | dbsavename(NULp), |
|---|
| 98 | tree_name(NULp), |
|---|
| 99 | treefilename(NULp), |
|---|
| 100 | comment(NULp), |
|---|
| 101 | commentFile(NULp), |
|---|
| 102 | scale(false), |
|---|
| 103 | scale_factor(0.0), |
|---|
| 104 | consense(false), |
|---|
| 105 | calculated_trees(0) |
|---|
| 106 | {} |
|---|
| 107 | |
|---|
| 108 | #define SHIFT_ARGS(off) do { argc -= off; argv += off; } while (0) |
|---|
| 109 | #define SHIFT_NONSWITCHES(off) do { nonSwitches -= off; nonSwitch += off; } while (0) |
|---|
| 110 | |
|---|
| 111 | GB_ERROR scan(int argc, char **argv) { |
|---|
| 112 | GB_ERROR error = NULp; |
|---|
| 113 | |
|---|
| 114 | const char *nonSwitch_buf[20]; |
|---|
| 115 | const char **nonSwitch = nonSwitch_buf; |
|---|
| 116 | int nonSwitches = 0; |
|---|
| 117 | |
|---|
| 118 | SHIFT_ARGS(1); // position onto first argument |
|---|
| 119 | |
|---|
| 120 | while (argc>0 && !error) { |
|---|
| 121 | if (strcmp("-scale", argv[0]) == 0) { |
|---|
| 122 | scale = true; |
|---|
| 123 | if (argc<2) error = "-scale expects a 2nd argument (scale factor)"; |
|---|
| 124 | else { |
|---|
| 125 | scale_factor = atof(argv[1]); |
|---|
| 126 | SHIFT_ARGS(2); |
|---|
| 127 | } |
|---|
| 128 | } |
|---|
| 129 | else if (strcmp("-consense", argv[0]) == 0) { |
|---|
| 130 | consense = true; |
|---|
| 131 | if (argc<2) error = "-consense expects a 2nd argument (number of trees)"; |
|---|
| 132 | else { |
|---|
| 133 | calculated_trees = atoi(argv[1]); |
|---|
| 134 | if (calculated_trees < 1) { |
|---|
| 135 | error = GBS_global_string("Illegal # of trees (%i) for -consense (minimum is 1)", calculated_trees); |
|---|
| 136 | } |
|---|
| 137 | else SHIFT_ARGS(2); |
|---|
| 138 | } |
|---|
| 139 | } |
|---|
| 140 | else if (strcmp("-commentFromFile", argv[0]) == 0) { |
|---|
| 141 | if (argc<2) error = "-commentFromFile expects a 2nd argument (file containing comment)"; |
|---|
| 142 | else { |
|---|
| 143 | commentFile = argv[1]; |
|---|
| 144 | SHIFT_ARGS(2); |
|---|
| 145 | } |
|---|
| 146 | } |
|---|
| 147 | else if (strcmp("-db", argv[0]) == 0) { |
|---|
| 148 | if (argc<3) error = "-db expects two arguments (database and savename)"; |
|---|
| 149 | else { |
|---|
| 150 | dbname = argv[1]; |
|---|
| 151 | dbsavename = argv[2]; |
|---|
| 152 | SHIFT_ARGS(3); |
|---|
| 153 | } |
|---|
| 154 | } |
|---|
| 155 | else { |
|---|
| 156 | nonSwitch[nonSwitches++] = argv[0]; |
|---|
| 157 | SHIFT_ARGS(1); |
|---|
| 158 | } |
|---|
| 159 | } |
|---|
| 160 | |
|---|
| 161 | if (!error) { |
|---|
| 162 | if (!nonSwitches) error = "Missing argument 'tree_name'"; |
|---|
| 163 | else { |
|---|
| 164 | tree_name = nonSwitch[0]; |
|---|
| 165 | SHIFT_NONSWITCHES(1); |
|---|
| 166 | } |
|---|
| 167 | } |
|---|
| 168 | if (!error) { |
|---|
| 169 | if (!nonSwitches) error = "Missing argument 'treefile'"; |
|---|
| 170 | else { |
|---|
| 171 | treefilename = nonSwitch[0]; |
|---|
| 172 | SHIFT_NONSWITCHES(1); |
|---|
| 173 | } |
|---|
| 174 | } |
|---|
| 175 | if (!error && nonSwitches>0) { |
|---|
| 176 | comment = nonSwitch[0]; |
|---|
| 177 | SHIFT_NONSWITCHES(1); |
|---|
| 178 | } |
|---|
| 179 | |
|---|
| 180 | if (!error && nonSwitches>0) { |
|---|
| 181 | error = GBS_global_string("unexpected argument(s): %s ..", nonSwitch[0]); |
|---|
| 182 | } |
|---|
| 183 | return error; |
|---|
| 184 | } |
|---|
| 185 | }; |
|---|
| 186 | |
|---|
| 187 | int main(int argc, char **argv) { |
|---|
| 188 | parameters param; |
|---|
| 189 | GB_ERROR error = param.scan(argc, argv); |
|---|
| 190 | |
|---|
| 191 | GBDATA *gb_main = NULp; |
|---|
| 192 | bool connectToArb = strcmp(param.dbname, ":") == 0; |
|---|
| 193 | GB_shell shell; |
|---|
| 194 | |
|---|
| 195 | if (!error || connectToArb) { |
|---|
| 196 | gb_main = GB_open(param.dbname, connectToArb ? "r" : "rw"); |
|---|
| 197 | if (connectToArb) gb_msg_main = gb_main; |
|---|
| 198 | } |
|---|
| 199 | |
|---|
| 200 | if (error) error_with_usage(error); |
|---|
| 201 | else { |
|---|
| 202 | if (!gb_main) { |
|---|
| 203 | if (connectToArb) error = "you have to start an arbdb server first"; |
|---|
| 204 | else error = GBS_global_string("can't open db (Reason: %s)", GB_await_error()); |
|---|
| 205 | } |
|---|
| 206 | |
|---|
| 207 | char *comment_from_file = NULp; |
|---|
| 208 | char *comment_from_treefile = NULp; |
|---|
| 209 | TreeNode *tree = NULp; |
|---|
| 210 | |
|---|
| 211 | if (!error) { |
|---|
| 212 | if (param.commentFile) { |
|---|
| 213 | comment_from_file = GB_read_file(param.commentFile); |
|---|
| 214 | if (!comment_from_file) { |
|---|
| 215 | comment_from_file = GBS_global_string_copy("Error reading from comment-file '%s':\n%s", param.commentFile, GB_await_error()); |
|---|
| 216 | } |
|---|
| 217 | } |
|---|
| 218 | |
|---|
| 219 | show_message(GBS_global_string("Reading tree from '%s' ..", param.treefilename)); |
|---|
| 220 | { |
|---|
| 221 | char *warnings = NULp; |
|---|
| 222 | bool allow_length_scaling = !param.consense && !param.scale; |
|---|
| 223 | |
|---|
| 224 | tree = TREE_load(param.treefilename, new SimpleRoot, &comment_from_treefile, allow_length_scaling, &warnings); |
|---|
| 225 | if (!tree) { |
|---|
| 226 | error = GB_await_error(); |
|---|
| 227 | } |
|---|
| 228 | else if (warnings) { |
|---|
| 229 | show_message(warnings); |
|---|
| 230 | free(warnings); |
|---|
| 231 | } |
|---|
| 232 | } |
|---|
| 233 | } |
|---|
| 234 | |
|---|
| 235 | if (!error) { |
|---|
| 236 | if (param.scale) { |
|---|
| 237 | show_message(GBS_global_string("Scaling branch lengths by factor %f", param.scale_factor)); |
|---|
| 238 | TREE_scale(tree, param.scale_factor, 1.0); |
|---|
| 239 | } |
|---|
| 240 | |
|---|
| 241 | if (param.consense) { |
|---|
| 242 | if (param.calculated_trees < 1) { |
|---|
| 243 | error = "Minimum for -consense is 1"; |
|---|
| 244 | } |
|---|
| 245 | else { |
|---|
| 246 | show_message(GBS_global_string("Reinterpreting branch lengths as consense values (%i trees)", param.calculated_trees)); |
|---|
| 247 | add_bootstrap(tree, param.calculated_trees); |
|---|
| 248 | } |
|---|
| 249 | } |
|---|
| 250 | } |
|---|
| 251 | |
|---|
| 252 | if (!error) { |
|---|
| 253 | error = GB_begin_transaction(gb_main); |
|---|
| 254 | |
|---|
| 255 | if (!error && tree->is_leaf()) error = "Cannot load tree (need at least 2 leafs)"; |
|---|
| 256 | if (!error) error = GBT_write_tree(gb_main, param.tree_name, tree); |
|---|
| 257 | |
|---|
| 258 | if (!error) { |
|---|
| 259 | // write tree comment |
|---|
| 260 | const char *comments[] = { |
|---|
| 261 | param.comment, |
|---|
| 262 | comment_from_file, |
|---|
| 263 | comment_from_treefile, |
|---|
| 264 | }; |
|---|
| 265 | |
|---|
| 266 | GBS_strstruct tree_remark(5000); |
|---|
| 267 | bool empty = true; |
|---|
| 268 | |
|---|
| 269 | for (size_t c = 0; c<ARRAY_ELEMS(comments); c++) { |
|---|
| 270 | if (comments[c]) { |
|---|
| 271 | if (!empty) tree_remark.put('\n'); |
|---|
| 272 | tree_remark.cat(comments[c]); |
|---|
| 273 | empty = false; |
|---|
| 274 | } |
|---|
| 275 | } |
|---|
| 276 | |
|---|
| 277 | error = GBT_write_tree_remark(gb_main, param.tree_name, tree_remark.get_data()); |
|---|
| 278 | } |
|---|
| 279 | |
|---|
| 280 | error = GB_end_transaction(gb_main, error); |
|---|
| 281 | } |
|---|
| 282 | |
|---|
| 283 | if (error) show_error(error); |
|---|
| 284 | else show_message(GBS_global_string("Tree %s read into the database", param.tree_name)); |
|---|
| 285 | |
|---|
| 286 | UNCOVERED(); |
|---|
| 287 | destroy(tree); |
|---|
| 288 | free(comment_from_file); |
|---|
| 289 | free(comment_from_treefile); |
|---|
| 290 | } |
|---|
| 291 | |
|---|
| 292 | if (gb_main) { |
|---|
| 293 | if (!error && !connectToArb) { |
|---|
| 294 | error = GB_save_as(gb_main, param.dbsavename, "a"); |
|---|
| 295 | if (error) show_error(error); |
|---|
| 296 | } |
|---|
| 297 | GB_close(gb_main); |
|---|
| 298 | } |
|---|
| 299 | |
|---|
| 300 | return error ? EXIT_FAILURE : EXIT_SUCCESS; |
|---|
| 301 | } |
|---|