| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : arb_export_newick.cxx // |
|---|
| 4 | // Purpose : used by the SILVA pipeline to export trees for // |
|---|
| 5 | // which the tree leafs are labeled by NDS and not // |
|---|
| 6 | // by the species ID (name) of the sequence. // |
|---|
| 7 | // // |
|---|
| 8 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 9 | // http://www.arb-home.de/ // |
|---|
| 10 | // // |
|---|
| 11 | // =============================================================== // |
|---|
| 12 | |
|---|
| 13 | #include <TreeWrite.h> |
|---|
| 14 | #include <TreeNode.h> |
|---|
| 15 | #include <arb_handlers.h> |
|---|
| 16 | #include <arb_global_defs.h> |
|---|
| 17 | #include <gb_aci.h> |
|---|
| 18 | #include <string> |
|---|
| 19 | #include <cstdlib> |
|---|
| 20 | |
|---|
| 21 | using namespace std; |
|---|
| 22 | |
|---|
| 23 | class CLI : virtual Noncopyable { |
|---|
| 24 | bool helpWanted; |
|---|
| 25 | GB_ERROR error; |
|---|
| 26 | |
|---|
| 27 | string database; // name of input database |
|---|
| 28 | string tree; // name of the tree to export |
|---|
| 29 | string newick_file; // name of the file the newick tree is exported to |
|---|
| 30 | string leaf_aci; // aci to generate the leaf names |
|---|
| 31 | LabelQuoting quoting_mode; // none, single or double. single and double will be forced |
|---|
| 32 | bool add_branch_length; // true -> branch lengths added to the newick tree |
|---|
| 33 | bool add_bootstraps; // true -> bootstrap values added to the newick tree |
|---|
| 34 | bool add_group_names; // true -> group names added to the newick tree |
|---|
| 35 | bool replace_problem_chars; // true -> problem chars are replaced in the newick tree |
|---|
| 36 | bool pretty; // true -> prettify the newick tree |
|---|
| 37 | |
|---|
| 38 | static inline const char *getarg(int& argc, const char**& argv) { |
|---|
| 39 | return argc>0 ? (--argc,*argv++) : NULp; |
|---|
| 40 | } |
|---|
| 41 | inline const char *expect_arg(int& argc, const char**& argv) { |
|---|
| 42 | const char *arg = getarg(argc, argv); |
|---|
| 43 | if (!arg) { |
|---|
| 44 | error = "expected argument missing"; |
|---|
| 45 | arg = ""; |
|---|
| 46 | } |
|---|
| 47 | return arg; |
|---|
| 48 | } |
|---|
| 49 | inline LabelQuoting parse_quoting_mode(int& argc, const char**& argv) { |
|---|
| 50 | const char *quoting_mode_str= expect_arg(argc, argv); |
|---|
| 51 | if (strcasecmp(quoting_mode_str, "none") == 0) { |
|---|
| 52 | return LABEL_DISALLOW_QUOTES; |
|---|
| 53 | } else if (strcasecmp(quoting_mode_str, "single") == 0) { |
|---|
| 54 | return LabelQuoting(LABEL_SINGLE_QUOTES | LABEL_FORCE_QUOTES); |
|---|
| 55 | } else if (strcasecmp(quoting_mode_str, "double") == 0) { |
|---|
| 56 | return LabelQuoting(LABEL_DOUBLE_QUOTES | LABEL_FORCE_QUOTES); |
|---|
| 57 | } else { |
|---|
| 58 | error = GBS_global_string("unknown quoting mode '%s'", quoting_mode_str); |
|---|
| 59 | return LABEL_DISALLOW_QUOTES; |
|---|
| 60 | } |
|---|
| 61 | } |
|---|
| 62 | |
|---|
| 63 | void parse(int& argc, const char**& argv) { |
|---|
| 64 | const char *arg = getarg(argc, argv); |
|---|
| 65 | if (arg) { |
|---|
| 66 | if (strcmp(arg, "--db") == 0) database = expect_arg(argc, argv); |
|---|
| 67 | else if (strcmp(arg, "--tree") == 0) tree = expect_arg(argc, argv); |
|---|
| 68 | else if (strcmp(arg, "--newick-file") == 0) newick_file = expect_arg(argc, argv); |
|---|
| 69 | else if (strcmp(arg, "--leaf-aci") == 0) leaf_aci = expect_arg(argc, argv); |
|---|
| 70 | else if (strcmp(arg, "--quoting") == 0) quoting_mode = parse_quoting_mode(argc, argv); |
|---|
| 71 | else if (strcmp(arg, "--add-branch-lengths") == 0) add_branch_length = true; |
|---|
| 72 | else if (strcmp(arg, "--add-bootstraps") == 0) add_bootstraps = true; |
|---|
| 73 | else if (strcmp(arg, "--add-group-names") == 0) add_group_names = true; |
|---|
| 74 | else if (strcmp(arg, "--replace-problem-chars") == 0) replace_problem_chars = true; |
|---|
| 75 | else if (strcmp(arg, "--pretty") == 0) pretty = true; |
|---|
| 76 | else if (strcmp(arg, "--help") == 0) helpWanted = true; |
|---|
| 77 | else { |
|---|
| 78 | error = GBS_global_string("unexpected argument '%s'", arg); |
|---|
| 79 | } |
|---|
| 80 | } |
|---|
| 81 | } |
|---|
| 82 | void check_required_arguments() { |
|---|
| 83 | if (database.empty()) error = "no input database specified"; |
|---|
| 84 | else if (tree.empty()) error = "no tree name specified"; |
|---|
| 85 | else if (newick_file.empty()) error = "no output file specified"; |
|---|
| 86 | } |
|---|
| 87 | |
|---|
| 88 | public: |
|---|
| 89 | CLI(int argc, const char **argv) : |
|---|
| 90 | helpWanted(false), |
|---|
| 91 | error(NULp), |
|---|
| 92 | leaf_aci("readdb(\"name\")"), |
|---|
| 93 | quoting_mode(LABEL_DISALLOW_QUOTES), |
|---|
| 94 | add_branch_length(false), |
|---|
| 95 | add_bootstraps(false), |
|---|
| 96 | add_group_names(false), |
|---|
| 97 | replace_problem_chars(false), |
|---|
| 98 | pretty(false) |
|---|
| 99 | |
|---|
| 100 | { |
|---|
| 101 | --argc; ++argv; |
|---|
| 102 | while (!error && argc>0 && !helpWanted) { |
|---|
| 103 | parse(argc, argv); |
|---|
| 104 | } |
|---|
| 105 | |
|---|
| 106 | if (!helpWanted) { // do not check extended conditions, if '--help' seen |
|---|
| 107 | if (!error) { |
|---|
| 108 | check_required_arguments(); |
|---|
| 109 | if (error) helpWanted = true; |
|---|
| 110 | } |
|---|
| 111 | } |
|---|
| 112 | } |
|---|
| 113 | |
|---|
| 114 | void show_help() const { |
|---|
| 115 | fputs("\n" |
|---|
| 116 | "arb_export_newick -- export a tree in newick format\n" |
|---|
| 117 | "Usage: arb_export_newick [switches]\n" |
|---|
| 118 | "\n" |
|---|
| 119 | "mandatory arguments:\n" |
|---|
| 120 | "--db <dbname> ARB database to export from\n" |
|---|
| 121 | "--tree <treename> name of the tree to export\n" |
|---|
| 122 | "--newick-file <outname> name of generated newick file\n" |
|---|
| 123 | "\n" |
|---|
| 124 | "switches:\n" |
|---|
| 125 | "--leaf-aci <aci> specify content for the leaf names using ACI\n" |
|---|
| 126 | " (default: \"readdb(name)\"; see http://help.arb-home.de/aci.html)\n" |
|---|
| 127 | "--quoting <mode> none, single, double. Single and double are forced.\n" |
|---|
| 128 | " (default: none)\n" |
|---|
| 129 | "--add-branch-lengths add the branch lengths to the newick file.\n" |
|---|
| 130 | " (default: branch lengths are omitted)\n" |
|---|
| 131 | "--add-bootstraps add the bootstrap values to the newick file.\n" |
|---|
| 132 | " (default: bootstrap values are omitted)\n" |
|---|
| 133 | "--add-group-names add the group names to the newick file.\n" |
|---|
| 134 | " (default: group names are omitted)\n" |
|---|
| 135 | "--replace-problem-chars problematic characters in names will be replaced\n" |
|---|
| 136 | " (default: no characters are replaced)\n" |
|---|
| 137 | "--pretty prettify the newick tree\n" |
|---|
| 138 | " (default: tree is not prettified)\n" |
|---|
| 139 | "--help show this help message\n" |
|---|
| 140 | "\n" |
|---|
| 141 | ,stderr); |
|---|
| 142 | } |
|---|
| 143 | |
|---|
| 144 | bool help_wanted() const { return helpWanted; } |
|---|
| 145 | GB_ERROR get_error() const { return error; } |
|---|
| 146 | |
|---|
| 147 | const char *get_database() const { return database.c_str(); } |
|---|
| 148 | const char *get_tree() const { return tree.c_str(); } |
|---|
| 149 | const char *get_newick_file() const { return newick_file.c_str(); } |
|---|
| 150 | const char *get_leaf_aci() const { return leaf_aci.c_str(); } |
|---|
| 151 | LabelQuoting get_quoting_mode() const { return quoting_mode; } |
|---|
| 152 | |
|---|
| 153 | bool shall_add_branch_length() const { return add_branch_length; } |
|---|
| 154 | bool shall_add_bootstraps() const { return add_bootstraps; } |
|---|
| 155 | bool shall_add_group_names() const { return add_group_names; } |
|---|
| 156 | bool shall_replace_problem_chars() const { return replace_problem_chars; } |
|---|
| 157 | bool shall_be_pretty() const { return pretty; } |
|---|
| 158 | }; |
|---|
| 159 | |
|---|
| 160 | |
|---|
| 161 | class ACI_Labeler: public TreeLabeler { |
|---|
| 162 | SmartCharPtr leaf_aci; |
|---|
| 163 | |
|---|
| 164 | public: |
|---|
| 165 | explicit ACI_Labeler(const char *leaf_aci_) : leaf_aci(strdup(leaf_aci_)) {} |
|---|
| 166 | |
|---|
| 167 | const char *speciesLabel(GBDATA *gb_main, GBDATA *gb_species, TreeNode *, const char *tree_name) const OVERRIDE { |
|---|
| 168 | GBL_env env(gb_main, tree_name); |
|---|
| 169 | GBL_call_env callEnv(gb_species, env); |
|---|
| 170 | |
|---|
| 171 | char* node_text = GB_command_interpreter_in_env("", leaf_aci.content(), callEnv); |
|---|
| 172 | if (!node_text) { |
|---|
| 173 | GB_ERROR ndsError = GB_await_error(); |
|---|
| 174 | node_text = GBS_global_string_copy("<error: %s>", ndsError); |
|---|
| 175 | GB_export_error(ndsError); |
|---|
| 176 | } |
|---|
| 177 | |
|---|
| 178 | RETURN_LOCAL_ALLOC(node_text); |
|---|
| 179 | } |
|---|
| 180 | const char *groupLabel(GBDATA *, GBDATA *, TreeNode *innerNode, const char *) const { |
|---|
| 181 | // ACI is not used for group names |
|---|
| 182 | return innerNode->name; |
|---|
| 183 | } |
|---|
| 184 | }; |
|---|
| 185 | |
|---|
| 186 | static GB_ERROR export_newick(const CLI& args) { |
|---|
| 187 | |
|---|
| 188 | ARB_redirect_handlers_to(stderr, stderr); |
|---|
| 189 | GB_ERROR error = NULp; |
|---|
| 190 | |
|---|
| 191 | const char *dbname = args.get_database(); |
|---|
| 192 | GB_shell shell; |
|---|
| 193 | GBDATA *gb_main = GB_open(dbname, "r"); |
|---|
| 194 | |
|---|
| 195 | if (!gb_main) { |
|---|
| 196 | error = GB_await_error(); |
|---|
| 197 | } |
|---|
| 198 | else { |
|---|
| 199 | ACI_Labeler labeler(args.get_leaf_aci()); |
|---|
| 200 | |
|---|
| 201 | LabelQuoting quoting_mode = args.get_quoting_mode(); |
|---|
| 202 | if (args.shall_replace_problem_chars()) { |
|---|
| 203 | quoting_mode = LabelQuoting(quoting_mode|LABEL_FORCE_REPLACE); |
|---|
| 204 | } |
|---|
| 205 | |
|---|
| 206 | error = TREE_write_Newick(gb_main, |
|---|
| 207 | args.get_tree(), |
|---|
| 208 | labeler, |
|---|
| 209 | args.shall_add_branch_length(), |
|---|
| 210 | args.shall_add_bootstraps(), |
|---|
| 211 | args.shall_add_group_names(), |
|---|
| 212 | args.shall_be_pretty(), |
|---|
| 213 | quoting_mode, |
|---|
| 214 | args.get_newick_file()); |
|---|
| 215 | |
|---|
| 216 | // get possible NDS error, too |
|---|
| 217 | if (!error) error = GB_incur_error(); |
|---|
| 218 | GB_close(gb_main); |
|---|
| 219 | } |
|---|
| 220 | |
|---|
| 221 | return error; |
|---|
| 222 | } |
|---|
| 223 | |
|---|
| 224 | int main(int argc, char **argv) { |
|---|
| 225 | |
|---|
| 226 | CLI args(argc, const_cast<const char**>(argv)); |
|---|
| 227 | GB_ERROR error = args.get_error(); |
|---|
| 228 | |
|---|
| 229 | if (!error && args.help_wanted()) { |
|---|
| 230 | args.show_help(); |
|---|
| 231 | return EXIT_FAILURE; |
|---|
| 232 | } |
|---|
| 233 | |
|---|
| 234 | if (!error) error = export_newick(args); |
|---|
| 235 | |
|---|
| 236 | if (error) { |
|---|
| 237 | fprintf(stderr, "Error: %s\n", error); |
|---|
| 238 | return EXIT_FAILURE; |
|---|
| 239 | } |
|---|
| 240 | return EXIT_SUCCESS; |
|---|
| 241 | } |
|---|