1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : arb_export_newick.cxx // |
---|
4 | // Purpose : used by the SILVA pipeline to export trees for // |
---|
5 | // which the tree leafs are labeled by NDS and not // |
---|
6 | // by the species ID (name) of the sequence. // |
---|
7 | // // |
---|
8 | // Institute of Microbiology (Technical University Munich) // |
---|
9 | // http://www.arb-home.de/ // |
---|
10 | // // |
---|
11 | // =============================================================== // |
---|
12 | |
---|
13 | #include <TreeWrite.h> |
---|
14 | #include <TreeNode.h> |
---|
15 | #include <arb_handlers.h> |
---|
16 | #include <arb_global_defs.h> |
---|
17 | #include <gb_aci.h> |
---|
18 | #include <string> |
---|
19 | #include <cstdlib> |
---|
20 | |
---|
21 | using namespace std; |
---|
22 | |
---|
23 | class CLI : virtual Noncopyable { |
---|
24 | bool helpWanted; |
---|
25 | GB_ERROR error; |
---|
26 | |
---|
27 | string database; // name of input database |
---|
28 | string tree; // name of the tree to export |
---|
29 | string newick_file; // name of the file the newick tree is exported to |
---|
30 | string leaf_aci; // aci to generate the leaf names |
---|
31 | LabelQuoting quoting_mode; // none, single or double. single and double will be forced |
---|
32 | bool add_branch_length; // true -> branch lengths added to the newick tree |
---|
33 | bool add_bootstraps; // true -> bootstrap values added to the newick tree |
---|
34 | bool add_group_names; // true -> group names added to the newick tree |
---|
35 | bool replace_problem_chars; // true -> problem chars are replaced in the newick tree |
---|
36 | bool pretty; // true -> prettify the newick tree |
---|
37 | |
---|
38 | static inline const char *getarg(int& argc, const char**& argv) { |
---|
39 | return argc>0 ? (--argc,*argv++) : NULp; |
---|
40 | } |
---|
41 | inline const char *expect_arg(int& argc, const char**& argv) { |
---|
42 | const char *arg = getarg(argc, argv); |
---|
43 | if (!arg) { |
---|
44 | error = "expected argument missing"; |
---|
45 | arg = ""; |
---|
46 | } |
---|
47 | return arg; |
---|
48 | } |
---|
49 | inline LabelQuoting parse_quoting_mode(int& argc, const char**& argv) { |
---|
50 | const char *quoting_mode_str= expect_arg(argc, argv); |
---|
51 | if (strcasecmp(quoting_mode_str, "none") == 0) { |
---|
52 | return LABEL_DISALLOW_QUOTES; |
---|
53 | } else if (strcasecmp(quoting_mode_str, "single") == 0) { |
---|
54 | return LabelQuoting(LABEL_SINGLE_QUOTES | LABEL_FORCE_QUOTES); |
---|
55 | } else if (strcasecmp(quoting_mode_str, "double") == 0) { |
---|
56 | return LabelQuoting(LABEL_DOUBLE_QUOTES | LABEL_FORCE_QUOTES); |
---|
57 | } else { |
---|
58 | error = GBS_global_string("unknown quoting mode '%s'", quoting_mode_str); |
---|
59 | return LABEL_DISALLOW_QUOTES; |
---|
60 | } |
---|
61 | } |
---|
62 | |
---|
63 | void parse(int& argc, const char**& argv) { |
---|
64 | const char *arg = getarg(argc, argv); |
---|
65 | if (arg) { |
---|
66 | if (strcmp(arg, "--db") == 0) database = expect_arg(argc, argv); |
---|
67 | else if (strcmp(arg, "--tree") == 0) tree = expect_arg(argc, argv); |
---|
68 | else if (strcmp(arg, "--newick-file") == 0) newick_file = expect_arg(argc, argv); |
---|
69 | else if (strcmp(arg, "--leaf-aci") == 0) leaf_aci = expect_arg(argc, argv); |
---|
70 | else if (strcmp(arg, "--quoting") == 0) quoting_mode = parse_quoting_mode(argc, argv); |
---|
71 | else if (strcmp(arg, "--add-branch-lengths") == 0) add_branch_length = true; |
---|
72 | else if (strcmp(arg, "--add-bootstraps") == 0) add_bootstraps = true; |
---|
73 | else if (strcmp(arg, "--add-group-names") == 0) add_group_names = true; |
---|
74 | else if (strcmp(arg, "--replace-problem-chars") == 0) replace_problem_chars = true; |
---|
75 | else if (strcmp(arg, "--pretty") == 0) pretty = true; |
---|
76 | else if (strcmp(arg, "--help") == 0) helpWanted = true; |
---|
77 | else { |
---|
78 | error = GBS_global_string("unexpected argument '%s'", arg); |
---|
79 | } |
---|
80 | } |
---|
81 | } |
---|
82 | void check_required_arguments() { |
---|
83 | if (database.empty()) error = "no input database specified"; |
---|
84 | else if (tree.empty()) error = "no tree name specified"; |
---|
85 | else if (newick_file.empty()) error = "no output file specified"; |
---|
86 | } |
---|
87 | |
---|
88 | public: |
---|
89 | CLI(int argc, const char **argv) : |
---|
90 | helpWanted(false), |
---|
91 | error(NULp), |
---|
92 | leaf_aci("readdb(\"name\")"), |
---|
93 | quoting_mode(LABEL_DISALLOW_QUOTES), |
---|
94 | add_branch_length(false), |
---|
95 | add_bootstraps(false), |
---|
96 | add_group_names(false), |
---|
97 | replace_problem_chars(false), |
---|
98 | pretty(false) |
---|
99 | |
---|
100 | { |
---|
101 | --argc; ++argv; |
---|
102 | while (!error && argc>0 && !helpWanted) { |
---|
103 | parse(argc, argv); |
---|
104 | } |
---|
105 | |
---|
106 | if (!helpWanted) { // do not check extended conditions, if '--help' seen |
---|
107 | if (!error) { |
---|
108 | check_required_arguments(); |
---|
109 | if (error) helpWanted = true; |
---|
110 | } |
---|
111 | } |
---|
112 | } |
---|
113 | |
---|
114 | void show_help() const { |
---|
115 | fputs("\n" |
---|
116 | "arb_export_newick -- export a tree in newick format\n" |
---|
117 | "Usage: arb_export_newick [switches]\n" |
---|
118 | "\n" |
---|
119 | "mandatory arguments:\n" |
---|
120 | "--db <dbname> ARB database to export from\n" |
---|
121 | "--tree <treename> name of the tree to export\n" |
---|
122 | "--newick-file <outname> name of generated newick file\n" |
---|
123 | "\n" |
---|
124 | "switches:\n" |
---|
125 | "--leaf-aci <aci> specify content for the leaf names using ACI\n" |
---|
126 | " (default: \"readdb(name)\"; see http://help.arb-home.de/aci.html)\n" |
---|
127 | "--quoting <mode> none, single, double. Single and double are forced.\n" |
---|
128 | " (default: none)\n" |
---|
129 | "--add-branch-lengths add the branch lengths to the newick file.\n" |
---|
130 | " (default: branch lengths are omitted)\n" |
---|
131 | "--add-bootstraps add the bootstrap values to the newick file.\n" |
---|
132 | " (default: bootstrap values are omitted)\n" |
---|
133 | "--add-group-names add the group names to the newick file.\n" |
---|
134 | " (default: group names are omitted)\n" |
---|
135 | "--replace-problem-chars problematic characters in names will be replaced\n" |
---|
136 | " (default: no characters are replaced)\n" |
---|
137 | "--pretty prettify the newick tree\n" |
---|
138 | " (default: tree is not prettified)\n" |
---|
139 | "--help show this help message\n" |
---|
140 | "\n" |
---|
141 | ,stderr); |
---|
142 | } |
---|
143 | |
---|
144 | bool help_wanted() const { return helpWanted; } |
---|
145 | GB_ERROR get_error() const { return error; } |
---|
146 | |
---|
147 | const char *get_database() const { return database.c_str(); } |
---|
148 | const char *get_tree() const { return tree.c_str(); } |
---|
149 | const char *get_newick_file() const { return newick_file.c_str(); } |
---|
150 | const char *get_leaf_aci() const { return leaf_aci.c_str(); } |
---|
151 | LabelQuoting get_quoting_mode() const { return quoting_mode; } |
---|
152 | |
---|
153 | bool shall_add_branch_length() const { return add_branch_length; } |
---|
154 | bool shall_add_bootstraps() const { return add_bootstraps; } |
---|
155 | bool shall_add_group_names() const { return add_group_names; } |
---|
156 | bool shall_replace_problem_chars() const { return replace_problem_chars; } |
---|
157 | bool shall_be_pretty() const { return pretty; } |
---|
158 | }; |
---|
159 | |
---|
160 | |
---|
161 | class ACI_Labeler: public TreeLabeler { |
---|
162 | SmartCharPtr leaf_aci; |
---|
163 | |
---|
164 | public: |
---|
165 | explicit ACI_Labeler(const char *leaf_aci_) : leaf_aci(strdup(leaf_aci_)) {} |
---|
166 | |
---|
167 | const char *speciesLabel(GBDATA *gb_main, GBDATA *gb_species, TreeNode *, const char *tree_name) const OVERRIDE { |
---|
168 | GBL_env env(gb_main, tree_name); |
---|
169 | GBL_call_env callEnv(gb_species, env); |
---|
170 | |
---|
171 | char* node_text = GB_command_interpreter_in_env("", leaf_aci.content(), callEnv); |
---|
172 | if (!node_text) { |
---|
173 | GB_ERROR ndsError = GB_await_error(); |
---|
174 | node_text = GBS_global_string_copy("<error: %s>", ndsError); |
---|
175 | GB_export_error(ndsError); |
---|
176 | } |
---|
177 | |
---|
178 | RETURN_LOCAL_ALLOC(node_text); |
---|
179 | } |
---|
180 | const char *groupLabel(GBDATA *, GBDATA *, TreeNode *innerNode, const char *) const { |
---|
181 | // ACI is not used for group names |
---|
182 | return innerNode->name; |
---|
183 | } |
---|
184 | }; |
---|
185 | |
---|
186 | static GB_ERROR export_newick(const CLI& args) { |
---|
187 | |
---|
188 | ARB_redirect_handlers_to(stderr, stderr); |
---|
189 | GB_ERROR error = NULp; |
---|
190 | |
---|
191 | const char *dbname = args.get_database(); |
---|
192 | GB_shell shell; |
---|
193 | GBDATA *gb_main = GB_open(dbname, "r"); |
---|
194 | |
---|
195 | if (!gb_main) { |
---|
196 | error = GB_await_error(); |
---|
197 | } |
---|
198 | else { |
---|
199 | ACI_Labeler labeler(args.get_leaf_aci()); |
---|
200 | |
---|
201 | LabelQuoting quoting_mode = args.get_quoting_mode(); |
---|
202 | if (args.shall_replace_problem_chars()) { |
---|
203 | quoting_mode = LabelQuoting(quoting_mode|LABEL_FORCE_REPLACE); |
---|
204 | } |
---|
205 | |
---|
206 | error = TREE_write_Newick(gb_main, |
---|
207 | args.get_tree(), |
---|
208 | labeler, |
---|
209 | args.shall_add_branch_length(), |
---|
210 | args.shall_add_bootstraps(), |
---|
211 | args.shall_add_group_names(), |
---|
212 | args.shall_be_pretty(), |
---|
213 | quoting_mode, |
---|
214 | args.get_newick_file()); |
---|
215 | |
---|
216 | // get possible NDS error, too |
---|
217 | if (!error) error = GB_incur_error(); |
---|
218 | GB_close(gb_main); |
---|
219 | } |
---|
220 | |
---|
221 | return error; |
---|
222 | } |
---|
223 | |
---|
224 | int main(int argc, char **argv) { |
---|
225 | |
---|
226 | CLI args(argc, const_cast<const char**>(argv)); |
---|
227 | GB_ERROR error = args.get_error(); |
---|
228 | |
---|
229 | if (!error && args.help_wanted()) { |
---|
230 | args.show_help(); |
---|
231 | return EXIT_FAILURE; |
---|
232 | } |
---|
233 | |
---|
234 | if (!error) error = export_newick(args); |
---|
235 | |
---|
236 | if (error) { |
---|
237 | fprintf(stderr, "Error: %s\n", error); |
---|
238 | return EXIT_FAILURE; |
---|
239 | } |
---|
240 | return EXIT_SUCCESS; |
---|
241 | } |
---|