1 | // =========================================================== // |
---|
2 | // // |
---|
3 | // File : NT_taxonomy.cxx // |
---|
4 | // Purpose : Compare two trees by taxonomy // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in May 2015 // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =========================================================== // |
---|
10 | |
---|
11 | #include "ad_trees.h" |
---|
12 | #include "NT_local.h" |
---|
13 | |
---|
14 | #include <aw_window.hxx> |
---|
15 | #include <aw_root.hxx> |
---|
16 | #include <aw_awar.hxx> |
---|
17 | #include <aw_msg.hxx> |
---|
18 | |
---|
19 | #include <awt_sel_boxes.hxx> |
---|
20 | #include <TreeCallbacks.hxx> |
---|
21 | #include <TreeAdmin.h> |
---|
22 | |
---|
23 | #include <arb_progress.h> |
---|
24 | #include <arb_global_defs.h> |
---|
25 | #include <item_sel_list.h> |
---|
26 | |
---|
27 | #include <set> |
---|
28 | #include <map> |
---|
29 | |
---|
30 | #define TREE_COMPARE_PREFIX "ad_tree/compare/" |
---|
31 | |
---|
32 | #define AWAR_TREE_COMPARE_ACTION TREE_COMPARE_PREFIX "action" |
---|
33 | #define AWAR_TREE_COMPARE_MIN_TAX_LEVELS TREE_COMPARE_PREFIX "taxdist" |
---|
34 | #define AWAR_TREE_COMPARE_WRITE_FIELD TREE_COMPARE_PREFIX "field" |
---|
35 | |
---|
36 | enum Action { // uses same values as NT_mark_all_cb; see ../SL/TREEDISP/TreeCallbacks.cxx@MARK_MODE_LOWER_BITS |
---|
37 | UNMARK = 0, |
---|
38 | MARK = 1, |
---|
39 | INVERT = 2, |
---|
40 | }; |
---|
41 | |
---|
42 | enum Target { |
---|
43 | ALL, |
---|
44 | TAX, |
---|
45 | COMMON, |
---|
46 | MISSING_LEFT, |
---|
47 | MISSING_RIGHT, |
---|
48 | }; |
---|
49 | |
---|
50 | static TreeNode *findParentGroup(TreeNode *node) { // @@@ DRY vs/using TreeNode::find_parent_with_groupInfo? |
---|
51 | TreeNode *parent_group = NULp; |
---|
52 | |
---|
53 | while (!parent_group && node->father) { |
---|
54 | node = node->father; |
---|
55 | if (node->is_normal_group()) { |
---|
56 | parent_group = node; |
---|
57 | } |
---|
58 | } |
---|
59 | |
---|
60 | return parent_group; |
---|
61 | } |
---|
62 | |
---|
63 | static int countTaxLevel(TreeNode *node) { // @@@ DRY vs TreeNode::calc_clade_level???? |
---|
64 | int taxlevel = node->is_leaf() ? 0 : node->is_normal_group(); |
---|
65 | TreeNode *parent_group = findParentGroup(node); |
---|
66 | while (parent_group) { |
---|
67 | ++taxlevel; |
---|
68 | parent_group = findParentGroup(parent_group); |
---|
69 | } |
---|
70 | return taxlevel; |
---|
71 | } |
---|
72 | |
---|
73 | static int calcTaxDifference(TreeNode *g1, TreeNode *g2) { |
---|
74 | // returns difference in taxonomy-levels |
---|
75 | // |
---|
76 | // difference defined such that: |
---|
77 | // diff("/A/B", "/A/B") == 0 |
---|
78 | // diff("/A/B", "/A" ) == 1 |
---|
79 | // diff("/A/B", "/A/C") == 2 |
---|
80 | // diff("/A/B/C", "/A/D/E") == 4 |
---|
81 | // diff("/A/B/C", "/A/D/C") == 4 |
---|
82 | // diff("/A/B/C", "/A/C") == 3 |
---|
83 | |
---|
84 | nt_assert(g1->is_normal_group() && g2->is_normal_group()); // has to be called with root-nodes of groups! |
---|
85 | |
---|
86 | TreeNode *p1 = findParentGroup(g1); |
---|
87 | TreeNode *p2 = findParentGroup(g2); |
---|
88 | |
---|
89 | int taxdiff = 0; |
---|
90 | if (p1) { |
---|
91 | if (p2) { |
---|
92 | int pdiff = calcTaxDifference(p1, p2); |
---|
93 | int p1diff = calcTaxDifference(p1, g2) + 1; |
---|
94 | int p2diff = calcTaxDifference(g1, p2) + 1; |
---|
95 | |
---|
96 | if (pdiff || strcmp(g1->name, g2->name) != 0) { |
---|
97 | // if parent-taxonomy differs -> ignore names of g1/g2 -> diff=2 |
---|
98 | // if parent-taxonomy matches -> if names of g1 and g2 match -> no diff |
---|
99 | // if names of g1 and g2 differ -> diff=2 |
---|
100 | pdiff += 2; |
---|
101 | } |
---|
102 | |
---|
103 | taxdiff = std::min(pdiff, std::min(p1diff, p2diff)); |
---|
104 | } |
---|
105 | else { |
---|
106 | taxdiff = calcTaxDifference(p1, g2) + 1; |
---|
107 | } |
---|
108 | } |
---|
109 | else { |
---|
110 | if (p2) { |
---|
111 | taxdiff = calcTaxDifference(g1, p2) + 1; |
---|
112 | } |
---|
113 | else { |
---|
114 | if (strcmp(g1->name, g2->name) != 0) { // logic similar to (p1 && p2) above |
---|
115 | taxdiff = 2; |
---|
116 | } |
---|
117 | } |
---|
118 | } |
---|
119 | |
---|
120 | return taxdiff; |
---|
121 | } |
---|
122 | |
---|
123 | class SpeciesInTwoTrees { |
---|
124 | TreeNode *tree1; |
---|
125 | TreeNode *tree2; |
---|
126 | |
---|
127 | public: |
---|
128 | SpeciesInTwoTrees() |
---|
129 | : tree1(NULp), |
---|
130 | tree2(NULp) |
---|
131 | {} |
---|
132 | |
---|
133 | void setSpecies(TreeNode *species, bool first) { |
---|
134 | nt_assert(species->is_leaf()); |
---|
135 | if (first) { |
---|
136 | nt_assert(!tree1); |
---|
137 | tree1 = species; |
---|
138 | } |
---|
139 | else { |
---|
140 | nt_assert(!tree2); |
---|
141 | tree2 = species; |
---|
142 | } |
---|
143 | } |
---|
144 | |
---|
145 | bool occursInBothTrees() const { return tree1 && tree2; } |
---|
146 | int calcTaxDiffLevel() const { |
---|
147 | TreeNode *parent_group1 = findParentGroup(tree1); |
---|
148 | TreeNode *parent_group2 = findParentGroup(tree2); |
---|
149 | |
---|
150 | int taxdiff = 0; |
---|
151 | |
---|
152 | if (parent_group1) { |
---|
153 | if (parent_group2) { |
---|
154 | taxdiff = calcTaxDifference(parent_group1, parent_group2); |
---|
155 | } |
---|
156 | else { |
---|
157 | taxdiff = countTaxLevel(parent_group1); |
---|
158 | } |
---|
159 | } |
---|
160 | else { |
---|
161 | if (parent_group2) { |
---|
162 | taxdiff = countTaxLevel(parent_group2); |
---|
163 | } |
---|
164 | // else -> both outside any group -> no diff |
---|
165 | } |
---|
166 | |
---|
167 | return taxdiff; |
---|
168 | } |
---|
169 | }; |
---|
170 | |
---|
171 | typedef std::set<const char *, charpLess> NameSet; |
---|
172 | typedef std::map<const char *, SpeciesInTwoTrees, charpLess> TwoTreeMap; |
---|
173 | |
---|
174 | static void mapTree(TreeNode *node, TwoTreeMap& tmap, bool first) { |
---|
175 | if (node->is_leaf()) { |
---|
176 | nt_assert(node->name); |
---|
177 | tmap[node->name].setSpecies(node, first); |
---|
178 | } |
---|
179 | else { |
---|
180 | mapTree(node->leftson, tmap, first); |
---|
181 | mapTree(node->rightson, tmap, first); |
---|
182 | } |
---|
183 | } |
---|
184 | |
---|
185 | static void mark_action(AW_window *aws, TREE_canvas *ntw, Target target) { |
---|
186 | AW_root *aw_root = aws->get_root(); |
---|
187 | |
---|
188 | Action action = Action(aw_root->awar(AWAR_TREE_COMPARE_ACTION)->read_int()); |
---|
189 | |
---|
190 | arb_progress progress("Mark species"); |
---|
191 | if (target == ALL) { |
---|
192 | NT_mark_all_cb(NULp, ntw, action); |
---|
193 | } |
---|
194 | else { |
---|
195 | progress.subtitle("Loading trees"); |
---|
196 | |
---|
197 | const char *treename_left = TreeAdmin::source_tree_awar(aw_root)->read_char_pntr(); |
---|
198 | const char *treename_right = TreeAdmin::dest_tree_awar(aw_root)->read_char_pntr(); |
---|
199 | |
---|
200 | GBDATA *gb_main = ntw->gb_main; |
---|
201 | GB_transaction ta(gb_main); |
---|
202 | |
---|
203 | GBDATA *gb_species_data = GBT_get_species_data(gb_main); |
---|
204 | |
---|
205 | TreeNode *tree_left = GBT_read_tree(gb_main, treename_left, new SimpleRoot); |
---|
206 | TreeNode *tree_right = NULp; |
---|
207 | |
---|
208 | GB_ERROR load_error = NULp; |
---|
209 | if (!tree_left) { |
---|
210 | load_error = GB_await_error(); |
---|
211 | } |
---|
212 | else { |
---|
213 | tree_right = GBT_read_tree(gb_main, treename_right, new SimpleRoot); |
---|
214 | if (!tree_right) load_error = GB_await_error(); |
---|
215 | } |
---|
216 | |
---|
217 | size_t missing = 0; |
---|
218 | size_t targetted = 0; |
---|
219 | bool had_error = false; |
---|
220 | |
---|
221 | if (load_error) { |
---|
222 | aw_message(load_error); |
---|
223 | had_error = load_error; |
---|
224 | } |
---|
225 | else { |
---|
226 | nt_assert(tree_left && tree_right); |
---|
227 | if (target == TAX) { |
---|
228 | int min_tax_levels = atoi(aw_root->awar(AWAR_TREE_COMPARE_MIN_TAX_LEVELS)->read_char_pntr()); |
---|
229 | |
---|
230 | TwoTreeMap tmap; |
---|
231 | mapTree(tree_left, tmap, true); |
---|
232 | mapTree(tree_right, tmap, false); |
---|
233 | |
---|
234 | size_t commonSpeciesCount = 0; |
---|
235 | for (TwoTreeMap::iterator s = tmap.begin(); s != tmap.end(); ++s) { |
---|
236 | if (s->second.occursInBothTrees()) commonSpeciesCount++; |
---|
237 | } |
---|
238 | |
---|
239 | const char *fieldName = prepare_and_get_selected_itemfield(aw_root, AWAR_TREE_COMPARE_WRITE_FIELD, gb_main, SPECIES_get_selector(), FIF_ALLOW_NONE); |
---|
240 | bool writeToField = fieldName; |
---|
241 | GB_ERROR error = GB_incur_error(); |
---|
242 | |
---|
243 | if (!error) { |
---|
244 | arb_progress subprogress("Comparing taxonomy info", commonSpeciesCount); |
---|
245 | for (TwoTreeMap::iterator s = tmap.begin(); s != tmap.end() && !error; ++s) { |
---|
246 | const SpeciesInTwoTrees& species = s->second; |
---|
247 | |
---|
248 | if (species.occursInBothTrees()) { |
---|
249 | int taxDiffLevel = species.calcTaxDiffLevel(); |
---|
250 | if (taxDiffLevel>min_tax_levels) { |
---|
251 | ++targetted; |
---|
252 | GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, s->first); |
---|
253 | if (!gb_species) { |
---|
254 | ++missing; |
---|
255 | } |
---|
256 | else { |
---|
257 | switch (action) { |
---|
258 | case UNMARK: GB_write_flag(gb_species, 0); break; |
---|
259 | case MARK: GB_write_flag(gb_species, 1); break; |
---|
260 | case INVERT: GB_write_flag(gb_species, !GB_read_flag(gb_species)); break; |
---|
261 | } |
---|
262 | if (writeToField) { |
---|
263 | GBDATA *gb_field = GBT_searchOrCreate_itemfield_according_to_changekey(gb_species, fieldName, SPECIES_get_selector().change_key_path); |
---|
264 | if (!gb_field) error = GB_await_error(); |
---|
265 | if (!error) error = GB_write_lossless_int(gb_field, taxDiffLevel); |
---|
266 | } |
---|
267 | } |
---|
268 | } |
---|
269 | subprogress.inc_and_check_user_abort(error); |
---|
270 | } |
---|
271 | } |
---|
272 | } |
---|
273 | aw_message_if(error); |
---|
274 | had_error = error; |
---|
275 | } |
---|
276 | else { |
---|
277 | progress.subtitle("Intersecting tree members"); |
---|
278 | |
---|
279 | NameSet in_left; |
---|
280 | NameSet in_right; |
---|
281 | { |
---|
282 | size_t count_left, count_right; |
---|
283 | GB_CSTR *names_left = GBT_get_names_of_species_in_tree(tree_left, &count_left); |
---|
284 | GB_CSTR *names_right = GBT_get_names_of_species_in_tree(tree_right, &count_right); |
---|
285 | |
---|
286 | for(size_t i= 0; i<count_left; ++i) in_left .insert(names_left[i]); |
---|
287 | for(size_t i= 0; i<count_right; ++i) in_right.insert(names_right[i]); |
---|
288 | |
---|
289 | free(names_right); |
---|
290 | free(names_left); |
---|
291 | } |
---|
292 | |
---|
293 | { |
---|
294 | NameSet& in_one = target == MISSING_LEFT ? in_right : in_left; |
---|
295 | NameSet& in_other = target == MISSING_LEFT ? in_left : in_right; |
---|
296 | |
---|
297 | for (NameSet::const_iterator i = in_one.begin(); i != in_one.end(); ++i) { |
---|
298 | bool is_in_other = in_other.find(*i) != in_other.end(); |
---|
299 | bool is_target = is_in_other == (target == COMMON); |
---|
300 | |
---|
301 | if (is_target) { |
---|
302 | ++targetted; |
---|
303 | GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, *i); |
---|
304 | if (!gb_species) { |
---|
305 | ++missing; |
---|
306 | } |
---|
307 | else { |
---|
308 | switch (action) { |
---|
309 | case UNMARK: GB_write_flag(gb_species, 0); break; |
---|
310 | case MARK: GB_write_flag(gb_species, 1); break; |
---|
311 | case INVERT: GB_write_flag(gb_species, !GB_read_flag(gb_species)); break; |
---|
312 | } |
---|
313 | } |
---|
314 | } |
---|
315 | } |
---|
316 | } |
---|
317 | } |
---|
318 | } |
---|
319 | |
---|
320 | if (!had_error) { |
---|
321 | if (!targetted) { |
---|
322 | aw_message("Warning: no species targetted"); |
---|
323 | } |
---|
324 | else if (missing) { |
---|
325 | aw_message(GBS_global_string("Warning: %zu targeted species could not be found\n" |
---|
326 | "(might be caused by zombies in your trees)", missing)); |
---|
327 | } |
---|
328 | } |
---|
329 | |
---|
330 | destroy(tree_right); |
---|
331 | destroy(tree_left); |
---|
332 | } |
---|
333 | } |
---|
334 | |
---|
335 | void NT_create_compare_taxonomy_awars(AW_root *aw_root, AW_default props) { |
---|
336 | char *currTree = aw_root->awar(AWAR_TREE_NAME)->read_string(); |
---|
337 | |
---|
338 | aw_root->awar_int (AWAR_TREE_COMPARE_ACTION, MARK, props); |
---|
339 | aw_root->awar_string(AWAR_TREE_COMPARE_MIN_TAX_LEVELS, "0", props); |
---|
340 | aw_root->awar_string(AWAR_TREE_COMPARE_WRITE_FIELD, NO_FIELD_SELECTED, props); |
---|
341 | |
---|
342 | free(currTree); |
---|
343 | } |
---|
344 | |
---|
345 | AW_window *NT_create_compare_taxonomy_window(AW_root *aw_root, TREE_canvas *ntw) { |
---|
346 | AW_window_simple *aws = new AW_window_simple; |
---|
347 | aws->init(aw_root, "COMPARE_TAXONOMY", "Compare taxonomy"); |
---|
348 | aws->load_xfig("compare_taxonomy.fig"); |
---|
349 | |
---|
350 | aws->at("close"); |
---|
351 | aws->callback(AW_POPDOWN); |
---|
352 | aws->create_button("CLOSE", "CLOSE", "C"); |
---|
353 | |
---|
354 | aws->at("help"); |
---|
355 | aws->callback(makeHelpCallback("compare_taxonomy.hlp")); |
---|
356 | aws->create_button("HELP", "HELP", "H"); |
---|
357 | |
---|
358 | aws->at("action"); |
---|
359 | aws->create_toggle_field(AWAR_TREE_COMPARE_ACTION, NULp, ""); |
---|
360 | aws->insert_default_toggle("mark", "m", MARK); |
---|
361 | aws->insert_toggle ("unmark", "u", UNMARK); |
---|
362 | aws->insert_toggle ("invert", "i", INVERT); |
---|
363 | aws->update_toggle_field(); |
---|
364 | |
---|
365 | aws->at("all"); aws->callback(makeWindowCallback(mark_action, ntw, ALL)); aws->create_autosize_button("all", "all species"); |
---|
366 | aws->at("tax"); aws->callback(makeWindowCallback(mark_action, ntw, TAX)); aws->create_autosize_button("tax", "species with taxonomy changed"); |
---|
367 | aws->at("common"); aws->callback(makeWindowCallback(mark_action, ntw, COMMON)); aws->create_autosize_button("common", "common species"); |
---|
368 | aws->at("missleft"); aws->callback(makeWindowCallback(mark_action, ntw, MISSING_LEFT)); aws->create_autosize_button("missleft", "species missing in left tree"); |
---|
369 | aws->at("missright"); aws->callback(makeWindowCallback(mark_action, ntw, MISSING_RIGHT)); aws->create_autosize_button("missright", "species missing in right tree"); |
---|
370 | |
---|
371 | aws->at("levels"); |
---|
372 | aws->create_input_field(AWAR_TREE_COMPARE_MIN_TAX_LEVELS, 5); |
---|
373 | |
---|
374 | create_itemfield_selection_button(aws, FieldSelDef(AWAR_TREE_COMPARE_WRITE_FIELD, ntw->gb_main, SPECIES_get_selector(), FIELD_FILTER_INT_WRITEABLE, "taxdiff-field", SF_ALLOW_NEW), "field"); |
---|
375 | |
---|
376 | NT_create_twoTreeSelection(aws); |
---|
377 | |
---|
378 | return aws; |
---|
379 | } |
---|
380 | |
---|
381 | |
---|