| 1 | // =========================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : NT_taxonomy.cxx // |
|---|
| 4 | // Purpose : Compare two trees by taxonomy // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in May 2015 // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // =========================================================== // |
|---|
| 10 | |
|---|
| 11 | #include "ad_trees.h" |
|---|
| 12 | #include "NT_local.h" |
|---|
| 13 | |
|---|
| 14 | #include <aw_window.hxx> |
|---|
| 15 | #include <aw_root.hxx> |
|---|
| 16 | #include <aw_awar.hxx> |
|---|
| 17 | #include <aw_msg.hxx> |
|---|
| 18 | |
|---|
| 19 | #include <awt_sel_boxes.hxx> |
|---|
| 20 | #include <TreeCallbacks.hxx> |
|---|
| 21 | #include <TreeAdmin.h> |
|---|
| 22 | |
|---|
| 23 | #include <arb_progress.h> |
|---|
| 24 | #include <arb_global_defs.h> |
|---|
| 25 | #include <item_sel_list.h> |
|---|
| 26 | |
|---|
| 27 | #include <set> |
|---|
| 28 | #include <map> |
|---|
| 29 | |
|---|
| 30 | #define TREE_COMPARE_PREFIX "ad_tree/compare/" |
|---|
| 31 | |
|---|
| 32 | #define AWAR_TREE_COMPARE_ACTION TREE_COMPARE_PREFIX "action" |
|---|
| 33 | #define AWAR_TREE_COMPARE_MIN_TAX_LEVELS TREE_COMPARE_PREFIX "taxdist" |
|---|
| 34 | #define AWAR_TREE_COMPARE_WRITE_FIELD TREE_COMPARE_PREFIX "field" |
|---|
| 35 | |
|---|
| 36 | enum Action { // uses same values as NT_mark_all_cb; see ../SL/TREEDISP/TreeCallbacks.cxx@MARK_MODE_LOWER_BITS |
|---|
| 37 | UNMARK = 0, |
|---|
| 38 | MARK = 1, |
|---|
| 39 | INVERT = 2, |
|---|
| 40 | }; |
|---|
| 41 | |
|---|
| 42 | enum Target { |
|---|
| 43 | ALL, |
|---|
| 44 | TAX, |
|---|
| 45 | COMMON, |
|---|
| 46 | MISSING_LEFT, |
|---|
| 47 | MISSING_RIGHT, |
|---|
| 48 | }; |
|---|
| 49 | |
|---|
| 50 | static TreeNode *findParentGroup(TreeNode *node) { // @@@ DRY vs/using TreeNode::find_parent_with_groupInfo? |
|---|
| 51 | TreeNode *parent_group = NULp; |
|---|
| 52 | |
|---|
| 53 | while (!parent_group && node->father) { |
|---|
| 54 | node = node->father; |
|---|
| 55 | if (node->is_normal_group()) { |
|---|
| 56 | parent_group = node; |
|---|
| 57 | } |
|---|
| 58 | } |
|---|
| 59 | |
|---|
| 60 | return parent_group; |
|---|
| 61 | } |
|---|
| 62 | |
|---|
| 63 | static int countTaxLevel(TreeNode *node) { // @@@ DRY vs TreeNode::calc_clade_level???? |
|---|
| 64 | int taxlevel = node->is_leaf() ? 0 : node->is_normal_group(); |
|---|
| 65 | TreeNode *parent_group = findParentGroup(node); |
|---|
| 66 | while (parent_group) { |
|---|
| 67 | ++taxlevel; |
|---|
| 68 | parent_group = findParentGroup(parent_group); |
|---|
| 69 | } |
|---|
| 70 | return taxlevel; |
|---|
| 71 | } |
|---|
| 72 | |
|---|
| 73 | static int calcTaxDifference(TreeNode *g1, TreeNode *g2) { |
|---|
| 74 | // returns difference in taxonomy-levels |
|---|
| 75 | // |
|---|
| 76 | // difference defined such that: |
|---|
| 77 | // diff("/A/B", "/A/B") == 0 |
|---|
| 78 | // diff("/A/B", "/A" ) == 1 |
|---|
| 79 | // diff("/A/B", "/A/C") == 2 |
|---|
| 80 | // diff("/A/B/C", "/A/D/E") == 4 |
|---|
| 81 | // diff("/A/B/C", "/A/D/C") == 4 |
|---|
| 82 | // diff("/A/B/C", "/A/C") == 3 |
|---|
| 83 | |
|---|
| 84 | nt_assert(g1->is_normal_group() && g2->is_normal_group()); // has to be called with root-nodes of groups! |
|---|
| 85 | |
|---|
| 86 | TreeNode *p1 = findParentGroup(g1); |
|---|
| 87 | TreeNode *p2 = findParentGroup(g2); |
|---|
| 88 | |
|---|
| 89 | int taxdiff = 0; |
|---|
| 90 | if (p1) { |
|---|
| 91 | if (p2) { |
|---|
| 92 | int pdiff = calcTaxDifference(p1, p2); |
|---|
| 93 | int p1diff = calcTaxDifference(p1, g2) + 1; |
|---|
| 94 | int p2diff = calcTaxDifference(g1, p2) + 1; |
|---|
| 95 | |
|---|
| 96 | if (pdiff || strcmp(g1->name, g2->name) != 0) { |
|---|
| 97 | // if parent-taxonomy differs -> ignore names of g1/g2 -> diff=2 |
|---|
| 98 | // if parent-taxonomy matches -> if names of g1 and g2 match -> no diff |
|---|
| 99 | // if names of g1 and g2 differ -> diff=2 |
|---|
| 100 | pdiff += 2; |
|---|
| 101 | } |
|---|
| 102 | |
|---|
| 103 | taxdiff = std::min(pdiff, std::min(p1diff, p2diff)); |
|---|
| 104 | } |
|---|
| 105 | else { |
|---|
| 106 | taxdiff = calcTaxDifference(p1, g2) + 1; |
|---|
| 107 | } |
|---|
| 108 | } |
|---|
| 109 | else { |
|---|
| 110 | if (p2) { |
|---|
| 111 | taxdiff = calcTaxDifference(g1, p2) + 1; |
|---|
| 112 | } |
|---|
| 113 | else { |
|---|
| 114 | if (strcmp(g1->name, g2->name) != 0) { // logic similar to (p1 && p2) above |
|---|
| 115 | taxdiff = 2; |
|---|
| 116 | } |
|---|
| 117 | } |
|---|
| 118 | } |
|---|
| 119 | |
|---|
| 120 | return taxdiff; |
|---|
| 121 | } |
|---|
| 122 | |
|---|
| 123 | class SpeciesInTwoTrees { |
|---|
| 124 | TreeNode *tree1; |
|---|
| 125 | TreeNode *tree2; |
|---|
| 126 | |
|---|
| 127 | public: |
|---|
| 128 | SpeciesInTwoTrees() |
|---|
| 129 | : tree1(NULp), |
|---|
| 130 | tree2(NULp) |
|---|
| 131 | {} |
|---|
| 132 | |
|---|
| 133 | void setSpecies(TreeNode *species, bool first) { |
|---|
| 134 | nt_assert(species->is_leaf()); |
|---|
| 135 | if (first) { |
|---|
| 136 | nt_assert(!tree1); |
|---|
| 137 | tree1 = species; |
|---|
| 138 | } |
|---|
| 139 | else { |
|---|
| 140 | nt_assert(!tree2); |
|---|
| 141 | tree2 = species; |
|---|
| 142 | } |
|---|
| 143 | } |
|---|
| 144 | |
|---|
| 145 | bool occursInBothTrees() const { return tree1 && tree2; } |
|---|
| 146 | int calcTaxDiffLevel() const { |
|---|
| 147 | TreeNode *parent_group1 = findParentGroup(tree1); |
|---|
| 148 | TreeNode *parent_group2 = findParentGroup(tree2); |
|---|
| 149 | |
|---|
| 150 | int taxdiff = 0; |
|---|
| 151 | |
|---|
| 152 | if (parent_group1) { |
|---|
| 153 | if (parent_group2) { |
|---|
| 154 | taxdiff = calcTaxDifference(parent_group1, parent_group2); |
|---|
| 155 | } |
|---|
| 156 | else { |
|---|
| 157 | taxdiff = countTaxLevel(parent_group1); |
|---|
| 158 | } |
|---|
| 159 | } |
|---|
| 160 | else { |
|---|
| 161 | if (parent_group2) { |
|---|
| 162 | taxdiff = countTaxLevel(parent_group2); |
|---|
| 163 | } |
|---|
| 164 | // else -> both outside any group -> no diff |
|---|
| 165 | } |
|---|
| 166 | |
|---|
| 167 | return taxdiff; |
|---|
| 168 | } |
|---|
| 169 | }; |
|---|
| 170 | |
|---|
| 171 | typedef std::set<const char *, charpLess> NameSet; |
|---|
| 172 | typedef std::map<const char *, SpeciesInTwoTrees, charpLess> TwoTreeMap; |
|---|
| 173 | |
|---|
| 174 | static void mapTree(TreeNode *node, TwoTreeMap& tmap, bool first) { |
|---|
| 175 | if (node->is_leaf()) { |
|---|
| 176 | nt_assert(node->name); |
|---|
| 177 | tmap[node->name].setSpecies(node, first); |
|---|
| 178 | } |
|---|
| 179 | else { |
|---|
| 180 | mapTree(node->leftson, tmap, first); |
|---|
| 181 | mapTree(node->rightson, tmap, first); |
|---|
| 182 | } |
|---|
| 183 | } |
|---|
| 184 | |
|---|
| 185 | static void mark_action(AW_window *aws, TREE_canvas *ntw, Target target) { |
|---|
| 186 | AW_root *aw_root = aws->get_root(); |
|---|
| 187 | |
|---|
| 188 | Action action = Action(aw_root->awar(AWAR_TREE_COMPARE_ACTION)->read_int()); |
|---|
| 189 | |
|---|
| 190 | arb_progress progress("Mark species"); |
|---|
| 191 | if (target == ALL) { |
|---|
| 192 | NT_mark_all_cb(NULp, ntw, action); |
|---|
| 193 | } |
|---|
| 194 | else { |
|---|
| 195 | progress.subtitle("Loading trees"); |
|---|
| 196 | |
|---|
| 197 | const char *treename_left = TreeAdmin::source_tree_awar(aw_root)->read_char_pntr(); |
|---|
| 198 | const char *treename_right = TreeAdmin::dest_tree_awar(aw_root)->read_char_pntr(); |
|---|
| 199 | |
|---|
| 200 | GBDATA *gb_main = ntw->gb_main; |
|---|
| 201 | GB_transaction ta(gb_main); |
|---|
| 202 | |
|---|
| 203 | GBDATA *gb_species_data = GBT_get_species_data(gb_main); |
|---|
| 204 | |
|---|
| 205 | TreeNode *tree_left = GBT_read_tree(gb_main, treename_left, new SimpleRoot); |
|---|
| 206 | TreeNode *tree_right = NULp; |
|---|
| 207 | |
|---|
| 208 | GB_ERROR load_error = NULp; |
|---|
| 209 | if (!tree_left) { |
|---|
| 210 | load_error = GB_await_error(); |
|---|
| 211 | } |
|---|
| 212 | else { |
|---|
| 213 | tree_right = GBT_read_tree(gb_main, treename_right, new SimpleRoot); |
|---|
| 214 | if (!tree_right) load_error = GB_await_error(); |
|---|
| 215 | } |
|---|
| 216 | |
|---|
| 217 | size_t missing = 0; |
|---|
| 218 | size_t targetted = 0; |
|---|
| 219 | bool had_error = false; |
|---|
| 220 | |
|---|
| 221 | if (load_error) { |
|---|
| 222 | aw_message(load_error); |
|---|
| 223 | had_error = load_error; |
|---|
| 224 | } |
|---|
| 225 | else { |
|---|
| 226 | nt_assert(tree_left && tree_right); |
|---|
| 227 | if (target == TAX) { |
|---|
| 228 | int min_tax_levels = atoi(aw_root->awar(AWAR_TREE_COMPARE_MIN_TAX_LEVELS)->read_char_pntr()); |
|---|
| 229 | |
|---|
| 230 | TwoTreeMap tmap; |
|---|
| 231 | mapTree(tree_left, tmap, true); |
|---|
| 232 | mapTree(tree_right, tmap, false); |
|---|
| 233 | |
|---|
| 234 | size_t commonSpeciesCount = 0; |
|---|
| 235 | for (TwoTreeMap::iterator s = tmap.begin(); s != tmap.end(); ++s) { |
|---|
| 236 | if (s->second.occursInBothTrees()) commonSpeciesCount++; |
|---|
| 237 | } |
|---|
| 238 | |
|---|
| 239 | const char *fieldName = prepare_and_get_selected_itemfield(aw_root, AWAR_TREE_COMPARE_WRITE_FIELD, gb_main, SPECIES_get_selector(), FIF_ALLOW_NONE); |
|---|
| 240 | bool writeToField = fieldName; |
|---|
| 241 | GB_ERROR error = GB_incur_error(); |
|---|
| 242 | |
|---|
| 243 | if (!error) { |
|---|
| 244 | arb_progress subprogress("Comparing taxonomy info", commonSpeciesCount); |
|---|
| 245 | for (TwoTreeMap::iterator s = tmap.begin(); s != tmap.end() && !error; ++s) { |
|---|
| 246 | const SpeciesInTwoTrees& species = s->second; |
|---|
| 247 | |
|---|
| 248 | if (species.occursInBothTrees()) { |
|---|
| 249 | int taxDiffLevel = species.calcTaxDiffLevel(); |
|---|
| 250 | if (taxDiffLevel>min_tax_levels) { |
|---|
| 251 | ++targetted; |
|---|
| 252 | GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, s->first); |
|---|
| 253 | if (!gb_species) { |
|---|
| 254 | ++missing; |
|---|
| 255 | } |
|---|
| 256 | else { |
|---|
| 257 | switch (action) { |
|---|
| 258 | case UNMARK: GB_write_flag(gb_species, 0); break; |
|---|
| 259 | case MARK: GB_write_flag(gb_species, 1); break; |
|---|
| 260 | case INVERT: GB_write_flag(gb_species, !GB_read_flag(gb_species)); break; |
|---|
| 261 | } |
|---|
| 262 | if (writeToField) { |
|---|
| 263 | GBDATA *gb_field = GBT_searchOrCreate_itemfield_according_to_changekey(gb_species, fieldName, SPECIES_get_selector().change_key_path); |
|---|
| 264 | if (!gb_field) error = GB_await_error(); |
|---|
| 265 | if (!error) error = GB_write_lossless_int(gb_field, taxDiffLevel); |
|---|
| 266 | } |
|---|
| 267 | } |
|---|
| 268 | } |
|---|
| 269 | subprogress.inc_and_check_user_abort(error); |
|---|
| 270 | } |
|---|
| 271 | } |
|---|
| 272 | } |
|---|
| 273 | aw_message_if(error); |
|---|
| 274 | had_error = error; |
|---|
| 275 | } |
|---|
| 276 | else { |
|---|
| 277 | progress.subtitle("Intersecting tree members"); |
|---|
| 278 | |
|---|
| 279 | NameSet in_left; |
|---|
| 280 | NameSet in_right; |
|---|
| 281 | { |
|---|
| 282 | size_t count_left, count_right; |
|---|
| 283 | GB_CSTR *names_left = GBT_get_names_of_species_in_tree(tree_left, &count_left); |
|---|
| 284 | GB_CSTR *names_right = GBT_get_names_of_species_in_tree(tree_right, &count_right); |
|---|
| 285 | |
|---|
| 286 | for(size_t i= 0; i<count_left; ++i) in_left .insert(names_left[i]); |
|---|
| 287 | for(size_t i= 0; i<count_right; ++i) in_right.insert(names_right[i]); |
|---|
| 288 | |
|---|
| 289 | free(names_right); |
|---|
| 290 | free(names_left); |
|---|
| 291 | } |
|---|
| 292 | |
|---|
| 293 | { |
|---|
| 294 | NameSet& in_one = target == MISSING_LEFT ? in_right : in_left; |
|---|
| 295 | NameSet& in_other = target == MISSING_LEFT ? in_left : in_right; |
|---|
| 296 | |
|---|
| 297 | for (NameSet::const_iterator i = in_one.begin(); i != in_one.end(); ++i) { |
|---|
| 298 | bool is_in_other = in_other.find(*i) != in_other.end(); |
|---|
| 299 | bool is_target = is_in_other == (target == COMMON); |
|---|
| 300 | |
|---|
| 301 | if (is_target) { |
|---|
| 302 | ++targetted; |
|---|
| 303 | GBDATA *gb_species = GBT_find_species_rel_species_data(gb_species_data, *i); |
|---|
| 304 | if (!gb_species) { |
|---|
| 305 | ++missing; |
|---|
| 306 | } |
|---|
| 307 | else { |
|---|
| 308 | switch (action) { |
|---|
| 309 | case UNMARK: GB_write_flag(gb_species, 0); break; |
|---|
| 310 | case MARK: GB_write_flag(gb_species, 1); break; |
|---|
| 311 | case INVERT: GB_write_flag(gb_species, !GB_read_flag(gb_species)); break; |
|---|
| 312 | } |
|---|
| 313 | } |
|---|
| 314 | } |
|---|
| 315 | } |
|---|
| 316 | } |
|---|
| 317 | } |
|---|
| 318 | } |
|---|
| 319 | |
|---|
| 320 | if (!had_error) { |
|---|
| 321 | if (!targetted) { |
|---|
| 322 | aw_message("Warning: no species targetted"); |
|---|
| 323 | } |
|---|
| 324 | else if (missing) { |
|---|
| 325 | aw_message(GBS_global_string("Warning: %zu targeted species could not be found\n" |
|---|
| 326 | "(might be caused by zombies in your trees)", missing)); |
|---|
| 327 | } |
|---|
| 328 | } |
|---|
| 329 | |
|---|
| 330 | destroy(tree_right); |
|---|
| 331 | destroy(tree_left); |
|---|
| 332 | } |
|---|
| 333 | } |
|---|
| 334 | |
|---|
| 335 | void NT_create_compare_taxonomy_awars(AW_root *aw_root, AW_default props) { |
|---|
| 336 | char *currTree = aw_root->awar(AWAR_TREE_NAME)->read_string(); |
|---|
| 337 | |
|---|
| 338 | aw_root->awar_int (AWAR_TREE_COMPARE_ACTION, MARK, props); |
|---|
| 339 | aw_root->awar_string(AWAR_TREE_COMPARE_MIN_TAX_LEVELS, "0", props); |
|---|
| 340 | aw_root->awar_string(AWAR_TREE_COMPARE_WRITE_FIELD, NO_FIELD_SELECTED, props); |
|---|
| 341 | |
|---|
| 342 | free(currTree); |
|---|
| 343 | } |
|---|
| 344 | |
|---|
| 345 | AW_window *NT_create_compare_taxonomy_window(AW_root *aw_root, TREE_canvas *ntw) { |
|---|
| 346 | AW_window_simple *aws = new AW_window_simple; |
|---|
| 347 | aws->init(aw_root, "COMPARE_TAXONOMY", "Compare taxonomy"); |
|---|
| 348 | aws->load_xfig("compare_taxonomy.fig"); |
|---|
| 349 | |
|---|
| 350 | aws->at("close"); |
|---|
| 351 | aws->callback(AW_POPDOWN); |
|---|
| 352 | aws->create_button("CLOSE", "CLOSE", "C"); |
|---|
| 353 | |
|---|
| 354 | aws->at("help"); |
|---|
| 355 | aws->callback(makeHelpCallback("compare_taxonomy.hlp")); |
|---|
| 356 | aws->create_button("HELP", "HELP", "H"); |
|---|
| 357 | |
|---|
| 358 | aws->at("action"); |
|---|
| 359 | aws->create_toggle_field(AWAR_TREE_COMPARE_ACTION); |
|---|
| 360 | aws->insert_default_toggle("mark", "m", MARK); |
|---|
| 361 | aws->insert_toggle ("unmark", "u", UNMARK); |
|---|
| 362 | aws->insert_toggle ("invert", "i", INVERT); |
|---|
| 363 | aws->update_toggle_field(); |
|---|
| 364 | |
|---|
| 365 | aws->at("all"); aws->callback(makeWindowCallback(mark_action, ntw, ALL)); aws->create_autosize_button("all", "all species"); |
|---|
| 366 | aws->at("tax"); aws->callback(makeWindowCallback(mark_action, ntw, TAX)); aws->create_autosize_button("tax", "species with taxonomy changed"); |
|---|
| 367 | aws->at("common"); aws->callback(makeWindowCallback(mark_action, ntw, COMMON)); aws->create_autosize_button("common", "common species"); |
|---|
| 368 | aws->at("missleft"); aws->callback(makeWindowCallback(mark_action, ntw, MISSING_LEFT)); aws->create_autosize_button("missleft", "species missing in left tree"); |
|---|
| 369 | aws->at("missright"); aws->callback(makeWindowCallback(mark_action, ntw, MISSING_RIGHT)); aws->create_autosize_button("missright", "species missing in right tree"); |
|---|
| 370 | |
|---|
| 371 | aws->at("levels"); |
|---|
| 372 | aws->create_input_field(AWAR_TREE_COMPARE_MIN_TAX_LEVELS, 5); |
|---|
| 373 | |
|---|
| 374 | create_itemfield_selection_button(aws, FieldSelDef(AWAR_TREE_COMPARE_WRITE_FIELD, ntw->gb_main, SPECIES_get_selector(), FIELD_FILTER_INT_WRITEABLE, "taxdiff-field", SF_ALLOW_NEW), "field"); |
|---|
| 375 | |
|---|
| 376 | NT_create_twoTreeSelection(aws); |
|---|
| 377 | |
|---|
| 378 | return aws; |
|---|
| 379 | } |
|---|
| 380 | |
|---|
| 381 | |
|---|