| 1 | #include <arbdb.h> |
|---|
| 2 | #include <arbdbt.h> |
|---|
| 3 | #include <aw_root.hxx> |
|---|
| 4 | #include <aw_window.hxx> |
|---|
| 5 | #include <awt_tree.hxx> |
|---|
| 6 | #include <awt.hxx> |
|---|
| 7 | |
|---|
| 8 | #include <awt_tree_cmp.hxx> |
|---|
| 9 | |
|---|
| 10 | #include <cstdlib> |
|---|
| 11 | #include <cstdio> |
|---|
| 12 | #include <cstring> |
|---|
| 13 | #include <string> |
|---|
| 14 | |
|---|
| 15 | using namespace std; |
|---|
| 16 | |
|---|
| 17 | AWT_species_set_root::AWT_species_set_root(GBDATA *gb_maini,long nspeciesi){ |
|---|
| 18 | memset((char *)this,0,sizeof(*this)); |
|---|
| 19 | gb_main = gb_maini; |
|---|
| 20 | nspecies = nspeciesi; |
|---|
| 21 | sets = (AWT_species_set **)GB_calloc(sizeof(AWT_species_set *),(size_t)(nspecies*2+1)); |
|---|
| 22 | |
|---|
| 23 | int i; |
|---|
| 24 | for (i=0;i<256;i++){ |
|---|
| 25 | int j = i; |
|---|
| 26 | int count = 0; |
|---|
| 27 | while (j) { // count bits |
|---|
| 28 | if (j&1) count ++; |
|---|
| 29 | j = j>>1; |
|---|
| 30 | } |
|---|
| 31 | diff_bits[i] = count; |
|---|
| 32 | } |
|---|
| 33 | species_hash = GBS_create_hash(nspecies, GB_IGNORE_CASE); |
|---|
| 34 | species_counter = 1; |
|---|
| 35 | } |
|---|
| 36 | |
|---|
| 37 | AWT_species_set_root::~AWT_species_set_root(){ |
|---|
| 38 | int i; |
|---|
| 39 | for (i=0;i<nsets;i++){ |
|---|
| 40 | delete sets[i]; |
|---|
| 41 | } |
|---|
| 42 | delete sets; |
|---|
| 43 | } |
|---|
| 44 | |
|---|
| 45 | void AWT_species_set_root::add(const char *species_name){ |
|---|
| 46 | if (GBS_read_hash(species_hash,species_name) ){ |
|---|
| 47 | aw_message(GBS_global_string("Warning: Species '%s' is found more than once in tree", species_name)); |
|---|
| 48 | }else{ |
|---|
| 49 | GBS_write_hash(species_hash,species_name,species_counter++); |
|---|
| 50 | } |
|---|
| 51 | } |
|---|
| 52 | |
|---|
| 53 | void AWT_species_set_root::add(AWT_species_set *set){ |
|---|
| 54 | awt_assert(nsets<nspecies*2); |
|---|
| 55 | sets[nsets++] = set; |
|---|
| 56 | } |
|---|
| 57 | |
|---|
| 58 | AWT_species_set *AWT_species_set_root::search(AWT_species_set *set,long *best_co){ |
|---|
| 59 | AWT_species_set *result = 0; |
|---|
| 60 | long i; |
|---|
| 61 | long best_cost = 0x7fffffff; |
|---|
| 62 | unsigned char *sbs = set->bitstring; |
|---|
| 63 | for (i=nsets-1;i>=0;i--){ |
|---|
| 64 | long j; |
|---|
| 65 | long sum = 0; |
|---|
| 66 | unsigned char *rsb = sets[i]->bitstring; |
|---|
| 67 | for (j=nspecies/8 -1 + 1; j>=0;j--){ |
|---|
| 68 | sum += diff_bits[ (sbs[j]) ^ (rsb[j]) ]; |
|---|
| 69 | } |
|---|
| 70 | if (sum > nspecies/2) sum = nspecies - sum; // take always the minimum |
|---|
| 71 | if (sum < best_cost){ |
|---|
| 72 | best_cost = sum; |
|---|
| 73 | result = sets[i]; |
|---|
| 74 | } |
|---|
| 75 | } |
|---|
| 76 | *best_co = best_cost; |
|---|
| 77 | return result; |
|---|
| 78 | } |
|---|
| 79 | |
|---|
| 80 | int AWT_species_set_root::search(AWT_species_set *set,FILE *log_file){ |
|---|
| 81 | long netto_cost; |
|---|
| 82 | double best_cost; |
|---|
| 83 | AWT_species_set *bs = this->search(set,&netto_cost); |
|---|
| 84 | best_cost = netto_cost + set->unfound_species_count * 0.0001; |
|---|
| 85 | if (best_cost < bs->best_cost){ |
|---|
| 86 | bs->best_cost = best_cost; |
|---|
| 87 | bs->best_node = set->node; |
|---|
| 88 | } |
|---|
| 89 | if (log_file){ |
|---|
| 90 | if ( netto_cost){ |
|---|
| 91 | fprintf(log_file, "Node '%s' placed not optimal, %li errors\n", |
|---|
| 92 | set->node->name, |
|---|
| 93 | netto_cost); |
|---|
| 94 | } |
|---|
| 95 | } |
|---|
| 96 | return netto_cost; |
|---|
| 97 | } |
|---|
| 98 | |
|---|
| 99 | // -------------------------------------------------------------------------------- |
|---|
| 100 | // AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,char *species_name) |
|---|
| 101 | // -------------------------------------------------------------------------------- |
|---|
| 102 | AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,char *species_name){ |
|---|
| 103 | memset((char *)this,0,sizeof(*this)); |
|---|
| 104 | bitstring = (unsigned char *)GB_calloc(sizeof(char),size_t(ssr->nspecies/8)+sizeof(long)+1); |
|---|
| 105 | long species_index = GBS_read_hash(ssr->species_hash,species_name); |
|---|
| 106 | if (species_index){ |
|---|
| 107 | bitstring[species_index/8] |= 1 << (species_index %8); |
|---|
| 108 | }else{ |
|---|
| 109 | unfound_species_count = 1; |
|---|
| 110 | } |
|---|
| 111 | this->node = nodei; |
|---|
| 112 | best_cost = 0x7fffffff; |
|---|
| 113 | } |
|---|
| 114 | |
|---|
| 115 | // -------------------------------------------------------------------------------- |
|---|
| 116 | // AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,AWT_species_set *l,AWT_species_set *r) |
|---|
| 117 | // -------------------------------------------------------------------------------- |
|---|
| 118 | AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,AWT_species_set *l,AWT_species_set *r){ |
|---|
| 119 | memset((char *)this,0,sizeof(*this)); |
|---|
| 120 | this->node = node; |
|---|
| 121 | long j = ssr->nspecies/8+1; |
|---|
| 122 | bitstring = (unsigned char *)GB_calloc(sizeof(char),size_t(ssr->nspecies/8)+5); |
|---|
| 123 | long *lbits = (long *)l->bitstring; |
|---|
| 124 | long *rbits = (long *)r->bitstring; |
|---|
| 125 | long *dest = (long *)bitstring; |
|---|
| 126 | for (j = ssr->nspecies/8/sizeof(long) - 1 + 1;j>=0;j--){ |
|---|
| 127 | dest[j] = lbits[j] | rbits[j]; |
|---|
| 128 | } |
|---|
| 129 | unfound_species_count = l->unfound_species_count + r->unfound_species_count; |
|---|
| 130 | this->node = nodei; |
|---|
| 131 | best_cost = 0x7fffffff; |
|---|
| 132 | } |
|---|
| 133 | |
|---|
| 134 | AWT_species_set::~AWT_species_set(){ |
|---|
| 135 | free(bitstring); |
|---|
| 136 | } |
|---|
| 137 | |
|---|
| 138 | AWT_species_set *AWT_species_set_root::move_tree_2_ssr(AP_tree *node){ |
|---|
| 139 | AWT_species_set *ss; |
|---|
| 140 | if (node->is_leaf){ |
|---|
| 141 | this->add(node->name); |
|---|
| 142 | ss = new AWT_species_set(node,this,node->name); |
|---|
| 143 | // ssr->add(ss); |
|---|
| 144 | }else{ |
|---|
| 145 | AWT_species_set *ls = this->move_tree_2_ssr(node->leftson); |
|---|
| 146 | AWT_species_set *rs = this->move_tree_2_ssr(node->rightson); |
|---|
| 147 | ss = new AWT_species_set(node,this,ls,rs); |
|---|
| 148 | this->add(ss); |
|---|
| 149 | } |
|---|
| 150 | return ss; |
|---|
| 151 | } |
|---|
| 152 | |
|---|
| 153 | AWT_species_set *AWT_species_set_root::find_best_matches_info(AP_tree *tree_source, FILE *log, bool compare_node_info){ |
|---|
| 154 | /* Go through all node of the source tree and search for the best |
|---|
| 155 | * matching node in dest_tree (meaning searching ssr->sets) |
|---|
| 156 | * If a match is found, set ssr->sets to this match) |
|---|
| 157 | */ |
|---|
| 158 | AWT_species_set *ss; |
|---|
| 159 | if (tree_source->is_leaf){ |
|---|
| 160 | aw_status(this->status++/(double)this->mstatus); |
|---|
| 161 | ss = new AWT_species_set(tree_source,this,tree_source->name); |
|---|
| 162 | return ss; |
|---|
| 163 | } |
|---|
| 164 | aw_status(this->status++/(double)this->mstatus); |
|---|
| 165 | AWT_species_set *ls = this->find_best_matches_info(tree_source->leftson,log,compare_node_info); |
|---|
| 166 | AWT_species_set *rs = this->find_best_matches_info(tree_source->rightson,log,compare_node_info); |
|---|
| 167 | |
|---|
| 168 | ss = new AWT_species_set(tree_source,this,ls,rs); // Generate new bitstring |
|---|
| 169 | if (compare_node_info){ |
|---|
| 170 | int mismatches = this->search(ss,log); // Search optimal position |
|---|
| 171 | delete ss->node->remark_branch; |
|---|
| 172 | ss->node->remark_branch = 0; |
|---|
| 173 | if (mismatches){ |
|---|
| 174 | char remark[20]; |
|---|
| 175 | sprintf(remark,"# %i",mismatches); // the #-sign is important (otherwise TREE_write_Newick will not work correctly) |
|---|
| 176 | ss->node->remark_branch = strdup(remark); |
|---|
| 177 | } |
|---|
| 178 | }else{ |
|---|
| 179 | if(tree_source->name){ |
|---|
| 180 | this->search(ss,log); // Search optimal position |
|---|
| 181 | } |
|---|
| 182 | } |
|---|
| 183 | delete rs; |
|---|
| 184 | delete ls; |
|---|
| 185 | return ss; // return bitstring for this node |
|---|
| 186 | } |
|---|
| 187 | |
|---|
| 188 | static bool containsMarkedSpecies(const AP_tree *tree) { |
|---|
| 189 | if (tree->is_leaf) { |
|---|
| 190 | GBDATA *gb_species = tree->gb_node; |
|---|
| 191 | int flag = GB_read_flag(gb_species); |
|---|
| 192 | return flag!=0; |
|---|
| 193 | } |
|---|
| 194 | return containsMarkedSpecies(tree->leftson) || containsMarkedSpecies(tree->rightson); |
|---|
| 195 | } |
|---|
| 196 | |
|---|
| 197 | GB_ERROR AWT_species_set_root::copy_node_infos(FILE *log, bool delete_old_nodes, bool nodes_with_marked_only) { |
|---|
| 198 | GB_ERROR error = 0; |
|---|
| 199 | long j; |
|---|
| 200 | |
|---|
| 201 | for (j=this->nsets-1;j>=0;j--){ |
|---|
| 202 | AWT_species_set *set = this->sets[j]; |
|---|
| 203 | char *old_group_name = 0; |
|---|
| 204 | bool insert_new_node = set->best_node && set->best_node->name; |
|---|
| 205 | |
|---|
| 206 | if (nodes_with_marked_only && insert_new_node) { |
|---|
| 207 | int hasMarked = containsMarkedSpecies(set->node); |
|---|
| 208 | if (!hasMarked) insert_new_node = false; |
|---|
| 209 | } |
|---|
| 210 | |
|---|
| 211 | if (set->node->gb_node && (delete_old_nodes || insert_new_node)) { // There is already a node, delete old |
|---|
| 212 | if (set->node->name == 0) { |
|---|
| 213 | GBDATA *gb_name = GB_entry(set->node->gb_node, "group_name"); |
|---|
| 214 | if (gb_name) { |
|---|
| 215 | set->node->name = GB_read_string(gb_name); |
|---|
| 216 | } |
|---|
| 217 | else { |
|---|
| 218 | set->node->name = strdup("<gb_node w/o name>"); |
|---|
| 219 | } |
|---|
| 220 | } |
|---|
| 221 | |
|---|
| 222 | old_group_name = strdup(set->node->name); // store old_group_name to rename new inserted node |
|---|
| 223 | #if defined(DEBUG) |
|---|
| 224 | printf("delete node '%s'\n", set->node->name); |
|---|
| 225 | #endif // DEBUG |
|---|
| 226 | error = GB_delete(set->node->gb_node); |
|---|
| 227 | if (error) break; |
|---|
| 228 | if (log) fprintf(log,"Destination Tree not empty, destination node '%s' deleted\n", old_group_name); |
|---|
| 229 | set->node->gb_node = 0; |
|---|
| 230 | set->node->name = 0; |
|---|
| 231 | } |
|---|
| 232 | if (insert_new_node) { |
|---|
| 233 | set->node->gb_node = GB_create_container(set->node->tree_root->gb_tree,"node"); |
|---|
| 234 | error = GB_copy(set->node->gb_node,set->best_node->gb_node); |
|---|
| 235 | if (error) break; |
|---|
| 236 | |
|---|
| 237 | GB_dump(set->node->gb_node); |
|---|
| 238 | |
|---|
| 239 | GBDATA *gb_name = GB_entry(set->node->gb_node, "group_name"); |
|---|
| 240 | gb_assert(gb_name); |
|---|
| 241 | if (gb_name) { |
|---|
| 242 | char *name = GB_read_string(gb_name); |
|---|
| 243 | |
|---|
| 244 | if (old_group_name && strcmp(old_group_name, name)!=0 && !delete_old_nodes) { |
|---|
| 245 | // there was a group with a different name and we are adding nodes |
|---|
| 246 | string new_group_name = (string)name+" [was: "+old_group_name+"]"; |
|---|
| 247 | GB_write_string(gb_name, new_group_name.c_str()); |
|---|
| 248 | delete name; name = GB_read_string(gb_name); |
|---|
| 249 | } |
|---|
| 250 | #if defined(DEBUG) |
|---|
| 251 | printf("insert node '%s'\n", name); |
|---|
| 252 | #endif // DEBUG |
|---|
| 253 | delete name; |
|---|
| 254 | } |
|---|
| 255 | } |
|---|
| 256 | delete old_group_name; |
|---|
| 257 | } |
|---|
| 258 | |
|---|
| 259 | return error; |
|---|
| 260 | } |
|---|
| 261 | |
|---|
| 262 | void AWT_move_info(GBDATA *gb_main, const char *tree_source,const char *tree_dest,const char *log_file, bool compare_node_info, bool delete_old_nodes, bool nodes_with_marked_only) { |
|---|
| 263 | GB_ERROR error = 0; |
|---|
| 264 | FILE *log = 0; |
|---|
| 265 | |
|---|
| 266 | if (log_file) { |
|---|
| 267 | log = fopen(log_file, "w"); |
|---|
| 268 | fprintf(log, |
|---|
| 269 | "LOGFILE: %s node info\n" |
|---|
| 270 | "\n" |
|---|
| 271 | " Source tree '%s'\n" |
|---|
| 272 | "Destination tree '%s'\n" |
|---|
| 273 | "\n", |
|---|
| 274 | delete_old_nodes ? "Moving" : "Adding", |
|---|
| 275 | tree_source, tree_dest); |
|---|
| 276 | } |
|---|
| 277 | |
|---|
| 278 | GB_begin_transaction(gb_main); |
|---|
| 279 | |
|---|
| 280 | AP_tree *source = new AP_tree(0); |
|---|
| 281 | AP_tree *dest = new AP_tree(0); |
|---|
| 282 | AP_tree_root *rsource = new AP_tree_root(gb_main, source, tree_source); |
|---|
| 283 | AP_tree_root *rdest = new AP_tree_root(gb_main, dest, tree_dest); |
|---|
| 284 | |
|---|
| 285 | aw_openstatus("Comparing Topologies"); |
|---|
| 286 | |
|---|
| 287 | aw_status("Load Tree 1"); |
|---|
| 288 | error = source->load(rsource, 1, GB_FALSE, GB_FALSE, 0, 0); // link to database |
|---|
| 289 | if (!error) { |
|---|
| 290 | aw_status("Load Tree 2"); |
|---|
| 291 | error = dest->load(rdest, 1, GB_FALSE, GB_FALSE, 0, 0); // link also |
|---|
| 292 | if (!error) { |
|---|
| 293 | long nspecies = dest->arb_tree_leafsum2(); |
|---|
| 294 | AWT_species_set_root *ssr = new AWT_species_set_root(gb_main, nspecies); |
|---|
| 295 | |
|---|
| 296 | aw_status("Building Search Table for Tree 2"); |
|---|
| 297 | ssr->move_tree_2_ssr(dest); |
|---|
| 298 | |
|---|
| 299 | aw_status("Compare Both Trees"); |
|---|
| 300 | ssr->mstatus = source->arb_tree_leafsum2() * 2; |
|---|
| 301 | ssr->status = 0; |
|---|
| 302 | |
|---|
| 303 | if (ssr->mstatus < 2) error = GB_export_error("Destination tree has less than 3 species"); |
|---|
| 304 | else { |
|---|
| 305 | AWT_species_set *root_setl = ssr->find_best_matches_info(source->leftson, log, compare_node_info); |
|---|
| 306 | AWT_species_set *root_setr = ssr->find_best_matches_info(source->rightson, log, compare_node_info); |
|---|
| 307 | |
|---|
| 308 | if (!compare_node_info) { |
|---|
| 309 | aw_status("Copy Node Information"); |
|---|
| 310 | ssr->copy_node_infos(log, delete_old_nodes, nodes_with_marked_only); |
|---|
| 311 | } |
|---|
| 312 | |
|---|
| 313 | long dummy = 0; |
|---|
| 314 | AWT_species_set *new_root_setl = ssr->search(root_setl, &dummy); |
|---|
| 315 | AWT_species_set *new_root_setr = ssr->search(root_setr, &dummy); |
|---|
| 316 | AP_tree *new_rootl = (AP_tree *) new_root_setl->node; |
|---|
| 317 | AP_tree *new_rootr = (AP_tree *) new_root_setr->node; |
|---|
| 318 | |
|---|
| 319 | new_rootl->set_root(); // set root correctly |
|---|
| 320 | new_rootr->set_root(); // set root correctly |
|---|
| 321 | |
|---|
| 322 | aw_status("Save Tree"); |
|---|
| 323 | |
|---|
| 324 | AP_tree *root = new_rootr; |
|---|
| 325 | while (root->father) root = root->father; |
|---|
| 326 | |
|---|
| 327 | error = GBT_write_tree(gb_main, rdest->gb_tree, 0, root->get_gbt_tree()); |
|---|
| 328 | if (!error) error = GBT_write_tree(gb_main, rsource->gb_tree, 0, source->get_gbt_tree()); |
|---|
| 329 | } |
|---|
| 330 | } |
|---|
| 331 | } |
|---|
| 332 | |
|---|
| 333 | if (log) { |
|---|
| 334 | if (error) fprintf(log, "\nError: %s\n", error); // write error to log as well |
|---|
| 335 | fclose(log); |
|---|
| 336 | } |
|---|
| 337 | |
|---|
| 338 | aw_closestatus(); |
|---|
| 339 | |
|---|
| 340 | delete source; |
|---|
| 341 | delete dest; |
|---|
| 342 | delete rsource; |
|---|
| 343 | delete rdest; |
|---|
| 344 | |
|---|
| 345 | GB_end_transaction_show_error(gb_main, error, aw_message); |
|---|
| 346 | } |
|---|
| 347 | |
|---|