1 | #include <arbdb.h> |
---|
2 | #include <arbdbt.h> |
---|
3 | #include <aw_root.hxx> |
---|
4 | #include <aw_window.hxx> |
---|
5 | #include <awt_tree.hxx> |
---|
6 | #include <awt.hxx> |
---|
7 | |
---|
8 | #include <awt_tree_cmp.hxx> |
---|
9 | |
---|
10 | #include <cstdlib> |
---|
11 | #include <cstdio> |
---|
12 | #include <cstring> |
---|
13 | #include <string> |
---|
14 | |
---|
15 | using namespace std; |
---|
16 | |
---|
17 | AWT_species_set_root::AWT_species_set_root(GBDATA *gb_maini,long nspeciesi){ |
---|
18 | memset((char *)this,0,sizeof(*this)); |
---|
19 | gb_main = gb_maini; |
---|
20 | nspecies = nspeciesi; |
---|
21 | sets = (AWT_species_set **)GB_calloc(sizeof(AWT_species_set *),(size_t)(nspecies*2+1)); |
---|
22 | |
---|
23 | int i; |
---|
24 | for (i=0;i<256;i++){ |
---|
25 | int j = i; |
---|
26 | int count = 0; |
---|
27 | while (j) { // count bits |
---|
28 | if (j&1) count ++; |
---|
29 | j = j>>1; |
---|
30 | } |
---|
31 | diff_bits[i] = count; |
---|
32 | } |
---|
33 | species_hash = GBS_create_hash(nspecies, GB_IGNORE_CASE); |
---|
34 | species_counter = 1; |
---|
35 | } |
---|
36 | |
---|
37 | AWT_species_set_root::~AWT_species_set_root(){ |
---|
38 | int i; |
---|
39 | for (i=0;i<nsets;i++){ |
---|
40 | delete sets[i]; |
---|
41 | } |
---|
42 | delete sets; |
---|
43 | } |
---|
44 | |
---|
45 | void AWT_species_set_root::add(const char *species_name){ |
---|
46 | if (GBS_read_hash(species_hash,species_name) ){ |
---|
47 | aw_message(GBS_global_string("Warning: Species '%s' is found more than once in tree", species_name)); |
---|
48 | }else{ |
---|
49 | GBS_write_hash(species_hash,species_name,species_counter++); |
---|
50 | } |
---|
51 | } |
---|
52 | |
---|
53 | void AWT_species_set_root::add(AWT_species_set *set){ |
---|
54 | awt_assert(nsets<nspecies*2); |
---|
55 | sets[nsets++] = set; |
---|
56 | } |
---|
57 | |
---|
58 | AWT_species_set *AWT_species_set_root::search(AWT_species_set *set,long *best_co){ |
---|
59 | AWT_species_set *result = 0; |
---|
60 | long i; |
---|
61 | long best_cost = 0x7fffffff; |
---|
62 | unsigned char *sbs = set->bitstring; |
---|
63 | for (i=nsets-1;i>=0;i--){ |
---|
64 | long j; |
---|
65 | long sum = 0; |
---|
66 | unsigned char *rsb = sets[i]->bitstring; |
---|
67 | for (j=nspecies/8 -1 + 1; j>=0;j--){ |
---|
68 | sum += diff_bits[ (sbs[j]) ^ (rsb[j]) ]; |
---|
69 | } |
---|
70 | if (sum > nspecies/2) sum = nspecies - sum; // take always the minimum |
---|
71 | if (sum < best_cost){ |
---|
72 | best_cost = sum; |
---|
73 | result = sets[i]; |
---|
74 | } |
---|
75 | } |
---|
76 | *best_co = best_cost; |
---|
77 | return result; |
---|
78 | } |
---|
79 | |
---|
80 | int AWT_species_set_root::search(AWT_species_set *set,FILE *log_file){ |
---|
81 | long netto_cost; |
---|
82 | double best_cost; |
---|
83 | AWT_species_set *bs = this->search(set,&netto_cost); |
---|
84 | best_cost = netto_cost + set->unfound_species_count * 0.0001; |
---|
85 | if (best_cost < bs->best_cost){ |
---|
86 | bs->best_cost = best_cost; |
---|
87 | bs->best_node = set->node; |
---|
88 | } |
---|
89 | if (log_file){ |
---|
90 | if ( netto_cost){ |
---|
91 | fprintf(log_file, "Node '%s' placed not optimal, %li errors\n", |
---|
92 | set->node->name, |
---|
93 | netto_cost); |
---|
94 | } |
---|
95 | } |
---|
96 | return netto_cost; |
---|
97 | } |
---|
98 | |
---|
99 | // -------------------------------------------------------------------------------- |
---|
100 | // AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,char *species_name) |
---|
101 | // -------------------------------------------------------------------------------- |
---|
102 | AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,char *species_name){ |
---|
103 | memset((char *)this,0,sizeof(*this)); |
---|
104 | bitstring = (unsigned char *)GB_calloc(sizeof(char),size_t(ssr->nspecies/8)+sizeof(long)+1); |
---|
105 | long species_index = GBS_read_hash(ssr->species_hash,species_name); |
---|
106 | if (species_index){ |
---|
107 | bitstring[species_index/8] |= 1 << (species_index %8); |
---|
108 | }else{ |
---|
109 | unfound_species_count = 1; |
---|
110 | } |
---|
111 | this->node = nodei; |
---|
112 | best_cost = 0x7fffffff; |
---|
113 | } |
---|
114 | |
---|
115 | // -------------------------------------------------------------------------------- |
---|
116 | // AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,AWT_species_set *l,AWT_species_set *r) |
---|
117 | // -------------------------------------------------------------------------------- |
---|
118 | AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,AWT_species_set *l,AWT_species_set *r){ |
---|
119 | memset((char *)this,0,sizeof(*this)); |
---|
120 | this->node = node; |
---|
121 | long j = ssr->nspecies/8+1; |
---|
122 | bitstring = (unsigned char *)GB_calloc(sizeof(char),size_t(ssr->nspecies/8)+5); |
---|
123 | long *lbits = (long *)l->bitstring; |
---|
124 | long *rbits = (long *)r->bitstring; |
---|
125 | long *dest = (long *)bitstring; |
---|
126 | for (j = ssr->nspecies/8/sizeof(long) - 1 + 1;j>=0;j--){ |
---|
127 | dest[j] = lbits[j] | rbits[j]; |
---|
128 | } |
---|
129 | unfound_species_count = l->unfound_species_count + r->unfound_species_count; |
---|
130 | this->node = nodei; |
---|
131 | best_cost = 0x7fffffff; |
---|
132 | } |
---|
133 | |
---|
134 | AWT_species_set::~AWT_species_set(){ |
---|
135 | free(bitstring); |
---|
136 | } |
---|
137 | |
---|
138 | AWT_species_set *AWT_species_set_root::move_tree_2_ssr(AP_tree *node){ |
---|
139 | AWT_species_set *ss; |
---|
140 | if (node->is_leaf){ |
---|
141 | this->add(node->name); |
---|
142 | ss = new AWT_species_set(node,this,node->name); |
---|
143 | // ssr->add(ss); |
---|
144 | }else{ |
---|
145 | AWT_species_set *ls = this->move_tree_2_ssr(node->leftson); |
---|
146 | AWT_species_set *rs = this->move_tree_2_ssr(node->rightson); |
---|
147 | ss = new AWT_species_set(node,this,ls,rs); |
---|
148 | this->add(ss); |
---|
149 | } |
---|
150 | return ss; |
---|
151 | } |
---|
152 | |
---|
153 | AWT_species_set *AWT_species_set_root::find_best_matches_info(AP_tree *tree_source, FILE *log, bool compare_node_info){ |
---|
154 | /* Go through all node of the source tree and search for the best |
---|
155 | * matching node in dest_tree (meaning searching ssr->sets) |
---|
156 | * If a match is found, set ssr->sets to this match) |
---|
157 | */ |
---|
158 | AWT_species_set *ss; |
---|
159 | if (tree_source->is_leaf){ |
---|
160 | aw_status(this->status++/(double)this->mstatus); |
---|
161 | ss = new AWT_species_set(tree_source,this,tree_source->name); |
---|
162 | return ss; |
---|
163 | } |
---|
164 | aw_status(this->status++/(double)this->mstatus); |
---|
165 | AWT_species_set *ls = this->find_best_matches_info(tree_source->leftson,log,compare_node_info); |
---|
166 | AWT_species_set *rs = this->find_best_matches_info(tree_source->rightson,log,compare_node_info); |
---|
167 | |
---|
168 | ss = new AWT_species_set(tree_source,this,ls,rs); // Generate new bitstring |
---|
169 | if (compare_node_info){ |
---|
170 | int mismatches = this->search(ss,log); // Search optimal position |
---|
171 | delete ss->node->remark_branch; |
---|
172 | ss->node->remark_branch = 0; |
---|
173 | if (mismatches){ |
---|
174 | char remark[20]; |
---|
175 | sprintf(remark,"# %i",mismatches); // the #-sign is important (otherwise TREE_write_Newick will not work correctly) |
---|
176 | ss->node->remark_branch = strdup(remark); |
---|
177 | } |
---|
178 | }else{ |
---|
179 | if(tree_source->name){ |
---|
180 | this->search(ss,log); // Search optimal position |
---|
181 | } |
---|
182 | } |
---|
183 | delete rs; |
---|
184 | delete ls; |
---|
185 | return ss; // return bitstring for this node |
---|
186 | } |
---|
187 | |
---|
188 | static bool containsMarkedSpecies(const AP_tree *tree) { |
---|
189 | if (tree->is_leaf) { |
---|
190 | GBDATA *gb_species = tree->gb_node; |
---|
191 | int flag = GB_read_flag(gb_species); |
---|
192 | return flag!=0; |
---|
193 | } |
---|
194 | return containsMarkedSpecies(tree->leftson) || containsMarkedSpecies(tree->rightson); |
---|
195 | } |
---|
196 | |
---|
197 | GB_ERROR AWT_species_set_root::copy_node_infos(FILE *log, bool delete_old_nodes, bool nodes_with_marked_only) { |
---|
198 | GB_ERROR error = 0; |
---|
199 | long j; |
---|
200 | |
---|
201 | for (j=this->nsets-1;j>=0;j--){ |
---|
202 | AWT_species_set *set = this->sets[j]; |
---|
203 | char *old_group_name = 0; |
---|
204 | bool insert_new_node = set->best_node && set->best_node->name; |
---|
205 | |
---|
206 | if (nodes_with_marked_only && insert_new_node) { |
---|
207 | int hasMarked = containsMarkedSpecies(set->node); |
---|
208 | if (!hasMarked) insert_new_node = false; |
---|
209 | } |
---|
210 | |
---|
211 | if (set->node->gb_node && (delete_old_nodes || insert_new_node)) { // There is already a node, delete old |
---|
212 | if (set->node->name == 0) { |
---|
213 | GBDATA *gb_name = GB_entry(set->node->gb_node, "group_name"); |
---|
214 | if (gb_name) { |
---|
215 | set->node->name = GB_read_string(gb_name); |
---|
216 | } |
---|
217 | else { |
---|
218 | set->node->name = strdup("<gb_node w/o name>"); |
---|
219 | } |
---|
220 | } |
---|
221 | |
---|
222 | old_group_name = strdup(set->node->name); // store old_group_name to rename new inserted node |
---|
223 | #if defined(DEBUG) |
---|
224 | printf("delete node '%s'\n", set->node->name); |
---|
225 | #endif // DEBUG |
---|
226 | error = GB_delete(set->node->gb_node); |
---|
227 | if (error) break; |
---|
228 | if (log) fprintf(log,"Destination Tree not empty, destination node '%s' deleted\n", old_group_name); |
---|
229 | set->node->gb_node = 0; |
---|
230 | set->node->name = 0; |
---|
231 | } |
---|
232 | if (insert_new_node) { |
---|
233 | set->node->gb_node = GB_create_container(set->node->tree_root->gb_tree,"node"); |
---|
234 | error = GB_copy(set->node->gb_node,set->best_node->gb_node); |
---|
235 | if (error) break; |
---|
236 | |
---|
237 | GB_dump(set->node->gb_node); |
---|
238 | |
---|
239 | GBDATA *gb_name = GB_entry(set->node->gb_node, "group_name"); |
---|
240 | gb_assert(gb_name); |
---|
241 | if (gb_name) { |
---|
242 | char *name = GB_read_string(gb_name); |
---|
243 | |
---|
244 | if (old_group_name && strcmp(old_group_name, name)!=0 && !delete_old_nodes) { |
---|
245 | // there was a group with a different name and we are adding nodes |
---|
246 | string new_group_name = (string)name+" [was: "+old_group_name+"]"; |
---|
247 | GB_write_string(gb_name, new_group_name.c_str()); |
---|
248 | delete name; name = GB_read_string(gb_name); |
---|
249 | } |
---|
250 | #if defined(DEBUG) |
---|
251 | printf("insert node '%s'\n", name); |
---|
252 | #endif // DEBUG |
---|
253 | delete name; |
---|
254 | } |
---|
255 | } |
---|
256 | delete old_group_name; |
---|
257 | } |
---|
258 | |
---|
259 | return error; |
---|
260 | } |
---|
261 | |
---|
262 | void AWT_move_info(GBDATA *gb_main, const char *tree_source,const char *tree_dest,const char *log_file, bool compare_node_info, bool delete_old_nodes, bool nodes_with_marked_only) { |
---|
263 | GB_ERROR error = 0; |
---|
264 | FILE *log = 0; |
---|
265 | |
---|
266 | if (log_file) { |
---|
267 | log = fopen(log_file, "w"); |
---|
268 | fprintf(log, |
---|
269 | "LOGFILE: %s node info\n" |
---|
270 | "\n" |
---|
271 | " Source tree '%s'\n" |
---|
272 | "Destination tree '%s'\n" |
---|
273 | "\n", |
---|
274 | delete_old_nodes ? "Moving" : "Adding", |
---|
275 | tree_source, tree_dest); |
---|
276 | } |
---|
277 | |
---|
278 | GB_begin_transaction(gb_main); |
---|
279 | |
---|
280 | AP_tree *source = new AP_tree(0); |
---|
281 | AP_tree *dest = new AP_tree(0); |
---|
282 | AP_tree_root *rsource = new AP_tree_root(gb_main, source, tree_source); |
---|
283 | AP_tree_root *rdest = new AP_tree_root(gb_main, dest, tree_dest); |
---|
284 | |
---|
285 | aw_openstatus("Comparing Topologies"); |
---|
286 | |
---|
287 | aw_status("Load Tree 1"); |
---|
288 | error = source->load(rsource, 1, GB_FALSE, GB_FALSE, 0, 0); // link to database |
---|
289 | if (!error) { |
---|
290 | aw_status("Load Tree 2"); |
---|
291 | error = dest->load(rdest, 1, GB_FALSE, GB_FALSE, 0, 0); // link also |
---|
292 | if (!error) { |
---|
293 | long nspecies = dest->arb_tree_leafsum2(); |
---|
294 | AWT_species_set_root *ssr = new AWT_species_set_root(gb_main, nspecies); |
---|
295 | |
---|
296 | aw_status("Building Search Table for Tree 2"); |
---|
297 | ssr->move_tree_2_ssr(dest); |
---|
298 | |
---|
299 | aw_status("Compare Both Trees"); |
---|
300 | ssr->mstatus = source->arb_tree_leafsum2() * 2; |
---|
301 | ssr->status = 0; |
---|
302 | |
---|
303 | if (ssr->mstatus < 2) error = GB_export_error("Destination tree has less than 3 species"); |
---|
304 | else { |
---|
305 | AWT_species_set *root_setl = ssr->find_best_matches_info(source->leftson, log, compare_node_info); |
---|
306 | AWT_species_set *root_setr = ssr->find_best_matches_info(source->rightson, log, compare_node_info); |
---|
307 | |
---|
308 | if (!compare_node_info) { |
---|
309 | aw_status("Copy Node Information"); |
---|
310 | ssr->copy_node_infos(log, delete_old_nodes, nodes_with_marked_only); |
---|
311 | } |
---|
312 | |
---|
313 | long dummy = 0; |
---|
314 | AWT_species_set *new_root_setl = ssr->search(root_setl, &dummy); |
---|
315 | AWT_species_set *new_root_setr = ssr->search(root_setr, &dummy); |
---|
316 | AP_tree *new_rootl = (AP_tree *) new_root_setl->node; |
---|
317 | AP_tree *new_rootr = (AP_tree *) new_root_setr->node; |
---|
318 | |
---|
319 | new_rootl->set_root(); // set root correctly |
---|
320 | new_rootr->set_root(); // set root correctly |
---|
321 | |
---|
322 | aw_status("Save Tree"); |
---|
323 | |
---|
324 | AP_tree *root = new_rootr; |
---|
325 | while (root->father) root = root->father; |
---|
326 | |
---|
327 | error = GBT_write_tree(gb_main, rdest->gb_tree, 0, root->get_gbt_tree()); |
---|
328 | if (!error) error = GBT_write_tree(gb_main, rsource->gb_tree, 0, source->get_gbt_tree()); |
---|
329 | } |
---|
330 | } |
---|
331 | } |
---|
332 | |
---|
333 | if (log) { |
---|
334 | if (error) fprintf(log, "\nError: %s\n", error); // write error to log as well |
---|
335 | fclose(log); |
---|
336 | } |
---|
337 | |
---|
338 | aw_closestatus(); |
---|
339 | |
---|
340 | delete source; |
---|
341 | delete dest; |
---|
342 | delete rsource; |
---|
343 | delete rdest; |
---|
344 | |
---|
345 | GB_end_transaction_show_error(gb_main, error, aw_message); |
---|
346 | } |
---|
347 | |
---|