source: branches/port5/AWT/AWT_tree_cmp.cxx

Last change on this file was 6143, checked in by westram, 16 years ago
  • backport [6141] (parts changing code, but only strings and comments)
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 12.3 KB
Line 
1#include <arbdb.h>
2#include <arbdbt.h>
3#include <aw_root.hxx>
4#include <aw_window.hxx>
5#include <awt_tree.hxx>
6#include <awt.hxx>
7
8#include <awt_tree_cmp.hxx>
9
10#include <cstdlib>
11#include <cstdio>
12#include <cstring>
13#include <string>
14
15using namespace std;
16
17AWT_species_set_root::AWT_species_set_root(GBDATA *gb_maini,long nspeciesi){
18    memset((char *)this,0,sizeof(*this));
19    gb_main = gb_maini;
20    nspecies = nspeciesi;
21    sets = (AWT_species_set **)GB_calloc(sizeof(AWT_species_set *),(size_t)(nspecies*2+1));
22
23    int i;
24    for (i=0;i<256;i++){
25        int j = i;
26        int count = 0;
27        while (j) {             // count bits
28            if (j&1) count ++;
29            j = j>>1;
30        }
31        diff_bits[i] = count;
32    }
33    species_hash = GBS_create_hash(nspecies, GB_IGNORE_CASE);
34    species_counter = 1;
35}
36
37AWT_species_set_root::~AWT_species_set_root(){
38    int i;
39    for (i=0;i<nsets;i++){
40        delete sets[i];
41    }
42    delete sets;
43}
44
45void AWT_species_set_root::add(const char *species_name){
46    if (GBS_read_hash(species_hash,species_name) ){
47        aw_message(GBS_global_string("Warning: Species '%s' is found more than once in tree", species_name));
48    }else{
49        GBS_write_hash(species_hash,species_name,species_counter++);
50    }
51}
52
53void AWT_species_set_root::add(AWT_species_set *set){
54    awt_assert(nsets<nspecies*2);
55    sets[nsets++] = set;
56}
57
58AWT_species_set *AWT_species_set_root::search(AWT_species_set *set,long *best_co){
59    AWT_species_set *result = 0;
60    long i;
61    long best_cost = 0x7fffffff;
62    unsigned char *sbs = set->bitstring;
63    for (i=nsets-1;i>=0;i--){
64        long j;
65        long sum = 0;
66        unsigned char *rsb = sets[i]->bitstring;
67        for (j=nspecies/8 -1 + 1; j>=0;j--){
68            sum += diff_bits[ (sbs[j]) ^ (rsb[j]) ];
69        }
70        if (sum > nspecies/2) sum = nspecies - sum; // take always the minimum
71        if (sum < best_cost){
72            best_cost = sum;
73            result = sets[i];
74        }
75    }
76    *best_co = best_cost;
77    return result;
78}
79
80int AWT_species_set_root::search(AWT_species_set *set,FILE *log_file){
81    long netto_cost;
82    double best_cost;
83    AWT_species_set *bs = this->search(set,&netto_cost);
84    best_cost = netto_cost + set->unfound_species_count * 0.0001;
85    if (best_cost < bs->best_cost){
86        bs->best_cost = best_cost;
87        bs->best_node = set->node;
88    }
89    if (log_file){
90        if ( netto_cost){
91            fprintf(log_file, "Node '%s' placed not optimal, %li errors\n",
92                    set->node->name,
93                    netto_cost);
94        }
95    }
96    return netto_cost;
97}
98
99// --------------------------------------------------------------------------------
100//     AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,char *species_name)
101// --------------------------------------------------------------------------------
102AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,char *species_name){
103    memset((char *)this,0,sizeof(*this));
104    bitstring = (unsigned char *)GB_calloc(sizeof(char),size_t(ssr->nspecies/8)+sizeof(long)+1);
105    long species_index = GBS_read_hash(ssr->species_hash,species_name);
106    if (species_index){
107        bitstring[species_index/8] |= 1 << (species_index %8);
108    }else{
109        unfound_species_count = 1;
110    }
111    this->node = nodei;
112    best_cost = 0x7fffffff;
113}
114
115// --------------------------------------------------------------------------------
116//     AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,AWT_species_set *l,AWT_species_set *r)
117// --------------------------------------------------------------------------------
118AWT_species_set::AWT_species_set(AP_tree *nodei,AWT_species_set_root *ssr,AWT_species_set *l,AWT_species_set *r){
119    memset((char *)this,0,sizeof(*this));
120    this->node = node;
121    long j = ssr->nspecies/8+1;
122    bitstring = (unsigned char *)GB_calloc(sizeof(char),size_t(ssr->nspecies/8)+5);
123    long *lbits = (long *)l->bitstring;
124    long *rbits = (long *)r->bitstring;
125    long *dest = (long *)bitstring;
126    for (j = ssr->nspecies/8/sizeof(long) - 1 + 1;j>=0;j--){
127        dest[j] = lbits[j] | rbits[j];
128    }
129    unfound_species_count = l->unfound_species_count + r->unfound_species_count;
130    this->node = nodei;
131    best_cost = 0x7fffffff;
132}
133
134AWT_species_set::~AWT_species_set(){
135    free(bitstring);
136}
137
138AWT_species_set *AWT_species_set_root::move_tree_2_ssr(AP_tree *node){
139    AWT_species_set *ss;
140    if (node->is_leaf){
141        this->add(node->name);
142        ss = new AWT_species_set(node,this,node->name);
143        //      ssr->add(ss);
144    }else{
145        AWT_species_set *ls = this->move_tree_2_ssr(node->leftson);
146        AWT_species_set *rs = this->move_tree_2_ssr(node->rightson);
147        ss = new AWT_species_set(node,this,ls,rs);
148        this->add(ss);
149    }
150    return ss;
151}
152
153AWT_species_set *AWT_species_set_root::find_best_matches_info(AP_tree *tree_source, FILE *log, bool compare_node_info){
154    /* Go through all node of the source tree and search for the best
155     * matching node in dest_tree (meaning searching ssr->sets)
156     * If a match is found, set ssr->sets to this match)
157     */
158    AWT_species_set *ss;
159    if (tree_source->is_leaf){
160        aw_status(this->status++/(double)this->mstatus);
161        ss = new AWT_species_set(tree_source,this,tree_source->name);
162        return ss;
163    }
164    aw_status(this->status++/(double)this->mstatus);
165    AWT_species_set *ls = this->find_best_matches_info(tree_source->leftson,log,compare_node_info);
166    AWT_species_set *rs = this->find_best_matches_info(tree_source->rightson,log,compare_node_info);
167
168    ss = new AWT_species_set(tree_source,this,ls,rs); // Generate new bitstring
169    if (compare_node_info){
170        int mismatches = this->search(ss,log);  // Search optimal position
171        delete ss->node->remark_branch;
172        ss->node->remark_branch = 0;
173        if (mismatches){
174            char remark[20];
175            sprintf(remark,"# %i",mismatches); // the #-sign is important (otherwise TREE_write_Newick will not work correctly)
176            ss->node->remark_branch = strdup(remark);
177        }
178    }else{
179        if(tree_source->name){
180            this->search(ss,log);       // Search optimal position
181        }
182    }
183    delete rs;
184    delete ls;
185    return ss;                  // return bitstring for this node
186}
187
188static bool containsMarkedSpecies(const AP_tree *tree) {
189    if (tree->is_leaf) {
190        GBDATA *gb_species = tree->gb_node;
191        int flag = GB_read_flag(gb_species);
192        return flag!=0;
193    }
194    return containsMarkedSpecies(tree->leftson) || containsMarkedSpecies(tree->rightson);
195}
196
197GB_ERROR AWT_species_set_root::copy_node_infos(FILE *log, bool delete_old_nodes, bool nodes_with_marked_only) {
198    GB_ERROR error = 0;
199    long j;
200
201    for (j=this->nsets-1;j>=0;j--){
202        AWT_species_set *set = this->sets[j];
203        char *old_group_name  = 0;
204        bool  insert_new_node = set->best_node && set->best_node->name;
205
206        if (nodes_with_marked_only && insert_new_node) {
207            int hasMarked = containsMarkedSpecies(set->node);
208            if (!hasMarked) insert_new_node = false;
209        }
210
211        if (set->node->gb_node && (delete_old_nodes || insert_new_node)) { // There is already a node, delete old
212            if (set->node->name == 0) {
213                GBDATA *gb_name = GB_entry(set->node->gb_node, "group_name");
214                if (gb_name) {
215                    set->node->name = GB_read_string(gb_name);
216                }
217                else {
218                    set->node->name = strdup("<gb_node w/o name>");
219                }
220            }
221
222            old_group_name = strdup(set->node->name); // store old_group_name to rename new inserted node
223#if defined(DEBUG)
224            printf("delete node '%s'\n", set->node->name);
225#endif // DEBUG
226            error = GB_delete(set->node->gb_node);
227            if (error) break;
228            if (log) fprintf(log,"Destination Tree not empty, destination node '%s' deleted\n",  old_group_name);
229            set->node->gb_node = 0;
230            set->node->name = 0;
231        }
232        if (insert_new_node) {
233            set->node->gb_node = GB_create_container(set->node->tree_root->gb_tree,"node");
234            error = GB_copy(set->node->gb_node,set->best_node->gb_node);
235            if (error) break;
236
237            GB_dump(set->node->gb_node);
238
239            GBDATA *gb_name = GB_entry(set->node->gb_node, "group_name");
240            gb_assert(gb_name);
241            if (gb_name) {
242                char *name = GB_read_string(gb_name);
243
244                if (old_group_name && strcmp(old_group_name, name)!=0 && !delete_old_nodes) {
245                    // there was a group with a different name and we are adding nodes
246                    string new_group_name = (string)name+" [was: "+old_group_name+"]";
247                    GB_write_string(gb_name, new_group_name.c_str());
248                    delete name; name = GB_read_string(gb_name);
249                }
250#if defined(DEBUG)
251                printf("insert node '%s'\n", name);
252#endif // DEBUG
253                delete name;
254            }
255        }
256        delete old_group_name;
257    }
258
259    return error;
260}
261
262void AWT_move_info(GBDATA *gb_main, const char *tree_source,const char *tree_dest,const char *log_file, bool compare_node_info, bool delete_old_nodes, bool nodes_with_marked_only) {
263    GB_ERROR  error = 0;
264    FILE     *log   = 0;
265
266    if (log_file) {
267        log = fopen(log_file, "w");
268        fprintf(log,
269                "LOGFILE: %s node info\n"
270                "\n"
271                "     Source tree '%s'\n"
272                "Destination tree '%s'\n"
273                "\n", 
274                delete_old_nodes ? "Moving" : "Adding",
275                tree_source, tree_dest);
276    }
277
278    GB_begin_transaction(gb_main);
279
280    AP_tree      *source  = new AP_tree(0);
281    AP_tree      *dest    = new AP_tree(0);
282    AP_tree_root *rsource = new AP_tree_root(gb_main, source, tree_source);
283    AP_tree_root *rdest   = new AP_tree_root(gb_main, dest, tree_dest);
284
285    aw_openstatus("Comparing Topologies");
286
287    aw_status("Load Tree 1");
288    error = source->load(rsource, 1, GB_FALSE, GB_FALSE, 0, 0); // link to database
289    if (!error) {
290        aw_status("Load Tree 2");
291        error = dest->load(rdest, 1, GB_FALSE, GB_FALSE, 0, 0); // link also
292        if (!error) {
293            long                  nspecies = dest->arb_tree_leafsum2();
294            AWT_species_set_root *ssr      = new AWT_species_set_root(gb_main, nspecies);
295
296            aw_status("Building Search Table for Tree 2");
297            ssr->move_tree_2_ssr(dest);
298
299            aw_status("Compare Both Trees");
300            ssr->mstatus = source->arb_tree_leafsum2() * 2;
301            ssr->status  = 0;
302
303            if (ssr->mstatus < 2) error = GB_export_error("Destination tree has less than 3 species");
304            else {
305                AWT_species_set *root_setl = ssr->find_best_matches_info(source->leftson,  log, compare_node_info);
306                AWT_species_set *root_setr = ssr->find_best_matches_info(source->rightson, log, compare_node_info);
307
308                if (!compare_node_info) {
309                    aw_status("Copy Node Information");
310                    ssr->copy_node_infos(log, delete_old_nodes, nodes_with_marked_only);
311                }
312               
313                long             dummy         = 0;
314                AWT_species_set *new_root_setl = ssr->search(root_setl, &dummy);
315                AWT_species_set *new_root_setr = ssr->search(root_setr, &dummy);
316                AP_tree         *new_rootl     = (AP_tree *) new_root_setl->node;
317                AP_tree         *new_rootr     = (AP_tree *) new_root_setr->node;
318
319                new_rootl->set_root(); // set root correctly
320                new_rootr->set_root(); // set root correctly
321
322                aw_status("Save Tree");
323               
324                AP_tree *root             = new_rootr;
325                while (root->father) root = root->father;
326
327                error             = GBT_write_tree(gb_main, rdest->gb_tree, 0, root->get_gbt_tree());
328                if (!error) error = GBT_write_tree(gb_main, rsource->gb_tree, 0, source->get_gbt_tree());
329            }
330        }
331    }
332
333    if (log) {
334        if (error) fprintf(log, "\nError: %s\n", error);       // write error to log as well
335        fclose(log);
336    }
337
338    aw_closestatus();
339
340    delete source;
341    delete dest;
342    delete rsource;
343    delete rdest;
344
345    GB_end_transaction_show_error(gb_main, error, aw_message);
346}
347
Note: See TracBrowser for help on using the repository browser.