source: tags/arb-6.0.4/DIST/DI_compress_matrix.cxx

Last change on this file was 11401, checked in by westram, 10 years ago
  • reintegrates 'tree' into 'trunk':
    • consensus trees:
      • support for merging partial trees ("worked" before, but results were crap; implements #65)
      • generated trees are automatically re-rooted and -ordered
      • always list source trees in consensus-tree-comment; show info about partial trees
      • fixed progress bar
    • made GBT_TREE a base class of other tree classes (implements #31)
    • save tree properties in properties (not in DB)
    • new functions 'Remove zombies/marked from ALL trees'
    • tree load/save: layout fixes
    • unit tests
      • added tests for basic tree modifications (PARSIMONY)
    • performance:
      • compute_tree updates tree information in one traversal
      • tree generators are now capable to generate any type of tree (w/o needing to copy it once)
    • bugfixes:
      • NNI (of marked species) was also always performed for colored species
      • centered beautify-order is stable now
      • improved 'search optimal root'
  • adds:
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 4.4 KB
Line 
1#include "di_matr.hxx"
2
3int DI_MATRIX::search_group(GBT_TREE *node, GB_HASH *hash, size_t& groupcnt, char *groupname, DI_ENTRY **groups) {
4    //  returns 1 only if groupname != null and there are species for that group
5
6    if (node->is_leaf) {
7        if (!node->name) return 0;
8        DI_ENTRY *phentry = (DI_ENTRY *)GBS_read_hash(hash, node->name);
9        if (!phentry) {     // Species is not a member of tree
10            return 0;
11        }
12        if (groupname) {        // There is a group for this species
13            phentry->group_nr = groupcnt;
14            return 1;
15        }
16        return 0;
17    }
18    char *myname;
19    if (groupname) {
20        myname = groupname;
21    }
22    else {
23        myname = 0;
24        if (node->gb_node && node->name) {      // but we are a group
25            GBDATA *gb_grouped = GB_entry(node->gb_node, "grouped");
26            if (gb_grouped && GB_read_byte(gb_grouped)) {
27                myname = node->name;
28            }
29        }
30    }
31    int erg = search_group(node->leftson, hash, groupcnt, myname, groups) +
32        search_group(node->rightson, hash, groupcnt, myname, groups);
33
34    if (!groupname) {       // we are not a sub group
35        if (myname) {       // but we are a group
36            if (erg>0) {            // it is used
37                groups[groupcnt] = new DI_ENTRY(myname, this);
38                groupcnt++;
39            }
40        }
41        return 0;
42    }
43    else {
44        return erg;         // We are using an inherited group
45    }
46}
47
48char *DI_MATRIX::compress(GBT_TREE *tree) {
49    // create a hash table of species
50    GB_HASH *hash = GBS_create_hash(nentries, GB_IGNORE_CASE);
51    char *error = 0;
52    for (size_t i=0; i<nentries; i++) {
53        if (entries[i]->name) {
54            GBS_write_hash(hash, entries[i]->name, (long)entries[i]);
55        }
56        entries[i]->group_nr = -1;
57    }
58
59    size_t     groupcnt = 0;
60    DI_ENTRY **groups   = new DI_ENTRY *[nentries];
61    search_group(tree, hash, groupcnt, 0, groups); // search a group for all species and create groups
62
63    DI_ENTRY **found_groups = 0;
64
65    if (groupcnt) { // if we found groups => make copy of group array
66        found_groups = new DI_ENTRY *[groupcnt];
67        memcpy(found_groups, groups, groupcnt*sizeof(*groups));
68    }
69
70    int nongroupcnt = 0; // count # of species NOT in groups and copy them to 'groups'
71    for (size_t i=0; i<nentries; i++) {
72        if (entries[i]->name && entries[i]->group_nr == -1) { // species not in groups
73            groups[nongroupcnt] = new DI_ENTRY(entries[i]->name, this);
74            entries[i]->group_nr = nongroupcnt++;
75        }
76        else { // species is in group => add nentries to group_nr
77            entries[i]->group_nr = entries[i]->group_nr + nentries;
78        }
79    }
80
81    // append groups to end of 'groups'
82    if (groupcnt) {
83        memcpy(groups+nongroupcnt, found_groups, groupcnt*sizeof(*groups)); // copy groups to end of 'groups'
84        delete [] found_groups;
85        found_groups = 0;
86
87        for (size_t i=0; i<nentries; i++) {
88            if (entries[i]->name && entries[i]->group_nr>=int(nentries)) {
89                entries[i]->group_nr = entries[i]->group_nr - nentries + nongroupcnt; // correct group_nr's for species in groups
90            }
91        }
92    }
93
94    groupcnt += nongroupcnt; // now groupcnt is # of groups + # of non-group-species
95
96    AP_smatrix count(groupcnt);
97    AP_smatrix *sum = new AP_smatrix(groupcnt);
98
99    // Now we have create a new DI_ENTRY table, let's do the matrix
100    for (size_t i=0; i<nentries; i++) {
101        for (size_t j=0; j<=i; j++) {
102            int x = entries[i]->group_nr;   if (x<0) continue;
103            int y = entries[j]->group_nr;   if (y<0) continue;
104            // x,y are the destination i,j
105            count.set(x, y, count.get(x, y)+1.0);
106            sum->set(x, y, sum->get(x, y)+matrix->get(i, j));
107        }
108    }
109
110    for (size_t i=0; i<groupcnt; i++) {    // get the arithmetic average
111        for (size_t j=0; j<=i; j++) {
112            AP_FLOAT c = count.get(i, j);
113            if (c > 0) {
114                sum->set(i, j, sum->get(i, j) / c);
115            }
116        }
117    }
118    delete matrix;
119    matrix = sum;
120
121    for (size_t i=0; i<nentries; i++) { // delete everything
122        delete entries[i];
123        entries[i] = 0;
124    }
125    free(entries);
126
127    entries = groups;
128    nentries = groupcnt;
129    entries_mem_size = groupcnt;
130    matrix_type = DI_MATRIX_COMPRESSED;
131
132    GBS_free_hash(hash);
133    return error;
134}
Note: See TracBrowser for help on using the repository browser.