1 | #include "di_matr.hxx" |
---|
2 | |
---|
3 | int DI_MATRIX::search_group(GBT_TREE *node, GB_HASH *hash, size_t& groupcnt, char *groupname, DI_ENTRY **groups) { |
---|
4 | // returns 1 only if groupname != null and there are species for that group |
---|
5 | |
---|
6 | if (node->is_leaf) { |
---|
7 | if (!node->name) return 0; |
---|
8 | DI_ENTRY *phentry = (DI_ENTRY *)GBS_read_hash(hash, node->name); |
---|
9 | if (!phentry) { // Species is not a member of tree |
---|
10 | return 0; |
---|
11 | } |
---|
12 | if (groupname) { // There is a group for this species |
---|
13 | phentry->group_nr = groupcnt; |
---|
14 | return 1; |
---|
15 | } |
---|
16 | return 0; |
---|
17 | } |
---|
18 | char *myname; |
---|
19 | if (groupname) { |
---|
20 | myname = groupname; |
---|
21 | } |
---|
22 | else { |
---|
23 | myname = 0; |
---|
24 | if (node->gb_node && node->name) { // but we are a group |
---|
25 | GBDATA *gb_grouped = GB_entry(node->gb_node, "grouped"); |
---|
26 | if (gb_grouped && GB_read_byte(gb_grouped)) { |
---|
27 | myname = node->name; |
---|
28 | } |
---|
29 | } |
---|
30 | } |
---|
31 | int erg = search_group(node->leftson, hash, groupcnt, myname, groups) + |
---|
32 | search_group(node->rightson, hash, groupcnt, myname, groups); |
---|
33 | |
---|
34 | if (!groupname) { // we are not a sub group |
---|
35 | if (myname) { // but we are a group |
---|
36 | if (erg>0) { // it is used |
---|
37 | groups[groupcnt] = new DI_ENTRY(myname, this); |
---|
38 | groupcnt++; |
---|
39 | } |
---|
40 | } |
---|
41 | return 0; |
---|
42 | } |
---|
43 | else { |
---|
44 | return erg; // We are using an inherited group |
---|
45 | } |
---|
46 | } |
---|
47 | |
---|
48 | char *DI_MATRIX::compress(GBT_TREE *tree) { |
---|
49 | // create a hash table of species |
---|
50 | GB_HASH *hash = GBS_create_hash(nentries, GB_IGNORE_CASE); |
---|
51 | char *error = 0; |
---|
52 | for (size_t i=0; i<nentries; i++) { |
---|
53 | if (entries[i]->name) { |
---|
54 | GBS_write_hash(hash, entries[i]->name, (long)entries[i]); |
---|
55 | } |
---|
56 | entries[i]->group_nr = -1; |
---|
57 | } |
---|
58 | |
---|
59 | size_t groupcnt = 0; |
---|
60 | DI_ENTRY **groups = new DI_ENTRY *[nentries]; |
---|
61 | search_group(tree, hash, groupcnt, 0, groups); // search a group for all species and create groups |
---|
62 | |
---|
63 | DI_ENTRY **found_groups = 0; |
---|
64 | |
---|
65 | if (groupcnt) { // if we found groups => make copy of group array |
---|
66 | found_groups = new DI_ENTRY *[groupcnt]; |
---|
67 | memcpy(found_groups, groups, groupcnt*sizeof(*groups)); |
---|
68 | } |
---|
69 | |
---|
70 | int nongroupcnt = 0; // count # of species NOT in groups and copy them to 'groups' |
---|
71 | for (size_t i=0; i<nentries; i++) { |
---|
72 | if (entries[i]->name && entries[i]->group_nr == -1) { // species not in groups |
---|
73 | groups[nongroupcnt] = new DI_ENTRY(entries[i]->name, this); |
---|
74 | entries[i]->group_nr = nongroupcnt++; |
---|
75 | } |
---|
76 | else { // species is in group => add nentries to group_nr |
---|
77 | entries[i]->group_nr = entries[i]->group_nr + nentries; |
---|
78 | } |
---|
79 | } |
---|
80 | |
---|
81 | // append groups to end of 'groups' |
---|
82 | if (groupcnt) { |
---|
83 | memcpy(groups+nongroupcnt, found_groups, groupcnt*sizeof(*groups)); // copy groups to end of 'groups' |
---|
84 | delete [] found_groups; |
---|
85 | found_groups = 0; |
---|
86 | |
---|
87 | for (size_t i=0; i<nentries; i++) { |
---|
88 | if (entries[i]->name && entries[i]->group_nr>=int(nentries)) { |
---|
89 | entries[i]->group_nr = entries[i]->group_nr - nentries + nongroupcnt; // correct group_nr's for species in groups |
---|
90 | } |
---|
91 | } |
---|
92 | } |
---|
93 | |
---|
94 | groupcnt += nongroupcnt; // now groupcnt is # of groups + # of non-group-species |
---|
95 | |
---|
96 | AP_smatrix count(groupcnt); |
---|
97 | AP_smatrix *sum = new AP_smatrix(groupcnt); |
---|
98 | |
---|
99 | // Now we have create a new DI_ENTRY table, let's do the matrix |
---|
100 | for (size_t i=0; i<nentries; i++) { |
---|
101 | for (size_t j=0; j<=i; j++) { |
---|
102 | int x = entries[i]->group_nr; if (x<0) continue; |
---|
103 | int y = entries[j]->group_nr; if (y<0) continue; |
---|
104 | // x,y are the destination i,j |
---|
105 | count.set(x, y, count.get(x, y)+1.0); |
---|
106 | sum->set(x, y, sum->get(x, y)+matrix->get(i, j)); |
---|
107 | } |
---|
108 | } |
---|
109 | |
---|
110 | for (size_t i=0; i<groupcnt; i++) { // get the arithmetic average |
---|
111 | for (size_t j=0; j<=i; j++) { |
---|
112 | AP_FLOAT c = count.get(i, j); |
---|
113 | if (c > 0) { |
---|
114 | sum->set(i, j, sum->get(i, j) / c); |
---|
115 | } |
---|
116 | } |
---|
117 | } |
---|
118 | delete matrix; |
---|
119 | matrix = sum; |
---|
120 | |
---|
121 | for (size_t i=0; i<nentries; i++) { // delete everything |
---|
122 | delete entries[i]; |
---|
123 | entries[i] = 0; |
---|
124 | } |
---|
125 | free(entries); |
---|
126 | |
---|
127 | entries = groups; |
---|
128 | nentries = groupcnt; |
---|
129 | entries_mem_size = groupcnt; |
---|
130 | matrix_type = DI_MATRIX_COMPRESSED; |
---|
131 | |
---|
132 | GBS_free_hash(hash); |
---|
133 | return error; |
---|
134 | } |
---|