1 | #include <stdio.h> |
---|
2 | #include <stdlib.h> |
---|
3 | #include <string.h> |
---|
4 | #include <malloc.h> |
---|
5 | #include <memory.h> |
---|
6 | |
---|
7 | #include <arbdb.h> |
---|
8 | #include <arbdbt.h> |
---|
9 | |
---|
10 | #include "cat_tree.hxx" |
---|
11 | |
---|
12 | |
---|
13 | const char *CAT_field_names[CAT_FIELD_MAX]; // Fields which are extracted from the arb database |
---|
14 | // syntax: field[:word/(range of words] |
---|
15 | // ex full_name |
---|
16 | |
---|
17 | double CATL_tree_max_deep(GBT_TREE *tree){ |
---|
18 | if (tree->is_leaf) return 0.0; |
---|
19 | double l,r; |
---|
20 | l = CATL_tree_max_deep(tree->leftson); |
---|
21 | r = CATL_tree_max_deep(tree->rightson); |
---|
22 | if (tree->leftlen<0) tree->leftlen = 0; |
---|
23 | if (tree->rightlen<0) tree->rightlen = 0; |
---|
24 | l += tree->leftlen; |
---|
25 | r += tree->rightlen; |
---|
26 | if (l>r) return l; |
---|
27 | return r; |
---|
28 | } |
---|
29 | |
---|
30 | int CATL_size_of_tree(GBT_TREE *tree){ |
---|
31 | if (tree->is_leaf) return 1; |
---|
32 | return CATL_size_of_tree(tree->leftson) + |
---|
33 | CATL_size_of_tree(tree->rightson) +1; |
---|
34 | } |
---|
35 | |
---|
36 | |
---|
37 | CAT_tree *new_CAT_tree(int size) |
---|
38 | { |
---|
39 | CAT_tree *THIS = (CAT_tree *)calloc(sizeof(CAT_tree) + sizeof(CAT_node) * (size-1),1); |
---|
40 | |
---|
41 | CAT_field_names[CAT_FIELD_NAME] = "name"; |
---|
42 | CAT_field_names[CAT_FIELD_FULL_NAME] = "full_name"; |
---|
43 | CAT_field_names[CAT_FIELD_GROUP_NAME] = "group_name"; |
---|
44 | CAT_field_names[CAT_FIELD_ACC] = "acc"; |
---|
45 | CAT_field_names[CAT_FIELD_STRAIN] = "strain"; |
---|
46 | return THIS; |
---|
47 | } |
---|
48 | |
---|
49 | |
---|
50 | void *cat_mem_files[CAT_FIELD_MAX]; |
---|
51 | |
---|
52 | |
---|
53 | GB_ERROR cat_write_cat_tree(CAT_tree *cat_tree, FILE *out){ |
---|
54 | char *cat_strings[CAT_FIELD_MAX]; |
---|
55 | long cat_offsets[CAT_FIELD_MAX]; |
---|
56 | long cat_sizes[CAT_FIELD_MAX]; |
---|
57 | int i,j,node; |
---|
58 | for (i=0;i<CAT_FIELD_MAX;i++) cat_strings[i] = GBS_strclose(cat_mem_files[i],0); |
---|
59 | long sizeoftree = sizeof(CAT_tree) + sizeof(CAT_node) * (cat_tree->nnodes-1); |
---|
60 | long offset = sizeoftree; |
---|
61 | for (i=0;i<CAT_FIELD_MAX;i++){ |
---|
62 | int len = strlen(cat_strings[i])+1; |
---|
63 | cat_sizes[i] = len; |
---|
64 | // change '^A' seperated strings to \0 sep. strings |
---|
65 | for (j=0;j<len;j++) if (cat_strings[i][j] == 1) cat_strings[i][j] = 0; |
---|
66 | cat_offsets[i] = offset; |
---|
67 | offset += len; |
---|
68 | } |
---|
69 | |
---|
70 | for (node = 0; node < cat_tree->nnodes; node++) { |
---|
71 | for (i=0;i<CAT_FIELD_MAX;i++) { |
---|
72 | if ( cat_tree->nodes[node].field_offsets[i] == 0) continue; |
---|
73 | // no field |
---|
74 | cat_tree->nodes[node].field_offsets[i] += cat_offsets[i]; |
---|
75 | } |
---|
76 | } |
---|
77 | |
---|
78 | if ( !fwrite( (char *)cat_tree,(int)(sizeoftree), 1, out) ) { |
---|
79 | return GB_export_error("Disk Full"); |
---|
80 | } |
---|
81 | |
---|
82 | |
---|
83 | for (i=0;i<CAT_FIELD_MAX;i++){ |
---|
84 | if (!fwrite( cat_strings[i], (int)cat_sizes[i], 1, out) ) { |
---|
85 | return GB_export_error("Disk Full"); |
---|
86 | } |
---|
87 | } |
---|
88 | return 0; |
---|
89 | } |
---|
90 | |
---|
91 | long cat_convert_tree_2_cat_rek(GBT_TREE *tree, CAT_tree *cat_tree, int deep, double scale, |
---|
92 | int *level_counter, int& leaf_counter, int& node_counter){ |
---|
93 | int nodec = node_counter++; |
---|
94 | CAT_node *node = & cat_tree->nodes[nodec]; |
---|
95 | node->deep = deep; |
---|
96 | if (tree->is_leaf) { |
---|
97 | node->numbering[CAT_NUMBERING_LEAFS_ONLY] = leaf_counter++; |
---|
98 | node->numbering[CAT_NUMBERING_LEVELS] = node->numbering[CAT_NUMBERING_LEAFS_ONLY] + cat_tree->nnodes/2; |
---|
99 | }else{ |
---|
100 | node->numbering[CAT_NUMBERING_LEVELS] = level_counter[deep]; |
---|
101 | level_counter[deep]++; |
---|
102 | |
---|
103 | node->leftson = cat_convert_tree_2_cat_rek(tree->leftson,cat_tree,deep+1,scale, |
---|
104 | level_counter,leaf_counter,node_counter); |
---|
105 | node->rightson = cat_convert_tree_2_cat_rek(tree->rightson,cat_tree,deep+1,scale, |
---|
106 | level_counter,leaf_counter,node_counter); |
---|
107 | cat_tree->nodes[node->leftson].father = nodec; |
---|
108 | cat_tree->nodes[node->rightson].father = nodec; |
---|
109 | |
---|
110 | double l = tree->leftlen; |
---|
111 | double r = tree->rightlen; |
---|
112 | |
---|
113 | if (l<0) l = 0.0; |
---|
114 | if (r<0) r = 0.0; |
---|
115 | |
---|
116 | cat_tree->nodes[node->leftson].branch_length_float = l; |
---|
117 | cat_tree->nodes[node->rightson].branch_length_float = r; |
---|
118 | |
---|
119 | l *= scale; |
---|
120 | r *= scale; |
---|
121 | if (l>0 && l<1) l = 1; |
---|
122 | if (r>0 && r<1) r = 1; |
---|
123 | |
---|
124 | if (l> 255) l = 255; |
---|
125 | if (r > 255) l = 255; |
---|
126 | |
---|
127 | cat_tree->nodes[node->leftson].branch_length_byte = (unsigned char)l; |
---|
128 | cat_tree->nodes[node->rightson].branch_length_byte = (unsigned char)r; |
---|
129 | } |
---|
130 | if (tree->gb_node) { |
---|
131 | int i; |
---|
132 | const char *f; |
---|
133 | GBDATA *gbd; |
---|
134 | int start = 0; |
---|
135 | int end = 1000000; |
---|
136 | for (i=0;i<CAT_FIELD_MAX;i++) { |
---|
137 | start = 0; |
---|
138 | end = 1000000; |
---|
139 | f = CAT_field_names[i]; |
---|
140 | gbd = GB_find(tree->gb_node,f,0,down_level); |
---|
141 | if (gbd){ |
---|
142 | char * s= GB_read_as_string(gbd); |
---|
143 | if (s && strlen(s)){ // append text to output stream |
---|
144 | node->field_offsets[i] = GBS_memoffset(cat_mem_files[i]); |
---|
145 | GBS_strcat(cat_mem_files[i],s); |
---|
146 | GBS_chrcat(cat_mem_files[i],1); // seperated by '^A' |
---|
147 | delete s; |
---|
148 | } |
---|
149 | } |
---|
150 | } |
---|
151 | gbd = GB_find(tree->gb_node,"grouped",0,down_level); |
---|
152 | if (gbd) { |
---|
153 | node->is_grouped_in = GB_read_byte(gbd); |
---|
154 | } |
---|
155 | node->color_in = T2J_COLOR_NO - GB_read_flag(tree->gb_node); |
---|
156 | }else{ |
---|
157 | if (tree->name){ |
---|
158 | if (tree->is_leaf){ |
---|
159 | char *name = strdup(tree->name); |
---|
160 | char *fullname = strchr(name,'#'); |
---|
161 | if (fullname){ |
---|
162 | *(fullname++) = 0; |
---|
163 | }else{ |
---|
164 | fullname = name; |
---|
165 | } |
---|
166 | |
---|
167 | node->field_offsets[CAT_FIELD_NAME] = GBS_memoffset(cat_mem_files[CAT_FIELD_NAME]); |
---|
168 | GBS_strcat(cat_mem_files[CAT_FIELD_NAME],name); |
---|
169 | GBS_chrcat(cat_mem_files[CAT_FIELD_NAME],1); // seperated by '^A' |
---|
170 | |
---|
171 | node->field_offsets[CAT_FIELD_FULL_NAME] = GBS_memoffset(cat_mem_files[CAT_FIELD_FULL_NAME]); |
---|
172 | GBS_strcat(cat_mem_files[CAT_FIELD_FULL_NAME],fullname); |
---|
173 | GBS_chrcat(cat_mem_files[CAT_FIELD_FULL_NAME],1);// seperated by '^A' |
---|
174 | delete name; |
---|
175 | }else{ |
---|
176 | node->field_offsets[CAT_FIELD_GROUP_NAME] = GBS_memoffset(cat_mem_files[CAT_FIELD_GROUP_NAME]); |
---|
177 | GBS_strcat(cat_mem_files[CAT_FIELD_GROUP_NAME],tree->name); |
---|
178 | GBS_chrcat(cat_mem_files[CAT_FIELD_GROUP_NAME],1); // seperated by '^A' |
---|
179 | } |
---|
180 | }else{ |
---|
181 | node->color_in = T2J_COLOR_UNKNOWN; |
---|
182 | } |
---|
183 | } |
---|
184 | return nodec; |
---|
185 | } |
---|
186 | |
---|
187 | /** calculates the leafes for all inner nodes */ |
---|
188 | long calcnleafs(CAT_tree *cat_tree, int nodenr){ |
---|
189 | if (cat_tree->nodes[nodenr].leftson) { |
---|
190 | int sum = calcnleafs(cat_tree, cat_tree->nodes[nodenr].leftson) + |
---|
191 | calcnleafs(cat_tree, cat_tree->nodes[nodenr].rightson); |
---|
192 | cat_tree->nodes[nodenr].nleafs = sum; |
---|
193 | return sum; |
---|
194 | } |
---|
195 | cat_tree->nodes[nodenr].nleafs = 1; |
---|
196 | return 1; |
---|
197 | } |
---|
198 | |
---|
199 | CAT_tree *convert_GBT_tree_2_CAT_tree(GBT_TREE *tree) { |
---|
200 | CAT_tree *cat_tree; |
---|
201 | int size_of_tree = CATL_size_of_tree(tree); |
---|
202 | cat_tree = new_CAT_tree(size_of_tree); |
---|
203 | cat_tree->nnodes = size_of_tree; |
---|
204 | int leaf_counter = 0; |
---|
205 | int node_counter = 0; |
---|
206 | int *level_counter = (int *)calloc(sizeof(int),size_of_tree); |
---|
207 | |
---|
208 | double scale = 255/CATL_tree_max_deep(tree); |
---|
209 | cat_convert_tree_2_cat_rek(tree, cat_tree, 0,scale, |
---|
210 | level_counter, leaf_counter, node_counter); |
---|
211 | int i; |
---|
212 | for (i = 1; i < size_of_tree; i++) level_counter[i] += level_counter[i-1]; |
---|
213 | for (i = 0; i < size_of_tree; i++) { |
---|
214 | CAT_node *node = & cat_tree->nodes[i]; |
---|
215 | if (node->deep && node->leftson){ |
---|
216 | node->numbering[CAT_NUMBERING_LEVELS] += level_counter[node->deep-1]; |
---|
217 | } |
---|
218 | } |
---|
219 | calcnleafs(cat_tree,0); |
---|
220 | |
---|
221 | delete level_counter; |
---|
222 | return cat_tree; |
---|
223 | } |
---|
224 | |
---|
225 | void cat_debug_tree(CAT_tree *cat_tree){ |
---|
226 | int i; |
---|
227 | for (i=0;i< cat_tree->nnodes; i++){ |
---|
228 | CAT_node *node = & cat_tree->nodes[i]; |
---|
229 | printf("%4i f%4li l%4li r%4li lfn%4li lvl%4li d%4li bl%4i ", |
---|
230 | i, |
---|
231 | node->father, |
---|
232 | node->leftson, |
---|
233 | node->rightson, |
---|
234 | node->numbering[CAT_NUMBERING_LEAFS_ONLY], |
---|
235 | node->numbering[CAT_NUMBERING_LEVELS], |
---|
236 | node->deep, |
---|
237 | (int)node->branch_length_byte); |
---|
238 | int j; |
---|
239 | for (j=0;j<CAT_FIELD_MAX;j++){ |
---|
240 | if (node->field_offsets[j]) { |
---|
241 | printf("%15s ",cat_tree->data+node->field_offsets[j]); |
---|
242 | } |
---|
243 | } |
---|
244 | printf("\n"); |
---|
245 | } |
---|
246 | } |
---|
247 | |
---|
248 | GB_ERROR create_and_save_CAT_tree(GBT_TREE *tree, const char *path){ |
---|
249 | GB_ERROR error = 0; |
---|
250 | int i; |
---|
251 | for (i = 0; i < CAT_FIELD_MAX; i++){ |
---|
252 | cat_mem_files[i] = GBS_stropen(CAT_INIT_STRING_FILE_SIZE); |
---|
253 | GBS_chrcat(cat_mem_files[i],1); // write 1 byte so that all further |
---|
254 | // entries get a non zero offset |
---|
255 | } |
---|
256 | CAT_tree *cat_tree = convert_GBT_tree_2_CAT_tree(tree); |
---|
257 | if (!cat_tree) return GB_get_error(); |
---|
258 | FILE *out = fopen(path,"w"); |
---|
259 | if (!out) return GB_export_error("Sorry cannot write to file '%s'",path); |
---|
260 | error = cat_write_cat_tree(cat_tree,out); |
---|
261 | fclose(out); |
---|
262 | return error; |
---|
263 | |
---|
264 | } |
---|
265 | |
---|
266 | GB_ERROR create_and_save_CAT_tree(GBDATA *gb_main, const char *tree_name, const char *path){ |
---|
267 | GB_transaction dummy(gb_main); |
---|
268 | |
---|
269 | GBT_TREE *tree = GBT_read_tree(gb_main,tree_name, sizeof(GBT_TREE) ); |
---|
270 | if (!tree) return GB_get_error(); |
---|
271 | GB_ERROR error = GBT_link_tree(tree,gb_main,GB_FALSE); |
---|
272 | if (error) return error; |
---|
273 | error = create_and_save_CAT_tree(tree,path); |
---|
274 | return error; |
---|
275 | } |
---|