1 | // =========================================================== // |
---|
2 | // // |
---|
3 | // File : awt_tree.hxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =========================================================== // |
---|
10 | |
---|
11 | #ifndef AWT_TREE_HXX |
---|
12 | #define AWT_TREE_HXX |
---|
13 | |
---|
14 | #define AP_F_LOADED ((AW_active)1) |
---|
15 | #define AP_F_NLOADED ((AW_active)2) |
---|
16 | #define AP_F_SEQUENCES ((AW_active)4) |
---|
17 | #define AP_F_MATRIX ((AW_active)8) |
---|
18 | #define AP_F_TREE ((AW_active)16) |
---|
19 | #define AP_F_ALL ((AW_active)-1) |
---|
20 | |
---|
21 | #define GROUPED_SUM 2 // min. no of species in a group which should be drawn as box |
---|
22 | |
---|
23 | class AW_window; |
---|
24 | class AW_root; |
---|
25 | |
---|
26 | #ifndef AWT_PRO_A_NUCS_HXX |
---|
27 | #include <awt_pro_a_nucs.hxx> |
---|
28 | #endif |
---|
29 | #ifndef AW_COLOR_GROUPS_HXX |
---|
30 | #include <aw_color_groups.hxx> |
---|
31 | #endif |
---|
32 | #ifndef ARBDBT_H |
---|
33 | #include <arbdbt.h> |
---|
34 | #endif |
---|
35 | |
---|
36 | typedef unsigned char uchar; |
---|
37 | enum { |
---|
38 | AWT_GC_CURSOR=0, |
---|
39 | AWT_GC_BRANCH_REMARK, |
---|
40 | AWT_GC_BOOTSTRAP, |
---|
41 | AWT_GC_BOOTSTRAP_LIMITED, |
---|
42 | AWT_GC_GROUPS, |
---|
43 | AWT_GC_SELECTED, // == zero mismatches |
---|
44 | AWT_GC_UNDIFF, |
---|
45 | AWT_GC_NSELECTED, // no hit |
---|
46 | AWT_GC_SOME_MISMATCHES, |
---|
47 | |
---|
48 | // for multiprobecoloring |
---|
49 | |
---|
50 | AWT_GC_BLACK, AWT_GC_YELLOW, |
---|
51 | AWT_GC_RED, AWT_GC_MAGENTA, |
---|
52 | AWT_GC_GREEN, AWT_GC_CYAN, |
---|
53 | AWT_GC_BLUE, AWT_GC_WHITE, |
---|
54 | |
---|
55 | AWT_GC_FIRST_COLOR_GROUP, |
---|
56 | AWT_GC_MAX = AWT_GC_FIRST_COLOR_GROUP+AW_COLOR_GROUPS |
---|
57 | }; // AW_gc |
---|
58 | |
---|
59 | typedef enum { |
---|
60 | NOTHING=0, // nothing to buffer in AP_tree node |
---|
61 | STRUCTURE=1, // only structure |
---|
62 | SEQUENCE=2, // only sequence |
---|
63 | BOTH=3, // sequence & treestructure is buffered |
---|
64 | ROOT=7 // old root is buffered |
---|
65 | } AP_STACK_MODE; |
---|
66 | |
---|
67 | typedef enum { |
---|
68 | AP_FALSE, |
---|
69 | AP_TRUE |
---|
70 | } AP_BOOL; |
---|
71 | |
---|
72 | typedef enum { |
---|
73 | AP_UPDATE_OK = 0, |
---|
74 | AP_UPDATE_RELINKED = -1, |
---|
75 | AP_UPDATE_RELOADED = 1, |
---|
76 | AP_UPDATE_ERROR = 2 |
---|
77 | } AP_UPDATE_FLAGS; |
---|
78 | |
---|
79 | typedef enum { // flags zum kennzeichnen von knoten |
---|
80 | AP_LEFT, |
---|
81 | AP_RIGHT, |
---|
82 | AP_FATHER, |
---|
83 | AP_LEFTSON, |
---|
84 | AP_RIGHTSON, |
---|
85 | AP_NONE |
---|
86 | } AP_TREE_SIDE; |
---|
87 | |
---|
88 | typedef enum { // bit flags |
---|
89 | AWT_REMOVE_MARKED = GBT_REMOVE_MARKED, |
---|
90 | AWT_REMOVE_NOT_MARKED = GBT_REMOVE_NOT_MARKED, |
---|
91 | AWT_REMOVE_DELETED = GBT_REMOVE_DELETED, |
---|
92 | AWT_REMOVE_NO_SEQUENCE = 8, |
---|
93 | AWT_REMOVE_BUT_DONT_FREE = 16 |
---|
94 | |
---|
95 | // please keep AWT_REMOVE_TYPE in sync with GBT_TREE_REMOVE_TYPE |
---|
96 | // see ../ARBDB/arbdbt.h@sync_GBT_TREE_REMOVE_TYPE_AWT_REMOVE_TYPE |
---|
97 | |
---|
98 | } AWT_REMOVE_TYPE; |
---|
99 | |
---|
100 | typedef double AP_FLOAT; |
---|
101 | |
---|
102 | enum AWT_FILTER_SIMPLIFY { |
---|
103 | AWT_FILTER_SIMPLIFY_NONE, |
---|
104 | AWT_FILTER_SIMPLIFY_DNA, |
---|
105 | AWT_FILTER_SIMPLIFY_PROTEIN |
---|
106 | }; |
---|
107 | |
---|
108 | class AP_filter { |
---|
109 | public: |
---|
110 | char *filter_mask; // 0 1 |
---|
111 | long filter_len; |
---|
112 | long real_len; // how many 1 |
---|
113 | long update; |
---|
114 | uchar simplify[256]; |
---|
115 | int *filterpos_2_seqpos; |
---|
116 | int *bootstrap; // if set then sizeof(bootstrap) == real_len; bootstrap[i] points to random original positions [0..filter_len] |
---|
117 | |
---|
118 | GB_ERROR init(const char *filter,const char *zerobases, long size); |
---|
119 | GB_ERROR init(long size); |
---|
120 | void calc_filter_2_seq(); |
---|
121 | void enable_bootstrap(); |
---|
122 | void enable_simplify(AWT_FILTER_SIMPLIFY type); |
---|
123 | char *to_string(); // convert to 0/1 string |
---|
124 | AP_filter(void); |
---|
125 | ~AP_filter(void); |
---|
126 | }; |
---|
127 | |
---|
128 | |
---|
129 | |
---|
130 | class AP_weights { |
---|
131 | protected: |
---|
132 | friend class AP_sequence; |
---|
133 | friend class AP_sequence_parsimony; |
---|
134 | friend class AP_sequence_protein; |
---|
135 | friend class AP_sequence_protein_old; |
---|
136 | |
---|
137 | GB_UINT4 *weights; |
---|
138 | |
---|
139 | public: |
---|
140 | |
---|
141 | long weight_len; |
---|
142 | AP_filter *filter; |
---|
143 | long update; |
---|
144 | bool dummy_weights; // if true all weights are == 1 |
---|
145 | |
---|
146 | AP_weights(void); |
---|
147 | char *init(AP_filter *fil); // init weights |
---|
148 | char *init(GB_UINT4 *w, AP_filter *fil); |
---|
149 | ~AP_weights(void); |
---|
150 | }; |
---|
151 | |
---|
152 | class AP_rates { |
---|
153 | public: |
---|
154 | AP_FLOAT *rates; |
---|
155 | long rate_len; |
---|
156 | AP_filter *filter; |
---|
157 | long update; |
---|
158 | |
---|
159 | AP_rates(void); |
---|
160 | char *init(AP_filter *fil); |
---|
161 | char *init(AP_FLOAT * ra, AP_filter *fil); |
---|
162 | ~AP_rates(void); |
---|
163 | void print(void); |
---|
164 | }; |
---|
165 | |
---|
166 | class AP_smatrix { // Symmetrical Matrix (upper triangular matrix) |
---|
167 | public: |
---|
168 | AP_FLOAT **m; // m[i][j] i<= j !!!! |
---|
169 | long size; |
---|
170 | AP_smatrix(long si); |
---|
171 | ~AP_smatrix(void); |
---|
172 | void set(long i, long j, AP_FLOAT val) { if (i>j) m[i][j] = val; else m[j][i] = val; }; |
---|
173 | AP_FLOAT get(long i, long j) { if (i>j) return m[i][j]; else return m[j][i]; }; |
---|
174 | }; |
---|
175 | |
---|
176 | class AP_matrix { // Matrix |
---|
177 | public: |
---|
178 | AP_FLOAT **m; |
---|
179 | char **x_description; // optional discription, strdupped |
---|
180 | char **y_description; |
---|
181 | long size; |
---|
182 | AP_matrix(long si); |
---|
183 | ~AP_matrix(void); |
---|
184 | void create_awars(AW_root *awr,const char *awar_prefix); |
---|
185 | void read_awars(AW_root *awr,const char *awar_prefix); |
---|
186 | void normize(); // set average non diag element to 1.0 (only for descripted elements |
---|
187 | void create_input_fields(AW_window *aww,const char *awar_prefix); |
---|
188 | void set_description(const char *xstring,const char *ystring); |
---|
189 | void set(int i, int j, AP_FLOAT val) { m[i][j] = val;}; |
---|
190 | AP_FLOAT get(int i, int j) { return m[i][j];}; |
---|
191 | }; |
---|
192 | |
---|
193 | class AP_tree_root; |
---|
194 | |
---|
195 | class AP_sequence { |
---|
196 | protected: |
---|
197 | AP_FLOAT cashed_real_len; |
---|
198 | |
---|
199 | public: |
---|
200 | AP_tree_root *root; |
---|
201 | static char *mutation_per_site; // if != 0 then mutations are set by combine |
---|
202 | static char *static_mutation_per_site[3]; // if != 0 then mutations are set by combine |
---|
203 | |
---|
204 | AP_BOOL is_set_flag; |
---|
205 | long sequence_len; |
---|
206 | long update; |
---|
207 | AP_FLOAT costs; |
---|
208 | |
---|
209 | static long global_combineCount; |
---|
210 | |
---|
211 | AP_sequence(AP_tree_root *rooti); |
---|
212 | virtual ~AP_sequence(void); |
---|
213 | |
---|
214 | virtual AP_sequence *dup(void) = 0; // used to get the real new element |
---|
215 | virtual void set_gb(GBDATA *gb_sequence ); // by default calls set((char *)) |
---|
216 | virtual void set(const char *sequence) = 0; |
---|
217 | /* seq = acgtututu */ |
---|
218 | virtual AP_FLOAT combine(const AP_sequence* lefts, const AP_sequence *rights) = 0; |
---|
219 | virtual void partial_match(const AP_sequence* part, long *overlap, long *penalty) const = 0; |
---|
220 | virtual AP_FLOAT real_len(void); |
---|
221 | }; |
---|
222 | |
---|
223 | |
---|
224 | class AP_tree; |
---|
225 | |
---|
226 | class AP_tree_root { |
---|
227 | public: |
---|
228 | GBDATA *gb_main; |
---|
229 | GBDATA *gb_tree; |
---|
230 | GBDATA *gb_tree_gone; // if all leafes have been removed by tree operations, remember 'gb_tree' here (see inform_about_changed_root) |
---|
231 | GBDATA *gb_species_data; |
---|
232 | GBDATA *gb_table_data; |
---|
233 | long tree_timer; |
---|
234 | long species_timer; |
---|
235 | long table_timer; |
---|
236 | char *tree_name; |
---|
237 | AP_tree *tree_template; |
---|
238 | AP_sequence *sequence_template; |
---|
239 | |
---|
240 | AP_filter *filter; |
---|
241 | AP_weights *weights; |
---|
242 | AP_rates *rates; |
---|
243 | AP_smatrix *matrix; |
---|
244 | |
---|
245 | AP_tree_root(GBDATA *gb_main, AP_tree *tree_proto,const char *name); |
---|
246 | void update_timers(void); // update the timer |
---|
247 | GB_BOOL is_tree_updated(void); |
---|
248 | GB_BOOL is_species_updated(void); |
---|
249 | |
---|
250 | |
---|
251 | char *(*root_changed)(void *cd, AP_tree *old, AP_tree *newroot); |
---|
252 | void *root_changed_cd; |
---|
253 | char *(*node_deleted)(void *cd, AP_tree *old); |
---|
254 | void *node_deleted_cd; |
---|
255 | |
---|
256 | AP_tree *tree; |
---|
257 | char *inform_about_changed_root( AP_tree *old, AP_tree *newroot); |
---|
258 | |
---|
259 | char *inform_about_delete( AP_tree *old); |
---|
260 | |
---|
261 | ~AP_tree_root(); |
---|
262 | }; |
---|
263 | |
---|
264 | |
---|
265 | struct AP_tree_members { |
---|
266 | public: |
---|
267 | // elements from struct a_tree_node |
---|
268 | |
---|
269 | // struct arb_flags |
---|
270 | unsigned int grouped:1; // indicates a folded group |
---|
271 | unsigned int hidden:1; // not shown because a father is a folded group |
---|
272 | unsigned int has_marked_children:1; // at least one child is marked |
---|
273 | unsigned int callback_exists:1; |
---|
274 | unsigned int gc:6; // color |
---|
275 | |
---|
276 | char left_linewidth; |
---|
277 | char right_linewidth; |
---|
278 | // struct arb_data |
---|
279 | int leave_sum; // number of leaf children of this node |
---|
280 | int view_sum; // virtual size of node for display ( isgrouped?sqrt(leave_sum):leave_sum |
---|
281 | |
---|
282 | float tree_depth; // max length of path; for drawing triangles */ |
---|
283 | float min_tree_depth; /* min length of path; for drawing triangle */ |
---|
284 | float spread; |
---|
285 | |
---|
286 | float left_angle; |
---|
287 | float right_angle; |
---|
288 | |
---|
289 | void clear() { |
---|
290 | grouped = 0; |
---|
291 | hidden = 0; |
---|
292 | has_marked_children = 0; |
---|
293 | callback_exists = 0; |
---|
294 | gc = 0; |
---|
295 | left_linewidth = 0; |
---|
296 | right_linewidth = 0; |
---|
297 | leave_sum = 0; |
---|
298 | view_sum = 0; |
---|
299 | tree_depth = 0; |
---|
300 | min_tree_depth = 0; |
---|
301 | spread = 0; |
---|
302 | left_angle = 0; |
---|
303 | right_angle = 0; |
---|
304 | } |
---|
305 | }; |
---|
306 | |
---|
307 | struct AP_branch_members { |
---|
308 | public: |
---|
309 | unsigned int kl_marked:1; // kernighan lin marked |
---|
310 | unsigned int touched:1; // nni and kl |
---|
311 | |
---|
312 | void clear() { |
---|
313 | kl_marked = 0; |
---|
314 | touched = 0; |
---|
315 | } |
---|
316 | }; |
---|
317 | |
---|
318 | |
---|
319 | |
---|
320 | class AP_tree { |
---|
321 | public: |
---|
322 | virtual ~AP_tree(void); // leave this here to force creation of virtual table |
---|
323 | private: |
---|
324 | void _load_sequences_rek(char *use,GB_BOOL set_by_gbdata, long max, long *counter) ; // uses seq->filter |
---|
325 | protected: |
---|
326 | |
---|
327 | public: |
---|
328 | GBT_TREE_ELEMENTS( AP_tree); |
---|
329 | AP_tree_members gr; |
---|
330 | AP_branch_members br; |
---|
331 | AP_FLOAT mutation_rate; |
---|
332 | unsigned long stack_level; |
---|
333 | AP_tree_root *tree_root; |
---|
334 | AP_sequence *sequence; |
---|
335 | |
---|
336 | AP_tree(AP_tree_root *tree_root); |
---|
337 | |
---|
338 | GBT_TREE *get_gbt_tree() { return (GBT_TREE*)this; } |
---|
339 | |
---|
340 | int compute_tree(GBDATA *gb_main); |
---|
341 | |
---|
342 | int arb_tree_set_leafsum_viewsum(); // count all visible leafs -> gr.viewsum + gr.leafsum |
---|
343 | int arb_tree_leafsum2();// count all leafs |
---|
344 | |
---|
345 | void calc_hidden_flag(int father_is_hidden); |
---|
346 | virtual int calc_color();// start a transaction first |
---|
347 | |
---|
348 | virtual int calc_color_probes(GB_HASH *hashptr); //new function for coloring the tree; ak |
---|
349 | |
---|
350 | GBT_LEN arb_tree_min_deep(); |
---|
351 | GBT_LEN arb_tree_deep(); |
---|
352 | |
---|
353 | void load_sequences_rek(char *use,GB_BOOL set_by_gbdata,GB_BOOL show_status) ; // uses seq->filter |
---|
354 | virtual void parsimony_rek(); |
---|
355 | |
---|
356 | virtual AP_tree *dup(void); |
---|
357 | |
---|
358 | virtual void insert(AP_tree *new_brother); |
---|
359 | virtual void remove(void); // no delete of father |
---|
360 | virtual void swap_assymetric(AP_TREE_SIDE modus); // 0 = AP_LEFT_son 1=AP_RIGHT_son |
---|
361 | void swap_sons(void); // exchange sons |
---|
362 | |
---|
363 | GB_ERROR cantMoveTo(AP_tree *new_brother); // use this to detect impossible moves |
---|
364 | virtual void moveTo(AP_tree *new_brother,AP_FLOAT rel_pos); // move to new brother |
---|
365 | |
---|
366 | virtual void set_root(void); |
---|
367 | virtual void delete_tree(void); |
---|
368 | virtual void remove_leafs(GBDATA *gb_main,int awt_remove_type); |
---|
369 | |
---|
370 | void remove_bootstrap(GBDATA *); // remove bootstrap values from subtree |
---|
371 | void reset_branchlengths(GBDATA *); // reset branchlengths of subtree to 0.1 |
---|
372 | void scale_branchlengths(GBDATA *gb_main, double factor); |
---|
373 | void bootstrap2branchlen(GBDATA *); // copy bootstraps to branchlengths |
---|
374 | void branchlen2bootstrap(GBDATA *); // copy branchlengths to bootstraps |
---|
375 | |
---|
376 | virtual void test_tree(void) const; |
---|
377 | |
---|
378 | virtual AP_FLOAT costs(void); /* cost of a tree (number of changes ..)*/ |
---|
379 | |
---|
380 | virtual AP_BOOL push(AP_STACK_MODE, unsigned long); |
---|
381 | virtual void pop(unsigned long); |
---|
382 | virtual AP_BOOL clear( unsigned long stack_update, unsigned long user_push_counter); |
---|
383 | virtual void unhash_sequence(void); |
---|
384 | |
---|
385 | void move_gbt_2_ap(GBT_TREE *tree, bool insert_remove_cb); // moves all node/leaf information from struct GBT_TREE to AP_tree |
---|
386 | void load_node_info(); // load linewidth etc |
---|
387 | |
---|
388 | GB_ERROR load(AP_tree_root *tree_static, |
---|
389 | bool link_to_database, bool insert_delete_cbs, bool show_status, |
---|
390 | int *zombies, int *duplicates) __ATTR__USERESULT; |
---|
391 | |
---|
392 | virtual GB_ERROR saveTree() __ATTR__USERESULT; |
---|
393 | GB_ERROR relink() __ATTR__USERESULT; |
---|
394 | |
---|
395 | virtual AP_UPDATE_FLAGS check_update(); |
---|
396 | |
---|
397 | virtual void update(); |
---|
398 | |
---|
399 | |
---|
400 | void buildLeafList(AP_tree **&list, long &num); // returns a list of leafs |
---|
401 | void buildNodeList(AP_tree **&list, long &num); // returns a list of nodes (leafs and internal nodes, but not root) |
---|
402 | void buildBranchList(AP_tree **&list, long &num, AP_BOOL create_terminal_branches, int deep); |
---|
403 | |
---|
404 | AP_tree **getRandomNodes(int nnodes); // returns a list of random nodes (no leafs) |
---|
405 | |
---|
406 | AP_BOOL is_son(AP_tree *father); |
---|
407 | AP_tree *brother(void) const; |
---|
408 | void set_fatherson(AP_tree *new_son); |
---|
409 | void set_fathernotson(AP_tree *new_son); |
---|
410 | |
---|
411 | virtual void clear_branch_flags(void); |
---|
412 | void touch_branch(void) { if (!father->father) father->leftson->br.touched =1 ; else this->br.touched=1; }; |
---|
413 | int get_branch_flag(void) { return (!father->father) ? father->leftson->br.touched : this->br.touched; } ; |
---|
414 | |
---|
415 | GBT_LEN get_branchlength() const { |
---|
416 | if (father) { |
---|
417 | if (father->rightson == this) return father->rightlen; |
---|
418 | return father->leftlen; |
---|
419 | } |
---|
420 | return 0; |
---|
421 | } |
---|
422 | void set_branchlength(GBT_LEN newlen) { |
---|
423 | if (father) { |
---|
424 | if (father->rightson == this) |
---|
425 | father->rightlen = newlen; |
---|
426 | else |
---|
427 | father->leftlen = newlen; |
---|
428 | } |
---|
429 | } |
---|
430 | |
---|
431 | GB_ERROR move_group_info(AP_tree *new_group) __ATTR__USERESULT; |
---|
432 | void mark_duplicates(GBDATA *gb_main); |
---|
433 | void mark_long_branches(GBDATA *gb_main, double min_rel_diff, double min_abs_diff); |
---|
434 | void mark_deep_branches(GBDATA *gb_main,int rel_depth); |
---|
435 | void mark_degenerated_branches(GBDATA *gb_main,double degeneration_factor); |
---|
436 | void justify_branch_lenghs(GBDATA *gb_main); |
---|
437 | void relink_tree(GBDATA *gb_main, void (*relinker)(GBDATA *&ref_gb_node, char *&ref_name, GB_HASH *organism_hash), GB_HASH *organism_hash); |
---|
438 | }; |
---|
439 | |
---|
440 | long AP_timer(void); |
---|
441 | |
---|
442 | #else |
---|
443 | #error awt_tree.hxx included twice |
---|
444 | #endif // AWT_TREE_HXX |
---|