1 | #ifndef ARB_ASSERT_H |
---|
2 | #include <arb_assert.h> |
---|
3 | #endif |
---|
// Module-local assertion macro: st_assert(cond) simply forwards to arb_assert(cond).
#define st_assert(bed) arb_assert(bed)
---|
5 | |
---|
/* Nucleotide states. The values ST_A .. ST_GAP are consecutive and start at 0;
 * ST_MAX_BASE is the number of such states and is used as an array dimension
 * (e.g. ST_base_vector::b). ST_UNKNOWN is negative and therefore never a valid
 * index into those arrays. */
enum AWT_dna_base {
    ST_A        = 0,
    ST_C        = 1,
    ST_G        = 2,
    ST_T        = 3,
    ST_GAP      = 4,
    ST_MAX_BASE = 5,  // count of the states above, not a state itself
    ST_UNKNOWN  = -1
};
---|
15 | |
---|
16 | extern class AWT_dna_table { |
---|
17 | char char_to_enum_table[256]; |
---|
18 | public: |
---|
19 | AWT_dna_base char_to_enum(char i) { |
---|
20 | return (AWT_dna_base)char_to_enum_table[(unsigned char)i]; |
---|
21 | } |
---|
22 | AWT_dna_table(); |
---|
23 | } awt_dna_table; |
---|
24 | |
---|
// Element type of the color arrays used in this header
// (see ST_sequence_ml::color_out and ST_ML::get_color_string).
typedef unsigned char ST_ML_Color;
---|
26 | |
---|
// Length of one sequence part.
// Should be greater than the editor width, otherwise there are extreme
// performance penalties.
static const int ST_MAX_SEQ_PART = 256;

// At minimum ST_BUCKET_SIZE characters are calculated per call.
static const int ST_BUCKET_SIZE = 16;

// Base-2 logarithm of ST_BUCKET_SIZE (keep both values in sync).
static const int LD_BUCKET_SIZE = 4;
---|
33 | |
---|
34 | class ST_base_vector { |
---|
35 | public: |
---|
36 | float b[ST_MAX_BASE]; // acgt- |
---|
37 | int ld_lik; |
---|
38 | float lik; // likelihood = 2^ld_lik * lik * (b[0] + b[1] + b[2] ..) |
---|
39 | void set(char base, ST_base_vector *frequencies); |
---|
40 | inline void mult(ST_base_vector *other); |
---|
41 | void check_overflow(); |
---|
42 | void print(); |
---|
43 | }; |
---|
44 | |
---|
45 | class ST_rate_matrix { |
---|
46 | float m[ST_MAX_BASE][ST_MAX_BASE]; |
---|
47 | public: |
---|
48 | void set(double dist, double TT_ratio); |
---|
49 | inline void mult(ST_base_vector *in, ST_base_vector *out); |
---|
50 | void print(); |
---|
51 | }; |
---|
52 | |
---|
53 | class ST_ML; |
---|
54 | class AWT_csp; |
---|
55 | |
---|
56 | /** Note: Because we have only limited memory we split the |
---|
57 | sequence into ST_MAX_SEQ_PART long parts */ |
---|
58 | class ST_sequence_ml : private AP_sequence { |
---|
59 | friend class ST_ML; |
---|
60 | public: |
---|
61 | |
---|
62 | GBDATA *gb_data; // the sequence |
---|
63 | static ST_base_vector *tmp_out; // len = alignment length |
---|
64 | |
---|
65 | protected: |
---|
66 | |
---|
67 | ST_ML *st_ml; // link to a global ST object |
---|
68 | ST_base_vector *sequence; // A part of the sequence |
---|
69 | int last_updated; |
---|
70 | ST_ML_Color *color_out; |
---|
71 | int *color_out_valid_till; // color_out is valid up to |
---|
72 | |
---|
73 | public: |
---|
74 | void delete_sequence(); // remove link to database |
---|
75 | void sequence_change(); // sequence has changed in db |
---|
76 | AP_FLOAT combine(const AP_sequence* lefts, const AP_sequence *rights); |
---|
77 | void partial_match(const AP_sequence* part, long *overlap, long *penalty) const; |
---|
78 | ST_sequence_ml(AP_tree_root *rooti, ST_ML *st_ml); |
---|
79 | ~ST_sequence_ml(); |
---|
80 | AP_sequence *dup(void); |
---|
81 | |
---|
82 | void set(const char *sequence); |
---|
83 | void set_gb(GBDATA *gbd); |
---|
84 | |
---|
85 | void set_sequence(); // start at st_ml->base |
---|
86 | |
---|
87 | void go(const ST_sequence_ml *lefts, double leftl, |
---|
88 | const ST_sequence_ml *rights, double rightl); |
---|
89 | void ungo(); // undo go |
---|
90 | |
---|
91 | void calc_out(ST_sequence_ml *sequence_of_brother, double dist); |
---|
92 | void print(); |
---|
93 | }; |
---|
94 | |
---|
class AW_window;
// Callback that receives only the window; ST_ML::refresh_func has this type.
typedef void (*AW_CB0)(AW_window*);
---|
97 | |
---|
98 | class ST_ML { |
---|
99 | char *alignment_name; |
---|
100 | friend AP_tree *st_ml_convert_species_name_to_node(ST_ML *st_ml, |
---|
101 | const char *species_name); |
---|
102 | GB_HASH *hash_2_ap_tree; // hash table to get from name to tree_node |
---|
103 | GB_HASH *keep_species_hash; // temporary hash to find |
---|
104 | int refresh_n; |
---|
105 | int *not_valid; // which columns are valid |
---|
106 | |
---|
107 | ST_sequence_ml *do_tree(AP_tree *node); |
---|
108 | void undo_tree(AP_tree *node); //opposite of do_tree |
---|
109 | void insert_tree_into_hash_rek(AP_tree *node); |
---|
110 | void create_matrizes(double max_disti, int nmatrizes); |
---|
111 | void create_frequencies(); |
---|
112 | static long delete_species(const char *key, long val); |
---|
113 | public: |
---|
114 | AP_tree_root *tree_root; |
---|
115 | int latest_modification; // last mod; |
---|
116 | int base; |
---|
117 | int to; |
---|
118 | AW_CB0 refresh_func; |
---|
119 | AW_window *aw_window; |
---|
120 | |
---|
121 | GBDATA *gb_main; |
---|
122 | float *ttratio; // column independent |
---|
123 | ST_base_vector *base_frequencies; // column independent |
---|
124 | ST_base_vector *inv_base_frequencies; // column independent |
---|
125 | float *rates; // column independent |
---|
126 | double max_dist; // max_dist for rate_matrizes |
---|
127 | double step_size; // max_dist/step_size matrizes |
---|
128 | int max_matr; |
---|
129 | ST_rate_matrix *rate_matrizes; // for each distance a new matrix |
---|
130 | long alignment_len; |
---|
131 | AWT_csp *awt_csp; |
---|
132 | void set_modified(int *what = 0); |
---|
133 | void set_refresh(); // set flag for refresh |
---|
134 | |
---|
135 | ~ST_ML(); |
---|
136 | ST_ML(GBDATA *gb_main); |
---|
137 | void print(); |
---|
138 | int is_inited; |
---|
139 | |
---|
140 | GB_ERROR init(const char *tree_name, const char *alignment_name, |
---|
141 | const char *species_names, int marked_only, |
---|
142 | const char *filter_string, AWT_csp *awt_csp); |
---|
143 | // species_names is 0 -> all [marked] species (else species_names is a (char)1 seperated list of species) |
---|
144 | // filter_string==0 -> no filter |
---|
145 | |
---|
146 | void clear_all(); // delete all caches |
---|
147 | |
---|
148 | |
---|
149 | ST_sequence_ml *get_ml_vectors(char *species_name, AP_tree *node, |
---|
150 | int start_ali_pos, int end_ali_pos); |
---|
151 | ST_ML_Color *get_color_string(char *species_name, AP_tree *node, |
---|
152 | int start_ali_pos, int end_ali_pos); |
---|
153 | |
---|
154 | int update_ml_likelihood(char *result[4], int *latest_update, |
---|
155 | char *species_name, AP_tree *node); |
---|
156 | |
---|
157 | int refresh_needed(); |
---|
158 | }; |
---|