// source: tags/cvs_2_svn/STAT/st_ml.hxx
//
// Last change on this file was 5330, checked in by westram, 16 years ago
//   - mostly const fixes
//   - Property svn:eol-style set to native
//   - Property svn:keywords set to Author Date Id Revision
// File size: 4.7 KB
1#ifndef ARB_ASSERT_H
2#include <arb_assert.h>
3#endif
4#define st_assert(bed) arb_assert(bed)
5
/** Base codes; the non-negative values double as array indices. */
enum AWT_dna_base {
    ST_A,
    ST_C,
    ST_G,
    ST_T,
    ST_GAP,
    ST_MAX_BASE,    // count of the codes above; used as an array dimension
    ST_UNKNOWN = -1 // negative on purpose: never a valid array index
};

16extern class AWT_dna_table {
17    char char_to_enum_table[256];
18public:
19    AWT_dna_base char_to_enum(char i) {
20        return (AWT_dna_base)char_to_enum_table[(unsigned char)i];
21    }
22    AWT_dna_table();
23} awt_dna_table;
24
/** Element type of the color arrays handed out by ST_ML::get_color_string(). */
typedef unsigned char ST_ML_Color;

// Length of one sequence part. Should be greater than the editor width,
// otherwise there are extreme performance penalties.
const int ST_MAX_SEQ_PART = 256;
// At minimum ST_BUCKET_SIZE characters are calculated per call.
const int ST_BUCKET_SIZE = 16;
// Binary logarithm (logarithmus dualis) of ST_BUCKET_SIZE; keep the two in sync.
const int LD_BUCKET_SIZE = 4;

34class ST_base_vector {
35public:
36    float b[ST_MAX_BASE]; // acgt-
37    int ld_lik;
38    float lik; // likelihood  = 2^ld_lik * lik * (b[0] + b[1] + b[2] ..)
39    void set(char base, ST_base_vector *frequencies);
40    inline void mult(ST_base_vector *other);
41    void check_overflow();
42    void print();
43};
44
45class ST_rate_matrix {
46    float m[ST_MAX_BASE][ST_MAX_BASE];
47public:
48    void set(double dist, double TT_ratio);
49    inline void mult(ST_base_vector *in, ST_base_vector *out);
50    void print();
51};
52
// Forward declarations: only pointers to these types are needed at this point.
class ST_ML;
class AWT_csp;

56/** Note: Because we have only limited memory we split the
57 sequence into ST_MAX_SEQ_PART long parts */
58class ST_sequence_ml : private AP_sequence {
59    friend class ST_ML;
60public:
61
62    GBDATA *gb_data; // the sequence
63    static ST_base_vector *tmp_out; // len = alignment length
64
65protected:
66
67    ST_ML *st_ml; // link to a global ST object
68    ST_base_vector *sequence; // A part of the sequence
69    int last_updated;
70    ST_ML_Color *color_out;
71    int *color_out_valid_till; // color_out is valid up to
72
73public:
74    void delete_sequence(); // remove link to database
75    void sequence_change(); // sequence has changed in db
76    AP_FLOAT combine(const AP_sequence* lefts, const AP_sequence *rights);
77    void partial_match(const AP_sequence* part, long *overlap, long *penalty) const;
78    ST_sequence_ml(AP_tree_root *rooti, ST_ML *st_ml);
79    ~ST_sequence_ml();
80    AP_sequence *dup(void);
81
82    void set(const char *sequence);
83    void set_gb(GBDATA *gbd);
84
85    void set_sequence(); // start at st_ml->base
86
87    void go(const ST_sequence_ml *lefts, double leftl,
88            const ST_sequence_ml *rights, double rightl);
89    void ungo(); // undo go
90
91    void calc_out(ST_sequence_ml *sequence_of_brother, double dist);
92    void print();
93};
94
class AW_window;
/** Callback taking only the window it is invoked for; see ST_ML::refresh_func. */
typedef void (*AW_CB0)(AW_window*);

98class ST_ML {
99    char *alignment_name;
100    friend AP_tree *st_ml_convert_species_name_to_node(ST_ML *st_ml,
101            const char *species_name);
102    GB_HASH *hash_2_ap_tree; // hash table to get from name to tree_node
103    GB_HASH *keep_species_hash; // temporary hash to find
104    int refresh_n;
105    int *not_valid; // which columns are valid
106
107    ST_sequence_ml *do_tree(AP_tree *node);
108    void undo_tree(AP_tree *node); //opposite of do_tree
109    void insert_tree_into_hash_rek(AP_tree *node);
110    void create_matrizes(double max_disti, int nmatrizes);
111    void create_frequencies();
112    static long delete_species(const char *key, long val);
113public:
114    AP_tree_root *tree_root;
115    int latest_modification; // last mod;
116    int base;
117    int to;
118    AW_CB0 refresh_func;
119    AW_window *aw_window;
120
121    GBDATA *gb_main;
122    float *ttratio; // column independent
123    ST_base_vector *base_frequencies; // column independent
124    ST_base_vector *inv_base_frequencies; // column independent
125    float *rates; // column independent
126    double max_dist; // max_dist for rate_matrizes
127    double step_size; // max_dist/step_size matrizes
128    int max_matr;
129    ST_rate_matrix *rate_matrizes; // for each distance a new matrix
130    long alignment_len;
131    AWT_csp *awt_csp;
132    void set_modified(int *what = 0);
133    void set_refresh(); // set flag for refresh
134
135    ~ST_ML();
136    ST_ML(GBDATA *gb_main);
137    void print();
138    int is_inited;
139
140    GB_ERROR init(const char *tree_name, const char *alignment_name,
141            const char *species_names, int marked_only,
142            const char *filter_string, AWT_csp *awt_csp);
143    // species_names is 0 -> all [marked] species (else species_names is a (char)1 seperated list of species)
144    // filter_string==0 -> no filter
145
146    void clear_all(); // delete all caches
147
148
149    ST_sequence_ml *get_ml_vectors(char *species_name, AP_tree *node,
150            int start_ali_pos, int end_ali_pos);
151    ST_ML_Color *get_color_string(char *species_name, AP_tree *node,
152            int start_ali_pos, int end_ali_pos);
153
154    int update_ml_likelihood(char *result[4], int *latest_update,
155            char *species_name, AP_tree *node);
156
157    int refresh_needed();
158};
// Note: See TracBrowser for help on using the repository browser.