1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : distanalyse.cxx // |
---|
4 | // Purpose : DI_MATRIX::analyse() - suggests a distance correction from sequence statistics // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #include "di_matr.hxx" |
---|
12 | #include <AP_seq_dna.hxx> |
---|
13 | #include <AP_filter.hxx> |
---|
14 | #include <aw_awar.hxx> |
---|
15 | #include <aw_msg.hxx> |
---|
16 | #include <aw_root.hxx> |
---|
17 | #include <algorithm> |
---|
18 | |
---|
19 | using std::min; |
---|
20 | using std::max; |
---|
21 | |
---|
22 | void DI_MATRIX::analyse() { |
---|
23 | if (is_AA) { |
---|
24 | if (nentries> 100) { |
---|
25 | aw_message("A lot of sequences!\n ==> fast Kimura selected! (instead of PAM)"); |
---|
26 | aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_KIMURA); |
---|
27 | } |
---|
28 | else { |
---|
29 | aw_message("Only limited number of sequences!\n" |
---|
30 | " ==> slow PAM selected! (instead of Kimura)"); |
---|
31 | aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_PAM); |
---|
32 | } |
---|
33 | } |
---|
34 | else { |
---|
35 | long mean_len = 0; |
---|
36 | float min_gc = 9999.9; |
---|
37 | float max_gc = 0.0; |
---|
38 | long min_len = 9999999; |
---|
39 | long max_len = 0; |
---|
40 | |
---|
41 | // calculate meanvalue of sequencelength: |
---|
42 | for (size_t row=0; row<nentries; row++) { |
---|
43 | const char *sequ = entries[row]->sequence_parsimony->get_sequence(); |
---|
44 | size_t flen = aliview->get_length(); |
---|
45 | |
---|
46 | long act_gci = 0; |
---|
47 | long act_len = 0; |
---|
48 | |
---|
49 | for (size_t pos=0; pos<flen; pos++) { |
---|
50 | char ch = sequ[pos]; |
---|
51 | if (ch == AP_C || ch == AP_G) act_gci++; |
---|
52 | if (ch == AP_A || ch == AP_C || ch == AP_G || ch == AP_T) act_len++; |
---|
53 | } |
---|
54 | |
---|
55 | mean_len += act_len; |
---|
56 | |
---|
57 | float act_gc = ((float) act_gci) / act_len; |
---|
58 | |
---|
59 | min_gc = min(min_gc, act_gc); |
---|
60 | max_gc = max(max_gc, act_gc); |
---|
61 | |
---|
62 | min_len = min(min_len, act_len); |
---|
63 | max_len = max(max_len, act_len); |
---|
64 | } |
---|
65 | |
---|
66 | if (min_len * 1.3 < max_len) { |
---|
67 | aw_message("Warning: The length of sequences differs significantly!\n" |
---|
68 | " Be careful: Neighbour Joining is sensitive to\n" |
---|
69 | " this kind of \"error\""); |
---|
70 | } |
---|
71 | mean_len /= nentries; |
---|
72 | |
---|
73 | if (mean_len < 100) { |
---|
74 | aw_message("Too short sequences!\n ==> No correction selected!"); |
---|
75 | aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_NONE); |
---|
76 | } |
---|
77 | else if (mean_len < 300) { |
---|
78 | aw_message("Meanlength shorter than 300\n ==> Jukes Cantor selected!"); |
---|
79 | aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_JUKES_CANTOR); |
---|
80 | } |
---|
81 | else if ((mean_len < 1000) || ((max_gc / min_gc) < 1.2)) { |
---|
82 | const char *reason; |
---|
83 | if (mean_len < 1000) reason = "Sequences are too short for Olsen!"; |
---|
84 | else reason = GBS_global_string("Maximal GC (%f) : Minimal GC (%f) < 1.2", max_gc, min_gc); |
---|
85 | |
---|
86 | reason = GBS_global_string("%s ==> Felsenstein selected!", reason); |
---|
87 | aw_message(reason); |
---|
88 | aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_FELSENSTEIN); |
---|
89 | } |
---|
90 | else { |
---|
91 | aw_message("Olsen selected!"); |
---|
92 | aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_OLSEN); |
---|
93 | } |
---|
94 | } |
---|
95 | } |
---|