source: branches/profile/DIST/distanalyse.cxx

Last change on this file was 11401, checked in by westram, 10 years ago
  • reintegrates 'tree' into 'trunk':
    • consensus trees:
      • support for merging partial trees ("worked" before, but results were crap; implements #65)
      • generated trees are automatically re-rooted and -ordered
      • always list source trees in consensus-tree-comment; show info about partial trees
      • fixed progress bar
    • made GBT_TREE a base class of other tree classes (implements #31)
    • save tree properties in properties (not in DB)
    • new functions 'Remove zombies/marked from ALL trees'
    • tree load/save: layout fixes
    • unit tests
      • added tests for basic tree modifications (PARSIMONY)
    • performance:
      • compute_tree updates tree information in one traversal
      • tree generators are now capable to generate any type of tree (w/o needing to copy it once)
    • bugfixes:
      • NNI (of marked species) was also always performed for colored species
      • centered beautify-order is stable now
      • improved 'search optimal root'
  • adds:
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.6 KB
Line 
1// =============================================================== //
2//                                                                 //
3//   File      : distanalyse.cxx                                   //
4//   Purpose   : Heuristically pick the distance correction        //
5//                                                                 //
6//   Institute of Microbiology (Technical University Munich)       //
7//   http://www.arb-home.de/                                       //
8//                                                                 //
9// =============================================================== //
10
11#include "di_matr.hxx"
12#include <AP_seq_dna.hxx>
13#include <AP_filter.hxx>
14#include <aw_awar.hxx>
15#include <aw_msg.hxx>
16#include <aw_root.hxx>
17#include <algorithm>
18
19using std::min;
20using std::max;
21
22void DI_MATRIX::analyse() {
23    if (is_AA) {
24        if (nentries> 100) {
25            aw_message("A lot of sequences!\n   ==> fast Kimura selected! (instead of PAM)");
26            aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_KIMURA);
27        }
28        else {
29            aw_message("Only limited number of sequences!\n"
30                       "   ==> slow PAM selected! (instead of Kimura)");
31            aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_PAM);
32        }
33    }
34    else {
35        long  mean_len = 0;
36        float min_gc   = 9999.9;
37        float max_gc   = 0.0;
38        long  min_len  = 9999999;
39        long  max_len  = 0;
40
41        // calculate meanvalue of sequencelength:
42        for (size_t row=0; row<nentries; row++) {
43            const char *sequ = entries[row]->sequence_parsimony->get_sequence();
44            size_t      flen = aliview->get_length();
45
46            long act_gci = 0;
47            long act_len = 0;
48
49            for (size_t pos=0; pos<flen; pos++) {
50                char ch = sequ[pos];
51                if (ch == AP_C || ch == AP_G) act_gci++;
52                if (ch == AP_A || ch == AP_C || ch == AP_G || ch == AP_T) act_len++;
53            }
54
55            mean_len += act_len;
56
57            float act_gc = ((float) act_gci) / act_len;
58
59            min_gc = min(min_gc, act_gc);
60            max_gc = max(max_gc, act_gc);
61
62            min_len = min(min_len, act_len);
63            max_len = max(max_len, act_len);
64        }
65
66        if (min_len * 1.3 < max_len) {
67            aw_message("Warning: The length of sequences differs significantly!\n"
68                       "        Be careful: Neighbour Joining is sensitive to\n"
69                       "        this kind of \"error\"");
70        }
71        mean_len /= nentries;
72
73        if (mean_len < 100) {
74            aw_message("Too short sequences!\n   ==> No correction selected!");
75            aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_NONE);
76        }
77        else if (mean_len < 300) {
78            aw_message("Meanlength shorter than 300\n   ==> Jukes Cantor selected!");
79            aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_JUKES_CANTOR);
80        }
81        else if ((mean_len < 1000) || ((max_gc / min_gc) < 1.2)) {
82            const char *reason;
83            if (mean_len < 1000) reason = "Sequences are too short for Olsen!";
84            else                reason = GBS_global_string("Maximal GC (%f) : Minimal GC (%f) < 1.2", max_gc, min_gc);
85
86            reason = GBS_global_string("%s  ==> Felsenstein selected!", reason);
87            aw_message(reason);
88            aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_FELSENSTEIN);
89        }
90        else {
91            aw_message("Olsen selected!");
92            aw_root->awar(AWAR_DIST_CORR_TRANS)->write_int(DI_TRANSFORMATION_OLSEN);
93        }
94    }
95}
Note: See TracBrowser for help on using the repository browser.