source: branches/profile/RNACMA/Analyser.cxx

Last change on this file was 12271, checked in by westram, 10 years ago
  • no magic exit codes in RNACMA
  • print proper errors
File size: 5.6 KB
Line 
1/*
2 * Analyser.cpp
3 *
4 * Responsible for interacting with the Cma code and Arb.
5 *
6 *  Created on: Feb 15, 2010
7 *      Author: breno
8 *
9 *  Institute of Microbiology (Technical University Munich)
10 *  http://www.arb-home.de/
11 */
12
13#include "Analyser.h"
14#include <time.h>
15#include <iostream>
16#include <iterator>
17#include "dbconn.h"
18
19/**
20 * Constructor
21 */
22Analyser::Analyser() {
23
24    //Definition of the alphabet
25    vector<string> alphabet(0);
26    alphabet.push_back("A");
27    alphabet.push_back("C");
28    alphabet.push_back("G");
29    alphabet.push_back("T");
30
31    loader = new AlignedSequenceLoader;
32    VecVecType *seqs = loader->getSequences();
33
34    cma = new Cma(alphabet, seqs->size());
35}
36
37/**
38 * Destructor.
39 */
40Analyser::~Analyser() {
41    delete loader;
42    delete cma;
43}
44
45/**
46 * Getter for the Cma object.
47 *
48 * @return the Cma object.
49 */
50Cma* Analyser::getCma() {
51    return cma;
52}
53
54/**
55 * Returns the AlignedSequenceLoader object.
56 *
57 * @return the AlignedSequenceLoader object.
58 */
59AlignedSequenceLoader* Analyser::getLoader() {
60    return loader;
61}
62
63/**
64 * Saves the clusters to the DB as SAI.
65 *
66 * @param clusters: the cluster indices.
67 * @param threshold: the clustering threshold used.
68 *
69 * @return an GB_ERROR if some DB transaction fails.
70 */
71GB_ERROR Analyser::saveSAI(vector<size_t> clusters, double threshold) {
72
73    GBDATA   *gb_main = runningDatabase();
74    GB_ERROR  error   = GB_push_transaction(gb_main);
75    if (error) {
76        cout << "ERROR 1" << endl;
77    }
78
79    char *al_name = GBT_get_default_alignment(gb_main);
80
81    vector<char> clusts = normalizeClusters(clusters);
82    // build result string
83    stringstream ss;
84    copy(clusts.begin(), clusts.end(), ostream_iterator<char> (ss, ""));
85    string result = ss.str();
86
87    //save
88    GBDATA *gb_sai = GBT_find_or_create_SAI(gb_main, getSAIname());
89    GBDATA *gb_data = GBT_add_data(gb_sai, al_name, "data", GB_STRING);
90    error = GB_write_string(gb_data, result.c_str());
91    if (error) {
92        cout << "RNACMA-Error: " << error << "\n";
93        exit(EXIT_FAILURE);
94    }
95
96    GBDATA *gb_options = GBT_add_data(gb_sai, al_name, "_TYPE", GB_STRING);
97    stringstream options;
98    options << "CMA_CLUSTERING: [threshold: " << threshold << "]";
99    error = GB_write_string(gb_options, options.str().c_str());
100
101    GB_commit_transaction(gb_main);
102
103    return error;
104}
105
106/**
107 * Gives clusters a reasonable name. The clustering algorithm may return
108 * clusters with indices 123 even though there are only 50 clusters.
109 * Here we normalise the cluster names, in this example we would have
110 * only clusters from 0..50 as result.
111 *
112 * @param clusters: the result of the clustering algorithm.
113 *
114 * @return the new cluster names (note that the cluster index is a char
115 *         because we have to be able to show it in the SAI, where we
116 *         are allowed to use only one character for each position).
117 */
118vector<char> Analyser::normalizeClusters(vector<size_t> clusters) {
119
120    vector<char> result;
121    map<unsigned int, char> rename_map;
122    rename_map[0] = '-';
123    char classes = '0';
124
125    for (vector<size_t>::iterator it = clusters.begin(); it != clusters.end(); ++it) {
126        if (rename_map.find(*it) == rename_map.end()) {
127            rename_map[*it] = classes++;
128        }
129        result.push_back(rename_map[*it]);
130    }
131    return result;
132
133}
134
135//--------------------------------
136
137int main(void) {
138    cout
139        << "arb_rnacma -- correlated mutation analysis" << endl
140        << "              computes clusters of correlated positions" << endl
141        << "(C) 2010 Lehrstuhl fuer Mikrobiologie, TU Muenchen" << endl
142        << "Written 2009/2010 by Breno Faria" << endl
143        << endl
144        << "arb_rnacma uses the eigen C++ library (http://eigen.tuxfamily.org/)" << endl
145        << "eigen is copyrighted by LGPL3" << endl
146        << endl;
147
148    Analyser *a   = new Analyser;
149    Cma      *cma = a->getCma();
150
151    cma->computeMutualInformationP(*(a->getLoader()->getSequences()));
152
153    list<MITuple> mituple = cma->compute_mituples(cma->getMIp());
154
155    cout << endl
156         << "The highest MI value was: " << mituple.begin()->MI
157         << " at position (" << mituple.begin()->pos1 << ", "
158         << mituple.begin()->pos2 << ")." << endl
159         << "(Note: pairs with MI-values down to the threshold will be joined in one cluster)" << endl;
160
161    while (true) {
162        cout << endl
163             << "Press Ctrl-d to quit or" << endl
164             << "choose a threshold value for the clustering process: ";
165
166
167        string input;
168        cin >> input;
169
170        if (input.empty()) {
171            cout << endl << "quit" << endl;
172            break;
173        }
174
175        double threshold = strtod(input.c_str(), NULL);
176        cout << "Building clusters with threshold = " << threshold << endl;
177
178        VectorXi cl = cma->computeClusters(mituple, size_t(cma->getMIp().cols()), threshold);
179        vector<size_t> *clusters = new vector<size_t> (0, 0);
180        AlignedSequenceLoader *l = a->getLoader();
181        size_t i = 0;
182        size_t j = 0;
183        for (vector<size_t>::iterator it = l->getPositionMap()->begin(); it
184                 != l->getPositionMap()->end(); ++it) {
185            while (i < *it) {
186                clusters->push_back(0);
187                i++;
188            }
189            clusters->push_back(cl[j]);
190            j++;
191            i++;
192        }
193
194        GB_ERROR e = a->saveSAI(*clusters, threshold);
195
196        if (e) {
197            cout << "Error" << endl;
198        }
199
200        cout << "Saved results to SAI '" << Analyser::getSAIname() << "'" << endl
201             << "(To analyse the results, open the editor and visualise the clusters using the SAI annotations)" << endl;
202    }
203
204    delete a;
205
206}
Note: See TracBrowser for help on using the repository browser.