source: tags/ms_ra2q2/RNACMA/Analyser.cxx

Last change on this file was 17362, checked in by westram, 6 years ago
  • exception in RNACMA
    • handle
    • fix message
File size: 5.9 KB
Line 
1/*
2 * Analyser.cpp
3 *
4 * Responsible for interacting with the Cma code and Arb.
5 *
6 *  Created on: Feb 15, 2010
7 *      Author: breno
8 *
9 *  Institute of Microbiology (Technical University Munich)
10 *  http://www.arb-home.de/
11 */
12
13#include "Analyser.h"
14#include <time.h>
15#include <iostream>
16#include <iterator>
17#include "dbconn.h"
18
19/**
20 * Constructor
21 */
22Analyser::Analyser() {
23
24    //Definition of the alphabet
25    vector<string> alphabet(0);
26    alphabet.push_back("A");
27    alphabet.push_back("C");
28    alphabet.push_back("G");
29    alphabet.push_back("T");
30
31    loader = new AlignedSequenceLoader;
32    VecVecType *seqs = loader->getSequences();
33
34    cma = new Cma(alphabet, seqs->size());
35}
36
37/**
38 * Destructor.
39 */
40Analyser::~Analyser() {
41    delete loader;
42    delete cma;
43}
44
45/**
46 * Getter for the Cma object.
47 *
48 * @return the Cma object.
49 */
50Cma* Analyser::getCma() {
51    return cma;
52}
53
54/**
55 * Returns the AlignedSequenceLoader object.
56 *
57 * @return the AlignedSequenceLoader object.
58 */
59AlignedSequenceLoader* Analyser::getLoader() {
60    return loader;
61}
62
63/**
64 * Saves the clusters to the DB as SAI.
65 *
66 * @param clusters: the cluster indices.
67 * @param threshold: the clustering threshold used.
68 *
69 * @return an GB_ERROR if some DB transaction fails.
70 */
71GB_ERROR Analyser::saveSAI(vector<size_t> clusters, double threshold) {
72
73    GBDATA   *gb_main = runningDatabase();
74    GB_ERROR  error   = GB_push_transaction(gb_main);
75    if (error) {
76        cout << "ERROR 1" << endl;
77    }
78
79    char *al_name = GBT_get_default_alignment(gb_main);
80
81    vector<char> clusts = normalizeClusters(clusters);
82    // build result string
83    stringstream ss;
84    copy(clusts.begin(), clusts.end(), ostream_iterator<char> (ss, ""));
85    string result = ss.str();
86
87    //save
88    GBDATA *gb_sai = GBT_find_or_create_SAI(gb_main, getSAIname());
89    GBDATA *gb_data = GBT_add_data(gb_sai, al_name, "data", GB_STRING);
90    error = GB_write_string(gb_data, result.c_str());
91    if (error) {
92        cout << "RNACMA-Error: " << error << "\n";
93        exit(EXIT_FAILURE);
94    }
95
96    GBDATA *gb_options = GBT_add_data(gb_sai, al_name, "_TYPE", GB_STRING);
97    stringstream options;
98    options << "CMA_CLUSTERING: [threshold: " << threshold << "]";
99    error = GB_write_string(gb_options, options.str().c_str());
100
101    GB_commit_transaction(gb_main);
102
103    return error;
104}
105
106/**
107 * Gives clusters a reasonable name. The clustering algorithm may return
108 * clusters with indices 123 even though there are only 50 clusters.
109 * Here we normalise the cluster names, in this example we would have
110 * only clusters from 0..50 as result.
111 *
112 * @param clusters: the result of the clustering algorithm.
113 *
114 * @return the new cluster names (note that the cluster index is a char
115 *         because we have to be able to show it in the SAI, where we
116 *         are allowed to use only one character for each position).
117 */
118vector<char> Analyser::normalizeClusters(vector<size_t> clusters) {
119
120    vector<char> result;
121    map<unsigned int, char> rename_map;
122    rename_map[0] = '-';
123    char classes = '0';
124
125    for (vector<size_t>::iterator it = clusters.begin(); it != clusters.end(); ++it) {
126        if (rename_map.find(*it) == rename_map.end()) {
127            rename_map[*it] = classes++;
128        }
129        result.push_back(rename_map[*it]);
130    }
131    return result;
132
133}
134
135//--------------------------------
136
137int main(void) {
138    cout
139        << "arb_rnacma -- correlated mutation analysis" << endl
140        << "              computes clusters of correlated positions" << endl
141        << "(C) 2010 Lehrstuhl fuer Mikrobiologie, TU Muenchen" << endl
142        << "Written 2009/2010 by Breno Faria" << endl
143        << endl
144        << "arb_rnacma uses the eigen C++ library (http://eigen.tuxfamily.org/)" << endl
145        << "eigen is copyrighted by LGPL3" << endl
146        << endl;
147
148    try {
149        Analyser *a   = new Analyser;
150        Cma      *cma = a->getCma();
151
152        cma->computeMutualInformationP(*(a->getLoader()->getSequences()));
153
154        list<MITuple> mituple = cma->compute_mituples(cma->getMIp());
155
156        cout << endl
157             << "The highest MI value was: " << mituple.begin()->MI
158             << " at position (" << mituple.begin()->pos1 << ", "
159             << mituple.begin()->pos2 << ")." << endl
160             << "(Note: pairs with MI-values down to the threshold will be joined in one cluster)" << endl;
161
162        while (true) {
163            cout << endl
164                 << "Press Ctrl-d to quit or" << endl
165                 << "choose a threshold value for the clustering process: ";
166
167
168            string input;
169            cin >> input;
170
171            if (input.empty()) {
172                cout << endl << "quit" << endl;
173                break;
174            }
175
176            double threshold = strtod(input.c_str(), NULp);
177            cout << "Building clusters with threshold = " << threshold << endl;
178
179            VectorXi cl = cma->computeClusters(mituple, size_t(cma->getMIp().cols()), threshold);
180            vector<size_t> *clusters = new vector<size_t> (0, 0);
181            AlignedSequenceLoader *l = a->getLoader();
182            size_t i = 0;
183            size_t j = 0;
184            for (vector<size_t>::iterator it = l->getPositionMap()->begin(); it
185                     != l->getPositionMap()->end(); ++it) {
186                while (i < *it) {
187                    clusters->push_back(0);
188                    i++;
189                }
190                clusters->push_back(cl[j]);
191                j++;
192                i++;
193            }
194
195            GB_ERROR e = a->saveSAI(*clusters, threshold);
196
197            if (e) {
198                cout << "Error" << endl;
199            }
200
201            cout << "Saved results to SAI '" << Analyser::getSAIname() << "'" << endl
202                 << "(To analyse the results, open the editor and visualise the clusters using the SAI annotations)" << endl;
203        }
204
205        delete a;
206    }
207    catch (const char *err) {
208        cout << endl << "Exception in arb_rnacma: " << err << ". terminating." << endl;
209        return EXIT_FAILURE;
210    }
211
212    return EXIT_SUCCESS;
213}
Note: See TracBrowser for help on using the repository browser.