source: tags/svn.1.5.4/RNACMA/Analyser.cxx

Last change on this file was 7898, checked in by westram, 14 years ago
  • removed RNACMA debug code
File size: 5.5 KB
Line 
1/*
2 * Analyser.cpp
3 *
4 * Responsible for interacting with the Cma code and Arb.
5 *
6 *  Created on: Feb 15, 2010
7 *      Author: breno
8 */
9
10#include "Analyser.h"
11#include <time.h>
12#include <iostream>
13#include <iterator>
14#include "dbconn.h"
15
16/**
17 * Constructor
18 */
19Analyser::Analyser() {
20
21    //Definition of the alphabet
22    vector<string> alphabet(0);
23    alphabet.push_back("A");
24    alphabet.push_back("C");
25    alphabet.push_back("G");
26    alphabet.push_back("T");
27
28    loader = new AlignedSequenceLoader();
29    VecVecType *seqs = loader->getSequences();
30
31    cma = new Cma(alphabet, seqs->size());
32}
33
34/**
35 * Destructor.
36 */
37Analyser::~Analyser() {
38    delete loader;
39    delete cma;
40}
41
42/**
43 * Getter for the Cma object.
44 *
45 * @return the Cma object.
46 */
47Cma* Analyser::getCma() {
48    return cma;
49}
50
51/**
52 * Returns the AlignedSequenceLoader object.
53 *
54 * @return the AlignedSequenceLoader object.
55 */
56AlignedSequenceLoader* Analyser::getLoader() {
57    return loader;
58}
59
60/**
61 * Saves the clusters to the DB as SAI.
62 *
63 * @param clusters: the cluster indices.
64 * @param threshold: the clustering threshold used.
65 *
66 * @return an GB_ERROR if some DB transaction fails.
67 */
68GB_ERROR Analyser::saveSAI(vector<size_t> clusters, double threshold) {
69
70    GBDATA   *gb_main = runningDatabase();
71    GB_ERROR  error   = GB_push_transaction(gb_main);
72    if (error) {
73        cout << "ERROR 1" << endl;
74    }
75
76    char *al_name = GBT_get_default_alignment(gb_main);
77
78    vector<char> clusts = normalizeClusters(clusters);
79    // build result string
80    stringstream ss;
81    copy(clusts.begin(), clusts.end(), ostream_iterator<char> (ss, ""));
82    string result = ss.str();
83
84    //save
85    GBDATA *gb_sai = GBT_find_or_create_SAI(gb_main, getSAIname());
86    GBDATA *gb_data = GBT_add_data(gb_sai, al_name, "data", GB_STRING);
87    error = GB_write_string(gb_data, result.c_str());
88    if (error) {
89        cout << "ERROR 2" << endl;
90        exit(1);
91    }
92
93    GBDATA *gb_options = GBT_add_data(gb_sai, al_name, "_TYPE", GB_STRING);
94    stringstream options;
95    options << "CMA_CLUSTERING: [threshold: " << threshold << "]";
96    error = GB_write_string(gb_options, options.str().c_str());
97
98    GB_commit_transaction(gb_main);
99
100    return error;
101}
102
103/**
104 * Gives clusters a reasonable name. The clustering algorithm may return
105 * clusters with indices 123 even though there are only 50 clusters.
106 * Here we normalise the cluster names, in this example we would have
107 * only clusters from 0..50 as result.
108 *
109 * @param clusters: the result of the clustering algorithm.
110 *
111 * @return the new cluster names (note that the cluster index is a char
112 *         because we have to be able to show it in the SAI, where we
113 *         are allowed to use only one character for each position).
114 */
115vector<char> Analyser::normalizeClusters(vector<size_t> clusters) {
116
117    vector<char> result;
118    map<unsigned int, char> rename_map;
119    rename_map[0] = '-';
120    char classes = '0';
121
122    for (vector<size_t>::iterator it = clusters.begin(); it != clusters.end(); ++it) {
123        if (rename_map.find(*it) == rename_map.end()) {
124            rename_map[*it] = classes++;
125        }
126        result.push_back(rename_map[*it]);
127    }
128    return result;
129
130}
131
132//--------------------------------
133
134int main(void) {
135    cout
136        << "arb_rnacma -- correlated mutation analysis" << endl
137        << "              computes clusters of correlated positions" << endl
138        << "(C) 2010 Lehrstuhl fuer Mikrobiologie, TU Muenchen" << endl
139        << "Written 2009/2010 by Breno Faria" << endl
140        << endl
141        << "arb_rnacma uses the eigen C++ library (http://eigen.tuxfamily.org/)" << endl
142        << "eigen is copyrighted by LGPL3" << endl
143        << endl;
144
145    Analyser *a   = new Analyser();
146    Cma      *cma = a->getCma();
147
148    cma->computeMutualInformationP(*(a->getLoader()->getSequences()));
149
150    list<MITuple> mituple = cma->compute_mituples(cma->getMIp());
151
152    cout << endl
153         << "The highest MI value was: " << mituple.begin()->MI
154         << " at position (" << mituple.begin()->pos1 << ", "
155         << mituple.begin()->pos2 << ")." << endl
156         << "(Note: pairs with MI-values down to the threshold will be joined in one cluster)" << endl;
157
158    while (true) {
159        cout << endl
160             << "Press Ctrl-d to quit or" << endl
161             << "choose a threshold value for the clustering process: ";
162
163
164        string input;
165        cin >> input;
166
167        if (input.empty()) {
168            cout << endl << "quit" << endl;
169            break;
170        }
171
172        double threshold = strtod(input.c_str(), NULL);
173        cout << "Building clusters with threshold = " << threshold << endl;
174
175        VectorXi cl = cma->computeClusters(mituple, size_t(cma->getMIp().cols()), threshold);
176        vector<size_t> *clusters = new vector<size_t> (0, 0);
177        AlignedSequenceLoader *l = a->getLoader();
178        size_t i = 0;
179        size_t j = 0;
180        for (vector<size_t>::iterator it = l->getPositionMap()->begin(); it
181                 != l->getPositionMap()->end(); ++it) {
182            while (i < *it) {
183                clusters->push_back(0);
184                i++;
185            }
186            clusters->push_back(cl[j]);
187            j++;
188            i++;
189        }
190
191        GB_ERROR e = a->saveSAI(*clusters, threshold);
192
193        if (e) {
194            cout << "Error" << endl;
195        }
196
197        cout << "Saved results to SAI '" << Analyser::getSAIname() << "'" << endl
198             << "(To analyse the results, open the editor and visualise the clusters using the SAI annotations)" << endl;
199    }
200
201    delete a;
202
203}
Note: See TracBrowser for help on using the repository browser.