source: trunk/GDE/SINA/builddir/src/unit_tests/cseq_test.cpp

Last change on this file was 19170, checked in by westram, 2 years ago
  • sina source
    • unpack + remove tarball
    • no longer ignore sina builddir.
File size: 11.6 KB
Line 
1/*
2Copyright (c) 2012-2013 Arne Boeckman
3Copyright (c) 2012-2013 Elmar Pruesse
4
5This file is part of SINA.
6
7SINA is free software: you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
11
12This program is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with this program.  If not, see <http://www.gnu.org/licenses/>.
19*/
20
21#include <vector>
22#include <algorithm>
23#include <fstream>
24
25#define BOOST_TEST_MODULE cseq_module
26#include <boost/test/unit_test.hpp>
27
28#include <boost/algorithm/string.hpp>
29
30#include "../cseq.h"
31#include "../aligned_base.h"
32using sina::cseq;
33using sina::cseq_base;
34using sina::aligned_base;
35
36#include <string>
37using std::string;
38
39#include <iostream>
40
// Short aliases for the Boost.Test macros used by every test in this file.
#define CASE              BOOST_AUTO_TEST_CASE            /* plain test case         */
#define FIXTURE_CASE      BOOST_FIXTURE_TEST_CASE         /* test case with fixture  */
#define EQUAL             BOOST_CHECK_EQUAL               /* non-fatal equality      */
#define EQUAL_COLLECTIONS BOOST_CHECK_EQUAL_COLLECTIONS   /* element-wise equality   */
#define THROW             BOOST_CHECK_THROW               /* expects an exception    */
46
47BOOST_AUTO_TEST_SUITE(cseq_test);
48
// Reference sequences shared by the test cases below.

// All 15 base codes used by the tests, without any gaps.
const string rna{"AGCURYKMSWBDHVN"};

// The same bases spread over 30 alignment columns ('-' marks a gap).
const string rna_aligned{"--A-G---CUR-YKM-S---WBD-HVN---"};

// NOTE(review): not referenced anywhere in this file, and it does not match
// the complement expected by test_complement -- confirm before relying on it.
const string rna_aligned_complemented{"--T-C---GAR-YKM-S---WBD-HVN---"};

// rna_aligned with the leading and trailing gap runs written as '.'.
const string rna_aligned_dots{"..A-G---CUR-YKM-S---WBD-HVN..."};
53
// Checks that the sequence object `cs` is completely empty: no bases, zero
// alignment width, empty base/aligned strings, empty forward and reverse
// iterator ranges and an empty name.
//
// Every access goes through the macro argument `(cs)`, so the macro works
// for any expression, not only for a variable that happens to be called `c`.
// The body is wrapped in braces (like test_data) so that it expands to a
// single compound statement.
#define test_empty(cs)                              \
    {                                               \
        EQUAL((cs).size(), 0u);                     \
        EQUAL((cs).getWidth(), 0u);                 \
        EQUAL((cs).getBases(), string());           \
        EQUAL((cs).getAligned(true), string());     \
        EQUAL((cs).end() - (cs).begin(), 0);        \
        EQUAL((cs).rend() - (cs).rbegin(), 0);      \
        EQUAL((cs).getName(), string());            \
    }
62
63
// Checks that the sequence object `cs` holds exactly the alignment `_aligned`
// and carries the name `name`.
//
// `_aligned` is the expected aligned text with '-' for gaps; the expected
// base string is derived from it by removing every '-'.  The macro compares
// size(), getWidth(), getBases(), getAligned(true), both iterator distances
// and getName().
//
// Note: the locals `aligned` and `unaligned` live inside the macro's block,
// so callers must not pass expressions that refer to variables of the same
// names.
#define test_data(cs, name, _aligned)                                         \
    {                                                                         \
        const string aligned = (_aligned);                                    \
        const string unaligned = boost::erase_all_copy(aligned, "-");         \
        EQUAL((cs).size(), unaligned.size());                                 \
        EQUAL((cs).getWidth(), aligned.size());                               \
        EQUAL((cs).getBases(), unaligned);                                    \
        EQUAL((cs).getAligned(true), aligned);                                \
        EQUAL((cs).end() - (cs).begin(),                                      \
              static_cast<long>(unaligned.size()));                           \
        EQUAL((cs).rend() - (cs).rbegin(),                                    \
              static_cast<long>(unaligned.size()));                           \
        EQUAL((cs).getName(), name);                                          \
    }
77
78
79CASE(test_constructor_empty) {
80    cseq c;
81
82    test_empty(c);
83}
84
85CASE(test_constructur_normal) {
86    const string name("thename");
87    cseq c(name.c_str(), rna.c_str());
88
89    test_data(c, name, rna);
90}
91
92CASE(test_constructor_copy) {
93    cseq c("", rna.c_str());
94    cseq d = c;
95
96    test_data(d, "", rna);
97}
98
99CASE(test_append,
100     *boost::unit_test::expected_failures(1)) {
101    cseq c;
102
103    c.append(rna);
104    test_data(c, "", rna);
105    c.append("");
106    test_data(c, "", rna);
107    c.append(rna);
108    test_data(c, "", rna+rna);
109    c.clearSequence();
110    test_empty(c);
111    c.append(rna_aligned);
112    test_data(c, "", rna_aligned);
113    c.append(rna);
114    test_data(c, "", rna_aligned+rna);
115    c.append(rna_aligned);
116    test_data(c, "", rna_aligned+rna+rna_aligned);
117
118
119    aligned_base b(0,'A');
120    std::cerr << "==================" << std::endl
121              << "Triggering 1 message: "
122              << "\"$ cseq::append(): wrong order! A(0<75)\""
123              << std::endl;
124    c.append(b);
125    std::cerr << "Triggering 1 error: "
126              << "c.getWidth() == 75 and aligned.size() == 76"
127              << std::endl;
128    test_data(c, "", rna_aligned+rna+rna_aligned + "A");
129    std::cerr << "==================" << std::endl << std::endl;
130
131}
132
133
134CASE(test_setWidth) {
135    cseq c;
136    string twentygaps="--------------------";
137
138    c.setWidth(20);
139    test_data(c, "", twentygaps);
140    c.setWidth(40);
141    test_data(c, "", twentygaps + twentygaps);
142    c.setWidth(20);
143    test_data(c, "", twentygaps);
144
145    c.setWidth(0);
146    test_data(c, "", "");
147
148    c.append(rna_aligned);
149    c.setWidth(rna_aligned.size() + 20);
150    test_data(c, "", rna_aligned + twentygaps);
151    c.setWidth(rna_aligned.size());
152    test_data(c, "", rna_aligned);
153    c.setWidth(27);
154    test_data(c, "", "--A-G---CUR-YKM-S---WBD-HVN");
155    c.setWidth(26);
156    test_data(c, "", "--A-G---CUR-YKM-S---WBDHVN");
157    c.setWidth(25);
158    test_data(c, "", "--A-G---CUR-YKM-S--WBDHVN");
159    c.setWidth(23);
160    test_data(c, "", "--A-G---CUR-YKM-SWBDHVN");
161    c.setWidth(22);
162    test_data(c, "", "--A-G---CUR-YKMSWBDHVN");
163    c.setWidth(21);
164    test_data(c, "", "--A-G---CURYKMSWBDHVN");
165    c.setWidth(20);
166    test_data(c, "", "--A-G--CURYKMSWBDHVN");
167    c.setWidth(19);
168    test_data(c, "", "--A-G-CURYKMSWBDHVN");
169    c.setWidth(18);
170    test_data(c, "", "--A-GCURYKMSWBDHVN");
171    c.setWidth(17);
172    test_data(c, "", "--AGCURYKMSWBDHVN");
173    c.setWidth(16);
174    test_data(c, "", "-AGCURYKMSWBDHVN");
175    c.setWidth(15);
176    test_data(c, "", "AGCURYKMSWBDHVN");
177}
178
179CASE(test_setWidth_throw) {
180    cseq c("", rna_aligned.c_str());
181    THROW(c.setWidth(14), std::runtime_error);
182}
183
184CASE(test_dna) {
185    string rna = boost::to_lower_copy(rna_aligned);
186    string dna = boost::replace_all_copy(rna, "u", "t");
187    string DNA = boost::replace_all_copy(rna_aligned, "U", "T");
188    string RNA = rna_aligned;
189    cseq c("", rna.c_str());
190    cseq d("", dna.c_str());
191    EQUAL(c.getAligned(true, true), dna);
192    EQUAL(c.getAligned(true, false), rna);
193    EQUAL(d.getAligned(true, true), dna);
194    EQUAL(d.getAligned(true, false), rna);
195    c.upperCaseAll();
196    d.upperCaseAll();
197    EQUAL(c.getAligned(true, true), DNA);
198    EQUAL(c.getAligned(true, false), RNA);
199    EQUAL(d.getAligned(true, true), DNA);
200    EQUAL(d.getAligned(true, false), RNA);
201}
202
203CASE(test_operator_access) {
204    cseq c("", rna_aligned.c_str());
205    for (unsigned int i = 0; i < c.size(); ++i) {
206        EQUAL(rna_aligned[i], c[i]);
207    }
208}
209
210CASE(test_reverse) {
211    string name("testtt");
212    cseq c(name.c_str(), rna_aligned.c_str());
213    string reversed = rna_aligned;
214    std::reverse(reversed.begin(), reversed.end());
215
216    c.reverse();
217    test_data(c, name, reversed);
218    c.reverse();
219    test_data(c, name, rna_aligned);
220}
221
222CASE(test_lowercase) {
223    string rna_aligned_lower = boost::to_lower_copy(rna_aligned);
224    string rna_lower = boost::to_lower_copy(rna);
225    cseq c("", rna_lower.c_str());
226    EQUAL(c.getAligned(true), rna_lower);
227}
228
229CASE(test_uppercase){
230    string rna_aligned_lower = boost::to_lower_copy(rna_aligned);
231    string rna_lower = boost::to_lower_copy(rna);
232    cseq c("", rna_lower.c_str());
233    c.upperCaseAll();
234    EQUAL(c.getAligned(true), rna);
235    c.append(rna_aligned_lower);
236    c.upperCaseAll();
237    EQUAL(c.getAligned(true), rna + rna_aligned);
238}
239
240CASE(test_complement)
241{
242    //reference for complements:
243    //http://www.animalgenome.org/edu/gene/genetic-code.html
244    cseq c("", rna.c_str());
245    c.complement();
246    EQUAL(c.size(),rna.length());
247    EQUAL(c.getBases(),"UCGAYRMKSWVHDBN");
248}
249
250class compression_data {
251public:
252    const cseq c_alig; 
253    const cseq c_unalig;
254    static const unsigned char c_alig_compr[];
255    static const unsigned char c_unalig_compr[];
256
257    compression_data() 
258        : c_alig("", rna_aligned.c_str()),
259          c_unalig("", rna.c_str())
260    {
261    }
262};
263
264const unsigned char
265compression_data::c_alig_compr[] = {
266    0x23, 0x00, 0x80, 0x00, 0x78, 0xda, 0x73, 0x74, 
267    0x77, 0x0e, 0x0d, 0x8a, 0xf4, 0xf6, 0x0d, 0x0e,
268    0x77, 0x72, 0xf1, 0x08, 0xf3, 0xd3, 0x63, 0x62, 
269    0x62, 0x61, 0x64, 0x64, 0x02, 0x22, 0x08, 0xc5,
270    0xc2, 0x40, 0x63, 0x00, 0x00, 0x40, 0xd5, 0x04,
271    0xcc
272        /* (used this to generate verification data)
273        std::ofstream of("/tmp/thefile1");
274        of << std::string((char*)&data.front(), data.size());
275        */
276};
277
278const unsigned char
279compression_data::c_unalig_compr[] = {
280    0x23, 0x00, 0x80, 0x00, 0x78, 0xda, 0x73, 0x74, 
281    0x77, 0x0e, 0x0d, 0x8a, 0xf4, 0xf6, 0x0d, 0x0e,
282    0x77, 0x72, 0xf1, 0x08, 0xf3, 0xd3, 0x63, 0x60, 
283    0x44, 0x05, 0x0c, 0x34, 0x06, 0x00, 0x3a, 0xad,
284    0x04, 0xbd
285};
286
287FIXTURE_CASE(test_compress_unaligned, compression_data) {
288    std::vector<unsigned char> data;
289    cseq c = c_unalig;
290    c.compressAligned(data);
291    EQUAL_COLLECTIONS(data.begin(), data.end(), c_unalig_compr, 
292                      c_unalig_compr + sizeof(c_unalig_compr));
293}
294
295FIXTURE_CASE(test_compress_aligned, compression_data) {
296    std::vector<unsigned char> data;
297    cseq c = c_alig;
298    c.compressAligned(data);
299    EQUAL_COLLECTIONS(data.begin(), data.end(), c_alig_compr, 
300                      c_alig_compr + sizeof(c_alig_compr));
301}
302
303FIXTURE_CASE(test_decompress_unaligned, compression_data) {
304    cseq c;
305    c.assignFromCompressed(c_unalig_compr, sizeof(c_unalig_compr));
306    test_data(c, "", rna);
307}
308
309FIXTURE_CASE(test_decompress_aligned, compression_data) {
310    cseq c;
311    c.assignFromCompressed(c_alig_compr, sizeof(c_alig_compr));
312    test_data(c, "", rna_aligned);
313}
314
315
316CASE(test_dots) {
317    cseq c("", rna_aligned.c_str());
318    EQUAL(c.getAligned(false),rna_aligned_dots);
319}
320
321
322CASE(test_write_alignment) {
323    std::stringstream out;
324    std::vector<cseq> vs{
325        {"1", rna_aligned.c_str()},
326        {"2", rna_aligned.c_str()}
327    };
328    std::vector<const cseq_base*> vsp;
329    for (auto& i : vs) {
330        vsp.push_back(&i);
331    }
332    cseq::write_alignment(out, vsp, 0, rna_aligned.size()-1, false);
333    EQUAL(out.str(),
334          "Dumping pos 0 through 29:\n"
335          "AGCURYKMSWBDHVN-  0-1 <---(## NEW ##)  <---(%% ORIG %%) \n\n"
336        );
337
338    out.str(std::string());
339    cseq::write_alignment(out, vsp, 0, rna_aligned.size()-1, true);
340    EQUAL(out.str(),
341          "Dumping pos 0 through 29:\n"
342          "\033[34mA"
343          "\033[35mG"
344          "\033[32mC"
345          "\033[33mU"
346          "\033[0mR"
347          "YKMSWBDHVN-  0-1 <---(## NEW ##)  <---(%% ORIG %%) \n\n"
348        );
349
350    out.str(std::string());
351    cseq::write_alignment(out, vsp, 0, rna_aligned.size(), false);
352    EQUAL(out.str(), "cseq::write_alignment(): range out of bounds!\n");
353
354    out.str(std::string());
355    vs = std::vector<cseq>{{"1", "ACGU"}};
356    vsp.clear();
357    vsp.push_back(&vs[0]);
358    cseq::write_alignment(out, vsp, 0, 3, true);
359    EQUAL(out.str(),
360          "Dumping pos 0 through 3:\n"
361          "\033[34mA"
362          "\033[32mC"
363          "\033[35mG"
364          "\033[33mU"
365          "\033[0m"
366          "  0 <---(## NEW ##) \n\n");
367}
368
369CASE(test_write_alignment_empty) {
370    std::stringstream out;
371    std::vector<const cseq_base*> vsp;
372    cseq::write_alignment(out, vsp, 0, 0, false);
373    EQUAL(out.str(), "cseq::write_alignment(): no sequences?\n");
374}
375
376CASE(test_ostream_operator) {
377    const char* name = "test_name";
378    cseq c(name);
379    std::stringstream out;
380    out << c << "";
381    EQUAL(out.str(), name);
382}
383
384CASE(test_attr) {
385    const char* name = "test_name";
386    cseq c(name);
387    c.set_attr("test", 123);
388    EQUAL(c.get_attr<int>("test"), 123);
389    EQUAL(c.get_attr<std::string>("test"), "123");
390    c.set_attr("test", "hello");
391    EQUAL(c.get_attr<std::string>("test"), "hello");
392    c.set_attr("test", 2.3f);
393    EQUAL(c.get_attr<float>("test"), 2.3f);
394}
395
396BOOST_AUTO_TEST_SUITE_END(); // cseq_test
397
398/*
399  Local Variables:
400  mode:c++
401  c-file-style:"stroustrup"
402  c-file-offsets:((innamespace . 0)(inline-open . 0)(case-label . +))
403  indent-tabs-mode:nil
404  fill-column:99
405  End:
406*/
407// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:encoding=utf-8:textwidth=99 :
408
Note: See TracBrowser for help on using the repository browser.