source: trunk/GDE/SINA/builddir/src/unit_tests/kmer_search_test.cpp

Last change on this file was 19170, checked in by westram, 2 years ago
  • sina source
    • unpack + remove tarball
    • no longer ignore sina builddir.
File size: 5.3 KB
Line 
1/*
2  Copyright (c) 2006-2018 Elmar Pruesse <elmar.pruesse@ucdenver.edu>
3
4  This file is part of SINA.
5  SINA is free software: you can redistribute it and/or modify it under
6  the terms of the GNU General Public License as published by the Free
7  Software Foundation, either version 3 of the License, or (at your
8  option) any later version.
9
10  SINA is distributed in the hope that it will be useful, but WITHOUT ANY
11  WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13  for more details.
14
15  You should have received a copy of the GNU General Public License
16  along with SINA.  If not, see <http://www.gnu.org/licenses/>.
17
18  Additional permission under GNU GPL version 3 section 7
19
20  If you modify SINA, or any covered work, by linking or combining it
21  with components of ARB (or a modified version of that software),
22  containing parts covered by the terms of the
23  ARB-public-library-license, the licensors of SINA grant you additional
24  permission to convey the resulting work. Corresponding Source for a
25  non-source form of such a combination shall include the source code
26  for the parts of ARB used as well as that of the covered work.
27*/
28
29#include "../kmer_search.h"
30#include "../query_pt.h"
31
32#define BOOST_TEST_MODULE kmer_search
33
34#include <boost/test/unit_test.hpp>
35#include <boost/test/data/test_case.hpp>
36
37#include <boost/filesystem.hpp>
38namespace fs = boost::filesystem;
39
40#include <random>
41#include <set>
42#include <algorithm>
43
44#include "../query_arb.h"
45
46
47using namespace sina;
48
/**
 * Partially shuffles a range: fills the first n positions with randomly
 * chosen items of the range.
 *
 * Position by position, a random element from the not yet processed
 * remainder [begin, end) is swapped to the front. Random numbers are drawn
 * from std::rand(), so the outcome is reproducible after std::srand().
 *
 * @param begin  start of the range
 * @param end    end of the range
 * @param n      number of leading positions to fill; values larger than the
 *               range size are clamped to the range size
 * @return iterator pointing just behind the last shuffled position
 */
template<class ITER>
ITER shuffle_n(ITER begin, ITER end, size_t n) {
    size_t items = std::distance(begin, end);
    // Never process more positions than there are items: otherwise `items`
    // would drop to zero and the modulo below would divide by zero.
    n = std::min(n, items);
    while (n--) {
        ITER it = begin;
        const size_t ran = std::rand() % items;
        std::advance(it, ran);
        std::swap(*begin, *it);
        --items;
        ++begin;
    }
    return begin;
}
62
63struct Fixture {
64    query_arb *arbdb;
65    std::vector<std::string> ids;
66    const unsigned int N{1000};
67    const unsigned int M{50};
68
69    Fixture() {
70        std::srand(1234);
71        int argc = boost::unit_test::framework::master_test_suite().argc;
72        char** argv = boost::unit_test::framework::master_test_suite().argv;
73        BOOST_REQUIRE(argc>1);
74        fs::path database = argv[1];
75        BOOST_TEST_MESSAGE("Loading test database " <<  database << " for testing");
76        arbdb = query_arb::getARBDB(database);
77        ids = arbdb->getSequenceNames();
78        BOOST_REQUIRE(ids.size() > N);
79        shuffle_n(ids.begin(), ids.end(), N);
80    }
81
82    ~Fixture() {
83        BOOST_TEST_MESSAGE("Destroying fixture");
84    }
85};
86
87BOOST_AUTO_TEST_SUITE(search_tests);
88
89BOOST_FIXTURE_TEST_CASE(kmer_simple1, Fixture) {
90    fs::path dbname = arbdb->getFileName();
91    fs::path idxname = fs::path(dbname).replace_extension("sidx");
92    if (fs::exists(idxname)) {
93        fs::remove(idxname);
94    }
95    // test runs twice, once with index cache absent (removed above if present),
96    // and once with index cache generated during first run.
97    for (int run = 0; run < 2; ++run) {
98        kmer_search::release_kmer_search(dbname);
99        kmer_search *search_index = kmer_search::get_kmer_search(dbname);
100        search::result_vector family;
101        for (unsigned int i = 0; i < N; i++) {
102            if (i % (N/50) == 0) {
103                std::cerr << ".";
104            }
105            cseq query = arbdb->getCseq(ids[i]);
106            search_index->find(query, family, M);
107            BOOST_TEST(family.size() == M);
108            float max_score = family[0].score;
109            auto self = std::find_if(family.begin(), family.end(),
110                                     [&](const search::result_item &i) {
111                                         return i.sequence->getName() == query.getName();}
112                );
113            BOOST_REQUIRE((self != family.end()));
114            if (self != family.end()) {
115                BOOST_TEST(self->score == max_score);
116            }
117        }
118        std::cerr << std::endl;
119        delete search_index;
120    }
121}
122
123
124BOOST_FIXTURE_TEST_CASE(pt_simple, Fixture, *boost::unit_test::tolerance(0.0001)) {
125    search *search_index = query_pt::get_pt_search(arbdb->getFileName());
126    search::result_vector family;
127    for (unsigned int i = 0; i < N; i++) {
128        if (i % (N/50) == 0) {
129            std::cerr << ".";
130        }
131        cseq query = arbdb->getCseq(ids[i]);
132        search_index->find(query, family, M);
133        BOOST_TEST(family.size() == M);
134        float max_score = family[0].score;
135        auto self = std::find_if(family.begin(), family.end(),
136                                 [&](const search::result_item &i) {
137                                return i.sequence->getName() == query.getName();}
138            );
139        BOOST_TEST((self != family.end()));
140        // PT server counts duplicate kmers twice, allow for some discrepancy
141        BOOST_TEST(self->score > max_score - 5); // FIXME: now relative?
142    }
143    std::cerr << std::endl;
144    delete search_index;
145}
146
147
148BOOST_AUTO_TEST_SUITE_END(); // search_tests
149
150/*
151  Local Variables:
152  mode:c++
153  c-file-style:"stroustrup"
154  c-file-offsets:((innamespace . 0)(inline-open . 0)(case-label . +))
155  indent-tabs-mode:nil
156  fill-column:99
157  End:
158*/
159// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:encoding=utf-8:textwidth=99 :
Note: See TracBrowser for help on using the repository browser.