source: trunk/GDE/SINA/builddir/src/log.cpp

Last change on this file was 19170, checked in by westram, 2 years ago
  • sina source
    • unpack + remove tarball
    • no longer ignore sina builddir.
File size: 13.2 KB
Line 
1/*
2Copyright (c) 2006-2018 Elmar Pruesse <elmar.pruesse@ucdenver.edu>
3
4This file is part of SINA.
5SINA is free software: you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free
7Software Foundation, either version 3 of the License, or (at your
8option) any later version.
9
10SINA is distributed in the hope that it will be useful, but WITHOUT ANY
11WARRANTY; without even the implied warranty of MERCHANTABILITY or
12FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13for more details.
14
15You should have received a copy of the GNU General Public License
16along with SINA.  If not, see <http://www.gnu.org/licenses/>.
17
18Additional permission under GNU GPL version 3 section 7
19
20If you modify SINA, or any covered work, by linking or combining it
21with components of ARB (or a modified version of that software),
22containing parts covered by the terms of the
23ARB-public-library-license, the licensors of SINA grant you additional
24permission to convey the resulting work. Corresponding Source for a
25non-source form of such a combination shall include the source code
26for the parts of ARB used as well as that of the covered work.
27*/
28
29#include "log.h"
30 
31#include <list>
32using std::list;
33
34#include <string>
35using std::string;
36
37#include <sstream>
38using std::stringstream;
39
40#include <iostream>
41using std::endl;
42
43#include <map>
44using std::map;
45
46#include <fstream>
47#include <memory>
48
49#include <boost/program_options.hpp>
50#include <boost/program_options/options_description.hpp>
51#include <boost/filesystem.hpp>
52
53#include "spdlog/spdlog.h"
54#include "spdlog/sinks/stdout_color_sinks.h"
55#include "spdlog/sinks/basic_file_sink.h"
56
57#include "cseq.h"
58#include "cseq_comparator.h"
59#include "query_arb.h"
60#include "query_arb.h"
61#include "progress.h"
62#include "search.h"
63
64using namespace sina;
65namespace po = boost::program_options;
66namespace fs = boost::filesystem;
67using spdlog::level::level_enum;
68
69static auto logger = Log::create_logger("log");
70
71// having a counter in boost::program_options is difficult:
72
73/* Type to be handled as counter */
74template<typename T>
75struct counting_type {
76    T val;
77    explicit counting_type(const T& t) : val(t) {}
78    static T initial() { return 0; };
79    static T increment(const T& t) { return t+1; }
80};
81
82/* Validator handling options of counting_type<T> */
83template<typename T>
84void validate(boost::any& v,
85              const std::vector<std::string>&  /*xs*/,
86              counting_type<T>* /*unused*/, long /*unused*/) {
87    if (v.empty()) {
88        v = counting_type<T>::increment(counting_type<T>::initial());
89    } else {
90        v = counting_type<T>::increment(boost::any_cast<T>(v));
91    }
92}
93
94/* Specialization of typed_value for counting_type
95 *
96 * This is necessary so the value store can contain T while handling
97 * is done as counting_type<T>
98 */
99template<typename T>
100class counting_value : public po::typed_value<counting_type<T>, char> {
101public:
102    /* The store is of type T, but typed_value doesn't know that. */
103    using super = po::typed_value<counting_type<T>, char>;
104    explicit counting_value(T* store_to)
105        : super(reinterpret_cast<counting_type<T>*>(store_to))
106    {
107        super::zero_tokens();
108        //default_value(counting_type<T>::initial(), "");
109    }
110
111    counting_value* default_value(const T& v, const std::string& x) {
112        super::default_value(counting_type<T>(v),x);
113    }
114
115    /* Same here, need turn any(int) to any(counting<int>) before notify */
116    void notify(const boost::any& value_store) const override {
117        const auto* value = boost::any_cast<T>(&value_store);
118        if (value) {
119            boost::any vs(*reinterpret_cast<const counting_type<T>*>(value));
120            super::notify(vs);
121        } else {
122            super::notify(value_store);
123        }
124    }
125};
126
127
128template<typename T>
129po::typed_value<counting_type<T> >* counter(T* t) {
130  return new counting_value<T>(t);
131}
132
133template<typename T>
134po::typed_value<counting_type<T> >* counter() {
135  return new counting_value<T>(0);
136}
137
138
139
140struct Log::options {
141    int quiet_count{0};
142    int verbose_count{0};
143    level_enum verbosity{spdlog::level::warn};
144    bool show_diff{false};
145    bool show_dist{false};
146    bool colors{false};
147    fs::path origdb;
148    fs::path logfile;
149};
150
151std::unique_ptr<Log::options> Log::opts;
152
153// Since this is modified from other compilation units before
154// this compilation unit is initialized, we must use a
155// pointer, lest the vector get initialized twice (i.e.
156// overwritten during program load). The pointer should
157// be initialized to 0 automagically.
158std::vector<spdlog::sink_ptr> *sinks;
159
160void
161Log::get_options_description(po::options_description& main,
162                             po::options_description& adv) {
163    opts = std::unique_ptr<options>(new options);
164
165    main.add_options()
166        ("verbose,v", counter<int>(&opts->verbose_count), "increase verbosity")
167        ("quiet,q", counter<int>(&opts->quiet_count), "decrease verbosity")
168        ("log-file", po::value<fs::path>(&opts->logfile), "file to write log to")
169        ;
170
171    po::options_description od("Logging");
172    od.add_options()
173        ("show-diff", po::bool_switch(&opts->show_diff), "show difference to original alignment")
174        ("show-dist", po::bool_switch(&opts->show_dist), "show distance to original alignment")
175        ("orig-db", po::value<fs::path>(&opts->origdb), "ARB DB containing original alignment")
176        ("colors", po::bool_switch(&opts->colors), "distinguish printed bases using colors")
177        ;
178
179    adv.add(od);
180}
181
182void
183Log::validate_vm(po::variables_map& vm,
184                 po::options_description& /*desc*/) {
185    // calculate effective log level
186    auto verbosity = static_cast<int>(opts->verbosity);
187    verbosity += opts->quiet_count;
188    verbosity -= opts->verbose_count;
189    opts->verbosity = static_cast<level_enum>(verbosity);
190    opts->verbosity = std::min(
191        std::max(opts->verbosity, spdlog::level::trace),
192        spdlog::level::off);
193
194    // create logging sinks
195    auto console_sink = (*sinks)[0];
196    console_sink->set_level(opts->verbosity);
197    console_sink->set_pattern("%T [%n] %^%v%$");
198
199    if (vm.count("log-file") != 0u) {
200        auto file_sink = std::make_shared<spdlog::sinks::basic_file_sink_mt>(
201            opts->logfile.native(), true);
202        file_sink->set_level(std::min(spdlog::level::info, opts->verbosity));
203        sinks->push_back(file_sink);
204    }
205
206    // update sinks of pre-existing loggers
207    spdlog::apply_all([&](std::shared_ptr<spdlog::logger> l) {
208            l->sinks() = *sinks;
209            l->set_level(spdlog::level::trace);
210        });
211
212    logger->info("Loglevel set to {}",
213                 to_string_view(opts->verbosity));
214
215    // database for computing distance to test case
216    if (vm["orig-db"].empty()) {
217        if (!vm["db"].empty()) {
218            opts->origdb = vm["db"].as<fs::path>();
219        }
220    }
221}
222
223void
224Log::add_sink(spdlog::sink_ptr sink) {
225    sinks->push_back(sink);
226    spdlog::apply_all([&](std::shared_ptr<spdlog::logger> l) {
227                          l->sinks() = *sinks;
228                      });
229}
230
231void
232Log::remove_sink(spdlog::sink_ptr sink) {
233    sinks->erase(std::remove(sinks->begin(), sinks->end(), sink), sinks->end());
234    spdlog::apply_all([&](std::shared_ptr<spdlog::logger> l) {
235                          l->sinks() = *sinks;
236                      });
237}
238
239
240std::shared_ptr<spdlog::logger>
241Log::create_logger(std::string name) {
242    auto logger = spdlog::get(name);
243    if (logger) {
244        return logger;
245    }
246    if (sinks == nullptr) {
247        sinks = new std::vector<spdlog::sink_ptr>();
248        sinks->push_back(std::make_shared<terminal_stderr_sink_mt>());
249    }
250    logger = std::make_shared<spdlog::logger>(name, sinks->begin(), sinks->end());
251    spdlog::register_logger(logger);
252    return logger;
253}
254
255
256/// pipeline stuff ///
257
258struct Log::printer::priv_data {
259    ~priv_data();
260
261    int sequence_num{0};
262
263    // stats
264    double total_sps{0};
265    double total_cpm{0};
266    double total_idty{0};
267    double total_bps{0};
268
269    std::ofstream out;
270
271    query_arb *arb{nullptr};
272
273    std::vector<int> helix_pairs;
274
275    void show_dist(cseq& orig, cseq& aligned, search::result_vector& ref);
276};
277
278
279void Log::printer::priv_data::show_dist(cseq& orig, cseq& aligned, search::result_vector& ref) {
280    if (arb != nullptr) {  // we have a comparison db
281        string name = orig.getName();
282        orig = arb->getCseq(name);
283        logger->info("len-orig: {}", orig.size());
284        logger->info("len-alig: {}", aligned.size());
285    }
286    if (orig.getWidth() != aligned.getWidth()) {
287        logger->error("Cannot show dist - {} and {} have lengths {} and {}",
288                     orig.getName(), aligned.getName(), orig.getWidth(), aligned.getWidth());
289        return;
290    }
291
292    cseq_comparator cmp_exact(CMP_IUPAC_EXACT, CMP_DIST_NONE,
293                              CMP_COVER_QUERY, false);
294    float sps = cmp_exact(orig, aligned);
295
296    logger->info("orig_idty: {:.6f}", sps);
297    total_sps += sps;
298
299    if (ref.empty()) {
300        logger->info("reference / search result empty?");
301        return;
302    }
303
304    cseq_comparator cmp_optimistic(CMP_IUPAC_OPTIMISTIC, CMP_DIST_NONE,
305                                   CMP_COVER_QUERY, false);
306
307    auto scored = ref; // copy
308    for (auto& item : scored) {
309        item.score = cmp_optimistic(orig, *item.sequence);
310    }
311
312    std::sort(scored.begin(), scored.end());
313    auto &closest = *scored.rbegin();
314
315    float orig_idty = closest.score;
316    total_idty += orig_idty;
317    logger->info("orig_closest_idty: {:.6f}", orig_idty);
318
319    float aligned_idty = cmp_optimistic(aligned, *closest.sequence);
320    logger->info("closest_idty: {:.6f}", aligned_idty);
321    float cpm = orig_idty - aligned_idty;
322    logger->info("cpm: {:.6f}", cpm);
323
324    total_cpm += cpm;
325}
326
327
328Log::printer::printer()
329    : data(new priv_data())
330{
331    if (!opts->origdb.empty()) {
332        data->arb = query_arb::getARBDB(opts->origdb);
333    }
334    if (data->arb != nullptr) {
335        data->helix_pairs = data->arb->getPairs();
336    } 
337
338    /*
339    data->out.open(opts->logfile.c_str(), std::ios_base::app);
340
341    if (!data->out) {
342        stringstream tmp;
343        tmp << "Unable to open file \"" << opts->logfile << "\" for writing.";
344        throw std::runtime_error(tmp.str());
345    }
346    */
347}
348
349Log::printer::printer(const printer& o) = default;
350Log::printer& Log::printer::operator=(const printer& o) = default;
351Log::printer::~printer() = default;
352
353Log::printer::priv_data::~priv_data() {
354    if (Log::opts->show_dist) {
355        logger->warn("avg_sps: {:.6f}", total_sps / sequence_num);
356        logger->warn("avg_cpm: {:.6f}", total_cpm / sequence_num);
357        logger->warn("avg_idty: {:.6f}", total_idty / sequence_num);
358        logger->warn("avg_bps: {:.6f}", total_bps / sequence_num);
359    }
360}
361
362/// actual "filter" ///
363
364tray
365Log::printer::operator()(tray t) {
366    if (t.input_sequence == nullptr) {
367        throw std::runtime_error("Received broken tray in " __FILE__);
368    }
369
370    logger->info("sequence_number: {}", t.seqno);
371    logger->info("sequence_identifier: {}", t.input_sequence->getName());
372
373    if (t.aligned_sequence == nullptr) {
374        logger->info("{}: {}", query_arb::fn_align_log, t.log.str());
375        logger->info("{}: {}", query_arb::fn_fullname,
376                     t.input_sequence->get_attr<string>(query_arb::fn_fullname));
377        logger->info("alignment failed!");
378        return t;
379    }
380    ++data->sequence_num;
381    cseq& aligned = *t.aligned_sequence;
382
383    float bps = aligned.calcPairScore(data->helix_pairs);
384    data->total_bps += bps;
385    aligned.set_attr(query_arb::fn_bpscore, (int)(100 * bps));
386    aligned.set_attr(query_arb::fn_align_log, t.log.str());
387    aligned.set_attr(query_arb::fn_nuc, (int)aligned.size());
388
389    if (aligned.size() != 0u) {
390        aligned.set_attr(query_arb::fn_astart, (int)aligned.begin()->getPosition());
391        aligned.set_attr(query_arb::fn_astop, (int)((--aligned.end())->getPosition()));
392    } else {  // shouldn't happen, but let's be careful
393        aligned.set_attr(query_arb::fn_astart, 0);
394        aligned.set_attr(query_arb::fn_astop, 0);
395    }
396
397    for (auto& ap: aligned.get_attrs()) {
398        string val = boost::apply_visitor(lexical_cast_visitor<string>(),
399                                          ap.second);
400        logger->info("{}: {}", ap.first, val);
401    }
402
403    search::result_vector ref;
404    if (t.search_result != nullptr) {
405        ref = *t.search_result;
406    } else if (t.alignment_reference != nullptr) {
407        ref = *t.alignment_reference;
408    }
409
410    if (opts->show_dist) {
411        data->show_dist(*t.input_sequence, aligned, ref);
412    }
413
414    if (opts->show_diff) {
415        std::vector<const cseq_base*> refptrs;
416        for (auto& i : ref) {
417            refptrs.push_back(i.sequence);
418        }
419        refptrs.push_back(t.input_sequence);
420        refptrs.push_back(t.aligned_sequence);
421
422        stringstream tmp;
423        for (auto part : t.input_sequence->find_differing_parts(aligned)) {
424            cseq::write_alignment(tmp, refptrs, part.first, part.second, opts->colors);
425        }
426        tmp << endl << endl;
427        logger->info(tmp.str());
428    }
429
430    return t;
431}
432
433
434/*
435  Local Variables:
436  mode:c++
437  c-file-style:"stroustrup"
438  c-file-offsets:((innamespace . 0)(inline-open . 0)(case-label . 0))
439  indent-tabs-mode:nil
440  fill-column:99
441  End:
442*/
443// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:encoding=utf-8:textwidth=99 :
Note: See TracBrowser for help on using the repository browser.