source: trunk/GDE/SINA/builddir/src/cseq_comparator.h

Last change on this file was 19170, checked in by westram, 2 years ago
  • sina source
    • unpack + remove tarball
    • no longer ignore sina builddir.
File size: 5.5 KB
Line 
1/*
2Copyright (c) 2006-2018 Elmar Pruesse <elmar.pruesse@ucdenver.edu>
3
4This file is part of SINA.
5SINA is free software: you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free
7Software Foundation, either version 3 of the License, or (at your
8option) any later version.
9
10SINA is distributed in the hope that it will be useful, but WITHOUT ANY
11WARRANTY; without even the implied warranty of MERCHANTABILITY or
12FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13for more details.
14
15You should have received a copy of the GNU General Public License
16along with SINA.  If not, see <http://www.gnu.org/licenses/>.
17
18Additional permission under GNU GPL version 3 section 7
19
20If you modify SINA, or any covered work, by linking or combining it
21with components of ARB (or a modified version of that software),
22containing parts covered by the terms of the
23ARB-public-library-license, the licensors of SINA grant you additional
24permission to convey the resulting work. Corresponding Source for a
25non-source form of such a combination shall include the source code
26for the parts of ARB used as well as that of the covered work.
27*/
28
29#ifndef _CSEQ_COMPARATOR_H_
30#define _CSEQ_COMPARATOR_H_
31
32#include "cseq.h"
33#include <boost/program_options.hpp>
34
35namespace sina {
36
37/**
38 * Type defining how IUPAC encoded bases should
39 * be compared.
40 * OPTIMISTIC: A=N (A and N compare as equal)
41 * PESSIMISTIC: A!=N (A and N compare as different)
42 */
43enum CMP_IUPAC_TYPE {
44    CMP_IUPAC_OPTIMISTIC,
45    CMP_IUPAC_PESSIMISTIC,
46    CMP_IUPAC_EXACT // NOTE(review): not described in the comment above; semantics not visible in this header -- confirm in implementation
47};
48
49/**
50 * Type defining which distance correction should
51 * be applied.
52 * NONE: do not correct
53 * JC: use Jukes-Cantor correction
54 */
55enum CMP_DIST_TYPE {
56    CMP_DIST_NONE, // no correction
57    CMP_DIST_JC    // Jukes-Cantor
58};
59
60/**
61 * Type defining relative to what the distance/identity/similarity
62 * should be computed.
63 * ABS: absolute (relative to 1)
64 * QUERY: relative to query length
65 * TARGET: relative to reference sequence length
66 * OVERLAP: relative to length of overlapping part
67 * ALL: relative to total alignment positions
68 *      e.g.:  s1:------------AGCUAGCU-----
69 *             s2:----------------AGCUAGCU-
70 *             positions:     ^^^^^^^^^^^^ = 12
71 *             equal:             ^^^^     = 4
72 *             distance:          4/12     = 0.33333...
73 * AVERAGE: relative to average of query and target length
74 * MIN, MAX: relative to length of shorter/longer of query/target
75 * NOGAP: relative to match/mismatch columns
76 */
77enum CMP_COVER_TYPE {
78    CMP_COVER_ABS,     // relative to 1
79    CMP_COVER_QUERY,   // relative to query length
80    CMP_COVER_TARGET,  // relative to reference sequence length
81    CMP_COVER_OVERLAP, // relative to overlapping part
82    CMP_COVER_ALL,     // relative to total alignment positions
83    CMP_COVER_AVERAGE, // relative to average of query and target length
84    CMP_COVER_MIN,     // relative to the shorter of query/target
85    CMP_COVER_MAX,     // relative to the longer of query/target
86    CMP_COVER_NOGAP    // relative to match/mismatch columns
87};
88
89/**
90 * Validator interpreting the CLI encoding of CMP_IUPAC_TYPE
91 * for boost::program_options. Per the library's validator convention, v receives the result parsed from values; the two unnamed parameters only select this overload.
92 */
93void 
94validate(boost::any& v, const std::vector<std::string>& values,
95         CMP_IUPAC_TYPE* /*unused*/, int /*unused*/);
96
97/**
98 * Validator interpreting the CLI encoding of CMP_DIST_TYPE
99 * for boost::program_options. Per the library's validator convention, v receives the result parsed from values; the two unnamed parameters only select this overload.
100 */
101void 
102validate(boost::any& v, const std::vector<std::string>& values,
103         CMP_DIST_TYPE* /*unused*/, int /*unused*/);
104
105/**
106 * Validator interpreting the CLI encoding of CMP_COVER_TYPE
107 * for boost::program_options. Per the library's validator convention, v receives the result parsed from values; the two unnamed parameters only select this overload.
108 */
109void 
110validate(boost::any& v, const std::vector<std::string>& values,
111         CMP_COVER_TYPE* /*unused*/, int /*unused*/);
112
113
114class cseq_comparator { // callable comparing two cseq objects according to the configured IUPAC, distance-correction and cover rules
115public:
116    /**
117     * Builds the options_description exposing the class parameters
118     * to the CLI interface.
119     * @param prefix   prefix for option names (defaults to the empty string)
120     */
121    static boost::program_options::options_description
122    get_options_description(const char* prefix="");
123   
124    /**
125     * Factory method building a comparator from the variables
126     * map retrieved via boost program_options.
127     * @param vm       variables map containing parsed cmdline (taken by non-const reference)
128     * @param prefix   prefix for option names (defaults to the empty string)
129     */
130    static cseq_comparator
131    make_from_variables_map(boost::program_options::variables_map& vm,
132                            const char* prefix="");
133   
134    /**
135     * Constructor taking explicit configuration.
136     * See the enum type definitions for an explanation of the values.
137     *
138     * @param iupac     how IUPAC encoded bases are compared (CMP_IUPAC_TYPE)
139     * @param dist      which distance correction is applied (CMP_DIST_TYPE)
140     * @param cover     relative to what the result is computed (CMP_COVER_TYPE)
141     * @param filter_lc if true lower case bases at the beginning
142     *                  or end are ignored.
143     */
144    cseq_comparator(CMP_IUPAC_TYPE iupac, CMP_DIST_TYPE dist, 
145                    CMP_COVER_TYPE cover, bool filter_lc);
146   
147    /**
148     * Default constructor. NOTE(review): definition not visible here; presumably keeps the in-class member defaults below -- confirm.
149     */
150    cseq_comparator();
151   
152    /**
153     * Operator provided by the comparator object. Returns a float; NOTE(review): whether it is a distance, identity or similarity is not stated in this header -- confirm.
154     * @param query  query sequence
155     * @param target target sequence
156     */
157    float 
158    operator()(const cseq& query, const cseq& target); // NOTE(review): not declared const, so it cannot be invoked on a const cseq_comparator
159   
160private:
161    CMP_IUPAC_TYPE iupac_rule{CMP_IUPAC_OPTIMISTIC}; // IUPAC comparison rule; in-class default is OPTIMISTIC
162    CMP_DIST_TYPE dist_rule{CMP_DIST_NONE};          // distance correction; in-class default is NONE
163    CMP_COVER_TYPE cover_rule{CMP_COVER_QUERY};      // cover rule; in-class default is QUERY
164    bool filter_lc_rule{false};                      // lower-case filtering flag; in-class default is false
165};
166
167/**
168 * ostream output operator for CMP_IUPAC_TYPE (writes t to out)
169 */
170std::ostream& 
171operator<<(std::ostream& out, const CMP_IUPAC_TYPE& t);
172
173/**
174 * ostream output operator for CMP_DIST_TYPE (writes t to out)
175 */
176std::ostream& 
177operator<<(std::ostream& out, const CMP_DIST_TYPE& t);
178
179/**
180 * ostream output operator for CMP_COVER_TYPE (writes t to out)
181 */
182std::ostream& 
183operator<<(std::ostream& out, const CMP_COVER_TYPE& t);
184
185} // namespace sina
186
187
188#endif // _CSEQ_COMPARATOR_H_
189
190/*
191  Local Variables:
192  mode:c++
193  c-file-style:"stroustrup"
194  c-file-offsets:((innamespace . 0)(inline-open . 0)(case-label . 0))
195  indent-tabs-mode:nil
196  fill-column:99
197  End:
198*/
199// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:encoding=utf-8:textwidth=99 :
Note: See TracBrowser for help on using the repository browser.