source: trunk/GDE/SINA/builddir/src/cseq.h

Last change on this file was 19170, checked in by westram, 2 years ago
  • sina source
    • unpack + remove tarball
    • no longer ignore sina builddir.
File size: 8.8 KB
Line 
1/*
2Copyright (c) 2006-2018 Elmar Pruesse <elmar.pruesse@ucdenver.edu>
3
4This file is part of SINA.
5SINA is free software: you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free
7Software Foundation, either version 3 of the License, or (at your
8option) any later version.
9
10SINA is distributed in the hope that it will be useful, but WITHOUT ANY
11WARRANTY; without even the implied warranty of MERCHANTABILITY or
12FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13for more details.
14
15You should have received a copy of the GNU General Public License
16along with SINA.  If not, see <http://www.gnu.org/licenses/>.
17
18Additional permission under GNU GPL version 3 section 7
19
20If you modify SINA, or any covered work, by linking or combining it
21with components of ARB (or a modified version of that software),
22containing parts covered by the terms of the
23ARB-public-library-license, the licensors of SINA grant you additional
24permission to convey the resulting work. Corresponding Source for a
25non-source form of such a combination shall include the source code
26for the parts of ARB used as well as that of the covered work.
27*/
28
29#ifndef _CSEQ_H_
30#define _CSEQ_H_
31
32#include <string>
33#include <list>
34#include <utility>
35#include <vector>
36#include <map>
37#include <sstream>
38#include <iostream>
39#include <algorithm>
40#include "aligned_base.h"
41
42#include <boost/variant.hpp>
43#include <boost/lexical_cast.hpp>
44#include <boost/tuple/tuple.hpp>
45
46namespace sina {
47
48/* compressed sequence. instead of gapping the sequence, all characters
49   have positions */
50class cseq_base {
51public:
52    using idx_type = unsigned int;
53    using vidx_type = aligned_base::idx_type;
54    using value_type = aligned_base;
55    class iterator;
56    class const_iterator;
57    class const_reverse_iterator;
58    using pn_iterator = iterator;
59    using const_pn_iterator = const_iterator;
60
61    // Constructors / assignment operator
62
63    cseq_base(const char* _name, const char* _data = nullptr);
64    cseq_base() = default;
65    cseq_base& operator=(const cseq_base& rhs) = default;
66    cseq_base(const cseq_base& orig) = default;
67
68    /* remove sequence */
69    void clearSequence();
70
71    /* append bases to alignment */
72    cseq_base& append(const char* str);
73    cseq_base& append(const std::string& str);
74    cseq_base& append(const aligned_base& ab);
75
76    /* get size in bases */
77    vidx_type size() const { return bases.size(); }
78
79    /* get aligned base vector */
80    const std::vector<aligned_base>& getAlignedBases() const { return bases; }
81
82    /* get size in columns */
83    vidx_type getWidth() const { return alignment_width; }
84
85    /* set number of columns
86     *  - shifts bases inwards from the right as needed
87     *  - throws if newWidth smaller than size()
88     */
89    void setWidth(vidx_type newWidth);
90
91    /* handle insertions (multiple bases with identical position)
92     * created during alignment */
93    void fix_duplicate_positions(std::ostream& /*log*/, bool lowercase, bool remove);
94
95    /* does nothing, cseq_base is always sorted by positions */
96    static void sort() {}
97
98    /* reverse the sequence */
99    void reverse();
100
101    /* complement all bases */
102    void complement();
103
104    /* convert all bases to upper case */
105    void upperCaseAll();
106
107    // eval methods
108
109    void setAlignedBases(const std::vector<aligned_base>& vab) { bases = vab; }
110    std::string getAligned(bool nodots=false, bool dna=false) const;
111    std::string getBases() const;
112
113    /* get sequence name */
114    std::string getName() const { return name; }
115
116    /* set sequence name */
117    void setName(std::string n) { name=std::move(n); }
118
119    void assignFromCompressed(const void *data, size_t len);
120    void compressAligned(std::vector<unsigned char> &out);
121
122    //Fixme how is the score calculated?
123    //      what should the content of pairs be?
124    float calcPairScore(const std::vector<int>& pairs);
125
126    iterator begin();
127    const_iterator begin() const;
128    const_reverse_iterator rbegin() const;
129    iterator end();
130    const_iterator end() const;
131    const_reverse_iterator rend() const;
132
133    iterator getIterator(cseq_base::vidx_type i);
134    const_iterator getIterator(cseq_base::vidx_type i) const;
135
136    char operator [](vidx_type i) const;
137    const aligned_base& getById(idx_type i) const { return bases[i]; } 
138
139
140    // io operations dealing with vector<*cseq_base>
141    static void write_alignment(std::ostream& ofs, std::vector<const cseq_base*>& seqs,
142                                cseq_base::idx_type from_pos, cseq_base::idx_type to_pos,
143                                bool colors = false);
144
145
146
147    friend std::ostream& operator<<(std::ostream& out, const cseq_base& c);
148
149    bool operator==(const cseq_base& rhs) const {
150        return name == rhs.name && bases == rhs.bases; // &&
151        //attributes == rhs.attributes;
152           // && score-rhs.score<0.0001;
153    }
154    bool operator!=(const cseq_base& rhs) const {
155        return name != rhs.name || bases != rhs.bases; // ||
156        //attributes != rhs.attributes;
157            // || score != rhs.score;
158    }
159    bool operator<(const cseq_base& rhs) const { return name < rhs.name; }
160    bool operator>(const cseq_base& rhs) const { return name > rhs.name; }
161    std::vector<std::pair<unsigned int, unsigned int>> find_differing_parts(const cseq_base& right) const;
162
163private:
164    std::string name;
165    std::vector<aligned_base> bases;
166    unsigned int alignment_width{0};
167
168    template <typename FUNC>
169    friend void traverse(const cseq_base& A, const cseq_base& B, FUNC F);
170};
171
172
173std::ostream& operator<<(std::ostream& out, const cseq_base& c);
174
175
176class cseq_base::iterator
177    : public std::vector<aligned_base>::iterator
178{
179public:
180    iterator(std::vector<aligned_base>::iterator it)
181        : std::vector<aligned_base>::iterator(it) {}
182    iterator() = default;
183
184    using pn_iterator = iterator;
185    iterator prev_begin() const { iterator n(*this); return --n; }
186    iterator prev_end() const { return (*this); }
187    iterator next_begin() const { iterator n(*this); return ++n; }
188    iterator next_end() const { iterator n(*this); return n+2; }
189protected:
190
191private:
192
193};
194
195class cseq_base::const_iterator
196    : public std::vector<aligned_base>::const_iterator
197{
198 public:
199    const_iterator(std::vector<aligned_base>::const_iterator it)
200        : std::vector<aligned_base>::const_iterator(it) {}
201    const_iterator() = default;
202
203    using const_pn_iterator = const_iterator;
204    const_iterator prev_begin() const {
205        const_iterator n(*this); return --n; }
206    const_iterator prev_end() const { return (*this); }
207    const_iterator next_begin() const {
208        const_iterator n(*this); return ++n; }
209    const_iterator next_end() const {
210        const_iterator n(*this); return n+2; }
211 protected:
212
213 private:
214
215};
216
217class cseq_base::const_reverse_iterator
218    : public std::vector<aligned_base>::const_reverse_iterator
219{
220 public:
221    const_reverse_iterator(const std::vector<aligned_base>::const_reverse_iterator& it)
222      : std::vector<aligned_base>::const_reverse_iterator(it) {}
223    const_reverse_iterator() = default;
224
225 protected:
226
227 private:
228
229};
230
231template<typename T> class lexical_cast_visitor;
232
233class annotated_cseq : public cseq_base {
234public:
235    using variant = boost::variant<std::string, char, int, float>;
236
237    annotated_cseq(const char* _name, const char* _data = nullptr)
238        : cseq_base(_name, _data) {}
239    annotated_cseq() : cseq_base() {}
240
241    template<typename T>
242    void set_attr(const std::string& key, T val) {
243        attributes[key] = val;
244    }
245
246    bool has_attr(const std::string& key) const {
247        return attributes.find(key) != attributes.end();
248    }
249
250    template<typename T>
251    T get_attr(const std::string& attr) const {
252        const auto it = attributes.find(attr);
253        if (it != attributes.end()) {
254            return boost::apply_visitor(lexical_cast_visitor<T>(), it->second);
255        } else {
256            return T();
257        }
258    }
259    template<typename T>
260    T get_attr(const std::string& attr, T value) const {
261        const auto it = attributes.find(attr);
262        if (it != attributes.end()) {
263            return boost::apply_visitor(lexical_cast_visitor<T>(), it->second);
264        } else {
265            return value;
266        }
267    }
268
269    const std::map<std::string, variant>& get_attrs() const { return attributes; }
270private:
271    std::map<std::string,variant> attributes;
272};
273
274/* specialization returning variant directly */
275template<> inline annotated_cseq::variant
276annotated_cseq::get_attr<annotated_cseq::variant>(const std::string& attr) const {
277    return attributes.at(attr);
278}
279
280
281typedef annotated_cseq cseq;
282
283} // namespace sina
284
285#include "cseq_impl.h"
286
287#endif
288
289/*
290  Local Variables:
291  mode:c++
292  c-file-style:"stroustrup"
293  c-file-offsets:((innamespace . 0)(inline-open . 0)(case-label . +))
294  indent-tabs-mode:nil
295  fill-column:99
296  End:
297*/
298// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=8:softtabstop=4:encoding=utf-8:textwidth=99 :
299
Note: See TracBrowser for help on using the repository browser.