1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : AP_seq_protein.hxx // |
---|
4 | // Purpose : Amino acid bit codes (AP_PROTEINS) and the // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #ifndef AP_SEQ_PROTEIN_HXX |
---|
12 | #define AP_SEQ_PROTEIN_HXX |
---|
13 | |
---|
14 | #ifndef AP_SEQUENCE_HXX |
---|
15 | #include <AP_sequence.hxx> |
---|
16 | #endif |
---|
17 | #ifndef DOWNCAST_H |
---|
18 | #include <downcast.h> |
---|
19 | #endif |
---|
20 | |
---|
// Amino acid codes encoded as a bit set.
// Every concrete amino acid (plus '*' and the gap) owns exactly one bit,
// so ambiguity codes can be expressed by OR-ing the single-bit values.
enum AP_PROTEINS {
    APP_ILLEGAL = 0,        // no bit set

    // ---- single-bit values (bit 0 .. bit 21) ----
    APP_A    = 0x000001,    // Ala
    APP_C    = 0x000002,    // Cys
    APP_D    = 0x000004,    // Asp
    APP_E    = 0x000008,    // Glu
    APP_F    = 0x000010,    // Phe
    APP_G    = 0x000020,    // Gly
    APP_H    = 0x000040,    // His
    APP_I    = 0x000080,    // Ile
    APP_K    = 0x000100,    // Lys
    APP_L    = 0x000200,    // Leu
    APP_M    = 0x000400,    // Met
    APP_N    = 0x000800,    // Asn
    APP_P    = 0x001000,    // Pro
    APP_Q    = 0x002000,    // Gln
    APP_R    = 0x004000,    // Arg
    APP_S    = 0x008000,    // Ser
    APP_T    = 0x010000,    // Thr
    APP_V    = 0x020000,    // Val
    APP_W    = 0x040000,    // Trp
    APP_Y    = 0x080000,    // Tyr
    APP_STAR = 0x100000,    // '*'
    APP_GAP  = 0x200000,    // known gap ('-')

    // ---- combinations of the single-bit values above ----

    APP_X   = APP_GAP - 1,      // Xaa (any real codon): every bit below APP_GAP
    APP_DOT = APP_GAP | APP_X,  // maybe a codon, maybe a gap

    APP_B = APP_N | APP_D,      // Asx ( = Asp | Asn )
    APP_Z = APP_Q | APP_E,      // Glx ( = Glu | Gln )
    APP_J = APP_L | APP_I,      // Xle ( = Ile | Leu )

    // all bits up to and including APP_GAP (same value as APP_DOT)
    APP_MAX = (APP_GAP << 1) - 1,
};
---|
58 | |
---|
59 | class AP_sequence_protein FINAL_TYPE : public AP_combinableSeq { // derived from a Noncopyable |
---|
60 | AP_PROTEINS *seq_prot; |
---|
61 | AP_PROTEINS *mut1; // combination of sequences reachable with up to 1 nucleotide mutation per codon |
---|
62 | AP_PROTEINS *mut2; // combination of sequences reachable with up to 2 nucleotide mutations per codons |
---|
63 | // Note: ANY protein or a gap is reachable with up to 3 mutations per codon |
---|
64 | |
---|
65 | AP_FLOAT count_weighted_bases() const OVERRIDE; |
---|
66 | void set(const char *isequence) OVERRIDE; |
---|
67 | void unset() OVERRIDE; |
---|
68 | |
---|
69 | const AP_PROTEINS *get_mut1() const { lazy_load_sequence(); ap_assert(mut1); return mut1; } |
---|
70 | const AP_PROTEINS *get_mut2() const { lazy_load_sequence(); ap_assert(mut2); return mut2; } |
---|
71 | |
---|
72 | public: |
---|
73 | AP_sequence_protein(const AliView *aliview); |
---|
74 | ~AP_sequence_protein() OVERRIDE; |
---|
75 | |
---|
76 | const AP_PROTEINS *get_sequence() const { lazy_load_sequence(); ap_assert(seq_prot); return seq_prot; } |
---|
77 | |
---|
78 | AP_combinableSeq *dup() const OVERRIDE; // used to get the real new element |
---|
79 | Mutations combine_seq(const AP_combinableSeq *lefts, const AP_combinableSeq *rights, char *mutation_per_site = NULp) OVERRIDE; |
---|
80 | Mutations mutations_if_combined_with(const AP_combinableSeq *other) OVERRIDE; |
---|
81 | void partial_match(const AP_combinableSeq *part, long *overlap, long *penalty) const OVERRIDE; |
---|
82 | uint32_t checksum() const OVERRIDE; |
---|
83 | int cmp_combined(const AP_combinableSeq *other) const OVERRIDE; |
---|
84 | }; |
---|
85 | |
---|
86 | |
---|
87 | |
---|
88 | #else |
---|
89 | #error AP_seq_protein.hxx included twice |
---|
90 | #endif // AP_SEQ_PROTEIN_HXX |
---|