| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : AP_seq_protein.hxx // |
|---|
| 4 | // Purpose : amino-acid bit flags (AP_PROTEINS) and class AP_sequence_protein // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // =============================================================== // |
|---|
| 10 | |
|---|
| 11 | #ifndef AP_SEQ_PROTEIN_HXX |
|---|
| 12 | #define AP_SEQ_PROTEIN_HXX |
|---|
| 13 | |
|---|
| 14 | #ifndef AP_SEQUENCE_HXX |
|---|
| 15 | #include <AP_sequence.hxx> |
|---|
| 16 | #endif |
|---|
| 17 | #ifndef DOWNCAST_H |
|---|
| 18 | #include <downcast.h> |
|---|
| 19 | #endif |
|---|
| 20 | |
|---|
// Bit-set encoding of protein sequence symbols.
//
// Each amino acid, the '*' symbol and the known gap ('-') own exactly one bit
// (bits 0..21). Ambiguity codes are bitwise unions of those single-bit values,
// so a value of this type can represent a set of possible symbols.
enum AP_PROTEINS {
    APP_ILLEGAL = 0,            // no bit set

    APP_A    = (1 <<  0),       // Ala
    APP_C    = (1 <<  1),       // Cys
    APP_D    = (1 <<  2),       // Asp
    APP_E    = (1 <<  3),       // Glu
    APP_F    = (1 <<  4),       // Phe
    APP_G    = (1 <<  5),       // Gly
    APP_H    = (1 <<  6),       // His
    APP_I    = (1 <<  7),       // Ile
    APP_K    = (1 <<  8),       // Lys
    APP_L    = (1 <<  9),       // Leu
    APP_M    = (1 << 10),       // Met
    APP_N    = (1 << 11),       // Asn
    APP_P    = (1 << 12),       // Pro
    APP_Q    = (1 << 13),       // Gln
    APP_R    = (1 << 14),       // Arg
    APP_S    = (1 << 15),       // Ser
    APP_T    = (1 << 16),       // Thr
    APP_V    = (1 << 17),       // Val
    APP_W    = (1 << 18),       // Trp
    APP_Y    = (1 << 19),       // Tyr
    APP_STAR = (1 << 20),       // *
    APP_GAP  = (1 << 21),       // known gap ('-')

    // -------------------- above are bit values, below combinations of them

    // APP_GAP is the highest single bit, so APP_GAP-1 sets every bit below it
    // (all amino acids plus '*').
    APP_X   = (APP_GAP-1),      // Xaa (any real codon)
    APP_DOT = APP_X | APP_GAP,  // maybe a codon, maybe a gap

    APP_B = APP_D | APP_N,      // Asx ( = Asp | Asn )
    APP_Z = APP_E | APP_Q,      // Glx ( = Glu | Gln )
    APP_J = APP_I | APP_L,      // Xle ( = Ile | Leu )

    // All defined bits set; numerically identical to APP_DOT.
    APP_MAX = (APP_GAP<<1)-1,
};
|---|
| 58 | |
|---|
| 59 | class AP_sequence_protein FINAL_TYPE : public AP_combinableSeq { // derived from a Noncopyable |
|---|
| 60 | AP_PROTEINS *seq_prot; |
|---|
| 61 | AP_PROTEINS *mut1; // combination of sequences reachable with up to 1 nucleotide mutation per codon |
|---|
| 62 | AP_PROTEINS *mut2; // combination of sequences reachable with up to 2 nucleotide mutations per codons |
|---|
| 63 | // Note: ANY protein or a gap is reachable with up to 3 mutations per codon |
|---|
| 64 | |
|---|
| 65 | AP_FLOAT count_weighted_bases() const OVERRIDE; |
|---|
| 66 | void set(const char *isequence) OVERRIDE; |
|---|
| 67 | void unset() OVERRIDE; |
|---|
| 68 | |
|---|
| 69 | const AP_PROTEINS *get_mut1() const { lazy_load_sequence(); ap_assert(mut1); return mut1; } |
|---|
| 70 | const AP_PROTEINS *get_mut2() const { lazy_load_sequence(); ap_assert(mut2); return mut2; } |
|---|
| 71 | |
|---|
| 72 | public: |
|---|
| 73 | AP_sequence_protein(const AliView *aliview); |
|---|
| 74 | ~AP_sequence_protein() OVERRIDE; |
|---|
| 75 | |
|---|
| 76 | const AP_PROTEINS *get_sequence() const { lazy_load_sequence(); ap_assert(seq_prot); return seq_prot; } |
|---|
| 77 | |
|---|
| 78 | AP_combinableSeq *dup() const OVERRIDE; // used to get the real new element |
|---|
| 79 | Mutations combine_seq(const AP_combinableSeq *lefts, const AP_combinableSeq *rights, char *mutation_per_site = NULp) OVERRIDE; |
|---|
| 80 | Mutations mutations_if_combined_with(const AP_combinableSeq *other) OVERRIDE; |
|---|
| 81 | void partial_match(const AP_combinableSeq *part, long *overlap, long *penalty) const OVERRIDE; |
|---|
| 82 | uint32_t checksum() const OVERRIDE; |
|---|
| 83 | int cmp_combined(const AP_combinableSeq *other) const OVERRIDE; |
|---|
| 84 | }; |
|---|
| 85 | |
|---|
| 86 | |
|---|
| 87 | |
|---|
| 88 | #else |
|---|
| 89 | #error AP_seq_protein.hxx included twice |
|---|
| 90 | #endif // AP_SEQ_PROTEIN_HXX |
|---|