| 1 | // =========================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : awt_pro_a_nucs.hxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // =========================================================== // |
|---|
| 10 | |
|---|
| 11 | #ifndef AWT_PRO_A_NUCS_HXX |
|---|
| 12 | #define AWT_PRO_A_NUCS_HXX |
|---|
| 13 | |
|---|
| 14 | |
|---|
// Bit flags for nucleotide sets: each base (and the gap) owns exactly one bit,
// so a set of bases is the bitwise OR of its members.
enum AP_BASES {
    AP_A   = 1 << 0,
    AP_C   = 1 << 1,
    AP_G   = 1 << 2,
    AP_T   = 1 << 3,
    AP_S   = 1 << 4,                           // space (gap)
    AP_N   = AP_A | AP_C | AP_G | AP_T | AP_S, // all five bits set (== 31)
    AP_MAX = 1 << 5                            // one above the largest bitset (== 32)
};
|---|
| 24 | |
|---|
| 25 | // typedef enum { |
|---|
| 26 | // AP_UNIVERSAL, AP_MITO, AP_VERTMITO, AP_FLYMITO, AP_YEASTMITO, AP_CILITATE |
|---|
| 27 | // } AP_CODE_TYPE; |
|---|
| 28 | |
|---|
// Self-referential node holding three nucleotide bitsets.
// NOTE(review): presumably one node per codon of a protein, with nucbits[i]
// holding the AP_BASES bits accepted at codon position i -- confirm against
// the implementation file.
struct arb_r2a_pro_2_nucs {
    arb_r2a_pro_2_nucs *next;       // pointer to another node of the same type
    char                nucbits[3]; // bitsets of nucs

    // Both are only declared here; their definitions live outside this header.
    arb_r2a_pro_2_nucs();
    ~arb_r2a_pro_2_nucs();
};
|---|
| 36 | |
|---|
// Description of one protein: its codes, an index and the attached
// arb_r2a_pro_2_nucs data.
// NOTE(review): by the member names, 'single_pro' is presumably the one-letter
// and 'tri_pro' the three-letter protein code -- confirm.
struct arb_r2a_pro_2_nuc {
    char single_pro;
    // Three characters without room for a terminator. The original note says
    // it is "null terminated (because of index)".
    // NOTE(review): this presumably relies on a zero byte of 'index' sitting
    // directly behind the array, which depends on layout and byte order --
    // confirm before reading tri_pro as a C string.
    char tri_pro[3];
    int  index;                       // < 0x007fffff

    struct arb_r2a_pro_2_nucs *nucs;

    // Both are only declared here; their definitions live outside this header.
    arb_r2a_pro_2_nuc();
    ~arb_r2a_pro_2_nuc();
};
|---|
| 47 | |
|---|
// Distance definition for one protein.
typedef struct {
    // Proteins at distance: every bit in patd[x] represents one protein.
    //   bit set in patd[0]  =>  distance == 0
    //   bit set in patd[1]  =>  distance <= 1
    //   bit set in patd[2]  =>  distance <= 2
    long patd[3];

    // Bitsets of nucs.
    char nucbits[3];
} AWT_PDP;
|---|
| 59 | |
|---|
| 60 | class AWT_translator; |
|---|
| 61 | |
|---|
| 62 | class AWT_distance_meter { |
|---|
| 63 | AWT_PDP *dist_[64]; // sets of proteins with special distance [64 > max_aa |
|---|
| 64 | |
|---|
| 65 | long transform07[256]; // like dist.patd[1] but for bits 0-7 |
|---|
| 66 | long transform815[256]; |
|---|
| 67 | long transform1623[256]; |
|---|
| 68 | |
|---|
| 69 | public: |
|---|
| 70 | AWT_distance_meter(const AWT_translator *translator); |
|---|
| 71 | ~AWT_distance_meter(); |
|---|
| 72 | |
|---|
| 73 | const AWT_PDP *getDistance(int idx) const { return dist_[idx]; } |
|---|
| 74 | AWT_PDP *getDistance(int idx) { return dist_[idx]; } |
|---|
| 75 | }; |
|---|
| 76 | |
|---|
| 77 | |
|---|
| 78 | class AWT_translator { |
|---|
| 79 | private: |
|---|
| 80 | mutable AWT_distance_meter *distance_meter; // (mutable to allow lazy-evaluation) |
|---|
| 81 | |
|---|
| 82 | int code_nr; |
|---|
| 83 | GB_HASH *t2i_hash; // hash table trin >> singlepro |
|---|
| 84 | arb_r2a_pro_2_nuc *s2str[256]; // singlecode protein >> dna ... |
|---|
| 85 | long *pro_2_bitset; // |
|---|
| 86 | char *nuc_2_bitset; // dna to |
|---|
| 87 | unsigned char index_2_spro[64]; // 64 > max_aa |
|---|
| 88 | |
|---|
| 89 | int realmax_aa; // number of real AA + stop codon |
|---|
| 90 | int max_aa; // plus ambigous codes |
|---|
| 91 | |
|---|
| 92 | void build_table(unsigned char pbase, const char *tri_pro, const char *nuc); |
|---|
| 93 | long *create_pro_to_bits() const; |
|---|
| 94 | |
|---|
| 95 | public: |
|---|
| 96 | |
|---|
| 97 | AWT_translator(int arb_protein_code_nr); |
|---|
| 98 | ~AWT_translator(); |
|---|
| 99 | |
|---|
| 100 | const AWT_distance_meter *getDistanceMeter() const; |
|---|
| 101 | AWT_distance_meter *getDistanceMeter() { |
|---|
| 102 | return const_cast<AWT_distance_meter*>(const_cast<const AWT_translator*>(this)->getDistanceMeter()); |
|---|
| 103 | } |
|---|
| 104 | |
|---|
| 105 | int CodeNr() const { return code_nr; } |
|---|
| 106 | const GB_HASH *T2iHash() const { return t2i_hash; } |
|---|
| 107 | const arb_r2a_pro_2_nuc *S2str(int index) const { return s2str[index]; } |
|---|
| 108 | const arb_r2a_pro_2_nuc * const *S2strArray() const { return s2str; } |
|---|
| 109 | const long * Pro2Bitset() const { return pro_2_bitset; } |
|---|
| 110 | unsigned char Index2Spro(int index) const { return index_2_spro[index]; } |
|---|
| 111 | int MaxAA() const { return max_aa; } |
|---|
| 112 | int RealmaxAA() const { return realmax_aa; } |
|---|
| 113 | }; |
|---|
| 114 | |
|---|
// Name of the AWAR that stores the selected protein codon type.
#define AWAR_PROTEIN_TYPE "nt/protein_codon_type"

// Create the dna -> nuc_bitset table.
// NOTE(review): ownership of the returned buffer is not visible from this
// header -- check the definition before freeing or caching the result.
char *AP_create_dna_to_ap_bases();
|---|
| 118 | |
|---|
| 119 | |
|---|
| 120 | // ------------------------------ |
|---|
| 121 | |
|---|
| 122 | int AWT_default_protein_type(GBDATA *gb_main = 0); // returns protein code selected in AWAR_PROTEIN_TYPE |
|---|
| 123 | |
|---|
| 124 | AWT_translator *AWT_get_translator(int code_nr); // use explicit protein code |
|---|
| 125 | AWT_translator *AWT_get_user_translator(GBDATA *gb_main = 0); // uses user setting for protein code from AWAR_PROTEIN_TYPE |
|---|
| 126 | // AWAR_PROTEIN_TYPE has to exist; the first call of AWT_get_user_translator needs 'gb_main'!=0 |
|---|
| 127 | |
|---|
| 128 | #else |
|---|
| 129 | #error awt_pro_a_nucs.hxx included twice |
|---|
| 130 | #endif // AWT_PRO_A_NUCS_HXX |
|---|