1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : AP_pro_a_nucs.hxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #ifndef AP_PRO_A_NUCS_HXX |
---|
12 | #define AP_PRO_A_NUCS_HXX |
---|
13 | |
---|
14 | #ifndef ARBDB_BASE_H |
---|
15 | #include <arbdb_base.h> |
---|
16 | #endif |
---|
17 | #ifndef ARBTOOLS_H |
---|
18 | #include <arbtools.h> |
---|
19 | #endif |
---|
20 | |
---|
21 | |
---|
// Nucleotide codes used as bit flags.
//
// AP_A, AP_C, AP_G, AP_T and AP_S each occupy a distinct single bit, so
// several of them can be OR-ed together into one bitset value.
// (Table-extraction residue - row numbers, '|' cell borders and '---|'
// separator rows - has been removed from this block; the enumerators and
// their values are unchanged.)
enum AP_BASES {
    AP_A   = 1,
    AP_C   = 2,
    AP_G   = 4,
    AP_T   = 8,
    AP_S   = 16, // Space (GAP)
    AP_N   = 31, // == AP_A|AP_C|AP_G|AP_T|AP_S (all five bits set)
    AP_MAX = 32  // one above the largest bitset value; presumably used as table size - TODO confirm
};
---|
31 | |
---|
32 | struct arb_r2a_pro_2_nucs : virtual Noncopyable { |
---|
33 | struct arb_r2a_pro_2_nucs *next; |
---|
34 | char nucbits[3]; // bitsets of nucs |
---|
35 | |
---|
36 | arb_r2a_pro_2_nucs(); |
---|
37 | ~arb_r2a_pro_2_nucs(); |
---|
38 | }; |
---|
39 | |
---|
40 | struct arb_r2a_pro_2_nuc : virtual Noncopyable { |
---|
41 | char single_pro; |
---|
42 | char tri_pro[3]; // null terminated (because of index) |
---|
43 | int index; // < 0x007fffff |
---|
44 | |
---|
45 | struct arb_r2a_pro_2_nucs *nucs; |
---|
46 | |
---|
47 | arb_r2a_pro_2_nuc(); |
---|
48 | ~arb_r2a_pro_2_nuc(); |
---|
49 | }; |
---|
50 | |
---|
// Distance definition for one protein.
// (Table-extraction residue - row numbers, '|' cell borders and '---|'
// separator rows - has been removed; members and their order are unchanged.)
struct AWT_PDP {
    long patd[3]; // proteins at dist
                  // every bit in patd[x] represents one protein
                  // bit in patd[0] is set => distance == 0
                  // bit in patd[1] is set => distance <= 1
                  // bit in patd[2] is set => distance <= 2

    char nucbits[3]; // bitsets of nucs
};
---|
60 | |
---|
61 | class AWT_translator; |
---|
62 | |
---|
63 | class AWT_distance_meter : virtual Noncopyable { |
---|
64 | AWT_PDP *dist_[64]; // sets of proteins with special distance [64 > max_aa |
---|
65 | |
---|
66 | long transform07[256]; // like dist.patd[1] but for bits 0-7 |
---|
67 | long transform815[256]; |
---|
68 | long transform1623[256]; |
---|
69 | |
---|
70 | public: |
---|
71 | AWT_distance_meter(const AWT_translator *translator); |
---|
72 | ~AWT_distance_meter(); |
---|
73 | |
---|
74 | const AWT_PDP *getDistance(int idx) const { return dist_[idx]; } |
---|
75 | AWT_PDP *getDistance(int idx) { return dist_[idx]; } |
---|
76 | }; |
---|
77 | |
---|
78 | |
---|
79 | class AWT_translator : virtual Noncopyable { |
---|
80 | private: |
---|
81 | mutable AWT_distance_meter *distance_meter; // (mutable to allow lazy-evaluation) |
---|
82 | |
---|
83 | int code_nr; |
---|
84 | GB_HASH *t2i_hash; // hash table trin >> singlepro |
---|
85 | arb_r2a_pro_2_nuc *s2str[256]; // singlecode protein >> dna ... |
---|
86 | long *pro_2_bitset; // |
---|
87 | char *nuc_2_bitset; // dna to |
---|
88 | unsigned char index_2_spro[64]; // 64 > max_aa |
---|
89 | |
---|
90 | int realmax_aa; // number of real AA + stop codon |
---|
91 | int max_aa; // plus ambiguous codes |
---|
92 | |
---|
93 | void build_table(unsigned char pbase, const char *tri_pro, const char *nuc); |
---|
94 | long *create_pro_to_bits() const; |
---|
95 | |
---|
96 | public: |
---|
97 | |
---|
98 | AWT_translator(int arb_protein_code_nr); |
---|
99 | ~AWT_translator(); |
---|
100 | |
---|
101 | const AWT_distance_meter *getDistanceMeter() const; |
---|
102 | AWT_distance_meter *getDistanceMeter() { |
---|
103 | return const_cast<AWT_distance_meter*>(const_cast<const AWT_translator*>(this)->getDistanceMeter()); |
---|
104 | } |
---|
105 | |
---|
106 | int CodeNr() const { return code_nr; } |
---|
107 | const GB_HASH *T2iHash() const { return t2i_hash; } |
---|
108 | const arb_r2a_pro_2_nuc *S2str(int index) const { return s2str[index]; } |
---|
109 | const arb_r2a_pro_2_nuc * const *S2strArray() const { return s2str; } |
---|
110 | const long * Pro2Bitset() const { return pro_2_bitset; } |
---|
111 | unsigned char Index2Spro(int index) const { return index_2_spro[index]; } |
---|
112 | int MaxAA() const { return max_aa; } |
---|
113 | int RealmaxAA() const { return realmax_aa; } |
---|
114 | }; |
---|
115 | |
---|
// Name of the AWAR that holds the protein codon type.
// (Table-extraction residue - row number and '|' cell borders - has been
// removed; the string literal is unchanged.)
#define AWAR_PROTEIN_TYPE "nt/protein_codon_type"
---|
117 | |
---|
// create dna 2 nuc_bitset
// NOTE(review): the definition is not visible here; size and ownership of
// the returned buffer need to be confirmed before freeing or indexing it.
// (Table-extraction residue - row number and '|' cell borders - has been
// removed; the prototype is unchanged.)
char *AP_create_dna_to_ap_bases();
---|
119 | |
---|
120 | // ------------------------------ |
---|
121 | |
---|
122 | int AWT_default_protein_type(GBDATA *gb_main = 0); // returns protein code selected in AWAR_PROTEIN_TYPE |
---|
123 | |
---|
124 | AWT_translator *AWT_get_translator(int code_nr); // use explicit protein code |
---|
125 | AWT_translator *AWT_get_user_translator(GBDATA *gb_main = 0); // uses user setting for protein code from AWAR_PROTEIN_TYPE |
---|
126 | // AWAR_PROTEIN_TYPE has to exist; the first call of AWT_get_user_translator needs 'gb_main'!=0 |
---|
127 | |
---|
128 | #else |
---|
129 | #error AP_pro_a_nucs.hxx included twice |
---|
130 | #endif // AP_PRO_A_NUCS_HXX |
---|