1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : AWT_translate.cxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in June 2006 // |
---|
7 | // Institute of Microbiology (Technical University Munich) // |
---|
8 | // http://www.arb-home.de/ // |
---|
9 | // // |
---|
10 | // =============================================================== // |
---|
11 | |
---|
12 | #include "Translate.hxx" |
---|
13 | |
---|
14 | #include <AP_pro_a_nucs.hxx> |
---|
15 | #include <AP_codon_table.hxx> |
---|
16 | #include <arbdbt.h> |
---|
17 | |
---|
18 | #define tl_assert(cond) arb_assert(cond) |
---|
19 | |
---|
20 | GB_ERROR AWT_saveTranslationInfo(GBDATA *gb_species, int arb_transl_table, int codon_start) { |
---|
21 | int embl_transl_table = AWT_arb_code_nr_2_embl_transl_table(arb_transl_table); |
---|
22 | |
---|
23 | tl_assert(codon_start >= 0 && codon_start<3); // codon_start has to be 0..2 |
---|
24 | tl_assert(embl_transl_table >= 0); |
---|
25 | |
---|
26 | GB_ERROR error = GBT_write_string(gb_species, "transl_table", GBS_global_string("%i", embl_transl_table)); |
---|
27 | if (!error) error = GBT_write_string(gb_species, "codon_start", GBS_global_string("%i", codon_start+1)); |
---|
28 | |
---|
29 | return error; |
---|
30 | } |
---|
31 | |
---|
32 | GB_ERROR AWT_removeTranslationInfo(GBDATA *gb_species) { |
---|
33 | GB_ERROR error = NULL; |
---|
34 | |
---|
35 | GBDATA *gb_transl_table = GB_entry(gb_species, "transl_table"); |
---|
36 | if (gb_transl_table) error = GB_delete(gb_transl_table); |
---|
37 | |
---|
38 | if (!error) { |
---|
39 | GBDATA *gb_codon_start = GB_entry(gb_species, "codon_start"); |
---|
40 | if (gb_codon_start) error = GB_delete(gb_codon_start); |
---|
41 | } |
---|
42 | |
---|
43 | return error; |
---|
44 | } |
---|
45 | |
---|
46 | GB_ERROR AWT_getTranslationInfo(GBDATA *gb_item, int& arb_transl_table, int& codon_start) { |
---|
47 | // looks for sub-entries 'transl_table' and 'codon_start' of species (works for genes as well) |
---|
48 | // if found -> test for validity and translate 'transl_table' from EMBL to ARB table number |
---|
49 | // |
---|
50 | // returns: an error in case of problems |
---|
51 | // |
---|
52 | // 'arb_transl_table' is set to -1 if not found, otherwise it contains the arb table number |
---|
53 | // 'codon_start' is set to -1 if not found, otherwise it contains the codon_start (0..2) |
---|
54 | |
---|
55 | arb_transl_table = -1; // not found yet |
---|
56 | codon_start = -1; // not found yet |
---|
57 | |
---|
58 | GB_ERROR error = 0; |
---|
59 | GBDATA *gb_transl_table = GB_entry(gb_item, "transl_table"); |
---|
60 | |
---|
61 | if (gb_transl_table) { |
---|
62 | int embl_table = atoi(GB_read_char_pntr(gb_transl_table)); |
---|
63 | arb_transl_table = AWT_embl_transl_table_2_arb_code_nr(embl_table); |
---|
64 | if (arb_transl_table == -1) { // ill. table |
---|
65 | error = GBS_global_string("Illegal (or unsupported) value (%i) in 'transl_table'", embl_table); |
---|
66 | } |
---|
67 | } |
---|
68 | |
---|
69 | if (!error) { |
---|
70 | GBDATA *gb_codon_start = GB_entry(gb_item, "codon_start"); |
---|
71 | if (gb_codon_start) { |
---|
72 | int codon_start_value = atoi(GB_read_char_pntr(gb_codon_start)); |
---|
73 | |
---|
74 | if (codon_start_value<1 || codon_start_value>3) { |
---|
75 | error = GBS_global_string("Illegal value (%i) in 'codon_start' (allowed: 1..3)", codon_start_value); |
---|
76 | } |
---|
77 | else { |
---|
78 | codon_start = codon_start_value-1; // internal value is 0..2 |
---|
79 | } |
---|
80 | } |
---|
81 | else if (arb_transl_table != -1) { |
---|
82 | // default to codon_start 1 |
---|
83 | error = GBT_write_string(gb_item, "codon_start", "1"); |
---|
84 | if (!error) codon_start = 0; // internal value is 0..2 |
---|
85 | } |
---|
86 | } |
---|
87 | |
---|
88 | if (!error && arb_transl_table != codon_start) { |
---|
89 | if (arb_transl_table == -1) error = "Found 'codon_start', but 'transl_table' is missing"; |
---|
90 | else if (codon_start == -1) error = "Found 'transl_table', but 'codon_start' is missing"; |
---|
91 | } |
---|
92 | |
---|
93 | if (error) { // append species name to error message |
---|
94 | error = GBS_global_string("%s (item='%s')", error, GBT_read_name(gb_item)); |
---|
95 | } |
---|
96 | |
---|
97 | return error; |
---|
98 | } |
---|
99 | |
---|
inline void memcpy3(char *dest, const char *source) {
    // copies exactly three chars (i.e. one codon) from 'source' to 'dest';
    // no terminating zero is written
    for (int k = 0; k<3; ++k) {
        dest[k] = source[k];
    }
}
---|
105 | |
---|
106 | int AWT_pro_a_nucs_convert(int arb_code_nr, char *data, size_t size, size_t pos, bool translate_all, bool create_start_codon, bool append_stop_codon, int *translatedSize) { |
---|
107 | // if translate_all == true -> 'pos' > 1 produces a leading 'X' in protein data |
---|
108 | // (otherwise nucleotides in front of the starting pos are simply ignored) |
---|
109 | // |
---|
110 | // if 'create_start_codon' is true and the first generated codon is a start codon of the used |
---|
111 | // code, a 'M' is inserted instead of the codon |
---|
112 | // if 'append_stop_codon' is true, the stop codon is appended as '*'. This is only done, if the last |
---|
113 | // character not already is a stop codon. (Note: provide data with correct size) |
---|
114 | // |
---|
115 | // returns: |
---|
116 | // - the translated protein sequence in 'data' |
---|
117 | // - the length of the translated protein sequence in 'translatedSize' (if != 0) |
---|
118 | // - number of stop-codons in translated sequence as result |
---|
119 | |
---|
120 | arb_assert(pos <= 2); |
---|
121 | |
---|
122 | for (char *p = data; *p; p++) { |
---|
123 | char c = *p; |
---|
124 | if ((c>='a') && (c<='z')) c = c+'A'-'a'; |
---|
125 | if (c=='U') c = 'T'; |
---|
126 | *p = c; |
---|
127 | } |
---|
128 | |
---|
129 | char buffer[4]; |
---|
130 | buffer[3] = 0; |
---|
131 | |
---|
132 | char *dest = data; |
---|
133 | |
---|
134 | if (pos && translate_all) { |
---|
135 | for (char *p = data; p<data+pos; ++p) { |
---|
136 | char c = *p; |
---|
137 | if (c!='.' && c!='-') { // found a nucleotide |
---|
138 | *dest++ = 'X'; |
---|
139 | break; |
---|
140 | } |
---|
141 | } |
---|
142 | } |
---|
143 | |
---|
144 | int stops = 0; |
---|
145 | size_t i = pos; |
---|
146 | char startCodon = 0; |
---|
147 | const GB_HASH *t2i_hash = AWT_get_translator(arb_code_nr)->T2iHash(); |
---|
148 | |
---|
149 | if (create_start_codon) { |
---|
150 | memcpy3(buffer, data+pos); |
---|
151 | startCodon = AWT_is_start_codon(buffer, arb_code_nr); |
---|
152 | } |
---|
153 | |
---|
154 | for (char *p = data+pos; i+2<size; p+=3, i+=3) { |
---|
155 | memcpy3(buffer, p); |
---|
156 | int spro = (int)GBS_read_hash(t2i_hash, buffer); |
---|
157 | int C; |
---|
158 | if (!spro) { |
---|
159 | C = 'X'; |
---|
160 | } |
---|
161 | else { |
---|
162 | if (spro == '*') stops++; |
---|
163 | C = spro; |
---|
164 | if (spro == 's') C = 'S'; |
---|
165 | } |
---|
166 | *(dest++) = (char)C; |
---|
167 | } |
---|
168 | |
---|
169 | int tsize = dest-data; |
---|
170 | |
---|
171 | if (tsize>0) { // at least 1 amino written |
---|
172 | if (create_start_codon && startCodon) data[0] = startCodon; |
---|
173 | if (append_stop_codon && dest[-1] != '*') { |
---|
174 | *dest++ = '*'; |
---|
175 | tsize++; |
---|
176 | } |
---|
177 | } |
---|
178 | dest[0] = 0; |
---|
179 | |
---|
180 | if (translatedSize) *translatedSize = tsize; |
---|
181 | |
---|
182 | return stops; |
---|
183 | } |
---|
184 | |
---|
185 | |
---|
186 | |
---|
187 | |
---|
188 | |
---|