1 | /* |
---|
2 | * model1.c |
---|
3 | * |
---|
4 | * |
---|
5 | * Part of TREE-PUZZLE 5.0 (June 2000) |
---|
6 | * |
---|
7 | * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, |
---|
8 | * M. Vingron, and Arndt von Haeseler |
---|
9 | * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler |
---|
10 | * |
---|
11 | * All parts of the source except where indicated are distributed under |
---|
12 | * the GNU public licence. See http://www.opensource.org for details. |
---|
13 | */ |
---|
14 | |
---|
15 | |
---|
16 | /* definitions */ |
---|
17 | #define EXTERN extern |
---|
18 | |
---|
19 | /* prototypes */ |
---|
20 | #include <stdio.h> |
---|
21 | #include "util.h" |
---|
22 | #include "ml.h" |
---|
23 | |
---|
24 | /* number of states of the selected model */ |
---|
25 | int gettpmradix() |
---|
26 | { |
---|
27 | if (data_optn == 0) { /* nucleotides */ |
---|
28 | if (nuc_optn) return 4; |
---|
29 | if (SH_optn) return 16; |
---|
30 | } else if (data_optn == 1) { /* amino acids */ |
---|
31 | return 20; |
---|
32 | } else { /* two-state model */ |
---|
33 | return 2; |
---|
34 | } |
---|
35 | return 1; |
---|
36 | } |
---|
37 | |
---|
38 | /* relative transition frequencies */ |
---|
39 | void rtfdata(dmatrix q, double *f) |
---|
40 | { |
---|
41 | double alp, alpy, alpr; |
---|
42 | int i, j; |
---|
43 | |
---|
44 | if (data_optn == 0) |
---|
45 | { /* nucleotides */ |
---|
46 | |
---|
47 | if (nuc_optn) |
---|
48 | { /* 4x4 nucleotides */ |
---|
49 | alp = 2.0*TSparam; |
---|
50 | alpr = (alp * 2.0) / (YRparam + 1.0); |
---|
51 | alpy = YRparam * alpr; |
---|
52 | |
---|
53 | q[0][1] = 1; q[0][2] = alpr; q[0][3] = 1; |
---|
54 | q[1][2] = 1; q[1][3] = alpy; |
---|
55 | q[2][3] = 1; |
---|
56 | |
---|
57 | f[0] = 0.25; f[1] = 0.25; f[2] = 0.25; f[3] = 0.25; |
---|
58 | } |
---|
59 | |
---|
60 | if (SH_optn) |
---|
61 | { /* 16x16 nucleotides */ |
---|
62 | |
---|
63 | alp = 2.0*TSparam; |
---|
64 | |
---|
65 | q[0][1] = 1; q[0][2] = alp; q[0][3] = 1; q[0][4] = 1; |
---|
66 | q[0][5] = 0; q[0][6] = 0; q[0][7] = 0; q[0][8] = alp; |
---|
67 | q[0][9] = 0; q[0][10] = 0; q[0][11] = 0; q[0][12] = 1; |
---|
68 | q[0][13] = 0; q[0][14] = 0; q[0][15] = 0; |
---|
69 | |
---|
70 | q[1][2] = 1; q[1][3] = alp; q[1][4] = 0; q[1][5] = 1; |
---|
71 | q[1][6] = 0; q[1][7] = 0; q[1][8] = 0; q[1][9] = alp; |
---|
72 | q[1][10] = 0; q[1][11] = 0; q[1][12] = 0; q[1][13] = 1; |
---|
73 | q[1][14] = 0; q[1][15] = 0; |
---|
74 | |
---|
75 | q[2][3] = 1; q[2][4] = 0; q[2][5] = 0; q[2][6] = 1; |
---|
76 | q[2][7] = 0; q[2][8] = 0; q[2][9] = 0; q[2][10] = alp; |
---|
77 | q[2][11] = 0; q[2][12] = 0; q[2][13] = 0; q[2][14] = 1; |
---|
78 | q[2][15] = 0; |
---|
79 | |
---|
80 | q[3][4] = 0; q[3][5] = 0; q[3][6] = 0; q[3][7] = 1; |
---|
81 | q[3][8] = 0; q[3][9] = 0; q[3][10] = 0; q[3][11] = alp; |
---|
82 | q[3][12] = 0; q[3][13] = 0; q[3][14] = 0; q[3][15] = 1; |
---|
83 | |
---|
84 | q[4][5] = 1; q[4][6] = alp; q[4][7] = 1; q[4][8] = 1; |
---|
85 | q[4][9] = 0; q[4][10] = 0; q[4][11] = 0; q[4][12] = alp; |
---|
86 | q[4][13] = 0; q[4][14] = 0; q[4][15] = 0; |
---|
87 | |
---|
88 | q[5][6] = 1; q[5][7] = alp; q[5][8] = 0; q[5][9] = 1; |
---|
89 | q[5][10] = 0; q[5][11] = 0; q[5][12] = 0; q[5][13] = alp; |
---|
90 | q[5][14] = 0; q[5][15] = 0; |
---|
91 | |
---|
92 | q[6][7] = 1; q[6][8] = 0; q[6][9] = 0; q[6][10] = 1; |
---|
93 | q[6][11] = 0; q[6][12] = 0; q[6][13] = 0; q[6][14] = alp; |
---|
94 | q[6][15] = 0; |
---|
95 | |
---|
96 | q[7][8] = 0; q[7][9] = 0; q[7][10] = 0; q[7][11] = 1; |
---|
97 | q[7][12] = 0; q[7][13] = 0; q[7][14] = 0; q[7][15] = alp; |
---|
98 | |
---|
99 | q[8][9] = 1; q[8][10] = alp; q[8][11] = 1; q[8][12] = 1; |
---|
100 | q[8][13] = 0; q[8][14] = 0; q[8][15] = 0; |
---|
101 | |
---|
102 | q[9][10] = 1; q[9][11] = alp; q[9][12] = 0; q[9][13] = 1; |
---|
103 | q[9][14] = 0; q[9][15] = 0; |
---|
104 | |
---|
105 | q[10][11] = 1; q[10][12] = 0; q[10][13] = 0; q[10][14] = 1; |
---|
106 | q[10][15] = 0; |
---|
107 | |
---|
108 | q[11][12] = 0; q[11][13] = 0; q[11][14] = 0; q[11][15] = 1; |
---|
109 | |
---|
110 | q[12][13] = 1; q[12][14] = alp; q[12][15] = 1; |
---|
111 | |
---|
112 | q[13][14] = 1; q[13][15] = alp; |
---|
113 | |
---|
114 | q[14][15] = 1; |
---|
115 | |
---|
116 | |
---|
117 | for (i = 0; i < 16; i++) f[i] = 0.0625; |
---|
118 | } |
---|
119 | } |
---|
120 | else if (data_optn == 1) |
---|
121 | { /* amino acids */ |
---|
122 | if (Dayhf_optn) /* Dayhoff model */ |
---|
123 | { |
---|
124 | dyhfdata(q, f); |
---|
125 | } |
---|
126 | else if (Jtt_optn) /* JTT model */ |
---|
127 | { |
---|
128 | jttdata(q, f); |
---|
129 | } |
---|
130 | else if (blosum62_optn) /* BLOSUM 62 model */ |
---|
131 | { |
---|
132 | blosum62data(q, f); |
---|
133 | } |
---|
134 | else if (mtrev_optn) /* mtREV model */ |
---|
135 | { |
---|
136 | mtrevdata(q, f); |
---|
137 | } |
---|
138 | else if (cprev_optn) /* cpREV model */ |
---|
139 | { |
---|
140 | cprev45data(q, f); |
---|
141 | } |
---|
142 | else if (vtmv_optn) /* VT model */ |
---|
143 | { |
---|
144 | vtmvdata(q, f); |
---|
145 | } |
---|
146 | else /* if (wag_optn) */ /* WAG model */ |
---|
147 | { |
---|
148 | wagdata(q, f); |
---|
149 | } |
---|
150 | |
---|
151 | } |
---|
152 | else /* two-state model */ |
---|
153 | { |
---|
154 | q[0][1] = 1.0; |
---|
155 | |
---|
156 | f[0] = 0.5; f[1] = 0.5; |
---|
157 | } |
---|
158 | |
---|
159 | /* fill matrix from upper triangle */ |
---|
160 | for (i = 0; i < tpmradix; i++) |
---|
161 | { |
---|
162 | q[i][i] = 0.0; |
---|
163 | for (j = i+1; j < tpmradix; j++) |
---|
164 | { |
---|
165 | q[j][i] = q[i][j]; |
---|
166 | } |
---|
167 | } |
---|
168 | } |
---|
169 | |
---|
170 | /* transform letter codes to state numbers */ |
---|
171 | int code2int(cvector c) |
---|
172 | { if (data_optn == 0) { /* nucleotides */ |
---|
173 | if (nuc_optn) { /* 4x4 */ |
---|
174 | switch (c[0]) { |
---|
175 | case 'A': return 0; |
---|
176 | case 'C': return 1; |
---|
177 | case 'G': return 2; |
---|
178 | case 'T': return 3; |
---|
179 | case 'U': return 3; |
---|
180 | default : return 4; |
---|
181 | } |
---|
182 | } |
---|
183 | if (SH_optn) { /* 16x16 */ |
---|
184 | if (c[0] == 'A') { |
---|
185 | switch (c[1]) { |
---|
186 | case 'A': return 0; /* AA */ |
---|
187 | case 'C': return 1; /* AC */ |
---|
188 | case 'G': return 2; /* AG */ |
---|
189 | case 'T': return 3; /* AT */ |
---|
190 | case 'U': return 3; /* AT */ |
---|
191 | default: return 16; |
---|
192 | } |
---|
193 | } |
---|
194 | if (c[0] == 'C') { |
---|
195 | switch (c[1]) { |
---|
196 | case 'A': return 4; /* CA */ |
---|
197 | case 'C': return 5; /* CC */ |
---|
198 | case 'G': return 6; /* CG */ |
---|
199 | case 'T': return 7; /* CT */ |
---|
200 | case 'U': return 7; /* CT */ |
---|
201 | default: return 16; |
---|
202 | } |
---|
203 | } |
---|
204 | if (c[0] == 'G') { |
---|
205 | switch (c[1]) { |
---|
206 | case 'A': return 8; /* GA */ |
---|
207 | case 'C': return 9; /* GC */ |
---|
208 | case 'G': return 10; /* GG */ |
---|
209 | case 'T': return 11; /* GT */ |
---|
210 | case 'U': return 11; /* GT */ |
---|
211 | default: return 16; |
---|
212 | } |
---|
213 | } |
---|
214 | if (c[0] == 'T' || c[0] == 'U') { |
---|
215 | switch (c[1]) { |
---|
216 | case 'A': return 12; /* TA */ |
---|
217 | case 'C': return 13; /* TC */ |
---|
218 | case 'G': return 14; /* TG */ |
---|
219 | case 'T': return 15; /* TT */ |
---|
220 | case 'U': return 15; /* TT */ |
---|
221 | default: return 16; |
---|
222 | } |
---|
223 | } |
---|
224 | return 16; |
---|
225 | } |
---|
226 | } else if (data_optn == 1) { /* amino acids */ |
---|
227 | switch (c[0]) { |
---|
228 | case 'A': return 0; |
---|
229 | case 'C': return 4; |
---|
230 | case 'D': return 3; |
---|
231 | case 'E': return 6; |
---|
232 | case 'F': return 13; |
---|
233 | case 'G': return 7; |
---|
234 | case 'H': return 8; |
---|
235 | case 'I': return 9; |
---|
236 | case 'K': return 11; |
---|
237 | case 'L': return 10; |
---|
238 | case 'M': return 12; |
---|
239 | case 'N': return 2; |
---|
240 | case 'P': return 14; |
---|
241 | case 'Q': return 5; |
---|
242 | case 'R': return 1; |
---|
243 | case 'S': return 15; |
---|
244 | case 'T': return 16; |
---|
245 | case 'V': return 19; |
---|
246 | case 'W': return 17; |
---|
247 | case 'Y': return 18; |
---|
248 | default : return 20; |
---|
249 | } |
---|
250 | } else { /* two-state model */ |
---|
251 | switch (c[0]) { |
---|
252 | case '0': return 0; |
---|
253 | case '1': return 1; |
---|
254 | default : return 2; |
---|
255 | } |
---|
256 | } |
---|
257 | return 0; |
---|
258 | } |
---|
259 | |
---|
260 | /* return letter code belonging to state number */ |
---|
261 | char *int2code(int s) |
---|
262 | { |
---|
263 | if (data_optn == 0) { /* nucleotides */ |
---|
264 | if (nuc_optn) { /* 4x4 */ |
---|
265 | switch (s) { |
---|
266 | case 0: return "A"; |
---|
267 | case 1: return "C"; |
---|
268 | case 2: return "G"; |
---|
269 | case 3: return "T"; |
---|
270 | default : return "?"; |
---|
271 | } |
---|
272 | } |
---|
273 | if (SH_optn) { /* 16x16 */ |
---|
274 | switch (s) { |
---|
275 | case 0: return "AA"; |
---|
276 | case 1: return "AC"; |
---|
277 | case 2: return "AG"; |
---|
278 | case 3: return "AT"; |
---|
279 | case 4: return "CA"; |
---|
280 | case 5: return "CC"; |
---|
281 | case 6: return "CG"; |
---|
282 | case 7: return "CT"; |
---|
283 | case 8: return "GA"; |
---|
284 | case 9: return "GC"; |
---|
285 | case 10: return "GG"; |
---|
286 | case 11: return "GT"; |
---|
287 | case 12: return "TA"; |
---|
288 | case 13: return "TC"; |
---|
289 | case 14: return "TG"; |
---|
290 | case 15: return "TT"; |
---|
291 | default : return "??"; |
---|
292 | } |
---|
293 | } |
---|
294 | } else if (data_optn == 1) { /* amino acids */ |
---|
295 | switch (s) { |
---|
296 | case 0: return "A"; |
---|
297 | case 1: return "R"; |
---|
298 | case 2: return "N"; |
---|
299 | case 3: return "D"; |
---|
300 | case 4: return "C"; |
---|
301 | case 5: return "Q"; |
---|
302 | case 6: return "E"; |
---|
303 | case 7: return "G"; |
---|
304 | case 8: return "H"; |
---|
305 | case 9: return "I"; |
---|
306 | case 10: return "L"; |
---|
307 | case 11: return "K"; |
---|
308 | case 12: return "M"; |
---|
309 | case 13: return "F"; |
---|
310 | case 14: return "P"; |
---|
311 | case 15: return "S"; |
---|
312 | case 16: return "T"; |
---|
313 | case 17: return "W"; |
---|
314 | case 18: return "Y"; |
---|
315 | case 19: return "V"; |
---|
316 | default : return "?"; |
---|
317 | } |
---|
318 | } else { /* two-state model */ |
---|
319 | switch (s) { |
---|
320 | case 0: return "0"; |
---|
321 | case 1: return "1"; |
---|
322 | default : return "?"; |
---|
323 | } |
---|
324 | } |
---|
325 | return "?"; |
---|
326 | } |
---|