1 | /* |
---|
2 | * distan.c Adachi, J. 1995.10.26 |
---|
3 | * Copyright (C) 1992-1995 J. Adachi & M. Hasegawa. All rights reserved. |
---|
4 | */ |
---|
5 | |
---|
6 | #define VARIOTU 0 |
---|
7 | #define PUTDISLKLHD 0 |
---|
8 | #define DISTAN_DEBUG 0 |
---|
9 | #define PUTDISPLOT 0 |
---|
10 | #define PUTDISNUM 2000 |
---|
11 | |
---|
12 | #include "protml.h" |
---|
13 | |
---|
14 | #if PUTDISPLOT |
---|
15 | static void |
---|
16 | distan2(dislkl, initdist, seqi, seqj, seqw, nsite) |
---|
17 | dmatrix dislkl; |
---|
18 | double initdist; |
---|
19 | ivector seqi, seqj, seqw; |
---|
20 | { |
---|
21 | int i, j, k, l; |
---|
22 | double dist, distdiff, maxlkl, maxdis, coef; |
---|
23 | double lkl, ld1, lklhd, lkld1, lkld2; |
---|
24 | dmattpmty tprob, tdiff1, tdiff2; |
---|
25 | |
---|
26 | maxlkl = - DBL_MAX; |
---|
27 | for (l = 0, dist = 0.1; l < PUTDISNUM; l++) { |
---|
28 | coef = 0.5; |
---|
29 | dist = (l+1.0)/10.0; |
---|
30 | tdiffmtrx(dist, tprob, tdiff1, tdiff2); |
---|
31 | lklhd = lkld1 = lkld2 = 0.0; |
---|
32 | for (k = 0; k < nsite; k++) { |
---|
33 | i = seqi[k]; |
---|
34 | j = seqj[k]; |
---|
35 | lkl = tprob[i][j]; |
---|
36 | ld1 = tdiff1[i][j] / lkl; |
---|
37 | lklhd += log(lkl) * (double)seqw[k]; |
---|
38 | lkld1 += ld1 * (double)seqw[k]; |
---|
39 | lkld2 += (tdiff2[i][j]/lkl - ld1*ld1) * (double)seqw[k]; |
---|
40 | } |
---|
41 | distdiff = - (lkld1 / lkld2); |
---|
42 | dislkl[0][l] = lklhd; |
---|
43 | dislkl[1][l] = lkld1; |
---|
44 | dislkl[2][l] = lkld2; |
---|
45 | dislkl[3][l] = dist + distdiff; |
---|
46 | if (lkld2 < 0) { |
---|
47 | if (lkld1 > 0) { |
---|
48 | dislkl[4][l] = dist + distdiff; |
---|
49 | coef = 0.5; |
---|
50 | } else { |
---|
51 | if (dist + distdiff > dist * coef) { |
---|
52 | dislkl[4][l] = dist + distdiff; |
---|
53 | } else { |
---|
54 | dislkl[4][l] = dist * coef; |
---|
55 | coef *= 0.5; |
---|
56 | } |
---|
57 | } |
---|
58 | } else { |
---|
59 | dislkl[4][l] = dist * coef; |
---|
60 | coef *= 0.5; |
---|
61 | } |
---|
62 | if (dislkl[4][l] < LOWERLIMIT) dislkl[4][l] = LOWERLIMIT; |
---|
63 | if (dislkl[4][l] > UPPERLIMIT) dislkl[4][l] = UPPERLIMIT; |
---|
64 | if (lklhd > maxlkl) { |
---|
65 | maxlkl = lklhd; |
---|
66 | maxdis = dist; |
---|
67 | } |
---|
68 | } |
---|
69 | printf("# initdis: %.3f dis: %.3f ln L: %.3f\n",initdist,maxdis,maxlkl); |
---|
70 | } /* distan2 */ |
---|
71 | #endif /* PUTDISPLOT */ |
---|
72 | |
---|
73 | |
---|
74 | static void |
---|
75 | distan(dis, vdis, lklhd, ii, jj, seqi, seqj, seqw, nsite) |
---|
76 | double *dis, *vdis, *lklhd; |
---|
77 | int ii, jj; |
---|
78 | ivector seqi, seqj, seqw; |
---|
79 | int nsite; |
---|
80 | { |
---|
81 | int i, j, k, l; |
---|
82 | int maxloop = 10; |
---|
83 | double dist, distold, distdiff, coef; |
---|
84 | double lkl, ld1, lkld1, lkld2; |
---|
85 | dmattpmty tprob, tdiff1, tdiff2; |
---|
86 | |
---|
87 | dist = *dis; |
---|
88 | coef = INITCOEFMLE; |
---|
89 | for (l = 0; l < maxloop; l++) { |
---|
90 | tdiffmtrx(dist, tprob, tdiff1, tdiff2); |
---|
91 | lkld1 = lkld2 = 0.0; |
---|
92 | for (k = 0; k < nsite; k++) { |
---|
93 | i = seqi[k]; |
---|
94 | j = seqj[k]; |
---|
95 | lkl = tprob[i][j]; |
---|
96 | ld1 = tdiff1[i][j] / lkl; |
---|
97 | lkld1 += ld1 * (double)seqw[k]; |
---|
98 | lkld2 += (tdiff2[i][j] / lkl - ld1*ld1 ) * (double)seqw[k]; |
---|
99 | } |
---|
100 | distold = dist; |
---|
101 | distdiff = -(lkld1 / lkld2); |
---|
102 | if (lkld1 > 0) { |
---|
103 | dist += distdiff; |
---|
104 | coef = INITCOEFMLE; |
---|
105 | } else { |
---|
106 | if (lkld2 < 0) { |
---|
107 | if (dist + distdiff > dist * coef) { |
---|
108 | dist += distdiff; |
---|
109 | coef = INITCOEFMLE; |
---|
110 | } else { |
---|
111 | dist *= coef; |
---|
112 | coef *= INITCOEFMLE; |
---|
113 | } |
---|
114 | } else { |
---|
115 | dist *= coef; |
---|
116 | coef *= INITCOEFMLE; |
---|
117 | } |
---|
118 | } |
---|
119 | if (dist < LOWERLIMIT) dist = LOWERLIMIT; |
---|
120 | if (dist > UPPERLIMIT) dist = UPPERLIMIT; |
---|
121 | #if DISTAN_DEBUG |
---|
122 | if (Debug) |
---|
123 | printf("%3d %8.5f %8.5f %8.5f %10.5f %8.5f\n", |
---|
124 | l, lkld1, lkld2, distdiff, dist, dist - distold); |
---|
125 | #endif |
---|
126 | if (fabs(distold-dist) < DEPSILON && fabs(lkld1) < 1.0) /* EPSILON */ |
---|
127 | break; |
---|
128 | } |
---|
129 | if (Debug_optn && Debug) |
---|
130 | printf("%3d %8.5f %8.5f %8.5f %10.5f %8.5f\n", |
---|
131 | l+1,lkld1,lkld2,distdiff,dist, dist - distold); |
---|
132 | if (fabs(distold - dist) > DEPSILON) { |
---|
133 | fprintf(stderr, "ERROR, distance(%s,%s), non convergence!\n", |
---|
134 | Identif[ii], Identif[jj]); |
---|
135 | } |
---|
136 | if (lkld2 > 0.0) { |
---|
137 | fprintf(stderr, "ERROR, distance(%s,%s) estimation!\n", |
---|
138 | Identif[ii], Identif[jj]); |
---|
139 | fprintf(stderr, "second derivative is negative!\n"); |
---|
140 | } |
---|
141 | if (dist == UPPERLIMIT) { |
---|
142 | fprintf(stderr, "WARNING, distance(%s,%s) became upper limit!\n", |
---|
143 | Identif[ii], Identif[jj]); |
---|
144 | } |
---|
145 | *dis = dist; |
---|
146 | *vdis = 1.0 / fabs(lkld2); |
---|
147 | |
---|
148 | for (k = 0, lkl = 0.0; k < nsite; k++) { |
---|
149 | i = seqi[k]; |
---|
150 | j = seqj[k]; |
---|
151 | lkl += log(Freqtpm[i] * tprob[i][j]); |
---|
152 | } |
---|
153 | *lklhd = lkl; |
---|
154 | |
---|
155 | } /* distan */ |
---|
156 | |
---|
157 | |
---|
158 | void |
---|
159 | distance(distanmat, seqchar, maxspc, numsite) |
---|
160 | dmatrix distanmat; |
---|
161 | cmatrix seqchar; |
---|
162 | int maxspc, numsite; |
---|
163 | { |
---|
164 | int i, j, k, l, nsite, diff; |
---|
165 | double dist, vdis, lklhd; |
---|
166 | ivector seqi, seqj, seqw; |
---|
167 | cvector seqchi, seqchj; |
---|
168 | int gene[TPMRADIX + 1]; |
---|
169 | #if VARIOTU |
---|
170 | dvector variotu; |
---|
171 | #endif |
---|
172 | |
---|
173 | #if PUTDISLKLHD |
---|
174 | dmatrix lkldistan; |
---|
175 | dvector lkllinfotu; |
---|
176 | lkldistan = new_dmatrix(maxspc, maxspc); |
---|
177 | lkllinfotu = new_dvector(maxspc); |
---|
178 | for (i = 0; i < maxspc; i++) lkllinfotu[i] = 0.0; |
---|
179 | #endif |
---|
180 | #if PUTDISPLOT |
---|
181 | dmatrix dislkl; |
---|
182 | dislkl = new_dmatrix(5, PUTDISNUM); |
---|
183 | #endif |
---|
184 | #if VARIOTU |
---|
185 | variotu = new_dvector(maxspc); |
---|
186 | for (i = 0; i < maxspc; i++) variotu[i] = 0.0; |
---|
187 | #endif |
---|
188 | |
---|
189 | if (Verbs_optn) fprintf(stderr,"distance matrix [%d][%d]\n",maxspc,maxspc); |
---|
190 | seqi = new_ivector(numsite); |
---|
191 | seqj = new_ivector(numsite); |
---|
192 | seqw = new_ivector(numsite); |
---|
193 | for (i = 0; i < maxspc-1; i++) { |
---|
194 | seqchi = seqchar[i]; |
---|
195 | for (j = i+1; j < maxspc; j++) { |
---|
196 | seqchj = seqchar[j]; |
---|
197 | #if 0 |
---|
198 | for (k=0; k<numsite; k++) putchar(chint2ami(seqchi[k])); |
---|
199 | putchar('\n'); |
---|
200 | for (k=0; k<numsite; k++) putchar(chint2ami(seqchj[k])); |
---|
201 | putchar('\n'); |
---|
202 | #endif |
---|
203 | for ( k = 0; k < Tpmradix + 1; k++) gene[k] = 0; |
---|
204 | for ( k = 0; k < numsite; k++) { |
---|
205 | if (seqchi[k] == seqchj[k]) { |
---|
206 | gene[seqchi[k]]++; |
---|
207 | seqw[k] = FALSE; |
---|
208 | } else if (seqchi[k] == Tpmradix || seqchj[k] == Tpmradix) { |
---|
209 | seqw[k] = FALSE; |
---|
210 | } else { |
---|
211 | seqw[k] = TRUE; |
---|
212 | } |
---|
213 | } |
---|
214 | for ( k = 0, l = 0; k < numsite; k++) { |
---|
215 | if (seqw[k]) { |
---|
216 | seqi[l] = seqchi[k]; |
---|
217 | seqj[l] = seqchj[k]; |
---|
218 | seqw[l] = 1; |
---|
219 | l++; |
---|
220 | } |
---|
221 | } |
---|
222 | diff = l; |
---|
223 | for ( k = 0; k < Tpmradix; k++) { |
---|
224 | if (gene[k]) { |
---|
225 | seqi[l] = k; |
---|
226 | seqj[l] = k; |
---|
227 | seqw[l] = gene[k]; |
---|
228 | l++; |
---|
229 | } |
---|
230 | } |
---|
231 | nsite = l; |
---|
232 | #ifdef DISDEBUG |
---|
233 | for (k=0; k<nsite; k++) putchar(int2ami(seqi[k])); putchar('\n'); |
---|
234 | for (k=0; k<nsite; k++) putchar(int2ami(seqj[k])); putchar('\n'); |
---|
235 | for (k=0; k<nsite; k++) printf("%d",seqw[k]); putchar('\n'); |
---|
236 | #endif |
---|
237 | if (diff == 0) { |
---|
238 | dist = 0.0; |
---|
239 | vdis = 0.0; |
---|
240 | } else { |
---|
241 | if (diff < numsite) |
---|
242 | dist = -log(1.0 - (double)diff/(double)numsite)*100.0; |
---|
243 | else |
---|
244 | dist = -log(1.0 - (double)diff/(double)(numsite+1.0))*100.0; |
---|
245 | if (dist < LOWERLIMIT) dist = LOWERLIMIT; |
---|
246 | if (dist > UPPERLIMIT) dist = UPPERLIMIT; |
---|
247 | #if PUTDISPLOT |
---|
248 | distan2(dislkl, dist, seqi, seqj, seqw, nsite); |
---|
249 | #else |
---|
250 | distan(&dist, &vdis, &lklhd, i, j, seqi, seqj, seqw, nsite); |
---|
251 | #endif |
---|
252 | } |
---|
253 | distanmat[i][j] = dist; |
---|
254 | if (Varia_optn) |
---|
255 | distanmat[j][i] = vdis; |
---|
256 | else |
---|
257 | distanmat[j][i] = dist; |
---|
258 | #if VARIOTU |
---|
259 | variotu[i] += vdis; /* += vdis */ |
---|
260 | variotu[j] += vdis; |
---|
261 | #endif |
---|
262 | #if PUTDISLKLHD |
---|
263 | lklhd *= -100.0 / (double)nsite; |
---|
264 | lkldistan[i][j] = lkldistan[j][i] = lklhd; |
---|
265 | lkllinfotu[i] += lklhd; |
---|
266 | lkllinfotu[j] += lklhd; |
---|
267 | #endif |
---|
268 | } |
---|
269 | distanmat[i][i] = 0.0; |
---|
270 | } |
---|
271 | distanmat[maxspc-1][maxspc-1] = 0.0; |
---|
272 | free_ivector(seqw); |
---|
273 | free_ivector(seqj); |
---|
274 | free_ivector(seqi); |
---|
275 | |
---|
276 | #if VARIOTU |
---|
277 | for (i = 0; i < maxspc; i++) { |
---|
278 | printf("%3d %-7s %10.1f\n", i+1, Identif[i], variotu[i]); |
---|
279 | } |
---|
280 | #endif |
---|
281 | |
---|
282 | if (Debug) |
---|
283 | for (i = 0; i < maxspc; i++) { |
---|
284 | for (j = 0; j < maxspc; j++) { |
---|
285 | printf("%5.1f", distanmat[i][j]); |
---|
286 | } |
---|
287 | printf("\n"); |
---|
288 | } |
---|
289 | #if PUTDISLKLHD |
---|
290 | for (j = 0; j < maxspc; j++) printf("%5d", j+1); printf("\n"); |
---|
291 | for (j = 0; j < maxspc; j++) printf("%5.4s", Identif[j]); printf("\n"); |
---|
292 | for (i = 0; i < maxspc; i++) { |
---|
293 | for (j = 0; j < maxspc; j++) { |
---|
294 | printf("%5.0f", lkldistan[i][j]); |
---|
295 | } printf("\n"); |
---|
296 | } |
---|
297 | for (j = 0; j < maxspc; j++) { |
---|
298 | lkllinfotu[j] /= maxspc; |
---|
299 | printf("%5.0f", lkllinfotu[j]); |
---|
300 | } printf("\n"); |
---|
301 | for (i = 0; i < maxspc; i++) { |
---|
302 | for (j = 0, lklhd = 0.0; j < maxspc; j++) { |
---|
303 | lklhd += fabs(lkldistan[i][j] - lkllinfotu[i]); |
---|
304 | } |
---|
305 | printf("%3d %-7s %10.1f\n", i+1, Identif[i], lklhd); |
---|
306 | } |
---|
307 | free_dmatrix(lkldistan); |
---|
308 | free_dvector(lkllinfotu); |
---|
309 | exit(1); |
---|
310 | #endif |
---|
311 | #if PUTDISPLOT |
---|
312 | for (k = 0; k < PUTDISNUM; k++) { |
---|
313 | printf("%6.1f", (k+1.0) / 10.0); |
---|
314 | for (i = 0; i < 5; i++) |
---|
315 | printf(" %12.5f",dislkl[i][k]); |
---|
316 | putchar('\n'); |
---|
317 | } |
---|
318 | free_dmatrix(dislkl); |
---|
319 | exit(1); |
---|
320 | #endif |
---|
321 | #if VARIOTU |
---|
322 | free_dvector(variotu); |
---|
323 | #endif |
---|
324 | } /* distance */ |
---|
325 | |
---|
326 | |
---|
327 | static double |
---|
328 | determinant(amat, size) |
---|
329 | dmatrix amat; |
---|
330 | int size; |
---|
331 | { |
---|
332 | /* DETERMINANT ON LU DECOMPOSITION */ |
---|
333 | double eps = 1.0e-20; /* ! */ |
---|
334 | int i, j, k, maxi; |
---|
335 | double sum, tmp, maxb, det; |
---|
336 | dvector wk; |
---|
337 | ivector index; |
---|
338 | |
---|
339 | wk = new_dvector(size); |
---|
340 | index = new_ivector(size); |
---|
341 | det = 1.0; |
---|
342 | for (i = 0; i < size; i++) { |
---|
343 | maxb = 0.0; |
---|
344 | for (j = 0; j < size; j++) { |
---|
345 | if (fabs(amat[i][j]) > maxb) |
---|
346 | maxb = fabs(amat[i][j]); |
---|
347 | } |
---|
348 | if (maxb == 0.0) { |
---|
349 | fprintf(stderr, "determinant: singular matrix\n"); |
---|
350 | exit(1); |
---|
351 | } |
---|
352 | wk[i] = 1.0 / maxb; |
---|
353 | } |
---|
354 | for (j = 0; j < size; j++) { |
---|
355 | for (i = 0; i < j; i++) { |
---|
356 | sum = amat[i][j]; |
---|
357 | for (k = 0; k < i; k++) |
---|
358 | sum -= amat[i][k] * amat[k][j]; |
---|
359 | amat[i][j] = sum; |
---|
360 | } |
---|
361 | maxb = 0.0; |
---|
362 | for (i = j; i < size; i++) { |
---|
363 | sum = amat[i][j]; |
---|
364 | for (k = 0; k < j; k++) |
---|
365 | sum -= amat[i][k] * amat[k][j]; |
---|
366 | amat[i][j] = sum; |
---|
367 | tmp = wk[i] * fabs(sum); |
---|
368 | if (tmp >= maxb) { |
---|
369 | maxb = tmp; |
---|
370 | maxi = i; |
---|
371 | } |
---|
372 | } |
---|
373 | if (j != maxi) { |
---|
374 | for (k = 0; k < size; k++) { |
---|
375 | tmp = amat[maxi][k]; |
---|
376 | amat[maxi][k] = amat[j][k]; |
---|
377 | amat[j][k] = tmp; |
---|
378 | } |
---|
379 | det = -det; |
---|
380 | wk[maxi] = wk[j]; |
---|
381 | } |
---|
382 | index[j] = maxi; |
---|
383 | if (amat[j][j] == 0.0) |
---|
384 | amat[j][j] = eps; |
---|
385 | if (j != size - 1) { |
---|
386 | tmp = 1.0 / amat[j][j]; |
---|
387 | for (i = j + 1; i < size; i++) |
---|
388 | amat[i][j] *= tmp; |
---|
389 | } |
---|
390 | } |
---|
391 | |
---|
392 | for (i = 0; i < size; i++) { |
---|
393 | det *= amat[i][i]; |
---|
394 | } |
---|
395 | free_ivector(index); |
---|
396 | free_dvector(wk); |
---|
397 | return det; |
---|
398 | } /*_ determinant */ |
---|
399 | |
---|
400 | |
---|
401 | void |
---|
402 | lddistance(distanmat, seqchar, maxspc, numsite) |
---|
403 | dmatrix distanmat; |
---|
404 | cmatrix seqchar; |
---|
405 | int maxspc, numsite; |
---|
406 | { |
---|
407 | int i, j, k, x, y, nsite; |
---|
408 | double dist, comp; |
---|
409 | cvector seqchi, seqchj; |
---|
410 | dmatrix fxy, fxxyy; |
---|
411 | dvectpmty fx, fy; |
---|
412 | |
---|
413 | fxy = new_dmatrix(Tpmradix, Tpmradix); |
---|
414 | fxxyy = new_dmatrix(Tpmradix, Tpmradix); |
---|
415 | for (i = 0; i < maxspc-1; i++) { |
---|
416 | seqchi = seqchar[i]; |
---|
417 | for (j = i+1; j < maxspc; j++) { |
---|
418 | seqchj = seqchar[j]; |
---|
419 | for (x = 0; x < Tpmradix; x++) { |
---|
420 | for (y = 0; y < Tpmradix; y++) { |
---|
421 | fxy[x][y] = 0.0; |
---|
422 | fxxyy[x][y] = 0.0; |
---|
423 | } |
---|
424 | fx[x] = 0.0; |
---|
425 | fy[x] = 0.0; |
---|
426 | } |
---|
427 | nsite = 0; |
---|
428 | for (k = 0; k < numsite; k++) { |
---|
429 | if ( (x = seqchi[k]) != Tpmradix && |
---|
430 | (y = seqchj[k]) != Tpmradix ) { |
---|
431 | fxy[x][y] += 1.0; |
---|
432 | fx[x] += 1.0; |
---|
433 | fy[y] += 1.0; |
---|
434 | nsite++; |
---|
435 | } |
---|
436 | } |
---|
437 | for (x = 0; x < Tpmradix; x++) { |
---|
438 | for (y = 0; y < Tpmradix; y++) { |
---|
439 | fxy[x][y] /= nsite; |
---|
440 | } |
---|
441 | fxxyy[x][x] = fx[x] * fy[x]; |
---|
442 | } |
---|
443 | if (Debug) { |
---|
444 | for (x = 0; x < Tpmradix; x++) { |
---|
445 | putchar('\n'); |
---|
446 | for (y = 0; y < Tpmradix; y++) { |
---|
447 | printf("%10.5f", fxy[x][y]); |
---|
448 | }; |
---|
449 | } putchar('\n'); |
---|
450 | } |
---|
451 | dist = -log( determinant(fxy, Tpmradix) ); |
---|
452 | comp = log( determinant(fxxyy, Tpmradix) ) / 2.0; |
---|
453 | if (Debug) printf("%10.5f%10.5f\n", dist, comp); |
---|
454 | dist = ( dist + comp ) / Tpmradix; |
---|
455 | if (dist < LOWERLIMIT) dist = LOWERLIMIT; |
---|
456 | if (dist > UPPERLIMIT) dist = UPPERLIMIT; |
---|
457 | distanmat[i][j] = dist; |
---|
458 | distanmat[j][i] = dist; |
---|
459 | } |
---|
460 | distanmat[i][i] = 0.0; |
---|
461 | } |
---|
462 | distanmat[maxspc-1][maxspc-1] = 0.0; |
---|
463 | free_dmatrix(fxy); |
---|
464 | free_dmatrix(fxxyy); |
---|
465 | |
---|
466 | if (Debug) |
---|
467 | for (i = 0; i < maxspc; i++) { |
---|
468 | for (j = 0; j < maxspc; j++) { |
---|
469 | printf("%5.1f", distanmat[i][j]); |
---|
470 | } |
---|
471 | printf("\n"); |
---|
472 | } |
---|
473 | } /* lddistance */ |
---|
474 | |
---|
475 | |
---|
476 | static void |
---|
477 | pdistan(dis, seq, probk, weight, nptrn) |
---|
478 | double *dis; |
---|
479 | ivector seq; |
---|
480 | dmatrix probk; |
---|
481 | ivector weight; |
---|
482 | int nptrn; |
---|
483 | { |
---|
484 | int i, j, k, l; |
---|
485 | int maxloop = 30; |
---|
486 | double dist, distold, distdiff; |
---|
487 | double lkl, ld1, ld2, lkld1, lkld2, prod; |
---|
488 | dmattpmty tprob, tdiff1, tdiff2; |
---|
489 | |
---|
490 | dist = *dis; |
---|
491 | for (l = 0; l < maxloop; l++) { |
---|
492 | tdiffmtrx(dist, tprob, tdiff1, tdiff2); |
---|
493 | lkld1 = lkld2 = 0.0; |
---|
494 | for (k = 0; k < nptrn; k++) { |
---|
495 | j = seq[k]; |
---|
496 | /* printf(" %d", i); */ |
---|
497 | lkl = ld1 = ld2 = 0.0; |
---|
498 | for (i = 0; i < Tpmradix; i++) { |
---|
499 | prod = Freqtpm[i] * probk[k][i]; |
---|
500 | lkl += prod * tprob[i][j]; |
---|
501 | ld1 += prod * tdiff1[i][j]; |
---|
502 | ld2 += prod * tdiff2[i][j]; |
---|
503 | } |
---|
504 | ld1 /= lkl; |
---|
505 | lkld1 += ld1 * weight[k]; |
---|
506 | lkld2 += (ld2 / lkl - ld1 * ld1 ) * weight[k]; |
---|
507 | } |
---|
508 | distold = dist; |
---|
509 | distdiff = -(lkld1 / lkld2); |
---|
510 | if (distdiff > UPPERLIMIT - distold) { |
---|
511 | if (distold < UPPERLIMIT / 2.0) |
---|
512 | dist = LOWERLIMIT; |
---|
513 | else |
---|
514 | dist = UPPERLIMIT; |
---|
515 | } else |
---|
516 | dist += distdiff; |
---|
517 | if (dist < LOWERLIMIT) dist = LOWERLIMIT; |
---|
518 | if (dist > UPPERLIMIT) dist = UPPERLIMIT; |
---|
519 | #if DISTAN_DEBUG |
---|
520 | if (Debug) |
---|
521 | printf("%3d %8.5f %8.5f %8.5f %8.5f\n", |
---|
522 | l, lkld1, lkld2, distdiff, dist); |
---|
523 | #endif |
---|
524 | if (fabs(distold - dist) < DEPSILON) /* EPSILON */ |
---|
525 | break; |
---|
526 | } |
---|
527 | *dis = dist; |
---|
528 | } /* pdistan */ |
---|
529 | |
---|
530 | |
---|
531 | void |
---|
532 | tdistan(seqi, seqj, probk, weight, nptrn, len, lvari, triprob) |
---|
533 | ivector seqi, seqj; |
---|
534 | dmatrix probk; |
---|
535 | ivector weight; |
---|
536 | int nptrn; |
---|
537 | double *len, *lvari; |
---|
538 | dcube triprob; |
---|
539 | { |
---|
540 | int i, j, k, l, m, nl, cnvrg, maxloop = 3; |
---|
541 | double lenm, lenold, lendiff, lenpre, lkl, ld1, ld2, lkld1, lkld2; |
---|
542 | double sum, prod1, prod2, minarc, maxarc; |
---|
543 | dmattpmty tprob, tdiff1, tdiff2; |
---|
544 | ivector triseq[3], seq; |
---|
545 | dmatrix mprob, cprob, dprob; |
---|
546 | |
---|
547 | minarc = LOWERLIMIT; |
---|
548 | maxarc = 100.0; |
---|
549 | |
---|
550 | triseq[1] = seqi; |
---|
551 | triseq[2] = seqj; |
---|
552 | tprobmtrx(len[0], tprob); |
---|
553 | mprob = triprob[0]; |
---|
554 | for (k = 0; k < nptrn; k++) { |
---|
555 | for (i = 0; i < Tpmradix; i++) { |
---|
556 | for (j = 0, sum = 0.0; j < Tpmradix; j++) |
---|
557 | sum += probk[k][j] * tprob[j][i]; |
---|
558 | mprob[k][i] = sum; |
---|
559 | } |
---|
560 | } |
---|
561 | for (m = 1; m < 3; m++) { |
---|
562 | tprobmtrx(len[m], tprob); |
---|
563 | seq = triseq[m]; |
---|
564 | mprob = triprob[m]; |
---|
565 | for (k = 0; k < nptrn; k++) { |
---|
566 | j = seq[k]; |
---|
567 | for (i = 0; i < Tpmradix; i++) mprob[k][i] = tprob[j][i]; |
---|
568 | } |
---|
569 | } |
---|
570 | #if DISTAN_DEBUG |
---|
571 | if (Debug) |
---|
572 | printf(" %10.5f %10.5f %10.5f\n", len[0],len[1],len[2]); |
---|
573 | #endif |
---|
574 | cnvrg = 0; |
---|
575 | for (nl = 0; nl < 20; nl++) { |
---|
576 | |
---|
577 | mprob = triprob[0]; |
---|
578 | cprob = triprob[1]; |
---|
579 | dprob = triprob[2]; |
---|
580 | for (k = 0; k < nptrn; k++) { |
---|
581 | for (i = 0; i < Tpmradix; i++) |
---|
582 | cprob[k][i] *= dprob[k][i]; |
---|
583 | } |
---|
584 | lenm = len[0]; |
---|
585 | lenpre = lenm; |
---|
586 | for (l = 0; l < maxloop; l++) { |
---|
587 | tdiffmtrx(lenm, tprob, tdiff1, tdiff2); |
---|
588 | lkld1 = lkld2 = 0.0; |
---|
589 | for (k = 0; k < nptrn; k++) { |
---|
590 | lkl = ld1 = ld2 = 0.0; |
---|
591 | for (i = 0; i < Tpmradix; i++) { |
---|
592 | prod1 = probk[k][i]; |
---|
593 | /* prod1 = Freqtpm[i] * probk[k][i]; */ |
---|
594 | for (j = 0; j < Tpmradix; j++) { |
---|
595 | prod2 = prod1 * cprob[k][j]; |
---|
596 | lkl += prod2 * tprob[i][j]; |
---|
597 | ld1 += prod2 * tdiff1[i][j]; |
---|
598 | ld2 += prod2 * tdiff2[i][j]; |
---|
599 | } |
---|
600 | } |
---|
601 | ld1 /= lkl; |
---|
602 | lkld1 += ld1 * (double)weight[k]; |
---|
603 | lkld2 += (ld2 / lkl - ld1 * ld1) * (double)weight[k]; |
---|
604 | } |
---|
605 | lenold = lenm; |
---|
606 | lendiff = -(lkld1 / lkld2); |
---|
607 | #if 0 |
---|
608 | printf("%3d %10.5f %10.5f %10.5f %10.5f\n", |
---|
609 | l,lenm,lkld1,lkld2,lendiff); |
---|
610 | #endif |
---|
611 | if (lendiff > maxarc - lenold) { |
---|
612 | if (lenold < maxarc / 2.0) lenm = minarc; |
---|
613 | else lenm = maxarc; |
---|
614 | } else { |
---|
615 | lenm += lendiff; |
---|
616 | } |
---|
617 | if (lenm < minarc) lenm = minarc; |
---|
618 | if (lenm > maxarc) lenm = maxarc; |
---|
619 | len[0] = lenm; |
---|
620 | lvari[0] = fabs(lkld2); |
---|
621 | #if DISTAN_DEBUG |
---|
622 | if (Debug) |
---|
623 | printf("%3d %10.5f %10.5f %10.5f %10.5f %10.5f %10.5f\n", |
---|
624 | l,lenm,len[0],len[1],len[2],lkld1,lkld2); |
---|
625 | #endif |
---|
626 | if (fabs(lenold - lenm) < DEPSILON) |
---|
627 | break; |
---|
628 | } |
---|
629 | if (abs(lenm - lenpre) < DEPSILON) { |
---|
630 | cnvrg++; |
---|
631 | if (cnvrg >= 9) |
---|
632 | goto converge; |
---|
633 | } else { |
---|
634 | cnvrg = 0; |
---|
635 | } |
---|
636 | tprobmtrx(len[0], tprob); |
---|
637 | for (k = 0; k < nptrn; k++) { |
---|
638 | for (i = 0; i < Tpmradix; i++) { |
---|
639 | for (j = 0, sum = 0.0; j < Tpmradix; j++) |
---|
640 | sum += probk[k][j] * tprob[j][i]; |
---|
641 | mprob[k][i] = sum; |
---|
642 | } |
---|
643 | } |
---|
644 | |
---|
645 | for (m = 1; m < 3; m++) { |
---|
646 | if (m == 1) { |
---|
647 | mprob = triprob[1]; |
---|
648 | cprob = triprob[2]; |
---|
649 | dprob = triprob[0]; |
---|
650 | } else { |
---|
651 | mprob = triprob[2]; |
---|
652 | cprob = triprob[0]; |
---|
653 | dprob = triprob[1]; |
---|
654 | } |
---|
655 | for (k = 0; k < nptrn; k++) { |
---|
656 | for (i = 0; i < Tpmradix; i++) |
---|
657 | cprob[k][i] *= dprob[k][i]; |
---|
658 | } |
---|
659 | lenm = len[m]; |
---|
660 | lenpre = lenm; |
---|
661 | seq = triseq[m]; |
---|
662 | for (l = 0; l < maxloop; l++) { |
---|
663 | tdiffmtrx(lenm, tprob, tdiff1, tdiff2); |
---|
664 | lkld1 = lkld2 = 0.0; |
---|
665 | for (k = 0; k < nptrn; k++) { |
---|
666 | j = seq[k]; |
---|
667 | lkl = ld1 = ld2 = 0.0; |
---|
668 | for (i = 0; i < Tpmradix; i++) { |
---|
669 | lkl += tprob[j][i] * cprob[k][i]; |
---|
670 | ld1 += tdiff1[j][i] * cprob[k][i]; |
---|
671 | ld2 += tdiff2[j][i] * cprob[k][i]; |
---|
672 | } |
---|
673 | ld1 /= lkl; |
---|
674 | lkld1 += ld1 * (double)weight[k]; |
---|
675 | lkld2 += (ld2 / lkl - ld1 * ld1) * (double)weight[k]; |
---|
676 | } |
---|
677 | lenold = lenm; |
---|
678 | lendiff = -(lkld1 / lkld2); |
---|
679 | if (lendiff > UPPERLIMIT - lenold) { |
---|
680 | if (lenold < UPPERLIMIT / 2.0) lenm = LOWERLIMIT; |
---|
681 | else lenm = UPPERLIMIT; |
---|
682 | } else { |
---|
683 | lenm += lendiff; |
---|
684 | } |
---|
685 | if (lenm < LOWERLIMIT) lenm = LOWERLIMIT; |
---|
686 | if (lenm > UPPERLIMIT) lenm = UPPERLIMIT; |
---|
687 | len[m] = lenm; |
---|
688 | lvari[m] = fabs(lkld2); |
---|
689 | #if DISTAN_DEBUG |
---|
690 | if (Debug) |
---|
691 | printf("%3d %10.5f %10.5f %10.5f %10.5f %10.5f %10.5f\n", |
---|
692 | l,lenm,len[0],len[1],len[2],lkld1,lkld2); |
---|
693 | #endif |
---|
694 | if (fabs(lenold - lenm) < DEPSILON) |
---|
695 | break; |
---|
696 | } |
---|
697 | if (abs(lenm - lenpre) < DEPSILON) { |
---|
698 | cnvrg++; |
---|
699 | if (cnvrg >= 9) |
---|
700 | goto converge; |
---|
701 | } else { |
---|
702 | cnvrg = 0; |
---|
703 | } |
---|
704 | tprobmtrx(len[m], tprob); |
---|
705 | for (k = 0; k < nptrn; k++) { |
---|
706 | j = seq[k]; |
---|
707 | for (i = 0; i < Tpmradix; i++) |
---|
708 | mprob[k][i] = tprob[j][i]; |
---|
709 | } |
---|
710 | } |
---|
711 | #if DISTAN_DEBUG |
---|
712 | if (Debug) |
---|
713 | printf(" %10.5f %10.5f %10.5f\n", |
---|
714 | len[0],len[1],len[2]); |
---|
715 | #endif |
---|
716 | } |
---|
717 | |
---|
718 | converge: |
---|
719 | if (Debug) putchar('\n'); |
---|
720 | return; |
---|
721 | } /* tdistan */ |
---|
722 | |
---|
723 | |
---|
724 | void |
---|
725 | tdistan2(seqi, seqj, seqk, seqw, nsite, len, lvari, triprob) |
---|
726 | ivector seqi, seqj, seqk, seqw; |
---|
727 | int nsite; |
---|
728 | double *len, *lvari; |
---|
729 | dcube triprob; |
---|
730 | { |
---|
731 | int i, j, k, l, m, nl, cnvrg, maxloop = 2; |
---|
732 | double lenm, lenold, lendiff, lenpre, lkl, ld1, ld2, lkld1, lkld2; |
---|
733 | dmattpmty tprob, tdiff1, tdiff2; |
---|
734 | ivector triseq[3], seq; |
---|
735 | dmatrix mprob, cprob, dprob; |
---|
736 | |
---|
737 | triseq[0] = seqi; |
---|
738 | triseq[1] = seqj; |
---|
739 | triseq[2] = seqk; |
---|
740 | for (m = 0; m < 3; m++) { |
---|
741 | tprobmtrx(len[m], tprob); |
---|
742 | seq = triseq[m]; |
---|
743 | mprob = triprob[m]; |
---|
744 | for (k = 0; k < nsite; k++) { |
---|
745 | j = seq[k]; |
---|
746 | for (i = 0; i < Tpmradix; i++) |
---|
747 | mprob[k][i] = tprob[j][i]; |
---|
748 | } |
---|
749 | } |
---|
750 | #if DISTAN_DEBUG |
---|
751 | if (Debug) |
---|
752 | printf(" %10.5f %10.5f %10.5f\n", len[0],len[1],len[2]); |
---|
753 | #endif |
---|
754 | cnvrg = 0; |
---|
755 | for (nl = 0; nl < 9; nl++) { |
---|
756 | for (m = 0; m < 3; m++) { |
---|
757 | if (m == 0) { |
---|
758 | mprob = triprob[0]; |
---|
759 | cprob = triprob[1]; |
---|
760 | dprob = triprob[2]; |
---|
761 | } else if (m == 1) { |
---|
762 | mprob = triprob[1]; |
---|
763 | cprob = triprob[2]; |
---|
764 | dprob = triprob[0]; |
---|
765 | } else { |
---|
766 | mprob = triprob[2]; |
---|
767 | cprob = triprob[0]; |
---|
768 | dprob = triprob[1]; |
---|
769 | } |
---|
770 | for (k = 0; k < nsite; k++) { |
---|
771 | for (i = 0; i < Tpmradix; i++) |
---|
772 | cprob[k][i] *= dprob[k][i]; |
---|
773 | } |
---|
774 | lenm = len[m]; |
---|
775 | lenpre = lenm; |
---|
776 | seq = triseq[m]; |
---|
777 | for (l = 0; l < maxloop; l++) { |
---|
778 | tdiffmtrx(lenm, tprob, tdiff1, tdiff2); |
---|
779 | lkld1 = lkld2 = 0.0; |
---|
780 | for (k = 0; k < nsite; k++) { |
---|
781 | j = seq[k]; |
---|
782 | lkl = ld1 = ld2 = 0.0; |
---|
783 | for (i = 0; i < Tpmradix; i++) { |
---|
784 | lkl += tprob[j][i] * cprob[k][i]; |
---|
785 | ld1 += tdiff1[j][i] * cprob[k][i]; |
---|
786 | ld2 += tdiff2[j][i] * cprob[k][i]; |
---|
787 | } |
---|
788 | ld1 /= lkl; |
---|
789 | lkld1 += ld1 * (double)seqw[k]; |
---|
790 | lkld2 += (ld2 / lkl - ld1 * ld1) * (double)seqw[k]; |
---|
791 | } |
---|
792 | lenold = lenm; |
---|
793 | lendiff = -(lkld1 / lkld2); |
---|
794 | if (lendiff > UPPERLIMIT - lenold) { |
---|
795 | if (lenold < UPPERLIMIT / 2.0) lenm = LOWERLIMIT; |
---|
796 | else lenm = UPPERLIMIT; |
---|
797 | } else { |
---|
798 | lenm += lendiff; |
---|
799 | } |
---|
800 | if (lenm < LOWERLIMIT) lenm = LOWERLIMIT; |
---|
801 | if (lenm > UPPERLIMIT) lenm = UPPERLIMIT; |
---|
802 | len[m] = lenm; |
---|
803 | lvari[m] = fabs(lkld2); |
---|
804 | #if DISTAN_DEBUG |
---|
805 | if (Debug) |
---|
806 | printf("%3d %10.5f %10.5f %10.5f %10.5f %10.5f %10.5f\n", |
---|
807 | l,lenm,len[0],len[1],len[2],lkld1,lkld2); |
---|
808 | #endif |
---|
809 | if (fabs(lenold - lenm) < DEPSILON) |
---|
810 | break; |
---|
811 | } |
---|
812 | if (abs(lenm - lenpre) < DEPSILON) { |
---|
813 | cnvrg++; |
---|
814 | if (cnvrg >= 9) |
---|
815 | goto converge; |
---|
816 | } else { |
---|
817 | cnvrg = 0; |
---|
818 | } |
---|
819 | |
---|
820 | /* if (abs(lenm - lenpre) < DEPSILON) */ |
---|
821 | tprobmtrx(len[m], tprob); |
---|
822 | for (k = 0; k < nsite; k++) { |
---|
823 | j = seq[k]; |
---|
824 | for (i = 0; i < Tpmradix; i++) |
---|
825 | mprob[k][i] = tprob[j][i]; |
---|
826 | } |
---|
827 | } |
---|
828 | #if DISTAN_DEBUG |
---|
829 | if (Debug) |
---|
830 | printf(" %10.5f %10.5f %10.5f\n", |
---|
831 | len[0],len[1],len[2]); |
---|
832 | #endif |
---|
833 | } |
---|
834 | |
---|
835 | converge: |
---|
836 | if (Debug) putchar('\n'); |
---|
837 | return; |
---|
838 | } /* tdistan2 */ |
---|
839 | |
---|
840 | |
---|
841 | void |
---|
842 | tridistance(distanmat, seqconint, weight, maxspc, numptrn) |
---|
843 | dmatrix distanmat; |
---|
844 | imatrix seqconint; |
---|
845 | ivector weight; |
---|
846 | int maxspc, numptrn; |
---|
847 | { |
---|
848 | int i, j, k, kk, maxj, maxj2, maxj3; |
---|
849 | double dist, len[3], lvari[3], maxprob; /* vari */ |
---|
850 | ivector seqi, seqj; |
---|
851 | dvector disvec; |
---|
852 | dmatrix probk; |
---|
853 | dcube triprob; |
---|
854 | |
---|
855 | disvec = new_dvector(Maxspc); |
---|
856 | probk = new_dmatrix(numptrn, Tpmradix); |
---|
857 | triprob = new_dcube(3, numptrn, Tpmradix); |
---|
858 | |
---|
859 | for (i = 0; i < numptrn; i++) { |
---|
860 | for (j = 0; j < Tpmradix; j++) probk[i][j] = 0.0; |
---|
861 | /* for (j = 0; j < Tpmradix; j++) probk[i][j] = Freqtpm[j] * 1; */ |
---|
862 | for (j = 0; j < maxspc; j++) { |
---|
863 | kk = seqconint[j][i]; |
---|
864 | if (kk >= 0) { |
---|
865 | probk[i][kk] += 1.0; |
---|
866 | } else { |
---|
867 | for (k = 0; k < Tpmradix; k++) probk[i][k] += Freqtpm[k]; |
---|
868 | } |
---|
869 | } |
---|
870 | #if 0 |
---|
871 | for (j = 0; j < Tpmradix; j++) probk[i][j] /= maxspc; |
---|
872 | #endif |
---|
873 | /* for (j = 0; j < Tpmradix; j++) probk[i][j] = Freqtpm[j]; */ |
---|
874 | #if 1 |
---|
875 | for (j = 0, maxprob = 0.0; j < Tpmradix; j++) { |
---|
876 | if (probk[i][j] > maxprob) { |
---|
877 | maxj = j; |
---|
878 | maxprob = probk[i][j]; |
---|
879 | maxj3 = maxj2 = -1; |
---|
880 | } else if (probk[i][j] == maxprob) { |
---|
881 | maxj3 = maxj2; |
---|
882 | maxj2 = maxj; |
---|
883 | maxj = j; |
---|
884 | } |
---|
885 | probk[i][j] = 0.0; |
---|
886 | } |
---|
887 | if (maxprob == 1.0) { |
---|
888 | for (j = 0; j < Tpmradix; j++) probk[i][j] = Freqtpm[j]; |
---|
889 | } else { |
---|
890 | if (maxj3 != -1) { |
---|
891 | probk[i][maxj ] = 0.334; |
---|
892 | probk[i][maxj2] = 0.333; |
---|
893 | probk[i][maxj3] = 0.333; |
---|
894 | } else if (maxj2 != -1) { |
---|
895 | probk[i][maxj ] = 0.5; |
---|
896 | probk[i][maxj2] = 0.5; |
---|
897 | } else { |
---|
898 | probk[i][maxj ] = 1.0; |
---|
899 | } |
---|
900 | } |
---|
901 | if (maxprob <= (maxspc * 0.3)) { |
---|
902 | for (j = 0; j < Tpmradix; j++) probk[i][j] = Freqtpm[j]; |
---|
903 | } |
---|
904 | #endif |
---|
905 | #if 0 |
---|
906 | for (j=0;j<Tpmradix;j++) printf("%3.0f",probk[i][j]*100);putchar('\n'); |
---|
907 | #endif |
---|
908 | } |
---|
909 | for (i = 0; i < maxspc; i++) { |
---|
910 | dist = 10.0; |
---|
911 | pdistan(&dist, seqconint[i], probk, weight, numptrn); |
---|
912 | disvec[i] = dist; |
---|
913 | /* printf("%20.10f\n", dist); */ |
---|
914 | } |
---|
915 | for (i = 0; i < maxspc - 1; i++) { |
---|
916 | seqi = seqconint[i]; |
---|
917 | for (j = i+1; j < maxspc; j++) { |
---|
918 | seqj = seqconint[j]; |
---|
919 | #if 0 |
---|
920 | for (k = 0; k < numptrn; k++) { |
---|
921 | for (l = 0; l < Tpmradix; l++) probk[k][l] = 0.0; |
---|
922 | for (l = 0; l < maxspc; l++) { |
---|
923 | if (l == i || l == j) continue; |
---|
924 | kk = seqconint[l][k]; |
---|
925 | if (kk >= 0) { |
---|
926 | probk[k][kk] += 1.0; |
---|
927 | } else { |
---|
928 | for (m=0; m<Tpmradix; m++) probk[k][m] += Freqtpm[m]; |
---|
929 | } |
---|
930 | } |
---|
931 | for (l = 0; l < Tpmradix; l++) probk[k][l] /= (maxspc - 2); |
---|
932 | } |
---|
933 | #endif |
---|
934 | len[0] = (disvec[i] + disvec[j] - distanmat[i][j]) * 0.5; |
---|
935 | len[1] = (disvec[i] - disvec[j] + distanmat[i][j]) * 0.5; |
---|
936 | len[2] = (disvec[j] - disvec[i] + distanmat[i][j]) * 0.5; |
---|
937 | tdistan(seqi, seqj, probk, weight, numptrn, len, lvari, triprob); |
---|
938 | /* vari = sqrt(lvari[0] + lvari[1] + lvari[2]); */ |
---|
939 | dist = len[1] + len[2]; |
---|
940 | /* printf("%3d%3d%15.9f%15.9f%15.9f\n", i,j,len[0],len[1],len[2]); */ |
---|
941 | if (Info_optn) { |
---|
942 | distanmat[i][j] = len[1]; |
---|
943 | distanmat[j][i] = len[2]; |
---|
944 | } else { |
---|
945 | distanmat[i][j] = dist; |
---|
946 | distanmat[j][i] = dist; |
---|
947 | } |
---|
948 | } |
---|
949 | } |
---|
950 | |
---|
951 | free_dcube(triprob); |
---|
952 | free_dmatrix(probk); |
---|
953 | free_dvector(disvec); |
---|
954 | } /* tridistance */ |
---|
955 | |
---|
956 | #if 0 |
---|
957 | void |
---|
958 | tridistance2(distanmat, seqchar, maxspc, numsite) |
---|
959 | dmatrix distanmat; |
---|
960 | cmatrix seqchar; |
---|
961 | int maxspc, numsite; |
---|
962 | { |
---|
963 | int i, j, k, l, m, xi, xj, xk, nsite; |
---|
964 | double dist, vari, len[3], lvari[3]; |
---|
965 | dcube tridistan, triprob; |
---|
966 | double *ptr, *nptr; |
---|
967 | ivector seqi, seqj, seqk, seqw; |
---|
968 | cvector seqchi, seqchj, seqchk; |
---|
969 | int gene[TPMRADIX + 1]; |
---|
970 | |
---|
971 | seqi = new_ivector(numsite); |
---|
972 | seqj = new_ivector(numsite); |
---|
973 | seqk = new_ivector(numsite); |
---|
974 | seqw = new_ivector(numsite); |
---|
975 | tridistan = new_dcube(maxspc, maxspc, maxspc); |
---|
976 | triprob = new_dcube(3, numsite, Tpmradix); |
---|
977 | nptr = **tridistan + maxspc*maxspc*maxspc; |
---|
978 | for (ptr = **tridistan; ptr < nptr; ptr++) |
---|
979 | *ptr = 0.0; |
---|
980 | |
---|
981 | for (i = 0; i < maxspc-2; i++) { |
---|
982 | seqchi = seqchar[i]; |
---|
983 | for (j = i+1; j < maxspc-1; j++) { |
---|
984 | seqchj = seqchar[j]; |
---|
985 | for (k = j+1; k < maxspc; k++) { |
---|
986 | seqchk = seqchar[k]; |
---|
987 | |
---|
988 | for ( m = 0; m < Tpmradix + 1; m++) gene[m] = 0; |
---|
989 | for ( m = 0; m < numsite; m++) { |
---|
990 | xi = seqchi[m]; |
---|
991 | xj = seqchj[m]; |
---|
992 | xk = seqchk[m]; |
---|
993 | if (xi == xj && xi == xk) { |
---|
994 | gene[xi]++; |
---|
995 | seqw[m] = FALSE; |
---|
996 | } else if (xi == Tpmradix || xj == Tpmradix || |
---|
997 | xk == Tpmradix) { |
---|
998 | seqw[m] = FALSE; |
---|
999 | } else { |
---|
1000 | seqw[m] = TRUE; |
---|
1001 | } |
---|
1002 | } |
---|
1003 | for ( m = 0, l = 0; m < numsite; m++) { |
---|
1004 | if (seqw[m]) { |
---|
1005 | seqi[l] = seqchi[m]; |
---|
1006 | seqj[l] = seqchj[m]; |
---|
1007 | seqk[l] = seqchk[m]; |
---|
1008 | seqw[l] = 1; |
---|
1009 | l++; |
---|
1010 | } |
---|
1011 | } |
---|
1012 | for ( m = 0; m < Tpmradix; m++) { |
---|
1013 | if (gene[m]) { |
---|
1014 | seqi[l] = m; |
---|
1015 | seqj[l] = m; |
---|
1016 | seqk[l] = m; |
---|
1017 | seqw[l] = gene[m]; |
---|
1018 | l++; |
---|
1019 | } |
---|
1020 | } |
---|
1021 | nsite = l; |
---|
1022 | #ifdef DISDEBUG |
---|
1023 | for (m=0;m<nsite;m++) putchar(int2ami(seqi[m])); putchar('\n'); |
---|
1024 | for (m=0;m<nsite;m++) putchar(int2ami(seqj[m])); putchar('\n'); |
---|
1025 | for (m=0;m<nsite;m++) putchar(int2ami(seqk[m])); putchar('\n'); |
---|
1026 | for (m=0;m<nsite;m++) printf("%d",seqw[m]); putchar('\n'); |
---|
1027 | #endif |
---|
1028 | len[0] = (distanmat[i][j]+distanmat[i][k]-distanmat[j][k])/2.0; |
---|
1029 | len[1] = (distanmat[j][i]+distanmat[j][k]-distanmat[i][k])/2.0; |
---|
1030 | len[2] = (distanmat[k][i]+distanmat[k][j]-distanmat[i][j])/2.0; |
---|
1031 | tdistan2(seqi, seqj, seqk, seqw, nsite, len, lvari, triprob); |
---|
1032 | vari = sqrt(lvari[0] + lvari[1] + lvari[2]); |
---|
1033 | if (Varia_optn) { |
---|
1034 | tridistan[i][j][k] = (len[0] + len[1]) / vari; |
---|
1035 | tridistan[i][k][j] = (len[0] + len[2]) / vari; |
---|
1036 | tridistan[j][i][k] = (len[1] + len[0]) / vari; |
---|
1037 | tridistan[j][k][i] = (len[1] + len[2]) / vari; |
---|
1038 | tridistan[k][i][j] = (len[2] + len[0]) / vari; |
---|
1039 | tridistan[k][j][i] = (len[2] + len[1]) / vari; |
---|
1040 | } else { |
---|
1041 | tridistan[i][j][k] = (len[0] + len[1]); |
---|
1042 | tridistan[i][k][j] = (len[0] + len[2]); |
---|
1043 | tridistan[j][i][k] = (len[1] + len[0]); |
---|
1044 | tridistan[j][k][i] = (len[1] + len[2]); |
---|
1045 | tridistan[k][i][j] = (len[2] + len[0]); |
---|
1046 | tridistan[k][j][i] = (len[2] + len[1]); |
---|
1047 | } |
---|
1048 | } |
---|
1049 | } |
---|
1050 | } |
---|
1051 | |
---|
1052 | if (Debug) |
---|
1053 | for (i = 0; i < maxspc; i++) { |
---|
1054 | for (j = 0; j < maxspc; j++) { |
---|
1055 | for (k = 0; k < maxspc; k++) { |
---|
1056 | printf("%5.1f", tridistan[i][j][k]); |
---|
1057 | } |
---|
1058 | printf("\n"); |
---|
1059 | } |
---|
1060 | printf("\n"); |
---|
1061 | printf("\n"); |
---|
1062 | } |
---|
1063 | |
---|
1064 | if (Debug) |
---|
1065 | for (i = 0; i < maxspc; i++) { |
---|
1066 | printf("%s\n", Identif[i]); |
---|
1067 | for (j = 0; j < maxspc; j++) { |
---|
1068 | for (k = 0; k < maxspc; k++) { |
---|
1069 | printf("%10.6f", tridistan[i][j][k]); |
---|
1070 | /* if ((k+1)%5 == 0) |
---|
1071 | printf("\n"); */ |
---|
1072 | } |
---|
1073 | printf("\n"); |
---|
1074 | } |
---|
1075 | printf("\n"); |
---|
1076 | } |
---|
1077 | |
---|
1078 | for (i = 0; i < maxspc; i++) { |
---|
1079 | for (j = 0; j < maxspc; j++) { |
---|
1080 | for (k = 0, dist = 0.0; k < maxspc; k++) |
---|
1081 | dist += tridistan[i][j][k]; |
---|
1082 | dist /= (maxspc - 2); |
---|
1083 | distanmat[i][j] = dist; |
---|
1084 | } |
---|
1085 | } |
---|
1086 | |
---|
1087 | free_dcube(triprob); |
---|
1088 | free_dcube(tridistan); |
---|
1089 | free_ivector(seqw); |
---|
1090 | free_ivector(seqk); |
---|
1091 | free_ivector(seqj); |
---|
1092 | free_ivector(seqi); |
---|
1093 | } /* tridistance2 */ |
---|
1094 | #endif |
---|
1095 | |
---|
1096 | void |
---|
1097 | putdistance(identif, sciname, engname, distanmat, maxspc) |
---|
1098 | cmatrix identif; |
---|
1099 | cmatrix sciname; |
---|
1100 | cmatrix engname; |
---|
1101 | dmatrix distanmat; |
---|
1102 | int maxspc; |
---|
1103 | { |
---|
1104 | int i, j, k, numk, maxk; |
---|
1105 | |
---|
1106 | if (!Info_optn) { |
---|
1107 | |
---|
1108 | for (i = 0; i < maxspc; i++) { |
---|
1109 | printf("%s", identif[i]); |
---|
1110 | if (sciname[i] != '\0') printf(" %s", sciname[i]); |
---|
1111 | if (engname[i] != '\0') printf(" %s", engname[i]); |
---|
1112 | putchar('\n'); |
---|
1113 | for (j = 0; j < maxspc; j++) { |
---|
1114 | if (!Varia_optn) { |
---|
1115 | printf("%15.12f", distanmat[i][j] / 100.0); |
---|
1116 | } else { |
---|
1117 | if (i < j) printf("%15.12f", distanmat[i][j] / 100.0); |
---|
1118 | else printf("%15.12f", distanmat[i][j] / 10000.0); |
---|
1119 | } |
---|
1120 | if ((j+1)%5 == 0) putchar('\n'); |
---|
1121 | } |
---|
1122 | if (j%5 != 0) putchar('\n'); |
---|
1123 | } |
---|
1124 | |
---|
1125 | } else { /* Info_optn */ |
---|
1126 | |
---|
1127 | numk = 23; |
---|
1128 | for (k = 0; maxk = k + numk, k < maxspc; k += numk) { |
---|
1129 | if (maxk > maxspc) maxk = maxspc; |
---|
1130 | printf("\n%4s%-6s", "sub/","100"); |
---|
1131 | printf("%3d", k + 1); |
---|
1132 | for (j = k + 1; j < maxk; j++) printf("%3d", (j+1) % 100); |
---|
1133 | putchar('\n'); |
---|
1134 | printf("%4s%6s", "",""); |
---|
1135 | for (j = k; j < maxk; j++) printf("%3.2s", Identif[j]); |
---|
1136 | putchar('\n'); |
---|
1137 | for (i = 0; i < maxspc; i++) { |
---|
1138 | printf("%-4d%-6.6s", i + 1, Identif[i]); |
---|
1139 | for (j = k; j < maxk; j++) { |
---|
1140 | if (i != j) |
---|
1141 | printf("%3.0f", distanmat[i][j]); |
---|
1142 | else |
---|
1143 | printf("%3.2s", Identif[i]); |
---|
1144 | } |
---|
1145 | putchar('\n'); |
---|
1146 | } |
---|
1147 | } |
---|
1148 | |
---|
1149 | } |
---|
1150 | } /* putdistance */ |
---|
1151 | |
---|
1152 | void |
---|
1153 | checkseq(seqconint, maxspc, numptrn) |
---|
1154 | imatrix seqconint; |
---|
1155 | int maxspc; |
---|
1156 | int numptrn; |
---|
1157 | { |
---|
1158 | int i, j, k; |
---|
1159 | boolean same, sameotu; |
---|
1160 | |
---|
1161 | sameotu = FALSE; |
---|
1162 | for (i = 0; i < maxspc - 1; i++) { |
---|
1163 | for (j = i + 1; j < maxspc; j++) { |
---|
1164 | for (k = 0, same = TRUE; k < numptrn; k++) { |
---|
1165 | if (seqconint[i][k] != seqconint[j][k]) { |
---|
1166 | same = FALSE; |
---|
1167 | break; |
---|
1168 | } |
---|
1169 | } |
---|
1170 | if (same) { |
---|
1171 | fprintf(stderr, |
---|
1172 | "ERROR: %s[%d] & %s[%d] are same sequences!\n", |
---|
1173 | Identif[i], i+1, Identif[j], j+1); |
---|
1174 | sameotu = TRUE; |
---|
1175 | } |
---|
1176 | } |
---|
1177 | } |
---|
1178 | if (sameotu) { |
---|
1179 | fprintf(stderr, |
---|
1180 | "ERROR: Please remove same sequences except for one!\n"); |
---|
1181 | /* exit(1); */ |
---|
1182 | } |
---|
1183 | } /* checkseq */ |
---|