1 | /* |
---|
2 | * totalml.c Adachi, J. 1994.12.30 |
---|
3 | * Copyright (C) 1994 J. Adachi & M. Hasegawa, All rights reserved. |
---|
4 | */ |
---|
5 | |
---|
6 | #define DEBUG 0 |
---|
7 | |
---|
8 | #define MAIN_MODULE 1 |
---|
9 | #include "totalml.h" |
---|
10 | |
---|
11 | void |
---|
12 | copyright() |
---|
13 | { |
---|
14 | fprintf(stderr, "TotalML %s(%s) ", VERSION, DATE ); |
---|
15 | fprintf(stderr, "Total ML Inference of Molecular Phylogeny\n"); |
---|
16 | fprintf(stderr, "Copyright (C) 1994 J. Adachi & M. Hasegawa, "); |
---|
17 | fprintf(stderr, "All rights reserved.\n"); |
---|
18 | /* fprintf(stderr, " TotalML comes with NO WARRANTY\n"); */ |
---|
19 | } |
---|
20 | |
---|
21 | |
---|
22 | void |
---|
23 | usage() |
---|
24 | { |
---|
25 | copyright(); |
---|
26 | fprintf(stderr, "Usage: %s [switches] LLS_files\n", Prog_name); |
---|
27 | fprintf(stderr, " Help: %s -h\n", Prog_name); |
---|
28 | } |
---|
29 | |
---|
30 | |
---|
31 | void |
---|
32 | helpinfo() |
---|
33 | { |
---|
34 | copyright(); |
---|
35 | fprintf(stderr, "Usage: %s [switches] LLS_files\n", Prog_name); |
---|
36 | fprintf(stderr, "switches:\n"); |
---|
37 | fprintf(stderr, "-b no Bootstrap probabilities\n"); |
---|
38 | fprintf(stderr, "-v verbose to stderr\n"); |
---|
39 | fprintf(stderr, "-i, -w output some information\n"); |
---|
40 | } |
---|
41 | |
---|
42 | |
---|
43 | /* #if 0 */ |
---|
44 | /* void */ |
---|
45 | /* header(ofp, numseqs, allsite, commentp) */ |
---|
46 | /* FILE *ofp; */ |
---|
47 | /* int *numseqs; */ |
---|
48 | /* int *allsite; */ |
---|
49 | /* char **commentp; */ |
---|
50 | /* { */ |
---|
51 | /* time_t ct; */ |
---|
52 | /* char *datetime, *ip, *jp; */ |
---|
53 | |
---|
54 | /* fprintf(ofp, "%s %s ", Prog_name, VERSION); */ |
---|
55 | |
---|
56 | /* ct = time(NULL); */ |
---|
57 | /* datetime = ctime(&ct); */ |
---|
58 | /* for (ip = datetime+11, jp = datetime+20; *jp != '\n'; ) *ip++ = *jp++; */ |
---|
59 | /* *ip = '\0'; */ |
---|
60 | /* fputs(datetime + 4, ofp); */ |
---|
61 | /* fprintf(ofp, " %d data %d sites.\n", *numseqs, *allsite); */ |
---|
62 | /* /* fprintf(ofp, " %d data %d sites. %s\n", *numseqs, *allsite, *commentp); */ */ |
---|
63 | /* } /*_ header */ */ |
---|
64 | /* #endif */ |
---|
65 | |
---|
66 | |
---|
67 | void |
---|
68 | header(ofp, maxspc, numsite, commentp) |
---|
69 | FILE *ofp; |
---|
70 | int *maxspc; |
---|
71 | int *numsite; |
---|
72 | char **commentp; |
---|
73 | { |
---|
74 | char date[32]; |
---|
75 | |
---|
76 | strftime(date, 32, "%x", localtime(&Ct0)); |
---|
77 | fprintf(ofp, "%s %s(%s)", Prog_name, VERSION, date); |
---|
78 | fprintf(ofp, " %d OTUs %d sites. %s\n", *maxspc, *numsite, *commentp); |
---|
79 | } /*_ header */ |
---|
80 | |
---|
81 | |
---|
82 | |
---|
83 | void |
---|
84 | getsize(ifp, ntree, nsite, commentp) |
---|
85 | FILE *ifp; |
---|
86 | int *ntree; |
---|
87 | int *nsite; |
---|
88 | char **commentp; |
---|
89 | { |
---|
90 | char *cp, *np; |
---|
91 | char line[BUFLINE]; |
---|
92 | |
---|
93 | if (fgets(line, BUFLINE, ifp) != NULL) { |
---|
94 | if (sscanf(line, "%d %d", ntree, nsite) == 2) { |
---|
95 | for (cp = line; isdigit(*cp) || isspace(*cp) && *cp != '\0'; cp++) |
---|
96 | ; |
---|
97 | *commentp = new_cvector(strlen(cp) + 1); |
---|
98 | if (*cp != '\0') { |
---|
99 | for (np = *commentp; *cp != '\n' && *cp != '\0'; cp++) |
---|
100 | *np++ = *cp; |
---|
101 | *np = '\0'; |
---|
102 | } else { |
---|
103 | **commentp = '\0'; |
---|
104 | } |
---|
105 | if (Debug) |
---|
106 | fprintf(stdout, "%d trees, %d sites, %s\n", |
---|
107 | *ntree, *nsite, *commentp); |
---|
108 | } else { |
---|
109 | fputs(line, stderr); |
---|
110 | fprintf(stderr, "\nBad format, first line of input file.\n"); |
---|
111 | exit(1); |
---|
112 | } |
---|
113 | } else { |
---|
114 | fprintf(stderr, "\nCan't read input file.\n"); |
---|
115 | exit(1); |
---|
116 | } |
---|
117 | return; |
---|
118 | } /*_ getsize */ |
---|
119 | |
---|
120 | |
---|
121 | void |
---|
122 | getlnlklsite(ifp, alls, ntree, nsite) |
---|
123 | FILE *ifp; |
---|
124 | dmatrix alls; |
---|
125 | int ntree, nsite; |
---|
126 | { |
---|
127 | char line[BUFLINE]; |
---|
128 | char **cpp, *cp, *sp; |
---|
129 | int i, j, nt; |
---|
130 | double x; |
---|
131 | |
---|
132 | cpp = &cp; |
---|
133 | |
---|
134 | for (i = 0; i < ntree; i++) { |
---|
135 | if (Debug) fprintf(stdout, "Read %d tree \n", i+1); |
---|
136 | if (fgets(line, BUFLINE, ifp) != NULL) { |
---|
137 | if (sscanf(line, "# %d", &nt)) { |
---|
138 | if (Debug_optn) fputs(line, stdout); |
---|
139 | if (nt != i + 1) { |
---|
140 | fprintf(stderr, "\nCan't read %d tree! \"%d\"\n", i+1, nt); |
---|
141 | exit(1); |
---|
142 | } |
---|
143 | } else { |
---|
144 | puts(line); |
---|
145 | fprintf(stderr, "\nCan't read header of %d tree! \"%d\"\n", |
---|
146 | i+1, nt); |
---|
147 | exit(1); |
---|
148 | } |
---|
149 | } else { |
---|
150 | fprintf(stderr, "\nCan't read %d tree!\n", i+1); |
---|
151 | exit(1); |
---|
152 | } |
---|
153 | j = 0; |
---|
154 | while (j < nsite) { |
---|
155 | if (fgets(line, BUFLINE, ifp) != NULL) { |
---|
156 | sp = line; |
---|
157 | while (x = strtod(sp, cpp)) { |
---|
158 | if (Debug) printf("%7.2f", x); |
---|
159 | if (Debug && (j+1) % 10 == 0) putchar('\n'); |
---|
160 | alls[i][j] = x; |
---|
161 | sp = *cpp; |
---|
162 | j++; |
---|
163 | } |
---|
164 | } else { |
---|
165 | fprintf(stderr, "\nError: %d site!\n", j+1); |
---|
166 | exit(1); |
---|
167 | } |
---|
168 | } |
---|
169 | if (Debug && j % 10 != 0) putchar('\n'); |
---|
170 | |
---|
171 | } |
---|
172 | return; |
---|
173 | } /*_ getlnlklsite */ |
---|
174 | |
---|
175 | |
---|
176 | prlnlklsite(alls, ntree, nsite) |
---|
177 | dmatrix alls; |
---|
178 | int ntree, nsite; |
---|
179 | { |
---|
180 | int i, j; |
---|
181 | |
---|
182 | for (i = 0; i < ntree; i++) { |
---|
183 | printf("#%d tree\n", i+1); |
---|
184 | for (j = 0; j < nsite; j++) { |
---|
185 | printf("%7.2f", alls[i][j]); |
---|
186 | if ((j+1) % 10 == 0) putchar('\n'); |
---|
187 | } |
---|
188 | if (j % 10 != 0) putchar('\n'); |
---|
189 | } |
---|
190 | } /* prlnlklsite */ |
---|
191 | |
---|
192 | |
---|
193 | static int |
---|
194 | getdata(ifp, cnoseq) |
---|
195 | FILE *ifp; |
---|
196 | int cnoseq; |
---|
197 | { |
---|
198 | int ntree, nsite; |
---|
199 | |
---|
200 | getsize(ifp, &ntree, &nsite, &Comment); |
---|
201 | if (cnoseq == 0) { |
---|
202 | Numtree = ntree; |
---|
203 | Lnlklsite = new_dcube(Numseqs, 0, 0); |
---|
204 | } else { |
---|
205 | if (ntree != Numtree) { |
---|
206 | fprintf(stderr, "\nBad size, number of trees is %d\n", ntree); |
---|
207 | exit(1); |
---|
208 | } |
---|
209 | } |
---|
210 | if (nsite <= 0) { |
---|
211 | fprintf(stderr, "\nBad size, number of sites is %d\n", nsite); |
---|
212 | exit(1); |
---|
213 | } |
---|
214 | Alnlklsite = new_dmatrix(Numtree, nsite); |
---|
215 | getlnlklsite(ifp, Alnlklsite, Numtree, nsite); |
---|
216 | Lnlklsite[cnoseq] = Alnlklsite; |
---|
217 | Numsite[cnoseq] = nsite; |
---|
218 | if (Debug_optn) prlnlklsite(Alnlklsite, Numtree, nsite); |
---|
219 | return nsite; |
---|
220 | } |
---|
221 | |
---|
222 | void |
---|
223 | prlls() |
---|
224 | { |
---|
225 | int i, j, n, jmax; |
---|
226 | |
---|
227 | for (n = 0; n < Numseqs; n++) { |
---|
228 | printf("\nData: %d\n", n+1); |
---|
229 | for (i = 0; i < Numtree; i++) { |
---|
230 | printf("#%d tree\n", i+1); |
---|
231 | for (j = 0, jmax = Numsite[n]; j < jmax; j++) { |
---|
232 | printf("%7.2f", Lnlklsite[n][i][j]); |
---|
233 | if ((j+1) % 10 == 0) putchar('\n'); |
---|
234 | } if (j % 10 != 0) putchar('\n'); |
---|
235 | } |
---|
236 | } |
---|
237 | } |
---|
238 | |
---|
239 | |
---|
240 | void |
---|
241 | total() |
---|
242 | { |
---|
243 | int i, j, m, n, nsite, rs, nolltmax, same, allsame, mllbest; |
---|
244 | double lltmax, coefrand, coefboot; |
---|
245 | double sl1, sl2, suml1, suml2, ldiff, sdllt, nn1; |
---|
246 | ivector bspall, bsp, mllseq; |
---|
247 | imatrix bspseq; |
---|
248 | dvector lltall, llt, alls, mlls; |
---|
249 | dmatrix lltseq, alnlklsite; |
---|
250 | |
---|
251 | mllseq = new_ivector(Numseqs); |
---|
252 | bspall = new_ivector(Numtree); |
---|
253 | bspseq = new_imatrix(Numseqs, Numtree); |
---|
254 | lltall = new_dvector(Numtree); |
---|
255 | lltseq = new_dmatrix(Numseqs, Numtree); |
---|
256 | |
---|
257 | allsame = 0; |
---|
258 | for (i = 0; i < Numtree; i++) lltall[i] = 0.0; |
---|
259 | for (n = 0; n < Numseqs; n++) { |
---|
260 | llt = lltseq[n]; |
---|
261 | for (i = 0; i < Numtree; i++) llt[i] = 0.0; |
---|
262 | nsite = Numsite[n]; |
---|
263 | alnlklsite = Lnlklsite[n]; |
---|
264 | for (i = 0; i < Numtree; i++) { |
---|
265 | for (j = 0; j < nsite; j++) { |
---|
266 | llt[i] += alnlklsite[i][j]; |
---|
267 | lltall[i] += alnlklsite[i][j]; |
---|
268 | } |
---|
269 | } |
---|
270 | nolltmax = 0; |
---|
271 | lltmax = llt[0]; |
---|
272 | same = 0; |
---|
273 | for (i = 1; i < Numtree; i++) { |
---|
274 | if (llt[i] > lltmax) { |
---|
275 | nolltmax = i; |
---|
276 | lltmax = llt[i]; |
---|
277 | same = 0; |
---|
278 | } else if (llt[i] == lltmax) { |
---|
279 | same++; |
---|
280 | } |
---|
281 | } |
---|
282 | allsame += same; |
---|
283 | mllseq[n] = nolltmax; |
---|
284 | } |
---|
285 | nolltmax = 0; |
---|
286 | lltmax = lltall[0]; |
---|
287 | same = 0; |
---|
288 | for (i = 1; i < Numtree; i++) { |
---|
289 | if (lltall[i] > lltmax) { |
---|
290 | nolltmax = i; |
---|
291 | lltmax = lltall[i]; |
---|
292 | same = 0; |
---|
293 | } else if (lltall[i] == lltmax) { |
---|
294 | same++; |
---|
295 | } |
---|
296 | } |
---|
297 | allsame += same; |
---|
298 | mllbest = nolltmax; |
---|
299 | |
---|
300 | if (!Ctacit_optn) { |
---|
301 | printf("\n%-4s", "tree"); |
---|
302 | for (n = 0; n < Numseqs; n++) printf("%7d ", n+1); |
---|
303 | printf(" %8s\n", "total"); |
---|
304 | for (i = 0; i < Numtree; i++) { |
---|
305 | printf("%-4d", i+1); |
---|
306 | for (n = 0; n < Numseqs; n++) { |
---|
307 | if (i == mllseq[n]) |
---|
308 | /* printf("%8s", "ml"); */ |
---|
309 | printf("%8.1f", -lltseq[n][i]); |
---|
310 | else |
---|
311 | printf("%8.1f", lltseq[n][mllseq[n]] - lltseq[n][i]); |
---|
312 | } |
---|
313 | if (i == mllbest) { |
---|
314 | printf(" %8.1f\n", -lltall[i]); |
---|
315 | } else { |
---|
316 | printf(" %8.1f\n", lltall[mllbest]-lltall[i]); |
---|
317 | } |
---|
318 | |
---|
319 | printf("%-4s", ""); |
---|
320 | for (suml1 = suml2 = 0.0, n = 0; n < Numseqs; n++) { |
---|
321 | mlls = Lnlklsite[n][mllseq[n]]; |
---|
322 | nsite = Numsite[n]; |
---|
323 | nn1 = (double)(nsite / (nsite-1)); |
---|
324 | if (i == mllseq[n]) { |
---|
325 | printf("%8s", "ml"); |
---|
326 | } else { |
---|
327 | alls = Lnlklsite[n][i]; |
---|
328 | for (sl1 = sl2 = 0.0, j = 0; j < nsite; j++) { |
---|
329 | ldiff = alls[j] - mlls[j]; |
---|
330 | sl1 += ldiff; |
---|
331 | sl2 += ldiff * ldiff; |
---|
332 | } |
---|
333 | suml1 += sl1; |
---|
334 | suml2 += sl2; |
---|
335 | sl1 /= nsite; |
---|
336 | sdllt = sqrt( nn1 * (sl2 - sl1*sl1*nsite) ); |
---|
337 | printf("%8.1f", sdllt); |
---|
338 | } |
---|
339 | } |
---|
340 | if (i == mllbest) { |
---|
341 | printf(" %8s\n", "ML"); |
---|
342 | } else { |
---|
343 | suml1 /= Allsite; |
---|
344 | nn1 = (double)(Allsite / (Allsite-1)); |
---|
345 | sdllt = sqrt( nn1 * (suml2 - suml1*suml1*Allsite) ); |
---|
346 | printf(" %8.1f\n", sdllt); |
---|
347 | } |
---|
348 | |
---|
349 | } |
---|
350 | printf("%-4s", "sites"); |
---|
351 | printf("%7d", Numsite[0]); |
---|
352 | for (n = 1; n < Numseqs; n++) printf("%8d", Numsite[n]); |
---|
353 | printf(" %8d\n", Allsite); |
---|
354 | } |
---|
355 | |
---|
356 | coefboot = 1.0 / (double)NUMBOOTS; |
---|
357 | allsame = 0; |
---|
358 | for (n = 0; n < Numseqs; n++) { |
---|
359 | for (i = 0; i < Numtree; i++) bspseq[n][i] = 0; |
---|
360 | } |
---|
361 | for (i = 0; i < Numtree; i++) bspall[i] = 0; |
---|
362 | for (m = 0; m < NUMBOOTS; m++) { |
---|
363 | for (i = 0; i < Numtree; i++) lltall[i] = 0.0; |
---|
364 | for (n = 0; n < Numseqs; n++) { |
---|
365 | llt = lltseq[n]; |
---|
366 | bsp = bspseq[n]; |
---|
367 | for (i = 0; i < Numtree; i++) llt[i] = 0.0; |
---|
368 | nsite = Numsite[n]; |
---|
369 | alnlklsite = Lnlklsite[n]; |
---|
370 | coefrand = (double)nsite / ((double)RANDOM_MAX + 1.0); |
---|
371 | for (j = 0; j < nsite; j++) { |
---|
372 | rs = (int)( coefrand * (double)rand() ); /* RANDOM */ |
---|
373 | for (i = 0; i < Numtree; i++) { |
---|
374 | llt[i] += alnlklsite[i][rs]; |
---|
375 | lltall[i] += alnlklsite[i][rs]; |
---|
376 | } |
---|
377 | } |
---|
378 | nolltmax = 0; |
---|
379 | lltmax = llt[0]; |
---|
380 | same = 0; |
---|
381 | for (i = 1; i < Numtree; i++) { |
---|
382 | if (llt[i] > lltmax) { |
---|
383 | nolltmax = i; |
---|
384 | lltmax = llt[i]; |
---|
385 | same = 0; |
---|
386 | } else if (llt[i] == lltmax) { |
---|
387 | same++; |
---|
388 | } |
---|
389 | } |
---|
390 | allsame += same; |
---|
391 | bsp[nolltmax]++; |
---|
392 | } |
---|
393 | nolltmax = 0; |
---|
394 | lltmax = lltall[0]; |
---|
395 | same = 0; |
---|
396 | for (i = 1; i < Numtree; i++) { |
---|
397 | if (lltall[i] > lltmax) { |
---|
398 | nolltmax = i; |
---|
399 | lltmax = lltall[i]; |
---|
400 | same = 0; |
---|
401 | } else if (lltall[i] == lltmax) { |
---|
402 | same++; |
---|
403 | } |
---|
404 | } |
---|
405 | allsame += same; |
---|
406 | bspall[nolltmax]++; |
---|
407 | } |
---|
408 | if (allsame > 0) |
---|
409 | printf("\nsame bootstrap likelihood occured %d times\n", allsame); |
---|
410 | |
---|
411 | if (Ctacit_optn) { |
---|
412 | for (i = 0; i < Numtree; i++) printf(" %6.4f", bspall[i]*coefboot); |
---|
413 | printf(" %2d\n", mllbest+1); |
---|
414 | } else { |
---|
415 | printf("\n%-4s", "tree"); |
---|
416 | for (n = 0; n < Numseqs; n++) printf("%6d ", n+1); |
---|
417 | printf(" %8s\n", "total"); |
---|
418 | for (i = 0; i < Numtree; i++) { |
---|
419 | printf("%-4d", i+1); |
---|
420 | for (n = 0; n < Numseqs; n++) printf("%8.4f",bspseq[n][i]*coefboot); |
---|
421 | printf(" %8.4f\n", bspall[i]*coefboot); |
---|
422 | } |
---|
423 | #if 0 |
---|
424 | printf("%-4s", "sites"); |
---|
425 | printf("%8d", Numsite[0]); |
---|
426 | for (n = 1; n < Numseqs; n++) printf("%8d", Numsite[n]); |
---|
427 | printf(" %6d\n", Allsite); |
---|
428 | #endif |
---|
429 | } |
---|
430 | |
---|
431 | free_dmatrix(lltseq); |
---|
432 | free_dvector(lltall); |
---|
433 | free_imatrix(bspseq); |
---|
434 | free_ivector(bspall); |
---|
435 | free_ivector(mllseq); |
---|
436 | } |
---|
437 | |
---|
438 | |
---|
439 | main(argc, argv) |
---|
440 | int argc; |
---|
441 | char **argv; |
---|
442 | { |
---|
443 | int i, ch; |
---|
444 | int cnoseq, nsite; |
---|
445 | extern int Optindex; /* index of next argument */ |
---|
446 | extern char *Optargp; /* pointer to argument of current option */ |
---|
447 | |
---|
448 | Ct0 = time(NULL); |
---|
449 | if((Prog_name = strrchr(argv[0], DIR_CHAR)) != NULL ) |
---|
450 | Prog_name++; |
---|
451 | else |
---|
452 | Prog_name = argv[0]; |
---|
453 | |
---|
454 | Boots_optn = TRUE; /* default with User_optn*/ |
---|
455 | |
---|
456 | while((ch = mygetopt(argc, argv, SWITCHES)) != -1 ) { |
---|
457 | switch(ch) { |
---|
458 | case 'b': Boots_optn = FALSE; break; |
---|
459 | case 'i': Info_optn = TRUE; break; |
---|
460 | case 'v': Verbs_optn = TRUE; break; |
---|
461 | case 'w': Write_optn = TRUE; break; |
---|
462 | #if 1 |
---|
463 | case 'z': Debug_optn = TRUE; break; |
---|
464 | case 'Z': Debug = TRUE; break; |
---|
465 | #endif |
---|
466 | case 'C': Ctacit_optn = TRUE; break; |
---|
467 | case 'h': |
---|
468 | case 'H': helpinfo(); exit(1); |
---|
469 | default : usage(); exit(1); |
---|
470 | } |
---|
471 | } |
---|
472 | |
---|
473 | #ifdef DEBUG |
---|
474 | if (Debug) { |
---|
475 | printf("argc = %d\n",argc); |
---|
476 | for(i = 0; i < argc; i++) printf("argv[%d] = %s\n",i,argv[i]); |
---|
477 | putchar('\n'); |
---|
478 | printf("Optindex = %d\n",Optindex); |
---|
479 | printf("Optargp = %s\n",Optargp); |
---|
480 | } |
---|
481 | #endif |
---|
482 | |
---|
483 | if (Optindex == argc) { |
---|
484 | usage(); |
---|
485 | exit(1); |
---|
486 | } |
---|
487 | Numseqs = argc - Optindex; |
---|
488 | if (Debug) printf("Numseqs: %d\n", Numseqs); |
---|
489 | Numsite = new_ivector(Numseqs); |
---|
490 | |
---|
491 | if (Verbs_optn) copyright(); |
---|
492 | cnoseq = 0; |
---|
493 | Allsite = 0; |
---|
494 | while ((llsfp = fopen(argv[Optindex++], "r")) != NULL) { |
---|
495 | nsite = getdata(llsfp, cnoseq); |
---|
496 | cnoseq++; |
---|
497 | Allsite += nsite; |
---|
498 | } |
---|
499 | if (!Ctacit_optn) header(stdout, &Numseqs, &Allsite, &Comment); |
---|
500 | if (--Optindex != argc) { |
---|
501 | fprintf(stderr,"%s: can't open %s\n",Prog_name,argv[Optindex]); |
---|
502 | exit(1); |
---|
503 | } |
---|
504 | if (Debug) prlls(); |
---|
505 | total(); |
---|
506 | |
---|
507 | return 0; |
---|
508 | } |
---|