1 | #include "phylip.h" |
---|
2 | |
---|
3 | /* version 3.52c. (c) Copyright 1993 by Joseph Felsenstein. |
---|
4 | Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. |
---|
5 | Permission is granted to copy and use this program provided no fee is |
---|
6 | charged for it and provided that this copyright notice is not removed. */ |
---|
7 | |
---|
8 | #define maxcutter 8 /* maximum number of bases in a site */ |
---|
9 | #define maxtrees 8 /* maximum number of user trees */ |
---|
10 | #define smoothings 10 /* number of passes in smoothing algorithm */ |
---|
11 | #define iterations 10 /* number of iterates of EM for each branch */ |
---|
12 | #define nmlngth 10 /* number of characters max. in species name */ |
---|
13 | |
---|
14 | #define epsilon 0.00001 /* used in update */ |
---|
15 | #define extrap0 100.0 /* extrapolation factor to speed iteration */ |
---|
16 | #define initialv 0.1 /* starting value of branch length */ |
---|
17 | #define point "." |
---|
18 | |
---|
19 | #define ibmpc0 false |
---|
20 | #define ansi0 true |
---|
21 | #define vt520 false |
---|
22 | #define down 2 |
---|
23 | #define over 60 |
---|
24 | |
---|
25 | |
---|
26 | typedef double sitelike[maxcutter + 1]; |
---|
27 | typedef sitelike *phenotype; |
---|
28 | typedef Char **sequence; |
---|
29 | typedef Char naym[nmlngth]; |
---|
30 | typedef short longer[6]; |
---|
31 | typedef double **transmatrix; |
---|
32 | typedef transmatrix *transptr; |
---|
33 | |
---|
34 | typedef struct node { |
---|
35 | struct node *next, *back; |
---|
36 | boolean tip, iter, initialized; |
---|
37 | short branchnum, number; |
---|
38 | phenotype x; |
---|
39 | naym nayme; |
---|
40 | double v; |
---|
41 | short xcoord, ycoord, ymin, ymax; |
---|
42 | } node; |
---|
43 | |
---|
44 | typedef struct tree { |
---|
45 | node **nodep; |
---|
46 | transptr trans, transprod; |
---|
47 | double likelihood; |
---|
48 | node *start; |
---|
49 | } tree; |
---|
50 | |
---|
51 | FILE *infile, *outfile, *treefile; |
---|
52 | short numsp, numsp1, numsp2, sites, enzymes, endsite, weightsum, |
---|
53 | sitelength, inseed, outgrno, datasets, ith, i, j, l, |
---|
54 | jumb, njumble=0; |
---|
55 | double extrapol; |
---|
56 | boolean global, jumble, lengths, outgropt, weights, trout, trunc8, |
---|
57 | usertree, printdata, progress, treeprint, mulsets, firstset, |
---|
58 | interleaved, ibmpc, vt52, ansi; |
---|
59 | tree curtree, priortree, bestree, bestree2; |
---|
60 | sequence y; |
---|
61 | longer seed; |
---|
62 | short *enterorder; |
---|
63 | short *weight, *alias, *aliasweight; |
---|
64 | |
---|
65 | |
---|
66 | |
---|
67 | |
---|
68 | openfile(fp,filename,mode,application,perm) |
---|
69 | FILE **fp; |
---|
70 | char *filename; |
---|
71 | char *mode; |
---|
72 | char *application; |
---|
73 | char *perm; |
---|
74 | { |
---|
75 | FILE *of; |
---|
76 | char file[100]; |
---|
77 | strcpy(file,filename); |
---|
78 | while (1){ |
---|
79 | of = fopen(file,mode); |
---|
80 | if (of) |
---|
81 | break; |
---|
82 | else { |
---|
83 | switch (*mode){ |
---|
84 | case 'r': |
---|
85 | printf("%s: can't read %s\n",application,file); |
---|
86 | file[0] = '\0'; |
---|
87 | while (file[0] =='\0'){ |
---|
88 | printf("Please enter a new filename>"); |
---|
89 | gets(file);} |
---|
90 | break; |
---|
91 | case 'w': |
---|
92 | case 'a': |
---|
93 | printf("%s: can't write %s\n",application,file); |
---|
94 | file[0] = '\0'; |
---|
95 | while (file[0] =='\0'){ |
---|
96 | printf("Please enter a new filename>"); |
---|
97 | gets(file);} |
---|
98 | break; |
---|
99 | } |
---|
100 | } |
---|
101 | } |
---|
102 | *fp=of; |
---|
103 | if (perm != NULL) |
---|
104 | strcpy(perm,file); |
---|
105 | } |
---|
106 | |
---|
107 | |
---|
108 | void uppercase(ch) |
---|
109 | Char *ch; |
---|
110 | { |
---|
111 | /* convert ch to upper case -- either ASCII or EBCDIC */ |
---|
112 | *ch = (isupper (*ch) ? (*ch) : toupper(*ch)); |
---|
113 | } /* uppercase */ |
---|
114 | |
---|
115 | |
---|
116 | void getnums() |
---|
117 | { |
---|
118 | /* read and print out numbers of species and sites */ |
---|
119 | fscanf(infile, "%hd%hd%hd", &numsp, &sites, &enzymes); |
---|
120 | if (printdata) |
---|
121 | fprintf(outfile, "%4hd Species, %4hd Sites,%4hd Enzymes\n", |
---|
122 | numsp, sites, enzymes); |
---|
123 | numsp1 = numsp + 1; |
---|
124 | numsp2 = numsp * 2 - 2; |
---|
125 | } /* getnums */ |
---|
126 | |
---|
127 | void getoptions() |
---|
128 | { |
---|
129 | /* interactively set options */ |
---|
130 | short i, inseed0; |
---|
131 | Char ch; |
---|
132 | boolean done, done1; |
---|
133 | |
---|
134 | fprintf(outfile, "\nRestriction site Maximum Likelihood"); |
---|
135 | fprintf(outfile, " method, version %s\n\n",VERSION); |
---|
136 | putchar('\n'); |
---|
137 | sitelength = 6; |
---|
138 | trunc8 = true; |
---|
139 | extrapol = extrap0; |
---|
140 | global = false; |
---|
141 | jumble = false; |
---|
142 | njumble = 1; |
---|
143 | lengths = false; |
---|
144 | outgrno = 1; |
---|
145 | outgropt = false; |
---|
146 | trout = true; |
---|
147 | usertree = false; |
---|
148 | weights = false; |
---|
149 | printdata = false; |
---|
150 | progress = true; |
---|
151 | treeprint = true; |
---|
152 | interleaved = true; |
---|
153 | for (;;) { |
---|
154 | printf("%s", (ansi ? ("\033[2J\033[H") : |
---|
155 | vt52 ? ("\033E\033H") : |
---|
156 | "\n")); |
---|
157 | printf("\nRestriction site Maximum Likelihood"); |
---|
158 | printf(" method, version %s\n\n",VERSION); |
---|
159 | printf("Settings for this run:\n"); |
---|
160 | printf(" U Search for best tree? %s\n", |
---|
161 | (usertree ? "No, use user trees in input file" : "Yes")); |
---|
162 | if (usertree) { |
---|
163 | printf(" N Use lengths from user trees? %s\n", |
---|
164 | (lengths ? "Yes" : "No")); |
---|
165 | } |
---|
166 | printf(" A Are all sites detected? %s\n", |
---|
167 | (trunc8 ? "No" : "Yes")); |
---|
168 | if (!usertree) { |
---|
169 | printf(" G Global rearrangements? %s\n", |
---|
170 | (global ? "Yes" : "No")); |
---|
171 | printf(" J Randomize input order of sequences? "); |
---|
172 | if (jumble) |
---|
173 | printf("Yes (seed =%8hd,%3hd times)\n", inseed0, njumble); |
---|
174 | else |
---|
175 | printf("No. Use input order\n"); |
---|
176 | } |
---|
177 | printf(" L Site length?%3hd\n",sitelength); |
---|
178 | printf(" O Outgroup root? %s%3hd\n", |
---|
179 | (outgropt ? "Yes, at sequence number" : |
---|
180 | "No, use as outgroup species"),outgrno); |
---|
181 | |
---|
182 | printf(" E Extrapolation factor%7.1f\n", extrapol); |
---|
183 | printf(" M Analyze multiple data sets?"); |
---|
184 | if (mulsets) |
---|
185 | printf(" Yes, %2hd sets\n", datasets); |
---|
186 | else |
---|
187 | printf(" No\n"); |
---|
188 | printf(" I Input sequences interleaved? %s\n", |
---|
189 | (interleaved ? "Yes" : "No, sequential")); |
---|
190 | printf(" 0 Terminal type (IBM PC, VT52, ANSI)? %s\n", |
---|
191 | ibmpc ? "IBM PC" : |
---|
192 | ansi ? "ANSI" : |
---|
193 | vt52 ? "vt52" : "(none)"); |
---|
194 | printf(" 1 Print out the data at start of run %s\n", |
---|
195 | (printdata ? "Yes" : "No")); |
---|
196 | printf(" 2 Print indications of progress of run %s\n", |
---|
197 | (progress ? "Yes" : "No")); |
---|
198 | printf(" 3 Print out tree %s\n", |
---|
199 | (treeprint ? "Yes" : "No")); |
---|
200 | printf(" 4 Write out trees onto tree file? %s\n", |
---|
201 | (trout ? "Yes" : "No")); |
---|
202 | printf("\nAre these settings correct?"); |
---|
203 | printf(" (type Y or the letter for one to change)\n"); |
---|
204 | scanf("%c%*[^\n]", &ch); |
---|
205 | getchar(); |
---|
206 | if (ch == '\n') |
---|
207 | ch = ' '; |
---|
208 | uppercase(&ch); |
---|
209 | if (ch == 'Y') |
---|
210 | break; |
---|
211 | if (strchr("JOUNAGLTLEMI01234",ch) != NULL){ |
---|
212 | switch (ch) { |
---|
213 | |
---|
214 | case 'A': |
---|
215 | trunc8 = !trunc8; |
---|
216 | break; |
---|
217 | |
---|
218 | case 'E': |
---|
219 | printf("Extrapolation factor?\n"); |
---|
220 | do { |
---|
221 | scanf("%lf%*[^\n]", &extrapol); |
---|
222 | getchar(); |
---|
223 | if (extrapol <= 0.0) |
---|
224 | printf("Must be positive!\n"); |
---|
225 | } while (extrapol <= 0.0); |
---|
226 | break; |
---|
227 | |
---|
228 | case 'G': |
---|
229 | global = !global; |
---|
230 | break; |
---|
231 | |
---|
232 | case 'J': |
---|
233 | jumble = !jumble; |
---|
234 | if (jumble) { |
---|
235 | printf("Random number seed (must be odd)?\n"); |
---|
236 | scanf("%hd%*[^\n]", &inseed); |
---|
237 | getchar(); |
---|
238 | inseed0 = inseed; |
---|
239 | for (i = 0; i <= 5; i++) |
---|
240 | seed[i] = 0; |
---|
241 | i = 0; |
---|
242 | do { |
---|
243 | seed[i] = inseed & 63; |
---|
244 | inseed /= 64; |
---|
245 | i++; |
---|
246 | } while (inseed != 0); |
---|
247 | printf("Number of times to jumble?\n"); |
---|
248 | scanf("%hd%*[^\n]", &njumble); |
---|
249 | getchar(); |
---|
250 | } |
---|
251 | else njumble = 1; |
---|
252 | break; |
---|
253 | |
---|
254 | case 'L': |
---|
255 | do { |
---|
256 | printf("New Sitelength?\n"); |
---|
257 | scanf("%hd%*[^\n]", &sitelength); |
---|
258 | getchar(); |
---|
259 | if (sitelength < 1) |
---|
260 | printf("BAD RESTRICTION SITE LENGTH: %hd\n", sitelength); |
---|
261 | } while (sitelength < 1); |
---|
262 | break; |
---|
263 | |
---|
264 | case 'N': |
---|
265 | lengths = !lengths; |
---|
266 | break; |
---|
267 | |
---|
268 | case 'O': |
---|
269 | outgropt = !outgropt; |
---|
270 | if (outgropt) { |
---|
271 | done1 = true; |
---|
272 | do { |
---|
273 | printf("Type number of the outgroup:\n"); |
---|
274 | scanf("%hd%*[^\n]", &outgrno); |
---|
275 | getchar(); |
---|
276 | done1 = (outgrno >= 1 && outgrno <= numsp); |
---|
277 | if (!done1) { |
---|
278 | printf("BAD OUTGROUP NUMBER: %4hd\n", outgrno); |
---|
279 | printf(" Must be in range 1 -%2hd\n", numsp); |
---|
280 | } |
---|
281 | } while (done1 != true); |
---|
282 | } else outgrno = 1; |
---|
283 | break; |
---|
284 | |
---|
285 | case 'U': |
---|
286 | usertree = !usertree; |
---|
287 | break; |
---|
288 | |
---|
289 | case 'M': |
---|
290 | mulsets = !mulsets; |
---|
291 | if (mulsets) { |
---|
292 | done1 = false; |
---|
293 | do { |
---|
294 | printf("How many data sets?\n"); |
---|
295 | scanf("%hd%*[^\n]", &datasets); |
---|
296 | getchar(); |
---|
297 | done1 = (datasets >= 1); |
---|
298 | if (!done1) |
---|
299 | printf("BAD DATA SETS NUMBER: it must be greater than 1\n"); |
---|
300 | } while (done1 != true); |
---|
301 | } |
---|
302 | break; |
---|
303 | |
---|
304 | case 'I': |
---|
305 | interleaved = !interleaved; |
---|
306 | break; |
---|
307 | |
---|
308 | case '0': |
---|
309 | if (ibmpc) { |
---|
310 | ibmpc = false; |
---|
311 | vt52 = true; |
---|
312 | } else { |
---|
313 | if (vt52) { |
---|
314 | vt52 = false; |
---|
315 | ansi = true; |
---|
316 | } else if (ansi) |
---|
317 | ansi = false; |
---|
318 | else |
---|
319 | ibmpc = true; |
---|
320 | } |
---|
321 | break; |
---|
322 | |
---|
323 | case '1': |
---|
324 | printdata = !printdata; |
---|
325 | break; |
---|
326 | |
---|
327 | case '2': |
---|
328 | progress = !progress; |
---|
329 | break; |
---|
330 | |
---|
331 | case '3': |
---|
332 | treeprint = !treeprint; |
---|
333 | break; |
---|
334 | |
---|
335 | case '4': |
---|
336 | trout = !trout; |
---|
337 | break; |
---|
338 | } |
---|
339 | } else |
---|
340 | printf("Not a possible option!\n"); |
---|
341 | } |
---|
342 | } /* getoptions */ |
---|
343 | |
---|
344 | |
---|
345 | void doinit() |
---|
346 | { |
---|
347 | /* initializes variables */ |
---|
348 | short i, j; |
---|
349 | node *p, *q; |
---|
350 | |
---|
351 | getnums(); |
---|
352 | getoptions(); |
---|
353 | y = (Char **)Malloc(numsp*sizeof(Char *)); |
---|
354 | for (i = 0; i < numsp; i++) |
---|
355 | y[i] = (Char *)Malloc(sites*sizeof(Char)); |
---|
356 | curtree.trans = (transptr)Malloc(numsp2*sizeof(transmatrix)); |
---|
357 | for (i=0;i<numsp2;++i){ |
---|
358 | curtree.trans[i] = (transmatrix)Malloc((sitelength + 1) * sizeof (double *)); |
---|
359 | for (j=0;j<sitelength+1;++j) |
---|
360 | curtree.trans[i][j] = (double *)Malloc((sitelength + 1) * sizeof(double)); |
---|
361 | } |
---|
362 | curtree.transprod = (transptr)Malloc(numsp2*sizeof(transmatrix)); |
---|
363 | for (i=0;i<numsp2;++i){ |
---|
364 | curtree.transprod[i] = (transmatrix)Malloc((sitelength+ 1) * sizeof (double *)); |
---|
365 | for (j=0;j<(sitelength + 1);++j) |
---|
366 | curtree.transprod[i][j] = (double *)Malloc((sitelength + 1)* sizeof(double)); |
---|
367 | } |
---|
368 | curtree.nodep = (node **)Malloc(numsp2*sizeof(node *)); |
---|
369 | for (i = 0; i < numsp; i++) |
---|
370 | curtree.nodep[i] = (node *)Malloc(sizeof(node)); |
---|
371 | for (i = numsp1 - 1; i < numsp2; i++) { |
---|
372 | q = NULL; |
---|
373 | for (j = 1; j <= 3; j++) { |
---|
374 | p = (node *)Malloc(sizeof(node)); |
---|
375 | p->next = q; |
---|
376 | q = p; |
---|
377 | } |
---|
378 | p->next->next->next = p; |
---|
379 | curtree.nodep[i] = p; |
---|
380 | } |
---|
381 | if (usertree) |
---|
382 | return; |
---|
383 | bestree.trans = (transptr)Malloc(numsp2*sizeof(transmatrix)); |
---|
384 | for (i=0;i<numsp2;++i){ |
---|
385 | bestree.trans[i] = (transmatrix)Malloc((sitelength + 1) * sizeof (double *)); |
---|
386 | for (j=0;j<(sitelength + 1);++j){ |
---|
387 | bestree.trans[i][j] = (double *)Malloc((sitelength + 1)* sizeof(double)); |
---|
388 | } |
---|
389 | } |
---|
390 | bestree.transprod = (transptr)Malloc(numsp2*sizeof(transmatrix)); |
---|
391 | for (i=0;i<numsp2;++i){ |
---|
392 | bestree.transprod[i] = (transmatrix)Malloc((sitelength + 1)* sizeof (double *)); |
---|
393 | for (j=0;j<sitelength+1;++j) |
---|
394 | bestree.transprod[i][j] = (double *)Malloc((sitelength +1)* sizeof(double)); |
---|
395 | } |
---|
396 | bestree.nodep = (node **)Malloc(numsp2*sizeof(node *)); |
---|
397 | for (i = 0; i < numsp; i++) |
---|
398 | bestree.nodep[i] = (node *)Malloc(sizeof(node)); |
---|
399 | for (i = numsp1 - 1; i < numsp2; i++) { |
---|
400 | q = NULL; |
---|
401 | for (j = 1; j <= 3; j++) { |
---|
402 | p = (node *)Malloc(sizeof(node)); |
---|
403 | p->next = q; |
---|
404 | q = p; |
---|
405 | } |
---|
406 | p->next->next->next = p; |
---|
407 | bestree.nodep[i] = p; |
---|
408 | } |
---|
409 | priortree.trans = (transptr)Malloc(numsp2*sizeof(transmatrix)); |
---|
410 | for (i=0;i<numsp2;++i){ |
---|
411 | priortree.trans[i] = (transmatrix)Malloc((sitelength + 1) * sizeof (double *)); |
---|
412 | for (j=0;j<(sitelength + 1);++j){ |
---|
413 | priortree.trans[i][j] = (double *)Malloc((sitelength + 1)* sizeof(double)); |
---|
414 | } |
---|
415 | } |
---|
416 | priortree.transprod = (transptr)Malloc(numsp2*sizeof(transmatrix)); |
---|
417 | for (i=0;i<numsp2;++i){ |
---|
418 | priortree.transprod[i] = (transmatrix)Malloc((sitelength + 1)* sizeof (double *)); |
---|
419 | for (j=0;j<sitelength+1;++j) |
---|
420 | priortree.transprod[i][j] = (double *)Malloc((sitelength +1)* sizeof(double)); |
---|
421 | } |
---|
422 | priortree.nodep = (node **)Malloc(numsp2*sizeof(node *)); |
---|
423 | for (i = 0; i < numsp; i++) |
---|
424 | priortree.nodep[i] = (node *)Malloc(sizeof(node)); |
---|
425 | for (i = numsp1 - 1; i < numsp2; i++) { |
---|
426 | q = NULL; |
---|
427 | for (j = 1; j <= 3; j++) { |
---|
428 | p = (node *)Malloc(sizeof(node)); |
---|
429 | p->next = q; |
---|
430 | q = p; |
---|
431 | } |
---|
432 | p->next->next->next = p; |
---|
433 | priortree.nodep[i] = p; |
---|
434 | } |
---|
435 | if (njumble == 1) return; |
---|
436 | bestree2.trans = (transptr)Malloc(numsp2*sizeof(transmatrix)); |
---|
437 | for (i=0;i<numsp2;++i){ |
---|
438 | bestree2.trans[i] = (transmatrix)Malloc((sitelength + 1) * sizeof (double *)); |
---|
439 | for (j=0;j<sitelength + 1;++j) |
---|
440 | bestree2.trans[i][j] = (double *)Malloc((sitelength +1)* sizeof(double)); |
---|
441 | } |
---|
442 | bestree2.transprod = (transptr)Malloc(numsp2*sizeof(transmatrix)); |
---|
443 | for (i=0;i<numsp2;++i){ |
---|
444 | bestree2.transprod[i] = (transmatrix)Malloc((sitelength +1)* sizeof (double *)); |
---|
445 | for (j=0;j<sitelength+1;++j) |
---|
446 | bestree2.transprod[i][j] = (double *)Malloc((sitelength +1)* sizeof(double)); |
---|
447 | } |
---|
448 | bestree2.nodep = (node **)Malloc(numsp2*sizeof(node *)); |
---|
449 | for (i = 0; i < numsp; i++) |
---|
450 | bestree2.nodep[i] = (node *)Malloc(sizeof(node)); |
---|
451 | for (i = numsp1 - 1; i < numsp2; i++) { |
---|
452 | q = NULL; |
---|
453 | for (j = 1; j <= 3; j++) { |
---|
454 | p = (node *)Malloc(sizeof(node)); |
---|
455 | p->next = q; |
---|
456 | q = p; |
---|
457 | } |
---|
458 | p->next->next->next = p; |
---|
459 | bestree2.nodep[i] = p; |
---|
460 | } |
---|
461 | } /* doinit */ |
---|
462 | |
---|
463 | |
---|
464 | void inputweights() |
---|
465 | { |
---|
466 | /* input the character weights, 0 or 1 */ |
---|
467 | Char ch; |
---|
468 | short i; |
---|
469 | |
---|
470 | for (i = 1; i < nmlngth; i++) { |
---|
471 | ch = getc(infile); |
---|
472 | if (ch == '\n') |
---|
473 | ch = ' '; |
---|
474 | } |
---|
475 | weightsum = 0; |
---|
476 | for (i = 1; i <= sites; i++) { |
---|
477 | do { |
---|
478 | if (eoln(infile)) { |
---|
479 | fscanf(infile, "%*[^\n]"); |
---|
480 | getc(infile); |
---|
481 | } |
---|
482 | ch = getc(infile); |
---|
483 | if (ch == '\n') |
---|
484 | ch = ' '; |
---|
485 | } while (ch == ' '); |
---|
486 | weight[i] = 1; |
---|
487 | if (ch == '0' || ch == '1') |
---|
488 | weight[i] = ch - '0'; |
---|
489 | else { |
---|
490 | printf("BAD WEIGHT CHARACTER: %c -- WEIGHTS IN RESTML MUST BE 0 OR 1\n", |
---|
491 | ch); |
---|
492 | exit(-1); |
---|
493 | } |
---|
494 | weightsum += weight[i]; |
---|
495 | } |
---|
496 | fscanf(infile, "%*[^\n]"); |
---|
497 | getc(infile); |
---|
498 | weights = true; |
---|
499 | } /* inputweights */ |
---|
500 | |
---|
501 | void printweights() |
---|
502 | { |
---|
503 | /* print out the weights of sites */ |
---|
504 | short i, j; |
---|
505 | |
---|
506 | fprintf(outfile, "\n Sites are weighted as follows:\n\n"); |
---|
507 | for (i = 1; i <= sites; i++) { |
---|
508 | if ((i - 1) % 60 == 0) { |
---|
509 | putc('\n', outfile); |
---|
510 | for (j = 1; j <= nmlngth + 3; j++) |
---|
511 | putc(' ', outfile); |
---|
512 | } |
---|
513 | fprintf(outfile, "%hd", weight[i]); |
---|
514 | if (i % 10 == 0 && i % 60 != 0) |
---|
515 | putc(' ', outfile); |
---|
516 | } |
---|
517 | fprintf(outfile, "\n\n"); |
---|
518 | } /* printweights */ |
---|
519 | |
---|
520 | void inputoptions() |
---|
521 | { |
---|
522 | /* read the options information */ |
---|
523 | Char ch; |
---|
524 | short i, extranum, cursp, curst, curenz; |
---|
525 | |
---|
526 | if (!firstset) { |
---|
527 | if (eoln(infile)) { |
---|
528 | fscanf(infile, "%*[^\n]"); |
---|
529 | getc(infile); |
---|
530 | } |
---|
531 | fscanf(infile, "%hd%hd%hd", &cursp, &curst, &curenz); |
---|
532 | if (cursp != numsp) { |
---|
533 | printf("\nERROR: INCONSISTENT NUMBER OF SPECIES IN DATA SET %4hd\n", |
---|
534 | ith); |
---|
535 | exit(-1); |
---|
536 | } |
---|
537 | if (curenz != enzymes) { |
---|
538 | printf("\nERROR: INCONSISTENT NUMBER OF ENZYMES IN DATA SET %4hd\n", |
---|
539 | ith); |
---|
540 | exit(-1); |
---|
541 | } |
---|
542 | sites = curst; |
---|
543 | } |
---|
544 | for (i = 1; i <= sites; i++) |
---|
545 | weight[i] = 1; |
---|
546 | weightsum = sites; |
---|
547 | extranum = 0; |
---|
548 | while (!(eoln(infile))) { |
---|
549 | ch = getc(infile); |
---|
550 | if (ch == '\n') |
---|
551 | ch = ' '; |
---|
552 | uppercase(&ch); |
---|
553 | if (ch == 'W') |
---|
554 | extranum++; |
---|
555 | else if (ch != ' ') { |
---|
556 | printf("BAD OPTION CHARACTER: %c\n", ch); |
---|
557 | exit(-1); |
---|
558 | } |
---|
559 | } |
---|
560 | fscanf(infile, "%*[^\n]"); |
---|
561 | getc(infile); |
---|
562 | for (i = 1; i <= extranum; i++) { |
---|
563 | ch = getc(infile); |
---|
564 | if (ch == '\n') |
---|
565 | ch = ' '; |
---|
566 | uppercase(&ch); |
---|
567 | if (ch != 'W') { |
---|
568 | printf("ERROR: INCORRECT AUXILIARY OPTIONS LINE"); |
---|
569 | printf(" WHICH STARTS WITH %c\n", ch); |
---|
570 | exit(-1); |
---|
571 | } |
---|
572 | else |
---|
573 | inputweights(); |
---|
574 | } |
---|
575 | fprintf(outfile, "\n Recognition sequences all%2hd bases long\n", |
---|
576 | sitelength); |
---|
577 | if (trunc8) |
---|
578 | fprintf(outfile, |
---|
579 | "\nSites absent from all species are assumed to have been omitted\n\n"); |
---|
580 | if (weights) |
---|
581 | printweights(); |
---|
582 | } /* inputoptions */ |
---|
583 | |
---|
584 | void setuptree(a) |
---|
585 | tree *a; |
---|
586 | { |
---|
587 | /* set up data structures for a tree */ |
---|
588 | short i, j; |
---|
589 | node *p; |
---|
590 | |
---|
591 | for (i = 1; i <= numsp; i++) { |
---|
592 | a->nodep[i - 1]->tip = true; |
---|
593 | a->nodep[i - 1]->iter = true; |
---|
594 | a->nodep[i - 1]->number = i; |
---|
595 | } |
---|
596 | for (i = numsp1; i <= numsp2; i++) { |
---|
597 | p = a->nodep[i - 1]; |
---|
598 | for (j = 1; j <= 3; j++) { |
---|
599 | p->tip = false; |
---|
600 | p->iter = true; |
---|
601 | p->number = i; |
---|
602 | p = p->next; |
---|
603 | } |
---|
604 | } |
---|
605 | a->likelihood = -999999.0; |
---|
606 | a->start = a->nodep[0]; |
---|
607 | } /* setuptree */ |
---|
608 | |
---|
609 | |
---|
610 | void getdata() |
---|
611 | { |
---|
612 | /* read the species and sites data */ |
---|
613 | short i, j, k, l, sitesread, sitesnew; |
---|
614 | Char ch; |
---|
615 | boolean allread, done; |
---|
616 | |
---|
617 | if (printdata) |
---|
618 | putc('\n', outfile); |
---|
619 | j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; |
---|
620 | if (j < nmlngth - 1) |
---|
621 | j = nmlngth - 1; |
---|
622 | if (j > 39) |
---|
623 | j = 39; |
---|
624 | if (printdata) { |
---|
625 | fprintf(outfile, "Name"); |
---|
626 | for (i = 1; i <= j; i++) |
---|
627 | putc(' ', outfile); |
---|
628 | fprintf(outfile, "Sites\n"); |
---|
629 | fprintf(outfile, "----"); |
---|
630 | for (i = 1; i <= j; i++) |
---|
631 | putc(' ', outfile); |
---|
632 | fprintf(outfile, "-----\n\n"); |
---|
633 | } |
---|
634 | setuptree(&curtree); |
---|
635 | sitesread = 0; |
---|
636 | allread = false; |
---|
637 | while (!(allread)) { |
---|
638 | allread = true; |
---|
639 | if (eoln(infile)) { |
---|
640 | fscanf(infile, "%*[^\n]"); |
---|
641 | getc(infile); |
---|
642 | } |
---|
643 | i = 1; |
---|
644 | while (i <= numsp ) { |
---|
645 | if ((interleaved && sitesread == 0) || !interleaved) { |
---|
646 | for (j = 0; j < nmlngth; j++) { |
---|
647 | if (eof(infile) | eoln(infile)){ |
---|
648 | printf("ERROR: END-OF-LINE OR END-OF-FILE"); |
---|
649 | printf(" IN THE MIDDLE OF A SPECIES NAME\n"); |
---|
650 | exit(-1); |
---|
651 | } |
---|
652 | curtree.nodep[i - 1]->nayme[j] = getc(infile); |
---|
653 | } |
---|
654 | } |
---|
655 | if (interleaved) |
---|
656 | j = sitesread; |
---|
657 | else |
---|
658 | j = 0; |
---|
659 | done = false; |
---|
660 | while (!done && !eof(infile)) { |
---|
661 | if (interleaved) |
---|
662 | done = true; |
---|
663 | while (j < sites && !(eoln(infile) || eof(infile))) { |
---|
664 | ch = getc(infile); |
---|
665 | if (ch == '\n') |
---|
666 | ch = ' '; |
---|
667 | if (ch == ' ') |
---|
668 | continue; |
---|
669 | uppercase(&ch); |
---|
670 | if (ch != '1' && ch != '0' && ch != '+' && ch != '-' && ch != '?' && |
---|
671 | ch != '.') { |
---|
672 | printf(" WARNING -- BAD SYMBOL %c",ch); |
---|
673 | printf(" AT POSITION %5hd OF SPECIES %3hd\n",j,i); |
---|
674 | exit(-1); |
---|
675 | } |
---|
676 | if (ch == '1') |
---|
677 | ch = '+'; |
---|
678 | if (ch == '0') |
---|
679 | ch = '-'; |
---|
680 | j++; |
---|
681 | if (ch == '.') |
---|
682 | ch = y[0][j - 1]; |
---|
683 | y[i - 1][j - 1] = ch; |
---|
684 | } |
---|
685 | if (interleaved) |
---|
686 | continue; |
---|
687 | if (j < sites) { |
---|
688 | fscanf(infile, "%*[^\n]"); |
---|
689 | getc(infile); |
---|
690 | } else if (j == sites) |
---|
691 | done = true; |
---|
692 | } |
---|
693 | if (interleaved && i == 1) |
---|
694 | sitesnew = j; |
---|
695 | fscanf(infile, "%*[^\n]"); |
---|
696 | getc(infile); |
---|
697 | if ((interleaved && j != sitesnew ) || ((!interleaved) && j != sites)){ |
---|
698 | printf("ERROR: SEQUENCES OUT OF ALIGNMENT\n"); |
---|
699 | exit(-1);} |
---|
700 | i++; |
---|
701 | } |
---|
702 | if (interleaved) { |
---|
703 | sitesread = sitesnew; |
---|
704 | allread = (sitesread == sites); |
---|
705 | } else |
---|
706 | allread = (i > numsp); |
---|
707 | } |
---|
708 | if (printdata) { |
---|
709 | for (i = 1; i <= ((sites - 1) / 60 + 1); i++) { |
---|
710 | for (j = 0; j < numsp; j++) { |
---|
711 | for (k = 0; k < nmlngth; k++) |
---|
712 | putc(curtree.nodep[j]->nayme[k], outfile); |
---|
713 | fprintf(outfile, " "); |
---|
714 | l = i * 60; |
---|
715 | if (l > sites) |
---|
716 | l = sites; |
---|
717 | for (k = (i - 1) * 60 + 1; k <= l; k++) { |
---|
718 | putc(y[j][k - 1], outfile); |
---|
719 | if (k % 10 == 0 && k % 60 != 0) |
---|
720 | putc(' ', outfile); |
---|
721 | } |
---|
722 | putc('\n', outfile); |
---|
723 | } |
---|
724 | putc('\n', outfile); |
---|
725 | } |
---|
726 | putc('\n', outfile); |
---|
727 | } |
---|
728 | putc('\n', outfile); |
---|
729 | if (!usertree) { |
---|
730 | setuptree(&priortree); |
---|
731 | setuptree(&bestree); |
---|
732 | if (njumble > 1) setuptree(&bestree2); |
---|
733 | } |
---|
734 | } /* getdata */ |
---|
735 | |
---|
736 | void sitesort() |
---|
737 | { |
---|
738 | /* Shell sort keeping alias, aliasweight in same order */ |
---|
739 | short gap, i, j, jj, jg, k, itemp; |
---|
740 | boolean flip, tied; |
---|
741 | |
---|
742 | gap = sites / 2; |
---|
743 | while (gap > 0) { |
---|
744 | for (i = gap + 1; i <= sites; i++) { |
---|
745 | j = i - gap; |
---|
746 | flip = true; |
---|
747 | while (j > 0 && flip) { |
---|
748 | jj = alias[j]; |
---|
749 | jg = alias[j + gap]; |
---|
750 | flip = false; |
---|
751 | tied = true; |
---|
752 | k = 1; |
---|
753 | while (k <= numsp && tied) { |
---|
754 | flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]); |
---|
755 | tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]); |
---|
756 | k++; |
---|
757 | } |
---|
758 | if (tied) { |
---|
759 | aliasweight[j] += aliasweight[j + gap]; |
---|
760 | aliasweight[j + gap] = 0; |
---|
761 | } |
---|
762 | if (!flip) |
---|
763 | break; |
---|
764 | itemp = alias[j]; |
---|
765 | alias[j] = alias[j + gap]; |
---|
766 | alias[j + gap] = itemp; |
---|
767 | itemp = aliasweight[j]; |
---|
768 | aliasweight[j] = aliasweight[j + gap]; |
---|
769 | aliasweight[j + gap] = itemp; |
---|
770 | j -= gap; |
---|
771 | } |
---|
772 | } |
---|
773 | gap /= 2; |
---|
774 | } |
---|
775 | } /* sitesort */ |
---|
776 | |
---|
777 | void sitecombine() |
---|
778 | { |
---|
779 | /* combine sites that have identical patterns */ |
---|
780 | short i, j, k; |
---|
781 | boolean tied; |
---|
782 | |
---|
783 | i = 1; |
---|
784 | while (i < sites) { |
---|
785 | j = i + 1; |
---|
786 | tied = true; |
---|
787 | while (j <= sites && tied) { |
---|
788 | k = 1; |
---|
789 | while (k <= numsp && tied) { |
---|
790 | tied = (tied && y[k - 1][alias[i] - 1] == y[k - 1][alias[j] - 1]); |
---|
791 | k++; |
---|
792 | } |
---|
793 | if (tied && aliasweight[j] > 0) { |
---|
794 | aliasweight[i] += aliasweight[j]; |
---|
795 | aliasweight[j] = 0; |
---|
796 | alias[j] = alias[i]; |
---|
797 | } |
---|
798 | j++; |
---|
799 | } |
---|
800 | i = j - 1; |
---|
801 | } |
---|
802 | } /* sitecombine */ |
---|
803 | |
---|
804 | void sitescrunch() |
---|
805 | { |
---|
806 | /* move so positively weighted sites come first */ |
---|
807 | short i, j, itemp; |
---|
808 | boolean done, found; |
---|
809 | |
---|
810 | done = false; |
---|
811 | i = 1; |
---|
812 | j = 2; |
---|
813 | while (!done) { |
---|
814 | found = false; |
---|
815 | if (aliasweight[i] > 0) |
---|
816 | i++; |
---|
817 | else { |
---|
818 | if (j <= i) |
---|
819 | j = i + 1; |
---|
820 | if (j <= sites) { |
---|
821 | found = false; |
---|
822 | do { |
---|
823 | found = (aliasweight[j] > 0); |
---|
824 | j++; |
---|
825 | } while (!(found || j > sites)); |
---|
826 | if (found) { |
---|
827 | j--; |
---|
828 | itemp = alias[i]; |
---|
829 | alias[i] = alias[j]; |
---|
830 | alias[j] = itemp; |
---|
831 | itemp = aliasweight[i]; |
---|
832 | aliasweight[i] = aliasweight[j]; |
---|
833 | aliasweight[j] = itemp; |
---|
834 | } else |
---|
835 | done = true; |
---|
836 | } else |
---|
837 | done = true; |
---|
838 | } |
---|
839 | done = (done || i >= sites); |
---|
840 | } |
---|
841 | } /* sitescrunch */ |
---|
842 | |
---|
843 | void makeweights() |
---|
844 | { |
---|
845 | /* make up weights vector to avoid duplicate computations */ |
---|
846 | short i,j; |
---|
847 | node *p; |
---|
848 | |
---|
849 | for (i = 1; i <= sites; i++) { |
---|
850 | alias[i] = i; |
---|
851 | aliasweight[i] = weight[i]; |
---|
852 | } |
---|
853 | sitesort(); |
---|
854 | sitecombine(); |
---|
855 | sitescrunch(); |
---|
856 | for (i = 1; i <= sites; i++) { |
---|
857 | weight[i] = aliasweight[i]; |
---|
858 | if (weight[i] > 0) |
---|
859 | endsite = i; |
---|
860 | } |
---|
861 | weight[0] = 1; |
---|
862 | for (i = 0; i < numsp; i++) |
---|
863 | curtree.nodep[i]->x = (phenotype)Malloc((endsite+1)*sizeof(sitelike)); |
---|
864 | for (i = numsp; i < numsp2; i++) { |
---|
865 | p = curtree.nodep[i]; |
---|
866 | for (j = 1; j <= 3; j++) { |
---|
867 | p->x = (phenotype)Malloc((endsite+1)*sizeof(sitelike)); |
---|
868 | p = p->next; |
---|
869 | } |
---|
870 | } |
---|
871 | if (!usertree) { |
---|
872 | for (i = 0; i < numsp; i++) |
---|
873 | priortree.nodep[i]->x = (phenotype)Malloc((endsite+1)*sizeof(sitelike)); |
---|
874 | for (i = numsp; i < numsp2; i++) { |
---|
875 | p = priortree.nodep[i]; |
---|
876 | for (j = 1; j <= 3; j++) { |
---|
877 | p->x = (phenotype)Malloc((endsite+1)*sizeof(sitelike)); |
---|
878 | p = p->next; |
---|
879 | } |
---|
880 | } |
---|
881 | for (i = 0; i < numsp; i++) |
---|
882 | bestree.nodep[i]->x = (phenotype)Malloc((endsite+1)*sizeof(sitelike)); |
---|
883 | for (i = numsp; i < numsp2; i++) { |
---|
884 | p = bestree.nodep[i]; |
---|
885 | for (j = 1; j <= 3; j++) { |
---|
886 | p->x = (phenotype)Malloc((endsite+1)*sizeof(sitelike)); |
---|
887 | p = p->next; |
---|
888 | } |
---|
889 | } |
---|
890 | if (njumble > 1) { |
---|
891 | for (i = 0; i < numsp; i++) |
---|
892 | bestree2.nodep[i]->x = (phenotype)Malloc((endsite+1)*sizeof(sitelike)); |
---|
893 | for (i = numsp; i < numsp2; i++) { |
---|
894 | p = bestree2.nodep[i]; |
---|
895 | for (j = 1; j <= 3; j++) { |
---|
896 | p->x = (phenotype)Malloc((endsite+1)*sizeof(sitelike)); |
---|
897 | p = p->next; |
---|
898 | } |
---|
899 | } |
---|
900 | } |
---|
901 | } |
---|
902 | } /* makeweights */ |
---|
903 | |
---|
904 | void makevalues() |
---|
905 | { |
---|
906 | /* set up fractional likelihoods at tips */ |
---|
907 | short i, j, k, l, m; |
---|
908 | boolean found; |
---|
909 | |
---|
910 | for (k = 1; k <= endsite; k++) { |
---|
911 | j = alias[k]; |
---|
912 | for (i = 0; i < numsp; i++) { |
---|
913 | for (l = 0; l <= sitelength; l++) |
---|
914 | curtree.nodep[i]->x[k][l] = 1.0; |
---|
915 | switch (y[i][j - 1]) { |
---|
916 | |
---|
917 | case '+': |
---|
918 | for (m = 1; m <= sitelength; m++) |
---|
919 | curtree.nodep[i]->x[k][m] = 0.0; |
---|
920 | break; |
---|
921 | |
---|
922 | case '-': |
---|
923 | curtree.nodep[i]->x[k][0] = 0.0; |
---|
924 | break; |
---|
925 | |
---|
926 | case '?': |
---|
927 | /* blank case */ |
---|
928 | break; |
---|
929 | } |
---|
930 | } |
---|
931 | } |
---|
932 | for (i = 0; i < numsp; i++) { |
---|
933 | for (k = 1; k <= sitelength; k++) |
---|
934 | curtree.nodep[i]->x[0][k] = 1.0; |
---|
935 | curtree.nodep[i]->x[0][0] = 0.0; |
---|
936 | } |
---|
937 | if (trunc8) |
---|
938 | return; |
---|
939 | found = false; |
---|
940 | i = 1; |
---|
941 | while (!found && i <= endsite) { |
---|
942 | found = true; |
---|
943 | for (k = 0; k < numsp; k++) |
---|
944 | found = (found && y[k][alias[i] - 1] == '-'); |
---|
945 | if (!found) |
---|
946 | i++; |
---|
947 | } |
---|
948 | if (found) { |
---|
949 | weightsum += (enzymes - 1) * weight[i]; |
---|
950 | weight[i] *= enzymes; |
---|
951 | } |
---|
952 | } /* makevalues */ |
---|
953 | |
---|
954 | |
---|
955 | void getinput() |
---|
956 | { |
---|
957 | /* reads the input data */ |
---|
958 | inputoptions(); |
---|
959 | getdata(); |
---|
960 | makeweights(); |
---|
961 | makevalues(); |
---|
962 | } /* getinput */ |
---|
963 | |
---|
964 | |
---|
965 | main(argc, argv) |
---|
966 | int argc; |
---|
967 | Char *argv[]; |
---|
968 | { /* maximum likelihood phylogenies from restriction sites */ |
---|
969 | char infilename[100],outfilename[100],trfilename[100]; |
---|
970 | #ifdef MAC |
---|
971 | macsetup("Restml",""); |
---|
972 | #endif |
---|
973 | openfile(&infile,INFILE,"r",argv[0],infilename); |
---|
974 | openfile(&outfile,OUTFILE,"w",argv[0],outfilename); |
---|
975 | ibmpc = ibmpc0; |
---|
976 | ansi = ansi0; |
---|
977 | vt52 = vt520; |
---|
978 | mulsets = false; |
---|
979 | datasets = 1; |
---|
980 | firstset = true; |
---|
981 | doinit(); |
---|
982 | if (trout) |
---|
983 | openfile(&treefile,TREEFILE,"w",argv[0],trfilename); |
---|
984 | enterorder = (short *)Malloc(numsp*sizeof(short)); |
---|
985 | weight = (short *)Malloc((sites+1)*sizeof(short)); |
---|
986 | alias = (short *)Malloc((sites+1)*sizeof(short)); |
---|
987 | aliasweight = (short *)Malloc((sites+1)*sizeof(short)); |
---|
988 | for (ith = 1; ith <= datasets; ith++) { |
---|
989 | if (datasets > 1) { |
---|
990 | fprintf(outfile, "Data set # %hd:\n",ith); |
---|
991 | if (progress) |
---|
992 | printf("\nData set # %hd:\n",ith); |
---|
993 | } |
---|
994 | getinput(); |
---|
995 | if (ith == 1) |
---|
996 | firstset = false; |
---|
997 | for (jumb = 1; jumb <= njumble; jumb++) |
---|
998 | maketree(); |
---|
999 | } |
---|
1000 | FClose(infile); |
---|
1001 | FClose(outfile); |
---|
1002 | FClose(treefile); |
---|
1003 | #ifdef MAC |
---|
1004 | fixmacfile(outfilename); |
---|
1005 | fixmacfile(trfilename); |
---|
1006 | #endif |
---|
1007 | exit(0); |
---|
1008 | } /* maximum likelihood phylogenies from restriction sites */ |
---|
1009 | |
---|
1010 | /* |
---|
1011 | misc. support routines. Some of these should eventually be removed |
---|
1012 | */ |
---|
1013 | |
---|
1014 | int eof(f) |
---|
1015 | FILE *f; |
---|
1016 | { |
---|
1017 | register int ch; |
---|
1018 | |
---|
1019 | if (feof(f)) |
---|
1020 | return 1; |
---|
1021 | if (f == stdin) |
---|
1022 | return 0; |
---|
1023 | ch = getc(f); |
---|
1024 | if (ch == EOF) |
---|
1025 | return 1; |
---|
1026 | ungetc(ch, f); |
---|
1027 | return 0; |
---|
1028 | } |
---|
1029 | |
---|
1030 | |
---|
1031 | int eoln(f) |
---|
1032 | FILE *f; |
---|
1033 | { |
---|
1034 | register int ch; |
---|
1035 | |
---|
1036 | ch = getc(f); |
---|
1037 | if (ch == EOF) |
---|
1038 | return 1; |
---|
1039 | ungetc(ch, f); |
---|
1040 | return (ch == '\n'); |
---|
1041 | } |
---|
1042 | |
---|
1043 | void memerror() |
---|
1044 | { |
---|
1045 | printf("Error allocating memory\n"); |
---|
1046 | exit(-1); |
---|
1047 | } |
---|
1048 | |
---|
1049 | MALLOCRETURN *mymalloc(x) |
---|
1050 | long x; |
---|
1051 | { |
---|
1052 | MALLOCRETURN *mem; |
---|
1053 | mem = (MALLOCRETURN *)calloc(1,x); |
---|
1054 | if (!mem) |
---|
1055 | memerror(); |
---|
1056 | else |
---|
1057 | return (MALLOCRETURN *)mem; |
---|
1058 | } |
---|
1059 | |
---|