1 | |
---|
2 | /* version 3.6. (c) Copyright 1993-2002 by the University of Washington. |
---|
3 | Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. |
---|
4 | Permission is granted to copy and use this program provided no fee is |
---|
5 | charged for it and provided that this copyright notice is not removed. */ |
---|
6 | |
---|
7 | #include "phylip.h" |
---|
8 | #include "seq.h" |
---|
9 | |
---|
10 | #define initialv 0.1 /* starting value of branch length */ |
---|
11 | |
---|
12 | #define over 60 /* maximum width of a tree on screen */ |
---|
13 | |
---|
14 | extern sequence y; |
---|
15 | |
---|
16 | #ifndef OLDC |
---|
17 | /* function prototypes */ |
---|
18 | void restml_inputnumbers(void); |
---|
19 | void getoptions(void); |
---|
20 | void allocrest(void); |
---|
21 | void setuppie(void); |
---|
22 | void doinit(void); |
---|
23 | void inputoptions(void); |
---|
24 | void restml_inputdata(void); |
---|
25 | void restml_sitesort(void); |
---|
26 | void restml_sitecombine(void); |
---|
27 | void makeweights(void); |
---|
28 | |
---|
29 | void restml_makevalues(void); |
---|
30 | void getinput(void); |
---|
31 | void copymatrix(transmatrix, transmatrix); |
---|
32 | void maketrans(double, boolean); |
---|
33 | void branchtrans(long, double); |
---|
34 | double evaluate(tree *, node *); |
---|
35 | void nuview(node *); |
---|
36 | void makenewv(node *); |
---|
37 | void update(node *); |
---|
38 | void smooth(node *); |
---|
39 | |
---|
40 | void insert_(node *p, node *); |
---|
41 | void restml_re_move(node **, node **); |
---|
42 | void restml_copynode(node *, node *); |
---|
43 | void restml_copy_(tree *, tree *); |
---|
44 | void buildnewtip(long , tree *); |
---|
45 | void buildsimpletree(tree *); |
---|
46 | void addtraverse(node *, node *, boolean); |
---|
47 | void rearrange(node *, node *); |
---|
48 | void restml_coordinates(node *, double, long *,double *, double *); |
---|
49 | void restml_printree(void); |
---|
50 | |
---|
51 | double sigma(node *, double *); |
---|
52 | void describe(node *); |
---|
53 | void summarize(void); |
---|
54 | void restml_treeout(node *); |
---|
55 | void inittravtree(node *); |
---|
56 | void treevaluate(void); |
---|
57 | void maketree(void); |
---|
58 | /* function prototypes */ |
---|
59 | #endif |
---|
60 | |
---|
61 | |
---|
62 | Char infilename[FNMLNGTH], outfilename[FNMLNGTH], intreename[FNMLNGTH], outtreename[FNMLNGTH]; |
---|
63 | long nonodes2, sites, enzymes, weightsum, sitelength, datasets, |
---|
64 | ith, njumble, jumb=0; |
---|
65 | long inseed, inseed0; |
---|
66 | boolean global, jumble, lengths, weights, trout, trunc8, usertree, |
---|
67 | reconsider, progress, mulsets, firstset, improve, smoothit; |
---|
68 | double bestyet; |
---|
69 | tree curtree, priortree, bestree, bestree2; |
---|
70 | longer seed; |
---|
71 | long *enterorder; |
---|
72 | steptr aliasweight; |
---|
73 | char *progname; |
---|
74 | node *qwhere; |
---|
75 | |
---|
76 | /* Local variables for maketree, propagated globally for C version: */ |
---|
77 | long nextsp, numtrees, maxwhich, col; |
---|
78 | double maxlogl; |
---|
79 | boolean succeeded, smoothed; |
---|
80 | transmatrix tempmatrix, temp2matrix, temp3matrix, |
---|
81 | temp4matrix, temp5matrix, tempslope, tempcurve; |
---|
82 | sitelike2 pie; |
---|
83 | double *l0gl; |
---|
84 | double **l0gf; |
---|
85 | Char global_ch; |
---|
86 | |
---|
87 | /* variables added to keep treeread2() happy */ |
---|
88 | boolean goteof; |
---|
89 | double trweight; |
---|
90 | boolean haslengths; |
---|
91 | |
---|
92 | |
---|
93 | void restml_inputnumbers() |
---|
94 | { |
---|
95 | /* read and print out numbers of species and sites */ |
---|
96 | fscanf(infile, "%ld%ld%ld", &spp, &sites, &enzymes); |
---|
97 | nonodes2 = spp * 2 - 2; |
---|
98 | } /* restml_inputnumbers */ |
---|
99 | |
---|
100 | |
---|
101 | void getoptions() |
---|
102 | { |
---|
103 | /* interactively set options */ |
---|
104 | long loopcount, loopcount2; |
---|
105 | Char ch; |
---|
106 | |
---|
107 | fprintf(outfile, "\nRestriction site Maximum Likelihood"); |
---|
108 | fprintf(outfile, " method, version %s\n\n",VERSION); |
---|
109 | putchar('\n'); |
---|
110 | sitelength = 6; |
---|
111 | trunc8 = true; |
---|
112 | global = false; |
---|
113 | improve = false; |
---|
114 | jumble = false; |
---|
115 | njumble = 1; |
---|
116 | lengths = false; |
---|
117 | outgrno = 1; |
---|
118 | outgropt = false; |
---|
119 | reconsider = false; |
---|
120 | trout = true; |
---|
121 | usertree = false; |
---|
122 | weights = false; |
---|
123 | printdata = false; |
---|
124 | progress = true; |
---|
125 | treeprint = true; |
---|
126 | interleaved = true; |
---|
127 | loopcount = 0; |
---|
128 | for (;;) { |
---|
129 | cleerhome(); |
---|
130 | printf("\nRestriction site Maximum Likelihood"); |
---|
131 | printf(" method, version %s\n\n",VERSION); |
---|
132 | printf("Settings for this run:\n"); |
---|
133 | printf(" U Search for best tree? %s\n", |
---|
134 | (usertree ? "No, use user trees in input file" : "Yes")); |
---|
135 | if (usertree) { |
---|
136 | printf(" N Use lengths from user trees? %s\n", |
---|
137 | (lengths ? "Yes" : "No")); |
---|
138 | } |
---|
139 | printf(" A Are all sites detected? %s\n", |
---|
140 | (trunc8 ? "No" : "Yes")); |
---|
141 | if (!usertree) { |
---|
142 | printf(" S Speedier but rougher analysis? %s\n", |
---|
143 | (improve ? "No, not rough" : "Yes")); |
---|
144 | printf(" G Global rearrangements? %s\n", |
---|
145 | (global ? "Yes" : "No")); |
---|
146 | printf(" J Randomize input order of sequences? "); |
---|
147 | if (jumble) |
---|
148 | printf("Yes (seed =%8ld,%3ld times)\n", inseed0, njumble); |
---|
149 | else |
---|
150 | printf("No. Use input order\n"); |
---|
151 | } |
---|
152 | else |
---|
153 | printf(" V Rearrange starting with user tree? %s\n", |
---|
154 | (reconsider ? "Yes" : "No")); |
---|
155 | printf(" L Site length?%3ld\n",sitelength); |
---|
156 | printf(" O Outgroup root? %s%3ld\n", |
---|
157 | (outgropt ? "Yes, at sequence number" : |
---|
158 | "No, use as outgroup species"),outgrno); |
---|
159 | |
---|
160 | printf(" M Analyze multiple data sets?"); |
---|
161 | if (mulsets) |
---|
162 | printf(" Yes, %2ld sets\n", datasets); |
---|
163 | else |
---|
164 | printf(" No\n"); |
---|
165 | printf(" I Input sequences interleaved? %s\n", |
---|
166 | (interleaved ? "Yes" : "No, sequential")); |
---|
167 | printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n", |
---|
168 | ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)"); |
---|
169 | printf(" 1 Print out the data at start of run %s\n", |
---|
170 | (printdata ? "Yes" : "No")); |
---|
171 | printf(" 2 Print indications of progress of run %s\n", |
---|
172 | (progress ? "Yes" : "No")); |
---|
173 | printf(" 3 Print out tree %s\n", |
---|
174 | (treeprint ? "Yes" : "No")); |
---|
175 | printf(" 4 Write out trees onto tree file? %s\n", |
---|
176 | (trout ? "Yes" : "No")); |
---|
177 | printf("\n Y to accept these or type the letter for one to change\n"); |
---|
178 | scanf("%c%*[^\n]", &ch); |
---|
179 | getchar(); |
---|
180 | if (ch == '\n') |
---|
181 | ch = ' '; |
---|
182 | uppercase(&ch); |
---|
183 | if (ch == 'Y') |
---|
184 | break; |
---|
185 | if (strchr("UNASGJVLOTMI01234",ch) != NULL){ |
---|
186 | switch (ch) { |
---|
187 | |
---|
188 | case 'A': |
---|
189 | trunc8 = !trunc8; |
---|
190 | break; |
---|
191 | |
---|
192 | case 'S': |
---|
193 | improve = !improve; |
---|
194 | break; |
---|
195 | |
---|
196 | case 'G': |
---|
197 | global = !global; |
---|
198 | break; |
---|
199 | |
---|
200 | case 'J': |
---|
201 | jumble = !jumble; |
---|
202 | if (jumble) |
---|
203 | initjumble(&inseed, &inseed0, seed, &njumble); |
---|
204 | else njumble = 1; |
---|
205 | break; |
---|
206 | |
---|
207 | case 'L': |
---|
208 | loopcount2 = 0; |
---|
209 | do { |
---|
210 | printf("New Sitelength?\n"); |
---|
211 | scanf("%ld%*[^\n]", &sitelength); |
---|
212 | getchar(); |
---|
213 | if (sitelength < 1) |
---|
214 | printf("BAD RESTRICTION SITE LENGTH: %ld\n", sitelength); |
---|
215 | countup(&loopcount2, 10); |
---|
216 | } while (sitelength < 1); |
---|
217 | break; |
---|
218 | |
---|
219 | case 'N': |
---|
220 | lengths = !lengths; |
---|
221 | break; |
---|
222 | |
---|
223 | case 'O': |
---|
224 | outgropt = !outgropt; |
---|
225 | if (outgropt) |
---|
226 | initoutgroup(&outgrno, spp); |
---|
227 | else outgrno = 1; |
---|
228 | break; |
---|
229 | |
---|
230 | case 'U': |
---|
231 | usertree = !usertree; |
---|
232 | break; |
---|
233 | |
---|
234 | case 'V': |
---|
235 | reconsider = !reconsider; |
---|
236 | break; |
---|
237 | |
---|
238 | case 'M': |
---|
239 | mulsets = !mulsets; |
---|
240 | if (mulsets) |
---|
241 | initdatasets(&datasets); |
---|
242 | break; |
---|
243 | |
---|
244 | case 'I': |
---|
245 | interleaved = !interleaved; |
---|
246 | break; |
---|
247 | |
---|
248 | case '0': |
---|
249 | initterminal(&ibmpc, &ansi); |
---|
250 | break; |
---|
251 | |
---|
252 | case '1': |
---|
253 | printdata = !printdata; |
---|
254 | break; |
---|
255 | |
---|
256 | case '2': |
---|
257 | progress = !progress; |
---|
258 | break; |
---|
259 | |
---|
260 | case '3': |
---|
261 | treeprint = !treeprint; |
---|
262 | break; |
---|
263 | |
---|
264 | case '4': |
---|
265 | trout = !trout; |
---|
266 | break; |
---|
267 | } |
---|
268 | } else |
---|
269 | printf("Not a possible option!\n"); |
---|
270 | countup(&loopcount, 100); |
---|
271 | } |
---|
272 | } /* getoptions */ |
---|
273 | |
---|
274 | |
---|
275 | void allocrest() |
---|
276 | { |
---|
277 | long i; |
---|
278 | |
---|
279 | y = (Char **)Malloc(spp*sizeof(Char *)); |
---|
280 | for (i = 0; i < spp; i++) |
---|
281 | y[i] = (Char *)Malloc(sites*sizeof(Char)); |
---|
282 | nayme = (naym *)Malloc(spp*sizeof(naym)); |
---|
283 | enterorder = (long *)Malloc(spp*sizeof(long)); |
---|
284 | weight = (steptr)Malloc((sites+1)*sizeof(long)); |
---|
285 | alias = (steptr)Malloc((sites+1)*sizeof(long)); |
---|
286 | aliasweight = (steptr)Malloc((sites+1)*sizeof(long)); |
---|
287 | } /* allocrest */ |
---|
288 | |
---|
289 | |
---|
290 | void setuppie() |
---|
291 | { |
---|
292 | /* set up equilibrium probabilities of being a given |
---|
293 | number of bases away from a restriction site */ |
---|
294 | long i; |
---|
295 | double sum; |
---|
296 | |
---|
297 | pie[0] = 1.0; |
---|
298 | sum = pie[0]; |
---|
299 | for (i = 1; i <= sitelength; i++) { |
---|
300 | pie[i] = 3 * pie[i - 1] * (sitelength - i + 1) / i; |
---|
301 | sum += pie[i]; |
---|
302 | } |
---|
303 | for (i = 0; i <= sitelength; i++) |
---|
304 | pie[i] /= sum; |
---|
305 | } /* setuppie */ |
---|
306 | |
---|
307 | |
---|
308 | void doinit() |
---|
309 | { |
---|
310 | /* initializes variables */ |
---|
311 | long i; |
---|
312 | |
---|
313 | restml_inputnumbers(); |
---|
314 | getoptions(); |
---|
315 | if (printdata) |
---|
316 | fprintf(outfile, "%4ld Species, %4ld Sites,%4ld Enzymes\n", |
---|
317 | spp, sites, enzymes); |
---|
318 | tempmatrix = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
319 | for (i=0; i<=sitelength; i++) |
---|
320 | tempmatrix[i] = (double *)Malloc((sitelength+1) * sizeof(double)); |
---|
321 | temp2matrix = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
322 | for (i=0; i<=sitelength; i++) |
---|
323 | temp2matrix[i] = (double *)Malloc((sitelength+1) * sizeof(double)); |
---|
324 | temp3matrix = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
325 | for (i=0; i<=sitelength; i++) |
---|
326 | temp3matrix[i] = (double *)Malloc((sitelength+1) * sizeof(double)); |
---|
327 | temp4matrix = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
328 | for (i=0; i<=sitelength; i++) |
---|
329 | temp4matrix[i] = (double *)Malloc((sitelength+1) * sizeof(double)); |
---|
330 | temp5matrix = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
331 | for (i=0; i<=sitelength; i++) |
---|
332 | temp5matrix[i] = (double *)Malloc((sitelength+1) * sizeof(double)); |
---|
333 | tempslope = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
334 | for (i=0; i<=sitelength; i++) |
---|
335 | tempslope[i] = (double *)Malloc((sitelength+1) * sizeof(double)); |
---|
336 | tempcurve = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
337 | for (i=0; i<=sitelength; i++) |
---|
338 | tempcurve[i] = (double *)Malloc((sitelength+1) * sizeof(double)); |
---|
339 | setuppie(); |
---|
340 | alloctrans(&curtree.trans, nonodes2, sitelength); |
---|
341 | alloctree(&curtree.nodep, nonodes2, false); |
---|
342 | allocrest(); |
---|
343 | if (usertree && !reconsider) |
---|
344 | return; |
---|
345 | alloctrans(&bestree.trans, nonodes2, sitelength); |
---|
346 | alloctree(&bestree.nodep, nonodes2, 0); |
---|
347 | alloctrans(&priortree.trans, nonodes2, sitelength); |
---|
348 | alloctree(&priortree.nodep, nonodes2, 0); |
---|
349 | if (njumble == 1) return; |
---|
350 | alloctrans(&bestree2.trans, nonodes2, sitelength); |
---|
351 | alloctree(&bestree2.nodep, nonodes2, 0); |
---|
352 | } /* doinit */ |
---|
353 | |
---|
354 | |
---|
355 | void inputoptions() |
---|
356 | { |
---|
357 | /* read the options information */ |
---|
358 | Char ch; |
---|
359 | long i, extranum, cursp, curst, curenz; |
---|
360 | |
---|
361 | if (!firstset) { |
---|
362 | if (eoln(infile)) |
---|
363 | scan_eoln(infile); |
---|
364 | fscanf(infile, "%ld%ld%ld", &cursp, &curst, &curenz); |
---|
365 | if (cursp != spp) { |
---|
366 | printf("\nERROR: INCONSISTENT NUMBER OF SPECIES IN DATA SET %4ld\n", |
---|
367 | ith); |
---|
368 | exxit(-1); |
---|
369 | } |
---|
370 | if (curenz != enzymes) { |
---|
371 | printf("\nERROR: INCONSISTENT NUMBER OF ENZYMES IN DATA SET %4ld\n", |
---|
372 | ith); |
---|
373 | exxit(-1); |
---|
374 | } |
---|
375 | sites = curst; |
---|
376 | } |
---|
377 | for (i = 1; i <= sites; i++) |
---|
378 | weight[i] = 1; |
---|
379 | weightsum = sites; |
---|
380 | extranum = 0; |
---|
381 | readoptions(&extranum, "W"); |
---|
382 | for (i = 1; i <= extranum; i++) { |
---|
383 | matchoptions(&ch, "W"); |
---|
384 | if (ch == 'W') |
---|
385 | inputweights2(1, sites+1, &weightsum, weight, &weights, "RESTML"); |
---|
386 | } |
---|
387 | fprintf(outfile, "\n Recognition sequences all%2ld bases long\n", |
---|
388 | sitelength); |
---|
389 | if (trunc8) |
---|
390 | fprintf(outfile, |
---|
391 | "\nSites absent from all species are assumed to have been omitted\n\n"); |
---|
392 | if (weights) |
---|
393 | printweights(outfile, 1, sites, weight, "Sites"); |
---|
394 | } /* inputoptions */ |
---|
395 | |
---|
396 | |
---|
397 | void restml_inputdata() |
---|
398 | { |
---|
399 | /* read the species and sites data */ |
---|
400 | long i, j, k, l, sitesread, sitesnew=0; |
---|
401 | Char ch; |
---|
402 | boolean allread, done; |
---|
403 | |
---|
404 | if (printdata) |
---|
405 | putc('\n', outfile); |
---|
406 | j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; |
---|
407 | if (j < nmlngth - 1) |
---|
408 | j = nmlngth - 1; |
---|
409 | if (j > 39) |
---|
410 | j = 39; |
---|
411 | if (printdata) { |
---|
412 | fprintf(outfile, "Name"); |
---|
413 | for (i = 1; i <= j; i++) |
---|
414 | putc(' ', outfile); |
---|
415 | fprintf(outfile, "Sites\n"); |
---|
416 | fprintf(outfile, "----"); |
---|
417 | for (i = 1; i <= j; i++) |
---|
418 | putc(' ', outfile); |
---|
419 | fprintf(outfile, "-----\n\n"); |
---|
420 | } |
---|
421 | sitesread = 0; |
---|
422 | allread = false; |
---|
423 | while (!(allread)) { |
---|
424 | allread = true; |
---|
425 | if (eoln(infile)) |
---|
426 | scan_eoln(infile); |
---|
427 | i = 1; |
---|
428 | while (i <= spp ) { |
---|
429 | if ((interleaved && sitesread == 0) || !interleaved) |
---|
430 | initname(i - 1); |
---|
431 | if (interleaved) |
---|
432 | j = sitesread; |
---|
433 | else |
---|
434 | j = 0; |
---|
435 | done = false; |
---|
436 | while (!done && !eoff(infile)) { |
---|
437 | if (interleaved) |
---|
438 | done = true; |
---|
439 | while (j < sites && !(eoln(infile) || eoff(infile))) { |
---|
440 | ch = gettc(infile); |
---|
441 | if (ch == '\n') |
---|
442 | ch = ' '; |
---|
443 | if (ch == ' ') |
---|
444 | continue; |
---|
445 | uppercase(&ch); |
---|
446 | if (ch != '1' && ch != '0' && ch != '+' && ch != '-' && ch != '?') { |
---|
447 | printf(" ERROR: Bad symbol %c", ch); |
---|
448 | printf(" at position %ld of species %ld\n", j+1, i); |
---|
449 | exxit(-1); |
---|
450 | } |
---|
451 | if (ch == '1') |
---|
452 | ch = '+'; |
---|
453 | if (ch == '0') |
---|
454 | ch = '-'; |
---|
455 | j++; |
---|
456 | y[i - 1][j - 1] = ch; |
---|
457 | } |
---|
458 | if (interleaved) |
---|
459 | continue; |
---|
460 | if (j < sites) |
---|
461 | scan_eoln(infile); |
---|
462 | else if (j == sites) |
---|
463 | done = true; |
---|
464 | } |
---|
465 | if (interleaved && i == 1) |
---|
466 | sitesnew = j; |
---|
467 | scan_eoln(infile); |
---|
468 | if ((interleaved && j != sitesnew ) || ((!interleaved) && j != sites)){ |
---|
469 | printf("ERROR: SEQUENCES OUT OF ALIGNMENT\n"); |
---|
470 | exxit(-1);} |
---|
471 | i++; |
---|
472 | } |
---|
473 | if (interleaved) { |
---|
474 | sitesread = sitesnew; |
---|
475 | allread = (sitesread == sites); |
---|
476 | } else |
---|
477 | allread = (i > spp); |
---|
478 | } |
---|
479 | if (printdata) { |
---|
480 | for (i = 1; i <= ((sites - 1) / 60 + 1); i++) { |
---|
481 | for (j = 0; j < spp; j++) { |
---|
482 | for (k = 0; k < nmlngth; k++) |
---|
483 | putc(nayme[j][k], outfile); |
---|
484 | fprintf(outfile, " "); |
---|
485 | l = i * 60; |
---|
486 | if (l > sites) |
---|
487 | l = sites; |
---|
488 | for (k = (i - 1) * 60 + 1; k <= l; k++) { |
---|
489 | putc(y[j][k - 1], outfile); |
---|
490 | if (k % 10 == 0 && k % 60 != 0) |
---|
491 | putc(' ', outfile); |
---|
492 | } |
---|
493 | putc('\n', outfile); |
---|
494 | } |
---|
495 | putc('\n', outfile); |
---|
496 | } |
---|
497 | putc('\n', outfile); |
---|
498 | } |
---|
499 | putc('\n', outfile); |
---|
500 | } /* restml_inputdata */ |
---|
501 | |
---|
502 | |
---|
503 | void restml_sitesort() |
---|
504 | { |
---|
505 | /* Shell sort keeping alias, aliasweight in same order */ |
---|
506 | long gap, i, j, jj, jg, k, itemp; |
---|
507 | boolean flip, tied; |
---|
508 | |
---|
509 | gap = sites / 2; |
---|
510 | while (gap > 0) { |
---|
511 | for (i = gap + 1; i <= sites; i++) { |
---|
512 | j = i - gap; |
---|
513 | flip = true; |
---|
514 | while (j > 0 && flip) { |
---|
515 | jj = alias[j]; |
---|
516 | jg = alias[j + gap]; |
---|
517 | flip = false; |
---|
518 | tied = true; |
---|
519 | k = 1; |
---|
520 | while (k <= spp && tied) { |
---|
521 | flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]); |
---|
522 | tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]); |
---|
523 | k++; |
---|
524 | } |
---|
525 | if (tied) { |
---|
526 | aliasweight[j] += aliasweight[j + gap]; |
---|
527 | aliasweight[j + gap] = 0; |
---|
528 | } |
---|
529 | if (!flip) |
---|
530 | break; |
---|
531 | itemp = alias[j]; |
---|
532 | alias[j] = alias[j + gap]; |
---|
533 | alias[j + gap] = itemp; |
---|
534 | itemp = aliasweight[j]; |
---|
535 | aliasweight[j] = aliasweight[j + gap]; |
---|
536 | aliasweight[j + gap] = itemp; |
---|
537 | j -= gap; |
---|
538 | } |
---|
539 | } |
---|
540 | gap /= 2; |
---|
541 | } |
---|
542 | } /* restml_sitesort */ |
---|
543 | |
---|
544 | |
---|
545 | void restml_sitecombine() |
---|
546 | { |
---|
547 | /* combine sites that have identical patterns */ |
---|
548 | long i, j, k; |
---|
549 | boolean tied; |
---|
550 | |
---|
551 | i = 1; |
---|
552 | while (i < sites) { |
---|
553 | j = i + 1; |
---|
554 | tied = true; |
---|
555 | while (j <= sites && tied) { |
---|
556 | k = 1; |
---|
557 | while (k <= spp && tied) { |
---|
558 | tied = (tied && y[k - 1][alias[i] - 1] == y[k - 1][alias[j] - 1]); |
---|
559 | k++; |
---|
560 | } |
---|
561 | if (tied && aliasweight[j] > 0) { |
---|
562 | aliasweight[i] += aliasweight[j]; |
---|
563 | aliasweight[j] = 0; |
---|
564 | alias[j] = alias[i]; |
---|
565 | } |
---|
566 | j++; |
---|
567 | } |
---|
568 | i = j - 1; |
---|
569 | } |
---|
570 | } /* restml_sitecombine */ |
---|
571 | |
---|
572 | |
---|
573 | void makeweights() |
---|
574 | { |
---|
575 | /* make up weights vector to avoid duplicate computations */ |
---|
576 | long i; |
---|
577 | |
---|
578 | for (i = 1; i <= sites; i++) { |
---|
579 | alias[i] = i; |
---|
580 | aliasweight[i] = weight[i]; |
---|
581 | } |
---|
582 | restml_sitesort(); |
---|
583 | restml_sitecombine(); |
---|
584 | sitescrunch2(sites + 1, 2, 3, aliasweight); |
---|
585 | for (i = 1; i <= sites; i++) { |
---|
586 | weight[i] = aliasweight[i]; |
---|
587 | if (weight[i] > 0) |
---|
588 | endsite = i; |
---|
589 | } |
---|
590 | weight[0] = 1; |
---|
591 | } /* makeweights */ |
---|
592 | |
---|
593 | |
---|
594 | void restml_makevalues() |
---|
595 | { |
---|
596 | /* set up fractional likelihoods at tips */ |
---|
597 | long i, j, k, l, m; |
---|
598 | boolean found; |
---|
599 | |
---|
600 | for (k = 1; k <= endsite; k++) { |
---|
601 | j = alias[k]; |
---|
602 | for (i = 0; i < spp; i++) { |
---|
603 | for (l = 0; l <= sitelength; l++) |
---|
604 | curtree.nodep[i]->x2[k][l] = 1.0; |
---|
605 | switch (y[i][j - 1]) { |
---|
606 | |
---|
607 | case '+': |
---|
608 | for (m = 1; m <= sitelength; m++) |
---|
609 | curtree.nodep[i]->x2[k][m] = 0.0; |
---|
610 | break; |
---|
611 | |
---|
612 | case '-': |
---|
613 | curtree.nodep[i]->x2[k][0] = 0.0; |
---|
614 | break; |
---|
615 | |
---|
616 | case '?': |
---|
617 | /* blank case */ |
---|
618 | break; |
---|
619 | } |
---|
620 | } |
---|
621 | } |
---|
622 | for (i = 0; i < spp; i++) { |
---|
623 | for (k = 1; k <= sitelength; k++) |
---|
624 | curtree.nodep[i]->x2[0][k] = 1.0; |
---|
625 | curtree.nodep[i]->x2[0][0] = 0.0; |
---|
626 | } |
---|
627 | if (trunc8) |
---|
628 | return; |
---|
629 | found = false; |
---|
630 | i = 1; |
---|
631 | while (!found && i <= endsite) { |
---|
632 | found = true; |
---|
633 | for (k = 0; k < spp; k++) |
---|
634 | found = (found && y[k][alias[i] - 1] == '-'); |
---|
635 | if (!found) |
---|
636 | i++; |
---|
637 | } |
---|
638 | if (found) { |
---|
639 | weightsum += (enzymes - 1) * weight[i]; |
---|
640 | weight[i] *= enzymes; |
---|
641 | } |
---|
642 | } /* restml_makevalues */ |
---|
643 | |
---|
644 | |
---|
645 | void getinput() |
---|
646 | { |
---|
647 | /* reads the input data */ |
---|
648 | inputoptions(); |
---|
649 | restml_inputdata(); |
---|
650 | makeweights(); |
---|
651 | setuptree2(curtree); |
---|
652 | if (!usertree || reconsider) { |
---|
653 | setuptree2(priortree); |
---|
654 | setuptree2(bestree); |
---|
655 | if (njumble > 1) |
---|
656 | setuptree2(bestree2); |
---|
657 | } |
---|
658 | allocx2(nonodes2, endsite+1, sitelength, curtree.nodep, false); |
---|
659 | if (!usertree || reconsider) { |
---|
660 | allocx2(nonodes2, endsite+1, sitelength, priortree.nodep, 0); |
---|
661 | allocx2(nonodes2, endsite+1, sitelength, bestree.nodep, 0); |
---|
662 | if (njumble > 1) |
---|
663 | allocx2(nonodes2, endsite+1, sitelength, bestree2.nodep, 0); |
---|
664 | } |
---|
665 | restml_makevalues(); |
---|
666 | } /* getinput */ |
---|
667 | |
---|
668 | |
---|
669 | void copymatrix(transmatrix tomat, transmatrix frommat) |
---|
670 | { |
---|
671 | /* copy a matrix the size of transition matrix */ |
---|
672 | int i,j; |
---|
673 | |
---|
674 | for (i=0;i<=sitelength;++i){ |
---|
675 | for (j=0;j<=sitelength;++j) |
---|
676 | tomat[i][j] = frommat[i][j]; |
---|
677 | } |
---|
678 | } /* copymatrix */ |
---|
679 | |
---|
680 | |
---|
681 | void maketrans(double p, boolean nr) |
---|
682 | { |
---|
683 | /* make transition matrix, product matrix with change |
---|
684 | probability p. Put the results in tempmatrix, tempslope, tempcurve */ |
---|
685 | long i, j, k, m1, m2; |
---|
686 | double sump, sums=0, sumc=0, pover3, pijk, term; |
---|
687 | double binom1[maxcutter + 1], binom2[maxcutter + 1]; |
---|
688 | |
---|
689 | pover3 = p / 3; |
---|
690 | for (i = 0; i <= sitelength; i++) { |
---|
691 | if (p > 1.0 - epsilon) |
---|
692 | p = 1.0 - epsilon; |
---|
693 | if (p < epsilon) |
---|
694 | p = epsilon; |
---|
695 | binom1[0] = exp((sitelength - i) * log(1 - p)); |
---|
696 | for (k = 1; k <= sitelength - i; k++) |
---|
697 | binom1[k] = binom1[k - 1] * (p / (1 - p)) * (sitelength - i - k + 1) / k; |
---|
698 | binom2[0] = exp(i * log(1 - pover3)); |
---|
699 | for (k = 1; k <= i; k++) |
---|
700 | binom2[k] = binom2[k - 1] * (pover3 / (1 - pover3)) * (i - k + 1) / k; |
---|
701 | for (j = 0; j <= sitelength; ++j) { |
---|
702 | sump = 0.0; |
---|
703 | if (nr) { |
---|
704 | sums = 0.0; |
---|
705 | sumc = 0.0; |
---|
706 | } |
---|
707 | if (i - j > 0) |
---|
708 | m1 = i - j; |
---|
709 | else |
---|
710 | m1 = 0; |
---|
711 | if (sitelength - j < i) |
---|
712 | m2 = sitelength - j; |
---|
713 | else |
---|
714 | m2 = i; |
---|
715 | for (k = m1; k <= m2; k++) { |
---|
716 | pijk = binom1[j - i + k] * binom2[k]; |
---|
717 | sump += pijk; |
---|
718 | if (nr) { |
---|
719 | term = (j-i+2*k)/p - (sitelength-j-k)/(1.0-p) - (i-k)/(3.0-p); |
---|
720 | sums += pijk * term; |
---|
721 | sumc += pijk * (term * term |
---|
722 | - (j-i+2*k)/(p*p) |
---|
723 | - (sitelength-j-k)/((1.0-p)*(1.0-p)) |
---|
724 | - (i-k)/((3.0-p)*(3.0-p)) ); |
---|
725 | } |
---|
726 | } |
---|
727 | tempmatrix[i][j] = sump; |
---|
728 | if (nr) { |
---|
729 | tempslope[i][j] = sums; |
---|
730 | tempcurve[i][j] = sumc; |
---|
731 | } |
---|
732 | } |
---|
733 | } |
---|
734 | } /* maketrans */ |
---|
735 | |
---|
736 | |
---|
737 | void branchtrans(long i, double p) |
---|
738 | { |
---|
739 | /* make branch transition matrix for branch i with probability of change p */ |
---|
740 | boolean nr; |
---|
741 | |
---|
742 | nr = false; |
---|
743 | maketrans(p, nr); |
---|
744 | copymatrix(curtree.trans[i - 1], tempmatrix); |
---|
745 | } /* branchtrans */ |
---|
746 | |
---|
747 | |
---|
748 | double evaluate(tree *tr, node *p) |
---|
749 | { |
---|
750 | /* evaluates the likelihood, using info. at one branch */ |
---|
751 | double sum, sum2, local_y, liketerm, like0, lnlike0=0, term; |
---|
752 | long i, j, k; |
---|
753 | node *q; |
---|
754 | sitelike2 x1, x2; |
---|
755 | boolean nr; |
---|
756 | |
---|
757 | sum = 0.0; |
---|
758 | q = p->back; |
---|
759 | local_y = p->v; |
---|
760 | nr = false; |
---|
761 | maketrans(local_y, nr); |
---|
762 | memcpy(x1, p->x2[0], sizeof(sitelike2)); |
---|
763 | memcpy(x2, q->x2[0], sizeof(sitelike2)); |
---|
764 | if (trunc8) { |
---|
765 | like0 = 0.0; |
---|
766 | for (j = 0; j <= sitelength; j++) { |
---|
767 | liketerm = pie[j] * x1[j]; |
---|
768 | for (k = 0; k <= sitelength; k++) |
---|
769 | like0 += liketerm * tempmatrix[j][k] * x2[k]; |
---|
770 | } |
---|
771 | lnlike0 = log(enzymes * (1.0 - like0)); |
---|
772 | } |
---|
773 | for (i = 1; i <= endsite; i++) { |
---|
774 | memcpy(x1, p->x2[i], sizeof(sitelike2)); |
---|
775 | memcpy(x2, q->x2[i], sizeof(sitelike2)); |
---|
776 | sum2 = 0.0; |
---|
777 | for (j = 0; j <= sitelength; j++) { |
---|
778 | liketerm = pie[j] * x1[j]; |
---|
779 | for (k = 0; k <= sitelength; k++) |
---|
780 | sum2 += liketerm * tempmatrix[j][k] * x2[k]; |
---|
781 | } |
---|
782 | term = log(sum2); |
---|
783 | if (trunc8) |
---|
784 | term -= lnlike0; |
---|
785 | if (usertree) |
---|
786 | l0gf[which - 1][i - 1] = term; |
---|
787 | sum += weight[i] * term; |
---|
788 | } |
---|
789 | /* *** debug put a variable "saveit" in evaluate as third argument as to |
---|
790 | whether to save the KHT suff */ |
---|
791 | if (usertree) { |
---|
792 | l0gl[which - 1] = sum; |
---|
793 | if (which == 1) { |
---|
794 | maxwhich = 1; |
---|
795 | maxlogl = sum; |
---|
796 | } else if (sum > maxlogl) { |
---|
797 | maxwhich = which; |
---|
798 | maxlogl = sum; |
---|
799 | } |
---|
800 | } |
---|
801 | tr->likelihood = sum; |
---|
802 | return sum; |
---|
803 | } /* evaluate */ |
---|
804 | |
---|
805 | |
---|
806 | void nuview(node *p) |
---|
807 | { |
---|
808 | /* recompute fractional likelihoods for one part of tree */ |
---|
809 | long i, j, k, lowlim; |
---|
810 | double sumq, sumr; |
---|
811 | node *q, *r; |
---|
812 | sitelike2 xq, xr, xp; |
---|
813 | transmatrix tempq, tempr; |
---|
814 | double *tq, *tr; |
---|
815 | |
---|
816 | if (!p->next->back->tip && !p->next->back->initialized) |
---|
817 | nuview (p->next->back); |
---|
818 | if (!p->next->next->back->tip && !p->next->next->back->initialized) |
---|
819 | nuview (p->next->next->back); |
---|
820 | tempq = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
821 | tempr = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
822 | for (i=0;i<=sitelength;++i){ |
---|
823 | tempq[i] = (double *)Malloc((sitelength+1) * sizeof (double)); |
---|
824 | tempr[i] = (double *)Malloc((sitelength+1) * sizeof (double)); |
---|
825 | } |
---|
826 | if (trunc8) |
---|
827 | lowlim = 0; |
---|
828 | else |
---|
829 | lowlim = 1; |
---|
830 | q = p->next->back; |
---|
831 | r = p->next->next->back; |
---|
832 | copymatrix(tempq,curtree.trans[q->branchnum - 1]); |
---|
833 | copymatrix(tempr,curtree.trans[r->branchnum - 1]); |
---|
834 | for (i = lowlim; i <= endsite; i++) { |
---|
835 | memcpy (xq, q->x2[i], sizeof(sitelike2)); |
---|
836 | memcpy (xr, r->x2[i], sizeof(sitelike2)); |
---|
837 | for (j = 0; j <= sitelength; j++) { |
---|
838 | sumq = 0.0; |
---|
839 | sumr = 0.0; |
---|
840 | tq = tempq[j]; |
---|
841 | tr = tempr[j]; |
---|
842 | for (k = 0; k <= sitelength; k++) { |
---|
843 | sumq += tq[k] * xq[k]; |
---|
844 | sumr += tr[k] * xr[k]; |
---|
845 | } |
---|
846 | xp[j] = sumq * sumr; |
---|
847 | } |
---|
848 | memcpy(p->x2[i], xp, sizeof(sitelike2)); |
---|
849 | } |
---|
850 | for (i=0;i<=sitelength;++i){ |
---|
851 | free(tempq[i]); |
---|
852 | free(tempr[i]); |
---|
853 | } |
---|
854 | free(tempq); |
---|
855 | free(tempr); |
---|
856 | p->initialized = true; |
---|
857 | } /* nuview */ |
---|
858 | |
---|
859 | |
---|
860 | void makenewv(node *p) |
---|
861 | { |
---|
862 | /* Newton-Raphson algorithm improvement of a branch length */ |
---|
863 | long i, j, k, lowlim, it, ite; |
---|
864 | double sum, sums, sumc, like, slope, curve, liketerm, liket, |
---|
865 | local_y, yold=0, yorig, like0=0, slope0=0, curve0=0, oldlike=0, temp; |
---|
866 | boolean done, nr, firsttime, better; |
---|
867 | node *q; |
---|
868 | sitelike2 xx1, xx2; |
---|
869 | double *tm, *ts, *tc; |
---|
870 | |
---|
871 | q = p->back; |
---|
872 | local_y = p->v; |
---|
873 | yorig = local_y; |
---|
874 | if (trunc8) |
---|
875 | lowlim = 0; |
---|
876 | else |
---|
877 | lowlim = 1; |
---|
878 | done = false; |
---|
879 | nr = true; |
---|
880 | firsttime = true; |
---|
881 | it = 1; |
---|
882 | ite = 0; |
---|
883 | while ((it < iterations) && (ite < 20) && (!done)) { |
---|
884 | like = 0.0; |
---|
885 | slope = 0.0; |
---|
886 | curve = 0.0; |
---|
887 | maketrans(local_y, nr); |
---|
888 | for (i = lowlim; i <= endsite; i++) { |
---|
889 | memcpy(xx1, p->x2[i], sizeof(sitelike2)); |
---|
890 | memcpy(xx2, q->x2[i], sizeof(sitelike2)); |
---|
891 | sum = 0.0; |
---|
892 | sums = 0.0; |
---|
893 | sumc = 0.0; |
---|
894 | for (j = 0; j <= sitelength; j++) { |
---|
895 | liket = xx1[j] * pie[j]; |
---|
896 | tm = tempmatrix[j]; |
---|
897 | ts = tempslope[j]; |
---|
898 | tc = tempcurve[j]; |
---|
899 | for (k = 0; k <= sitelength; k++) { |
---|
900 | liketerm = liket * xx2[k]; |
---|
901 | sum += tm[k] * liketerm; |
---|
902 | sums += ts[k] * liketerm; |
---|
903 | sumc += tc[k] * liketerm; |
---|
904 | } |
---|
905 | } |
---|
906 | if (i == 0) { |
---|
907 | like0 = sum; |
---|
908 | slope0 = sums; |
---|
909 | curve0 = sumc; |
---|
910 | } else { |
---|
911 | like += weight[i] * log(sum); |
---|
912 | slope += weight[i] * sums/sum; |
---|
913 | temp = sums/sum; |
---|
914 | curve += weight[i] * (sumc/sum-temp*temp); |
---|
915 | } |
---|
916 | } |
---|
917 | if (trunc8 && fabs(like0 - 1.0) > 1.0e-10) { |
---|
918 | like -= weightsum * log(enzymes * (1.0 - like0)); |
---|
919 | slope += weightsum * slope0 /(1.0 - like0); |
---|
920 | curve += weightsum * (curve0 /(1.0 - like0) |
---|
921 | + slope0*slope0/((1.0 - like0)*(1.0 - like0))); |
---|
922 | } |
---|
923 | better = false; |
---|
924 | if (firsttime) { |
---|
925 | yold = local_y; |
---|
926 | oldlike = like; |
---|
927 | firsttime = false; |
---|
928 | better = true; |
---|
929 | } else { |
---|
930 | if (like > oldlike) { |
---|
931 | yold = local_y; |
---|
932 | oldlike = like; |
---|
933 | better = true; |
---|
934 | it++; |
---|
935 | } |
---|
936 | } |
---|
937 | if (better) { |
---|
938 | local_y = local_y + slope/fabs(curve); |
---|
939 | if (local_y < epsilon) |
---|
940 | local_y = 10.0 * epsilon; |
---|
941 | if (local_y > 0.75) |
---|
942 | local_y = 0.75; |
---|
943 | } else { |
---|
944 | if (fabs(local_y - yold) < epsilon) |
---|
945 | ite = 20; |
---|
946 | local_y = (local_y + yold) / 2.0; |
---|
947 | } |
---|
948 | ite++; |
---|
949 | done = fabs(local_y-yold) < epsilon; |
---|
950 | } |
---|
951 | smoothed = (fabs(yold-yorig) < epsilon) && (yorig > 1000.0*epsilon); |
---|
952 | p->v = yold; |
---|
953 | q->v = yold; |
---|
954 | branchtrans(p->branchnum, yold); |
---|
955 | curtree.likelihood = oldlike; |
---|
956 | } /* makenewv */ |
---|
957 | |
---|
958 | |
---|
959 | void update(node *p) |
---|
960 | { |
---|
961 | /* improve branch length and views for one branch */ |
---|
962 | |
---|
963 | if (!p->tip && !p->initialized) |
---|
964 | nuview(p); |
---|
965 | if (!p->back->tip && !p->back->initialized) |
---|
966 | nuview(p->back); |
---|
967 | if (p->iter) { |
---|
968 | makenewv(p); |
---|
969 | if (!p->tip) { |
---|
970 | p->next->initialized = false; |
---|
971 | p->next->next->initialized = false; |
---|
972 | } |
---|
973 | if (!p->back->tip) { |
---|
974 | p->back->next->initialized = false; |
---|
975 | p->back->next->next->initialized = false; |
---|
976 | } |
---|
977 | } |
---|
978 | } /* update */ |
---|
979 | |
---|
980 | |
---|
981 | void smooth(node *p) |
---|
982 | { |
---|
983 | /* update nodes throughout the tree, recursively */ |
---|
984 | |
---|
985 | smoothed = false; |
---|
986 | update(p); |
---|
987 | if (!p->tip) { |
---|
988 | if (smoothit && !smoothed) { |
---|
989 | smooth(p->next->back); |
---|
990 | p->initialized = false; |
---|
991 | p->next->next->initialized = false; |
---|
992 | } |
---|
993 | if (smoothit && !smoothed) { |
---|
994 | smooth(p->next->next->back); |
---|
995 | p->initialized = false; |
---|
996 | p->next->initialized = false; |
---|
997 | } |
---|
998 | } |
---|
999 | } /* smooth */ |
---|
1000 | |
---|
1001 | |
---|
1002 | void insert_(node *p, node *q) |
---|
1003 | { |
---|
1004 | /* insert a subtree into a branch, improve lengths in tree */ |
---|
1005 | long i, m, n; |
---|
1006 | node *r; |
---|
1007 | |
---|
1008 | r = p->next->next; |
---|
1009 | hookup(r, q->back); |
---|
1010 | hookup(p->next, q); |
---|
1011 | if (q->v >= 0.75) |
---|
1012 | q->v = 0.75; |
---|
1013 | else |
---|
1014 | q->v = 0.75 * (1 - sqrt(1 - 1.333333 * q->v)); |
---|
1015 | q->back->v = q->v; |
---|
1016 | r->v = q->v; |
---|
1017 | r->back->v = r->v; |
---|
1018 | if (q->branchnum == q->index) { |
---|
1019 | m = q->branchnum; |
---|
1020 | n = r->index; |
---|
1021 | } else { |
---|
1022 | m = r->index; |
---|
1023 | n = q->branchnum; |
---|
1024 | } |
---|
1025 | q->branchnum = m; |
---|
1026 | q->back->branchnum = m; |
---|
1027 | r->branchnum = n; |
---|
1028 | r->back->branchnum = n; |
---|
1029 | branchtrans(q->branchnum, q->v); |
---|
1030 | branchtrans(r->branchnum, r->v); |
---|
1031 | p->initialized = false; |
---|
1032 | p->next->initialized = false; |
---|
1033 | p->next->next->initialized = false; |
---|
1034 | i = 1; |
---|
1035 | while (i <= smoothings) { |
---|
1036 | smooth(p); |
---|
1037 | if (!smoothit) { |
---|
1038 | if (!p->tip) { |
---|
1039 | smooth (p->next->back); |
---|
1040 | smooth (p->next->next->back); |
---|
1041 | } |
---|
1042 | } |
---|
1043 | else |
---|
1044 | smooth(p->back); |
---|
1045 | i++; |
---|
1046 | } |
---|
1047 | } /* insert */ |
---|
1048 | |
---|
1049 | |
---|
1050 | void restml_re_move(node **p, node **q) |
---|
1051 | { |
---|
1052 | /* remove p and record in q where it was */ |
---|
1053 | long i; |
---|
1054 | |
---|
1055 | *q = (*p)->next->back; |
---|
1056 | hookup(*q, (*p)->next->next->back); |
---|
1057 | (*q)->back->branchnum = (*q)->branchnum; |
---|
1058 | branchtrans((*q)->branchnum, 0.75*(1 - (1 - 1.333333*(*q)->v) |
---|
1059 | * (1 - 1.333333*(*p)->next->v))); |
---|
1060 | (*p)->next->back = NULL; |
---|
1061 | (*p)->next->next->back = NULL; |
---|
1062 | if (!(*q)->tip) { |
---|
1063 | (*q)->next->initialized = false; |
---|
1064 | (*q)->next->next->initialized = false; |
---|
1065 | } |
---|
1066 | if (!(*q)->back->tip) { |
---|
1067 | (*q)->back->next->initialized = false; |
---|
1068 | (*q)->back->next->next->initialized = false; |
---|
1069 | } |
---|
1070 | i = 1; |
---|
1071 | while (i <= smoothings) { |
---|
1072 | smooth(*q); |
---|
1073 | if (smoothit) |
---|
1074 | smooth((*q)->back); |
---|
1075 | i++; |
---|
1076 | } |
---|
1077 | } /* restml_re_move */ |
---|
1078 | |
---|
1079 | |
---|
1080 | void restml_copynode(node *c, node *d) |
---|
1081 | { |
---|
1082 | /* copy a node */ |
---|
1083 | |
---|
1084 | d->branchnum = c->branchnum; |
---|
1085 | memcpy(d->x2, c->x2, (endsite+1)*sizeof(sitelike2)); |
---|
1086 | d->v = c->v; |
---|
1087 | d->iter = c->iter; |
---|
1088 | d->xcoord = c->xcoord; |
---|
1089 | d->ycoord = c->ycoord; |
---|
1090 | d->ymin = c->ymin; |
---|
1091 | d->ymax = c->ymax; |
---|
1092 | d->initialized = c->initialized; |
---|
1093 | } /* restml_copynode */ |
---|
1094 | |
---|
1095 | |
---|
1096 | void restml_copy_(tree *a, tree *b) |
---|
1097 | { |
---|
1098 | /* copy a tree */ |
---|
1099 | long i,j; |
---|
1100 | node *p, *q; |
---|
1101 | |
---|
1102 | for (i = 0; i < spp; i++) { |
---|
1103 | restml_copynode(a->nodep[i], b->nodep[i]); |
---|
1104 | if (a->nodep[i]->back) { |
---|
1105 | if (a->nodep[i]->back == a->nodep[a->nodep[i]->back->index - 1]) |
---|
1106 | b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]; |
---|
1107 | else if (a->nodep[i]->back |
---|
1108 | == a->nodep[a->nodep[i]->back->index - 1]->next) |
---|
1109 | b->nodep[i]->back = b->nodep[a->nodep[i]->back->index - 1]->next; |
---|
1110 | else |
---|
1111 | b->nodep[i]->back |
---|
1112 | = b->nodep[a->nodep[i]->back->index - 1]->next->next; |
---|
1113 | } |
---|
1114 | else b->nodep[i]->back = NULL; |
---|
1115 | } |
---|
1116 | for (i = spp; i < nonodes2; i++) { |
---|
1117 | p = a->nodep[i]; |
---|
1118 | q = b->nodep[i]; |
---|
1119 | for (j = 1; j <= 3; j++) { |
---|
1120 | restml_copynode(p, q); |
---|
1121 | if (p->back) { |
---|
1122 | if (p->back == a->nodep[p->back->index - 1]) |
---|
1123 | q->back = b->nodep[p->back->index - 1]; |
---|
1124 | else if (p->back == a->nodep[p->back->index - 1]->next) |
---|
1125 | q->back = b->nodep[p->back->index - 1]->next; |
---|
1126 | else |
---|
1127 | q->back = b->nodep[p->back->index - 1]->next->next; |
---|
1128 | } |
---|
1129 | else |
---|
1130 | q->back = NULL; |
---|
1131 | p = p->next; |
---|
1132 | q = q->next; |
---|
1133 | } |
---|
1134 | } |
---|
1135 | b->likelihood = a->likelihood; |
---|
1136 | for (i=0;i<nonodes2;++i) |
---|
1137 | copymatrix(b->trans[i],a->trans[i]); |
---|
1138 | b->start = a->start; |
---|
1139 | } /* restml_copy */ |
---|
1140 | |
---|
1141 | |
---|
1142 | void buildnewtip(long m, tree *tr) |
---|
1143 | { |
---|
1144 | /* set up a new tip and interior node it is connected to */ |
---|
1145 | node *p; |
---|
1146 | long i, j; |
---|
1147 | |
---|
1148 | p = tr->nodep[nextsp + spp - 3]; |
---|
1149 | for (i = 0; i < endsite; i++) { |
---|
1150 | for (j = 0; j < sitelength; j++) { |
---|
1151 | p->x2[i][j] = 1.0; |
---|
1152 | p->next->x2[i][j] = 1.0; |
---|
1153 | p->next->next->x2[i][j] = 1.0; |
---|
1154 | } |
---|
1155 | } |
---|
1156 | hookup(tr->nodep[m - 1], p); |
---|
1157 | p->v = initialv; |
---|
1158 | p->back->v = initialv; |
---|
1159 | branchtrans(m, initialv); |
---|
1160 | p->branchnum = m; |
---|
1161 | p->next->branchnum = p->index; |
---|
1162 | p->next->next->branchnum = p->index; |
---|
1163 | p->back->branchnum = m; |
---|
1164 | } /* buildnewtip */ |
---|
1165 | |
---|
1166 | |
---|
1167 | void buildsimpletree(tree *tr) |
---|
1168 | { |
---|
1169 | /* set up and adjust branch lengths of a three-species tree */ |
---|
1170 | |
---|
1171 | hookup(tr->nodep[enterorder[0] - 1], tr->nodep[enterorder[1] - 1]); |
---|
1172 | tr->nodep[enterorder[0] - 1]->v = initialv; |
---|
1173 | tr->nodep[enterorder[1] - 1]->v = initialv; |
---|
1174 | branchtrans(enterorder[1], initialv); |
---|
1175 | tr->nodep[enterorder[0] - 1]->branchnum = 2; |
---|
1176 | tr->nodep[enterorder[1] - 1]->branchnum = 2; |
---|
1177 | buildnewtip(enterorder[2], tr); |
---|
1178 | insert_(tr->nodep[enterorder[2] - 1]->back, tr->nodep[enterorder[1] - 1]); |
---|
1179 | tr->start = tr->nodep[enterorder[2]-1]->back; |
---|
1180 | } /* buildsimpletree */ |
---|
1181 | |
---|
1182 | |
---|
1183 | void addtraverse(node *p, node *q, boolean contin) |
---|
1184 | { |
---|
1185 | /* try adding p at q, proceed recursively through tree */ |
---|
1186 | long oldnum = 0; |
---|
1187 | double like, vsave = 0; |
---|
1188 | node *qback =NULL; |
---|
1189 | |
---|
1190 | if (!smoothit) { |
---|
1191 | oldnum = q->branchnum; |
---|
1192 | copymatrix (temp2matrix, curtree.trans[oldnum-1]); |
---|
1193 | vsave = q->v; |
---|
1194 | qback = q->back; |
---|
1195 | } |
---|
1196 | insert_(p, q); |
---|
1197 | like = evaluate(&curtree, p); |
---|
1198 | if (like > bestyet) { |
---|
1199 | bestyet = like; |
---|
1200 | if (smoothit) |
---|
1201 | restml_copy_(&curtree, &bestree); |
---|
1202 | else |
---|
1203 | qwhere = q; |
---|
1204 | succeeded = true; |
---|
1205 | } |
---|
1206 | if (smoothit) |
---|
1207 | restml_copy_(&priortree, &curtree); |
---|
1208 | else { |
---|
1209 | hookup (q, qback); |
---|
1210 | q->v = vsave; |
---|
1211 | q->back->v = vsave; |
---|
1212 | q->branchnum = oldnum; |
---|
1213 | q->back->branchnum = oldnum; |
---|
1214 | copymatrix (curtree.trans[oldnum-1], temp2matrix); |
---|
1215 | curtree.likelihood = bestyet; |
---|
1216 | } |
---|
1217 | if (!q->tip && contin) { |
---|
1218 | addtraverse(p, q->next->back, contin); |
---|
1219 | addtraverse(p, q->next->next->back, contin); |
---|
1220 | } |
---|
1221 | } /* addtraverse */ |
---|
1222 | |
---|
1223 | |
---|
1224 | void rearrange(node *p, node *pp) |
---|
1225 | { |
---|
1226 | /* rearranges the tree, globally or locally */ |
---|
1227 | long i, oldnum3=0, oldnum4=0, oldnum5=0; |
---|
1228 | double v3=0, v4=0, v5=0; |
---|
1229 | node *q, *r; |
---|
1230 | |
---|
1231 | if (!p->tip && !p->back->tip) { |
---|
1232 | bestyet = curtree.likelihood; |
---|
1233 | if (p->back->next != pp) |
---|
1234 | r = p->back->next; |
---|
1235 | else |
---|
1236 | r = p->back->next->next; |
---|
1237 | if (!smoothit) { |
---|
1238 | oldnum3 = r->branchnum; |
---|
1239 | copymatrix (temp3matrix, curtree.trans[oldnum3-1]); |
---|
1240 | v3 = r->v; |
---|
1241 | oldnum4 = r->next->branchnum; |
---|
1242 | copymatrix (temp4matrix, curtree.trans[oldnum4-1]); |
---|
1243 | v4 = r->next->v; |
---|
1244 | oldnum5 = r->next->next->branchnum; |
---|
1245 | copymatrix (temp5matrix, curtree.trans[oldnum5-1]); |
---|
1246 | v5 = r->next->next->v; |
---|
1247 | } |
---|
1248 | else |
---|
1249 | restml_copy_(&curtree, &bestree); |
---|
1250 | restml_re_move(&r, &q); |
---|
1251 | if (smoothit) |
---|
1252 | restml_copy_(&curtree, &priortree); |
---|
1253 | else |
---|
1254 | qwhere = q; |
---|
1255 | addtraverse(r, p->next->back, (boolean)(global && (nextsp == spp))); |
---|
1256 | addtraverse(r, p->next->next->back, (boolean)(global && (nextsp == spp))); |
---|
1257 | if (global && nextsp == spp && !succeeded) { |
---|
1258 | p = p->back; |
---|
1259 | if (!p->tip) { |
---|
1260 | addtraverse(r, p->next->back, (boolean)(global && (nextsp == spp))); |
---|
1261 | addtraverse(r, p->next->next->back, (boolean)(global && (nextsp == spp))); |
---|
1262 | } |
---|
1263 | p = p->back; |
---|
1264 | } |
---|
1265 | if (smoothit) |
---|
1266 | restml_copy_(&bestree, &curtree); |
---|
1267 | else { |
---|
1268 | insert_(r, qwhere); |
---|
1269 | if (qwhere == q) { |
---|
1270 | r->v = v3; |
---|
1271 | r->back->v = v3; |
---|
1272 | r->branchnum = oldnum3; |
---|
1273 | r->back->branchnum = oldnum3; |
---|
1274 | copymatrix (curtree.trans[oldnum3-1], temp3matrix); |
---|
1275 | r->next->v = v4; |
---|
1276 | r->next->back->v = v4; |
---|
1277 | r->next->branchnum = oldnum4; |
---|
1278 | r->next->back->branchnum = oldnum4; |
---|
1279 | copymatrix (curtree.trans[oldnum4-1], temp4matrix); |
---|
1280 | r->next->next->v = v5; |
---|
1281 | r->next->next->back->v = v5; |
---|
1282 | r->next->next->branchnum = oldnum5; |
---|
1283 | r->next->next->back->branchnum = oldnum5; |
---|
1284 | copymatrix (curtree.trans[oldnum5-1], temp5matrix); |
---|
1285 | curtree.likelihood = bestyet; |
---|
1286 | } |
---|
1287 | else { |
---|
1288 | smoothit = true; |
---|
1289 | for (i = 1; i<=smoothings; i++) { |
---|
1290 | smooth (r); |
---|
1291 | smooth (r->back); |
---|
1292 | } |
---|
1293 | smoothit = false; |
---|
1294 | restml_copy_(&curtree, &bestree); |
---|
1295 | } |
---|
1296 | } |
---|
1297 | if (global && nextsp == spp && progress) { |
---|
1298 | putchar('.'); |
---|
1299 | fflush(stdout); |
---|
1300 | } |
---|
1301 | } |
---|
1302 | if (!p->tip) { |
---|
1303 | rearrange(p->next->back, p); |
---|
1304 | rearrange(p->next->next->back, p); |
---|
1305 | } |
---|
1306 | } /* rearrange */ |
---|
1307 | |
---|
1308 | |
---|
1309 | void restml_coordinates(node *p, double lengthsum, long *tipy, |
---|
1310 | double *tipmax, double *x) |
---|
1311 | { |
---|
1312 | /* establishes coordinates of nodes */ |
---|
1313 | node *q, *first, *last; |
---|
1314 | |
---|
1315 | if (p->tip) { |
---|
1316 | p->xcoord = (long)(over * lengthsum + 0.5); |
---|
1317 | p->ycoord = (*tipy); |
---|
1318 | p->ymin = (*tipy); |
---|
1319 | p->ymax = (*tipy); |
---|
1320 | (*tipy) += down; |
---|
1321 | if (lengthsum > (*tipmax)) |
---|
1322 | (*tipmax) = lengthsum; |
---|
1323 | return; |
---|
1324 | } |
---|
1325 | q = p->next; |
---|
1326 | do { |
---|
1327 | (*x) = -0.75 * log(1.0 - 1.333333 * q->v); |
---|
1328 | restml_coordinates(q->back, lengthsum + (*x),tipy,tipmax,x); |
---|
1329 | q = q->next; |
---|
1330 | } while ((p == curtree.start || p != q) && |
---|
1331 | (p != curtree.start || p->next != q)); |
---|
1332 | first = p->next->back; |
---|
1333 | q = p; |
---|
1334 | while (q->next != p) |
---|
1335 | q = q->next; |
---|
1336 | last = q->back; |
---|
1337 | p->xcoord = (long)(over * lengthsum + 0.5); |
---|
1338 | if (p == curtree.start) |
---|
1339 | p->ycoord = p->next->next->back->ycoord; |
---|
1340 | else |
---|
1341 | p->ycoord = (first->ycoord + last->ycoord) / 2; |
---|
1342 | p->ymin = first->ymin; |
---|
1343 | p->ymax = last->ymax; |
---|
1344 | } /* restml_coordinates */ |
---|
1345 | |
---|
1346 | |
---|
1347 | void restml_printree() |
---|
1348 | { |
---|
1349 | /* prints out diagram of the tree */ |
---|
1350 | long tipy,i; |
---|
1351 | double scale, tipmax, x; |
---|
1352 | |
---|
1353 | putc('\n', outfile); |
---|
1354 | if (!treeprint) |
---|
1355 | return; |
---|
1356 | putc('\n', outfile); |
---|
1357 | tipy = 1; |
---|
1358 | tipmax = 0.0; |
---|
1359 | restml_coordinates(curtree.start, 0.0, &tipy,&tipmax,&x); |
---|
1360 | scale = 1.0 / (tipmax + 1.000); |
---|
1361 | for (i = 1; i <= tipy - down; i++) |
---|
1362 | drawline2(i, scale, curtree); |
---|
1363 | putc('\n', outfile); |
---|
1364 | } /* restml_printree */ |
---|
1365 | |
---|
1366 | |
---|
1367 | double sigma(node *q, double *sumlr) |
---|
1368 | { |
---|
1369 | /* get 1.95996 * approximate standard error of branch length */ |
---|
1370 | double sump, sumr, sums, sumc, p, pover3, pijk, Qjk, liketerm, f; |
---|
1371 | double slopef,curvef; |
---|
1372 | long i, j, k, m1, m2; |
---|
1373 | double binom1[maxcutter + 1], binom2[maxcutter + 1]; |
---|
1374 | transmatrix Prob, slopeP, curveP; |
---|
1375 | node *r; |
---|
1376 | sitelike2 x1, x2; |
---|
1377 | double term, TEMP; |
---|
1378 | |
---|
1379 | Prob = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
1380 | slopeP = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
1381 | curveP = (transmatrix)Malloc((sitelength+1) * sizeof(double *)); |
---|
1382 | for (i=0; i<=sitelength; ++i) { |
---|
1383 | Prob[i] = (double *)Malloc((sitelength+1) * sizeof(double)); |
---|
1384 | slopeP[i] = (double *)Malloc((sitelength+1) * sizeof(double)); |
---|
1385 | curveP[i] = (double *)Malloc((sitelength+1) * sizeof(double)); |
---|
1386 | } |
---|
1387 | p = q->v; |
---|
1388 | pover3 = p / 3; |
---|
1389 | for (i = 0; i <= sitelength; i++) { |
---|
1390 | binom1[0] = exp((sitelength - i) * log(1 - p)); |
---|
1391 | for (k = 1; k <= (sitelength - i); k++) |
---|
1392 | binom1[k] = binom1[k - 1] * (p / (1 - p)) * (sitelength - i - k + 1) / k; |
---|
1393 | binom2[0] = exp(i * log(1 - pover3)); |
---|
1394 | for (k = 1; k <= i; k++) |
---|
1395 | binom2[k] = binom2[k - 1] * (pover3 / (1 - pover3)) * (i - k + 1) / k; |
---|
1396 | for (j = 0; j <= sitelength; j++) { |
---|
1397 | sump = 0.0; |
---|
1398 | sums = 0.0; |
---|
1399 | sumc = 0.0; |
---|
1400 | if (i - j > 0) |
---|
1401 | m1 = i - j; |
---|
1402 | else |
---|
1403 | m1 = 0; |
---|
1404 | if (sitelength - j < i) |
---|
1405 | m2 = sitelength - j; |
---|
1406 | else |
---|
1407 | m2 = i; |
---|
1408 | for (k = m1; k <= m2; k++) { |
---|
1409 | pijk = binom1[j - i + k] * binom2[k]; |
---|
1410 | sump += pijk; |
---|
1411 | term = (j-i+2*k)/p - (sitelength-j-k)/(1.0-p) - (i-k)/(3.0-p); |
---|
1412 | sums += pijk * term; |
---|
1413 | sumc += pijk * (term * term |
---|
1414 | - (j-i+2*k)/(p*p) |
---|
1415 | - (sitelength-j-k)/((1.0-p)*(1.0-p)) |
---|
1416 | - (i-k)/((3.0-p)*(3.0-p)) ); |
---|
1417 | } |
---|
1418 | Prob[i][j] = sump; |
---|
1419 | slopeP[i][j] = sums; |
---|
1420 | curveP[i][j] = sumc; |
---|
1421 | } |
---|
1422 | } |
---|
1423 | (*sumlr) = 0.0; |
---|
1424 | sumc = 0.0; |
---|
1425 | sums = 0.0; |
---|
1426 | r = q->back; |
---|
1427 | for (i = 1; i <= endsite; i++) { |
---|
1428 | f = 0.0; |
---|
1429 | slopef = 0.0; |
---|
1430 | curvef = 0.0; |
---|
1431 | sumr = 0.0; |
---|
1432 | memcpy(x1, q->x2[i], sizeof(sitelike2)); |
---|
1433 | memcpy(x2, r->x2[i], sizeof(sitelike2)); |
---|
1434 | for (j = 0; j <= sitelength; j++) { |
---|
1435 | liketerm = pie[j] * x1[j]; |
---|
1436 | sumr += liketerm * x2[j]; |
---|
1437 | for (k = 0; k <= sitelength; k++) { |
---|
1438 | Qjk = liketerm * x2[k]; |
---|
1439 | f += Qjk * Prob[j][k]; |
---|
1440 | slopef += Qjk * slopeP[j][k]; |
---|
1441 | curvef += Qjk * curveP[j][k]; |
---|
1442 | } |
---|
1443 | } |
---|
1444 | (*sumlr) += weight[i] * log(f / sumr); |
---|
1445 | sums += weight[i] * slopef / f; |
---|
1446 | TEMP = slopef / f; |
---|
1447 | sumc += weight[i] * (curvef / f - TEMP * TEMP); |
---|
1448 | } |
---|
1449 | if (trunc8) { |
---|
1450 | f = 0.0; |
---|
1451 | slopef = 0.0; |
---|
1452 | curvef = 0.0; |
---|
1453 | sumr = 0.0; |
---|
1454 | memcpy(x1, q->x2[0], sizeof(sitelike2)); |
---|
1455 | memcpy(x2, r->x2[0], sizeof(sitelike2)); |
---|
1456 | for (j = 0; j <= sitelength; j++) { |
---|
1457 | liketerm = pie[j] * x1[j]; |
---|
1458 | sumr += liketerm * x2[j]; |
---|
1459 | for (k = 0; k <= sitelength; k++) { |
---|
1460 | Qjk = liketerm * x2[k]; |
---|
1461 | f += Qjk * Prob[j][k]; |
---|
1462 | slopef += Qjk * slopeP[j][k]; |
---|
1463 | curvef += Qjk * curveP[j][k]; |
---|
1464 | } |
---|
1465 | } |
---|
1466 | (*sumlr) += weightsum * log((1.0 - sumr) / (1.0 - f)); |
---|
1467 | sums += weightsum * slopef / (1.0 - f); |
---|
1468 | TEMP = slopef / (1.0 - f); |
---|
1469 | sumc += weightsum * (curvef / (1.0 - f) + TEMP * TEMP); |
---|
1470 | } |
---|
1471 | for (i=0;i<sitelength;++i){ |
---|
1472 | free(Prob[i]); |
---|
1473 | free(slopeP[i]); |
---|
1474 | free(curveP[i]); |
---|
1475 | } |
---|
1476 | free(Prob); |
---|
1477 | free(slopeP); |
---|
1478 | free(curveP); |
---|
1479 | if (sumc < -1.0e-6) |
---|
1480 | return ((-sums - sqrt(sums * sums - 3.841 * sumc)) / sumc); |
---|
1481 | else |
---|
1482 | return -1.0; |
---|
1483 | } /* sigma */ |
---|
1484 | |
---|
1485 | |
---|
1486 | void describe(node *p) |
---|
1487 | { |
---|
1488 | /* print out information on one branch */ |
---|
1489 | double sumlr; |
---|
1490 | long i; |
---|
1491 | node *q; |
---|
1492 | double s; |
---|
1493 | |
---|
1494 | q = p->back; |
---|
1495 | fprintf(outfile, "%4ld ", q->index - spp); |
---|
1496 | fprintf(outfile, " "); |
---|
1497 | if (p->tip) { |
---|
1498 | for (i = 0; i < nmlngth; i++) |
---|
1499 | putc(nayme[p->index - 1][i], outfile); |
---|
1500 | } else |
---|
1501 | fprintf(outfile, "%4ld ", p->index - spp); |
---|
1502 | if (q->v >= 0.75) |
---|
1503 | fprintf(outfile, " infinity"); |
---|
1504 | else |
---|
1505 | fprintf(outfile, "%13.5f", -0.75 * log(1 - 1.333333 * q->v)); |
---|
1506 | if (p->iter) { |
---|
1507 | s = sigma(q, &sumlr); |
---|
1508 | if (s < 0.0) |
---|
1509 | fprintf(outfile, " ( zero, infinity)"); |
---|
1510 | else { |
---|
1511 | fprintf(outfile, " ("); |
---|
1512 | if (q->v - s <= 0.0) |
---|
1513 | fprintf(outfile, " zero"); |
---|
1514 | else |
---|
1515 | fprintf(outfile, "%9.5f", -0.75 * log(1 - 1.333333 * (q->v - s))); |
---|
1516 | putc(',', outfile); |
---|
1517 | if (q->v + s >= 0.75) |
---|
1518 | fprintf(outfile, " infinity"); |
---|
1519 | else |
---|
1520 | fprintf(outfile, "%12.5f", -0.75 * log(1 - 1.333333 * (q->v + s))); |
---|
1521 | putc(')', outfile); |
---|
1522 | } |
---|
1523 | if (sumlr > 1.9205) |
---|
1524 | fprintf(outfile, " *"); |
---|
1525 | if (sumlr > 2.995) |
---|
1526 | putc('*', outfile); |
---|
1527 | } |
---|
1528 | else |
---|
1529 | fprintf(outfile, " (not varied)"); |
---|
1530 | putc('\n', outfile); |
---|
1531 | if (!p->tip) { |
---|
1532 | describe(p->next->back); |
---|
1533 | describe(p->next->next->back); |
---|
1534 | } |
---|
1535 | } /* describe */ |
---|
1536 | |
---|
1537 | |
---|
1538 | void summarize() |
---|
1539 | { |
---|
1540 | /* print out information on branches of tree */ |
---|
1541 | |
---|
1542 | fprintf(outfile, "\nremember: "); |
---|
1543 | if (outgropt) |
---|
1544 | fprintf(outfile, "(although rooted by outgroup) "); |
---|
1545 | fprintf(outfile, "this is an unrooted tree!\n\n"); |
---|
1546 | fprintf(outfile, "Ln Likelihood = %11.5f\n\n", curtree.likelihood); |
---|
1547 | fprintf(outfile, " \n"); |
---|
1548 | fprintf(outfile, " Between And Length"); |
---|
1549 | fprintf(outfile, " Approx. Confidence Limits\n"); |
---|
1550 | fprintf(outfile, " ------- --- ------"); |
---|
1551 | fprintf(outfile, " ------- ---------- ------\n"); |
---|
1552 | describe(curtree.start->next->back); |
---|
1553 | describe(curtree.start->next->next->back); |
---|
1554 | describe(curtree.start->back); |
---|
1555 | fprintf(outfile, "\n * = significantly positive, P < 0.05\n"); |
---|
1556 | fprintf(outfile, " ** = significantly positive, P < 0.01\n\n\n"); |
---|
1557 | } /* summarize */ |
---|
1558 | |
---|
1559 | |
---|
1560 | void restml_treeout(node *p) |
---|
1561 | { |
---|
1562 | /* write out file with representation of final tree */ |
---|
1563 | long i, n, w; |
---|
1564 | Char c; |
---|
1565 | double x; |
---|
1566 | |
---|
1567 | if (p->tip) { |
---|
1568 | n = 0; |
---|
1569 | for (i = 1; i <= nmlngth; i++) { |
---|
1570 | if (nayme[p->index - 1][i - 1] != ' ') |
---|
1571 | n = i; |
---|
1572 | } |
---|
1573 | for (i = 0; i < n; i++) { |
---|
1574 | c = nayme[p->index - 1][i]; |
---|
1575 | if (c == ' ') |
---|
1576 | c = '_'; |
---|
1577 | putc(c, outtree); |
---|
1578 | } |
---|
1579 | col += n; |
---|
1580 | } else { |
---|
1581 | putc('(', outtree); |
---|
1582 | col++; |
---|
1583 | restml_treeout(p->next->back); |
---|
1584 | putc(',', outtree); |
---|
1585 | col++; |
---|
1586 | if (col > 45) { |
---|
1587 | putc('\n', outtree); |
---|
1588 | col = 0; |
---|
1589 | } |
---|
1590 | restml_treeout(p->next->next->back); |
---|
1591 | if (p == curtree.start) { |
---|
1592 | putc(',', outtree); |
---|
1593 | col++; |
---|
1594 | if (col > 45) { |
---|
1595 | putc('\n', outtree); |
---|
1596 | col = 0; |
---|
1597 | } |
---|
1598 | restml_treeout(p->back); |
---|
1599 | } |
---|
1600 | putc(')', outtree); |
---|
1601 | col++; |
---|
1602 | } |
---|
1603 | if (p->v >= 0.75) |
---|
1604 | x = -1.0; |
---|
1605 | else |
---|
1606 | x = -0.75 * log(1 - 1.333333 * p->v); |
---|
1607 | if (x > 0.0) |
---|
1608 | w = (long)(0.43429448222 * log(x)); |
---|
1609 | else if (x == 0.0) |
---|
1610 | w = 0; |
---|
1611 | else |
---|
1612 | w = (long)(0.43429448222 * log(-x)) + 1; |
---|
1613 | if (w < 0) |
---|
1614 | w = 0; |
---|
1615 | if (p == curtree.start) |
---|
1616 | fprintf(outtree, ";\n"); |
---|
1617 | else { |
---|
1618 | fprintf(outtree, ":%*.5f", (int)(w + 7), x); |
---|
1619 | col += w + 8; |
---|
1620 | } |
---|
1621 | } /* restml_treeout */ |
---|
1622 | |
---|
1623 | |
---|
1624 | void inittravtree(node *p) |
---|
1625 | { |
---|
1626 | /* traverse tree to set initialized and v to initial values */ |
---|
1627 | |
---|
1628 | if (p->index < p->back->index) |
---|
1629 | p->branchnum = p->index; |
---|
1630 | else |
---|
1631 | p->branchnum = p->back->index; |
---|
1632 | branchtrans(p->branchnum, initialv); |
---|
1633 | p->initialized = false; |
---|
1634 | p->back->initialized = false; |
---|
1635 | p->v = initialv; |
---|
1636 | p->back->v = initialv; |
---|
1637 | if (!p->tip) { |
---|
1638 | inittravtree(p->next->back); |
---|
1639 | inittravtree(p->next->next->back); |
---|
1640 | } |
---|
1641 | } /* inittravtree */ |
---|
1642 | |
---|
1643 | |
---|
1644 | void treevaluate() |
---|
1645 | { |
---|
1646 | /* find maximum likelihood branch lengths of user tree */ |
---|
1647 | long i; |
---|
1648 | // double dummy; |
---|
1649 | |
---|
1650 | inittravtree(curtree.start); |
---|
1651 | smoothit = true; |
---|
1652 | for (i = 1; i <= smoothings * 4; i++) |
---|
1653 | smooth (curtree.start); |
---|
1654 | /*dummy =*/ evaluate(&curtree, curtree.start); |
---|
1655 | } /* treevaluate */ |
---|
1656 | |
---|
1657 | |
---|
1658 | void maketree() |
---|
1659 | { |
---|
1660 | /* construct and rearrange tree */ |
---|
1661 | long i; |
---|
1662 | |
---|
1663 | if (usertree) { |
---|
1664 | openfile(&intree,INTREE,"input tree file","r",progname,intreename); |
---|
1665 | numtrees = countsemic(&intree); |
---|
1666 | if (numtrees > 2) |
---|
1667 | initseed(&inseed, &inseed0, seed); |
---|
1668 | l0gl = (double *)Malloc(numtrees * sizeof(double)); |
---|
1669 | l0gf = (double **)Malloc(numtrees * sizeof(double *)); |
---|
1670 | for (i=0;i<numtrees;++i) |
---|
1671 | l0gf[i] = (double *)Malloc(endsite * sizeof(double)); |
---|
1672 | if (treeprint) { |
---|
1673 | fprintf(outfile, "User-defined tree"); |
---|
1674 | if (numtrees > 1) |
---|
1675 | putc('s', outfile); |
---|
1676 | fprintf(outfile, ":\n\n"); |
---|
1677 | } |
---|
1678 | which = 1; |
---|
1679 | while (which <= numtrees) { |
---|
1680 | treeread2 (intree, &curtree.start, curtree.nodep, |
---|
1681 | lengths, &trweight, &goteof, &haslengths, &spp); |
---|
1682 | treevaluate(); |
---|
1683 | if (reconsider) { |
---|
1684 | bestyet = - nextsp*sites*sitelength*log(4.0); |
---|
1685 | succeeded = true; |
---|
1686 | while (succeeded) { |
---|
1687 | succeeded = false; |
---|
1688 | rearrange(curtree.start, curtree.start->back); |
---|
1689 | } |
---|
1690 | treevaluate(); |
---|
1691 | } |
---|
1692 | restml_printree(); |
---|
1693 | summarize(); |
---|
1694 | if (trout) { |
---|
1695 | col = 0; |
---|
1696 | restml_treeout(curtree.start); |
---|
1697 | } |
---|
1698 | which++; |
---|
1699 | } |
---|
1700 | FClose(intree); |
---|
1701 | if (numtrees > 1 && weightsum > 1 ) |
---|
1702 | standev2(numtrees, maxwhich, 1, endsite, maxlogl, l0gl, l0gf, |
---|
1703 | aliasweight, seed); |
---|
1704 | } else { |
---|
1705 | for (i = 1; i <= spp; i++) |
---|
1706 | enterorder[i - 1] = i; |
---|
1707 | if (jumble) |
---|
1708 | randumize(seed, enterorder); |
---|
1709 | if (progress) { |
---|
1710 | printf("\nAdding species:\n"); |
---|
1711 | writename(0, 3, enterorder); |
---|
1712 | } |
---|
1713 | nextsp = 3; |
---|
1714 | buildsimpletree(&curtree); |
---|
1715 | curtree.start = curtree.nodep[enterorder[0] - 1]->back; |
---|
1716 | smoothit = improve; |
---|
1717 | nextsp = 4; |
---|
1718 | while (nextsp <= spp) { |
---|
1719 | buildnewtip(enterorder[nextsp - 1], &curtree); |
---|
1720 | bestyet = - nextsp*sites*sitelength*log(4.0); |
---|
1721 | if (smoothit) |
---|
1722 | restml_copy_(&curtree, &priortree); |
---|
1723 | addtraverse(curtree.nodep[enterorder[nextsp - 1] - 1]->back, |
---|
1724 | curtree.start, true); |
---|
1725 | if (smoothit) |
---|
1726 | restml_copy_(&bestree, &curtree); |
---|
1727 | else { |
---|
1728 | insert_(curtree.nodep[enterorder[nextsp - 1] - 1]->back, qwhere); |
---|
1729 | smoothit = true; |
---|
1730 | for (i = 1; i<=smoothings; i++) { |
---|
1731 | smooth (curtree.start); |
---|
1732 | smooth (curtree.start->back); |
---|
1733 | } |
---|
1734 | smoothit = false; |
---|
1735 | restml_copy_(&curtree, &bestree); |
---|
1736 | bestyet = curtree.likelihood; |
---|
1737 | } |
---|
1738 | if (progress) |
---|
1739 | writename(nextsp - 1, 1, enterorder); |
---|
1740 | if (global && nextsp == spp) { |
---|
1741 | if (progress) { |
---|
1742 | printf("Doing global rearrangements\n"); |
---|
1743 | printf(" "); |
---|
1744 | } |
---|
1745 | } |
---|
1746 | succeeded = true; |
---|
1747 | while (succeeded) { |
---|
1748 | succeeded = false; |
---|
1749 | rearrange(curtree.start, curtree.start->back); |
---|
1750 | } |
---|
1751 | for (i = spp; i < nonodes2; i++) { |
---|
1752 | curtree.nodep[i]->initialized = false; |
---|
1753 | curtree.nodep[i]->next->initialized = false; |
---|
1754 | curtree.nodep[i]->next->next->initialized = false; |
---|
1755 | } |
---|
1756 | if (!smoothit) { |
---|
1757 | smoothit = true; |
---|
1758 | for (i = 1; i<=smoothings; i++) { |
---|
1759 | smooth (curtree.start); |
---|
1760 | smooth (curtree.start->back); |
---|
1761 | } |
---|
1762 | smoothit = false; |
---|
1763 | restml_copy_(&curtree, &bestree); |
---|
1764 | } |
---|
1765 | nextsp++; |
---|
1766 | } |
---|
1767 | if (global && progress) { |
---|
1768 | putchar('\n'); |
---|
1769 | fflush(stdout); |
---|
1770 | } |
---|
1771 | if (njumble > 1) { |
---|
1772 | if (jumb == 1) |
---|
1773 | restml_copy_(&bestree, &bestree2); |
---|
1774 | else |
---|
1775 | if (bestree2.likelihood < bestree.likelihood) |
---|
1776 | restml_copy_(&bestree, &bestree2); |
---|
1777 | } |
---|
1778 | if (jumb == njumble) { |
---|
1779 | if (njumble > 1) |
---|
1780 | restml_copy_(&bestree2, &curtree); |
---|
1781 | curtree.start = curtree.nodep[outgrno - 1]->back; |
---|
1782 | restml_printree(); |
---|
1783 | summarize(); |
---|
1784 | if (trout) { |
---|
1785 | col = 0; |
---|
1786 | restml_treeout(curtree.start); |
---|
1787 | } |
---|
1788 | } |
---|
1789 | } |
---|
1790 | freex2(nonodes2, curtree.nodep); |
---|
1791 | if (!usertree) { |
---|
1792 | freex2(nonodes2, priortree.nodep); |
---|
1793 | freex2(nonodes2, bestree.nodep); |
---|
1794 | if (njumble > 1) |
---|
1795 | freex2(nonodes2, bestree2.nodep); |
---|
1796 | } else { |
---|
1797 | free(l0gl); |
---|
1798 | for (i=0;i<numtrees;++i) |
---|
1799 | free(l0gf[i]); |
---|
1800 | free(l0gf); |
---|
1801 | } |
---|
1802 | if (jumb == njumble) { |
---|
1803 | if (progress) { |
---|
1804 | printf("\nOutput written to file \"%s\"\n\n", outfilename); |
---|
1805 | if (trout) |
---|
1806 | printf("Tree also written onto file \"%s\"\n", outtreename); |
---|
1807 | putchar('\n'); |
---|
1808 | } |
---|
1809 | } |
---|
1810 | } /* maketree */ |
---|
1811 | |
---|
1812 | |
---|
1813 | int main(int argc, Char *argv[]) |
---|
1814 | { /* maximum likelihood phylogenies from restriction sites */ |
---|
1815 | #ifdef MAC |
---|
1816 | argc = 1; /* macsetup("Restml",""); */ |
---|
1817 | argv[0] = "RestML"; |
---|
1818 | #endif |
---|
1819 | init(argc, argv); |
---|
1820 | progname = argv[0]; |
---|
1821 | openfile(&infile,INFILE,"input file","r",argv[0],infilename); |
---|
1822 | openfile(&outfile,OUTFILE,"output file","w",argv[0],outfilename); |
---|
1823 | ibmpc = IBMCRT; |
---|
1824 | ansi = ANSICRT; |
---|
1825 | mulsets = false; |
---|
1826 | datasets = 1; |
---|
1827 | firstset = true; |
---|
1828 | doinit(); |
---|
1829 | if (trout) |
---|
1830 | openfile(&outtree,OUTTREE,"output tree file","w",argv[0],outtreename); |
---|
1831 | for (ith = 1; ith <= datasets; ith++) { |
---|
1832 | if (datasets > 1) { |
---|
1833 | fprintf(outfile, "Data set # %ld:\n",ith); |
---|
1834 | if (progress) |
---|
1835 | printf("\nData set # %ld:\n",ith); |
---|
1836 | } |
---|
1837 | getinput(); |
---|
1838 | if (ith == 1) |
---|
1839 | firstset = false; |
---|
1840 | for (jumb = 1; jumb <= njumble; jumb++) |
---|
1841 | maketree(); |
---|
1842 | } |
---|
1843 | FClose(infile); |
---|
1844 | FClose(outfile); |
---|
1845 | FClose(outtree); |
---|
1846 | #ifdef MAC |
---|
1847 | fixmacfile(outfilename); |
---|
1848 | fixmacfile(outtreename); |
---|
1849 | #endif |
---|
1850 | printf("Done.\n\n"); |
---|
1851 | return 0; |
---|
1852 | } /* maximum likelihood phylogenies from restriction sites */ |
---|