1 | /* |
---|
2 | * ml2.c |
---|
3 | * |
---|
4 | * |
---|
5 | * Part of TREE-PUZZLE 5.0 (June 2000) |
---|
6 | * |
---|
7 | * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, |
---|
8 | * M. Vingron, and Arndt von Haeseler |
---|
9 | * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler |
---|
10 | * |
---|
11 | * All parts of the source except where indicated are distributed under |
---|
12 | * the GNU public licence. See http://www.opensource.org for details. |
---|
13 | */ |
---|
14 | |
---|
15 | |
---|
16 | #define EXTERN extern |
---|
17 | |
---|
18 | /* prototypes */ |
---|
19 | #include <stdio.h> |
---|
20 | #include <stdlib.h> |
---|
21 | #include <math.h> |
---|
22 | #include <ctype.h> |
---|
23 | #include <string.h> |
---|
24 | #include "util.h" |
---|
25 | #include "ml.h" |
---|
26 | |
---|
27 | #define STDOUT stdout |
---|
28 | #ifndef PARALLEL /* because printf() runs significantly faster */ |
---|
29 | /* than fprintf(stdout) on an Apple McIntosh */ |
---|
30 | /* (HS) */ |
---|
31 | # define FPRINTF printf |
---|
32 | # define STDOUTFILE |
---|
33 | #else |
---|
34 | # define FPRINTF fprintf |
---|
35 | # define STDOUTFILE STDOUT, |
---|
36 | #endif |
---|
37 | |
---|
38 | /* prototypes for two functions of puzzle2.c */ |
---|
39 | void fputid10(FILE *, int); |
---|
40 | int fputid(FILE *, int); |
---|
41 | |
---|
42 | |
---|
43 | /******************************************************************************/ |
---|
44 | /* user tree input */ |
---|
45 | /******************************************************************************/ |
---|
46 | |
---|
47 | /* read user tree, drop all blanks, tabs, and newlines. |
---|
48 | Drop edgelengths (after :) but keep internal |
---|
49 | labels. Check whether all pairs of brackets match. */ |
---|
50 | void getusertree(FILE *itfp, cvector tr, int maxlen) |
---|
51 | { |
---|
52 | int n, brac, ci; |
---|
53 | int comment = 0; |
---|
54 | |
---|
55 | /* look for opening bracket */ |
---|
56 | n = 0; |
---|
57 | brac = 0; |
---|
58 | do { |
---|
59 | ci = fgetc(itfp); |
---|
60 | if (ci == EOF) { |
---|
61 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing start bracket in tree)\n\n\n"); |
---|
62 | exit(1); |
---|
63 | } |
---|
64 | if (ci == '[') comment = 1; |
---|
65 | if ((ci == ']') && comment) { |
---|
66 | comment = 0; |
---|
67 | ci = fgetc(itfp); |
---|
68 | } |
---|
69 | } while (comment || ((char) ci != '(')); |
---|
70 | tr[n] = (char) ci; |
---|
71 | brac++; |
---|
72 | |
---|
73 | do { |
---|
74 | /* get next character (skip blanks, newlines, and tabs) */ |
---|
75 | do { |
---|
76 | ci = fgetc(itfp); |
---|
77 | if (ci == EOF) { |
---|
78 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no more characters in tree)\n\n\n"); |
---|
79 | exit(1); |
---|
80 | } |
---|
81 | if (ci == '[') comment = 1; |
---|
82 | if ((ci == ']') && comment) { |
---|
83 | comment = 0; |
---|
84 | ci = fgetc(itfp); |
---|
85 | } |
---|
86 | } while (comment || (char) ci == ' ' || (char) ci == '\n' || (char) ci == '\t'); |
---|
87 | |
---|
88 | if ((char) ci == ':') { /* skip characters until a ,) appears */ |
---|
89 | do { |
---|
90 | ci = fgetc(itfp); |
---|
91 | if (ci == EOF) { |
---|
92 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing ';' or ',' in tree)\n\n\n"); |
---|
93 | exit(1); |
---|
94 | } |
---|
95 | if (ci == '[') comment = 1; |
---|
96 | if ((ci == ']') && comment) { |
---|
97 | comment = 0; |
---|
98 | ci = fgetc(itfp); |
---|
99 | } |
---|
100 | } while (comment || ((char) ci != ',' && (char) ci != ')') ); |
---|
101 | } |
---|
102 | |
---|
103 | if ((char) ci == '(') { |
---|
104 | brac++; |
---|
105 | } |
---|
106 | if ((char) ci == ')') { |
---|
107 | brac--; |
---|
108 | } |
---|
109 | |
---|
110 | n++; |
---|
111 | tr[n] = (char) ci; |
---|
112 | |
---|
113 | } while (((char) ci != ';') && (n != maxlen-2)); |
---|
114 | |
---|
115 | if (n == maxlen-2) { |
---|
116 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (tree description too long)\n\n\n"); |
---|
117 | exit(1); |
---|
118 | } |
---|
119 | |
---|
120 | if (brac != 0) { |
---|
121 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (brackets don't match in tree)\n\n\n"); |
---|
122 | exit(1); |
---|
123 | } |
---|
124 | |
---|
125 | n++; |
---|
126 | tr[n] = '\0'; |
---|
127 | } |
---|
128 | |
---|
129 | |
---|
130 | Node *internalnode(Tree *tr, char **chpp, int *ninode) |
---|
131 | { |
---|
132 | Node *xp, *np, *rp; |
---|
133 | int i, j, dvg, ff, stop, numc; |
---|
134 | char ident[100], idcomp[11]; |
---|
135 | char *idp; |
---|
136 | |
---|
137 | (*chpp)++; |
---|
138 | if (**chpp == '(') { /* process subgroup */ |
---|
139 | |
---|
140 | xp = internalnode(tr, chpp, ninode); |
---|
141 | xp->isop = xp; |
---|
142 | dvg = 1; |
---|
143 | while (**chpp != ')') { |
---|
144 | if (**chpp == '\0') { |
---|
145 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); |
---|
146 | exit(1); |
---|
147 | } |
---|
148 | dvg++; |
---|
149 | /* insert edges around node */ |
---|
150 | np = internalnode(tr, chpp, ninode); |
---|
151 | np->isop = xp->isop; |
---|
152 | xp->isop = np; |
---|
153 | xp = np; |
---|
154 | } |
---|
155 | /* closing bracket reached */ |
---|
156 | |
---|
157 | (*chpp)++; |
---|
158 | if (dvg < 2) { |
---|
159 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (only one OTU inside pair of brackets)\n\n\n"); |
---|
160 | exit(1); |
---|
161 | } |
---|
162 | |
---|
163 | if ((*ninode) >= Maxspc-3) { /* all internal nodes already used */ |
---|
164 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n"); |
---|
165 | exit(1); |
---|
166 | } |
---|
167 | |
---|
168 | rp = tr->ibrnchp[*ninode]; |
---|
169 | rp->isop = xp->isop; |
---|
170 | xp->isop = rp; |
---|
171 | |
---|
172 | for (j = 0; j < Numspc; j++) |
---|
173 | rp->paths[j] = 0; |
---|
174 | xp = rp->isop; |
---|
175 | while (xp != rp) { |
---|
176 | for (j = 0; j < Numspc; j++) { |
---|
177 | if (xp->paths[j] == 1) |
---|
178 | rp->paths[j] = 1; |
---|
179 | } |
---|
180 | xp = xp->isop; |
---|
181 | } |
---|
182 | (*ninode)++; |
---|
183 | |
---|
184 | if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp; |
---|
185 | if ((**chpp) == '\0') { |
---|
186 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); |
---|
187 | exit(1); |
---|
188 | } |
---|
189 | |
---|
190 | /* read internal label into rp->label (max. 20 characters) */ |
---|
191 | rp->label = new_cvector(21); |
---|
192 | (rp->label)[0] = **chpp; |
---|
193 | (rp->label)[1] = '\0'; |
---|
194 | for (numc = 1; numc < 20; numc++) { |
---|
195 | (*chpp)++; |
---|
196 | if ((**chpp) == ',' || (**chpp) == ')') return rp->kinp; |
---|
197 | if ((**chpp) == '\0') { |
---|
198 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); |
---|
199 | exit(1); |
---|
200 | } |
---|
201 | (rp->label)[numc] = **chpp; |
---|
202 | (rp->label)[numc+1] = '\0'; |
---|
203 | } |
---|
204 | do { /* skip the rest of the internal label */ |
---|
205 | (*chpp)++; |
---|
206 | if ((**chpp) == '\0') { |
---|
207 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); |
---|
208 | exit(1); |
---|
209 | } |
---|
210 | } while (((**chpp) != ',' && (**chpp) != ')')); |
---|
211 | |
---|
212 | return rp->kinp; |
---|
213 | |
---|
214 | } else { /* process species names */ |
---|
215 | /* read species name */ |
---|
216 | for (idp = ident; **chpp != ',' && |
---|
217 | **chpp != ')' && **chpp != '\0'; (*chpp)++) { |
---|
218 | *idp++ = **chpp; |
---|
219 | } |
---|
220 | *idp = '\0'; |
---|
221 | /* look for internal number */ |
---|
222 | idcomp[10] = '\0'; |
---|
223 | |
---|
224 | for (i = 0; i < Maxspc; i++) { |
---|
225 | ff = 0; |
---|
226 | stop = FALSE; |
---|
227 | do { |
---|
228 | idcomp[ff] = Identif[i][ff]; |
---|
229 | ff++; |
---|
230 | if (idcomp[ff-1] == ' ') stop = TRUE; |
---|
231 | } while (!stop && (ff != 10)); |
---|
232 | if (stop) idcomp[ff-1] = '\0'; |
---|
233 | |
---|
234 | if (!strcmp(ident, idcomp)) { |
---|
235 | if (usedtaxa[i]) { |
---|
236 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurrences of sequence '"); |
---|
237 | FPRINTF(STDOUTFILE "%s' in tree)\n\n\n", ident); |
---|
238 | exit(1); |
---|
239 | } |
---|
240 | usedtaxa[i] = TRUE; |
---|
241 | return tr->ebrnchp[i]->kinp; |
---|
242 | } |
---|
243 | } |
---|
244 | |
---|
245 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unknown sequence '%s' in tree)\n\n\n", ident); |
---|
246 | exit(1); |
---|
247 | } |
---|
248 | return NULL; /* never returned but without some compilers complain */ |
---|
249 | } |
---|
250 | |
---|
251 | /* make tree structure, the tree description may contain internal |
---|
252 | labels but no edge lengths */ |
---|
253 | void constructtree(Tree *tr, cvector strtree) |
---|
254 | { |
---|
255 | char *chp; |
---|
256 | int ninode, i; |
---|
257 | int dvg, numc; |
---|
258 | Node *xp, *np; |
---|
259 | |
---|
260 | ninode = 0; |
---|
261 | chp = strtree; |
---|
262 | usedtaxa = new_ivector(Maxspc); |
---|
263 | for (i = 0; i < Maxspc; i++) usedtaxa[i] = FALSE; |
---|
264 | |
---|
265 | xp = internalnode(tr, &chp, &ninode); |
---|
266 | xp->isop = xp; |
---|
267 | dvg = 1; |
---|
268 | while (*chp != ')') { /* look for closing bracket */ |
---|
269 | if (*chp == '\0') { |
---|
270 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (unexpected end of tree)\n\n\n"); |
---|
271 | exit(1); |
---|
272 | } |
---|
273 | dvg++; |
---|
274 | /* insert edges around node */ |
---|
275 | np = internalnode(tr, &chp, &ninode); |
---|
276 | np->isop = xp->isop; |
---|
277 | xp->isop = np; |
---|
278 | xp = np; |
---|
279 | } |
---|
280 | |
---|
281 | for (i = 0; i < Maxspc; i++) |
---|
282 | if (usedtaxa[i] == FALSE) { |
---|
283 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (sequences missing in tree)\n\n\n"); |
---|
284 | exit(1); |
---|
285 | } |
---|
286 | |
---|
287 | /* closing bracket reached */ |
---|
288 | if (dvg < 3) { |
---|
289 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no unrooted tree)\n\n\n"); |
---|
290 | exit(1); |
---|
291 | } |
---|
292 | tr->rootp = xp; |
---|
293 | Numibrnch = ninode; |
---|
294 | Numbrnch = Numspc + ninode; |
---|
295 | |
---|
296 | chp++; |
---|
297 | if (*chp == ';' || *chp == '\0') { |
---|
298 | free_ivector(usedtaxa); |
---|
299 | return; |
---|
300 | } |
---|
301 | |
---|
302 | /* copy last internal label (max. 20 characters) */ |
---|
303 | xp->label = new_cvector(21); |
---|
304 | (xp->label)[0] = *chp; |
---|
305 | (xp->label)[1] = '\0'; |
---|
306 | for (numc = 1; numc < 20; numc++) { |
---|
307 | chp++; |
---|
308 | if (*chp == ';' || *chp == '\0') { |
---|
309 | free_ivector(usedtaxa); |
---|
310 | return; |
---|
311 | } else { |
---|
312 | (xp->label)[numc] = *chp; |
---|
313 | (xp->label)[numc+1] = '\0'; |
---|
314 | } |
---|
315 | } |
---|
316 | free_ivector(usedtaxa); |
---|
317 | return; |
---|
318 | } |
---|
319 | |
---|
320 | |
---|
321 | /* remove possible basal bifurcation */ |
---|
322 | void removebasalbif(cvector strtree) |
---|
323 | { |
---|
324 | int n, c, brak, cutflag, h; |
---|
325 | |
---|
326 | /* check how many OTUs on basal level */ |
---|
327 | n = 0; |
---|
328 | c = 0; |
---|
329 | brak = 0; |
---|
330 | do { |
---|
331 | if (strtree[n] == '(') brak++; |
---|
332 | if (strtree[n] == ')') brak--; |
---|
333 | |
---|
334 | if (strtree[n] == ',' && brak == 1) c++; /* number of commas in outer bracket */ |
---|
335 | |
---|
336 | n++; |
---|
337 | } while (strtree[n] != '\0'); |
---|
338 | |
---|
339 | /* if only 1 OTU inside outer bracket stop now */ |
---|
340 | if (c == 0) { |
---|
341 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (Only 1 OTU inside outer bracket in tree)\n\n\n"); |
---|
342 | exit(1); |
---|
343 | } |
---|
344 | |
---|
345 | /* if only 2 OTUs inside outer bracket delete second pair of |
---|
346 | brackets from the right to remove basal bifurcation */ |
---|
347 | |
---|
348 | if (c == 1) { |
---|
349 | |
---|
350 | n = 0; |
---|
351 | brak = 0; |
---|
352 | cutflag = 0; /* not yet cutted */ |
---|
353 | h = 0; |
---|
354 | do { |
---|
355 | if (strtree[n] == '(') brak++; |
---|
356 | if (strtree[n] == ')') brak--; |
---|
357 | |
---|
358 | if (brak == 2 && cutflag == 0) cutflag = 1; /* cutting */ |
---|
359 | if (brak == 1 && cutflag == 1) { |
---|
360 | cutflag = 2; /* cutted */ |
---|
361 | /* leave out internal label */ |
---|
362 | do { |
---|
363 | h++; |
---|
364 | } while (strtree[n+h] != ')' && strtree[n+h] != ','); |
---|
365 | |
---|
366 | } |
---|
367 | |
---|
368 | if (cutflag == 1) strtree[n] = strtree[n+1]; |
---|
369 | if (cutflag == 2) strtree[n-1] = strtree[n+h]; |
---|
370 | |
---|
371 | n++; |
---|
372 | } while (strtree[n] != '\0'); |
---|
373 | } |
---|
374 | } |
---|
375 | |
---|
376 | |
---|
377 | void makeusertree(FILE *itfp) |
---|
378 | { |
---|
379 | cvector strtree; |
---|
380 | |
---|
381 | strtree = new_cvector(23*Maxspc); /* for treefile */ |
---|
382 | getusertree(itfp, strtree, 23*Maxspc); |
---|
383 | removebasalbif(strtree); |
---|
384 | constructtree(Ctree, strtree); |
---|
385 | free_cvector(strtree); |
---|
386 | } |
---|
387 | |
---|
388 | |
---|
389 | /******************************************************************************/ |
---|
390 | /* memory organisation for maximum likelihood tree */ |
---|
391 | /******************************************************************************/ |
---|
392 | |
---|
393 | /* initialise new tree */ |
---|
394 | Tree *new_tree(int maxspc, int numptrn, cmatrix seqconint) |
---|
395 | { |
---|
396 | int n, i, maxibrnch; |
---|
397 | Tree *tr; |
---|
398 | Node *dp, *up; |
---|
399 | |
---|
400 | maxibrnch = maxspc - 3; |
---|
401 | heights = (Node **) malloc((unsigned)(maxspc-2) * sizeof(Node *)); |
---|
402 | if (heights == NULL) maerror("heights in new_tree"); |
---|
403 | tr = (Tree *) malloc(sizeof(Tree)); |
---|
404 | if (tr == NULL) maerror("tr in new_tree"); |
---|
405 | tr->ebrnchp = (Node **) malloc((unsigned)maxspc * sizeof(Node *)); |
---|
406 | if (tr->ebrnchp == NULL) maerror("ebrnchp in new_tree"); |
---|
407 | tr->ibrnchp = (Node **) malloc((unsigned)maxibrnch * sizeof(Node *)); |
---|
408 | if (tr->ibrnchp == NULL) maerror("ibrnchp in new_tree"); |
---|
409 | tr->condlkl = new_dmatrix(numcats, numptrn); |
---|
410 | for (n = 0; n < maxspc; n++) { |
---|
411 | dp = (Node *) malloc(sizeof(Node)); |
---|
412 | if (dp == NULL) maerror("dp in new_tree"); |
---|
413 | up = (Node *) malloc(sizeof(Node)); |
---|
414 | if (up == NULL) maerror("up in new_tree"); |
---|
415 | dp->isop = NULL; |
---|
416 | up->isop = NULL; |
---|
417 | dp->kinp = up; |
---|
418 | up->kinp = dp; |
---|
419 | dp->descen = TRUE; |
---|
420 | up->descen = FALSE; |
---|
421 | dp->number = n; |
---|
422 | up->number = n; |
---|
423 | dp->length = 0.0; |
---|
424 | up->length = 0.0; |
---|
425 | dp->lengthc = 0.0; |
---|
426 | up->lengthc = 0.0; |
---|
427 | dp->varlen = 0.0; |
---|
428 | up->varlen = 0.0; |
---|
429 | dp->paths = new_ivector(maxspc); |
---|
430 | up->paths = dp->paths; |
---|
431 | for (i = 0; i < maxspc; i++) dp->paths[i] = 0; |
---|
432 | dp->paths[n] = 1; |
---|
433 | dp->eprob = seqconint[n]; |
---|
434 | up->eprob = NULL; |
---|
435 | dp->partials = NULL; |
---|
436 | up->partials = new_dcube(numcats, numptrn, tpmradix); |
---|
437 | tr->ebrnchp[n] = dp; |
---|
438 | up->label = NULL; |
---|
439 | dp->label = NULL; |
---|
440 | } |
---|
441 | for (n = 0; n < maxibrnch; n++) { |
---|
442 | dp = (Node *) malloc(sizeof(Node)); |
---|
443 | if (dp == NULL) maerror("dp in new_tree"); |
---|
444 | up = (Node *) malloc(sizeof(Node)); |
---|
445 | if (up == NULL) maerror("up in new_tree"); |
---|
446 | dp->isop = NULL; |
---|
447 | up->isop = NULL; |
---|
448 | dp->kinp = up; |
---|
449 | up->kinp = dp; |
---|
450 | dp->descen = TRUE; |
---|
451 | up->descen = FALSE; |
---|
452 | dp->number = n; |
---|
453 | up->number = n; |
---|
454 | dp->length = 0.0; |
---|
455 | up->length = 0.0; |
---|
456 | dp->lengthc = 0.0; |
---|
457 | up->lengthc = 0.0; |
---|
458 | dp->varlen = 0.0; |
---|
459 | up->varlen = 0.0; |
---|
460 | dp->paths = new_ivector(maxspc); |
---|
461 | up->paths = dp->paths; |
---|
462 | for (i = 0; i < maxspc; i++) dp->paths[i] = 0; |
---|
463 | dp->eprob = NULL; |
---|
464 | up->eprob = NULL; |
---|
465 | dp->partials = new_dcube(numcats, numptrn, tpmradix); |
---|
466 | up->partials = new_dcube(numcats, numptrn, tpmradix); |
---|
467 | tr->ibrnchp[n] = dp; |
---|
468 | up->label = NULL; |
---|
469 | dp->label = NULL; |
---|
470 | } |
---|
471 | tr->rootp = NULL; |
---|
472 | |
---|
473 | /* |
---|
474 | * reserve memory for lengths of the tree branches |
---|
475 | * and for the distance matrix as a vector |
---|
476 | * (needed for LS estimation of tree branch lengths) |
---|
477 | */ |
---|
478 | |
---|
479 | Brnlength = new_dvector(2 * maxspc - 3); |
---|
480 | Distanvec = new_dvector((maxspc * (maxspc - 1)) / 2); |
---|
481 | |
---|
482 | return tr; |
---|
483 | } |
---|
484 | |
---|
485 | |
---|
486 | /* initialise quartet tree */ |
---|
487 | Tree *new_quartet(int numptrn, cmatrix seqconint) |
---|
488 | { |
---|
489 | int n, i; |
---|
490 | Tree *tr; |
---|
491 | Node *dp, *up; |
---|
492 | |
---|
493 | heights = (Node **) malloc((unsigned)2 * sizeof(Node *)); |
---|
494 | if (heights == NULL) maerror("heights in new_quartet"); |
---|
495 | /* reserve memory for tree */ |
---|
496 | tr = (Tree *) malloc(sizeof(Tree)); |
---|
497 | if (tr == NULL) maerror("tr in new_quartet"); |
---|
498 | tr->ebrnchp = (Node **) malloc((unsigned) 4 * sizeof(Node *)); |
---|
499 | if (tr->ebrnchp == NULL) maerror("ebrnchp in new_quartet"); |
---|
500 | tr->ibrnchp = (Node **) malloc((unsigned) sizeof(Node *)); |
---|
501 | if (tr->ibrnchp == NULL) maerror("ibrnchp in new_quartet"); |
---|
502 | tr->condlkl = new_dmatrix(numcats, numptrn); |
---|
503 | /* reserve memory for nodes */ |
---|
504 | for (n = 0; n < 4; n++) { |
---|
505 | dp = (Node *) malloc(sizeof(Node)); |
---|
506 | if (dp == NULL) maerror("dp in new_quartet"); |
---|
507 | up = (Node *) malloc(sizeof(Node)); |
---|
508 | if (up == NULL) maerror("dp in new_quartet"); |
---|
509 | dp->isop = NULL; |
---|
510 | dp->kinp = up; |
---|
511 | up->kinp = dp; |
---|
512 | dp->descen = TRUE; |
---|
513 | up->descen = FALSE; |
---|
514 | dp->number = n; |
---|
515 | up->number = n; |
---|
516 | dp->length = 0.0; |
---|
517 | up->length = 0.0; |
---|
518 | dp->lengthc = 0.0; |
---|
519 | up->lengthc = 0.0; |
---|
520 | dp->varlen = 0.0; |
---|
521 | up->varlen = 0.0; |
---|
522 | dp->paths = new_ivector(4); |
---|
523 | up->paths = dp->paths; |
---|
524 | for (i = 0; i < 4; i++) dp->paths[i] = 0; |
---|
525 | dp->paths[n] = 1; |
---|
526 | dp->eprob = seqconint[n]; /* make quartet (0,1)-(2,3) as default */ |
---|
527 | up->eprob = NULL; |
---|
528 | dp->partials = NULL; |
---|
529 | up->partials = new_dcube(numcats, numptrn, tpmradix); |
---|
530 | tr->ebrnchp[n] = dp; |
---|
531 | } |
---|
532 | |
---|
533 | /* reserve memory for internal branch */ |
---|
534 | dp = (Node *) malloc(sizeof(Node)); |
---|
535 | if (dp == NULL) maerror("dp in new_quartet"); |
---|
536 | up = (Node *) malloc(sizeof(Node)); |
---|
537 | if (up == NULL) maerror("dp in new_quartet"); |
---|
538 | dp->isop = tr->ebrnchp[3]->kinp; /* connect internal branch */ |
---|
539 | up->isop = tr->ebrnchp[0]->kinp; |
---|
540 | dp->kinp = up; |
---|
541 | up->kinp = dp; |
---|
542 | dp->descen = TRUE; |
---|
543 | up->descen = FALSE; |
---|
544 | dp->number = 0; |
---|
545 | up->number = 0; |
---|
546 | dp->length = 0.0; |
---|
547 | up->length = 0.0; |
---|
548 | dp->lengthc = 0.0; |
---|
549 | up->lengthc = 0.0; |
---|
550 | dp->varlen = 0.0; |
---|
551 | up->varlen = 0.0; |
---|
552 | dp->paths = new_ivector(4); |
---|
553 | up->paths = dp->paths; |
---|
554 | up->paths[0] = 0; |
---|
555 | up->paths[1] = 0; |
---|
556 | up->paths[2] = 1; |
---|
557 | up->paths[3] = 1; |
---|
558 | dp->eprob = NULL; |
---|
559 | up->eprob = NULL; |
---|
560 | dp->partials = new_dcube(numcats, numptrn, tpmradix); |
---|
561 | up->partials = new_dcube(numcats, numptrn, tpmradix); |
---|
562 | tr->ibrnchp[0] = dp; |
---|
563 | |
---|
564 | /* place root */ |
---|
565 | tr->rootp = up; |
---|
566 | |
---|
567 | /* connect external branches */ |
---|
568 | tr->ebrnchp[0]->kinp->isop = tr->ebrnchp[1]->kinp; |
---|
569 | tr->ebrnchp[1]->kinp->isop = tr->rootp; |
---|
570 | tr->ebrnchp[3]->kinp->isop = tr->ebrnchp[2]->kinp; |
---|
571 | tr->ebrnchp[2]->kinp->isop = tr->rootp->kinp; |
---|
572 | |
---|
573 | /* |
---|
574 | * reserve memory for lengths of the five branches |
---|
575 | * of a quartet and for the six possible distances |
---|
576 | * (needed for LS estimation of branch lengths) |
---|
577 | */ |
---|
578 | Brnlength = new_dvector(NUMQBRNCH); |
---|
579 | Distanvec = new_dvector(NUMQSPC*(NUMQSPC-1)/2); |
---|
580 | |
---|
581 | return tr; |
---|
582 | } |
---|
583 | |
---|
584 | |
---|
585 | /* free tree memory */ |
---|
586 | void free_tree(Tree *tr, int taxa) |
---|
587 | { |
---|
588 | int n; |
---|
589 | Node *dp, *up; |
---|
590 | |
---|
591 | free(heights); |
---|
592 | free_dmatrix(tr->condlkl); |
---|
593 | for (n = 0; n < taxa; n++) { |
---|
594 | dp = tr->ebrnchp[n]; |
---|
595 | up = dp->kinp; |
---|
596 | free_ivector(dp->paths); |
---|
597 | free_dcube(up->partials); |
---|
598 | free(dp); |
---|
599 | free(up); |
---|
600 | } |
---|
601 | free(tr->ebrnchp); |
---|
602 | for (n = 0; n < (taxa-3); n++) { |
---|
603 | dp = tr->ibrnchp[n]; |
---|
604 | up = dp->kinp; |
---|
605 | free_dcube(dp->partials); |
---|
606 | free_dcube(up->partials); |
---|
607 | free_ivector(dp->paths); |
---|
608 | free(dp); |
---|
609 | free(up); |
---|
610 | } |
---|
611 | free(tr->ibrnchp); |
---|
612 | free(tr); |
---|
613 | free_dvector(Brnlength); /* branch lengths (for LS estimation) */ |
---|
614 | free_dvector(Distanvec); /* distances (for LS estimation) */ |
---|
615 | } |
---|
616 | |
---|
617 | |
---|
618 | /* make (a,b)-(c,d) quartet |
---|
619 | |
---|
620 | a ---+ +--- c |
---|
621 | +-----+ |
---|
622 | b ---+ +--- d |
---|
623 | |
---|
624 | species numbers range from 0 to Maxspc - 1 */ |
---|
625 | |
---|
626 | void make_quartet(int a, int b, int c, int d) |
---|
627 | { |
---|
628 | /* place sequences */ |
---|
629 | Ctree->ebrnchp[0]->eprob = Seqpat[a]; |
---|
630 | Ctree->ebrnchp[1]->eprob = Seqpat[b]; |
---|
631 | Ctree->ebrnchp[2]->eprob = Seqpat[c]; |
---|
632 | Ctree->ebrnchp[3]->eprob = Seqpat[d]; |
---|
633 | |
---|
634 | /* make distance vector */ |
---|
635 | Distanvec[0] = Distanmat[b][a]; |
---|
636 | Distanvec[1] = Distanmat[c][a]; |
---|
637 | Distanvec[2] = Distanmat[c][b]; |
---|
638 | Distanvec[3] = Distanmat[d][a]; |
---|
639 | Distanvec[4] = Distanmat[d][b]; |
---|
640 | Distanvec[5] = Distanmat[d][c]; |
---|
641 | } |
---|
642 | |
---|
643 | /* write distance matrix as vector */ |
---|
644 | void changedistan(dmatrix distanmat, dvector distanvec, int numspc) |
---|
645 | { |
---|
646 | int i, j, k; |
---|
647 | |
---|
648 | for (k = 0, i = 1; i < numspc; i++) { |
---|
649 | for (j = 0; j < i; j++, k++) |
---|
650 | distanvec[k] = distanmat[i][j]; |
---|
651 | } |
---|
652 | } |
---|
653 | |
---|
654 | |
---|
655 | /******************************************************************************/ |
---|
656 | /* computation of maximum likelihood tree */ |
---|
657 | /******************************************************************************/ |
---|
658 | |
---|
659 | |
---|
660 | /* compute the likelihood for (a,b)-(c,d) quartet */ |
---|
661 | double quartet_lklhd(int a, int b, int c, int d) |
---|
662 | { |
---|
663 | /* reserve memory for quartet if necessary */ |
---|
664 | if (mlmode != 1) { /* no quartet tree */ |
---|
665 | if (Ctree != NULL) |
---|
666 | free_tree(Ctree, Numspc); |
---|
667 | Ctree = new_quartet(Numptrn, Seqpat); |
---|
668 | Numbrnch = NUMQBRNCH; |
---|
669 | Numibrnch = NUMQIBRNCH; |
---|
670 | Numspc = NUMQSPC; |
---|
671 | mlmode = 1; |
---|
672 | } |
---|
673 | |
---|
674 | /* make (a,b)-(c,d) quartet */ |
---|
675 | make_quartet(a,b,c,d); |
---|
676 | |
---|
677 | clockmode = 0; /* nonclocklike branch lengths */ |
---|
678 | |
---|
679 | /* least square estimate for branch length */ |
---|
680 | lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); |
---|
681 | |
---|
682 | /* compute likelihood */ |
---|
683 | Ctree->lklhd = optlkl(Ctree); |
---|
684 | |
---|
685 | return Ctree->lklhd; |
---|
686 | } |
---|
687 | |
---|
688 | |
---|
689 | /* compute the approximate likelihood for (a,b)-(c,d) quartet */ |
---|
690 | double quartet_alklhd(int a, int b, int c, int d) |
---|
691 | { |
---|
692 | /* reserve memory for quartet if necessary */ |
---|
693 | if (mlmode != 1) { /* no quartet tree */ |
---|
694 | if (Ctree != NULL) |
---|
695 | free_tree(Ctree, Numspc); |
---|
696 | Ctree = new_quartet(Numptrn, Seqpat); |
---|
697 | Numbrnch = NUMQBRNCH; |
---|
698 | Numibrnch = NUMQIBRNCH; |
---|
699 | Numspc = NUMQSPC; |
---|
700 | mlmode = 1; |
---|
701 | } |
---|
702 | |
---|
703 | /* make (a,b)-(c,d) quartet */ |
---|
704 | make_quartet(a,b,c,d); |
---|
705 | |
---|
706 | clockmode = 0; /* nonclocklike branch lengths */ |
---|
707 | |
---|
708 | /* least square estimate for branch length */ |
---|
709 | lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); |
---|
710 | |
---|
711 | /* compute likelihood */ |
---|
712 | Ctree->lklhd = treelkl(Ctree); |
---|
713 | |
---|
714 | return Ctree->lklhd; |
---|
715 | } |
---|
716 | |
---|
717 | |
---|
718 | /* read usertree from file to memory */ |
---|
719 | void readusertree(FILE *ifp) |
---|
720 | { |
---|
721 | /* reserve memory for tree if necessary */ |
---|
722 | if (mlmode != 2) { /* no tree */ |
---|
723 | if (Ctree != NULL) |
---|
724 | free_tree(Ctree, Numspc); |
---|
725 | Ctree = new_tree(Maxspc, Numptrn, Seqpat); |
---|
726 | Numbrnch = 2*Maxspc-3; |
---|
727 | Numibrnch = Maxspc-3; |
---|
728 | Numspc = Maxspc; |
---|
729 | mlmode = 2; |
---|
730 | } |
---|
731 | |
---|
732 | /* read tree */ |
---|
733 | makeusertree(ifp); |
---|
734 | } |
---|
735 | |
---|
736 | |
---|
737 | /* compute the likelihood of a usertree */ |
---|
738 | double usertree_lklhd() |
---|
739 | { |
---|
740 | /* be sure to have a usertree in memory and |
---|
741 | to have pairwise distances computed */ |
---|
742 | |
---|
743 | clockmode = 0; /* nonclocklike branch lengths */ |
---|
744 | |
---|
745 | /* least square estimate for branch length */ |
---|
746 | changedistan(Distanmat, Distanvec, Numspc); |
---|
747 | lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); |
---|
748 | |
---|
749 | /* compute likelihood */ |
---|
750 | Ctree->lklhd = optlkl(Ctree); |
---|
751 | |
---|
752 | return Ctree->lklhd; |
---|
753 | } |
---|
754 | |
---|
755 | |
---|
756 | /* compute the approximate likelihood of a usertree */ |
---|
757 | double usertree_alklhd() |
---|
758 | { |
---|
759 | /* be sure to have a usertree in memory and |
---|
760 | to have pairwise distances computed */ |
---|
761 | |
---|
762 | clockmode = 0; /* nonclocklike branch lengths */ |
---|
763 | |
---|
764 | /* least square estimate for branch length */ |
---|
765 | changedistan(Distanmat, Distanvec, Numspc); |
---|
766 | lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); |
---|
767 | |
---|
768 | /* compute likelihood */ |
---|
769 | Ctree->lklhd = treelkl(Ctree); |
---|
770 | |
---|
771 | return Ctree->lklhd; |
---|
772 | } |
---|
773 | |
---|
774 | |
---|
775 | /* preparation for ML analysis */ |
---|
776 | void mlstart() |
---|
777 | { |
---|
778 | /* number of states and code length */ |
---|
779 | tpmradix = gettpmradix(); |
---|
780 | |
---|
781 | /* declare variables */ |
---|
782 | Eval = new_dvector(tpmradix); |
---|
783 | Evec = new_dmatrix(tpmradix,tpmradix); |
---|
784 | Ievc = new_dmatrix(tpmradix,tpmradix); |
---|
785 | iexp = new_dmatrix(tpmradix,tpmradix); |
---|
786 | Alias = new_ivector(Maxsite); |
---|
787 | |
---|
788 | /* process sequence information */ |
---|
789 | evaluateseqs(); |
---|
790 | bestrate = new_ivector(Numptrn); |
---|
791 | |
---|
792 | /* compute transition probability matrix */ |
---|
793 | tranprobmat(); |
---|
794 | |
---|
795 | /* non-zero rate categories */ |
---|
796 | Rates = new_dvector(numcats); |
---|
797 | updaterates(); |
---|
798 | ltprobr = new_dcube(numcats, tpmradix,tpmradix); |
---|
799 | |
---|
800 | /* compute distance matrix */ |
---|
801 | Distanmat = new_dmatrix(Maxspc, Maxspc); |
---|
802 | initdistan(); |
---|
803 | |
---|
804 | /* initialize tree pointer for quartet tree */ |
---|
805 | mlmode = 1; |
---|
806 | Ctree = new_quartet(Numptrn, Seqpat); |
---|
807 | Numbrnch = NUMQBRNCH; |
---|
808 | Numibrnch = NUMQIBRNCH; |
---|
809 | Numspc = NUMQSPC; |
---|
810 | |
---|
811 | /* computing ML distances */ |
---|
812 | computedistan(); |
---|
813 | } |
---|
814 | |
---|
815 | |
---|
816 | /* recompute ml distances for quartet only */ |
---|
817 | void distupdate(int a, int b, int c, int d) |
---|
818 | { |
---|
819 | /* update distance matrix */ |
---|
820 | /* consider only entries relevant to quartet */ |
---|
821 | Distanmat[a][b] = mldistance(a, b); |
---|
822 | Distanmat[b][a] = Distanmat[a][b]; |
---|
823 | Distanmat[a][c] = mldistance(a, c); |
---|
824 | Distanmat[c][a] = Distanmat[a][c]; |
---|
825 | Distanmat[a][d] = mldistance(a, d); |
---|
826 | Distanmat[d][a] = Distanmat[a][d]; |
---|
827 | Distanmat[b][c] = mldistance(b, c); |
---|
828 | Distanmat[c][b] = Distanmat[b][c]; |
---|
829 | Distanmat[b][d] = mldistance(b, d); |
---|
830 | Distanmat[d][b] = Distanmat[b][d]; |
---|
831 | Distanmat[c][d] = mldistance(c, d); |
---|
832 | Distanmat[d][c] = Distanmat[c][d]; |
---|
833 | } |
---|
834 | |
---|
835 | |
---|
836 | /* cleanup after ML analysis */ |
---|
837 | void mlfinish() |
---|
838 | { |
---|
839 | if (Ctree != NULL) |
---|
840 | free_tree(Ctree, Numspc); |
---|
841 | free_ivector(bestrate); |
---|
842 | free_ivector(Alias); |
---|
843 | free_cmatrix(Seqpat); |
---|
844 | free_ivector(constpat); |
---|
845 | free_ivector(Weight); |
---|
846 | free_dmatrix(Distanmat); |
---|
847 | free_dvector(Eval); |
---|
848 | free_dmatrix(Evec); |
---|
849 | free_dmatrix(Ievc); |
---|
850 | free_dvector(Rates); |
---|
851 | free_dcube(ltprobr); |
---|
852 | free_dmatrix(iexp); |
---|
853 | } |
---|
854 | |
---|
855 | |
---|
856 | /******************************************************************************/ |
---|
857 | /* tree output */ |
---|
858 | /******************************************************************************/ |
---|
859 | |
---|
860 | |
---|
861 | #define MAXOVER 50 |
---|
862 | #define MAXLENG 30 |
---|
863 | #define MAXCOLUMN 80 |
---|
864 | |
---|
865 | |
---|
866 | void prbranch(Node *up, int depth, int m, int maxm, |
---|
867 | ivector umbrella, ivector column, FILE *outfp) |
---|
868 | { |
---|
869 | int i, num, n, maxn, lim; |
---|
870 | Node *cp; |
---|
871 | char bch; |
---|
872 | |
---|
873 | if ((int)((clockmode ? up->lengthc : up->length) * Proportion) >= MAXOVER) { |
---|
874 | column[depth] = MAXLENG; |
---|
875 | bch = '+'; |
---|
876 | } else { |
---|
877 | column[depth] = (int)((clockmode ? up->lengthc : up->length) * Proportion) + 3; |
---|
878 | bch = '-'; |
---|
879 | } |
---|
880 | |
---|
881 | if (up->isop == NULL) { /* external branch */ |
---|
882 | num = up->number + 1; /* offset */ |
---|
883 | if (m == 1) umbrella[depth - 1] = TRUE; |
---|
884 | for (i = 0; i < depth; i++) { |
---|
885 | if (umbrella[i]) |
---|
886 | fprintf(outfp, "%*c", column[i], ':'); |
---|
887 | else |
---|
888 | fprintf(outfp, "%*c", column[i], ' '); |
---|
889 | } |
---|
890 | if (m == maxm) |
---|
891 | umbrella[depth - 1] = FALSE; |
---|
892 | for (i = 0, lim = column[depth] - 3; i < lim; i++) |
---|
893 | fputc(bch, outfp); |
---|
894 | fprintf(outfp, "-%d ", num); |
---|
895 | |
---|
896 | fputid(outfp, up->number); |
---|
897 | |
---|
898 | |
---|
899 | fputc('\n', outfp); |
---|
900 | fputc(' ', outfp); |
---|
901 | return; |
---|
902 | } |
---|
903 | |
---|
904 | num = up->number + 1 + Numspc; /* offset, internal branch */ |
---|
905 | for (cp = up->isop, maxn = 0; cp != up; cp = cp->isop, maxn++) |
---|
906 | ; |
---|
907 | for (cp = up->isop, n = 1; cp != up; cp = cp->isop, n++) { |
---|
908 | prbranch(cp->kinp, depth + 1, n, maxn, umbrella, column, outfp); |
---|
909 | if (m == 1 && n == maxn / 2) umbrella[depth - 1] = TRUE; |
---|
910 | if (n != maxn) { |
---|
911 | for (i = 0; i < depth; i++) { |
---|
912 | if (umbrella[i]) |
---|
913 | fprintf(outfp, "%*c", column[i], ':'); |
---|
914 | else |
---|
915 | fprintf(outfp, "%*c", column[i], ' '); |
---|
916 | } |
---|
917 | if (n == maxn / 2) { /* internal branch */ |
---|
918 | for (i = 0, lim = column[depth] - 3; i < lim; i++) |
---|
919 | fputc(bch, outfp); |
---|
920 | if (num < 10) |
---|
921 | fprintf(outfp, "--%d", num); |
---|
922 | else if (num < 100) |
---|
923 | fprintf(outfp, "-%2d", num); |
---|
924 | else |
---|
925 | fprintf(outfp, "%3d", num); |
---|
926 | } else { |
---|
927 | if (umbrella[depth]) |
---|
928 | fprintf(outfp, "%*c", column[depth], ':'); |
---|
929 | else |
---|
930 | fprintf(outfp, "%*c", column[depth], ' '); |
---|
931 | } |
---|
932 | fputc('\n', outfp); |
---|
933 | fputc(' ', outfp); |
---|
934 | } |
---|
935 | if (m == maxm) umbrella[depth - 1] = FALSE; |
---|
936 | } |
---|
937 | return; |
---|
938 | } |
---|
939 | |
---|
940 | |
---|
941 | void getproportion(double *proportion, dvector distanvec, int numspc) |
---|
942 | { |
---|
943 | int i, maxpair; |
---|
944 | double maxdis; |
---|
945 | |
---|
946 | maxpair = (numspc*(numspc-1))/2; |
---|
947 | |
---|
948 | maxdis = 0.0; |
---|
949 | for (i = 0; i < maxpair; i++) { |
---|
950 | if (distanvec[i] > maxdis) { |
---|
951 | maxdis = distanvec[i]; |
---|
952 | } |
---|
953 | } |
---|
954 | *proportion = (double) MAXCOLUMN / (maxdis * 3.0); |
---|
955 | if (*proportion > 1.0) *proportion = 1.0; |
---|
956 | } |
---|
957 | |
---|
958 | |
---|
959 | void prtopology(FILE *outfp) |
---|
960 | { |
---|
961 | int n, maxn, depth; |
---|
962 | ivector umbrella; |
---|
963 | ivector column; |
---|
964 | Node *cp, *rp; |
---|
965 | |
---|
966 | getproportion(&Proportion, Distanvec, Numspc); |
---|
967 | |
---|
968 | umbrella = new_ivector(Numspc); |
---|
969 | column = new_ivector(Numspc); |
---|
970 | |
---|
971 | for (n = 0; n < Numspc; n++) { |
---|
972 | umbrella[n] = FALSE; |
---|
973 | column[n] = 3; |
---|
974 | } |
---|
975 | column[0] = 1; |
---|
976 | |
---|
977 | fputc(' ', outfp); |
---|
978 | |
---|
979 | /* original code: rp = Ctree->rootp */ |
---|
980 | /* but we want to print the first group in the |
---|
981 | trichotomy as outgroup at the bottom! */ |
---|
982 | rp = Ctree->rootp->isop; |
---|
983 | |
---|
984 | for (maxn = 1, cp = rp->isop; cp != rp; cp = cp->isop, maxn++) |
---|
985 | ; |
---|
986 | depth = 1; |
---|
987 | n = 0; |
---|
988 | |
---|
989 | cp = rp; |
---|
990 | do { |
---|
991 | cp = cp->isop; |
---|
992 | n++; |
---|
993 | prbranch(cp->kinp, depth, n, maxn, umbrella, column, outfp); |
---|
994 | if (cp != rp) fprintf(outfp, "%*c\n ", column[0], ':'); |
---|
995 | } while (cp != rp); |
---|
996 | |
---|
997 | free_ivector(umbrella); |
---|
998 | free_ivector(column); |
---|
999 | } |
---|
1000 | |
---|
1001 | |
---|
1002 | /* print unrooted tree file with branch lengths */ |
---|
1003 | void fputphylogeny(FILE *fp) |
---|
1004 | { |
---|
1005 | Node *cp, *rp; |
---|
1006 | int n; |
---|
1007 | |
---|
1008 | cp = rp = Ctree->rootp; |
---|
1009 | putc('(', fp); |
---|
1010 | n = 1; |
---|
1011 | do { |
---|
1012 | cp = cp->isop->kinp; |
---|
1013 | if (cp->isop == NULL) { /* external node */ |
---|
1014 | if (n > 60) { |
---|
1015 | fprintf(fp, "\n"); |
---|
1016 | n = 2; |
---|
1017 | } |
---|
1018 | n += fputid(fp, cp->number); |
---|
1019 | fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01); |
---|
1020 | n += 7; |
---|
1021 | cp = cp->kinp; |
---|
1022 | } else { /* internal node */ |
---|
1023 | if (cp->descen) { |
---|
1024 | if (n > 60) { |
---|
1025 | fprintf(fp, "\n"); |
---|
1026 | n = 1; |
---|
1027 | } |
---|
1028 | putc('(', fp); |
---|
1029 | n++; |
---|
1030 | } else { |
---|
1031 | putc(')', fp); |
---|
1032 | n++; |
---|
1033 | if (n > 60) { |
---|
1034 | fprintf(fp, "\n"); |
---|
1035 | n = 1; |
---|
1036 | } |
---|
1037 | /* internal label */ |
---|
1038 | if (cp->kinp->label != NULL) { |
---|
1039 | fprintf(fp, "%s", cp->kinp->label); |
---|
1040 | n += strlen(cp->kinp->label); |
---|
1041 | } |
---|
1042 | fprintf(fp, ":%.5f", ((clockmode ? cp->lengthc : cp->length))*0.01); |
---|
1043 | n += 7; |
---|
1044 | } |
---|
1045 | } |
---|
1046 | if (!cp->descen && !cp->isop->descen && cp != rp) { |
---|
1047 | putc(',', fp); /* not last subtree */ |
---|
1048 | n++; |
---|
1049 | } |
---|
1050 | } while (cp != rp); |
---|
1051 | fprintf(fp, ")"); |
---|
1052 | /* internal label */ |
---|
1053 | if (cp->label != NULL) |
---|
1054 | fprintf(fp, "%s", cp->label); |
---|
1055 | fprintf(fp, ";\n"); |
---|
1056 | } |
---|
1057 | |
---|
1058 | |
---|
1059 | void resulttree(FILE *outfp) |
---|
1060 | { |
---|
1061 | int n, ne, closeflag; |
---|
1062 | Node *ep, *ip; |
---|
1063 | double blen; |
---|
1064 | |
---|
1065 | closeflag = FALSE; |
---|
1066 | |
---|
1067 | if (clockmode) { |
---|
1068 | fprintf(outfp, "\n branch length nc/c"); |
---|
1069 | fprintf(outfp, " branch length nc/c (= non-clock/clock)\n"); |
---|
1070 | } else { |
---|
1071 | fprintf(outfp, "\n branch length S.E."); |
---|
1072 | fprintf(outfp, " branch length S.E.\n"); |
---|
1073 | } |
---|
1074 | for (n = 0; n < Numspc; n++) { |
---|
1075 | ep = Ctree->ebrnchp[n]; |
---|
1076 | ne = ep->number; |
---|
1077 | fputid10(outfp, ne); |
---|
1078 | fputs(" ", outfp); |
---|
1079 | fprintf(outfp, "%3d", ne + 1); |
---|
1080 | blen = (clockmode ? ep->lengthc : ep->length); |
---|
1081 | fprintf(outfp, "%9.5f", blen*0.01); |
---|
1082 | if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE; |
---|
1083 | if (clockmode) |
---|
1084 | fprintf(outfp, "%9.3f", (ep->length)/(ep->lengthc)); |
---|
1085 | else |
---|
1086 | fprintf(outfp, "%9.5f", 0.01*sqrt(ep->kinp->varlen)); |
---|
1087 | if (n < Numibrnch) { |
---|
1088 | ip = Ctree->ibrnchp[n]; |
---|
1089 | fprintf(outfp, "%8d", n + 1 + Numspc); |
---|
1090 | blen = (clockmode ? ip->lengthc : ip->length); |
---|
1091 | fprintf(outfp, "%9.5f", blen*0.01); |
---|
1092 | if (blen < 5.0*MINARC || blen > 0.95*MAXARC) closeflag = TRUE; |
---|
1093 | if (clockmode) |
---|
1094 | fprintf(outfp, "%9.3f", (ip->length)/(ip->lengthc)); |
---|
1095 | else |
---|
1096 | fprintf(outfp, "%9.5f", 0.01*sqrt(ip->kinp->varlen)); |
---|
1097 | fputc('\n', outfp); |
---|
1098 | } else { |
---|
1099 | if (n == Numspc - 3) { |
---|
1100 | fputc('\n', outfp); |
---|
1101 | } else if (n == Numspc - 2) { |
---|
1102 | if (clockmode) { |
---|
1103 | if (!Convergc) |
---|
1104 | fprintf(outfp, " No convergence after %d iterations!\n", Numitc); |
---|
1105 | else |
---|
1106 | fprintf(outfp, " %d iterations until convergence\n", Numitc); |
---|
1107 | } else { |
---|
1108 | if (!Converg) |
---|
1109 | fprintf(outfp, " No convergence after %d iterations!\n", Numit); |
---|
1110 | else |
---|
1111 | fprintf(outfp, " %d iterations until convergence\n", Numit); |
---|
1112 | } |
---|
1113 | } else if (n == Numspc - 1) { |
---|
1114 | fprintf(outfp, " log L: %.2f\n", (clockmode ? Ctree->lklhdc : Ctree->lklhd)); |
---|
1115 | } else { |
---|
1116 | fputc('\n', outfp); |
---|
1117 | } |
---|
1118 | } |
---|
1119 | } |
---|
1120 | if(closeflag) |
---|
1121 | fprintf(outfp, "\nWARNING --- at least one branch length is close to an internal boundary!\n"); |
---|
1122 | } |
---|
1123 | |
---|
1124 | |
---|
1125 | /******************************************************************************/ |
---|
1126 | /* Neighbor-joining tree */ |
---|
1127 | /******************************************************************************/ |
---|
1128 | |
---|
1129 | |
---|
1130 | /* compute NJ tree and write to file */ |
---|
1131 | void njtree(FILE *fp) |
---|
1132 | { |
---|
1133 | /* reserve memory for tree if necessary */ |
---|
1134 | if (mlmode != 3) { /* no tree */ |
---|
1135 | if (Ctree != NULL) |
---|
1136 | free_tree(Ctree, Numspc); |
---|
1137 | Ctree = new_tree(Maxspc, Numptrn, Seqpat); |
---|
1138 | Numbrnch = 2*Maxspc-3; |
---|
1139 | Numibrnch = Maxspc-3; |
---|
1140 | Numspc = Maxspc; |
---|
1141 | mlmode = 3; |
---|
1142 | } |
---|
1143 | |
---|
1144 | /* construct NJ tree from distance matrix */ |
---|
1145 | njdistantree(Ctree); |
---|
1146 | |
---|
1147 | fputphylogeny(fp); |
---|
1148 | } |
---|
1149 | |
---|
1150 | |
---|
1151 | /* construct NJ tree from distance matrix */ |
---|
1152 | void njdistantree(Tree *tr) |
---|
1153 | { |
---|
1154 | int i, j, otui=0, otuj=0, otuk, nsp2, cinode, step, restsp, k; |
---|
1155 | double dij, bix, bjx, bkx, sij, smax, dnsp2; |
---|
1156 | dvector r; |
---|
1157 | dmatrix distan; |
---|
1158 | Node **psotu, *cp, *ip, *jp, *kp; |
---|
1159 | |
---|
1160 | distan = new_dmatrix(Maxspc,Maxspc); |
---|
1161 | for (i = 0; i < Maxspc; i++) |
---|
1162 | for (j = 0; j < Maxspc; j++) |
---|
1163 | distan[i][j] = Distanmat[i][j]; |
---|
1164 | |
---|
1165 | nsp2 = Maxspc - 2; |
---|
1166 | dnsp2 = 1.0 / nsp2; |
---|
1167 | |
---|
1168 | r = new_dvector(Maxspc); |
---|
1169 | |
---|
1170 | psotu = (Node **) malloc((unsigned)Maxspc * sizeof(Node *)); |
---|
1171 | if (psotu == NULL) maerror("psotu in njdistantree"); |
---|
1172 | |
---|
1173 | /* external branches are start OTUs */ |
---|
1174 | for (i = 0; i < Maxspc; i++) |
---|
1175 | psotu[i] = tr->ebrnchp[i]->kinp; |
---|
1176 | |
---|
1177 | restsp = Maxspc; |
---|
1178 | cinode = 0; /* counter for internal nodes */ |
---|
1179 | |
---|
1180 | for (step = 0; restsp > 3; step++) { /* NJ clustering steps */ |
---|
1181 | |
---|
1182 | for (i = 0; i < Maxspc; i++) { |
---|
1183 | if (psotu[i] != NULL) { |
---|
1184 | for (j = 0, r[i] = 0.0; j < Maxspc; j++) |
---|
1185 | if (psotu[j] != NULL) |
---|
1186 | r[i] += distan[i][j]; |
---|
1187 | } |
---|
1188 | } |
---|
1189 | |
---|
1190 | smax = -1.0; |
---|
1191 | for (i = 0; i < Maxspc-1; i++) { |
---|
1192 | if (psotu[i] != NULL) { |
---|
1193 | |
---|
1194 | for (j = i+1; j < Maxspc; j++) { |
---|
1195 | if (psotu[j] != NULL) |
---|
1196 | { |
---|
1197 | sij = ( r[i] + r[j] ) * dnsp2 - distan[i][j]; |
---|
1198 | |
---|
1199 | if (sij > smax) { |
---|
1200 | smax = sij; |
---|
1201 | otui = i; |
---|
1202 | otuj = j; |
---|
1203 | } |
---|
1204 | } |
---|
1205 | } |
---|
1206 | } |
---|
1207 | } |
---|
1208 | |
---|
1209 | /* new pair: otui and otuj */ |
---|
1210 | |
---|
1211 | dij = distan[otui][otuj]; |
---|
1212 | bix = (dij + r[otui]/nsp2 - r[otuj]/nsp2) * 0.5; |
---|
1213 | bjx = dij - bix; |
---|
1214 | |
---|
1215 | cp = tr->ibrnchp[cinode]; |
---|
1216 | |
---|
1217 | ip = psotu[otui]; |
---|
1218 | jp = psotu[otuj]; |
---|
1219 | cp->isop = ip; |
---|
1220 | ip->isop = jp; |
---|
1221 | jp->isop = cp; |
---|
1222 | ip->length = bix; |
---|
1223 | jp->length = bjx; |
---|
1224 | ip->kinp->length = ip->length; |
---|
1225 | jp->kinp->length = jp->length; |
---|
1226 | |
---|
1227 | cp = cp->kinp; |
---|
1228 | |
---|
1229 | for (k = 0; k < Maxspc; k++) |
---|
1230 | { |
---|
1231 | if (psotu[k] != NULL && k != otui && k != otuj) |
---|
1232 | { |
---|
1233 | dij = (distan[otui][k] + distan[otuj][k] - distan[otui][otuj]) * 0.5; |
---|
1234 | distan[otui][k] = dij; |
---|
1235 | distan[k][otui] = dij; |
---|
1236 | } |
---|
1237 | } |
---|
1238 | distan[otui][otui] = 0.0; |
---|
1239 | |
---|
1240 | psotu[otui] = cp; |
---|
1241 | psotu[otuj] = NULL; |
---|
1242 | |
---|
1243 | cinode++; |
---|
1244 | |
---|
1245 | restsp--; |
---|
1246 | nsp2--; |
---|
1247 | dnsp2 = 1.0 / nsp2; |
---|
1248 | } |
---|
1249 | |
---|
1250 | otui = otuj = otuk = -1; |
---|
1251 | for (i = 0; i < Maxspc; i++) |
---|
1252 | { |
---|
1253 | if (psotu[i] != NULL) { |
---|
1254 | if (otui == -1) otui = i; |
---|
1255 | else if (otuj == -1) otuj = i; |
---|
1256 | else otuk = i; |
---|
1257 | } |
---|
1258 | } |
---|
1259 | bix = (distan[otui][otuj] + distan[otui][otuk] - distan[otuj][otuk]) * 0.5; |
---|
1260 | bjx = distan[otui][otuj] - bix; |
---|
1261 | bkx = distan[otui][otuk] - bix; |
---|
1262 | ip = psotu[otui]; |
---|
1263 | jp = psotu[otuj]; |
---|
1264 | kp = psotu[otuk]; |
---|
1265 | ip->isop = jp; |
---|
1266 | jp->isop = kp; |
---|
1267 | kp->isop = ip; |
---|
1268 | ip->length = bix; |
---|
1269 | jp->length = bjx; |
---|
1270 | kp->length = bkx; |
---|
1271 | ip->kinp->length = ip->length; |
---|
1272 | jp->kinp->length = jp->length; |
---|
1273 | kp->kinp->length = kp->length; |
---|
1274 | |
---|
1275 | tr->rootp = kp; |
---|
1276 | |
---|
1277 | free_dvector(r); |
---|
1278 | free_dmatrix(distan); |
---|
1279 | free((Node *) psotu); |
---|
1280 | } |
---|
1281 | |
---|
1282 | /******************************************************************************/ |
---|
1283 | /* find best assignment of rate categories */ |
---|
1284 | /******************************************************************************/ |
---|
1285 | |
---|
1286 | /* find best assignment of rate categories */ |
---|
1287 | void findbestratecombination() |
---|
1288 | { |
---|
1289 | int k, u; |
---|
1290 | double bestvalue, fv2; |
---|
1291 | dvector catprob; |
---|
1292 | dmatrix cdl; |
---|
1293 | |
---|
1294 | cdl = Ctree->condlkl; |
---|
1295 | catprob = new_dvector(numcats+1); |
---|
1296 | fv2 = (1.0-fracinv)/(double) numcats; |
---|
1297 | |
---|
1298 | for (k = 0; k < Numptrn; k++) { |
---|
1299 | /* zero rate */ |
---|
1300 | if (constpat[k] == TRUE) |
---|
1301 | catprob[0] = fracinv*Freqtpm[(int) Seqpat[0][k]]; |
---|
1302 | else |
---|
1303 | catprob[0] = 0.0; |
---|
1304 | /* non-zero-rates */ |
---|
1305 | for (u = 1; u < numcats+1; u++) |
---|
1306 | catprob[u] = fv2*cdl[u-1][k]; |
---|
1307 | /* find best */ |
---|
1308 | bestvalue = catprob[0]; |
---|
1309 | bestrate[k] = 0; |
---|
1310 | for (u = 1; u < numcats+1; u++) |
---|
1311 | if (catprob[u] >= bestvalue) { |
---|
1312 | bestvalue = catprob[u]; |
---|
1313 | bestrate[k] = u; |
---|
1314 | } |
---|
1315 | } |
---|
1316 | free_dvector(catprob); |
---|
1317 | bestratefound = 1; |
---|
1318 | } |
---|
1319 | |
---|
1320 | /* print best assignment of rate categories */ |
---|
1321 | void printbestratecombination(FILE *fp) |
---|
1322 | { |
---|
1323 | int s, k; |
---|
1324 | |
---|
1325 | for (s = 0; s < Maxsite; s++) { |
---|
1326 | k = Alias[s]; |
---|
1327 | fprintf(fp, "%2d", bestrate[k]); |
---|
1328 | if ((s+1) % 30 == 0) |
---|
1329 | fprintf(fp, "\n"); |
---|
1330 | else if ((s+1) % 10 == 0) |
---|
1331 | fprintf(fp, " "); |
---|
1332 | } |
---|
1333 | if (s % 70 != 0) |
---|
1334 | fprintf(fp, "\n"); |
---|
1335 | } |
---|
1336 | |
---|
1337 | |
---|
1338 | /******************************************************************************/ |
---|
1339 | /* computation of clocklike branch lengths */ |
---|
1340 | /******************************************************************************/ |
---|
1341 | |
---|
1342 | /* checks wether e is a valid edge specification */ |
---|
1343 | int checkedge(int e) |
---|
1344 | { |
---|
1345 | /* there are Numspc external branches: |
---|
1346 | 0 - Numspc-1 |
---|
1347 | there are Numibrnch internal branches: |
---|
1348 | Numspc - Numspc+Numibrnch-1 |
---|
1349 | */ |
---|
1350 | |
---|
1351 | if (e < 0) return FALSE; |
---|
1352 | if (e < Numspc+Numibrnch) return TRUE; |
---|
1353 | else return FALSE; |
---|
1354 | } |
---|
1355 | |
---|
1356 | /* print topology of subtree */ |
---|
1357 | void fputsubstree(FILE *fp, Node *ip) |
---|
1358 | { |
---|
1359 | Node *cp; |
---|
1360 | |
---|
1361 | if (ip->isop == NULL) { /* terminal nodes */ |
---|
1362 | numtc += fputid(fp, ip->number); |
---|
1363 | } else { |
---|
1364 | cp = ip; |
---|
1365 | fprintf(fp, "("); |
---|
1366 | numtc += 1; |
---|
1367 | do { |
---|
1368 | cp = cp->isop->kinp; |
---|
1369 | if (cp->isop == NULL) { /* external node */ |
---|
1370 | numtc += fputid(fp, cp->number); |
---|
1371 | fprintf(fp, ":%.5f", (cp->lengthc)*0.01); |
---|
1372 | numtc += 7; |
---|
1373 | cp = cp->kinp; |
---|
1374 | } else { /* internal node */ |
---|
1375 | if (cp->height > 0.0) { |
---|
1376 | fprintf(fp, "("); |
---|
1377 | numtc += 1; |
---|
1378 | } else if (cp->height < 0.0) { |
---|
1379 | fprintf(fp, ")"); |
---|
1380 | numtc += 1; |
---|
1381 | if (numtc > 60) { |
---|
1382 | fprintf(fp, "\n"); |
---|
1383 | numtc = 1; |
---|
1384 | } |
---|
1385 | /* internal label */ |
---|
1386 | if (cp->kinp->label != NULL) { |
---|
1387 | fprintf(fp, "%s", cp->kinp->label); |
---|
1388 | numtc += strlen(cp->kinp->label); |
---|
1389 | } |
---|
1390 | if (numtc > 60) { |
---|
1391 | fprintf(fp, "\n"); |
---|
1392 | numtc = 1; |
---|
1393 | } |
---|
1394 | fprintf(fp, ":%.5f", (cp->lengthc)*0.01); |
---|
1395 | numtc += 6; |
---|
1396 | if (numtc > 60) { |
---|
1397 | fprintf(fp, "\n"); |
---|
1398 | numtc = 1; |
---|
1399 | } |
---|
1400 | } |
---|
1401 | } |
---|
1402 | if (cp->height <= 0.0 && cp->isop->height <= 0.0 && |
---|
1403 | cp->isop != ip) { |
---|
1404 | putc(',', fp); /* not last subtree */ |
---|
1405 | numtc += 1; |
---|
1406 | if (numtc > 60) { |
---|
1407 | fprintf(fp, "\n"); |
---|
1408 | numtc = 1; |
---|
1409 | } |
---|
1410 | } |
---|
1411 | } while (cp->isop != ip); |
---|
1412 | fprintf(fp, ")"); |
---|
1413 | numtc += 1; |
---|
1414 | } |
---|
1415 | if (numtc > 60) { |
---|
1416 | fprintf(fp, "\n"); |
---|
1417 | numtc = 1; |
---|
1418 | } |
---|
1419 | |
---|
1420 | } |
---|
1421 | |
---|
1422 | /* print rooted tree file */ |
---|
1423 | void fputrooted(FILE *fp, int e) |
---|
1424 | { |
---|
1425 | Node *rootbr; |
---|
1426 | |
---|
1427 | /* to be called only after clocklike branch |
---|
1428 | lengths have been computed */ |
---|
1429 | |
---|
1430 | /* pointer to root branch */ |
---|
1431 | if (e < Numspc) rootbr = Ctree->ebrnchp[e]; |
---|
1432 | else rootbr = Ctree->ibrnchp[e - Numspc]; |
---|
1433 | |
---|
1434 | fprintf(fp, "("); |
---|
1435 | numtc = 2; |
---|
1436 | fputsubstree(fp, rootbr); |
---|
1437 | /* internal label */ |
---|
1438 | if (rootbr->label != NULL) { |
---|
1439 | fprintf(fp, "%s", rootbr->label); |
---|
1440 | numtc += strlen(rootbr->label); |
---|
1441 | } |
---|
1442 | if (numtc > 60) { |
---|
1443 | fprintf(fp, "\n"); |
---|
1444 | numtc = 1; |
---|
1445 | } |
---|
1446 | fprintf(fp, ":%.5f,", (hroot - rootbr->height)*0.01); |
---|
1447 | numtc += 7; |
---|
1448 | if (numtc > 60) { |
---|
1449 | fprintf(fp, "\n"); |
---|
1450 | numtc = 1; |
---|
1451 | } |
---|
1452 | fputsubstree(fp, rootbr->kinp); |
---|
1453 | /* internal label */ |
---|
1454 | if (rootbr->kinp->label != NULL) { |
---|
1455 | fprintf(fp, "%s", rootbr->kinp->label); |
---|
1456 | numtc += strlen(rootbr->kinp->label); |
---|
1457 | } |
---|
1458 | if (numtc > 60) { |
---|
1459 | fprintf(fp, "\n"); |
---|
1460 | numtc = 1; |
---|
1461 | } |
---|
1462 | fprintf(fp, ":%.5f);\n", (hroot - rootbr->kinp->height)*0.01); |
---|
1463 | } |
---|
1464 | |
---|
1465 | /* finds heights in subtree */ |
---|
1466 | void findheights(Node *ip) |
---|
1467 | { |
---|
1468 | Node *cp, *rp; |
---|
1469 | |
---|
1470 | if (ip->isop != NULL) { /* forget terminal nodes */ |
---|
1471 | |
---|
1472 | cp = ip; |
---|
1473 | |
---|
1474 | /* initialise node */ |
---|
1475 | cp->height = 1.0; /* up */ |
---|
1476 | rp = cp; |
---|
1477 | while (rp->isop != cp) { |
---|
1478 | rp = rp->isop; |
---|
1479 | rp->height = -1.0; /* down */ |
---|
1480 | } |
---|
1481 | |
---|
1482 | do { |
---|
1483 | cp = cp->isop->kinp; |
---|
1484 | if (cp->isop == NULL) { /* external node */ |
---|
1485 | cp = cp->kinp; |
---|
1486 | } else { /* internal node */ |
---|
1487 | if (cp->height == 0.0) { /* node not yet visited */ |
---|
1488 | cp->height = 1.0; /* up */ |
---|
1489 | rp = cp; |
---|
1490 | while (rp->isop != cp) { |
---|
1491 | rp = rp->isop; |
---|
1492 | rp->height = -1.0; /* down */ |
---|
1493 | } |
---|
1494 | } else if (cp->kinp->height == 1.0) { |
---|
1495 | /* cp->kinp is next height pointer */ |
---|
1496 | heights[Numhts] = cp->kinp; |
---|
1497 | Numhts++; |
---|
1498 | } |
---|
1499 | } |
---|
1500 | } while (cp->isop != ip); |
---|
1501 | /* ip is last height pointer */ |
---|
1502 | heights[Numhts] = ip; |
---|
1503 | Numhts++; |
---|
1504 | } |
---|
1505 | } |
---|
1506 | |
---|
1507 | |
---|
1508 | /* initialise clocklike branch lengths (with root on edge e) */ |
---|
1509 | void initclock(int e) |
---|
1510 | { |
---|
1511 | int n, h, count; |
---|
1512 | Node *cp, *rp; |
---|
1513 | double sum, minh, aveh, len; |
---|
1514 | |
---|
1515 | /* be sure to have a Ctree in memory and |
---|
1516 | to have pairwise distances computed */ |
---|
1517 | |
---|
1518 | clockmode = 1; /* clocklike branch lengths */ |
---|
1519 | |
---|
1520 | /* least square estimate for branch length */ |
---|
1521 | changedistan(Distanmat, Distanvec, Numspc); |
---|
1522 | lslength(Ctree, Distanvec, Numspc, Numibrnch, Brnlength); |
---|
1523 | |
---|
1524 | /* pointer to root branch */ |
---|
1525 | if (e < Numspc) rootbr = Ctree->ebrnchp[e]; |
---|
1526 | else rootbr = Ctree->ibrnchp[e - Numspc]; |
---|
1527 | |
---|
1528 | /* clear all heights */ |
---|
1529 | for (n = 0; n < Numspc; n++) { |
---|
1530 | Ctree->ebrnchp[n]->height = 0.0; |
---|
1531 | Ctree->ebrnchp[n]->kinp->height = 0.0; |
---|
1532 | Ctree->ebrnchp[n]->varheight = 0.0; |
---|
1533 | Ctree->ebrnchp[n]->kinp->varheight = 0.0; |
---|
1534 | if (n < Numibrnch) { |
---|
1535 | Ctree->ibrnchp[n]->height = 0.0; |
---|
1536 | Ctree->ibrnchp[n]->kinp->height = 0.0; |
---|
1537 | Ctree->ibrnchp[n]->varheight = 0.0; |
---|
1538 | Ctree->ibrnchp[n]->kinp->varheight = 0.0; |
---|
1539 | } |
---|
1540 | } |
---|
1541 | |
---|
1542 | /* collect pointers to height nodes */ |
---|
1543 | Numhts = 0; |
---|
1544 | findheights(rootbr); /* one side */ |
---|
1545 | findheights(rootbr->kinp); /* other side */ |
---|
1546 | |
---|
1547 | /* assign preliminary approximate heights and |
---|
1548 | corresponding branch lengths */ |
---|
1549 | for (h = 0; h < Numhts; h++) { |
---|
1550 | |
---|
1551 | cp = rp = heights[h]; |
---|
1552 | sum = 0; |
---|
1553 | count = 0; |
---|
1554 | minh = 0.0; |
---|
1555 | while (rp->isop != cp) { |
---|
1556 | count++; |
---|
1557 | rp = rp->isop; |
---|
1558 | sum += rp->lengthc + rp->kinp->height; |
---|
1559 | if (rp->kinp->height > minh) minh = rp->kinp->height; |
---|
1560 | } |
---|
1561 | aveh = sum / (double) count; |
---|
1562 | if (aveh < minh + MINARC) aveh = minh + MINARC; |
---|
1563 | cp->height = aveh; |
---|
1564 | rp = cp; |
---|
1565 | while (rp->isop != cp) { |
---|
1566 | rp = rp->isop; |
---|
1567 | len = aveh - rp->kinp->height; |
---|
1568 | rp->kinp->lengthc = len; |
---|
1569 | rp->lengthc = len; |
---|
1570 | } |
---|
1571 | |
---|
1572 | } |
---|
1573 | if (rootbr->height > rootbr->kinp->height) minh = rootbr->height; |
---|
1574 | else minh = rootbr->kinp->height; |
---|
1575 | aveh = 0.5*(rootbr->lengthc + rootbr->height + rootbr->kinp->height); |
---|
1576 | if (aveh < minh + MINARC) aveh = minh + MINARC; |
---|
1577 | hroot = aveh; |
---|
1578 | maxhroot = RMHROOT*hroot; /* maximal possible hroot */ |
---|
1579 | len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height); |
---|
1580 | rootbr->lengthc = len; |
---|
1581 | rootbr->kinp->lengthc = len; |
---|
1582 | } |
---|
1583 | |
---|
1584 | /* approximate likelihood under the constaining assumption of |
---|
1585 | clocklike branch lengths (with root on edge e) */ |
---|
1586 | double clock_alklhd(int e) |
---|
1587 | { |
---|
1588 | initclock(e); |
---|
1589 | Ctree->lklhdc = treelkl(Ctree); |
---|
1590 | |
---|
1591 | return Ctree->lklhdc; |
---|
1592 | } |
---|
1593 | |
---|
1594 | /* log-likelihood given height ht at node pointed to by chep */ |
---|
1595 | double heightlkl(double ht) |
---|
1596 | { |
---|
1597 | Node *rp; |
---|
1598 | double len; |
---|
1599 | |
---|
1600 | /* adjust branch lengths */ |
---|
1601 | chep->height = ht; |
---|
1602 | /* descendent branches */ |
---|
1603 | rp = chep; |
---|
1604 | while (rp->isop != chep) { |
---|
1605 | rp = rp->isop; |
---|
1606 | len = chep->height - rp->kinp->height; |
---|
1607 | rp->kinp->lengthc = len; |
---|
1608 | rp->lengthc = len; |
---|
1609 | } |
---|
1610 | /* upward branch */ |
---|
1611 | if (chep == rootbr || chep->kinp == rootbr) { |
---|
1612 | len = (hroot - chep->height) + (hroot - chep->kinp->height); |
---|
1613 | chep->lengthc = len; |
---|
1614 | chep->kinp->lengthc = len; |
---|
1615 | } else { |
---|
1616 | rp = chep->kinp; |
---|
1617 | while (rp->isop->height <= 0.0) |
---|
1618 | rp = rp->isop; |
---|
1619 | chep->lengthc = rp->isop->height - chep->height; |
---|
1620 | chep->kinp->lengthc = rp->isop->height - chep->height; |
---|
1621 | } |
---|
1622 | |
---|
1623 | /* compute likelihood */ |
---|
1624 | Ctree->lklhdc = treelkl(Ctree); |
---|
1625 | |
---|
1626 | return -(Ctree->lklhdc); /* we use a minimizing procedure */ |
---|
1627 | } |
---|
1628 | |
---|
1629 | /* optimize current height */ |
---|
1630 | void optheight(void) |
---|
1631 | { |
---|
1632 | double he, fx, f2x, minh, maxh, len; |
---|
1633 | Node *rp; |
---|
1634 | |
---|
1635 | /* current height */ |
---|
1636 | he = chep->height; |
---|
1637 | |
---|
1638 | /* minimum */ |
---|
1639 | minh = 0.0; |
---|
1640 | rp = chep; |
---|
1641 | while (rp->isop != chep) { |
---|
1642 | rp = rp->isop; |
---|
1643 | if (rp->kinp->height > minh) |
---|
1644 | minh = rp->kinp->height; |
---|
1645 | } |
---|
1646 | minh += MINARC; |
---|
1647 | |
---|
1648 | /* maximum */ |
---|
1649 | if (chep == rootbr || chep->kinp == rootbr) { |
---|
1650 | maxh = hroot; |
---|
1651 | } else { |
---|
1652 | rp = chep->kinp; |
---|
1653 | while (rp->isop->height <= 0.0) |
---|
1654 | rp = rp->isop; |
---|
1655 | maxh = rp->isop->height; |
---|
1656 | } |
---|
1657 | maxh -= MINARC; |
---|
1658 | |
---|
1659 | /* check borders for height */ |
---|
1660 | if (he < minh) he = minh; |
---|
1661 | if (he > maxh) he = maxh; |
---|
1662 | |
---|
1663 | /* optimization */ |
---|
1664 | if (!(he == minh && he == maxh)) |
---|
1665 | he = onedimenmin(minh, he, maxh, heightlkl, HEPSILON, &fx, &f2x); |
---|
1666 | |
---|
1667 | /* variance of height */ |
---|
1668 | f2x = fabs(f2x); |
---|
1669 | if (1.0/(maxhroot*maxhroot) < f2x) |
---|
1670 | chep->varheight = 1.0/f2x; |
---|
1671 | else |
---|
1672 | chep->varheight = maxhroot*maxhroot; |
---|
1673 | |
---|
1674 | /* adjust branch lengths */ |
---|
1675 | chep->height = he; |
---|
1676 | /* descendent branches */ |
---|
1677 | rp = chep; |
---|
1678 | while (rp->isop != chep) { |
---|
1679 | rp = rp->isop; |
---|
1680 | len = chep->height - rp->kinp->height; |
---|
1681 | rp->kinp->lengthc = len; |
---|
1682 | rp->lengthc = len; |
---|
1683 | } |
---|
1684 | /* upward branch */ |
---|
1685 | if (chep == rootbr || chep->kinp == rootbr) { |
---|
1686 | len = (hroot - chep->height) + (hroot - chep->kinp->height); |
---|
1687 | chep->lengthc = len; |
---|
1688 | chep->kinp->lengthc = len; |
---|
1689 | } else { |
---|
1690 | rp = chep->kinp; |
---|
1691 | while (rp->isop->height <= 0.0) |
---|
1692 | rp = rp->isop; |
---|
1693 | chep->lengthc = rp->isop->height - chep->height; |
---|
1694 | chep->kinp->lengthc = rp->isop->height - chep->height; |
---|
1695 | } |
---|
1696 | } |
---|
1697 | |
---|
1698 | /* log-likelihood given height ht at root */ |
---|
1699 | double rheightlkl(double ht) |
---|
1700 | { |
---|
1701 | double len; |
---|
1702 | |
---|
1703 | /* adjust branch lengths */ |
---|
1704 | hroot = ht; |
---|
1705 | len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height); |
---|
1706 | rootbr->lengthc = len; |
---|
1707 | rootbr->kinp->lengthc = len; |
---|
1708 | |
---|
1709 | /* compute likelihood */ |
---|
1710 | Ctree->lklhdc = treelkl(Ctree); |
---|
1711 | |
---|
1712 | return -(Ctree->lklhdc); /* we use a minimizing procedure */ |
---|
1713 | } |
---|
1714 | |
---|
1715 | /* optimize height of root */ |
---|
1716 | void optrheight(void) |
---|
1717 | { |
---|
1718 | double he, fx, f2x, minh, len; |
---|
1719 | |
---|
1720 | /* current height */ |
---|
1721 | he = hroot; |
---|
1722 | |
---|
1723 | /* minimum */ |
---|
1724 | if (rootbr->height > rootbr->kinp->height) |
---|
1725 | minh = rootbr->height; |
---|
1726 | else |
---|
1727 | minh = rootbr->kinp->height; |
---|
1728 | minh += MINARC; |
---|
1729 | |
---|
1730 | /* check borders for height */ |
---|
1731 | if (he < minh) he = minh; |
---|
1732 | if (he > maxhroot) he = maxhroot; |
---|
1733 | |
---|
1734 | /* optimization */ |
---|
1735 | he = onedimenmin(minh, he, maxhroot, rheightlkl, HEPSILON, &fx, &f2x); |
---|
1736 | |
---|
1737 | /* variance of height of root */ |
---|
1738 | f2x = fabs(f2x); |
---|
1739 | if (1.0/(maxhroot*maxhroot) < f2x) |
---|
1740 | varhroot = 1.0/f2x; |
---|
1741 | else |
---|
1742 | varhroot = maxhroot*maxhroot; |
---|
1743 | |
---|
1744 | /* adjust branch lengths */ |
---|
1745 | hroot = he; |
---|
1746 | len = (hroot - rootbr->height) + (hroot - rootbr->kinp->height); |
---|
1747 | rootbr->lengthc = len; |
---|
1748 | rootbr->kinp->lengthc = len; |
---|
1749 | } |
---|
1750 | |
---|
1751 | /* exact likelihood under the constaining assumption of |
---|
1752 | clocklike branch lengths (with root on edge e) */ |
---|
1753 | double clock_lklhd(int e) |
---|
1754 | { |
---|
1755 | int h, nconv; |
---|
1756 | double old; |
---|
1757 | |
---|
1758 | Numitc = 0; |
---|
1759 | Convergc = FALSE; |
---|
1760 | |
---|
1761 | initclock(e); |
---|
1762 | |
---|
1763 | do { |
---|
1764 | |
---|
1765 | Numitc++; |
---|
1766 | nconv = 0; |
---|
1767 | |
---|
1768 | /* optimize height of root */ |
---|
1769 | old = hroot; |
---|
1770 | optrheight(); |
---|
1771 | if (fabs(old - hroot) < HEPSILON) nconv++; |
---|
1772 | |
---|
1773 | /* optimize height of nodes */ |
---|
1774 | for (h = Numhts-1; h >= 0; h--) { |
---|
1775 | |
---|
1776 | /* pointer chep to current height node */ |
---|
1777 | chep = heights[h]; |
---|
1778 | |
---|
1779 | /* store old value */ |
---|
1780 | old = chep->height; |
---|
1781 | |
---|
1782 | /* find better height */ |
---|
1783 | optheight(); |
---|
1784 | |
---|
1785 | /* converged ? */ |
---|
1786 | if (fabs(old - chep->height) < HEPSILON) nconv++; |
---|
1787 | } |
---|
1788 | |
---|
1789 | if (nconv == Numhts+1) Convergc = TRUE; |
---|
1790 | |
---|
1791 | } while (Numitc < MAXIT && !Convergc); |
---|
1792 | |
---|
1793 | /* compute final likelihood */ |
---|
1794 | Ctree->lklhdc = treelkl(Ctree); |
---|
1795 | |
---|
1796 | return Ctree->lklhdc; |
---|
1797 | } |
---|
1798 | |
---|
1799 | /* find out the edge containing the root */ |
---|
1800 | int findrootedge() |
---|
1801 | { |
---|
1802 | int e, ebest; |
---|
1803 | double logbest, logtest; |
---|
1804 | |
---|
1805 | /* compute the likelihood for all edges and take the edge with |
---|
1806 | best likelihood (using approximate ML) */ |
---|
1807 | |
---|
1808 | ebest = 0; |
---|
1809 | logbest = clock_alklhd(0); |
---|
1810 | numbestroot = 1; |
---|
1811 | for (e = 1; e < Numspc+Numibrnch; e++) { |
---|
1812 | logtest = clock_alklhd(e); |
---|
1813 | if (logtest > logbest) { |
---|
1814 | ebest = e; |
---|
1815 | logbest = logtest; |
---|
1816 | numbestroot = 1; |
---|
1817 | } else if (logtest == logbest) { |
---|
1818 | numbestroot++; |
---|
1819 | } |
---|
1820 | } |
---|
1821 | |
---|
1822 | return ebest; |
---|
1823 | } |
---|
1824 | |
---|
1825 | /* show heights and corresponding standard errors */ |
---|
1826 | void resultheights(FILE *fp) |
---|
1827 | { |
---|
1828 | int h, num; |
---|
1829 | Node *cp; |
---|
1830 | |
---|
1831 | fprintf(fp, " height S.E. of node common to branches\n"); |
---|
1832 | for (h = 0; h < Numhts; h++) { |
---|
1833 | fprintf(fp, "%.5f %.5f ", (heights[h]->height)*0.01, |
---|
1834 | sqrt(heights[h]->varheight)*0.01); |
---|
1835 | cp = heights[h]; |
---|
1836 | do { |
---|
1837 | num = (cp->number) + 1; |
---|
1838 | if (cp->kinp->isop != NULL) num += Numspc; /* internal branch */ |
---|
1839 | fprintf(fp, "%d ", num); |
---|
1840 | cp = cp->isop; |
---|
1841 | } while (cp != heights[h]); |
---|
1842 | fprintf(fp, "\n"); |
---|
1843 | |
---|
1844 | } |
---|
1845 | fprintf(fp, "%.5f %.5f of root at branch %d\n", |
---|
1846 | hroot*0.01, sqrt(varhroot)*0.01, locroot+1); |
---|
1847 | } |
---|
1848 | |
---|