1 | #define programName "fastDNAml" |
---|
2 | #define programVersion "1.0.4" |
---|
3 | #define programDate "July 13, 1992" |
---|
4 | |
---|
5 | /* Copyright notice from dnaml: |
---|
6 | * |
---|
7 | * version 3.3. (c) Copyright 1986, 1990 by the University of Washington |
---|
8 | * and Joseph Felsenstein. Written by Joseph Felsenstein. Permission is |
---|
9 | * granted to copy and use this program provided no fee is charged for it |
---|
10 | * and provided that this copyright notice is not removed. |
---|
11 | * |
---|
12 | * Conversion to C and changes in sequential code by Gary Olsen, 1991-1992 |
---|
13 | * |
---|
14 | * p4 version by Hideo Matsuda and Ross Overbeek, 1991-1992 |
---|
15 | */ |
---|
16 | |
---|
17 | /* |
---|
18 | * 1.0.1 Add ntaxa to tree comments |
---|
19 | * Set minimum branch length on reading tree |
---|
20 | * Add blanks around operators in treeString (for prolog parsing) |
---|
21 | * Add program version to treeString comments |
---|
22 | * |
---|
23 | * 1.0.2 Improved option line diagnostics |
---|
24 | * Improved auxiliary line diagnostics |
---|
25 | * Removed some trailing blanks from output |
---|
26 | * |
---|
27 | * 1.0.3 Checkpoint trees that do not need any optimization |
---|
28 | * Print restart tree likelihood before optimizing |
---|
29 | * Fix treefile option so that it really toggles |
---|
30 | * |
---|
31 | * 1.0.4 Add support for tree fact (instead of true Newick tree) in |
---|
32 | * processTreeCom, treeReadLen, str_processTreeCom and |
---|
33 | * str_treeReadLen |
---|
34 | * Use bit operations in randum |
---|
35 | * Correct error in bootstrap mask used with weighting mask |
---|
36 | */ |
---|
37 | |
---|
38 | #ifdef Master |
---|
39 | # undef Master |
---|
40 | # define Master 1 |
---|
41 | # define Slave 0 |
---|
42 | # define Sequential 0 |
---|
43 | #else |
---|
44 | # ifdef Slave |
---|
45 | # undef Slave |
---|
46 | # define Master 0 |
---|
47 | # define Slave 1 |
---|
48 | # define Sequential 0 |
---|
49 | # else |
---|
50 | # ifdef Sequential |
---|
51 | # undef Sequential |
---|
52 | # endif |
---|
53 | # define Master 0 |
---|
54 | # define Slave 0 |
---|
55 | # define Sequential 1 |
---|
56 | # endif |
---|
57 | #endif |
---|
58 | |
---|
59 | #include <stdio.h> |
---|
60 | #include <stdlib.h> |
---|
61 | #include <string.h> |
---|
62 | #include <math.h> |
---|
63 | #include "fastDNAml_1_0.h" |
---|
64 | |
---|
65 | #if Master || Slave |
---|
66 | #include "p4.h" |
---|
67 | #include "comm_link.h" |
---|
68 | #endif |
---|
69 | |
---|
70 | /* Global variables */ |
---|
71 | |
---|
72 | longer seed; /* random number seed with 6 bits per element */ |
---|
73 | |
---|
74 | xarray |
---|
75 | *usedxtip, *freextip; |
---|
76 | |
---|
77 | double |
---|
78 | rate[maxcategories+1], /* rates for categories */ |
---|
79 | ratvalue[maxpatterns], /* rates per site */ |
---|
80 | wgt_rate[maxpatterns], /* weighted rate per site */ |
---|
81 | wgt_rate2[maxpatterns]; /* weighted rate**2 per site */ |
---|
82 | |
---|
83 | double |
---|
84 | xi, xv, ttratio, /* transition/transversion info */ |
---|
85 | freqa, freqc, freqg, freqt, /* base frequencies */ |
---|
86 | freqr, freqy, invfreqr, invfreqy, |
---|
87 | freqar, freqcy, freqgr, freqty, |
---|
88 | totalwrate, /* total of weighted rate values */ |
---|
89 | fracchange; /* random matching fraquency (in a sense) */ |
---|
90 | |
---|
91 | int |
---|
92 | alias[maxsites+1], /* site corresponding to pattern */ |
---|
93 | aliasweight[maxsites+1], /* weight of given pattern */ |
---|
94 | category[maxsites+1], /* rate category of sequence positions */ |
---|
95 | catnumb[maxpatterns], /* category number by pattern number */ |
---|
96 | weight[maxsites+1]; /* weight of sequence positions */ |
---|
97 | |
---|
98 | int |
---|
99 | categs, /* number of rate categories */ |
---|
100 | endsite, /* number of unique sequence patterns */ |
---|
101 | extrainfo, /* runtime information switch */ |
---|
102 | numsp, /* number of species (same as tr->mxtips) */ |
---|
103 | nkeep, /* number of best trees to keep */ |
---|
104 | sites, /* number of input sequence positions */ |
---|
105 | trout, /* write tree to "treefile" */ |
---|
106 | weightsum; /* sum of weights of positions in analysis */ |
---|
107 | |
---|
108 | boolean |
---|
109 | anerror, /* error flag */ |
---|
110 | bootstrap, /* do a set of bootstrap column weights */ |
---|
111 | freqsfrom, /* use empirical base frequencies */ |
---|
112 | interleaved, /* input data are in interleaved format */ |
---|
113 | jumble, /* use random addition order */ |
---|
114 | outgropt, /* use user-supplied outgroup */ |
---|
115 | printdata, /* echo data to output stream */ |
---|
116 | fastadd, /* test addition sites without global smoothing */ |
---|
117 | restart, /* continue sequential addition to partial tree */ |
---|
118 | treeprint; /* print tree to output stream */ |
---|
119 | |
---|
120 | char |
---|
121 | *y[maxsp+1]; /* sequence data array */ |
---|
122 | |
---|
123 | #if Sequential /* Use standard input */ |
---|
124 | # undef DNAML_STEP |
---|
125 | # define DNAML_STEP 0 |
---|
126 | # define INFILE stdin |
---|
127 | #endif |
---|
128 | |
---|
129 | #if Master |
---|
130 | # define MAX_SEND_AHEAD 400 |
---|
131 | char *best_tr_recv = NULL; /* these are used for flow control */ |
---|
132 | double best_lk_recv; |
---|
133 | int send_ahead = 0; /* number of outstanding sends */ |
---|
134 | |
---|
135 | # ifdef DNAML_STEP |
---|
136 | # define DNAML_STEP 1 |
---|
137 | # endif |
---|
138 | # define INFILE Seqf |
---|
139 | # define OUTFILE Outf |
---|
140 | FILE *INFILE, *OUTFILE; |
---|
141 | comm_block comm_slave; |
---|
142 | #endif |
---|
143 | |
---|
144 | #if Slave |
---|
145 | # undef DNAML_STEP |
---|
146 | # define DNAML_STEP 0 |
---|
147 | # define INFILE Seqf |
---|
148 | # define OUTFILE Outf |
---|
149 | FILE *INFILE, *OUTFILE; |
---|
150 | comm_block comm_master; |
---|
151 | #endif |
---|
152 | |
---|
153 | #if DNAML_STEP |
---|
154 | int begin_step_time, end_step_time; |
---|
155 | #endif |
---|
156 | |
---|
157 | #if Debug |
---|
158 | FILE *debug; |
---|
159 | #endif |
---|
160 | |
---|
161 | #if DNAML_STEP |
---|
162 | # define REPORT_ADD_SPECS p4_send(DNAML_ADD_SPECS, DNAML_HOST_ID, NULL, 0) |
---|
163 | # define REPORT_SEND_TREE p4_send(DNAML_SEND_TREE, DNAML_HOST_ID, NULL, 0) |
---|
164 | # define REPORT_RECV_TREE p4_send(DNAML_RECV_TREE, DNAML_HOST_ID, NULL, 0) |
---|
165 | # define REPORT_STEP_TIME \ |
---|
166 | {\ |
---|
167 | char send_buf[80]; \ |
---|
168 | end_step_time = p4_clock(); \ |
---|
169 | (void) sprintf(send_buf, "%d", end_step_time-begin_step_time); \ |
---|
170 | p4_send(DNAML_STEP_TIME, DNAML_HOST_ID, send_buf,strlen(send_buf)+1); \ |
---|
171 | begin_step_time = end_step_time; \ |
---|
172 | } |
---|
173 | #else |
---|
174 | # define REPORT_ADD_SPECS |
---|
175 | # define REPORT_SEND_TREE |
---|
176 | # define REPORT_RECV_TREE |
---|
177 | # define REPORT_STEP_TIME |
---|
178 | #endif |
---|
179 | |
---|
180 | /*=======================================================================*/ |
---|
181 | /* PROGRAM */ |
---|
182 | /*=======================================================================*/ |
---|
183 | /* Best tree handling for dnaml */ |
---|
184 | /*=======================================================================*/ |
---|
185 | |
---|
186 | /* Tip value comparisons |
---|
187 | * |
---|
188 | * Use void pointers to hide type from other routines. Only tipValPtr and |
---|
189 | * cmpTipVal need to be changed to alter the nature of the values compared |
---|
190 | * (e.g., names instead of node numbers). |
---|
191 | * |
---|
192 | * cmpTipVal(tipValPtr(nodeptr p), tipValPtr(nodeptr q)) == -1, 0 or 1. |
---|
193 | * |
---|
194 | * This provides direct comparison of tip values (for example, for |
---|
195 | * definition of tr->start). |
---|
196 | */ |
---|
197 | |
---|
198 | |
---|
199 | void *tipValPtr (p) nodeptr p; { return (void *) & p->number; } |
---|
200 | |
---|
201 | |
---|
202 | int cmpTipVal (v1, v2) |
---|
203 | void *v1, *v2; |
---|
204 | { /* cmpTipVal */ |
---|
205 | int i1, i2; |
---|
206 | |
---|
207 | i1 = *((int *) v1); |
---|
208 | i2 = *((int *) v2); |
---|
209 | return (i1 < i2) ? -1 : ((i1 == i2) ? 0 : 1); |
---|
210 | } /* cmpTipVal */ |
---|
211 | |
---|
212 | |
---|
213 | /* These are the only routines that need to UNDERSTAND topologies */ |
---|
214 | |
---|
215 | |
---|
216 | topol *setupTopol (maxtips, nsites) |
---|
217 | int maxtips, nsites; |
---|
218 | { /* setupTopol */ |
---|
219 | topol *tpl; |
---|
220 | |
---|
221 | if (! (tpl = (topol *) malloc((unsigned) sizeof(topol))) || |
---|
222 | ! (tpl->links = (connptr) malloc((unsigned) ((2 * maxtips - 3) * |
---|
223 | sizeof(connect)))) || |
---|
224 | (nsites && ! (tpl->log_f |
---|
225 | = (double *) malloc((unsigned) (nsites * sizeof(double)))))) { |
---|
226 | printf("ERROR: Unable to get topology memory"); |
---|
227 | tpl = (topol *) NULL; |
---|
228 | } |
---|
229 | |
---|
230 | else { |
---|
231 | if (! nsites) tpl->log_f = (double *) NULL; |
---|
232 | tpl->likelihood = unlikely; |
---|
233 | tpl->start = (node *) NULL; |
---|
234 | tpl->nextlink = 0; |
---|
235 | tpl->ntips = 0; |
---|
236 | tpl->nextnode = 0; |
---|
237 | tpl->opt_level = 0; /* degree of branch swapping explored */ |
---|
238 | tpl->scrNum = 0; /* position in sorted list of scores */ |
---|
239 | tpl->tplNum = 0; /* position in sorted list of trees */ |
---|
240 | tpl->log_f_valid = 0; /* log_f value sites */ |
---|
241 | tpl->smoothed = FALSE; /* branch optimization converged? */ |
---|
242 | } |
---|
243 | |
---|
244 | return tpl; |
---|
245 | } /* setupTopol */ |
---|
246 | |
---|
247 | |
---|
248 | void freeTopol (tpl) |
---|
249 | topol *tpl; |
---|
250 | { /* freeTopol */ |
---|
251 | free((char *) tpl->links); |
---|
252 | if (tpl->log_f) free((char *) tpl->log_f); |
---|
253 | free((char *) tpl); |
---|
254 | } /* freeTopol */ |
---|
255 | |
---|
256 | |
---|
257 | int saveSubtree (p, tpl) |
---|
258 | nodeptr p; |
---|
259 | topol *tpl; |
---|
260 | /* Save a subtree in a standard order so that earlier branches |
---|
261 | * from a node contain lower value tips than do second branches from |
---|
262 | * the node. This code works with arbitrary furcations in the tree. |
---|
263 | */ |
---|
264 | { /* saveSubtree */ |
---|
265 | connptr r, r0; |
---|
266 | nodeptr q, s; |
---|
267 | int t, t0, t1; |
---|
268 | |
---|
269 | r0 = tpl->links; |
---|
270 | r = r0 + (tpl->nextlink)++; |
---|
271 | r->p = p; |
---|
272 | r->q = q = p->back; |
---|
273 | r->z = p->z; |
---|
274 | r->descend = 0; /* No children (yet) */ |
---|
275 | |
---|
276 | if (q->tip) { |
---|
277 | r->valptr = tipValPtr(q); /* Assign value */ |
---|
278 | } |
---|
279 | |
---|
280 | else { /* Internal node, look at children */ |
---|
281 | s = q->next; /* First child */ |
---|
282 | do { |
---|
283 | t = saveSubtree(s, tpl); /* Generate child's subtree */ |
---|
284 | |
---|
285 | t0 = 0; /* Merge child into list */ |
---|
286 | t1 = r->descend; |
---|
287 | while (t1 && (cmpTipVal(r0[t1].valptr, r0[t].valptr) < 0)) { |
---|
288 | t0 = t1; |
---|
289 | t1 = r0[t1].sibling; |
---|
290 | } |
---|
291 | if (t0) r0[t0].sibling = t; else r->descend = t; |
---|
292 | r0[t].sibling = t1; |
---|
293 | |
---|
294 | s = s->next; /* Next child */ |
---|
295 | } while (s != q); |
---|
296 | |
---|
297 | r->valptr = r0[r->descend].valptr; /* Inherit first child's value */ |
---|
298 | } /* End of internal node processing */ |
---|
299 | |
---|
300 | return r - r0; |
---|
301 | } /* saveSubtree */ |
---|
302 | |
---|
303 | |
---|
304 | nodeptr minSubtreeTip (p0) |
---|
305 | nodeptr p0; |
---|
306 | { /* minTreeTip */ |
---|
307 | nodeptr minTip, p, testTip; |
---|
308 | |
---|
309 | if (p0->tip) return p0; |
---|
310 | |
---|
311 | p = p0->next; |
---|
312 | minTip = minSubtreeTip(p->back); |
---|
313 | while ((p = p->next) != p0) { |
---|
314 | testTip = minSubtreeTip(p->back); |
---|
315 | if (cmpTipVal(tipValPtr(testTip), tipValPtr(minTip)) < 0) |
---|
316 | minTip = testTip; |
---|
317 | } |
---|
318 | return minTip; |
---|
319 | } /* minTreeTip */ |
---|
320 | |
---|
321 | |
---|
322 | nodeptr minTreeTip (p) |
---|
323 | nodeptr p; |
---|
324 | { /* minTreeTip */ |
---|
325 | nodeptr minp, minpb; |
---|
326 | |
---|
327 | minp = minSubtreeTip(p); |
---|
328 | minpb = minSubtreeTip(p->back); |
---|
329 | return cmpTipVal(tipValPtr(minp), tipValPtr(minpb)) < 0 ? minp : minpb; |
---|
330 | } /* minTreeTip */ |
---|
331 | |
---|
332 | |
---|
333 | void saveTree (tr, tpl) |
---|
334 | tree *tr; |
---|
335 | topol *tpl; |
---|
336 | /* Save a tree topology in a standard order so that first branches |
---|
337 | * from a node contain lower value tips than do second branches from |
---|
338 | * the node. The root tip should have the lowest value of all. |
---|
339 | */ |
---|
340 | { /* saveTree */ |
---|
341 | connptr r; |
---|
342 | double *tr_log_f, *tpl_log_f; |
---|
343 | int i; |
---|
344 | |
---|
345 | tpl->nextlink = 0; /* Reset link pointer */ |
---|
346 | r = tpl->links + saveSubtree(minTreeTip(tr->start), tpl); /* Save tree */ |
---|
347 | r->sibling = 0; |
---|
348 | |
---|
349 | tpl->likelihood = tr->likelihood; |
---|
350 | tpl->start = tr->start; |
---|
351 | tpl->ntips = tr->ntips; |
---|
352 | tpl->nextnode = tr->nextnode; |
---|
353 | tpl->opt_level = tr->opt_level; |
---|
354 | tpl->smoothed = tr->smoothed; |
---|
355 | |
---|
356 | if (tpl_log_f = tpl->log_f) { |
---|
357 | tr_log_f = tr->log_f; |
---|
358 | i = tpl->log_f_valid = tr->log_f_valid; |
---|
359 | while (--i >= 0) *tpl_log_f++ = *tr_log_f++; |
---|
360 | } |
---|
361 | else { |
---|
362 | tpl->log_f_valid = 0; |
---|
363 | } |
---|
364 | } /* saveTree */ |
---|
365 | |
---|
366 | |
---|
367 | void restoreTree (tpl, tr) |
---|
368 | topol *tpl; |
---|
369 | tree *tr; |
---|
370 | { /* restoreTree */ |
---|
371 | void hookup(); |
---|
372 | void initrav(); |
---|
373 | |
---|
374 | connptr r; |
---|
375 | nodeptr p, p0; |
---|
376 | double *tr_log_f, *tpl_log_f; |
---|
377 | int i; |
---|
378 | |
---|
379 | /* Clear existing connections */ |
---|
380 | |
---|
381 | for (i = 1; i <= 2*(tr->mxtips) - 2; i++) { /* Uses p = p->next at tip */ |
---|
382 | p0 = p = tr->nodep[i]; |
---|
383 | do { |
---|
384 | p->back = (nodeptr) NULL; |
---|
385 | p = p->next; |
---|
386 | } while (p != p0); |
---|
387 | } |
---|
388 | |
---|
389 | /* Copy connections from topology */ |
---|
390 | |
---|
391 | for (r = tpl->links, i = 0; i < tpl->nextlink; r++, i++) { |
---|
392 | hookup(r->p, r->q, r->z); |
---|
393 | } |
---|
394 | |
---|
395 | tr->likelihood = tpl->likelihood; |
---|
396 | tr->start = tpl->start; |
---|
397 | tr->ntips = tpl->ntips; |
---|
398 | tr->nextnode = tpl->nextnode; |
---|
399 | tr->opt_level = tpl->opt_level; |
---|
400 | tr->smoothed = tpl->smoothed; |
---|
401 | |
---|
402 | if (tpl_log_f = tpl->log_f) { |
---|
403 | tr_log_f = tr->log_f; |
---|
404 | i = tr->log_f_valid = tpl->log_f_valid; |
---|
405 | while (--i >= 0) *tr_log_f++ = *tpl_log_f++; |
---|
406 | } |
---|
407 | else { |
---|
408 | tr->log_f_valid = 0; |
---|
409 | } |
---|
410 | |
---|
411 | initrav(tr->start); |
---|
412 | initrav(tr->start->back); |
---|
413 | } /* restoreTree */ |
---|
414 | |
---|
415 | |
---|
416 | int initBestTree (bt, newKeep, nsites) |
---|
417 | bestlist *bt; |
---|
418 | int newKeep, nsites; |
---|
419 | { /* initBestTree */ |
---|
420 | int i, nlogf; |
---|
421 | |
---|
422 | if (newKeep < 1) newKeep = 1; |
---|
423 | if (newKeep > maxkeep) newKeep = maxkeep; |
---|
424 | |
---|
425 | bt->best = unlikely; |
---|
426 | bt->worst = unlikely; |
---|
427 | bt->nvalid = 0; |
---|
428 | bt->ninit = -1; |
---|
429 | bt->numtrees = 0; |
---|
430 | bt->improved = FALSE; |
---|
431 | if (! (bt->start = setupTopol(numsp, nsites))) return 0; |
---|
432 | |
---|
433 | for (i = bt->ninit + 1; i <= newKeep; i++) { |
---|
434 | nlogf = (i <= maxlogf) ? nsites : 0; |
---|
435 | if (! (bt->byScore[i] = setupTopol(numsp, nlogf))) break; |
---|
436 | bt->byTopol[i] = bt->byScore[i]; |
---|
437 | bt->ninit = i; |
---|
438 | } |
---|
439 | |
---|
440 | return (bt->nkeep = bt->ninit); |
---|
441 | } /* initBestTree */ |
---|
442 | |
---|
443 | |
---|
444 | int resetBestTree (bt) |
---|
445 | bestlist *bt; |
---|
446 | { /* resetBestTree */ |
---|
447 | bt->best = unlikely; |
---|
448 | bt->worst = unlikely; |
---|
449 | bt->nvalid = 0; |
---|
450 | bt->improved = FALSE; |
---|
451 | } /* resetBestTree */ |
---|
452 | |
---|
453 | |
---|
454 | void freeBestTree(bt) |
---|
455 | bestlist *bt; |
---|
456 | { /* freeBestTree */ |
---|
457 | while (bt->ninit >= 0) freeTopol(bt->byScore[(bt->ninit)--]); |
---|
458 | freeTopol(bt->start); |
---|
459 | } /* freeBestTree */ |
---|
460 | |
---|
461 | |
---|
462 | int saveBestTree (bt, tr) |
---|
463 | bestlist *bt; |
---|
464 | tree *tr; |
---|
465 | { /* saveBestTree */ |
---|
466 | topol *tpl; |
---|
467 | int scrNum, tplNum; |
---|
468 | |
---|
469 | if ((bt->nvalid < bt->nkeep) || (tr->likelihood > bt->worst)) { |
---|
470 | scrNum = 1; |
---|
471 | tplNum = 1; |
---|
472 | tpl = bt->byScore[scrNum]; |
---|
473 | saveTree(tr, tpl); |
---|
474 | tpl->scrNum = scrNum; |
---|
475 | tpl->tplNum = tplNum; |
---|
476 | |
---|
477 | bt->byTopol[tplNum] = tpl; |
---|
478 | bt->nvalid = 1; |
---|
479 | bt->best = tr->likelihood; |
---|
480 | bt->worst = tr->likelihood; |
---|
481 | bt->improved = TRUE; |
---|
482 | } |
---|
483 | |
---|
484 | else |
---|
485 | scrNum = 0; |
---|
486 | |
---|
487 | return scrNum; |
---|
488 | } /* saveBestTree */ |
---|
489 | |
---|
490 | |
---|
491 | int startOpt (bt, tr) |
---|
492 | bestlist *bt; |
---|
493 | tree *tr; |
---|
494 | { /* startOpt */ |
---|
495 | int scrNum; |
---|
496 | |
---|
497 | bt->nvalid = 0; |
---|
498 | scrNum = saveBestTree (bt, tr); |
---|
499 | bt->improved = FALSE; |
---|
500 | return scrNum; |
---|
501 | } /* startOpt */ |
---|
502 | |
---|
503 | |
---|
504 | int setOptLevel (bt, level) |
---|
505 | bestlist *bt; |
---|
506 | int level; |
---|
507 | { /* setOptLevel */ |
---|
508 | int scrNum; |
---|
509 | |
---|
510 | if (! bt->improved) { |
---|
511 | bt->byScore[1]->opt_level = level; |
---|
512 | scrNum = 1; |
---|
513 | } |
---|
514 | else |
---|
515 | scrNum = 0; |
---|
516 | |
---|
517 | return scrNum; |
---|
518 | } /* setOptLevel */ |
---|
519 | |
---|
520 | |
---|
521 | int recallBestTree (bt, rank, tr) |
---|
522 | bestlist *bt; |
---|
523 | int rank; |
---|
524 | tree *tr; |
---|
525 | { /* recallBestTree */ |
---|
526 | if (rank < 1) rank = 1; |
---|
527 | if (rank > bt->nvalid) rank = bt->nvalid; |
---|
528 | if (rank > 0) restoreTree(bt->byScore[rank], tr); |
---|
529 | return rank; |
---|
530 | } /* recallBestTree */ |
---|
531 | |
---|
532 | /*=======================================================================*/ |
---|
533 | /* End of best tree routines */ |
---|
534 | /*=======================================================================*/ |
---|
535 | |
---|
536 | |
---|
537 | #if 0 |
---|
538 | void hang(msg) char *msg; {printf("Hanging around: %s\n", msg); while(1);} |
---|
539 | #endif |
---|
540 | |
---|
541 | |
---|
542 | void getnums (tr) |
---|
543 | tree *tr; |
---|
544 | /* input number of species, number of sites */ |
---|
545 | { /* getnums */ |
---|
546 | printf("\n%s, version %s, %s\n\n", |
---|
547 | programName, |
---|
548 | programVersion, |
---|
549 | programDate); |
---|
550 | printf("Based on Joseph Felsenstein's\n\n"); |
---|
551 | printf("Nucleic acid sequence Maximum Likelihood method, "); |
---|
552 | printf("version 3.3\n\n"); |
---|
553 | |
---|
554 | if (fscanf(INFILE, "%d %d", &numsp, &sites) != 2) { |
---|
555 | printf("ERROR: Problem reading number of species and sites\n"); |
---|
556 | anerror = TRUE; |
---|
557 | return; |
---|
558 | } |
---|
559 | printf("%d Species, %d Sites\n\n", numsp, sites); |
---|
560 | |
---|
561 | if (numsp > maxsp) { |
---|
562 | printf("TOO MANY SPECIES: adjust CONSTants\n"); |
---|
563 | anerror = TRUE; |
---|
564 | } |
---|
565 | else if (numsp < 4) { |
---|
566 | printf("TOO FEW SPECIES\n"); |
---|
567 | anerror = TRUE; |
---|
568 | } |
---|
569 | |
---|
570 | if (sites > maxsites) { |
---|
571 | printf("TOO MANY SITES: adjust CONSTants\n"); |
---|
572 | anerror = TRUE; |
---|
573 | } |
---|
574 | else if (sites < 1) { |
---|
575 | printf("TOO FEW SITES\n"); |
---|
576 | anerror = TRUE; |
---|
577 | } |
---|
578 | |
---|
579 | tr->mxtips = numsp; |
---|
580 | } /* getnums */ |
---|
581 | |
---|
582 | |
---|
583 | boolean digit (ch) int ch; {return (ch >= '0' && ch <= '9'); } |
---|
584 | |
---|
585 | |
---|
586 | boolean white (ch) int ch; { return (ch == ' ' || ch == '\n' || ch == '\t'); } |
---|
587 | |
---|
588 | |
---|
589 | void uppercase (chptr) |
---|
590 | int *chptr; |
---|
591 | /* convert character to upper case -- either ASCII or EBCDIC */ |
---|
592 | { /* uppercase */ |
---|
593 | int ch; |
---|
594 | |
---|
595 | ch = *chptr; |
---|
596 | if ((ch >= 'a' && ch <= 'i') || (ch >= 'j' && ch <= 'r') |
---|
597 | || (ch >= 's' && ch <= 'z')) |
---|
598 | *chptr = ch + 'A' - 'a'; |
---|
599 | } /* uppercase */ |
---|
600 | |
---|
601 | |
---|
602 | int base36 (ch) |
---|
603 | int ch; |
---|
604 | { /* base36 */ |
---|
605 | if (ch >= '0' && ch <= '9') return (ch - '0'); |
---|
606 | else if (ch >= 'A' && ch <= 'I') return (ch - 'A' + 10); |
---|
607 | else if (ch >= 'J' && ch <= 'R') return (ch - 'J' + 19); |
---|
608 | else if (ch >= 'S' && ch <= 'Z') return (ch - 'S' + 28); |
---|
609 | else if (ch >= 'a' && ch <= 'i') return (ch - 'a' + 10); |
---|
610 | else if (ch >= 'j' && ch <= 'r') return (ch - 'j' + 19); |
---|
611 | else if (ch >= 's' && ch <= 'z') return (ch - 's' + 28); |
---|
612 | else return -1; |
---|
613 | } /* base36 */ |
---|
614 | |
---|
615 | |
---|
616 | int itobase36 (i) |
---|
617 | int i; |
---|
618 | { /* itobase36 */ |
---|
619 | if (i < 0) return '?'; |
---|
620 | else if (i < 10) return (i + '0'); |
---|
621 | else if (i < 19) return (i - 10 + 'A'); |
---|
622 | else if (i < 28) return (i - 19 + 'J'); |
---|
623 | else if (i < 36) return (i - 28 + 'S'); |
---|
624 | else return '?'; |
---|
625 | } /* itobase36 */ |
---|
626 | |
---|
627 | |
---|
628 | int findch (c) |
---|
629 | int c; |
---|
630 | { /* findch */ |
---|
631 | int ch; |
---|
632 | |
---|
633 | while ((ch = getc(INFILE)) != EOF && ch != c) ; |
---|
634 | return ch; |
---|
635 | } /* findch */ |
---|
636 | |
---|
637 | |
---|
638 | #if Master || Slave |
---|
639 | int str_findch (treestrp, c) |
---|
640 | char **treestrp; |
---|
641 | int c; |
---|
642 | { /* str_findch */ |
---|
643 | int ch; |
---|
644 | |
---|
645 | while ((ch = *(*treestrp)++) != NULL && ch != c) ; |
---|
646 | return ch; |
---|
647 | } /* str_findch */ |
---|
648 | #endif |
---|
649 | |
---|
650 | |
---|
651 | void inputcategories () |
---|
652 | /* reads the categories for each site */ |
---|
653 | { /* inputcategories */ |
---|
654 | int i, ch, ci; |
---|
655 | |
---|
656 | for (i = 1; i <= nmlngth; i++) (void) getc(INFILE); |
---|
657 | i = 1; |
---|
658 | while (i <= sites) { |
---|
659 | ch = getc(INFILE); |
---|
660 | ci = base36(ch); |
---|
661 | if (ci >= 0 && ci <= categs) |
---|
662 | category[i++] = ci; |
---|
663 | else if (! white(ch)) { |
---|
664 | printf("ERROR: Bad category character (%c) at site %d\n", ch, i); |
---|
665 | anerror = TRUE; |
---|
666 | return; |
---|
667 | } |
---|
668 | } |
---|
669 | |
---|
670 | if (findch('\n') == EOF) { /* skip to end of line */ |
---|
671 | printf("ERROR: Missing newline at end of category data\n"); |
---|
672 | anerror = TRUE; |
---|
673 | } |
---|
674 | } /* inputcategories */ |
---|
675 | |
---|
676 | |
---|
677 | void inputweights () |
---|
678 | /* input the character weights 0, 1, 2 ... 9, A, B, ... Y, Z */ |
---|
679 | { /* inputweights */ |
---|
680 | int i, ch, wi; |
---|
681 | |
---|
682 | for (i = 2; i <= nmlngth; i++) (void) getc(INFILE); |
---|
683 | weightsum = 0; |
---|
684 | i = 1; |
---|
685 | while (i <= sites) { |
---|
686 | ch = getc(INFILE); |
---|
687 | wi = base36(ch); |
---|
688 | if (wi >= 0) |
---|
689 | weightsum += weight[i++] = wi; |
---|
690 | else if (! white(ch)) { |
---|
691 | printf("ERROR: Bad weight character: '%c'", ch); |
---|
692 | printf(" Weights in dnaml must be a digit or a letter.\n"); |
---|
693 | anerror = TRUE; |
---|
694 | return; |
---|
695 | } |
---|
696 | } |
---|
697 | |
---|
698 | if (findch('\n') == EOF) { /* skip to end of line */ |
---|
699 | printf("ERROR: Missing newline at end of weight data\n"); |
---|
700 | anerror = TRUE; |
---|
701 | } |
---|
702 | } /* inputweights */ |
---|
703 | |
---|
704 | |
---|
705 | void getoptions (tr) |
---|
706 | tree *tr; |
---|
707 | { /* getoptions */ |
---|
708 | longer bseed; |
---|
709 | long inseed, binseed; /* random number seed input from user */ |
---|
710 | int ch, i, j, extranum; |
---|
711 | double randum(); |
---|
712 | |
---|
713 | bootstrap = FALSE; /* Don't bootstrap column weights */ |
---|
714 | categs = 0; /* Number of rate categories */ |
---|
715 | extrainfo = 0; /* No extra runtime info unless requested */ |
---|
716 | freqsfrom = FALSE; /* Use empirical base frequencies */ |
---|
717 | tr->global = -1; /* Default search locale for optimum */ |
---|
718 | tr->partswap = 1; /* Default to swap locally after insert */ |
---|
719 | interleaved = TRUE; /* By default, data format is interleaved */ |
---|
720 | jumble = FALSE; /* Use random addition sequence */ |
---|
721 | nkeep = 1; /* Keep only the one best tree */ |
---|
722 | tr->outgr = 1; /* Outgroup number */ |
---|
723 | outgropt = FALSE; /* User-defined outgroup rooting */ |
---|
724 | printdata = FALSE; /* Don't echo data to output stream */ |
---|
725 | fastadd = Master; /* Smooth branches globally in add */ |
---|
726 | rate[1] = 1.0; /* Rate values */ |
---|
727 | restart = FALSE; /* Restart from user tree */ |
---|
728 | treeprint = TRUE; /* Print tree to output stream */ |
---|
729 | trout = 0; /* Output tree file */ |
---|
730 | ttratio = 2.0; /* Transition/transversion rate ratio */ |
---|
731 | tr->userlen = FALSE; /* User-supplied branch lengths */ |
---|
732 | tr->usertree = FALSE; /* User-defined tree topologies */ |
---|
733 | tr->userwgt = FALSE; /* User-defined position weights */ |
---|
734 | extranum = 0; |
---|
735 | |
---|
736 | while ((ch = getc(INFILE)) != '\n' && ch != EOF) { |
---|
737 | uppercase(& ch); |
---|
738 | switch (ch) { |
---|
739 | case '1' : printdata = ! printdata; break; |
---|
740 | case '3' : treeprint = ! treeprint; break; |
---|
741 | case '4' : trout = 1; break; |
---|
742 | case 'B' : bootstrap = TRUE; binseed = 0; extranum++; break; |
---|
743 | case 'C' : categs = -1; extranum++; break; |
---|
744 | case 'E' : extrainfo = -1; break; |
---|
745 | case 'F' : freqsfrom = TRUE; break; |
---|
746 | case 'G' : tr->global = -2; break; |
---|
747 | case 'I' : interleaved = ! interleaved; break; |
---|
748 | case 'J' : jumble = TRUE; inseed = 0; extranum++; break; |
---|
749 | case 'K' : extranum++; break; |
---|
750 | case 'L' : tr->userlen = TRUE; break; |
---|
751 | case 'O' : outgropt = TRUE; tr->outgr = 0; extranum++; break; |
---|
752 | case 'Q' : fastadd = ! Master; break; |
---|
753 | case 'R' : restart = TRUE; break; |
---|
754 | case 'T' : ttratio = -1.0; extranum++; break; |
---|
755 | case 'U' : tr->usertree = TRUE; break; |
---|
756 | case 'W' : tr->userwgt = TRUE; weightsum = 0; extranum++; break; |
---|
757 | case 'Y' : trout = 1 - trout; break; |
---|
758 | case ' ' : break; |
---|
759 | case '\t': break; |
---|
760 | default : |
---|
761 | printf("ERROR: Bad option character: '%c'\n", ch); |
---|
762 | anerror = TRUE; |
---|
763 | return; |
---|
764 | } |
---|
765 | } |
---|
766 | |
---|
767 | if (ch == EOF) { |
---|
768 | printf("ERROR: End-of-file in options list\n"); |
---|
769 | anerror = TRUE; |
---|
770 | return; |
---|
771 | } |
---|
772 | |
---|
773 | if (tr->usertree && restart) { |
---|
774 | printf("ERROR: The restart and user-tree options conflict:\n"); |
---|
775 | printf(" Restart adds rest of taxa to a starting tree;\n"); |
---|
776 | printf(" User-tree does not add any taxa.\n\n"); |
---|
777 | anerror = TRUE; |
---|
778 | return; |
---|
779 | } |
---|
780 | if (tr->usertree && jumble) { |
---|
781 | printf("WARNING: The jumble and user-tree options conflict:\n"); |
---|
782 | printf(" Jumble adds taxa to a tree in random order;\n"); |
---|
783 | printf(" User-tree does not use taxa addition.\n"); |
---|
784 | printf(" Jumble ingnored for this run.\n\n"); |
---|
785 | jumble = FALSE; |
---|
786 | } |
---|
787 | if (tr->userlen && tr->global != -1) { |
---|
788 | printf("ERROR: The global and user-lengths options conflict:\n"); |
---|
789 | printf(" Global optimizes a starting tree;\n"); |
---|
790 | printf(" User-lengths constrain the starting tree.\n\n"); |
---|
791 | anerror = TRUE; |
---|
792 | return; |
---|
793 | } |
---|
794 | if (tr->userlen && ! tr->usertree) { |
---|
795 | printf("WARNING: User lengths required user tree option.\n"); |
---|
796 | printf(" User-tree option set for this run.\n\n"); |
---|
797 | tr->usertree = TRUE; |
---|
798 | } |
---|
799 | |
---|
800 | /* process lines with auxiliary data */ |
---|
801 | |
---|
802 | while (extranum--) { |
---|
803 | ch = getc(INFILE); |
---|
804 | uppercase(& ch); |
---|
805 | switch (ch) { |
---|
806 | |
---|
807 | case 'B': /* Bootstrap */ |
---|
808 | if (! bootstrap || binseed != 0) { |
---|
809 | printf("ERROR: Unexpected Bootstrap auxiliary data line\n"); |
---|
810 | anerror = TRUE; |
---|
811 | } |
---|
812 | else if (fscanf(INFILE,"%ld",&binseed)!=1 || findch('\n')==EOF) { |
---|
813 | printf("ERROR: Problem reading boostrap random seed value\n"); |
---|
814 | anerror = TRUE; |
---|
815 | } |
---|
816 | break; |
---|
817 | |
---|
818 | case 'C': /* Categories */ |
---|
819 | if (categs >= 0) { |
---|
820 | printf("ERROR: Unexpected Categories auxiliary data line\n"); |
---|
821 | anerror = TRUE; |
---|
822 | } |
---|
823 | else if (fscanf(INFILE, "%d", &categs) != 1) { |
---|
824 | printf("ERROR: Problem reading number of rate categories\n"); |
---|
825 | anerror = TRUE; |
---|
826 | } |
---|
827 | else if (categs < 1 || categs > maxcategories) { |
---|
828 | printf("ERROR: Bad number of categories: %d\n", categs); |
---|
829 | printf("Must be in range 1 - %d\n", maxcategories); |
---|
830 | anerror = TRUE; |
---|
831 | } |
---|
832 | else { |
---|
833 | for (j = 1; j <= categs |
---|
834 | && fscanf(INFILE, "%lf", &(rate[j])) == 1; j++) ; |
---|
835 | |
---|
836 | if ((j <= categs) || (findch('\n') == EOF)) { |
---|
837 | printf("ERROR: Problem reading rate values\n"); |
---|
838 | anerror = TRUE; |
---|
839 | } |
---|
840 | else |
---|
841 | inputcategories(); |
---|
842 | } |
---|
843 | break; |
---|
844 | |
---|
845 | case 'E': /* Extra output information */ |
---|
846 | if (fscanf(INFILE,"%d",&extrainfo) != 1 || findch('\n') == EOF) { |
---|
847 | printf("ERROR: Problem reading extra info value\n"); |
---|
848 | anerror = TRUE; |
---|
849 | } |
---|
850 | extranum++; /* Don't count this line since it is optional */ |
---|
851 | break; |
---|
852 | |
---|
853 | case 'G': /* Global */ |
---|
854 | if (tr->global != -2) { |
---|
855 | printf("ERROR: Unexpected Global auxiliary data line\n"); |
---|
856 | anerror = TRUE; |
---|
857 | } |
---|
858 | else if (fscanf(INFILE, "%d", &(tr->global)) != 1) { |
---|
859 | printf("ERROR: Problem reading rearrangement region size\n"); |
---|
860 | anerror = TRUE; |
---|
861 | } |
---|
862 | else if (tr->global < 0) { |
---|
863 | printf("WARNING: Global region size too small;\n"); |
---|
864 | printf(" value reset to local\n\n"); |
---|
865 | tr->global = 1; |
---|
866 | } |
---|
867 | else if (tr->global == 0) tr->partswap = 0; |
---|
868 | else if (tr->global > tr->mxtips - 3) { |
---|
869 | tr->global = tr->mxtips - 3; |
---|
870 | } |
---|
871 | |
---|
872 | while ((ch = getc(INFILE)) != '\n') { /* Scan for second value */ |
---|
873 | if (! white(ch)) { |
---|
874 | if (ch != EOF) (void) ungetc(ch, INFILE); |
---|
875 | if (ch == EOF || fscanf(INFILE, "%d", &(tr->partswap)) != 1 |
---|
876 | || findch('\n') == EOF) { |
---|
877 | printf("ERROR: Problem reading insert swap region size\n"); |
---|
878 | anerror = TRUE; |
---|
879 | } |
---|
880 | else if (tr->partswap < 0) tr->partswap = 1; |
---|
881 | else if (tr->partswap > tr->mxtips - 3) { |
---|
882 | tr->partswap = tr->mxtips - 3; |
---|
883 | } |
---|
884 | |
---|
885 | if (tr->partswap > tr->global) tr->global = tr->partswap; |
---|
886 | break; /* Break while loop */ |
---|
887 | } |
---|
888 | } |
---|
889 | |
---|
890 | extranum++; /* Don't count this line since it is optional */ |
---|
891 | break; /* Break switch statement */ |
---|
892 | |
---|
893 | case 'J': /* Jumble */ |
---|
894 | if (! jumble || inseed != 0) { |
---|
895 | printf("ERROR: Unexpected Jumble auxiliary data line\n"); |
---|
896 | anerror = TRUE; |
---|
897 | } |
---|
898 | else if (fscanf(INFILE,"%ld",&inseed) != 1 || findch('\n')==EOF) { |
---|
899 | printf("ERROR: Problem reading jumble random seed value\n"); |
---|
900 | anerror = TRUE; |
---|
901 | } |
---|
902 | break; |
---|
903 | |
---|
904 | case 'K': /* Keep */ |
---|
905 | if (fscanf(INFILE, "%d", &nkeep) != 1 || findch('\n') == EOF || |
---|
906 | nkeep < 1) { |
---|
907 | printf("ERROR: Problem reading number of kept trees\n"); |
---|
908 | anerror = TRUE; |
---|
909 | } |
---|
910 | else if (nkeep > maxkeep) { |
---|
911 | printf("WARNING: Kept trees lowered from %d to %d\n\n", |
---|
912 | nkeep, maxkeep); |
---|
913 | nkeep = maxkeep; |
---|
914 | } |
---|
915 | break; |
---|
916 | |
---|
917 | case 'O': /* Outgroup */ |
---|
918 | if (! outgropt || tr->outgr > 0) { |
---|
919 | printf("ERROR: Unexpected Outgroup auxiliary data line\n"); |
---|
920 | anerror = TRUE; |
---|
921 | } |
---|
922 | else if (fscanf(INFILE, "%d", &(tr->outgr)) != 1 || |
---|
923 | findch('\n') == EOF) { |
---|
924 | printf("ERROR: Problem reading outgroup number\n"); |
---|
925 | anerror = TRUE; |
---|
926 | } |
---|
927 | else if ((tr->outgr < 1) || (tr->outgr > tr->mxtips)) { |
---|
928 | printf("ERROR: Bad outgroup: '%d'\n", tr->outgr); |
---|
929 | anerror = TRUE; |
---|
930 | } |
---|
931 | break; |
---|
932 | |
---|
933 | case 'T': /* Transition/transversion ratio */ |
---|
934 | if (ttratio >= 0.0) { |
---|
935 | printf("ERROR: Unexpected Transition/transversion auxiliary data\n"); |
---|
936 | anerror = TRUE; |
---|
937 | } |
---|
938 | else if (fscanf(INFILE,"%lf",&ttratio)!=1 || findch('\n')==EOF) { |
---|
939 | printf("ERROR: Problem reading transition/transversion ratio\n"); |
---|
940 | anerror = TRUE; |
---|
941 | } |
---|
942 | break; |
---|
943 | |
---|
944 | case 'W': /* Weights */ |
---|
945 | if (! tr->userwgt || weightsum > 0) { |
---|
946 | printf("ERROR: Unexpected Weights auxiliary data\n"); |
---|
947 | anerror = TRUE; |
---|
948 | } |
---|
949 | else { |
---|
950 | inputweights(); |
---|
951 | } |
---|
952 | break; |
---|
953 | |
---|
954 | case 'Y': /* Output tree file */ |
---|
955 | if (! trout) { |
---|
956 | printf("ERROR: Unexpected Treefile auxiliary data\n"); |
---|
957 | anerror = TRUE; |
---|
958 | } |
---|
959 | else if (fscanf(INFILE,"%d",&trout) != 1 || findch('\n') == EOF) { |
---|
960 | printf("ERROR: Problem reading output tree-type number\n"); |
---|
961 | anerror = TRUE; |
---|
962 | } |
---|
963 | else if ((trout < 0) || (trout > 2)) { |
---|
964 | printf("ERROR: Bad output tree-type number: '%d'\n", trout); |
---|
965 | anerror = TRUE; |
---|
966 | } |
---|
967 | extranum++; /* Don't count this line since it is optional */ |
---|
968 | break; |
---|
969 | |
---|
970 | default: |
---|
971 | printf("ERROR: Auxiliary options line starts with '%c'\n", ch); |
---|
972 | anerror = TRUE; |
---|
973 | break; |
---|
974 | } |
---|
975 | |
---|
976 | if (anerror) return; |
---|
977 | } |
---|
978 | |
---|
979 | if (anerror) return; |
---|
980 | |
---|
981 | if (! tr->userwgt) { |
---|
982 | for (i = 1; i <= sites; i++) weight[i] = 1; |
---|
983 | weightsum = sites; |
---|
984 | } |
---|
985 | |
---|
986 | if (tr->userwgt && weightsum < 1) { |
---|
987 | printf("ERROR: Missing or bad user-supplied weight data.\n"); |
---|
988 | anerror = TRUE; |
---|
989 | return; |
---|
990 | } |
---|
991 | |
---|
992 | if (bootstrap) { |
---|
993 | int nonzero; |
---|
994 | |
---|
995 | printf("Bootstrap random number seed = %ld\n\n", binseed); |
---|
996 | |
---|
997 | if (binseed < 0) binseed = -binseed; |
---|
998 | for (i = 0; i <= 5; i++) {bseed[i] = binseed & 63; binseed >>= 6;} |
---|
999 | |
---|
1000 | nonzero = 0; |
---|
1001 | for (i = 1; i <= sites; i++) if (weight[i] > 0) nonzero++; |
---|
1002 | |
---|
1003 | for (j = 1; j <= nonzero; j++) aliasweight[j] = 0; |
---|
1004 | for (j = 1; j <= nonzero; j++) |
---|
1005 | aliasweight[(int) (nonzero*randum(bseed)) + 1]++; |
---|
1006 | |
---|
1007 | j = 0; |
---|
1008 | weightsum = 0; |
---|
1009 | for (i = 1; i <= sites; i++) { |
---|
1010 | if (weight[i] > 0) { |
---|
1011 | weightsum += (weight[i] *= aliasweight[++j]); |
---|
1012 | } |
---|
1013 | } |
---|
1014 | } |
---|
1015 | |
---|
1016 | if (jumble) { |
---|
1017 | printf("Jumble random number seed = %ld\n\n", inseed); |
---|
1018 | if (inseed < 0) inseed = -inseed; |
---|
1019 | for (i = 0; i <= 5; i++) {seed[i] = inseed & 63; inseed >>= 6;} |
---|
1020 | } |
---|
1021 | |
---|
1022 | if (categs > 0) { |
---|
1023 | printf("Site category Rate of change\n\n"); |
---|
1024 | for (i = 1; i <= categs; i++) |
---|
1025 | printf(" %c%13.3f\n", itobase36(i), rate[i]); |
---|
1026 | putchar('\n'); |
---|
1027 | for (i = 1; i <= sites; i++) { |
---|
1028 | if ((weight[i] > 0) && (category[i] < 1)) { |
---|
1029 | printf("ERROR: Bad category (%c) at site %d\n", |
---|
1030 | itobase36(category[i]), i); |
---|
1031 | anerror = TRUE; |
---|
1032 | return; |
---|
1033 | } |
---|
1034 | } |
---|
1035 | } |
---|
1036 | else if (categs < 0) { |
---|
1037 | printf("ERROR: Category auxiliary data missing from input\n"); |
---|
1038 | anerror = TRUE; |
---|
1039 | return; |
---|
1040 | } |
---|
1041 | else { /* categs == 0 */ |
---|
1042 | for (i = 1; i <= sites; i++) category[i] = 1; |
---|
1043 | categs = 1; |
---|
1044 | } |
---|
1045 | |
---|
1046 | if (tr->outgr < 1) { |
---|
1047 | printf("ERROR: Outgroup auxiliary data missing from input\n"); |
---|
1048 | anerror = TRUE; |
---|
1049 | return; |
---|
1050 | } |
---|
1051 | |
---|
1052 | if (ttratio < 0.0) { |
---|
1053 | printf("ERROR: Transition/transversion auxiliary data missing from input\n"); |
---|
1054 | anerror = TRUE; |
---|
1055 | return; |
---|
1056 | } |
---|
1057 | |
---|
1058 | if (tr->global < 0) { |
---|
1059 | if (tr->global == -2) tr->global = tr->mxtips - 3; /* Default global */ |
---|
1060 | else tr->global = tr->usertree ? 0 : 1; /* No global */ |
---|
1061 | } |
---|
1062 | |
---|
1063 | if (restart) { |
---|
1064 | printf("Restart option in effect. "); |
---|
1065 | printf("Sequence addition will start from appended tree.\n\n"); |
---|
1066 | } |
---|
1067 | |
---|
1068 | if (tr->usertree && ! tr->global) { |
---|
1069 | printf("User-supplied tree topology%swill be used.\n\n", |
---|
1070 | tr->userlen ? " and branch lengths " : " "); |
---|
1071 | } |
---|
1072 | else { |
---|
1073 | if (! tr->usertree) { |
---|
1074 | printf("Rearrangements of partial trees may cross %d %s.\n", |
---|
1075 | tr->partswap, tr->partswap == 1 ? "branch" : "branches"); |
---|
1076 | } |
---|
1077 | printf("Rearrangements of full tree may cross %d %s.\n\n", |
---|
1078 | tr->global, tr->global == 1 ? "branch" : "branches"); |
---|
1079 | } |
---|
1080 | } /* getoptions */ |
---|
1081 | |
---|
1082 | |
---|
1083 | void getbasefreqs () |
---|
1084 | { /* getbasefreqs */ |
---|
1085 | double suma, sumb; |
---|
1086 | |
---|
1087 | if (freqsfrom) printf("Empirical "); |
---|
1088 | printf("Base Frequencies:\n\n"); |
---|
1089 | |
---|
1090 | if (! freqsfrom) { |
---|
1091 | if (fscanf(INFILE, "%lf%lf%lf%lf", &freqa, &freqc, &freqg, &freqt) != 4 |
---|
1092 | || findch('\n') == EOF) { |
---|
1093 | printf("ERROR: Problem reading user base frequencies\n"); |
---|
1094 | anerror = TRUE; |
---|
1095 | return; |
---|
1096 | } |
---|
1097 | } |
---|
1098 | |
---|
1099 | printf(" A %10.5f\n", freqa); |
---|
1100 | printf(" C %10.5f\n", freqc); |
---|
1101 | printf(" G %10.5f\n", freqg); |
---|
1102 | printf(" T(U) %10.5f\n\n", freqt); |
---|
1103 | freqr = freqa + freqg; |
---|
1104 | invfreqr = 1.0/freqr; |
---|
1105 | freqar = freqa * invfreqr; |
---|
1106 | freqgr = freqg * invfreqr; |
---|
1107 | freqy = freqc + freqt; |
---|
1108 | invfreqy = 1.0/freqy; |
---|
1109 | freqcy = freqc * invfreqy; |
---|
1110 | freqty = freqt * invfreqy; |
---|
1111 | printf("Transition/transversion ratio = %10.6f\n\n", ttratio); |
---|
1112 | suma = ttratio*freqr*freqy - (freqa*freqg + freqc*freqt); |
---|
1113 | sumb = freqa*freqgr + freqc*freqty; |
---|
1114 | xi = suma/(suma+sumb); |
---|
1115 | xv = 1.0 - xi; |
---|
1116 | if (xi <= 0.0) { |
---|
1117 | printf("WARNING: This transition/transversion ratio\n"); |
---|
1118 | printf(" is impossible with these base frequencies!\n"); |
---|
1119 | printf("Transition/transversion parameter reset\n\n"); |
---|
1120 | xi = 3/5; |
---|
1121 | xv = 2/5; |
---|
1122 | } |
---|
1123 | printf("(Transition/transversion parameter = %10.6f)\n\n", xi/xv); |
---|
1124 | fracchange = xi*(2*freqa*freqgr + 2*freqc*freqty) |
---|
1125 | + xv*(1.0 - freqa*freqa - freqc*freqc |
---|
1126 | - freqg*freqg - freqt*freqt); |
---|
1127 | } /* getbasefreqs */ |
---|
1128 | |
---|
1129 | |
---|
1130 | void getyspace () |
---|
1131 | { /* getyspace */ |
---|
1132 | long size; |
---|
1133 | int i; |
---|
1134 | char *y0; |
---|
1135 | |
---|
1136 | size = 4 * (sites/4 + 1); |
---|
1137 | if (! (y0 = malloc((unsigned) ((numsp+1) * size * sizeof(char))))) { |
---|
1138 | printf("ERROR: Unable to obtain space for data array\n"); |
---|
1139 | anerror = TRUE; |
---|
1140 | return; |
---|
1141 | } |
---|
1142 | |
---|
1143 | for (i = 0; i <= numsp; i++) { |
---|
1144 | y[i] = y0; |
---|
1145 | y0 += size; |
---|
1146 | } |
---|
1147 | } /* getyspace */ |
---|
1148 | |
---|
1149 | |
---|
1150 | void setupTree (tr) |
---|
1151 | tree *tr; |
---|
1152 | { /* setupTree */ |
---|
1153 | nodeptr p0, p, q; |
---|
1154 | int i, j, tips, inter; |
---|
1155 | |
---|
1156 | tips = tr->mxtips; |
---|
1157 | inter = tr->mxtips - 1; |
---|
1158 | |
---|
1159 | if (!(p0 = (nodeptr) malloc((unsigned) ((tips + 3*inter) * |
---|
1160 | sizeof(node))))) { |
---|
1161 | printf("ERROR: Unable to obtain sufficient tree memory\n"); |
---|
1162 | anerror = TRUE; |
---|
1163 | return; |
---|
1164 | } |
---|
1165 | |
---|
1166 | tr->nodep[0] = (node *) NULL; /* Use as 1-based array */ |
---|
1167 | |
---|
1168 | for (i = 1; i <= tips; i++) { /* Set-up tips */ |
---|
1169 | p = p0++; |
---|
1170 | p->x = (xarray *) NULL; |
---|
1171 | p->tip = (char *) NULL; |
---|
1172 | p->number = i; |
---|
1173 | p->next = p; |
---|
1174 | p->back = (node *) NULL; |
---|
1175 | |
---|
1176 | tr->nodep[i] = p; |
---|
1177 | } |
---|
1178 | |
---|
1179 | for (i = tips + 1; i <= tips + inter; i++) { /* Internal nodes */ |
---|
1180 | q = (node *) NULL; |
---|
1181 | for (j = 1; j <= 3; j++) { |
---|
1182 | p = p0++; |
---|
1183 | p->x = (xarray *) NULL; |
---|
1184 | p->tip = (char *) NULL; |
---|
1185 | p->number = i; |
---|
1186 | p->next = q; |
---|
1187 | p->back = (node *) NULL; |
---|
1188 | q = p; |
---|
1189 | } |
---|
1190 | p->next->next->next = p; |
---|
1191 | tr->nodep[i] = p; |
---|
1192 | } |
---|
1193 | |
---|
1194 | tr->likelihood = unlikely; |
---|
1195 | tr->start = (node *) NULL; |
---|
1196 | tr->outgrnode = tr->nodep[tr->outgr]; |
---|
1197 | tr->ntips = 0; |
---|
1198 | tr->nextnode = 0; |
---|
1199 | tr->opt_level = 0; |
---|
1200 | tr->smoothed = FALSE; |
---|
1201 | tr->log_f_valid = 0; |
---|
1202 | } /* setupTree */ |
---|
1203 | |
---|
1204 | |
---|
1205 | void freeTreeNode (p) /* Free tree node (sector) associated data */ |
---|
1206 | nodeptr p; |
---|
1207 | { /* freeTreeNode */ |
---|
1208 | if (p) { |
---|
1209 | if (p->x) { |
---|
1210 | if (p->x->a) free((char *) p->x->a); |
---|
1211 | free((char *) p->x); |
---|
1212 | } |
---|
1213 | } |
---|
1214 | } /* freeTreeNode */ |
---|
1215 | |
---|
1216 | |
---|
1217 | void freeTree (tr) |
---|
1218 | tree *tr; |
---|
1219 | { /* freeTree */ |
---|
1220 | nodeptr p, q; |
---|
1221 | int i, tips, inter; |
---|
1222 | |
---|
1223 | tips = tr->mxtips; |
---|
1224 | inter = tr->mxtips - 1; |
---|
1225 | |
---|
1226 | for (i = 1; i <= tips; i++) freeTreeNode(tr->nodep[i]); |
---|
1227 | |
---|
1228 | for (i = tips + 1; i <= tips + inter; i++) { |
---|
1229 | if (p = tr->nodep[i]) { |
---|
1230 | if (q = p->next) { |
---|
1231 | freeTreeNode(q->next); |
---|
1232 | freeTreeNode(q); |
---|
1233 | } |
---|
1234 | freeTreeNode(p); |
---|
1235 | } |
---|
1236 | } |
---|
1237 | |
---|
1238 | free((char *) tr->nodep[1]); /* Free the actual nodes */ |
---|
1239 | } /* freeTree */ |
---|
1240 | |
---|
1241 | |
---|
1242 | void getdata (tr) |
---|
1243 | tree *tr; |
---|
1244 | /* read sequences */ |
---|
1245 | { /* getdata */ |
---|
1246 | int i, j, k, l, basesread, basesnew, ch; |
---|
1247 | int meaning[256]; /* meaning of input characters */ |
---|
1248 | char *nameptr; |
---|
1249 | boolean allread, firstpass; |
---|
1250 | |
---|
1251 | for (i = 0; i <= 255; i++) meaning[i] = 0; |
---|
1252 | meaning['A'] = 1; |
---|
1253 | meaning['B'] = 14; |
---|
1254 | meaning['C'] = 2; |
---|
1255 | meaning['D'] = 13; |
---|
1256 | meaning['G'] = 4; |
---|
1257 | meaning['H'] = 11; |
---|
1258 | meaning['K'] = 12; |
---|
1259 | meaning['M'] = 3; |
---|
1260 | meaning['N'] = 15; |
---|
1261 | meaning['O'] = 15; |
---|
1262 | meaning['R'] = 5; |
---|
1263 | meaning['S'] = 6; |
---|
1264 | meaning['T'] = 8; |
---|
1265 | meaning['U'] = 8; |
---|
1266 | meaning['V'] = 7; |
---|
1267 | meaning['W'] = 9; |
---|
1268 | meaning['X'] = 15; |
---|
1269 | meaning['Y'] = 10; |
---|
1270 | meaning['?'] = 15; |
---|
1271 | meaning['-'] = 15; |
---|
1272 | |
---|
1273 | basesread = basesnew = 0; |
---|
1274 | |
---|
1275 | allread = FALSE; |
---|
1276 | firstpass = TRUE; |
---|
1277 | ch = ' '; |
---|
1278 | |
---|
1279 | while (! allread) { |
---|
1280 | for (i = 1; i <= tr->mxtips; i++) { /* Read data line */ |
---|
1281 | if (firstpass) { /* Read species names */ |
---|
1282 | j = 1; |
---|
1283 | nameptr = tr->nodep[i]->name; |
---|
1284 | while (white(ch = getc(INFILE))) { /* Skip blank lines */ |
---|
1285 | if (ch == '\n') { |
---|
1286 | j = 1; |
---|
1287 | nameptr = tr->nodep[i]->name; |
---|
1288 | } |
---|
1289 | else if (j++ <= nmlngth) *nameptr++ = ' '; |
---|
1290 | } |
---|
1291 | |
---|
1292 | if (j > nmlngth) { |
---|
1293 | printf("ERROR: Blank name for species %d; ", i); |
---|
1294 | printf("check number of species,\n"); |
---|
1295 | printf(" number of sites, and interleave option.\n"); |
---|
1296 | anerror = TRUE; |
---|
1297 | return; |
---|
1298 | } |
---|
1299 | |
---|
1300 | if (ch != EOF) { |
---|
1301 | *nameptr++ = ch; |
---|
1302 | j++; |
---|
1303 | while (j <= nmlngth) { |
---|
1304 | if ((ch = getc(INFILE)) == '\n' || ch == EOF) break; |
---|
1305 | if (ch == '_' || white(ch)) ch = ' '; |
---|
1306 | *nameptr++ = ch; |
---|
1307 | j++; |
---|
1308 | } |
---|
1309 | while (j++ <= nmlngth) *nameptr++ = ' '; |
---|
1310 | *nameptr = '\0'; /* add null termination */ |
---|
1311 | } |
---|
1312 | |
---|
1313 | if (ch == EOF) { |
---|
1314 | printf("ERROR: End-of-file in name of species %d\n", i); |
---|
1315 | anerror = TRUE; |
---|
1316 | return; |
---|
1317 | } |
---|
1318 | } |
---|
1319 | |
---|
1320 | j = basesread; |
---|
1321 | while ((j < sites) && ((ch = getc(INFILE)) != EOF) |
---|
1322 | && ((! interleaved) || (ch != '\n'))) { |
---|
1323 | uppercase(& ch); |
---|
1324 | if (meaning[ch] || ch == '.') { |
---|
1325 | j++; |
---|
1326 | if (ch == '.') { |
---|
1327 | if (i != 1) ch = y[1][j]; |
---|
1328 | else { |
---|
1329 | printf("ERROR: Dot (.) found at site %d of sequence 1\n", j); |
---|
1330 | anerror = TRUE; |
---|
1331 | return; |
---|
1332 | } |
---|
1333 | } |
---|
1334 | y[i][j] = ch; |
---|
1335 | } |
---|
1336 | else if (white(ch) || digit(ch)) ; |
---|
1337 | else { |
---|
1338 | printf("ERROR: Bad base (%c) at site %d of sequence %d\n", |
---|
1339 | ch, j, i); |
---|
1340 | anerror = TRUE; |
---|
1341 | return; |
---|
1342 | } |
---|
1343 | } |
---|
1344 | |
---|
1345 | if (ch == EOF) { |
---|
1346 | printf("ERROR: End-of-file at site %d of sequence %d\n", j, i); |
---|
1347 | anerror = TRUE; |
---|
1348 | return; |
---|
1349 | } |
---|
1350 | |
---|
1351 | if (! firstpass && (j == basesread)) i--; /* no data on line */ |
---|
1352 | else if (i == 1) basesnew = j; |
---|
1353 | else if (j != basesnew) { |
---|
1354 | printf("ERROR: Sequences out of alignment\n"); |
---|
1355 | printf("%d (instead of %d) residues read in sequence %d\n", |
---|
1356 | j - basesread, basesnew - basesread, i); |
---|
1357 | anerror = TRUE; |
---|
1358 | return; |
---|
1359 | } |
---|
1360 | |
---|
1361 | while (ch != '\n' && ch != EOF) ch = getc(INFILE); /* flush line */ |
---|
1362 | } /* next sequence */ |
---|
1363 | firstpass = FALSE; |
---|
1364 | basesread = basesnew; |
---|
1365 | allread = (basesread >= sites); |
---|
1366 | } |
---|
1367 | |
---|
1368 | /* Print listing of sequence alignment */ |
---|
1369 | |
---|
1370 | if (printdata) { |
---|
1371 | j = nmlngth - 5 + ((sites + ((sites-1)/10))/2); |
---|
1372 | if (j < nmlngth - 1) j = nmlngth - 1; |
---|
1373 | if (j > 37) j = 37; |
---|
1374 | printf("Name"); for (i=1;i<=j;i++) putchar(' '); printf("Sequences\n"); |
---|
1375 | printf("----"); for (i=1;i<=j;i++) putchar(' '); printf("---------\n"); |
---|
1376 | putchar('\n'); |
---|
1377 | |
---|
1378 | for (i = 1; i <= sites; i += 60) { |
---|
1379 | l = i + 59; |
---|
1380 | if (l > sites) l = sites; |
---|
1381 | |
---|
1382 | if (tr->userwgt) { |
---|
1383 | printf("Weights "); |
---|
1384 | for (j = 11; j <= nmlngth+3; j++) putchar(' '); |
---|
1385 | for (k = i; k <= l; k++) { |
---|
1386 | putchar(itobase36(weight[k])); |
---|
1387 | if (((k % 10) == 0) && ((k % 60) != 0)) putchar(' '); |
---|
1388 | } |
---|
1389 | putchar('\n'); |
---|
1390 | } |
---|
1391 | |
---|
1392 | if (categs > 1) { |
---|
1393 | printf("Categories"); |
---|
1394 | for (j = 11; j <= nmlngth+3; j++) putchar(' '); |
---|
1395 | for (k = i; k <= l; k++) { |
---|
1396 | putchar(itobase36(category[k])); |
---|
1397 | if (((k % 10) == 0) && ((k % 60) != 0)) putchar(' '); |
---|
1398 | } |
---|
1399 | putchar('\n'); |
---|
1400 | } |
---|
1401 | |
---|
1402 | for (j = 1; j <= tr->mxtips; j++) { |
---|
1403 | printf("%s ", tr->nodep[j]->name); |
---|
1404 | for (k = i; k <= l; k++) { |
---|
1405 | ch = y[j][k]; |
---|
1406 | if ((j > 1) && (ch == y[1][k])) ch = '.'; |
---|
1407 | putchar(ch); |
---|
1408 | if (((k % 10) == 0) && ((k % 60) != 0)) putchar(' '); |
---|
1409 | } |
---|
1410 | putchar('\n'); |
---|
1411 | } |
---|
1412 | putchar('\n'); |
---|
1413 | } |
---|
1414 | } |
---|
1415 | |
---|
1416 | for (j = 1; j <= tr->mxtips; j++) /* Convert characters to meanings */ |
---|
1417 | for (i = 1; i <= sites; i++) y[j][i] = meaning[y[j][i]]; |
---|
1418 | |
---|
1419 | } /* getdata */ |
---|
1420 | |
---|
1421 | |
---|
1422 | void getinput (tr) |
---|
1423 | tree *tr; |
---|
1424 | { /* getinput */ |
---|
1425 | getnums(tr); if (anerror) return; |
---|
1426 | getoptions(tr); if (anerror) return; |
---|
1427 | if (! freqsfrom) getbasefreqs(); if (anerror) return; |
---|
1428 | getyspace(); if (anerror) return; |
---|
1429 | setupTree(tr); if (anerror) return; |
---|
1430 | getdata(tr); if (anerror) return; |
---|
1431 | } /* getinput */ |
---|
1432 | |
---|
1433 | |
---|
1434 | void sitesort () |
---|
1435 | /* Shell sort keeping sites, weights in same order */ |
---|
1436 | { /* sitesort */ |
---|
1437 | int gap, i, j, jj, jg, k; |
---|
1438 | boolean flip, tied; |
---|
1439 | |
---|
1440 | for (gap = sites/2; gap > 0; gap /= 2) { |
---|
1441 | for (i = gap + 1; i <= sites; i++) { |
---|
1442 | j = i - gap; |
---|
1443 | |
---|
1444 | do { |
---|
1445 | jj = alias[j]; |
---|
1446 | jg = alias[j+gap]; |
---|
1447 | flip = (category[jj] > category[jg]); |
---|
1448 | tied = (category[jj] == category[jg]); |
---|
1449 | for (k = 1; (k <= numsp) && tied; k++) { |
---|
1450 | flip = (y[k][jj] > y[k][jg]); |
---|
1451 | tied = (y[k][jj] == y[k][jg]); |
---|
1452 | } |
---|
1453 | if (flip) { |
---|
1454 | alias[j] = jg; |
---|
1455 | alias[j+gap] = jj; |
---|
1456 | j -= gap; |
---|
1457 | } |
---|
1458 | } while (flip && (j > 0)); |
---|
1459 | |
---|
1460 | } /* for (i ... */ |
---|
1461 | } /* for (gap ... */ |
---|
1462 | } /* sitesort */ |
---|
1463 | |
---|
1464 | |
---|
1465 | void sitecombcrunch () |
---|
1466 | /* combine sites that have identical patterns (and nonzero weight) */ |
---|
1467 | { /* sitecombcrunch */ |
---|
1468 | int i, sitei, j, sitej, k; |
---|
1469 | boolean tied; |
---|
1470 | |
---|
1471 | i = 0; |
---|
1472 | alias[0] = alias[1]; |
---|
1473 | aliasweight[0] = 0; |
---|
1474 | |
---|
1475 | for (j = 1; j <= sites; j++) { |
---|
1476 | sitei = alias[i]; |
---|
1477 | sitej = alias[j]; |
---|
1478 | tied = (category[sitei] == category[sitej]); |
---|
1479 | |
---|
1480 | for (k = 1; tied && (k <= numsp); k++) |
---|
1481 | tied = (y[k][sitei] == y[k][sitej]); |
---|
1482 | |
---|
1483 | if (tied) { |
---|
1484 | aliasweight[i] += weight[sitej]; |
---|
1485 | } |
---|
1486 | else { |
---|
1487 | if (aliasweight[i] > 0) i++; |
---|
1488 | aliasweight[i] = weight[sitej]; |
---|
1489 | alias[i] = sitej; |
---|
1490 | } |
---|
1491 | } |
---|
1492 | |
---|
1493 | endsite = i; |
---|
1494 | if (aliasweight[i] > 0) endsite++; |
---|
1495 | } /* sitecombcrunch */ |
---|
1496 | |
---|
1497 | |
---|
1498 | void makeweights () |
---|
1499 | /* make up weights vector to avoid duplicate computations */ |
---|
1500 | { /* makeweights */ |
---|
1501 | int i; |
---|
1502 | |
---|
1503 | for (i = 1; i <= sites; i++) alias[i] = i; |
---|
1504 | sitesort(); |
---|
1505 | sitecombcrunch(); |
---|
1506 | if (endsite > maxpatterns) { |
---|
1507 | printf("ERROR: Too many patterns in data\n"); |
---|
1508 | printf(" Increase maxpatterns to at least %d\n", endsite); |
---|
1509 | anerror = TRUE; |
---|
1510 | } |
---|
1511 | else { |
---|
1512 | printf("Analyzing %d distinct data patterns (columns)\n\n", endsite); |
---|
1513 | } |
---|
1514 | } /* makeweights */ |
---|
1515 | |
---|
1516 | |
---|
1517 | void makevalues (tr) |
---|
1518 | tree *tr; |
---|
1519 | /* set up fractional likelihoods at tips */ |
---|
1520 | { /* makevalues */ |
---|
1521 | double temp; |
---|
1522 | int i, j, k; |
---|
1523 | |
---|
1524 | for (i = 1; i <= tr->mxtips; i++) { /* Pack and move tip data */ |
---|
1525 | for (j = 0; j < endsite; j++) y[i-1][j] = y[i][alias[j]]; |
---|
1526 | tr->nodep[i]->tip = &(y[i-1][0]); |
---|
1527 | } |
---|
1528 | |
---|
1529 | totalwrate = 0.0; |
---|
1530 | for (k = 0; k < endsite; k++) { |
---|
1531 | catnumb[k] = i = category[alias[k]]; |
---|
1532 | ratvalue[k] = temp = rate[i]; |
---|
1533 | totalwrate += wgt_rate[k] = temp * aliasweight[k]; |
---|
1534 | wgt_rate2[k] = temp * temp * aliasweight[k]; |
---|
1535 | } |
---|
1536 | } /* makevalues */ |
---|
1537 | |
---|
1538 | |
---|
1539 | void empiricalfreqs (tr) |
---|
1540 | tree *tr; |
---|
1541 | /* Get empirical base frequencies from the data */ |
---|
1542 | { /* empiricalfreqs */ |
---|
1543 | double sum, suma, sumc, sumg, sumt, wj, fa, fc, fg, ft; |
---|
1544 | int i, j, k, code; |
---|
1545 | char *yptr; |
---|
1546 | |
---|
1547 | freqa = 0.25; |
---|
1548 | freqc = 0.25; |
---|
1549 | freqg = 0.25; |
---|
1550 | freqt = 0.25; |
---|
1551 | for (k = 1; k <= 8; k++) { |
---|
1552 | suma = 0.0; |
---|
1553 | sumc = 0.0; |
---|
1554 | sumg = 0.0; |
---|
1555 | sumt = 0.0; |
---|
1556 | for (i = 1; i <= tr->mxtips; i++) { |
---|
1557 | yptr = tr->nodep[i]->tip; |
---|
1558 | for (j = 0; j < endsite; j++) { |
---|
1559 | code = *yptr++; |
---|
1560 | fa = freqa * ( code & 1); |
---|
1561 | fc = freqc * ((code >> 1) & 1); |
---|
1562 | fg = freqg * ((code >> 2) & 1); |
---|
1563 | ft = freqt * ((code >> 3) & 1); |
---|
1564 | wj = aliasweight[j] / (fa + fc + fg + ft); |
---|
1565 | suma += wj * fa; |
---|
1566 | sumc += wj * fc; |
---|
1567 | sumg += wj * fg; |
---|
1568 | sumt += wj * ft; |
---|
1569 | } |
---|
1570 | } |
---|
1571 | sum = suma + sumc + sumg + sumt; |
---|
1572 | freqa = suma / sum; |
---|
1573 | freqc = sumc / sum; |
---|
1574 | freqg = sumg / sum; |
---|
1575 | freqt = sumt / sum; |
---|
1576 | } |
---|
1577 | } /* empiricalfreqs */ |
---|
1578 | |
---|
1579 | |
---|
1580 | xarray *setupxarray () |
---|
1581 | { /* setupxarray */ |
---|
1582 | xarray *x; |
---|
1583 | xtype *data; |
---|
1584 | |
---|
1585 | x = (xarray *) malloc((unsigned) sizeof(xarray)); |
---|
1586 | if (x) { |
---|
1587 | data = (xtype *) malloc((unsigned) (4 * endsite * sizeof(xtype))); |
---|
1588 | if (data) { |
---|
1589 | x->a = data; |
---|
1590 | x->c = data += endsite; |
---|
1591 | x->g = data += endsite; |
---|
1592 | x->t = data + endsite; |
---|
1593 | x->prev = x->next = x; |
---|
1594 | x->owner = (node *) NULL; |
---|
1595 | } |
---|
1596 | else { |
---|
1597 | free((char *) x); |
---|
1598 | return (xarray *) NULL; |
---|
1599 | } |
---|
1600 | } |
---|
1601 | return x; |
---|
1602 | } /* setupxarray */ |
---|
1603 | |
---|
1604 | |
---|
1605 | void linkxarray (req, min, freexptr, usedxptr) |
---|
1606 | int req, min; |
---|
1607 | xarray **freexptr, **usedxptr; |
---|
1608 | /* Link a set of xarrays */ |
---|
1609 | { /* linkxarray */ |
---|
1610 | xarray *first, *prev, *x; |
---|
1611 | int i; |
---|
1612 | |
---|
1613 | first = prev = (xarray *) NULL; |
---|
1614 | i = 0; |
---|
1615 | |
---|
1616 | do { |
---|
1617 | x = setupxarray(); |
---|
1618 | if (x) { |
---|
1619 | if (! first) first = x; |
---|
1620 | else { |
---|
1621 | prev->next = x; |
---|
1622 | x->prev = prev; |
---|
1623 | } |
---|
1624 | prev = x; |
---|
1625 | i++; |
---|
1626 | } |
---|
1627 | else { |
---|
1628 | printf("ERROR: Failure to get requested xarray memory\n"); |
---|
1629 | if (i < min) { |
---|
1630 | anerror = TRUE; |
---|
1631 | return; |
---|
1632 | } |
---|
1633 | } |
---|
1634 | } while ((i < req) && x); |
---|
1635 | |
---|
1636 | if (first) { |
---|
1637 | first->prev = prev; |
---|
1638 | prev->next = first; |
---|
1639 | } |
---|
1640 | |
---|
1641 | *freexptr = first; |
---|
1642 | *usedxptr = (xarray *) NULL; |
---|
1643 | } /* linkxarray */ |
---|
1644 | |
---|
1645 | |
---|
1646 | void setupnodex (tr) |
---|
1647 | tree *tr; |
---|
1648 | { /* setupnodex */ |
---|
1649 | nodeptr p; |
---|
1650 | int i; |
---|
1651 | |
---|
1652 | for (i = tr->mxtips + 1; (i <= 2*(tr->mxtips) - 2); i++) { |
---|
1653 | p = tr->nodep[i]; |
---|
1654 | if (anerror = !(p->x = setupxarray())) { |
---|
1655 | printf("ERROR: Failure to get internal node xarray memory\n"); |
---|
1656 | return; |
---|
1657 | } |
---|
1658 | } |
---|
1659 | } /* setupnodex */ |
---|
1660 | |
---|
1661 | |
---|
1662 | xarray *getxtip (p) |
---|
1663 | nodeptr p; |
---|
1664 | { /* getxtip */ |
---|
1665 | xarray *new; |
---|
1666 | boolean splice; |
---|
1667 | |
---|
1668 | if (! p) return (xarray *) NULL; |
---|
1669 | |
---|
1670 | splice = FALSE; |
---|
1671 | |
---|
1672 | if (p->x) { /* array is there; move to tail of list */ |
---|
1673 | new = p->x; |
---|
1674 | if (new == new->prev) ; /* linked to self; leave it */ |
---|
1675 | else if (new == usedxtip) usedxtip = usedxtip->next; /* at head */ |
---|
1676 | else if (new == usedxtip->prev) ; /* already at tail */ |
---|
1677 | else { /* move to tail of list */ |
---|
1678 | new->prev->next = new->next; |
---|
1679 | new->next->prev = new->prev; |
---|
1680 | splice = TRUE; |
---|
1681 | } |
---|
1682 | } |
---|
1683 | |
---|
1684 | else if (freextip) { /* take from unused list */ |
---|
1685 | p->x = new = freextip; |
---|
1686 | new->owner = p; |
---|
1687 | if (new->prev != new) { /* not only member of freelist */ |
---|
1688 | new->prev->next = new->next; |
---|
1689 | new->next->prev = new->prev; |
---|
1690 | freextip = new->next; |
---|
1691 | } |
---|
1692 | else |
---|
1693 | freextip = (xarray *) NULL; |
---|
1694 | |
---|
1695 | splice = TRUE; |
---|
1696 | } |
---|
1697 | |
---|
1698 | else if (usedxtip) { /* take from head of used list */ |
---|
1699 | usedxtip->owner->x = (xarray *) NULL; |
---|
1700 | p->x = new = usedxtip; |
---|
1701 | new->owner = p; |
---|
1702 | usedxtip = usedxtip->next; |
---|
1703 | } |
---|
1704 | |
---|
1705 | else { |
---|
1706 | printf("ERROR: Unable to locate memory for tip %d.\n", p->number); |
---|
1707 | anerror = TRUE; |
---|
1708 | exit(1); |
---|
1709 | } |
---|
1710 | |
---|
1711 | if (splice) { |
---|
1712 | if (usedxtip) { /* list is not empty */ |
---|
1713 | usedxtip->prev->next = new; |
---|
1714 | new->prev = usedxtip->prev; |
---|
1715 | usedxtip->prev = new; |
---|
1716 | new->next = usedxtip; |
---|
1717 | } |
---|
1718 | else |
---|
1719 | usedxtip = new->prev = new->next = new; |
---|
1720 | } |
---|
1721 | |
---|
1722 | return new; |
---|
1723 | } /* getxtip */ |
---|
1724 | |
---|
1725 | |
---|
1726 | xarray *getxnode (p) |
---|
1727 | nodeptr p; |
---|
1728 | /* Ensure that internal node p has memory */ |
---|
1729 | { /* getxnode */ |
---|
1730 | nodeptr s; |
---|
1731 | |
---|
1732 | if (! (p->x)) { /* Move likelihood array on this node to sector p */ |
---|
1733 | if ((s = p->next)->x || (s = s->next)->x) { |
---|
1734 | p->x = s->x; |
---|
1735 | s->x = (xarray *) NULL; |
---|
1736 | } |
---|
1737 | else { |
---|
1738 | printf("ERROR: Unable to locate memory at node %d.\n", p->number); |
---|
1739 | exit(1); |
---|
1740 | } |
---|
1741 | } |
---|
1742 | return p->x; |
---|
1743 | } /* getxnode */ |
---|
1744 | |
---|
1745 | |
---|
1746 | void newview (p) /* Update likelihoods at node */ |
---|
1747 | nodeptr p; |
---|
1748 | { /* newview */ |
---|
1749 | double z1, lz1, xvlz1, z2, lz2, xvlz2, |
---|
1750 | zz1, zv1, fx1r, fx1y, fx1n, suma1, sumg1, sumc1, sumt1, |
---|
1751 | zz2, zv2, fx2r, fx2y, fx2n, ki, tempi, tempj; |
---|
1752 | nodeptr q, r; |
---|
1753 | xtype *x1a, *x1c, *x1g, *x1t, *x2a, *x2c, *x2g, *x2t, |
---|
1754 | *x3a, *x3c, *x3g, *x3t; |
---|
1755 | int i; |
---|
1756 | |
---|
1757 | if (p->tip) { /* Make sure that data are at tip */ |
---|
1758 | int code; |
---|
1759 | char *yptr; |
---|
1760 | |
---|
1761 | if (p->x) return; /* They are already there */ |
---|
1762 | |
---|
1763 | (void) getxtip(p); /* They are not, so get memory */ |
---|
1764 | x3a = &(p->x->a[0]); /* Move tip data to xarray */ |
---|
1765 | x3c = &(p->x->c[0]); |
---|
1766 | x3g = &(p->x->g[0]); |
---|
1767 | x3t = &(p->x->t[0]); |
---|
1768 | yptr = p->tip; |
---|
1769 | for (i = 0; i < endsite; i++) { |
---|
1770 | code = *yptr++; |
---|
1771 | *x3a++ = code & 1; |
---|
1772 | *x3c++ = (code >> 1) & 1; |
---|
1773 | *x3g++ = (code >> 2) & 1; |
---|
1774 | *x3t++ = (code >> 3) & 1; |
---|
1775 | } |
---|
1776 | return; |
---|
1777 | } |
---|
1778 | |
---|
1779 | /* Internal node needs update */ |
---|
1780 | |
---|
1781 | q = p->next->back; |
---|
1782 | r = p->next->next->back; |
---|
1783 | |
---|
1784 | while ((! p->x) || (! q->x) || (! r->x)) { |
---|
1785 | if (! q->x) newview(q); |
---|
1786 | if (! r->x) newview(r); |
---|
1787 | if (! p->x) (void) getxnode(p); |
---|
1788 | } |
---|
1789 | |
---|
1790 | x1a = &(q->x->a[0]); |
---|
1791 | x1c = &(q->x->c[0]); |
---|
1792 | x1g = &(q->x->g[0]); |
---|
1793 | x1t = &(q->x->t[0]); |
---|
1794 | z1 = q->z; |
---|
1795 | lz1 = (z1 > zmin) ? log(z1) : log(zmin); |
---|
1796 | xvlz1 = xv * lz1; |
---|
1797 | |
---|
1798 | x2a = &(r->x->a[0]); |
---|
1799 | x2c = &(r->x->c[0]); |
---|
1800 | x2g = &(r->x->g[0]); |
---|
1801 | x2t = &(r->x->t[0]); |
---|
1802 | z2 = r->z; |
---|
1803 | lz2 = (z2 > zmin) ? log(z2) : log(zmin); |
---|
1804 | xvlz2 = xv * lz2; |
---|
1805 | |
---|
1806 | x3a = &(p->x->a[0]); |
---|
1807 | x3c = &(p->x->c[0]); |
---|
1808 | x3g = &(p->x->g[0]); |
---|
1809 | x3t = &(p->x->t[0]); |
---|
1810 | |
---|
1811 | { double zz1table[maxcategories+1], zv1table[maxcategories+1], |
---|
1812 | zz2table[maxcategories+1], zv2table[maxcategories+1], |
---|
1813 | *zz1ptr, *zv1ptr, *zz2ptr, *zv2ptr, *rptr; |
---|
1814 | int cat, *cptr; |
---|
1815 | |
---|
1816 | rptr = &(rate[1]); |
---|
1817 | zz1ptr = &(zz1table[1]); |
---|
1818 | zv1ptr = &(zv1table[1]); |
---|
1819 | zz2ptr = &(zz2table[1]); |
---|
1820 | zv2ptr = &(zv2table[1]); |
---|
1821 | for (i = 1; i <= categs; i++) { /* exps for each category */ |
---|
1822 | ki = *rptr++; |
---|
1823 | *zz1ptr++ = exp(ki * lz1); |
---|
1824 | *zv1ptr++ = exp(ki * xvlz1); |
---|
1825 | *zz2ptr++ = exp(ki * lz2); |
---|
1826 | *zv2ptr++ = exp(ki * xvlz2); |
---|
1827 | } |
---|
1828 | |
---|
1829 | cptr = &(catnumb[0]); |
---|
1830 | for (i = 0; i < endsite; i++) { |
---|
1831 | cat = *cptr++; |
---|
1832 | |
---|
1833 | zz1 = zz1table[cat]; |
---|
1834 | zv1 = zv1table[cat]; |
---|
1835 | fx1r = freqa * *x1a + freqg * *x1g; |
---|
1836 | fx1y = freqc * *x1c + freqt * *x1t; |
---|
1837 | fx1n = fx1r + fx1y; |
---|
1838 | tempi = fx1r * invfreqr; |
---|
1839 | tempj = zv1 * (tempi-fx1n) + fx1n; |
---|
1840 | suma1 = zz1 * (*x1a++ - tempi) + tempj; |
---|
1841 | sumg1 = zz1 * (*x1g++ - tempi) + tempj; |
---|
1842 | tempi = fx1y * invfreqy; |
---|
1843 | tempj = zv1 * (tempi-fx1n) + fx1n; |
---|
1844 | sumc1 = zz1 * (*x1c++ - tempi) + tempj; |
---|
1845 | sumt1 = zz1 * (*x1t++ - tempi) + tempj; |
---|
1846 | |
---|
1847 | zz2 = zz2table[cat]; |
---|
1848 | zv2 = zv2table[cat]; |
---|
1849 | fx2r = freqa * *x2a + freqg * *x2g; |
---|
1850 | fx2y = freqc * *x2c + freqt * *x2t; |
---|
1851 | fx2n = fx2r + fx2y; |
---|
1852 | tempi = fx2r * invfreqr; |
---|
1853 | tempj = zv2 * (tempi-fx2n) + fx2n; |
---|
1854 | *x3a++ = suma1 * (zz2 * (*x2a++ - tempi) + tempj); |
---|
1855 | *x3g++ = sumg1 * (zz2 * (*x2g++ - tempi) + tempj); |
---|
1856 | tempi = fx2y * invfreqy; |
---|
1857 | tempj = zv2 * (tempi-fx2n) + fx2n; |
---|
1858 | *x3c++ = sumc1 * (zz2 * (*x2c++ - tempi) + tempj); |
---|
1859 | *x3t++ = sumt1 * (zz2 * (*x2t++ - tempi) + tempj); |
---|
1860 | } |
---|
1861 | } |
---|
1862 | } /* newview */ |
---|
1863 | |
---|
1864 | |
---|
1865 | double evaluate (tr, p) |
---|
1866 | tree *tr; |
---|
1867 | nodeptr p; |
---|
1868 | { /* evaluate */ |
---|
1869 | double sum, z, lz, xvlz, |
---|
1870 | ki, zz, zv, fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y, |
---|
1871 | suma, sumb, sumc, term; |
---|
1872 | double zztable[maxcategories+1], zvtable[maxcategories+1], |
---|
1873 | *zzptr, *zvptr; |
---|
1874 | double *log_f, *rptr; |
---|
1875 | xtype *x1a, *x1c, *x1g, *x1t, *x2a, *x2c, *x2g, *x2t; |
---|
1876 | nodeptr q; |
---|
1877 | int cat, *cptr, i, *wptr; |
---|
1878 | |
---|
1879 | q = p->back; |
---|
1880 | while ((! p->x) || (! q->x)) { |
---|
1881 | if (! (p->x)) newview(p); |
---|
1882 | if (! (q->x)) newview(q); |
---|
1883 | } |
---|
1884 | |
---|
1885 | x1a = &(p->x->a[0]); |
---|
1886 | x1c = &(p->x->c[0]); |
---|
1887 | x1g = &(p->x->g[0]); |
---|
1888 | x1t = &(p->x->t[0]); |
---|
1889 | |
---|
1890 | x2a = &(q->x->a[0]); |
---|
1891 | x2c = &(q->x->c[0]); |
---|
1892 | x2g = &(q->x->g[0]); |
---|
1893 | x2t = &(q->x->t[0]); |
---|
1894 | |
---|
1895 | z = p->z; |
---|
1896 | if (z < zmin) z = zmin; |
---|
1897 | lz = log(z); |
---|
1898 | xvlz = xv * lz; |
---|
1899 | |
---|
1900 | rptr = &(rate[1]); |
---|
1901 | zzptr = &(zztable[1]); |
---|
1902 | zvptr = &(zvtable[1]); |
---|
1903 | for (i = 1; i <= categs; i++) { |
---|
1904 | ki = *rptr++; |
---|
1905 | *zzptr++ = exp(ki * lz); |
---|
1906 | *zvptr++ = exp(ki * xvlz); |
---|
1907 | } |
---|
1908 | |
---|
1909 | wptr = &(aliasweight[0]); |
---|
1910 | cptr = &(catnumb[0]); |
---|
1911 | log_f = tr->log_f; |
---|
1912 | tr->log_f_valid = TRUE; |
---|
1913 | sum = 0.0; |
---|
1914 | |
---|
1915 | for (i = 0; i < endsite; i++) { |
---|
1916 | cat = *cptr++; |
---|
1917 | zz = zztable[cat]; |
---|
1918 | zv = zvtable[cat]; |
---|
1919 | fx1a = freqa * *x1a++; |
---|
1920 | fx1g = freqg * *x1g++; |
---|
1921 | fx1c = freqc * *x1c++; |
---|
1922 | fx1t = freqt * *x1t++; |
---|
1923 | suma = fx1a * *x2a + fx1c * *x2c + fx1g * *x2g + fx1t * *x2t; |
---|
1924 | fx2r = freqa * *x2a++ + freqg * *x2g++; |
---|
1925 | fx2y = freqc * *x2c++ + freqt * *x2t++; |
---|
1926 | fx1r = fx1a + fx1g; |
---|
1927 | fx1y = fx1c + fx1t; |
---|
1928 | sumc = (fx1r + fx1y) * (fx2r + fx2y); |
---|
1929 | sumb = fx1r * fx2r * invfreqr + fx1y * fx2y * invfreqy; |
---|
1930 | suma -= sumb; |
---|
1931 | sumb -= sumc; |
---|
1932 | term = log(zz * suma + zv * sumb + sumc); |
---|
1933 | sum += *wptr++ * term; |
---|
1934 | *log_f++ = term; |
---|
1935 | } |
---|
1936 | |
---|
1937 | tr->likelihood = sum; |
---|
1938 | return sum; |
---|
1939 | } /* evaluate */ |
---|
1940 | |
---|
1941 | |
---|
1942 | #if 0 /* This is not currently used */ |
---|
1943 | double evaluateslope (p, q, z) /* d(L)/d(lz) */ |
---|
1944 | nodeptr p, q; |
---|
1945 | double z; |
---|
1946 | { /* evaluateslope */ |
---|
1947 | double dLdlz, lz, xvlz; |
---|
1948 | double sumai[maxpatterns], sumbi[maxpatterns], sumci[maxpatterns], |
---|
1949 | *sumaptr, *sumbptr, *sumcptr; |
---|
1950 | double zztable[maxcategories+1], zvtable[maxcategories+1], |
---|
1951 | *zzptr, *zvptr; |
---|
1952 | double ki, fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y, |
---|
1953 | suma, sumb, sumc; |
---|
1954 | double *rptr, *wrptr; |
---|
1955 | xtype *x1a, *x1c, *x1g, *x1t, *x2a, *x2c, *x2g, *x2t; |
---|
1956 | int cat, *cptr, i; |
---|
1957 | |
---|
1958 | |
---|
1959 | while ((! p->x) || (! q->x)) { |
---|
1960 | if (! p->x) newview(p); |
---|
1961 | if (! q->x) newview(q); |
---|
1962 | } |
---|
1963 | |
---|
1964 | x1a = &(p->x->a[0]); |
---|
1965 | x1c = &(p->x->c[0]); |
---|
1966 | x1g = &(p->x->g[0]); |
---|
1967 | x1t = &(p->x->t[0]); |
---|
1968 | |
---|
1969 | x2a = &(q->x->a[0]); |
---|
1970 | x2c = &(q->x->c[0]); |
---|
1971 | x2g = &(q->x->g[0]); |
---|
1972 | x2t = &(q->x->t[0]); |
---|
1973 | |
---|
1974 | sumaptr = &(sumai[0]); |
---|
1975 | sumbptr = &(sumbi[0]); |
---|
1976 | sumcptr = &(sumci[0]); |
---|
1977 | |
---|
1978 | for (i = 0; i < endsite; i++) { |
---|
1979 | fx1a = freqa * *x1a++; |
---|
1980 | fx1g = freqg * *x1g++; |
---|
1981 | fx1c = freqc * *x1c++; |
---|
1982 | fx1t = freqt * *x1t++; |
---|
1983 | suma = fx1a * *x2a + fx1c * *x2c + fx1g * *x2g + fx1t * *x2t; |
---|
1984 | fx2r = freqa * *x2a++ + freqg * *x2g++; |
---|
1985 | fx2y = freqc * *x2c++ + freqt * *x2t++; |
---|
1986 | fx1r = fx1a + fx1g; |
---|
1987 | fx1y = fx1c + fx1t; |
---|
1988 | *sumcptr++ = sumc = (fx1r + fx1y) * (fx2r + fx2y); |
---|
1989 | sumb = fx1r * fx2r * invfreqr + fx1y * fx2y * invfreqy; |
---|
1990 | *sumaptr++ = suma - sumb; |
---|
1991 | *sumbptr++ = sumb - sumc; |
---|
1992 | } |
---|
1993 | |
---|
1994 | if (z < zmin) z = zmin; |
---|
1995 | lz = log(z); |
---|
1996 | xvlz = xv * lz; |
---|
1997 | |
---|
1998 | rptr = &(rate[1]); |
---|
1999 | zzptr = &(zztable[1]); |
---|
2000 | zvptr = &(zvtable[1]); |
---|
2001 | for (i = 1; i <= categs; i++) { |
---|
2002 | ki = *rptr++; |
---|
2003 | *zzptr++ = exp(ki * lz); |
---|
2004 | *zvptr++ = exp(ki * xvlz); |
---|
2005 | } |
---|
2006 | |
---|
2007 | sumaptr = &(sumai[0]); |
---|
2008 | sumbptr = &(sumbi[0]); |
---|
2009 | sumcptr = &(sumci[0]); |
---|
2010 | cptr = &(catnumb[0]); |
---|
2011 | wrptr = &(wgt_rate[0]); |
---|
2012 | dLdlz = 0.0; |
---|
2013 | |
---|
2014 | for (i = 0; i < endsite; i++) { |
---|
2015 | cat = *cptr++; |
---|
2016 | suma = *sumaptr++ * zztable[cat]; |
---|
2017 | sumb = *sumbptr++ * zvtable[cat]; |
---|
2018 | dLdlz += *wrptr++ * (suma + sumb*xv) / (suma + sumb + *sumcptr++); |
---|
2019 | } |
---|
2020 | |
---|
2021 | return dLdlz; |
---|
2022 | } /* evaluateslope */ |
---|
2023 | #endif |
---|
2024 | |
---|
2025 | |
---|
2026 | double makenewz (p, q, z0, maxiter) |
---|
2027 | nodeptr p, q; |
---|
2028 | double z0; |
---|
2029 | int maxiter; |
---|
2030 | { /* makenewz */ |
---|
2031 | double abi[maxpatterns], bci[maxpatterns], sumci[maxpatterns], |
---|
2032 | *abptr, *bcptr, *sumcptr; |
---|
2033 | double dlnLidlz, dlnLdlz, d2lnLdlz2, z, zprev, zstep, lz, xvlz, |
---|
2034 | ki, suma, sumb, sumc, ab, bc, inv_Li, t1, t2, |
---|
2035 | fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y; |
---|
2036 | double zztable[maxcategories+1], zvtable[maxcategories+1], |
---|
2037 | *zzptr, *zvptr; |
---|
2038 | double *rptr, *wrptr, *wr2ptr; |
---|
2039 | xtype *x1a, *x1c, *x1g, *x1t, *x2a, *x2c, *x2g, *x2t; |
---|
2040 | int cat, *cptr, i, curvatOK; |
---|
2041 | |
---|
2042 | |
---|
2043 | while ((! p->x) || (! q->x)) { |
---|
2044 | if (! (p->x)) newview(p); |
---|
2045 | if (! (q->x)) newview(q); |
---|
2046 | } |
---|
2047 | |
---|
2048 | x1a = &(p->x->a[0]); |
---|
2049 | x1c = &(p->x->c[0]); |
---|
2050 | x1g = &(p->x->g[0]); |
---|
2051 | x1t = &(p->x->t[0]); |
---|
2052 | x2a = &(q->x->a[0]); |
---|
2053 | x2c = &(q->x->c[0]); |
---|
2054 | x2g = &(q->x->g[0]); |
---|
2055 | x2t = &(q->x->t[0]); |
---|
2056 | |
---|
2057 | abptr = &(abi[0]); |
---|
2058 | bcptr = &(bci[0]); |
---|
2059 | sumcptr = &(sumci[0]); |
---|
2060 | |
---|
2061 | for (i = 0; i < endsite; i++) { |
---|
2062 | fx1a = freqa * *x1a++; |
---|
2063 | fx1g = freqg * *x1g++; |
---|
2064 | fx1c = freqc * *x1c++; |
---|
2065 | fx1t = freqt * *x1t++; |
---|
2066 | suma = fx1a * *x2a + fx1c * *x2c + fx1g * *x2g + fx1t * *x2t; |
---|
2067 | fx2r = freqa * *x2a++ + freqg * *x2g++; |
---|
2068 | fx2y = freqc * *x2c++ + freqt * *x2t++; |
---|
2069 | fx1r = fx1a + fx1g; |
---|
2070 | fx1y = fx1c + fx1t; |
---|
2071 | *sumcptr++ = sumc = (fx1r + fx1y) * (fx2r + fx2y); |
---|
2072 | sumb = fx1r * fx2r * invfreqr + fx1y * fx2y * invfreqy; |
---|
2073 | *abptr++ = suma - sumb; |
---|
2074 | *bcptr++ = sumb - sumc; |
---|
2075 | } |
---|
2076 | |
---|
2077 | z = z0; |
---|
2078 | do { |
---|
2079 | zprev = z; |
---|
2080 | zstep = (1.0 - zmax) * z + zmin; |
---|
2081 | curvatOK = FALSE; |
---|
2082 | |
---|
2083 | do { |
---|
2084 | if (z < zmin) z = zmin; |
---|
2085 | else if (z > zmax) z = zmax; |
---|
2086 | |
---|
2087 | lz = log(z); |
---|
2088 | xvlz = xv * lz; |
---|
2089 | rptr = &(rate[1]); |
---|
2090 | zzptr = &(zztable[1]); |
---|
2091 | zvptr = &(zvtable[1]); |
---|
2092 | |
---|
2093 | for (i = 1; i <= categs; i++) { |
---|
2094 | ki = *rptr++; |
---|
2095 | *zzptr++ = exp(ki * lz); |
---|
2096 | *zvptr++ = exp(ki * xvlz); |
---|
2097 | } |
---|
2098 | |
---|
2099 | abptr = &(abi[0]); |
---|
2100 | bcptr = &(bci[0]); |
---|
2101 | sumcptr = &(sumci[0]); |
---|
2102 | cptr = &(catnumb[0]); |
---|
2103 | wrptr = &(wgt_rate[0]); |
---|
2104 | wr2ptr = &(wgt_rate2[0]); |
---|
2105 | dlnLdlz = 0.0; /* = d(ln(likelihood))/d(lz) */ |
---|
2106 | d2lnLdlz2 = 0.0; /* = d2(ln(likelihood))/d(lz)2 */ |
---|
2107 | |
---|
2108 | for (i = 0; i < endsite; i++) { |
---|
2109 | cat = *cptr++; /* ratecategory(i) */ |
---|
2110 | ab = *abptr++ * zztable[cat]; |
---|
2111 | bc = *bcptr++ * zvtable[cat]; |
---|
2112 | sumc = *sumcptr++; |
---|
2113 | inv_Li = 1.0/(ab + bc + sumc); |
---|
2114 | t1 = ab * inv_Li; |
---|
2115 | t2 = xv * bc * inv_Li; |
---|
2116 | dlnLidlz = t1 + t2; |
---|
2117 | dlnLdlz += *wrptr++ * dlnLidlz; |
---|
2118 | d2lnLdlz2 += *wr2ptr++ * (t1 + xv * t2 - dlnLidlz * dlnLidlz); |
---|
2119 | } |
---|
2120 | |
---|
2121 | if ((d2lnLdlz2 >= 0.0) && (z < zmax)) |
---|
2122 | zprev = z = 0.37 * z + 0.63; /* Bad curvature, shorten branch */ |
---|
2123 | else |
---|
2124 | curvatOK = TRUE; |
---|
2125 | |
---|
2126 | } while (! curvatOK); |
---|
2127 | |
---|
2128 | if (d2lnLdlz2 < 0.0) { |
---|
2129 | z *= exp(-dlnLdlz / d2lnLdlz2); |
---|
2130 | if (z < zmin) z = zmin; |
---|
2131 | if (z > 0.25 * zprev + 0.75) /* Limit steps toward z = 1.0 */ |
---|
2132 | z = 0.25 * zprev + 0.75; |
---|
2133 | } |
---|
2134 | |
---|
2135 | if (z > zmax) z = zmax; |
---|
2136 | |
---|
2137 | } while ((--maxiter > 0) && (ABS(z - zprev) > zstep)); |
---|
2138 | |
---|
2139 | return z; |
---|
2140 | } /* makenewz */ |
---|
2141 | |
---|
2142 | |
---|
2143 | void update (tr, p) |
---|
2144 | tree *tr; |
---|
2145 | nodeptr p; |
---|
2146 | { /* update */ |
---|
2147 | nodeptr q; |
---|
2148 | double z0, z; |
---|
2149 | |
---|
2150 | q = p->back; |
---|
2151 | z0 = q->z; |
---|
2152 | p->z = q->z = z = makenewz(p, q, z0, newzpercycle); |
---|
2153 | if (ABS(z - z0) > deltaz) tr->smoothed = FALSE; |
---|
2154 | } /* update */ |
---|
2155 | |
---|
2156 | |
---|
2157 | void smooth (tr, p) |
---|
2158 | tree *tr; |
---|
2159 | nodeptr p; |
---|
2160 | { /* smooth */ |
---|
2161 | update(tr, p); /* Adjust branch */ |
---|
2162 | if (! p->tip) { /* Adjust "descendents" */ |
---|
2163 | smooth(tr, p->next->back); |
---|
2164 | smooth(tr, p->next->next->back); |
---|
2165 | |
---|
2166 | # if ReturnSmoothedView |
---|
2167 | newview(p); |
---|
2168 | # endif |
---|
2169 | } |
---|
2170 | } /* smooth */ |
---|
2171 | |
---|
2172 | |
---|
2173 | void smoothTree (tr, maxtimes) |
---|
2174 | tree *tr; |
---|
2175 | int maxtimes; |
---|
2176 | { /* smoothTree */ |
---|
2177 | nodeptr p; |
---|
2178 | |
---|
2179 | p = tr->start; |
---|
2180 | |
---|
2181 | while (--maxtimes >= 0 && ! anerror) { |
---|
2182 | tr->smoothed = TRUE; |
---|
2183 | smooth(tr, p->back); |
---|
2184 | if (! p->tip) { |
---|
2185 | smooth(tr, p->next->back); |
---|
2186 | smooth(tr, p->next->next->back); |
---|
2187 | } |
---|
2188 | if (tr->smoothed) break; |
---|
2189 | } |
---|
2190 | } /* smoothTree */ |
---|
2191 | |
---|
2192 | |
---|
2193 | void localSmooth (tr, p, maxtimes) /* Smooth branches around p */ |
---|
2194 | tree *tr; |
---|
2195 | nodeptr p; |
---|
2196 | int maxtimes; |
---|
2197 | { /* localSmooth */ |
---|
2198 | nodeptr pn, pnn; |
---|
2199 | |
---|
2200 | if (p->tip) return; /* Should actually be an error */ |
---|
2201 | |
---|
2202 | pn = p->next; |
---|
2203 | pnn = pn->next; |
---|
2204 | while (--maxtimes >= 0) { |
---|
2205 | tr->smoothed = TRUE; |
---|
2206 | update(tr, p); if (anerror) break; |
---|
2207 | update(tr, pn); if (anerror) break; |
---|
2208 | update(tr, pnn); if (anerror) break; |
---|
2209 | if (tr->smoothed) break; |
---|
2210 | } |
---|
2211 | tr->smoothed = FALSE; /* Only smooth locally */ |
---|
2212 | } /* localSmooth */ |
---|
2213 | |
---|
2214 | |
---|
2215 | void hookup (p, q, z) |
---|
2216 | nodeptr p, q; |
---|
2217 | double z; |
---|
2218 | { /* hookup */ |
---|
2219 | p->back = q; |
---|
2220 | q->back = p; |
---|
2221 | p->z = q->z = z; |
---|
2222 | } /* hookup */ |
---|
2223 | |
---|
2224 | |
---|
2225 | void insert (tr, p, q, glob) /* Insert node p into branch q <-> q->back */ |
---|
2226 | tree *tr; |
---|
2227 | nodeptr p, q; |
---|
2228 | boolean glob; /* Smooth tree globally? */ |
---|
2229 | |
---|
2230 | /* q |
---|
2231 | /. |
---|
2232 | add/ . |
---|
2233 | / . |
---|
2234 | pn . |
---|
2235 | s ---- p .remove |
---|
2236 | pnn . |
---|
2237 | \ . |
---|
2238 | add\ . |
---|
2239 | \. pn = p->next; |
---|
2240 | r pnn = p->next->next; |
---|
2241 | */ |
---|
2242 | |
---|
2243 | { /* insert */ |
---|
2244 | nodeptr r, s; |
---|
2245 | |
---|
2246 | r = q->back; |
---|
2247 | s = p->back; |
---|
2248 | |
---|
2249 | # if BestInsertAverage && ! Master |
---|
2250 | { double zqr, zqs, zrs, lzqr, lzqs, lzrs, lzsum, lzq, lzr, lzs, lzmax; |
---|
2251 | |
---|
2252 | zqr = makenewz(q, r, q->z, iterations); |
---|
2253 | zqs = makenewz(q, s, defaultz, iterations); |
---|
2254 | zrs = makenewz(r, s, defaultz, iterations); |
---|
2255 | |
---|
2256 | lzqr = (zqr > zmin) ? log(zqr) : log(zmin); /* long branches */ |
---|
2257 | lzqs = (zqs > zmin) ? log(zqs) : log(zmin); |
---|
2258 | lzrs = (zrs > zmin) ? log(zrs) : log(zmin); |
---|
2259 | lzsum = 0.5 * (lzqr + lzqs + lzrs); |
---|
2260 | lzq = lzsum - lzrs; |
---|
2261 | lzr = lzsum - lzqs; |
---|
2262 | lzs = lzsum - lzqr; |
---|
2263 | lzmax = log(zmax); |
---|
2264 | if (lzq > lzmax) {lzq = lzmax; lzr = lzqr; lzs = lzqs;} /* short */ |
---|
2265 | else if (lzr > lzmax) {lzr = lzmax; lzq = lzqr; lzs = lzrs;} |
---|
2266 | else if (lzs > lzmax) {lzs = lzmax; lzq = lzqs; lzr = lzrs;} |
---|
2267 | |
---|
2268 | hookup(p->next, q, exp(lzq)); |
---|
2269 | hookup(p->next->next, r, exp(lzr)); |
---|
2270 | hookup(p, s, exp(lzs)); |
---|
2271 | } |
---|
2272 | |
---|
2273 | # else |
---|
2274 | { double z; |
---|
2275 | z = sqrt(q->z); |
---|
2276 | hookup(p->next, q, z); |
---|
2277 | hookup(p->next->next, r, z); |
---|
2278 | } |
---|
2279 | |
---|
2280 | # endif |
---|
2281 | |
---|
2282 | newview(p); /* Required so that sector p is valid at update */ |
---|
2283 | tr->opt_level = 0; |
---|
2284 | |
---|
2285 | # if ! Master /* Smoothings are done by slave */ |
---|
2286 | if (! glob) localSmooth(tr, p, smoothings); /* Smooth locale of p */ |
---|
2287 | if (glob) smoothTree(tr, smoothings); /* Smooth whole tree */ |
---|
2288 | |
---|
2289 | # else |
---|
2290 | tr->likelihood = unlikely; |
---|
2291 | # endif |
---|
2292 | |
---|
2293 | } /* insert */ |
---|
2294 | |
---|
2295 | |
---|
2296 | nodeptr removeNode (tr, p) |
---|
2297 | tree *tr; |
---|
2298 | nodeptr p; |
---|
2299 | |
---|
2300 | /* q |
---|
2301 | .| |
---|
2302 | remove. | |
---|
2303 | . | |
---|
2304 | pn | |
---|
2305 | s ---- p |add |
---|
2306 | pnn | |
---|
2307 | . | |
---|
2308 | remove. | |
---|
2309 | .| pn = p->next; |
---|
2310 | r pnn = p->next->next; |
---|
2311 | */ |
---|
2312 | |
---|
2313 | /* remove p and return where it was */ |
---|
2314 | { /* removeNode */ |
---|
2315 | double zqr; |
---|
2316 | nodeptr q, r; |
---|
2317 | |
---|
2318 | q = p->next->back; |
---|
2319 | r = p->next->next->back; |
---|
2320 | zqr = q->z * r->z; |
---|
2321 | # if ! Master |
---|
2322 | hookup(q, r, makenewz(q, r, zqr, iterations)); |
---|
2323 | # else |
---|
2324 | hookup(q, r, zqr); |
---|
2325 | # endif |
---|
2326 | |
---|
2327 | p->next->next->back = p->next->back = (node *) NULL; |
---|
2328 | return q; |
---|
2329 | } /* removeNode */ |
---|
2330 | |
---|
2331 | |
---|
2332 | void initrav (p) |
---|
2333 | nodeptr p; |
---|
2334 | { /* initrav */ |
---|
2335 | if (! p->tip) { |
---|
2336 | initrav(p->next->back); |
---|
2337 | initrav(p->next->next->back); |
---|
2338 | newview(p); |
---|
2339 | } |
---|
2340 | } /* initrav */ |
---|
2341 | |
---|
2342 | |
---|
2343 | nodeptr buildNewTip (tr, p) |
---|
2344 | tree *tr; |
---|
2345 | nodeptr p; |
---|
2346 | { /* buildNewTip */ |
---|
2347 | nodeptr q; |
---|
2348 | |
---|
2349 | q = tr->nodep[(tr->nextnode)++]; |
---|
2350 | hookup(p, q, defaultz); |
---|
2351 | return q; |
---|
2352 | } /* buildNewTip */ |
---|
2353 | |
---|
2354 | |
---|
2355 | void buildSimpleTree (tr, ip, iq, ir) |
---|
2356 | tree *tr; |
---|
2357 | int ip, iq, ir; |
---|
2358 | { /* buildSimpleTree */ |
---|
2359 | /* p, q and r are tips meeting at s */ |
---|
2360 | nodeptr p, s; |
---|
2361 | int i; |
---|
2362 | |
---|
2363 | i = MIN(ip, iq); |
---|
2364 | if (ir < i) i = ir; |
---|
2365 | tr->start = tr->nodep[i]; |
---|
2366 | tr->ntips = 3; |
---|
2367 | p = tr->nodep[ip]; |
---|
2368 | hookup(p, tr->nodep[iq], defaultz); |
---|
2369 | s = buildNewTip(tr, tr->nodep[ir]); |
---|
2370 | insert(tr, s, p, FALSE); /* Smoothing is local to s */ |
---|
2371 | } /* buildSimpleTree */ |
---|
2372 | |
---|
2373 | |
---|
2374 | boolean readKeyValue (string, key, format, value) |
---|
2375 | char *string, *key, *format; |
---|
2376 | void *value; |
---|
2377 | { /* readKeyValue */ |
---|
2378 | |
---|
2379 | if (! (string = strstr(string, key))) return FALSE; |
---|
2380 | string += strlen(key); |
---|
2381 | if (! (string = index(string, '='))) return FALSE; |
---|
2382 | string++; |
---|
2383 | return sscanf(string, format, value); /* 1 if read, otherwise 0 */ |
---|
2384 | } /* readKeyValue */ |
---|
2385 | |
---|
2386 | |
---|
2387 | #if Master || Slave |
---|
2388 | |
---|
2389 | double str_readTreeLikelihood (treestr) |
---|
2390 | char *treestr; |
---|
2391 | { /* str_readTreeLikelihood */ |
---|
2392 | double lk1; |
---|
2393 | char *com, *com_end; |
---|
2394 | boolean readKeyValue(); |
---|
2395 | |
---|
2396 | if ((com = index(treestr, '[')) && (com < index(treestr, '(')) |
---|
2397 | && (com_end = index(com, ']'))) { |
---|
2398 | com++; |
---|
2399 | *com_end = 0; |
---|
2400 | if (readKeyValue(com, likelihood_key, "%lg", (void *) &(lk1))) { |
---|
2401 | *com_end = ']'; |
---|
2402 | return lk1; |
---|
2403 | } |
---|
2404 | } |
---|
2405 | |
---|
2406 | fprintf(stderr, "ERROR reading likelihood in receiveTree\n"); |
---|
2407 | anerror = TRUE; |
---|
2408 | return 1.0; |
---|
2409 | } /* str_readTreeLikelihood */ |
---|
2410 | |
---|
2411 | |
---|
2412 | void sendTree (comm, tr) |
---|
2413 | comm_block *comm; |
---|
2414 | tree *tr; |
---|
2415 | { /* sendTree */ |
---|
2416 | char treestr[maxsp*(nmlngth+24)+100], *p1; |
---|
2417 | char *treeString(); |
---|
2418 | # if Master |
---|
2419 | void sendTreeNum(); |
---|
2420 | # endif |
---|
2421 | |
---|
2422 | comm->done_flag = tr->likelihood > 0.0; |
---|
2423 | if (comm->done_flag) |
---|
2424 | write_comm_msg(comm, NULL); |
---|
2425 | |
---|
2426 | else { |
---|
2427 | # if Master |
---|
2428 | if (send_ahead >= MAX_SEND_AHEAD) { |
---|
2429 | int n_to_get; |
---|
2430 | |
---|
2431 | n_to_get = (send_ahead+1)/2; |
---|
2432 | sendTreeNum(n_to_get); |
---|
2433 | send_ahead -= n_to_get; |
---|
2434 | read_comm_msg(&comm_slave, treestr); |
---|
2435 | new_likelihood = str_readTreeLikelihood(treestr); |
---|
2436 | if (! best_tr_recv || (new_likelihood > best_lk_recv)) { |
---|
2437 | if (best_tr_recv) free(best_tr_recv); |
---|
2438 | best_tr_recv = malloc((unsigned) (strlen(treestr) + 1)); |
---|
2439 | strcpy(best_tr_recv, treestr); |
---|
2440 | best_lk_recv = new_likelihood; |
---|
2441 | } |
---|
2442 | } |
---|
2443 | send_ahead++; |
---|
2444 | # endif /* End #if Master */ |
---|
2445 | |
---|
2446 | REPORT_SEND_TREE; |
---|
2447 | (void) sprintf(treestr, "[%16.14g] ", tr->likelihood); |
---|
2448 | p1 = treestr + strlen(treestr); |
---|
2449 | (void) treeString(p1, tr, tr->start->back, 1); |
---|
2450 | write_comm_msg(comm, treestr); |
---|
2451 | } |
---|
2452 | } /* sendTree */ |
---|
2453 | |
---|
2454 | |
---|
2455 | void receiveTree (comm, tr) |
---|
2456 | comm_block *comm; |
---|
2457 | tree *tr; |
---|
2458 | { /* receiveTree */ |
---|
2459 | char treestr[maxsp*(nmlngth+24)+100], *p1; |
---|
2460 | void str_treeReadLen(); |
---|
2461 | |
---|
2462 | read_comm_msg(comm, treestr); |
---|
2463 | if (comm->done_flag) |
---|
2464 | tr->likelihood = 1.0; |
---|
2465 | |
---|
2466 | else { |
---|
2467 | # if Master |
---|
2468 | if (best_tr_recv) { |
---|
2469 | if (str_readTreeLikelihood(treestr) < best_lk_recv) { |
---|
2470 | strcpy(treestr, best_tr_recv); /* Overwrite new tree with best */ |
---|
2471 | } |
---|
2472 | free(best_tr_recv); |
---|
2473 | best_tr_recv = NULL; |
---|
2474 | } |
---|
2475 | # endif /* End #if Master */ |
---|
2476 | |
---|
2477 | p1 = treestr; |
---|
2478 | if (str_findch(&p1, '[') != '[' |
---|
2479 | || sscanf(p1, "%lg", &(tr->likelihood)) != 1 |
---|
2480 | || (p1 = index(p1, ']')) == NULL) { |
---|
2481 | fprintf(stderr, "ERROR reading likelihood in receiveTree\n"); |
---|
2482 | anerror = TRUE; |
---|
2483 | return; |
---|
2484 | } |
---|
2485 | p1++; /* skip ']' */ |
---|
2486 | str_treeReadLen(p1, tr); |
---|
2487 | } |
---|
2488 | } /* receiveTree */ |
---|
2489 | |
---|
2490 | |
---|
2491 | void requestForWork () |
---|
2492 | { /* requestForWork */ |
---|
2493 | p4_send(DNAML_REQUEST, DNAML_DISPATCHER_ID, NULL, 0); |
---|
2494 | } /* requestForWork */ |
---|
2495 | #endif /* End #if Master || Slave */ |
---|
2496 | |
---|
2497 | |
---|
2498 | #if Master |
---|
2499 | void sendTreeNum(n_to_get) |
---|
2500 | int n_to_get; |
---|
2501 | { /* sendTreeNum */ |
---|
2502 | char scr[512]; |
---|
2503 | |
---|
2504 | sprintf(scr, "%d", n_to_get); |
---|
2505 | p4_send(DNAML_NUM_TREE, DNAML_MERGER_ID, scr, strlen(scr)+1); |
---|
2506 | } /* sendTreeNum */ |
---|
2507 | |
---|
2508 | |
---|
2509 | void getReturnedTrees (tr, bt, n_tree_sent) |
---|
2510 | tree *tr; |
---|
2511 | bestlist *bt; |
---|
2512 | int n_tree_sent; /* number of trees sent to slaves */ |
---|
2513 | { /* getReturnedTrees */ |
---|
2514 | void sendTreeNum(), receiveTree(); |
---|
2515 | |
---|
2516 | sendTreeNum(send_ahead); |
---|
2517 | send_ahead = 0; |
---|
2518 | |
---|
2519 | receiveTree(&comm_slave, tr); |
---|
2520 | tr->smoothed = TRUE; |
---|
2521 | (void) saveBestTree(bt, tr); |
---|
2522 | } /* getReturnedTrees */ |
---|
2523 | #endif |
---|
2524 | |
---|
2525 | |
---|
2526 | void cacheZ (tr) |
---|
2527 | tree *tr; |
---|
2528 | { /* cacheZ */ |
---|
2529 | nodeptr p; |
---|
2530 | int nodes; |
---|
2531 | |
---|
2532 | nodes = tr->mxtips + 3 * (tr->mxtips - 2); |
---|
2533 | p = tr->nodep[1]; |
---|
2534 | while (nodes-- > 0) {p->z0 = p->z; p++;} |
---|
2535 | } /* cacheZ */ |
---|
2536 | |
---|
2537 | |
---|
2538 | void restoreZ (tr) |
---|
2539 | tree *tr; |
---|
2540 | { /* restoreZ */ |
---|
2541 | nodeptr p; |
---|
2542 | int nodes; |
---|
2543 | |
---|
2544 | nodes = tr->mxtips + 3 * (tr->mxtips - 2); |
---|
2545 | p = tr->nodep[1]; |
---|
2546 | while (nodes-- > 0) {p->z = p->z0; p++;} |
---|
2547 | } /* restoreZ */ |
---|
2548 | |
---|
2549 | |
---|
2550 | testInsert (tr, p, q, bt, fast) |
---|
2551 | tree *tr; |
---|
2552 | nodeptr p, q; |
---|
2553 | bestlist *bt; |
---|
2554 | boolean fast; |
---|
2555 | { /* testInsert */ |
---|
2556 | double qz; |
---|
2557 | nodeptr r; |
---|
2558 | |
---|
2559 | r = q->back; /* Save original connection */ |
---|
2560 | qz = q->z; |
---|
2561 | insert(tr, p, q, ! fast); |
---|
2562 | |
---|
2563 | # if ! Master |
---|
2564 | (void) evaluate(tr, fast ? p->next->next : tr->start); |
---|
2565 | (void) saveBestTree(bt, tr); |
---|
2566 | # else /* Master */ |
---|
2567 | tr->likelihood = unlikely; |
---|
2568 | sendTree(&comm_slave, tr); |
---|
2569 | # endif |
---|
2570 | |
---|
2571 | /* remove p from this branch */ |
---|
2572 | |
---|
2573 | hookup(q, r, qz); |
---|
2574 | p->next->next->back = p->next->back = (nodeptr) NULL; |
---|
2575 | if (! fast) { /* With fast add, other values are still OK */ |
---|
2576 | restoreZ(tr); /* Restore branch lengths */ |
---|
2577 | # if ! Master /* Regenerate x values */ |
---|
2578 | initrav(p->back); |
---|
2579 | initrav(q); |
---|
2580 | initrav(r); |
---|
2581 | # endif |
---|
2582 | } |
---|
2583 | } /* testInsert */ |
---|
2584 | |
---|
2585 | |
---|
2586 | int addTraverse (tr, p, q, mintrav, maxtrav, bt, fast) |
---|
2587 | tree *tr; |
---|
2588 | nodeptr p, q; |
---|
2589 | int mintrav, maxtrav; |
---|
2590 | bestlist *bt; |
---|
2591 | boolean fast; |
---|
2592 | { /* addTraverse */ |
---|
2593 | int tested; |
---|
2594 | |
---|
2595 | tested = 0; |
---|
2596 | if (--mintrav <= 0) { /* Moved minimum distance? */ |
---|
2597 | testInsert(tr, p, q, bt, fast); |
---|
2598 | tested++; |
---|
2599 | } |
---|
2600 | |
---|
2601 | if ((! q->tip) && (--maxtrav > 0)) { /* Continue traverse? */ |
---|
2602 | tested += addTraverse(tr, p, q->next->back, |
---|
2603 | mintrav, maxtrav, bt, fast); |
---|
2604 | tested += addTraverse(tr, p, q->next->next->back, |
---|
2605 | mintrav, maxtrav, bt, fast); |
---|
2606 | } |
---|
2607 | |
---|
2608 | return tested; |
---|
2609 | } /* addTraverse */ |
---|
2610 | |
---|
2611 | |
---|
2612 | int rearrange (tr, p, mintrav, maxtrav, bt) |
---|
2613 | tree *tr; |
---|
2614 | nodeptr p; |
---|
2615 | int mintrav, maxtrav; |
---|
2616 | bestlist *bt; |
---|
2617 | /* rearranges the tree, globally or locally */ |
---|
2618 | { /* rearrange */ |
---|
2619 | double p1z, p2z, q1z, q2z; |
---|
2620 | nodeptr p1, p2, q, q1, q2; |
---|
2621 | int tested, mintrav2; |
---|
2622 | |
---|
2623 | tested = 0; |
---|
2624 | if (maxtrav < 1 || mintrav > maxtrav) return tested; |
---|
2625 | |
---|
2626 | /* Moving subtree forward in tree. */ |
---|
2627 | |
---|
2628 | if (! p->tip) { |
---|
2629 | p1 = p->next->back; |
---|
2630 | p2 = p->next->next->back; |
---|
2631 | if (! p1->tip || ! p2->tip) { |
---|
2632 | p1z = p1->z; |
---|
2633 | p2z = p2->z; |
---|
2634 | (void) removeNode(tr, p); |
---|
2635 | cacheZ(tr); |
---|
2636 | if (! p1->tip) { |
---|
2637 | tested += addTraverse(tr, p, p1->next->back, |
---|
2638 | mintrav, maxtrav, bt, FALSE); |
---|
2639 | tested += addTraverse(tr, p, p1->next->next->back, |
---|
2640 | mintrav, maxtrav, bt, FALSE); |
---|
2641 | } |
---|
2642 | |
---|
2643 | if (! p2->tip) { |
---|
2644 | tested += addTraverse(tr, p, p2->next->back, |
---|
2645 | mintrav, maxtrav, bt, FALSE); |
---|
2646 | tested += addTraverse(tr, p, p2->next->next->back, |
---|
2647 | mintrav, maxtrav, bt, FALSE); |
---|
2648 | } |
---|
2649 | |
---|
2650 | hookup(p->next, p1, p1z); /* Restore original tree */ |
---|
2651 | hookup(p->next->next, p2, p2z); |
---|
2652 | initrav(tr->start); |
---|
2653 | initrav(tr->start->back); |
---|
2654 | } |
---|
2655 | } /* if (! p->tip) */ |
---|
2656 | |
---|
2657 | /* Moving subtree backward in tree. Minimum move is 2 to avoid duplicates */ |
---|
2658 | |
---|
2659 | q = p->back; |
---|
2660 | if (! q->tip && maxtrav > 1) { |
---|
2661 | q1 = q->next->back; |
---|
2662 | q2 = q->next->next->back; |
---|
2663 | if (! q1->tip && (!q1->next->back->tip || !q1->next->next->back->tip) || |
---|
2664 | ! q2->tip && (!q2->next->back->tip || !q2->next->next->back->tip)) { |
---|
2665 | q1z = q1->z; |
---|
2666 | q2z = q2->z; |
---|
2667 | (void) removeNode(tr, q); |
---|
2668 | cacheZ(tr); |
---|
2669 | mintrav2 = mintrav > 2 ? mintrav : 2; |
---|
2670 | |
---|
2671 | if (! q1->tip) { |
---|
2672 | tested += addTraverse(tr, q, q1->next->back, |
---|
2673 | mintrav2 , maxtrav, bt, FALSE); |
---|
2674 | tested += addTraverse(tr, q, q1->next->next->back, |
---|
2675 | mintrav2 , maxtrav, bt, FALSE); |
---|
2676 | } |
---|
2677 | |
---|
2678 | if (! q2->tip) { |
---|
2679 | tested += addTraverse(tr, q, q2->next->back, |
---|
2680 | mintrav2 , maxtrav, bt, FALSE); |
---|
2681 | tested += addTraverse(tr, q, q2->next->next->back, |
---|
2682 | mintrav2 , maxtrav, bt, FALSE); |
---|
2683 | } |
---|
2684 | |
---|
2685 | hookup(q->next, q1, q1z); /* Restore original tree */ |
---|
2686 | hookup(q->next->next, q2, q2z); |
---|
2687 | initrav(tr->start); |
---|
2688 | initrav(tr->start->back); |
---|
2689 | } |
---|
2690 | } /* if (! q->tip && maxtrav > 1) */ |
---|
2691 | |
---|
2692 | /* Move other subtrees */ |
---|
2693 | |
---|
2694 | if (! p->tip) { |
---|
2695 | tested += rearrange(tr, p->next->back, mintrav, maxtrav, bt); |
---|
2696 | tested += rearrange(tr, p->next->next->back, mintrav, maxtrav, bt); |
---|
2697 | } |
---|
2698 | |
---|
2699 | return tested; |
---|
2700 | } /* rearrange */ |
---|
2701 | |
---|
2702 | |
---|
2703 | FILE *fopen_pid (filenm, mode) |
---|
2704 | char *filenm, *mode; |
---|
2705 | { /* fopen_pid */ |
---|
2706 | char scr[512]; |
---|
2707 | |
---|
2708 | (void) sprintf(scr, "%s.%d", filenm, getpid()); |
---|
2709 | return fopen(scr, mode); |
---|
2710 | } /* fopen_pid */ |
---|
2711 | |
---|
2712 | |
---|
2713 | #if DeleteCheckpointFile |
---|
2714 | void unlink_pid (filenm) |
---|
2715 | char *filenm; |
---|
2716 | { /* unlink_pid */ |
---|
2717 | char scr[512]; |
---|
2718 | |
---|
2719 | (void) sprintf(scr, "%s.%d", filenm, getpid()); |
---|
2720 | unlink(scr); |
---|
2721 | } /* unlink_pid */ |
---|
2722 | #endif |
---|
2723 | |
---|
2724 | |
---|
2725 | void writeCheckpoint (tr) |
---|
2726 | tree *tr; |
---|
2727 | { /* writeCheckpoint */ |
---|
2728 | FILE *checkpointf; |
---|
2729 | void treeOut(); |
---|
2730 | |
---|
2731 | checkpointf = fopen_pid(checkpointname,"a"); |
---|
2732 | if (checkpointf) { |
---|
2733 | treeOut(checkpointf, tr, 1); /* 1 is for Newick format */ |
---|
2734 | (void) fclose(checkpointf); |
---|
2735 | } |
---|
2736 | } /* writeCheckpoint */ |
---|
2737 | |
---|
2738 | |
---|
2739 | node * findAnyTip(p) |
---|
2740 | nodeptr p; |
---|
2741 | { /* findAnyTip */ |
---|
2742 | return p->tip ? p : findAnyTip(p->next->back); |
---|
2743 | } /* findAnyTip */ |
---|
2744 | |
---|
2745 | |
---|
2746 | void optimize (tr, maxtrav, bt) |
---|
2747 | tree *tr; |
---|
2748 | int maxtrav; |
---|
2749 | bestlist *bt; |
---|
2750 | { /* optimize */ |
---|
2751 | nodeptr p; |
---|
2752 | int mintrav, tested; |
---|
2753 | |
---|
2754 | if (tr->ntips < 4) return; |
---|
2755 | |
---|
2756 | writeCheckpoint(tr); /* checkpoint the starting tree */ |
---|
2757 | |
---|
2758 | if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3; |
---|
2759 | if (maxtrav <= tr->opt_level) return; |
---|
2760 | |
---|
2761 | printf(" Doing %s rearrangements\n", |
---|
2762 | (maxtrav == 1) ? "local" : |
---|
2763 | (maxtrav < tr->ntips - 3) ? "regional" : "global"); |
---|
2764 | |
---|
2765 | /* loop while tree gets better */ |
---|
2766 | |
---|
2767 | do { |
---|
2768 | (void) startOpt(bt, tr); |
---|
2769 | mintrav = tr->opt_level + 1; |
---|
2770 | |
---|
2771 | /* rearrange must start from a tip or it will miss some trees */ |
---|
2772 | |
---|
2773 | p = findAnyTip(tr->start); |
---|
2774 | tested = rearrange(tr, p->back, mintrav, maxtrav, bt); |
---|
2775 | |
---|
2776 | # if Master |
---|
2777 | getReturnedTrees(tr, bt, tested); |
---|
2778 | # endif |
---|
2779 | |
---|
2780 | if (anerror) return; |
---|
2781 | bt->numtrees += tested; |
---|
2782 | (void) setOptLevel(bt, maxtrav); |
---|
2783 | (void) recallBestTree(bt, 1, tr); /* recover best found tree */ |
---|
2784 | |
---|
2785 | printf(" Tested %d alternative trees\n", tested); |
---|
2786 | if (bt->improved) { |
---|
2787 | printf(" Ln Likelihood =%14.5f\n", tr->likelihood); |
---|
2788 | } |
---|
2789 | |
---|
2790 | writeCheckpoint(tr); /* checkpoint the new tree */ |
---|
2791 | } while (maxtrav > tr->opt_level); |
---|
2792 | |
---|
2793 | } /* optimize */ |
---|
2794 | |
---|
2795 | |
---|
2796 | void coordinates (tr, p, lengthsum, tdptr) |
---|
2797 | tree *tr; |
---|
2798 | nodeptr p; |
---|
2799 | double lengthsum; |
---|
2800 | drawdata *tdptr; |
---|
2801 | { /* coordinates */ |
---|
2802 | /* establishes coordinates of nodes */ |
---|
2803 | double x, z; |
---|
2804 | nodeptr q, first, last; |
---|
2805 | |
---|
2806 | if (p->tip) { |
---|
2807 | p->xcoord = nint(over * lengthsum); |
---|
2808 | p->ymax = p->ymin = p->ycoord = tdptr->tipy; |
---|
2809 | tdptr->tipy += down; |
---|
2810 | if (lengthsum > tdptr->tipmax) tdptr->tipmax = lengthsum; |
---|
2811 | } |
---|
2812 | |
---|
2813 | else { |
---|
2814 | q = p->next; |
---|
2815 | do { |
---|
2816 | z = q->z; |
---|
2817 | if (z < zmin) z = zmin; |
---|
2818 | x = lengthsum - fracchange * log(z); |
---|
2819 | coordinates(tr, q->back, x, tdptr); |
---|
2820 | q = q->next; |
---|
2821 | } while (p == tr->start->back ? q != p->next : q != p); |
---|
2822 | |
---|
2823 | first = p->next->back; |
---|
2824 | q = p; |
---|
2825 | while (q->next != p) q = q->next; |
---|
2826 | last = q->back; |
---|
2827 | p->xcoord = nint(over * lengthsum); |
---|
2828 | p->ycoord = (first->ycoord + last->ycoord)/2; |
---|
2829 | p->ymin = first->ymin; |
---|
2830 | p->ymax = last->ymax; |
---|
2831 | } |
---|
2832 | } /* coordinates */ |
---|
2833 | |
---|
2834 | |
---|
2835 | void copyTrimmedName (cp1, cp2) |
---|
2836 | char *cp1, *cp2; |
---|
2837 | { /* copyTrimmedName */ |
---|
2838 | char *ep; |
---|
2839 | |
---|
2840 | ep = cp1; |
---|
2841 | while (*ep) ep++; /* move forward to end */ |
---|
2842 | ep--; /* move back to last */ |
---|
2843 | while (ep >= cp1 && white((int) *(ep))) ep--; /* trim white */ |
---|
2844 | while (cp1 <= ep) *cp2++ = *cp1++; /* copy to new end */ |
---|
2845 | *cp2 = 0; |
---|
2846 | } /* copyTrimmedName */ |
---|
2847 | |
---|
2848 | |
---|
2849 | void drawline (tr, i, scale) |
---|
2850 | tree *tr; |
---|
2851 | int i; |
---|
2852 | double scale; |
---|
2853 | /* draws one row of the tree diagram by moving up tree */ |
---|
2854 | /* Modified to handle 1000 taxa, October 16, 1991 */ |
---|
2855 | { /* drawline */ |
---|
2856 | nodeptr p, q, r, first, last; |
---|
2857 | char *nameptr; |
---|
2858 | int n, j, k, l, extra; |
---|
2859 | boolean done; |
---|
2860 | |
---|
2861 | p = q = tr->start->back; |
---|
2862 | extra = 0; |
---|
2863 | |
---|
2864 | if (i == p->ycoord) { |
---|
2865 | k = q->number - tr->mxtips; |
---|
2866 | for (j = k; j < 1000; j *= 10) putchar('-'); |
---|
2867 | printf("%d", k); |
---|
2868 | extra = 1; |
---|
2869 | } |
---|
2870 | else printf(" "); |
---|
2871 | |
---|
2872 | do { |
---|
2873 | if (! p->tip) { |
---|
2874 | r = p->next; |
---|
2875 | done = FALSE; |
---|
2876 | do { |
---|
2877 | if ((i >= r->back->ymin) && (i <= r->back->ymax)) { |
---|
2878 | q = r->back; |
---|
2879 | done = TRUE; |
---|
2880 | } |
---|
2881 | r = r->next; |
---|
2882 | } while (! done && (p == tr->start->back ? r != p->next : r != p)); |
---|
2883 | |
---|
2884 | first = p->next->back; |
---|
2885 | r = p; |
---|
2886 | while (r->next != p) r = r->next; |
---|
2887 | last = r->back; |
---|
2888 | if (p == tr->start->back) last = p->back; |
---|
2889 | } |
---|
2890 | |
---|
2891 | done = (p->tip) || (p == q); |
---|
2892 | n = nint(scale*(q->xcoord - p->xcoord)); |
---|
2893 | if ((n < 3) && (! q->tip)) n = 3; |
---|
2894 | n -= extra; |
---|
2895 | extra = 0; |
---|
2896 | |
---|
2897 | if ((q->ycoord == i) && (! done)) { |
---|
2898 | if (p->ycoord != q->ycoord) putchar('+'); |
---|
2899 | else putchar('-'); |
---|
2900 | |
---|
2901 | if (! q->tip) { |
---|
2902 | k = q->number - tr->mxtips; |
---|
2903 | l = n - 3; |
---|
2904 | for (j = k; j < 100; j *= 10) l++; |
---|
2905 | for (j = 1; j <= l; j++) putchar('-'); |
---|
2906 | printf("%d", k); |
---|
2907 | extra = 1; |
---|
2908 | } |
---|
2909 | else for (j = 1; j <= n-1; j++) putchar('-'); |
---|
2910 | } |
---|
2911 | |
---|
2912 | else if (! p->tip) { |
---|
2913 | if ((last->ycoord > i) && (first->ycoord < i) && (i != p->ycoord)) { |
---|
2914 | putchar('!'); |
---|
2915 | for (j = 1; j <= n-1; j++) putchar(' '); |
---|
2916 | } |
---|
2917 | else for (j = 1; j <= n; j++) putchar(' '); |
---|
2918 | } |
---|
2919 | |
---|
2920 | else |
---|
2921 | for (j = 1; j <= n; j++) putchar(' '); |
---|
2922 | |
---|
2923 | p = q; |
---|
2924 | } while (! done); |
---|
2925 | |
---|
2926 | if ((p->ycoord == i) && p->tip) { |
---|
2927 | char trimmed[nmlngth + 1]; |
---|
2928 | |
---|
2929 | copyTrimmedName(p->name, trimmed); |
---|
2930 | printf(" %s", trimmed); |
---|
2931 | } |
---|
2932 | |
---|
2933 | putchar('\n'); |
---|
2934 | } /* drawline */ |
---|
2935 | |
---|
2936 | |
---|
2937 | void printTree (tr) |
---|
2938 | tree *tr; |
---|
2939 | /* prints out diagram of the tree */ |
---|
2940 | { /* printTree */ |
---|
2941 | drawdata tipdata; |
---|
2942 | double scale; |
---|
2943 | int i, imax; |
---|
2944 | |
---|
2945 | if (treeprint) { |
---|
2946 | putchar('\n'); |
---|
2947 | tipdata.tipy = 1; |
---|
2948 | tipdata.tipmax = 0.0; |
---|
2949 | coordinates(tr, tr->start->back, (double) 0.0, & tipdata); |
---|
2950 | scale = 1.0 / tipdata.tipmax; |
---|
2951 | imax = tipdata.tipy - down; |
---|
2952 | for (i = 1; i <= imax; i++) drawline(tr, i, scale); |
---|
2953 | printf("\nRemember: "); |
---|
2954 | if (outgropt) printf("(although rooted by outgroup) "); |
---|
2955 | printf("this is an unrooted tree!\n\n"); |
---|
2956 | } |
---|
2957 | } /* printTree */ |
---|
2958 | |
---|
2959 | |
---|
2960 | double sigma (p, sumlrptr) |
---|
2961 | nodeptr p; |
---|
2962 | double *sumlrptr; |
---|
2963 | /* compute standard deviation */ |
---|
2964 | { /* sigma */ |
---|
2965 | double slope, sum, sumlr, z, zv, zz, lz, |
---|
2966 | rat, suma, sumb, sumc, d2, d, li, temp, abzz, bczv, t3, |
---|
2967 | fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y, w; |
---|
2968 | double *rptr; |
---|
2969 | xtype *x1a, *x1c, *x1g, *x1t, *x2a, *x2c, *x2g, *x2t; |
---|
2970 | nodeptr q; |
---|
2971 | int i, *wptr; |
---|
2972 | |
---|
2973 | q = p->back; |
---|
2974 | while ((! p->x) || (! q->x)) { |
---|
2975 | if (! (p->x)) newview(p); |
---|
2976 | if (! (q->x)) newview(q); |
---|
2977 | } |
---|
2978 | |
---|
2979 | x1a = &(p->x->a[0]); |
---|
2980 | x1c = &(p->x->c[0]); |
---|
2981 | x1g = &(p->x->g[0]); |
---|
2982 | x1t = &(p->x->t[0]); |
---|
2983 | |
---|
2984 | x2a = &(q->x->a[0]); |
---|
2985 | x2c = &(q->x->c[0]); |
---|
2986 | x2g = &(q->x->g[0]); |
---|
2987 | x2t = &(q->x->t[0]); |
---|
2988 | |
---|
2989 | z = p->z; |
---|
2990 | if (z < zmin) z = zmin; |
---|
2991 | lz = log(z); |
---|
2992 | |
---|
2993 | wptr = &(aliasweight[0]); |
---|
2994 | rptr = &(ratvalue[0]); |
---|
2995 | sum = sumlr = slope = 0.0; |
---|
2996 | |
---|
2997 | for (i = 0; i < endsite; i++) { |
---|
2998 | rat = *rptr++; |
---|
2999 | zz = exp(rat * lz); |
---|
3000 | zv = exp(rat*xv * lz); |
---|
3001 | |
---|
3002 | fx1a = freqa * *x1a++; |
---|
3003 | fx1g = freqg * *x1g++; |
---|
3004 | fx1c = freqc * *x1c++; |
---|
3005 | fx1t = freqt * *x1t++; |
---|
3006 | fx1r = fx1a + fx1g; |
---|
3007 | fx1y = fx1c + fx1t; |
---|
3008 | suma = fx1a * *x2a + fx1c * *x2c + fx1g * *x2g + fx1t * *x2t; |
---|
3009 | fx2r = freqa * *x2a++ + freqg * *x2g++; |
---|
3010 | fx2y = freqc * *x2c++ + freqt * *x2t++; |
---|
3011 | sumc = (fx1r + fx1y) * (fx2r + fx2y); |
---|
3012 | sumb = fx1r * fx2r * invfreqr + fx1y * fx2y * invfreqy; |
---|
3013 | abzz = zz * (suma - sumb); |
---|
3014 | bczv = zv * (sumb - sumc); |
---|
3015 | li = sumc + abzz + bczv; |
---|
3016 | t3 = xv * bczv; |
---|
3017 | d = abzz + t3; |
---|
3018 | d2 = rat * (abzz*(rat-1.0) + t3*(rat*xv-1.0)); |
---|
3019 | |
---|
3020 | temp = rat * d / li; |
---|
3021 | w = *wptr++; |
---|
3022 | slope += w * temp; |
---|
3023 | sum += w * (temp * temp - d2/li); |
---|
3024 | sumlr += w * log(li/(suma+1.0E-300)); |
---|
3025 | } |
---|
3026 | |
---|
3027 | *sumlrptr = sumlr; |
---|
3028 | return (sum > 1.0E-300) ? z*(-slope + sqrt(slope*slope + 3.841*sum))/sum |
---|
3029 | : 1.0; |
---|
3030 | } /* sigma */ |
---|
3031 | |
---|
3032 | |
---|
3033 | void describe (tr, p) |
---|
3034 | tree *tr; |
---|
3035 | nodeptr p; |
---|
3036 | /* print out information for one branch */ |
---|
3037 | { /* describe */ |
---|
3038 | double z, s, sumlr; |
---|
3039 | nodeptr q; |
---|
3040 | |
---|
3041 | q = p->back; |
---|
3042 | printf("%4d ", q->number - tr->mxtips); |
---|
3043 | if (p->tip) printf("%s", p->name); |
---|
3044 | else printf("%4d ", p->number - tr->mxtips); |
---|
3045 | |
---|
3046 | z = q->z; |
---|
3047 | if (z <= zmin) printf(" infinity"); |
---|
3048 | else printf("%15.5f", -log(z)*fracchange); |
---|
3049 | |
---|
3050 | s = sigma(q, &sumlr); |
---|
3051 | printf(" ("); |
---|
3052 | if (z + s >= zmax) printf(" zero"); |
---|
3053 | else printf("%9.5f", (double) -log(z + s)*fracchange); |
---|
3054 | putchar(','); |
---|
3055 | if (z - s <= zmin) printf(" infinity"); |
---|
3056 | else printf("%12.5f", (double) -log(z - s)*fracchange); |
---|
3057 | putchar(')'); |
---|
3058 | |
---|
3059 | if (sumlr > 2.995 ) printf(" **"); |
---|
3060 | else if (sumlr > 1.9205) printf(" *"); |
---|
3061 | putchar('\n'); |
---|
3062 | |
---|
3063 | if (! p->tip) { |
---|
3064 | describe(tr, p->next->back); |
---|
3065 | describe(tr, p->next->next->back); |
---|
3066 | } |
---|
3067 | } /* describe */ |
---|
3068 | |
---|
3069 | |
---|
3070 | void summarize (tr) |
---|
3071 | tree *tr; |
---|
3072 | /* print out branch length information and node numbers */ |
---|
3073 | { /* summarize */ |
---|
3074 | printf("Ln Likelihood =%14.5f\n", tr->likelihood); |
---|
3075 | putchar('\n'); |
---|
3076 | printf(" Between And Length"); |
---|
3077 | printf(" Approx. Confidence Limits\n"); |
---|
3078 | printf(" ------- --- ------"); |
---|
3079 | printf(" ------- ---------- ------\n"); |
---|
3080 | |
---|
3081 | describe(tr, tr->start->back->next->back); |
---|
3082 | describe(tr, tr->start->back->next->next->back); |
---|
3083 | describe(tr, tr->start); |
---|
3084 | putchar('\n'); |
---|
3085 | printf(" * = significantly positive, P < 0.05\n"); |
---|
3086 | printf(" ** = significantly positive, P < 0.01\n\n\n"); |
---|
3087 | } /* summarize */ |
---|
3088 | |
---|
3089 | |
---|
3090 | /*=========== This is a problem if tr->start->back is a tip! ===========*/ |
---|
3091 | /* All routines should be contrived so that tr->start->back is not a tip */ |
---|
3092 | |
---|
3093 | char *treeString (treestr, tr, p, form) |
---|
3094 | char *treestr; |
---|
3095 | tree *tr; |
---|
3096 | nodeptr p; |
---|
3097 | int form; |
---|
3098 | /* write string with representation of tree */ |
---|
3099 | /* form == 1 -> Newick tree */ |
---|
3100 | /* form == 2 -> Prolog fact */ |
---|
3101 | { /* treeString */ |
---|
3102 | double x, z; |
---|
3103 | char *nameptr; |
---|
3104 | int n, c; |
---|
3105 | |
---|
3106 | if (p == tr->start->back) { |
---|
3107 | if (form == 2) { |
---|
3108 | (void) sprintf(treestr, "phylip_tree("); |
---|
3109 | while (*treestr) treestr++; /* move pointer to null */ |
---|
3110 | } |
---|
3111 | |
---|
3112 | (void) sprintf(treestr, "[&&%s: version = '%s'", |
---|
3113 | programName, programVersion); |
---|
3114 | while (*treestr) treestr++; |
---|
3115 | |
---|
3116 | (void) sprintf(treestr, ", %s = %15.13g", |
---|
3117 | likelihood_key, tr->likelihood); |
---|
3118 | while (*treestr) treestr++; |
---|
3119 | |
---|
3120 | (void) sprintf(treestr, ", %s = %d", ntaxa_key, tr->ntips); |
---|
3121 | while (*treestr) treestr++; |
---|
3122 | |
---|
3123 | (void) sprintf(treestr,", %s = %d", opt_level_key, tr->opt_level); |
---|
3124 | while (*treestr) treestr++; |
---|
3125 | |
---|
3126 | (void) sprintf(treestr, ", %s = %d", smoothed_key, tr->smoothed); |
---|
3127 | while (*treestr) treestr++; |
---|
3128 | |
---|
3129 | (void) sprintf(treestr, "]%s", form == 2 ? ", " : " "); |
---|
3130 | while (*treestr) treestr++; |
---|
3131 | } |
---|
3132 | |
---|
3133 | if (p->tip) { |
---|
3134 | *treestr++ = '\''; |
---|
3135 | n = nmlngth; |
---|
3136 | nameptr = p->name + nmlngth - 1; |
---|
3137 | while (*nameptr-- == ' ' && n) n--; /* Trim trailing spaces */ |
---|
3138 | nameptr = p->name; |
---|
3139 | while (n--) { |
---|
3140 | if ((c = *nameptr++) == '\'') *treestr++ = '\''; |
---|
3141 | *treestr++ = c; |
---|
3142 | } |
---|
3143 | *treestr++ = '\''; |
---|
3144 | } |
---|
3145 | |
---|
3146 | else { |
---|
3147 | *treestr++ = '('; |
---|
3148 | treestr = treeString(treestr, tr, p->next->back, form); |
---|
3149 | *treestr++ = ','; |
---|
3150 | treestr = treeString(treestr, tr, p->next->next->back, form); |
---|
3151 | if (p == tr->start->back) { |
---|
3152 | *treestr++ = ','; |
---|
3153 | treestr = treeString(treestr, tr, p->back, form); |
---|
3154 | } |
---|
3155 | *treestr++ = ')'; |
---|
3156 | } |
---|
3157 | |
---|
3158 | if (p == tr->start->back) { |
---|
3159 | (void) sprintf(treestr, ":0.0%s\n", (form != 2) ? ";" : ")."); |
---|
3160 | } |
---|
3161 | else { |
---|
3162 | z = p->z; |
---|
3163 | if (z < zmin) z = zmin; |
---|
3164 | x = -log(z) * fracchange; |
---|
3165 | (void) sprintf(treestr, ": %8.6f", x); /* prolog needs the space */ |
---|
3166 | } |
---|
3167 | |
---|
3168 | while (*treestr) treestr++; /* move pointer up to null termination */ |
---|
3169 | return treestr; |
---|
3170 | } /* treeString */ |
---|
3171 | |
---|
3172 | |
---|
3173 | void treeOut (treefile, tr, form) |
---|
3174 | FILE *treefile; |
---|
3175 | tree *tr; |
---|
3176 | int form; |
---|
3177 | /* write out file with representation of final tree */ |
---|
3178 | { /* treeOut */ |
---|
3179 | int c; |
---|
3180 | char *cptr, treestr[maxsp*(nmlngth+24)]; |
---|
3181 | |
---|
3182 | (void) treeString(treestr, tr, tr->start->back, form); |
---|
3183 | cptr = treestr; |
---|
3184 | while (c = *cptr++) putc(c, treefile); |
---|
3185 | } /* treeOut */ |
---|
3186 | |
---|
3187 | |
---|
3188 | /*=======================================================================*/ |
---|
3189 | /* Read a tree from a file */ |
---|
3190 | /*=======================================================================*/ |
---|
3191 | |
---|
3192 | |
---|
3193 | int treeFinishCom () |
---|
3194 | { /* treeFinishCom */ |
---|
3195 | int ch; |
---|
3196 | boolean inquote; |
---|
3197 | |
---|
3198 | inquote = FALSE; |
---|
3199 | while ((ch = getc(INFILE)) != EOF && (inquote || ch != ']')) { |
---|
3200 | if (ch == '[' && ! inquote) { /* comment; find its end */ |
---|
3201 | if ((ch = treeFinishCom()) == EOF) break; |
---|
3202 | } |
---|
3203 | else if (ch == '\'') inquote = ! inquote; /* start or end of quote */ |
---|
3204 | } |
---|
3205 | |
---|
3206 | return ch; |
---|
3207 | } /* treeFinishCom */ |
---|
3208 | |
---|
3209 | |
---|
3210 | int treeGetCh () |
---|
3211 | /* get next nonblank, noncomment character */ |
---|
3212 | { /* treeGetCh */ |
---|
3213 | int ch; |
---|
3214 | |
---|
3215 | while ((ch = getc(INFILE)) != EOF) { |
---|
3216 | if (white(ch)) ; |
---|
3217 | else if (ch == '[') { /* comment; find its end */ |
---|
3218 | if ((ch = treeFinishCom()) == EOF) break; |
---|
3219 | } |
---|
3220 | else break; |
---|
3221 | } |
---|
3222 | |
---|
3223 | return ch; |
---|
3224 | } /* treeGetCh */ |
---|
3225 | |
---|
3226 | |
---|
3227 | void treeFlushLabel () |
---|
3228 | { /* treeFlushLabel */ |
---|
3229 | int ch; |
---|
3230 | boolean done, quoted; |
---|
3231 | |
---|
3232 | if ((ch = treeGetCh()) == EOF) return; |
---|
3233 | done = (ch == ':' || ch == ',' || ch == ')' || ch == '[' || ch == ';'); |
---|
3234 | if (! done && (quoted = (ch == '\''))) ch = getc(INFILE); |
---|
3235 | |
---|
3236 | while (! done) { |
---|
3237 | if (quoted) { |
---|
3238 | if ((ch = findch('\'')) == EOF) return; /* find close quote */ |
---|
3239 | ch = getc(INFILE); /* check next char */ |
---|
3240 | if (ch != '\'') done = TRUE; /* not doubled quote */ |
---|
3241 | } |
---|
3242 | else if (ch == ':' || ch == ',' || ch == ')' || ch == '[' |
---|
3243 | || ch == ';' || ch == '\n' || ch == EOF) { |
---|
3244 | done = TRUE; |
---|
3245 | } |
---|
3246 | if (! done) done = ((ch = getc(INFILE)) == EOF); |
---|
3247 | } |
---|
3248 | |
---|
3249 | if (ch != EOF) (void) ungetc(ch, INFILE); |
---|
3250 | } /* treeFlushLabel */ |
---|
3251 | |
---|
3252 | |
---|
3253 | int findTipName (tr, ch) |
---|
3254 | tree *tr; |
---|
3255 | int ch; |
---|
3256 | { /* findTipName */ |
---|
3257 | nodeptr q; |
---|
3258 | char *nameptr, str[nmlngth+1]; |
---|
3259 | int i, n; |
---|
3260 | boolean found, quoted, done; |
---|
3261 | |
---|
3262 | if (quoted = (ch == '\'')) ch = getc(INFILE); |
---|
3263 | done = FALSE; |
---|
3264 | i = 0; |
---|
3265 | |
---|
3266 | do { |
---|
3267 | if (quoted) { |
---|
3268 | if (ch == '\'') { |
---|
3269 | ch = getc(INFILE); |
---|
3270 | if (ch != '\'') done = TRUE; |
---|
3271 | } |
---|
3272 | else if (ch == EOF) |
---|
3273 | done = TRUE; |
---|
3274 | else if (ch == '\n' || ch == '\t') |
---|
3275 | ch = ' '; |
---|
3276 | } |
---|
3277 | else if (ch == ':' || ch == ',' || ch == ')' || ch == '[' |
---|
3278 | || ch == '\n' || ch == EOF) |
---|
3279 | done = TRUE; |
---|
3280 | else if (ch == '_' || ch == '\t') |
---|
3281 | ch = ' '; |
---|
3282 | |
---|
3283 | if (! done) { |
---|
3284 | if (i < nmlngth) str[i++] = ch; |
---|
3285 | ch = getc(INFILE); |
---|
3286 | } |
---|
3287 | } while (! done); |
---|
3288 | |
---|
3289 | if (ch == EOF) { |
---|
3290 | printf("ERROR: End-of-file in tree species name\n"); |
---|
3291 | return 0; |
---|
3292 | } |
---|
3293 | |
---|
3294 | (void) ungetc(ch, INFILE); |
---|
3295 | while (i < nmlngth) str[i++] = ' '; /* Pad name */ |
---|
3296 | |
---|
3297 | n = 1; |
---|
3298 | do { |
---|
3299 | q = tr->nodep[n]; |
---|
3300 | if (! (q->back)) { /* Only consider unused tips */ |
---|
3301 | i = 0; |
---|
3302 | nameptr = q->name; |
---|
3303 | do {found = str[i] == *nameptr++;} while (found && (++i < nmlngth)); |
---|
3304 | } |
---|
3305 | else |
---|
3306 | found = FALSE; |
---|
3307 | } while ((! found) && (++n <= tr->mxtips)); |
---|
3308 | |
---|
3309 | if (! found) { |
---|
3310 | i = nmlngth; |
---|
3311 | do {str[i] = '\0';} while (i-- && (str[i] <= ' ')); |
---|
3312 | printf("ERROR: Cannot find data for tree species: %s\n", str); |
---|
3313 | } |
---|
3314 | |
---|
3315 | return (found ? n : 0); |
---|
3316 | } /* findTipName */ |
---|
3317 | |
---|
3318 | |
---|
3319 | double processLength () |
---|
3320 | { /* processLength */ |
---|
3321 | double branch; |
---|
3322 | int ch; |
---|
3323 | char string[41]; |
---|
3324 | |
---|
3325 | ch = treeGetCh(); /* Skip comments */ |
---|
3326 | if (ch != EOF) (void) ungetc(ch, INFILE); |
---|
3327 | |
---|
3328 | if (fscanf(INFILE, "%lf", &branch) != 1) { |
---|
3329 | printf("ERROR: Problem reading branch length in processLength:\n"); |
---|
3330 | if (fscanf(INFILE, "%40s", string) == 1) printf("%s\n", string); |
---|
3331 | anerror = TRUE; |
---|
3332 | branch = 0.0; |
---|
3333 | } |
---|
3334 | |
---|
3335 | return branch; |
---|
3336 | } /* processLength */ |
---|
3337 | |
---|
3338 | |
---|
3339 | void treeFlushLen () |
---|
3340 | { /* treeFlushLen */ |
---|
3341 | int ch; |
---|
3342 | |
---|
3343 | if ((ch = treeGetCh()) == ':') |
---|
3344 | (void) processLength(); |
---|
3345 | else if (ch != EOF) |
---|
3346 | (void) ungetc(ch, INFILE); |
---|
3347 | |
---|
3348 | } /* treeFlushLen */ |
---|
3349 | |
---|
3350 | |
---|
3351 | void treeNeedCh (c1, where) |
---|
3352 | int c1; |
---|
3353 | char *where; |
---|
3354 | { /* treeNeedCh */ |
---|
3355 | int c2, i; |
---|
3356 | |
---|
3357 | if ((c2 = treeGetCh()) == c1) return; |
---|
3358 | |
---|
3359 | printf("ERROR: Missing '%c' %s tree; ", c1, where); |
---|
3360 | if (c2 == EOF) |
---|
3361 | printf("End-of-File"); |
---|
3362 | else { |
---|
3363 | putchar('\''); |
---|
3364 | for (i = 24; i-- && (c2 != EOF); c2 = getc(INFILE)) putchar(c2); |
---|
3365 | putchar('\''); |
---|
3366 | } |
---|
3367 | printf(" found instead\n"); |
---|
3368 | anerror = TRUE; |
---|
3369 | } /* treeNeedCh */ |
---|
3370 | |
---|
3371 | |
---|
3372 | void addElementLen (tr, p) |
---|
3373 | tree *tr; |
---|
3374 | nodeptr p; |
---|
3375 | { /* addElementLen */ |
---|
3376 | double z, branch; |
---|
3377 | nodeptr q; |
---|
3378 | int n, ch; |
---|
3379 | |
---|
3380 | if ((ch = treeGetCh()) == '(') { /* A new internal node */ |
---|
3381 | n = (tr->nextnode)++; |
---|
3382 | if (n > 2*(tr->mxtips) - 2) { |
---|
3383 | if (tr->rooted || n > 2*(tr->mxtips) - 1) { |
---|
3384 | printf("ERROR: Too many internal nodes. Is tree rooted?\n"); |
---|
3385 | printf(" Deepest splitting should be a trifurcation.\n"); |
---|
3386 | anerror = TRUE; |
---|
3387 | return; |
---|
3388 | } |
---|
3389 | else { |
---|
3390 | tr->rooted = TRUE; |
---|
3391 | } |
---|
3392 | } |
---|
3393 | q = tr->nodep[n]; |
---|
3394 | addElementLen(tr, q->next); if (anerror) return; |
---|
3395 | treeNeedCh(',', "in"); if (anerror) return; |
---|
3396 | addElementLen(tr, q->next->next); if (anerror) return; |
---|
3397 | treeNeedCh(')', "in"); if (anerror) return; |
---|
3398 | treeFlushLabel(); if (anerror) return; |
---|
3399 | } |
---|
3400 | |
---|
3401 | else { /* A new tip */ |
---|
3402 | n = findTipName(tr, ch); |
---|
3403 | if (n <= 0) {anerror = TRUE; return; } |
---|
3404 | q = tr->nodep[n]; |
---|
3405 | if (tr->start->number > n) tr->start = q; |
---|
3406 | (tr->ntips)++; |
---|
3407 | } /* End of tip processing */ |
---|
3408 | |
---|
3409 | if (tr->userlen) { |
---|
3410 | treeNeedCh(':', "in"); if (anerror) return; |
---|
3411 | branch = processLength(); if (anerror) return; |
---|
3412 | z = exp(-branch / fracchange); |
---|
3413 | if (z > zmax) z = zmax; |
---|
3414 | hookup(p, q, z); |
---|
3415 | } |
---|
3416 | else { |
---|
3417 | treeFlushLen(); if (anerror) return; |
---|
3418 | hookup(p, q, defaultz); |
---|
3419 | } |
---|
3420 | } /* addElementLen */ |
---|
3421 | |
---|
3422 | |
---|
3423 | int saveTreeCom (comstrp) |
---|
3424 | char **comstrp; |
---|
3425 | { /* saveTreeCom */ |
---|
3426 | int ch; |
---|
3427 | boolean inquote; |
---|
3428 | |
---|
3429 | inquote = FALSE; |
---|
3430 | while ((ch = getc(INFILE)) != EOF && (inquote || ch != ']')) { |
---|
3431 | *(*comstrp)++ = ch; /* save character */ |
---|
3432 | if (ch == '[' && ! inquote) { /* comment; find its end */ |
---|
3433 | if ((ch = saveTreeCom(comstrp)) == EOF) break; |
---|
3434 | *(*comstrp)++ = ch; /* add ] */ |
---|
3435 | } |
---|
3436 | else if (ch == '\'') inquote = ! inquote; /* start or end of quote */ |
---|
3437 | } |
---|
3438 | |
---|
3439 | return ch; |
---|
3440 | } /* saveTreeCom */ |
---|
3441 | |
---|
3442 | |
---|
3443 | boolean processTreeCom(tr) |
---|
3444 | tree *tr; |
---|
3445 | { /* processTreeCom */ |
---|
3446 | int ch, text_started, functor_read, com_open; |
---|
3447 | |
---|
3448 | /* Accept prefatory "phylip_tree(" or "pseudoNewick(" */ |
---|
3449 | |
---|
3450 | functor_read = text_started = 0; |
---|
3451 | fscanf(INFILE, " p%nhylip_tree(%n", &text_started, &functor_read); |
---|
3452 | if (text_started && ! functor_read) { |
---|
3453 | fscanf(INFILE, "seudoNewick(%n", &functor_read); |
---|
3454 | if (! functor_read) { |
---|
3455 | printf("Start of tree 'p...' not understood.\n"); |
---|
3456 | anerror = TRUE; |
---|
3457 | return; |
---|
3458 | } |
---|
3459 | } |
---|
3460 | |
---|
3461 | com_open = 0; |
---|
3462 | fscanf(INFILE, " [%n", &com_open); |
---|
3463 | |
---|
3464 | if (com_open) { /* comment; read it */ |
---|
3465 | char com[1024], *com_end; |
---|
3466 | |
---|
3467 | com_end = com; |
---|
3468 | if (saveTreeCom(&com_end) == EOF) { /* omits enclosing []s */ |
---|
3469 | printf("Missing end of tree comment.\n"); |
---|
3470 | anerror = TRUE; |
---|
3471 | return; |
---|
3472 | } |
---|
3473 | |
---|
3474 | *com_end = 0; |
---|
3475 | (void) readKeyValue(com, likelihood_key, "%lg", |
---|
3476 | (void *) &(tr->likelihood)); |
---|
3477 | (void) readKeyValue(com, opt_level_key, "%d", |
---|
3478 | (void *) &(tr->opt_level)); |
---|
3479 | (void) readKeyValue(com, smoothed_key, "%d", |
---|
3480 | (void *) &(tr->smoothed)); |
---|
3481 | |
---|
3482 | if (functor_read) fscanf(INFILE, " ,"); /* remove trailing comma */ |
---|
3483 | } |
---|
3484 | |
---|
3485 | return (functor_read > 0); |
---|
3486 | } /* processTreeCom */ |
---|
3487 | |
---|
3488 | |
---|
3489 | void uprootTree (tr, p) |
---|
3490 | tree *tr; |
---|
3491 | nodeptr p; |
---|
3492 | { /* uprootTree */ |
---|
3493 | nodeptr q, r, s; |
---|
3494 | int n; |
---|
3495 | |
---|
3496 | if (p->tip || p->back) { |
---|
3497 | printf("ERROR: Unable to uproot tree.\n"); |
---|
3498 | printf(" Inappropriate node marked for removal.\n"); |
---|
3499 | anerror = TRUE; |
---|
3500 | return; |
---|
3501 | } |
---|
3502 | |
---|
3503 | n = --(tr->nextnode); /* last internal node added */ |
---|
3504 | if (n != tr->mxtips + tr->ntips - 1) { |
---|
3505 | printf("ERROR: Unable to uproot tree. Inconsistent\n"); |
---|
3506 | printf(" number of tips and nodes for rooted tree.\n"); |
---|
3507 | anerror = TRUE; |
---|
3508 | return; |
---|
3509 | } |
---|
3510 | |
---|
3511 | q = p->next->back; /* remove p from tree */ |
---|
3512 | r = p->next->next->back; |
---|
3513 | hookup(q, r, tr->userlen ? (q->z * r->z) : defaultz); |
---|
3514 | |
---|
3515 | q = tr->nodep[n]; |
---|
3516 | r = q->next; |
---|
3517 | s = q->next->next; |
---|
3518 | if (tr->ntips > 2 && p != q && p != r && p != s) { |
---|
3519 | hookup(p, q->back, q->z); /* move connections to p */ |
---|
3520 | hookup(p->next, r->back, r->z); |
---|
3521 | hookup(p->next->next, s->back, s->z); |
---|
3522 | } |
---|
3523 | |
---|
3524 | q->back = r->back = s->back = (nodeptr) NULL; |
---|
3525 | tr->rooted = FALSE; |
---|
3526 | } /* uprootTree */ |
---|
3527 | |
---|
3528 | |
---|
3529 | void treeReadLen (tr) |
---|
3530 | tree *tr; |
---|
3531 | { /* treeReadLen */ |
---|
3532 | nodeptr p; |
---|
3533 | int i, ch; |
---|
3534 | boolean is_fact, found; |
---|
3535 | |
---|
3536 | for (i = 1; i <= tr->mxtips; i++) tr->nodep[i]->back = (node *) NULL; |
---|
3537 | tr->start = tr->nodep[tr->mxtips]; |
---|
3538 | tr->ntips = 0; |
---|
3539 | tr->nextnode = tr->mxtips + 1; |
---|
3540 | tr->opt_level = 0; |
---|
3541 | tr->log_f_valid = 0; |
---|
3542 | tr->smoothed = FALSE; |
---|
3543 | tr->rooted = FALSE; |
---|
3544 | |
---|
3545 | is_fact = processTreeCom(tr); |
---|
3546 | |
---|
3547 | p = tr->nodep[(tr->nextnode)++]; |
---|
3548 | treeNeedCh('(', "at start of"); if (anerror) return; |
---|
3549 | addElementLen(tr, p); if (anerror) return; |
---|
3550 | treeNeedCh(',', "in"); if (anerror) return; |
---|
3551 | addElementLen(tr, p->next); if (anerror) return; |
---|
3552 | if (! tr->rooted) { |
---|
3553 | if ((ch = treeGetCh()) == ',') { /* An unrooted format */ |
---|
3554 | addElementLen(tr, p->next->next); if (anerror) return; |
---|
3555 | } |
---|
3556 | else { /* A rooted format */ |
---|
3557 | p->next->next->back = (nodeptr) NULL; |
---|
3558 | tr->rooted = TRUE; |
---|
3559 | if (ch != EOF) (void) ungetc(ch, INFILE); |
---|
3560 | } |
---|
3561 | } |
---|
3562 | treeNeedCh(')', "in"); if (anerror) return; |
---|
3563 | treeFlushLabel(); if (anerror) return; |
---|
3564 | treeFlushLen(); if (anerror) return; |
---|
3565 | if (is_fact) { |
---|
3566 | treeNeedCh(')', "at end of"); if (anerror) return; |
---|
3567 | treeNeedCh('.', "at end of"); if (anerror) return; |
---|
3568 | } |
---|
3569 | else { |
---|
3570 | treeNeedCh(';', "at end of"); if (anerror) return; |
---|
3571 | } |
---|
3572 | |
---|
3573 | if (tr->rooted) uprootTree(tr, p->next->next); if (anerror) return; |
---|
3574 | tr->start = p->next->next->back; /* This is start used by treeString */ |
---|
3575 | |
---|
3576 | initrav(tr->start); |
---|
3577 | initrav(tr->start->back); |
---|
3578 | } /* treeReadLen */ |
---|
3579 | |
---|
3580 | |
---|
3581 | /*=======================================================================*/ |
---|
3582 | /* Read a tree from a string */ |
---|
3583 | /*=======================================================================*/ |
---|
3584 | |
---|
3585 | |
---|
3586 | #if Master || Slave |
---|
3587 | int str_treeFinishCom (treestrp) |
---|
3588 | char **treestrp; /* tree string pointer */ |
---|
3589 | { /* str_treeFinishCom */ |
---|
3590 | int ch; |
---|
3591 | boolean inquote; |
---|
3592 | |
---|
3593 | inquote = FALSE; |
---|
3594 | while ((ch = *(*treestrp)++) != NULL && (inquote || ch != ']')) { |
---|
3595 | if (ch == '[' && ! inquote) { /* comment; find its end */ |
---|
3596 | if ((ch = str_treeFinishCom(treestrp)) == NULL) break; |
---|
3597 | } |
---|
3598 | else if (ch == '\'') inquote = ! inquote; /* start or end of quote */ |
---|
3599 | } |
---|
3600 | return ch; |
---|
3601 | } /* str_treeFinishCom */ |
---|
3602 | |
---|
3603 | |
---|
3604 | int str_treeGetCh (treestrp) |
---|
3605 | char **treestrp; /* tree string pointer */ |
---|
3606 | /* get next nonblank, noncomment character */ |
---|
3607 | { /* str_treeGetCh */ |
---|
3608 | int ch; |
---|
3609 | |
---|
3610 | while ((ch = *(*treestrp)++) != NULL) { |
---|
3611 | if (white(ch)) ; |
---|
3612 | else if (ch == '[') { /* comment; find its end */ |
---|
3613 | if ((ch = str_treeFinishCom(treestrp)) == NULL) break; |
---|
3614 | } |
---|
3615 | else break; |
---|
3616 | } |
---|
3617 | |
---|
3618 | return ch; |
---|
3619 | } /* str_treeGetCh */ |
---|
3620 | |
---|
3621 | |
---|
3622 | void str_treeFlushLabel (treestrp) |
---|
3623 | char **treestrp; /* tree string pointer */ |
---|
3624 | { /* str_treeFlushLabel */ |
---|
3625 | int ch; |
---|
3626 | boolean done, quoted; |
---|
3627 | |
---|
3628 | if ((ch = str_treeGetCh(treestrp)) == NULL) done = TRUE; |
---|
3629 | else { |
---|
3630 | done = (ch == ':' || ch == ',' || ch == ')' || ch == '[' || ch == ';'); |
---|
3631 | if (! done && (quoted = (ch == '\''))) ch = *(*treestrp)++; |
---|
3632 | } |
---|
3633 | |
---|
3634 | while (! done) { |
---|
3635 | if (quoted) { |
---|
3636 | if ((ch = str_findch(treestrp, '\'')) == NULL) done = TRUE; |
---|
3637 | else { |
---|
3638 | ch = *(*treestrp)++; /* check next char */ |
---|
3639 | if (ch != '\'') done = TRUE; /* not doubled quote */ |
---|
3640 | } |
---|
3641 | } |
---|
3642 | else if (ch == ':' || ch == ',' || ch == ')' || ch == '[' |
---|
3643 | || ch == ';' || ch == '\n' || ch == NULL) { |
---|
3644 | done = TRUE; |
---|
3645 | } |
---|
3646 | if (! done) done = ((ch = *(*treestrp)++) == NULL); |
---|
3647 | } |
---|
3648 | |
---|
3649 | (*treestrp)--; |
---|
3650 | } /* str_treeFlushLabel */ |
---|
3651 | |
---|
3652 | |
---|
3653 | int str_findTipName (treestrp, tr, ch) |
---|
3654 | char **treestrp; /* tree string pointer */ |
---|
3655 | tree *tr; |
---|
3656 | int ch; |
---|
3657 | { /* str_findTipName */ |
---|
3658 | nodeptr q; |
---|
3659 | char *nameptr, str[nmlngth+1]; |
---|
3660 | int i, n; |
---|
3661 | boolean found, quoted, done; |
---|
3662 | |
---|
3663 | i = 0; |
---|
3664 | if (quoted = (ch == '\'')) ch = *(*treestrp)++; |
---|
3665 | done = FALSE; |
---|
3666 | |
---|
3667 | do { |
---|
3668 | if (quoted) { |
---|
3669 | if (ch == '\'') { |
---|
3670 | ch = *(*treestrp)++; |
---|
3671 | if (ch != '\'') done = TRUE; |
---|
3672 | } |
---|
3673 | else if (ch == NULL) |
---|
3674 | done = TRUE; |
---|
3675 | else if (ch == '\n' || ch == '\t') |
---|
3676 | ch = ' '; |
---|
3677 | } |
---|
3678 | else if (ch == ':' || ch == ',' || ch == ')' || ch == '[' |
---|
3679 | || ch == '\n' || ch == NULL) |
---|
3680 | done = TRUE; |
---|
3681 | else if (ch == '_' || ch == '\t') |
---|
3682 | ch = ' '; |
---|
3683 | |
---|
3684 | if (! done) { |
---|
3685 | if (i < nmlngth) str[i++] = ch; |
---|
3686 | ch = *(*treestrp)++; |
---|
3687 | } |
---|
3688 | } while (! done); |
---|
3689 | |
---|
3690 | (*treestrp)--; |
---|
3691 | if (ch == NULL) { |
---|
3692 | printf("ERROR: NULL in tree species name\n"); |
---|
3693 | return 0; |
---|
3694 | } |
---|
3695 | |
---|
3696 | while (i < nmlngth) str[i++] = ' '; /* Pad name */ |
---|
3697 | |
---|
3698 | n = 1; |
---|
3699 | do { |
---|
3700 | q = tr->nodep[n]; |
---|
3701 | if (! (q->back)) { /* Only consider unused tips */ |
---|
3702 | i = 0; |
---|
3703 | nameptr = q->name; |
---|
3704 | do {found = str[i] == *nameptr++;} while (found && (++i < nmlngth)); |
---|
3705 | } |
---|
3706 | else |
---|
3707 | found = FALSE; |
---|
3708 | } while ((! found) && (++n <= tr->mxtips)); |
---|
3709 | |
---|
3710 | if (! found) { |
---|
3711 | i = nmlngth; |
---|
3712 | do {str[i] = '\0';} while (i-- && (str[i] <= ' ')); |
---|
3713 | printf("ERROR: Cannot find data for tree species: %s\n", str); |
---|
3714 | } |
---|
3715 | |
---|
3716 | return (found ? n : 0); |
---|
3717 | } /* str_findTipName */ |
---|
3718 | |
---|
3719 | |
---|
3720 | double str_processLength (treestrp) |
---|
3721 | char **treestrp; /* tree string ponter */ |
---|
3722 | { /* str_processLength */ |
---|
3723 | double branch; |
---|
3724 | int used; |
---|
3725 | |
---|
3726 | (void) str_treeGetCh(treestrp); /* Skip comments */ |
---|
3727 | (*treestrp)--; |
---|
3728 | |
---|
3729 | if (sscanf(*treestrp, "%lf%n", &branch, &used) != 1) { |
---|
3730 | printf("ERROR: Problem reading branch length in str_processLength:\n"); |
---|
3731 | printf("%40s\n", *treestrp); |
---|
3732 | anerror = TRUE; |
---|
3733 | branch = 0.0; |
---|
3734 | } |
---|
3735 | else { |
---|
3736 | *treestrp += used; |
---|
3737 | } |
---|
3738 | |
---|
3739 | return branch; |
---|
3740 | } /* str_processLength */ |
---|
3741 | |
---|
3742 | |
---|
3743 | void str_treeFlushLen (treestrp) |
---|
3744 | char **treestrp; /* tree string ponter */ |
---|
3745 | { /* str_treeFlushLen */ |
---|
3746 | int ch; |
---|
3747 | |
---|
3748 | if ((ch = str_treeGetCh(treestrp)) == ':') |
---|
3749 | (void) str_processLength(treestrp); |
---|
3750 | else |
---|
3751 | (*treestrp)--; |
---|
3752 | |
---|
3753 | } /* str_treeFlushLen */ |
---|
3754 | |
---|
3755 | |
---|
3756 | void str_treeNeedCh (treestrp, c1, where) |
---|
3757 | char **treestrp; /* tree string pointer */ |
---|
3758 | int c1; |
---|
3759 | char *where; |
---|
3760 | { /* str_treeNeedCh */ |
---|
3761 | int c2, i; |
---|
3762 | |
---|
3763 | if ((c2 = str_treeGetCh(treestrp)) == c1) return; |
---|
3764 | |
---|
3765 | printf("ERROR: Missing '%c' %s tree; ", c1, where); |
---|
3766 | if (c2 == NULL) |
---|
3767 | printf("NULL"); |
---|
3768 | else { |
---|
3769 | putchar('\''); |
---|
3770 | for (i = 24; i-- && (c2 != NULL); c2 = *(*treestrp)++) putchar(c2); |
---|
3771 | putchar('\''); |
---|
3772 | } |
---|
3773 | printf(" found instead\n"); |
---|
3774 | anerror = TRUE; |
---|
3775 | } /* str_treeNeedCh */ |
---|
3776 | |
---|
3777 | |
---|
3778 | void str_addElementLen (treestrp, tr, p) |
---|
3779 | char **treestrp; /* tree string pointer */ |
---|
3780 | tree *tr; |
---|
3781 | nodeptr p; |
---|
3782 | { /* str_addElementLen */ |
---|
3783 | double z, branch; |
---|
3784 | nodeptr q; |
---|
3785 | int n, ch; |
---|
3786 | |
---|
3787 | if ((ch = str_treeGetCh(treestrp)) == '(') { /* A new internal node */ |
---|
3788 | n = (tr->nextnode)++; |
---|
3789 | if (n > 2*(tr->mxtips) - 2) { |
---|
3790 | if (tr->rooted || n > 2*(tr->mxtips) - 1) { |
---|
3791 | printf("ERROR: too many internal nodes. Is tree rooted?\n"); |
---|
3792 | printf("Deepest splitting should be a trifurcation.\n"); |
---|
3793 | anerror = TRUE; |
---|
3794 | return; |
---|
3795 | } |
---|
3796 | else { |
---|
3797 | tr->rooted = TRUE; |
---|
3798 | } |
---|
3799 | } |
---|
3800 | q = tr->nodep[n]; |
---|
3801 | str_addElementLen(treestrp, tr, q->next); if (anerror) return; |
---|
3802 | str_treeNeedCh(treestrp, ',', "in"); if (anerror) return; |
---|
3803 | str_addElementLen(treestrp, tr, q->next->next); if (anerror) return; |
---|
3804 | str_treeNeedCh(treestrp, ')', "in"); if (anerror) return; |
---|
3805 | str_treeFlushLabel(treestrp); if (anerror) return; |
---|
3806 | } |
---|
3807 | |
---|
3808 | else { /* A new tip */ |
---|
3809 | n = str_findTipName(treestrp, tr, ch); |
---|
3810 | if (n <= 0) {anerror = TRUE; return; } |
---|
3811 | q = tr->nodep[n]; |
---|
3812 | if (tr->start->number > n) tr->start = q; |
---|
3813 | (tr->ntips)++; |
---|
3814 | } /* End of tip processing */ |
---|
3815 | |
---|
3816 | /* Master and Slave always use lengths */ |
---|
3817 | |
---|
3818 | str_treeNeedCh(treestrp, ':', "in"); if (anerror) return; |
---|
3819 | branch = str_processLength(treestrp); if (anerror) return; |
---|
3820 | z = exp(-branch / fracchange); |
---|
3821 | if (z > zmax) z = zmax; |
---|
3822 | hookup(p, q, z); |
---|
3823 | |
---|
3824 | return; |
---|
3825 | } /* str_addElementLen */ |
---|
3826 | |
---|
3827 | |
---|
3828 | boolean str_processTreeCom(tr, treestrp) |
---|
3829 | tree *tr; |
---|
3830 | char **treestrp; |
---|
3831 | { /* str_processTreeCom */ |
---|
3832 | char *com, *com_end; |
---|
3833 | int text_started, functor_read, com_open; |
---|
3834 | |
---|
3835 | com = *treestrp; |
---|
3836 | |
---|
3837 | functor_read = text_started = 0; |
---|
3838 | sscanf(com, " p%nhylip_tree(%n", &text_started, &functor_read); |
---|
3839 | if (functor_read) { |
---|
3840 | com += functor_read; |
---|
3841 | } |
---|
3842 | else if (text_started) { |
---|
3843 | com += text_started; |
---|
3844 | sscanf(com, "seudoNewick(%n", &functor_read); |
---|
3845 | if (! functor_read) { |
---|
3846 | printf("Start of tree 'p...' not understood.\n"); |
---|
3847 | anerror = TRUE; |
---|
3848 | return; |
---|
3849 | } |
---|
3850 | else { |
---|
3851 | com += functor_read; |
---|
3852 | } |
---|
3853 | } |
---|
3854 | |
---|
3855 | com_open = 0; |
---|
3856 | sscanf(com, " [%n", &com_open); |
---|
3857 | com += com_open; |
---|
3858 | |
---|
3859 | if (com_open) { /* comment; read it */ |
---|
3860 | if (!(com_end = index(com, ']'))) { |
---|
3861 | printf("Missing end of tree comment.\n"); |
---|
3862 | anerror = TRUE; |
---|
3863 | return; |
---|
3864 | } |
---|
3865 | |
---|
3866 | *com_end = 0; |
---|
3867 | (void) readKeyValue(com, likelihood_key, "%lg", |
---|
3868 | (void *) &(tr->likelihood)); |
---|
3869 | (void) readKeyValue(com, opt_level_key, "%d", |
---|
3870 | (void *) &(tr->opt_level)); |
---|
3871 | (void) readKeyValue(com, smoothed_key, "%d", |
---|
3872 | (void *) &(tr->smoothed)); |
---|
3873 | *com_end = ']'; |
---|
3874 | com_end++; |
---|
3875 | |
---|
3876 | if (functor_read) { /* remove trailing comma */ |
---|
3877 | text_started = 0; |
---|
3878 | sscanf(com_end, " ,%n", &text_started); |
---|
3879 | com_end += text_started; |
---|
3880 | } |
---|
3881 | |
---|
3882 | *treestrp = com_end; |
---|
3883 | } |
---|
3884 | |
---|
3885 | return (functor_read > 0); |
---|
3886 | } /* str_processTreeCom */ |
---|
3887 | |
---|
3888 | |
---|
3889 | void str_treeReadLen (treestr, tr) |
---|
3890 | char *treestr; |
---|
3891 | tree *tr; |
---|
3892 | /* read string with representation of tree */ |
---|
3893 | { /* str_treeReadLen */ |
---|
3894 | nodeptr p; |
---|
3895 | int i; |
---|
3896 | boolean is_fact, found; |
---|
3897 | |
---|
3898 | for (i = 1; i <= tr->mxtips; i++) tr->nodep[i]->back = (node *) NULL; |
---|
3899 | tr->start = tr->nodep[tr->mxtips]; |
---|
3900 | tr->ntips = 0; |
---|
3901 | tr->nextnode = tr->mxtips + 1; |
---|
3902 | tr->opt_level = 0; |
---|
3903 | tr->log_f_valid = 0; |
---|
3904 | tr->smoothed = Master; |
---|
3905 | tr->rooted = FALSE; |
---|
3906 | |
---|
3907 | is_fact = str_processTreeCom(tr, &treestr); |
---|
3908 | |
---|
3909 | p = tr->nodep[(tr->nextnode)++]; |
---|
3910 | str_treeNeedCh(&treestr, '(', "at start of"); if (anerror) return; |
---|
3911 | str_addElementLen(&treestr, tr, p); if (anerror) return; |
---|
3912 | str_treeNeedCh(&treestr, ',', "in"); if (anerror) return; |
---|
3913 | str_addElementLen(&treestr, tr, p->next); if (anerror) return; |
---|
3914 | if (! tr->rooted) { |
---|
3915 | if (str_treeGetCh(&treestr) == ',') { /* An unrooted format */ |
---|
3916 | str_addElementLen(&treestr, tr, p->next->next); |
---|
3917 | if (anerror) return; |
---|
3918 | } |
---|
3919 | else { /* A rooted format */ |
---|
3920 | p->next->next->back = (nodeptr) NULL; |
---|
3921 | tr->rooted = TRUE; |
---|
3922 | treestr--; |
---|
3923 | } |
---|
3924 | } |
---|
3925 | str_treeNeedCh(&treestr, ')', "in"); if (anerror) return; |
---|
3926 | str_treeFlushLabel(&treestr); if (anerror) return; |
---|
3927 | str_treeFlushLen(&treestr); if (anerror) return; |
---|
3928 | if (is_fact) { |
---|
3929 | str_treeNeedCh(&treestr, ')', "at end of"); if (anerror) return; |
---|
3930 | str_treeNeedCh(&treestr, '.', "at end of"); if (anerror) return; |
---|
3931 | } |
---|
3932 | else { |
---|
3933 | str_treeNeedCh(&treestr, ';', "at end of"); if (anerror) return; |
---|
3934 | } |
---|
3935 | |
---|
3936 | if (tr->rooted) uprootTree(tr, p->next->next); if (anerror) return; |
---|
3937 | tr->start = p->next->next->back; /* This is start used by treeString */ |
---|
3938 | |
---|
3939 | initrav(tr->start); |
---|
3940 | initrav(tr->start->back); |
---|
3941 | } /* str_treeReadLen */ |
---|
3942 | #endif |
---|
3943 | |
---|
3944 | |
---|
3945 | void treeEvaluate (tr, bt) /* Evaluate a user tree */ |
---|
3946 | tree *tr; |
---|
3947 | bestlist *bt; |
---|
3948 | { /* treeEvaluate */ |
---|
3949 | |
---|
3950 | if (Slave || ! tr->userlen) smoothTree(tr, 4 * smoothings); |
---|
3951 | |
---|
3952 | (void) evaluate(tr, tr->start); |
---|
3953 | if (anerror) return; |
---|
3954 | |
---|
3955 | # if ! Slave |
---|
3956 | (void) saveBestTree(bt, tr); |
---|
3957 | # endif |
---|
3958 | } /* treeEvaluate */ |
---|
3959 | |
---|
3960 | |
---|
3961 | #if Master || Slave |
---|
3962 | FILE *freopen_pid (filenm, mode, stream) |
---|
3963 | char *filenm, *mode; |
---|
3964 | FILE *stream; |
---|
3965 | { /* freopen_pid */ |
---|
3966 | char scr[512]; |
---|
3967 | |
---|
3968 | (void) sprintf(scr, "%s.%d", filenm, getpid()); |
---|
3969 | return freopen(scr, mode, stream); |
---|
3970 | } /* freopen_pid */ |
---|
3971 | #endif |
---|
3972 | |
---|
3973 | |
---|
3974 | void showBestTrees (bt, tr, treefile) |
---|
3975 | bestlist *bt; |
---|
3976 | tree *tr; |
---|
3977 | FILE *treefile; |
---|
3978 | { /* showBestTrees */ |
---|
3979 | int rank; |
---|
3980 | |
---|
3981 | for (rank = 1; rank <= bt->nvalid; rank++) { |
---|
3982 | if (rank > 1) { |
---|
3983 | if (rank != recallBestTree(bt, rank, tr)) break; |
---|
3984 | } |
---|
3985 | (void) evaluate(tr, tr->start); |
---|
3986 | if (anerror) return; |
---|
3987 | if (tr->outgrnode->back) tr->start = tr->outgrnode; |
---|
3988 | printTree(tr); |
---|
3989 | summarize(tr); |
---|
3990 | if (treefile) treeOut(treefile, tr, trout); |
---|
3991 | } |
---|
3992 | } /* showBestTrees */ |
---|
3993 | |
---|
3994 | |
---|
3995 | void cmpBestTrees (bt, tr) |
---|
3996 | bestlist *bt; |
---|
3997 | tree *tr; |
---|
3998 | { /* cmpBestTrees */ |
---|
3999 | double sum, sum2, sd, temp, bestscore; |
---|
4000 | double log_f0[maxpatterns]; /* Save a copy of best log_f */ |
---|
4001 | double *log_f_ptr, *log_f0_ptr; |
---|
4002 | int i, j, num, besttips; |
---|
4003 | |
---|
4004 | num = bt->nvalid; |
---|
4005 | if ((num <= 1) || (weightsum <= 1)) return; |
---|
4006 | |
---|
4007 | printf("Tree Ln L Diff Ln L Its S.D."); |
---|
4008 | printf(" Significantly worse?\n\n"); |
---|
4009 | |
---|
4010 | for (i = 1; i <= num; i++) { |
---|
4011 | if (i != recallBestTree(bt, i, tr)) break; |
---|
4012 | if (! (tr->log_f_valid)) (void) evaluate(tr, tr->start); |
---|
4013 | |
---|
4014 | printf("%3d%14.5f", i, tr->likelihood); |
---|
4015 | if (i == 1) { |
---|
4016 | printf(" <------ best\n"); |
---|
4017 | besttips = tr->ntips; |
---|
4018 | bestscore = tr->likelihood; |
---|
4019 | log_f0_ptr = log_f0; |
---|
4020 | log_f_ptr = tr->log_f; |
---|
4021 | for (j = 0; j < endsite; j++) *log_f0_ptr++ = *log_f_ptr++; |
---|
4022 | } |
---|
4023 | else if (tr->ntips != besttips) |
---|
4024 | printf(" (different number of species)\n"); |
---|
4025 | else { |
---|
4026 | sum = sum2 = 0.0; |
---|
4027 | log_f0_ptr = log_f0; |
---|
4028 | log_f_ptr = tr->log_f; |
---|
4029 | for (j = 0; j < endsite; j++) { |
---|
4030 | temp = *log_f0_ptr++ - *log_f_ptr++; |
---|
4031 | sum += aliasweight[j] * temp; |
---|
4032 | sum2 += aliasweight[j] * temp * temp; |
---|
4033 | } |
---|
4034 | sd = sqrt( weightsum * (sum2 - sum*sum/weightsum) / (weightsum-1) ); |
---|
4035 | printf("%14.5f%14.4f", tr->likelihood - bestscore, sd); |
---|
4036 | printf(" %s\n", (sum > 1.95996 * sd) ? "Yes" : " No"); |
---|
4037 | } |
---|
4038 | } |
---|
4039 | |
---|
4040 | printf("\n\n"); |
---|
4041 | } /* cmpBestTrees */ |
---|
4042 | |
---|
4043 | |
---|
4044 | void makeUserTree (tr, bt) |
---|
4045 | tree *tr; |
---|
4046 | bestlist *bt; |
---|
4047 | { /* makeUserTree */ |
---|
4048 | FILE *treefile; |
---|
4049 | int nusertrees, which; |
---|
4050 | |
---|
4051 | if (fscanf(INFILE, "%d", &nusertrees) != 1 || findch('\n') == EOF) { |
---|
4052 | printf("ERROR: Problem reading number of user trees\n"); |
---|
4053 | anerror = TRUE; |
---|
4054 | return; |
---|
4055 | } |
---|
4056 | |
---|
4057 | printf("User-defined %s:\n\n", (nusertrees == 1) ? "tree" : "trees"); |
---|
4058 | |
---|
4059 | treefile = trout ? fopen_pid("treefile","w") : (FILE *) NULL; |
---|
4060 | |
---|
4061 | for (which = 1; which <= nusertrees; which++) { |
---|
4062 | treeReadLen(tr); if (anerror) return; |
---|
4063 | treeEvaluate(tr, bt); if (anerror) return; |
---|
4064 | if (tr->global <= 0) { |
---|
4065 | if (tr->outgrnode->back) tr->start = tr->outgrnode; |
---|
4066 | printTree(tr); |
---|
4067 | summarize(tr); |
---|
4068 | if (treefile) treeOut(treefile, tr, trout); |
---|
4069 | } |
---|
4070 | else { |
---|
4071 | printf("%6d: Ln Likelihood =%14.5f\n", which, tr->likelihood); |
---|
4072 | } |
---|
4073 | } |
---|
4074 | |
---|
4075 | if (tr->global > 0) { |
---|
4076 | putchar('\n'); |
---|
4077 | if (! recallBestTree(bt, 1, tr)) anerror = TRUE; |
---|
4078 | if (anerror) return; |
---|
4079 | printf(" Ln Likelihood =%14.5f\n", tr->likelihood); |
---|
4080 | optimize(tr, tr->global, bt); |
---|
4081 | if (anerror) return; |
---|
4082 | if (tr->outgrnode->back) tr->start = tr->outgrnode; |
---|
4083 | printTree(tr); |
---|
4084 | summarize(tr); |
---|
4085 | if (treefile) treeOut(treefile, tr, trout); |
---|
4086 | } |
---|
4087 | |
---|
4088 | if (treefile) { |
---|
4089 | (void) fclose(treefile); |
---|
4090 | printf("Tree also written to file\n"); |
---|
4091 | } |
---|
4092 | |
---|
4093 | putchar('\n'); |
---|
4094 | if (anerror) return; |
---|
4095 | |
---|
4096 | cmpBestTrees(bt, tr); |
---|
4097 | } /* makeUserTree */ |
---|
4098 | |
---|
4099 | |
---|
4100 | #if Slave |
---|
4101 | void slaveTreeEvaluate (tr, bt) |
---|
4102 | tree *tr; |
---|
4103 | bestlist *bt; |
---|
4104 | { /* slaveTreeEvaluate */ |
---|
4105 | boolean done; |
---|
4106 | |
---|
4107 | do { |
---|
4108 | requestForWork(); |
---|
4109 | receiveTree(&comm_master, tr); |
---|
4110 | done = tr->likelihood > 0.0; |
---|
4111 | if (! done) { |
---|
4112 | treeEvaluate(tr, bt); if (anerror) return; |
---|
4113 | sendTree(&comm_master, tr); if (anerror) return; |
---|
4114 | } |
---|
4115 | } while (! done); |
---|
4116 | } /* slaveTreeEvaluate */ |
---|
4117 | #endif |
---|
4118 | |
---|
4119 | |
---|
4120 | double randum (seed) |
---|
4121 | longer seed; |
---|
4122 | /* random number generator */ |
---|
4123 | { /* randum */ |
---|
4124 | int i, j, k, sum, mult[4]; |
---|
4125 | longer newseed; |
---|
4126 | double x; |
---|
4127 | |
---|
4128 | mult[0] = 13; |
---|
4129 | mult[1] = 24; |
---|
4130 | mult[2] = 22; |
---|
4131 | mult[3] = 6; |
---|
4132 | for (i = 0; i <= 5; i++) newseed[i] = 0; |
---|
4133 | for (i = 0; i <= 5; i++) { |
---|
4134 | sum = newseed[i]; |
---|
4135 | k = MIN(i,3); |
---|
4136 | for (j = 0; j <= k; j++) sum += mult[j] * seed[i-j]; |
---|
4137 | newseed[i] = sum; |
---|
4138 | for (j = i; j <= 4; j++) { |
---|
4139 | newseed[j+1] += newseed[j] >> 6; |
---|
4140 | newseed[j] &= 63; |
---|
4141 | } |
---|
4142 | } |
---|
4143 | newseed[5] &= 3; |
---|
4144 | x = 0.0; |
---|
4145 | for (i = 0; i <= 5; i++) x = 0.015625 * x + (seed[i] = newseed[i]); |
---|
4146 | return 0.25 * x; |
---|
4147 | } /* randum */ |
---|
4148 | |
---|
4149 | |
---|
4150 | void makeDenovoTree (tr, bt) |
---|
4151 | tree *tr; |
---|
4152 | bestlist *bt; |
---|
4153 | { /* makeDenovoTree */ |
---|
4154 | FILE *treefile; |
---|
4155 | nodeptr p; |
---|
4156 | int enterorder[maxsp+1]; /* random entry order */ |
---|
4157 | int i, j, k, nextsp, newsp, maxtrav, tested; |
---|
4158 | |
---|
4159 | if (restart) { |
---|
4160 | printf("Restarting from tree with the following sequences:\n"); |
---|
4161 | tr->userlen = TRUE; |
---|
4162 | treeReadLen(tr); if (anerror) return; |
---|
4163 | smoothTree(tr, smoothings); if (anerror) return; |
---|
4164 | (void) evaluate(tr, tr->start); if (anerror) return; |
---|
4165 | (void) saveBestTree(bt, tr); if (anerror) return; |
---|
4166 | |
---|
4167 | for (i = 1, j = tr->ntips; i <= tr->mxtips; i++) { /* find loose tips */ |
---|
4168 | if (! tr->nodep[i]->back) enterorder[++j] = i; |
---|
4169 | else { |
---|
4170 | char trimmed[nmlngth + 1]; |
---|
4171 | |
---|
4172 | copyTrimmedName(tr->nodep[i]->name, trimmed); |
---|
4173 | printf(" %s\n", trimmed); |
---|
4174 | # if Master |
---|
4175 | if (i>3) REPORT_ADD_SPECS; |
---|
4176 | # endif |
---|
4177 | } |
---|
4178 | } |
---|
4179 | putchar('\n'); |
---|
4180 | } |
---|
4181 | |
---|
4182 | else { /* start from scratch */ |
---|
4183 | tr->ntips = 0; |
---|
4184 | for (i = 1; i <= tr->mxtips; i++) enterorder[i] = i; |
---|
4185 | } |
---|
4186 | |
---|
4187 | if (jumble) for (i = tr->ntips + 1; i <= tr->mxtips; i++) { |
---|
4188 | j = randum(seed)*(tr->mxtips - tr->ntips) + tr->ntips + 1; |
---|
4189 | k = enterorder[j]; |
---|
4190 | enterorder[j] = enterorder[i]; |
---|
4191 | enterorder[i] = k; |
---|
4192 | } |
---|
4193 | |
---|
4194 | bt->numtrees = 1; |
---|
4195 | if (tr->ntips < tr->mxtips) printf("Adding species:\n"); |
---|
4196 | |
---|
4197 | if (tr->ntips == 0) { |
---|
4198 | for (i = 1; i <= 3; i++) { |
---|
4199 | char trimmed[nmlngth + 1]; |
---|
4200 | |
---|
4201 | copyTrimmedName(tr->nodep[enterorder[i]]->name, trimmed); |
---|
4202 | printf(" %s\n", trimmed); |
---|
4203 | } |
---|
4204 | tr->nextnode = tr->mxtips + 1; |
---|
4205 | buildSimpleTree(tr, enterorder[1], enterorder[2], enterorder[3]); |
---|
4206 | } |
---|
4207 | |
---|
4208 | if (anerror) return; |
---|
4209 | |
---|
4210 | while (tr->ntips < tr->mxtips || tr->opt_level < tr->global) { |
---|
4211 | maxtrav = (tr->ntips == tr->mxtips) ? tr->global : tr->partswap; |
---|
4212 | if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3; |
---|
4213 | |
---|
4214 | if (tr->opt_level >= maxtrav) { |
---|
4215 | char trimmed[nmlngth + 1]; |
---|
4216 | |
---|
4217 | nextsp = ++(tr->ntips); |
---|
4218 | newsp = enterorder[nextsp]; |
---|
4219 | p = tr->nodep[newsp]; |
---|
4220 | |
---|
4221 | copyTrimmedName(p->name, trimmed); |
---|
4222 | printf(" %s\n", trimmed); |
---|
4223 | |
---|
4224 | # if Master |
---|
4225 | if (nextsp % DNAML_STEP_TIME_COUNT == 1) { |
---|
4226 | REPORT_STEP_TIME; |
---|
4227 | } |
---|
4228 | REPORT_ADD_SPECS; |
---|
4229 | # endif |
---|
4230 | |
---|
4231 | (void) buildNewTip(tr, p); |
---|
4232 | |
---|
4233 | resetBestTree(bt); |
---|
4234 | cacheZ(tr); |
---|
4235 | tested = addTraverse(tr, p->back, findAnyTip(tr->start)->back, |
---|
4236 | 1, tr->ntips - 2, bt, fastadd); |
---|
4237 | bt->numtrees += tested; |
---|
4238 | if (anerror) return; |
---|
4239 | |
---|
4240 | # if Master |
---|
4241 | getReturnedTrees(tr, bt, tested); |
---|
4242 | # endif |
---|
4243 | |
---|
4244 | printf(" Tested %d alternative trees\n", tested); |
---|
4245 | |
---|
4246 | (void) recallBestTree(bt, 1, tr); |
---|
4247 | if (! tr->smoothed) { |
---|
4248 | smoothTree(tr, smoothings); if (anerror) return; |
---|
4249 | (void) evaluate(tr, tr->start); if (anerror) return; |
---|
4250 | (void) saveBestTree(bt, tr); |
---|
4251 | } |
---|
4252 | |
---|
4253 | if (tr->ntips == 4) tr->opt_level = 1; /* All 4 taxon trees done */ |
---|
4254 | maxtrav = (tr->ntips == tr->mxtips) ? tr->global : tr->partswap; |
---|
4255 | if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3; |
---|
4256 | } |
---|
4257 | |
---|
4258 | printf(" Ln Likelihood =%14.5f\n", tr->likelihood); |
---|
4259 | optimize(tr, maxtrav, bt); |
---|
4260 | |
---|
4261 | if (anerror) return; |
---|
4262 | } |
---|
4263 | |
---|
4264 | printf("\nExamined %d %s\n", bt->numtrees, |
---|
4265 | bt->numtrees != 1 ? "trees" : "tree"); |
---|
4266 | |
---|
4267 | treefile = trout ? fopen_pid("treefile","w") : (FILE *) NULL; |
---|
4268 | showBestTrees(bt, tr, treefile); |
---|
4269 | if (treefile) { |
---|
4270 | (void) fclose(treefile); |
---|
4271 | printf("Tree also written to file\n\n"); |
---|
4272 | } |
---|
4273 | |
---|
4274 | cmpBestTrees(bt, tr); |
---|
4275 | |
---|
4276 | # if DeleteCheckpointFile |
---|
4277 | unlink_pid(checkpointname); |
---|
4278 | # endif |
---|
4279 | } /* makeDenovoTree */ |
---|
4280 | |
---|
4281 | /*==========================================================================*/ |
---|
4282 | /* "main" routine */ |
---|
4283 | /*==========================================================================*/ |
---|
4284 | |
---|
4285 | #if Sequential |
---|
4286 | main () |
---|
4287 | #else |
---|
4288 | void slave () |
---|
4289 | #endif |
---|
4290 | { /* DNA Maximum Likelihood */ |
---|
4291 | # if Master |
---|
4292 | int starttime, inputtime, endtime; |
---|
4293 | # endif |
---|
4294 | |
---|
4295 | # if Master || Slave |
---|
4296 | int my_id, nprocs, type, from, sz; |
---|
4297 | char *msg; |
---|
4298 | # endif |
---|
4299 | |
---|
4300 | tree curtree, *tr; /* current tree */ |
---|
4301 | bestlist bestree, *bt; /* topology of best found tree */ |
---|
4302 | |
---|
4303 | |
---|
4304 | # if Debug |
---|
4305 | debug = fopen_pid("dnaml.debug","w"); |
---|
4306 | # endif |
---|
4307 | |
---|
4308 | # if Master |
---|
4309 | starttime = p4_clock(); |
---|
4310 | nprocs = p4_num_total_slaves(); |
---|
4311 | |
---|
4312 | if ((OUTFILE = freopen_pid("master.out", "w", stdout)) == NULL) { |
---|
4313 | fprintf(stderr, "Could not open output file\n"); |
---|
4314 | exit(1); |
---|
4315 | } |
---|
4316 | |
---|
4317 | /* Receive input file name from host */ |
---|
4318 | type = DNAML_FILE_NAME; from = -1; |
---|
4319 | p4_recv(&type, &from, &msg, &sz); |
---|
4320 | if ((INFILE = fopen(msg, "r")) == NULL) { |
---|
4321 | fprintf(stderr, "master could not open input file %s\n", msg); |
---|
4322 | exit(1); |
---|
4323 | } |
---|
4324 | p4_msg_free(msg); |
---|
4325 | |
---|
4326 | open_link(&comm_slave); |
---|
4327 | # endif |
---|
4328 | |
---|
4329 | # if DNAML_STEP |
---|
4330 | begin_step_time = starttime; |
---|
4331 | # endif |
---|
4332 | |
---|
4333 | # if Slave |
---|
4334 | my_id = p4_get_my_id(); |
---|
4335 | nprocs = p4_num_total_slaves(); |
---|
4336 | |
---|
4337 | /* Receive input file name from host */ |
---|
4338 | type = DNAML_FILE_NAME; from = -1; |
---|
4339 | p4_recv(&type, &from, &msg, &sz); |
---|
4340 | if ((INFILE = fopen(msg, "r")) == NULL) { |
---|
4341 | fprintf(stderr, "slave could not open input file %s\n",msg); |
---|
4342 | exit(1); |
---|
4343 | } |
---|
4344 | p4_msg_free(msg); |
---|
4345 | |
---|
4346 | if ((OUTFILE = freopen("/dev/null", "w", stdout)) == NULL) { |
---|
4347 | fprintf(stderr, "Could not open output file\n"); |
---|
4348 | exit(1); |
---|
4349 | } |
---|
4350 | |
---|
4351 | open_link(&comm_master); |
---|
4352 | # endif |
---|
4353 | |
---|
4354 | anerror = FALSE; |
---|
4355 | tr = & curtree; |
---|
4356 | bt = & bestree; |
---|
4357 | bt->ninit = 0; |
---|
4358 | getinput(tr); if (anerror) return 1; |
---|
4359 | |
---|
4360 | # if Master |
---|
4361 | inputtime = p4_clock(); |
---|
4362 | printf("Input time %d milliseconds\n",inputtime-starttime); |
---|
4363 | REPORT_STEP_TIME; |
---|
4364 | # endif |
---|
4365 | |
---|
4366 | # if Slave |
---|
4367 | (void) fclose(INFILE); |
---|
4368 | # endif |
---|
4369 | |
---|
4370 | /* The material below would be a loop over jumbles and/or boots */ |
---|
4371 | |
---|
4372 | makeweights(); if (anerror) return 1; |
---|
4373 | makevalues(tr); if (anerror) return 1; |
---|
4374 | if (freqsfrom) { |
---|
4375 | empiricalfreqs(tr); if (anerror) return 1; |
---|
4376 | getbasefreqs(); |
---|
4377 | } |
---|
4378 | |
---|
4379 | linkxarray(3, 3, & freextip, & usedxtip); if (anerror) return 1; |
---|
4380 | setupnodex(tr); if (anerror) return 1; |
---|
4381 | |
---|
4382 | # if Slave |
---|
4383 | slaveTreeEvaluate(tr, bt); |
---|
4384 | # else |
---|
4385 | (void) initBestTree(bt, nkeep, endsite); if (anerror) return 1; |
---|
4386 | if (! tr->usertree) {makeDenovoTree(tr, bt); if (anerror) return 1;} |
---|
4387 | else {makeUserTree(tr, bt); if (anerror) return 1;} |
---|
4388 | freeBestTree(bt); if (anerror) return 1; |
---|
4389 | # endif |
---|
4390 | |
---|
4391 | /* Endpoint for jumble and/or boot loop */ |
---|
4392 | |
---|
4393 | # if Master |
---|
4394 | tr->likelihood = 1.0; /* terminate slaves */ |
---|
4395 | sendTree(&comm_slave, tr); |
---|
4396 | # endif |
---|
4397 | |
---|
4398 | freeTree(tr); |
---|
4399 | |
---|
4400 | # if Master |
---|
4401 | close_link(&comm_slave); |
---|
4402 | (void) fclose(INFILE); |
---|
4403 | |
---|
4404 | REPORT_STEP_TIME; |
---|
4405 | endtime = p4_clock(); |
---|
4406 | printf("Execution time %d milliseconds\n", endtime - inputtime); |
---|
4407 | (void) fclose(OUTFILE); |
---|
4408 | # endif |
---|
4409 | |
---|
4410 | # if Slave |
---|
4411 | close_link(&comm_master); |
---|
4412 | (void) fclose(OUTFILE); |
---|
4413 | # endif |
---|
4414 | |
---|
4415 | # if Debug |
---|
4416 | (void) fclose(debug); |
---|
4417 | # endif |
---|
4418 | |
---|
4419 | # if Master || Slave |
---|
4420 | p4_send(DNAML_DONE, DNAML_HOST_ID, NULL, 0); |
---|
4421 | # else |
---|
4422 | return 0; |
---|
4423 | # endif |
---|
4424 | } /* DNA Maximum Likelihood */ |
---|