1 | #define programName "AxMl" |
---|
2 | #define programVersion "1.1.0" |
---|
3 | #define programVersionInt 10202 |
---|
4 | #define programDate "February, 2002" |
---|
5 | #define programDateInt 20000212 |
---|
6 | |
---|
7 | /*Time measurment */ |
---|
8 | #include <sys/times.h> |
---|
9 | #include <sys/types.h> |
---|
10 | #include <sys/time.h> |
---|
11 | #include <time.h> |
---|
12 | #include <stdlib.h> |
---|
13 | #include <string.h> |
---|
14 | double accTime = 0.0; |
---|
15 | double globTime = 0.0; |
---|
16 | struct timeval ttime; |
---|
17 | |
---|
18 | long flopBlocks = 0; |
---|
19 | |
---|
20 | double gettime() |
---|
21 | { |
---|
22 | gettimeofday(&ttime , NULL); |
---|
23 | return ttime.tv_sec + ttime.tv_usec * 0.000001; |
---|
24 | } |
---|
25 | /*Time measurment*/ |
---|
26 | |
---|
27 | /* AxMl, a program for estimation of phylogenetic trees from sequences |
---|
28 | * implementing SEVs (Subtree Equality Vectors) Copyright (C) 2002 by |
---|
29 | * Alexandros P. Stamatakis. |
---|
30 | * Derived from: |
---|
31 | * fastDNAml, a program for estimation of phylogenetic trees from sequences. |
---|
32 | * Copyright (C) 1998, 1999, 2000 by Gary J. Olsen |
---|
33 | * |
---|
34 | * This program is free software; you may redistribute it and/or modify it |
---|
35 | * under the terms of the GNU General Public License as published by the Free |
---|
36 | * Software Foundation; either version 2 of the License, or (at your option) |
---|
37 | * any later version. |
---|
38 | * |
---|
39 | * This program is distributed in the hope that it will be useful, but |
---|
40 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
41 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
---|
42 | * for more details. |
---|
43 | * |
---|
44 | * You should have received a copy of the GNU General Public License along |
---|
45 | * with this program; if not, write to the Free Software Foundation, Inc., |
---|
46 | * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
---|
47 | * |
---|
48 | * For any other enquiries write to Gary J. Olsen, Department of Microbiology, |
---|
49 | * University of Illinois, Urbana, IL 61801, USA |
---|
50 | * |
---|
51 | * Or send E-mail to gary@phylo.life.uiuc.edu |
---|
52 | * |
---|
53 | * |
---|
54 | * fastDNAml is based in part on the program dnaml by Joseph Felsenstein. |
---|
55 | * |
---|
56 | * Copyright notice from dnaml: |
---|
57 | * |
---|
58 | * version 3.3. (c) Copyright 1986, 1990 by the University of Washington |
---|
59 | * and Joseph Felsenstein. Written by Joseph Felsenstein. Permission is |
---|
60 | * granted to copy and use this program provided no fee is charged for it |
---|
61 | * and provided that this copyright notice is not removed. |
---|
62 | * |
---|
63 | * |
---|
64 | * When publishing work that based on results from fastDNAml please cite: |
---|
65 | * |
---|
66 | * Felsenstein, J. 1981. Evolutionary trees from DNA sequences: |
---|
67 | * A maximum likelihood approach. J. Mol. Evol. 17: 368-376. |
---|
68 | * |
---|
69 | * and |
---|
70 | * |
---|
71 | * Olsen, G. J., Matsuda, H., Hagstrom, R., and Overbeek, R. 1994. |
---|
72 | * fastDNAml: A tool for construction of phylogenetic trees of DNA |
---|
73 | * sequences using maximum likelihood. Comput. Appl. Biosci. 10: 41-48. |
---|
74 | */ |
---|
75 | |
---|
76 | /* Conversion to C and changes in sequential code by Gary Olsen, 1991-1994 |
---|
77 | * |
---|
78 | * p4 version by Hideo Matsuda and Ross Overbeek, 1991-1993 |
---|
79 | */ |
---|
80 | |
---|
81 | /* |
---|
82 | * 1.0 March 14, 1992 |
---|
83 | * Initial "release" version |
---|
84 | * |
---|
85 | * 1.0.1 March 18, 1992 |
---|
86 | * Add ntaxa to tree comments |
---|
87 | * Set minimum branch length on reading tree |
---|
88 | * Add blanks around operators in treeString (for prolog parsing) |
---|
89 | * Add program version to treeString comments |
---|
90 | * |
---|
91 | * 1.0.2 April 6, 1992 |
---|
92 | * Improved option line diagnostics |
---|
93 | * Improved auxiliary line diagnostics |
---|
94 | * Removed some trailing blanks from output |
---|
95 | * |
---|
96 | * 1.0.3 April 6, 1992 |
---|
97 | * Checkpoint trees that do not need any optimization |
---|
98 | * Print restart tree likelihood before optimizing |
---|
99 | * Fix treefile option so that it really toggles |
---|
100 | * |
---|
101 | * 1.0.4 July 13, 1992 |
---|
102 | * Add support for tree fact (instead of true Newick tree) in |
---|
103 | * processTreeCom, treeReadLen, str_processTreeCom and |
---|
104 | * str_treeReadLen |
---|
105 | * Use bit operations in randum |
---|
106 | * Correct error in bootstrap mask used with weighting mask |
---|
107 | * |
---|
108 | * 1.0.5 August 22, 1992 |
---|
109 | * Fix reading of underscore as first nonblank character in name |
---|
110 | * Add strchr and strstr functions to source code |
---|
111 | * Add output treefile name to message "Tree also written ..." |
---|
112 | * |
---|
113 | * 1.0.6 November 20, 1992 |
---|
114 | * Change (! nsites) test in setupTopol to (nsites == 0) for MIPS R4000 |
---|
115 | * Add vectorizing compiler directives for CRAY |
---|
116 | * Include updates and corrections to parallel code from H. Matsuda |
---|
117 | * |
---|
118 | * 1.0.7 March 25, 1993 |
---|
119 | * Remove translation of underlines in taxon names |
---|
120 | * |
---|
121 | * 1.0.8 April 30, 1993 |
---|
122 | * Remove version number from fastDNAml.h file name |
---|
123 | * |
---|
124 | * 1.0.9 August 12, 1993 |
---|
125 | * Version was never released. |
---|
126 | * Redefine treefile formats and default: |
---|
127 | * 0 None |
---|
128 | * 1 Newick |
---|
129 | * 2 Prolog |
---|
130 | * 3 PHYLIP (Default) |
---|
131 | * Remove quote marks and comment from PHYLIP treefile format. |
---|
132 | * |
---|
133 | * 1.1.0 September 3-5, 1993 |
---|
134 | * Arrays of size maxpatterns moved from stack to heap (mallocs) in |
---|
135 | * evaluateslope, makenewz, and cmpBestTrees. |
---|
136 | * Correct [maxsites] to [maxpatterns] in temporary array definitions |
---|
137 | * in Vectorize code of newview and evaluate. (These should also |
---|
138 | * get converted to use malloc() at some point.) |
---|
139 | * Change randum to use 12 bit segments, not 6. Change its seed |
---|
140 | * parameter to long *. |
---|
141 | * Remove the code that took the absolute value of random seeds. |
---|
142 | * Correct integer divide artifact in setting default transition/ |
---|
143 | * transversion parameter values. |
---|
144 | * When transition/transversion ratio is "reset", change to small |
---|
145 | * value, not the program default. |
---|
146 | * Report the "reset" transition/transversion ratio in the output. |
---|
147 | * Move global data into analdef, rawDNA, and crunchedDNA structures. |
---|
148 | * Change names of routines white and digit to whitechar and digitchar. |
---|
149 | * Convert y[] to yType, which is normally char, but is int if the |
---|
150 | * Vectorize flag is set. |
---|
151 | * Split option line reading out of getoptions routine. |
---|
152 | * |
---|
153 | * 1.1.1 September 30, 1994 |
---|
154 | * Incorporate changes made in 1.0.A (Feb. 11, 1994): |
---|
155 | * Remove processing of quotation marks within comments. |
---|
156 | * Break label finding into copy to string and find tip. |
---|
157 | * Generalize tree reading to read trees when names are and are not |
---|
158 | * already known. |
---|
159 | * Remove absolute value from randum seed reading. |
---|
160 | * Include integer version number and program date. |
---|
161 | * Remove maxsite, maxpatterns and maxsp limitations. |
---|
162 | * Incorporate code for retaining multiple trees. |
---|
163 | * Activate code for Hasegawa & Kishino test of tree differences. |
---|
164 | * Make quick add the default, with Q turning it off. |
---|
165 | * Make emperical frequencies the option with F turning it off. |
---|
166 | * Allow a residue frequency option line anywhere in the options. |
---|
167 | * Increase string length passed to treeString (should be length |
---|
168 | * checked, but ...) |
---|
169 | * Introduce (Sept.30) and fix (Oct. 26) bug in bootstrap sampling. |
---|
170 | * Fix error when user frequencies are last line and start with F. |
---|
171 | * |
---|
172 | * 1.2 September 5, 1997 |
---|
173 | * Move likelihood components into structure. |
---|
174 | * Change rawDNA to rawdata. |
---|
175 | * Change crunchedDNA to cruncheddata. |
---|
176 | * Recast the likelihoods per site into an array of stuctures, |
---|
177 | * where each stucture (likelivector) includes the likelihoods |
---|
178 | * of each residue type at the site, and a magnitude scale |
---|
179 | * factor (exp). This requires changing the space allocation, |
---|
180 | * newview, makenewz, evaluate, and sigma. |
---|
181 | * Change code of newview to rescale likelihoods up by 2**256 when |
---|
182 | * the largest value falls below 2**-256. This should solve |
---|
183 | * floating point underflow for all practical sized trees. |
---|
184 | * No changes are necessary in makenewz or sigma, since only |
---|
185 | * relative likelihoods are necessary. |
---|
186 | * |
---|
187 | * 1.2.1 March 9, 1998 |
---|
188 | * Convert likelihood adjustment factor (2**256) to a constant. |
---|
189 | * Fix vectorized calculation of likelihood (error introduced in 1.2) |
---|
190 | * |
---|
191 | * 1.2.2 December 23, 1998 |
---|
192 | * General code clean-up. |
---|
193 | * Convert to function definitions with parameter type lists |
---|
194 | * |
---|
195 | * 1.2.2 January 3, 2000 |
---|
196 | * Add copyright and license information |
---|
197 | * Make this the current release version |
---|
198 | */ |
---|
199 | |
---|
200 | #ifdef Master |
---|
201 | # undef Master |
---|
202 | # define Master 1 |
---|
203 | # define Slave 0 |
---|
204 | # define Sequential 0 |
---|
205 | #else |
---|
206 | # ifdef Slave |
---|
207 | # undef Slave |
---|
208 | # define Master 0 |
---|
209 | # define Slave 1 |
---|
210 | # define Sequential 0 |
---|
211 | # else |
---|
212 | # ifdef Sequential |
---|
213 | # undef Sequential |
---|
214 | # endif |
---|
215 | # define Master 0 |
---|
216 | # define Slave 0 |
---|
217 | # define Sequential 1 |
---|
218 | # endif |
---|
219 | #endif |
---|
220 | |
---|
221 | #ifdef CRAY |
---|
222 | # define Vectorize |
---|
223 | #endif |
---|
224 | |
---|
225 | #ifdef Vectorize |
---|
226 | # define maxpatterns 10000 /* maximum number of different site patterns */ |
---|
227 | #endif |
---|
228 | |
---|
229 | #include <stdio.h> |
---|
230 | #include <math.h> |
---|
231 | #include "axml.h" /* Requires version 1.2 */ |
---|
232 | |
---|
233 | #if Master || Slave |
---|
234 | # include "p4.h" |
---|
235 | # include "comm_link.h" |
---|
236 | #endif |
---|
237 | |
---|
238 | /* Global variables */ |
---|
239 | |
---|
240 | xarray *usedxtip, *freextip; |
---|
241 | |
---|
242 | #ifdef FLOP_BLOCK_COUNTER |
---|
243 | unsigned long flopBlock = 0; |
---|
244 | #endif |
---|
245 | |
---|
246 | #if Sequential /* Use standard input */ |
---|
247 | # undef DNAML_STEP |
---|
248 | # define DNAML_STEP 0 |
---|
249 | # define INFILE stdin |
---|
250 | #endif |
---|
251 | |
---|
252 | #if Master |
---|
253 | # define MAX_SEND_AHEAD 400 |
---|
254 | char *best_tr_recv = NULL; /* these are used for flow control */ |
---|
255 | double best_lk_recv; |
---|
256 | int send_ahead = 0; /* number of outstanding sends */ |
---|
257 | |
---|
258 | # ifdef DNAML_STEP |
---|
259 | # define DNAML_STEP 1 |
---|
260 | # endif |
---|
261 | # define INFILE Seqf |
---|
262 | # define OUTFILE Outf |
---|
263 | FILE *INFILE, *OUTFILE; |
---|
264 | comm_block comm_slave; |
---|
265 | #endif |
---|
266 | |
---|
267 | #if Slave |
---|
268 | # undef DNAML_STEP |
---|
269 | # define DNAML_STEP 0 |
---|
270 | # define INFILE Seqf |
---|
271 | # define OUTFILE Outf |
---|
272 | FILE *INFILE, *OUTFILE; |
---|
273 | comm_block comm_master; |
---|
274 | #endif |
---|
275 | |
---|
276 | #if Debug |
---|
277 | FILE *debug; |
---|
278 | #endif |
---|
279 | |
---|
280 | #if DNAML_STEP |
---|
281 | int begin_step_time, end_step_time; |
---|
282 | # define REPORT_ADD_SPECS p4_send(DNAML_ADD_SPECS, DNAML_HOST_ID, NULL, 0) |
---|
283 | # define REPORT_SEND_TREE p4_send(DNAML_SEND_TREE, DNAML_HOST_ID, NULL, 0) |
---|
284 | # define REPORT_RECV_TREE p4_send(DNAML_RECV_TREE, DNAML_HOST_ID, NULL, 0) |
---|
285 | # define REPORT_STEP_TIME \ |
---|
286 | {\ |
---|
287 | char send_buf[80]; \ |
---|
288 | end_step_time = p4_clock(); \ |
---|
289 | (void) sprintf(send_buf, "%d", end_step_time-begin_step_time); \ |
---|
290 | p4_send(DNAML_STEP_TIME, DNAML_HOST_ID, send_buf,strlen(send_buf)+1); \ |
---|
291 | begin_step_time = end_step_time; \ |
---|
292 | } |
---|
293 | #else |
---|
294 | # define REPORT_ADD_SPECS |
---|
295 | # define REPORT_SEND_TREE |
---|
296 | # define REPORT_RECV_TREE |
---|
297 | # define REPORT_STEP_TIME |
---|
298 | #endif |
---|
299 | |
---|
300 | /*=======================================================================*/ |
---|
301 | /* PROGRAM */ |
---|
302 | /*=======================================================================*/ |
---|
303 | /* Best tree handling for dnaml */ |
---|
304 | /*=======================================================================*/ |
---|
305 | |
---|
306 | /* Tip value comparisons |
---|
307 | * |
---|
308 | * Use void pointers to hide type from other routines. Only tipValPtr and |
---|
309 | * cmpTipVal need to be changed to alter the nature of the values compared |
---|
310 | * (e.g., names instead of node numbers). |
---|
311 | * |
---|
312 | * cmpTipVal(tipValPtr(nodeptr p), tipValPtr(nodeptr q)) == -1, 0 or 1. |
---|
313 | * |
---|
314 | * This provides direct comparison of tip values (for example, for |
---|
315 | * definition of tr->start). |
---|
316 | */ |
---|
317 | |
---|
318 | |
---|
319 | void *tipValPtr (nodeptr p) |
---|
320 | { return (void *) & p->number; |
---|
321 | } |
---|
322 | |
---|
323 | |
---|
324 | int cmpTipVal (void *v1, void *v2) |
---|
325 | { /* cmpTipVal */ |
---|
326 | int i1, i2; |
---|
327 | |
---|
328 | i1 = *((int *) v1); |
---|
329 | i2 = *((int *) v2); |
---|
330 | return (i1 < i2) ? -1 : ((i1 == i2) ? 0 : 1); |
---|
331 | } /* cmpTipVal */ |
---|
332 | |
---|
333 | |
---|
334 | /* These are the only routines that need to UNDERSTAND topologies */ |
---|
335 | |
---|
336 | |
---|
337 | topol *setupTopol (int maxtips, int nsites) |
---|
338 | { /* setupTopol */ |
---|
339 | topol *tpl; |
---|
340 | |
---|
341 | if (! (tpl = (topol *) Malloc(sizeof(topol))) || |
---|
342 | ! (tpl->links = (connptr) Malloc((2*maxtips-3) * sizeof(connect))) || |
---|
343 | (nsites && ! (tpl->log_f |
---|
344 | = (double *) Malloc(nsites * sizeof(double))))) { |
---|
345 | printf("ERROR: Unable to get topology memory"); |
---|
346 | tpl = (topol *) NULL; |
---|
347 | } |
---|
348 | |
---|
349 | else { |
---|
350 | if (nsites == 0) tpl->log_f = (double *) NULL; |
---|
351 | tpl->likelihood = unlikely; |
---|
352 | tpl->start = (node *) NULL; |
---|
353 | tpl->nextlink = 0; |
---|
354 | tpl->ntips = 0; |
---|
355 | tpl->nextnode = 0; |
---|
356 | tpl->opt_level = 0; /* degree of branch swapping explored */ |
---|
357 | tpl->scrNum = 0; /* position in sorted list of scores */ |
---|
358 | tpl->tplNum = 0; /* position in sorted list of trees */ |
---|
359 | tpl->log_f_valid = 0; /* log_f value sites */ |
---|
360 | tpl->prelabeled = TRUE; |
---|
361 | tpl->smoothed = FALSE; /* branch optimization converged? */ |
---|
362 | } |
---|
363 | |
---|
364 | return tpl; |
---|
365 | } /* setupTopol */ |
---|
366 | |
---|
367 | |
---|
368 | void freeTopol (topol *tpl) |
---|
369 | { /* freeTopol */ |
---|
370 | Free(tpl->links); |
---|
371 | if (tpl->log_f) Free(tpl->log_f); |
---|
372 | Free(tpl); |
---|
373 | } /* freeTopol */ |
---|
374 | |
---|
375 | |
---|
376 | int saveSubtree (nodeptr p, topol *tpl) |
---|
377 | /* Save a subtree in a standard order so that earlier branches |
---|
378 | * from a node contain lower value tips than do second branches from |
---|
379 | * the node. This code works with arbitrary furcations in the tree. |
---|
380 | */ |
---|
381 | { /* saveSubtree */ |
---|
382 | connptr r, r0; |
---|
383 | nodeptr q, s; |
---|
384 | int t, t0, t1; |
---|
385 | |
---|
386 | r0 = tpl->links; |
---|
387 | r = r0 + (tpl->nextlink)++; |
---|
388 | r->p = p; |
---|
389 | r->q = q = p->back; |
---|
390 | r->z = p->z; |
---|
391 | r->descend = 0; /* No children (yet) */ |
---|
392 | |
---|
393 | if (q->tip) { |
---|
394 | r->valptr = tipValPtr(q); /* Assign value */ |
---|
395 | } |
---|
396 | |
---|
397 | else { /* Internal node, look at children */ |
---|
398 | s = q->next; /* First child */ |
---|
399 | do { |
---|
400 | t = saveSubtree(s, tpl); /* Generate child's subtree */ |
---|
401 | |
---|
402 | t0 = 0; /* Merge child into list */ |
---|
403 | t1 = r->descend; |
---|
404 | while (t1 && (cmpTipVal(r0[t1].valptr, r0[t].valptr) < 0)) { |
---|
405 | t0 = t1; |
---|
406 | t1 = r0[t1].sibling; |
---|
407 | } |
---|
408 | if (t0) r0[t0].sibling = t; else r->descend = t; |
---|
409 | r0[t].sibling = t1; |
---|
410 | |
---|
411 | s = s->next; /* Next child */ |
---|
412 | } while (s != q); |
---|
413 | |
---|
414 | r->valptr = r0[r->descend].valptr; /* Inherit first child's value */ |
---|
415 | } /* End of internal node processing */ |
---|
416 | |
---|
417 | return r - r0; |
---|
418 | } /* saveSubtree */ |
---|
419 | |
---|
420 | |
---|
421 | nodeptr minSubtreeTip (nodeptr p0) |
---|
422 | { /* minTreeTip */ |
---|
423 | nodeptr minTip, p, testTip; |
---|
424 | |
---|
425 | if (p0->tip) return p0; |
---|
426 | |
---|
427 | p = p0->next; |
---|
428 | minTip = minSubtreeTip(p->back); |
---|
429 | while ((p = p->next) != p0) { |
---|
430 | testTip = minSubtreeTip(p->back); |
---|
431 | if (cmpTipVal(tipValPtr(testTip), tipValPtr(minTip)) < 0) |
---|
432 | minTip = testTip; |
---|
433 | } |
---|
434 | return minTip; |
---|
435 | } /* minTreeTip */ |
---|
436 | |
---|
437 | |
---|
438 | nodeptr minTreeTip (nodeptr p) |
---|
439 | { /* minTreeTip */ |
---|
440 | nodeptr minp, minpb; |
---|
441 | |
---|
442 | minp = minSubtreeTip(p); |
---|
443 | minpb = minSubtreeTip(p->back); |
---|
444 | return cmpTipVal(tipValPtr(minp), tipValPtr(minpb)) < 0 ? minp : minpb; |
---|
445 | } /* minTreeTip */ |
---|
446 | |
---|
447 | |
---|
448 | void saveTree (tree *tr, topol *tpl) |
---|
449 | /* Save a tree topology in a standard order so that first branches |
---|
450 | * from a node contain lower value tips than do second branches from |
---|
451 | * the node. The root tip should have the lowest value of all. |
---|
452 | */ |
---|
453 | { /* saveTree */ |
---|
454 | connptr r; |
---|
455 | double *tr_log_f, *tpl_log_f; |
---|
456 | int i; |
---|
457 | |
---|
458 | tpl->nextlink = 0; /* Reset link pointer */ |
---|
459 | r = tpl->links + saveSubtree(minTreeTip(tr->start), tpl); /* Save tree */ |
---|
460 | r->sibling = 0; |
---|
461 | |
---|
462 | tpl->likelihood = tr->likelihood; |
---|
463 | tpl->start = tr->start; |
---|
464 | tpl->ntips = tr->ntips; |
---|
465 | tpl->nextnode = tr->nextnode; |
---|
466 | tpl->opt_level = tr->opt_level; |
---|
467 | tpl->prelabeled = tr->prelabeled; |
---|
468 | tpl->smoothed = tr->smoothed; |
---|
469 | |
---|
470 | if (tpl_log_f = tpl->log_f) { |
---|
471 | tr_log_f = tr->log_f; |
---|
472 | i = tpl->log_f_valid = tr->log_f_valid; |
---|
473 | while (--i >= 0) *tpl_log_f++ = *tr_log_f++; |
---|
474 | } |
---|
475 | else { |
---|
476 | tpl->log_f_valid = 0; |
---|
477 | } |
---|
478 | } /* saveTree */ |
---|
479 | |
---|
480 | |
---|
481 | void copyTopol (topol *tpl1, topol *tpl2) |
---|
482 | { /* copyTopol */ |
---|
483 | connptr r1, r2, r10, r20; |
---|
484 | double *tpl1_log_f, *tpl2_log_f; |
---|
485 | int i; |
---|
486 | |
---|
487 | r10 = tpl1->links; |
---|
488 | r20 = tpl2->links; |
---|
489 | tpl2->nextlink = tpl1->nextlink; |
---|
490 | |
---|
491 | r1 = r10; |
---|
492 | r2 = r20; |
---|
493 | i = 2 * tpl1->ntips - 3; |
---|
494 | while (--i >= 0) { |
---|
495 | r2->z = r1->z; |
---|
496 | r2->p = r1->p; |
---|
497 | r2->q = r1->q; |
---|
498 | r2->valptr = r1->valptr; |
---|
499 | r2->descend = r1->descend; |
---|
500 | r2->sibling = r1->sibling; |
---|
501 | r1++; |
---|
502 | r2++; |
---|
503 | } |
---|
504 | |
---|
505 | if (tpl1->log_f_valid && tpl2->log_f) { |
---|
506 | tpl1_log_f = tpl1->log_f; |
---|
507 | tpl2_log_f = tpl2->log_f; |
---|
508 | tpl2->log_f_valid = i = tpl1->log_f_valid; |
---|
509 | while (--i >= 0) *tpl2_log_f++ = *tpl1_log_f++; |
---|
510 | } |
---|
511 | else { |
---|
512 | tpl2->log_f_valid = 0; |
---|
513 | } |
---|
514 | |
---|
515 | tpl2->likelihood = tpl1->likelihood; |
---|
516 | tpl2->start = tpl1->start; |
---|
517 | tpl2->ntips = tpl1->ntips; |
---|
518 | tpl2->nextnode = tpl1->nextnode; |
---|
519 | tpl2->opt_level = tpl1->opt_level; |
---|
520 | tpl2->prelabeled = tpl1->prelabeled; |
---|
521 | tpl2->scrNum = tpl1->scrNum; |
---|
522 | tpl2->tplNum = tpl1->tplNum; |
---|
523 | tpl2->smoothed = tpl1->smoothed; |
---|
524 | } /* copyTopol */ |
---|
525 | |
---|
526 | |
---|
527 | boolean restoreTree (topol *tpl, tree *tr) |
---|
528 | { /* restoreTree */ |
---|
529 | void hookup(); |
---|
530 | boolean initrav(); |
---|
531 | |
---|
532 | connptr r; |
---|
533 | nodeptr p, p0; |
---|
534 | double *tr_log_f, *tpl_log_f; |
---|
535 | int i; |
---|
536 | |
---|
537 | /* Clear existing connections */ |
---|
538 | |
---|
539 | for (i = 1; i <= 2*(tr->mxtips) - 2; i++) { /* Uses p = p->next at tip */ |
---|
540 | p0 = p = tr->nodep[i]; |
---|
541 | do { |
---|
542 | p->back = (nodeptr) NULL; |
---|
543 | p = p->next; |
---|
544 | } while (p != p0); |
---|
545 | } |
---|
546 | |
---|
547 | /* Copy connections from topology */ |
---|
548 | |
---|
549 | for (r = tpl->links, i = 0; i < tpl->nextlink; r++, i++) { |
---|
550 | hookup(r->p, r->q, r->z); |
---|
551 | } |
---|
552 | |
---|
553 | tr->likelihood = tpl->likelihood; |
---|
554 | tr->start = tpl->start; |
---|
555 | tr->ntips = tpl->ntips; |
---|
556 | tr->nextnode = tpl->nextnode; |
---|
557 | tr->opt_level = tpl->opt_level; |
---|
558 | tr->prelabeled = tpl->prelabeled; |
---|
559 | tr->smoothed = tpl->smoothed; |
---|
560 | |
---|
561 | if (tpl_log_f = tpl->log_f) { |
---|
562 | tr_log_f = tr->log_f; |
---|
563 | i = tr->log_f_valid = tpl->log_f_valid; |
---|
564 | while (--i >= 0) *tr_log_f++ = *tpl_log_f++; |
---|
565 | } |
---|
566 | else { |
---|
567 | tr->log_f_valid = 0; |
---|
568 | } |
---|
569 | |
---|
570 | return (initrav(tr, tr->start) && initrav(tr, tr->start->back)); |
---|
571 | } /* restoreTree */ |
---|
572 | |
---|
573 | |
---|
574 | int initBestTree (bestlist *bt, int newkeep, int numsp, int sites) |
---|
575 | { /* initBestTree */ |
---|
576 | int i, nlogf; |
---|
577 | |
---|
578 | |
---|
579 | bt->nkeep = 0; |
---|
580 | |
---|
581 | if (bt->ninit <= 0) { |
---|
582 | if (! (bt->start = setupTopol(numsp, sites))) return 0; |
---|
583 | bt->ninit = -1; |
---|
584 | bt->nvalid = 0; |
---|
585 | bt->numtrees = 0; |
---|
586 | bt->best = unlikely; |
---|
587 | bt->improved = FALSE; |
---|
588 | bt->byScore = (topol **) Malloc((newkeep+1) * sizeof(topol *)); |
---|
589 | bt->byTopol = (topol **) Malloc((newkeep+1) * sizeof(topol *)); |
---|
590 | if (! bt->byScore || ! bt->byTopol) { |
---|
591 | fprintf(stderr, "initBestTree: Malloc failure\n"); |
---|
592 | return 0; |
---|
593 | } |
---|
594 | } |
---|
595 | else if (ABS(newkeep) > bt->ninit) { |
---|
596 | if (newkeep < 0) newkeep = -(bt->ninit); |
---|
597 | else newkeep = bt->ninit; |
---|
598 | } |
---|
599 | |
---|
600 | if (newkeep < 1) { /* Use negative newkeep to clear list */ |
---|
601 | newkeep = -newkeep; |
---|
602 | if (newkeep < 1) newkeep = 1; |
---|
603 | bt->nvalid = 0; |
---|
604 | bt->best = unlikely; |
---|
605 | } |
---|
606 | |
---|
607 | if (bt->nvalid >= newkeep) { |
---|
608 | bt->nvalid = newkeep; |
---|
609 | bt->worst = bt->byScore[newkeep]->likelihood; |
---|
610 | } |
---|
611 | else { |
---|
612 | bt->worst = unlikely; |
---|
613 | } |
---|
614 | |
---|
615 | for (i = bt->ninit + 1; i <= newkeep; i++) { |
---|
616 | nlogf = (i <= maxlogf) ? sites : 0; |
---|
617 | if (! (bt->byScore[i] = setupTopol(numsp, nlogf))) break; |
---|
618 | bt->byTopol[i] = bt->byScore[i]; |
---|
619 | bt->ninit = i; |
---|
620 | } |
---|
621 | |
---|
622 | return (bt->nkeep = MIN(newkeep, bt->ninit)); |
---|
623 | } /* initBestTree */ |
---|
624 | |
---|
625 | |
---|
626 | |
---|
627 | int resetBestTree (bestlist *bt) |
---|
628 | { /* resetBestTree */ |
---|
629 | bt->best = unlikely; |
---|
630 | bt->worst = unlikely; |
---|
631 | bt->nvalid = 0; |
---|
632 | bt->improved = FALSE; |
---|
633 | } /* resetBestTree */ |
---|
634 | |
---|
635 | |
---|
636 | boolean freeBestTree(bestlist *bt) |
---|
637 | { /* freeBestTree */ |
---|
638 | while (bt->ninit >= 0) freeTopol(bt->byScore[(bt->ninit)--]); |
---|
639 | freeTopol(bt->start); |
---|
640 | return TRUE; |
---|
641 | } /* freeBestTree */ |
---|
642 | |
---|
643 | |
---|
644 | /* Compare two trees, assuming that each is in standard order. Return |
---|
645 | * -1 if first preceeds second, 0 if they are identical, or +1 if first |
---|
646 | * follows second in standard order. Lower number tips preceed higher |
---|
647 | * number tips. A tip preceeds a corresponding internal node. Internal |
---|
648 | * nodes are ranked by their lowest number tip. |
---|
649 | */ |
---|
650 | |
---|
651 | int cmpSubtopol (connptr p10, connptr p1, connptr p20, connptr p2) |
---|
652 | { /* cmpSubtopol */ |
---|
653 | connptr p1d, p2d; |
---|
654 | int cmp; |
---|
655 | |
---|
656 | if (! p1->descend && ! p2->descend) /* Two tips */ |
---|
657 | return cmpTipVal(p1->valptr, p2->valptr); |
---|
658 | |
---|
659 | if (! p1->descend) return -1; /* p1 = tip, p2 = node */ |
---|
660 | if (! p2->descend) return 1; /* p2 = tip, p1 = node */ |
---|
661 | |
---|
662 | p1d = p10 + p1->descend; |
---|
663 | p2d = p20 + p2->descend; |
---|
664 | while (1) { /* Two nodes */ |
---|
665 | if (cmp = cmpSubtopol(p10, p1d, p20, p2d)) return cmp; /* Subtrees */ |
---|
666 | if (! p1d->sibling && ! p2d->sibling) return 0; /* Lists done */ |
---|
667 | if (! p1d->sibling) return -1; /* One done, other not */ |
---|
668 | if (! p2d->sibling) return 1; /* One done, other not */ |
---|
669 | p1d = p10 + p1d->sibling; /* Neither done */ |
---|
670 | p2d = p20 + p2d->sibling; |
---|
671 | } |
---|
672 | } /* cmpSubtopol */ |
---|
673 | |
---|
674 | |
---|
675 | |
---|
676 | int cmpTopol (void *tpl1, void *tpl2) |
---|
677 | { /* cmpTopol */ |
---|
678 | connptr r1, r2; |
---|
679 | int cmp; |
---|
680 | |
---|
681 | r1 = ((topol *) tpl1)->links; |
---|
682 | r2 = ((topol *) tpl2)->links; |
---|
683 | cmp = cmpTipVal(tipValPtr(r1->p), tipValPtr(r2->p)); |
---|
684 | if (cmp) return cmp; |
---|
685 | return cmpSubtopol(r1, r1, r2, r2); |
---|
686 | } /* cmpTopol */ |
---|
687 | |
---|
688 | |
---|
689 | |
---|
690 | int cmpTplScore (void *tpl1, void *tpl2) |
---|
691 | { /* cmpTplScore */ |
---|
692 | double l1, l2; |
---|
693 | |
---|
694 | l1 = ((topol *) tpl1)->likelihood; |
---|
695 | l2 = ((topol *) tpl2)->likelihood; |
---|
696 | return (l1 > l2) ? -1 : ((l1 == l2) ? 0 : 1); |
---|
697 | } /* cmpTplScore */ |
---|
698 | |
---|
699 | |
---|
700 | |
---|
701 | /* Find an item in a sorted list of n items. If the item is in the list, |
---|
702 | * return its index. If it is not in the list, return the negative of the |
---|
703 | * position into which it should be inserted. |
---|
704 | */ |
---|
705 | |
---|
706 | int findInList (void *item, void *list[], int n, int (* cmpFunc)()) |
---|
707 | { /* findInList */ |
---|
708 | int mid, hi, lo, cmp; |
---|
709 | |
---|
710 | if (n < 1) return -1; /* No match; first index */ |
---|
711 | |
---|
712 | lo = 1; |
---|
713 | mid = 0; |
---|
714 | hi = n; |
---|
715 | while (lo < hi) { |
---|
716 | mid = (lo + hi) >> 1; |
---|
717 | cmp = (* cmpFunc)(item, list[mid-1]); |
---|
718 | if (cmp) { |
---|
719 | if (cmp < 0) hi = mid; |
---|
720 | else lo = mid + 1; |
---|
721 | } |
---|
722 | else return mid; /* Exact match */ |
---|
723 | } |
---|
724 | |
---|
725 | if (lo != mid) { |
---|
726 | cmp = (* cmpFunc)(item, list[lo-1]); |
---|
727 | if (cmp == 0) return lo; |
---|
728 | } |
---|
729 | if (cmp > 0) lo++; /* Result of step = 0 test */ |
---|
730 | return -lo; |
---|
731 | } /* findInList */ |
---|
732 | |
---|
733 | |
---|
734 | |
---|
735 | int findTreeInList (bestlist *bt, tree *tr) |
---|
736 | { /* findTreeInList */ |
---|
737 | topol *tpl; |
---|
738 | |
---|
739 | tpl = bt->byScore[0]; |
---|
740 | saveTree(tr, tpl); |
---|
741 | return findInList((void *) tpl, (void **) (& (bt->byTopol[1])), |
---|
742 | bt->nvalid, cmpTopol); |
---|
743 | } /* findTreeInList */ |
---|
744 | |
---|
745 | |
---|
746 | int saveBestTree (bestlist *bt, tree *tr) |
---|
747 | { /* saveBestTree */ |
---|
748 | double *tr_log_f, *tpl_log_f; |
---|
749 | topol *tpl, *reuse; |
---|
750 | int tplNum, scrNum, reuseScrNum, reuseTplNum, i, oldValid, newValid; |
---|
751 | |
---|
752 | tplNum = findTreeInList(bt, tr); |
---|
753 | tpl = bt->byScore[0]; |
---|
754 | oldValid = newValid = bt->nvalid; |
---|
755 | |
---|
756 | if (tplNum > 0) { /* Topology is in list */ |
---|
757 | reuse = bt->byTopol[tplNum]; /* Matching topol */ |
---|
758 | reuseScrNum = reuse->scrNum; |
---|
759 | reuseTplNum = reuse->tplNum; |
---|
760 | } |
---|
761 | /* Good enough to keep? */ |
---|
762 | else if (tr->likelihood < bt->worst) return 0; |
---|
763 | |
---|
764 | else { /* Topology is not in list */ |
---|
765 | tplNum = -tplNum; /* Add to list (not replace) */ |
---|
766 | if (newValid < bt->nkeep) bt->nvalid = ++newValid; |
---|
767 | reuseScrNum = newValid; /* Take worst tree */ |
---|
768 | reuse = bt->byScore[reuseScrNum]; |
---|
769 | reuseTplNum = (newValid > oldValid) ? newValid : reuse->tplNum; |
---|
770 | if (tr->likelihood > bt->start->likelihood) bt->improved = TRUE; |
---|
771 | } |
---|
772 | |
---|
773 | scrNum = findInList((void *) tpl, (void **) (& (bt->byScore[1])), |
---|
774 | oldValid, cmpTplScore); |
---|
775 | scrNum = ABS(scrNum); |
---|
776 | |
---|
777 | if (scrNum < reuseScrNum) |
---|
778 | for (i = reuseScrNum; i > scrNum; i--) |
---|
779 | (bt->byScore[i] = bt->byScore[i-1])->scrNum = i; |
---|
780 | |
---|
781 | else if (scrNum > reuseScrNum) { |
---|
782 | scrNum--; |
---|
783 | for (i = reuseScrNum; i < scrNum; i++) |
---|
784 | (bt->byScore[i] = bt->byScore[i+1])->scrNum = i; |
---|
785 | } |
---|
786 | |
---|
787 | if (tplNum < reuseTplNum) |
---|
788 | for (i = reuseTplNum; i > tplNum; i--) |
---|
789 | (bt->byTopol[i] = bt->byTopol[i-1])->tplNum = i; |
---|
790 | |
---|
791 | else if (tplNum > reuseTplNum) { |
---|
792 | tplNum--; |
---|
793 | for (i = reuseTplNum; i < tplNum; i++) |
---|
794 | (bt->byTopol[i] = bt->byTopol[i+1])->tplNum = i; |
---|
795 | } |
---|
796 | |
---|
797 | if (tpl_log_f = tpl->log_f) { |
---|
798 | tr_log_f = tr->log_f; |
---|
799 | i = tpl->log_f_valid = tr->log_f_valid; |
---|
800 | while (--i >= 0) *tpl_log_f++ = *tr_log_f++; |
---|
801 | } |
---|
802 | else { |
---|
803 | tpl->log_f_valid = 0; |
---|
804 | } |
---|
805 | |
---|
806 | tpl->scrNum = scrNum; |
---|
807 | tpl->tplNum = tplNum; |
---|
808 | bt->byTopol[tplNum] = bt->byScore[scrNum] = tpl; |
---|
809 | bt->byScore[0] = reuse; |
---|
810 | |
---|
811 | if (scrNum == 1) bt->best = tr->likelihood; |
---|
812 | if (newValid == bt->nkeep) bt->worst = bt->byScore[newValid]->likelihood; |
---|
813 | |
---|
814 | return scrNum; |
---|
815 | } /* saveBestTree */ |
---|
816 | |
---|
817 | |
---|
818 | int startOpt (bestlist *bt, tree *tr) |
---|
819 | { /* startOpt */ |
---|
820 | int scrNum; |
---|
821 | |
---|
822 | scrNum = saveBestTree(bt, tr); |
---|
823 | copyTopol(bt->byScore[scrNum], bt->start); |
---|
824 | bt->improved = FALSE; |
---|
825 | return scrNum; |
---|
826 | } /* startOpt */ |
---|
827 | |
---|
828 | |
---|
829 | int setOptLevel (bestlist *bt, int opt_level) |
---|
830 | { /* setOptLevel */ |
---|
831 | int tplNum, scrNum; |
---|
832 | |
---|
833 | tplNum = findInList((void *) bt->start, (void **) (&(bt->byTopol[1])), |
---|
834 | bt->nvalid, cmpTopol); |
---|
835 | if (tplNum > 0) { |
---|
836 | bt->byTopol[tplNum]->opt_level = opt_level; |
---|
837 | scrNum = bt->byTopol[tplNum]->scrNum; |
---|
838 | } |
---|
839 | else { |
---|
840 | scrNum = 0; |
---|
841 | } |
---|
842 | |
---|
843 | return scrNum; |
---|
844 | } /* setOptLevel */ |
---|
845 | |
---|
846 | |
---|
847 | int recallBestTree (bestlist *bt, int rank, tree *tr) |
---|
848 | { /* recallBestTree */ |
---|
849 | if (rank < 1) rank = 1; |
---|
850 | if (rank > bt->nvalid) rank = bt->nvalid; |
---|
851 | if (rank > 0) if (! restoreTree(bt->byScore[rank], tr)) return FALSE; |
---|
852 | return rank; |
---|
853 | } /* recallBestTree */ |
---|
854 | |
---|
855 | |
---|
856 | /*=======================================================================*/ |
---|
857 | /* End of best tree routines */ |
---|
858 | /*=======================================================================*/ |
---|
859 | |
---|
860 | |
---|
861 | #if 0 |
---|
862 | void hang(char *msg) |
---|
863 | { printf("Hanging around: %s\n", msg); |
---|
864 | while(1); |
---|
865 | } |
---|
866 | #endif |
---|
867 | |
---|
868 | |
---|
869 | boolean getnums (rawdata *rdta) |
---|
870 | /* input number of species, number of sites */ |
---|
871 | { /* getnums */ |
---|
872 | printf("\n%s, version %s, %s,\nCopyright (C) 2002 by Alexandros P. Stamatakis\n\n", |
---|
873 | programName, |
---|
874 | programVersion, |
---|
875 | programDate); |
---|
876 | printf("Based in part on Joseph Felsenstein's\n\n"); |
---|
877 | printf(" Nucleic acid sequence Maximum Likelihood method, version 3.3\n\n\n"); |
---|
878 | |
---|
879 | if (fscanf(INFILE, "%d %d", & rdta->numsp, & rdta->sites) != 2) { |
---|
880 | printf("ERROR: Problem reading number of species and sites\n"); |
---|
881 | return FALSE; |
---|
882 | } |
---|
883 | printf("%d Species, %d Sites\n\n", rdta->numsp, rdta->sites); |
---|
884 | |
---|
885 | if (rdta->numsp < 4) { |
---|
886 | printf("TOO FEW SPECIES\n"); |
---|
887 | return FALSE; |
---|
888 | } |
---|
889 | |
---|
890 | if (rdta->sites < 1) { |
---|
891 | printf("TOO FEW SITES\n"); |
---|
892 | return FALSE; |
---|
893 | } |
---|
894 | |
---|
895 | return TRUE; |
---|
896 | } /* getnums */ |
---|
897 | |
---|
898 | |
---|
899 | boolean digitchar (int ch) {return (ch >= '0' && ch <= '9'); } |
---|
900 | |
---|
901 | |
---|
902 | boolean whitechar (int ch) |
---|
903 | { return (ch == ' ' || ch == '\n' || ch == '\t'); |
---|
904 | } |
---|
905 | |
---|
906 | |
---|
907 | void uppercase (int *chptr) |
---|
908 | /* convert character to upper case -- either ASCII or EBCDIC */ |
---|
909 | { /* uppercase */ |
---|
910 | int ch; |
---|
911 | |
---|
912 | ch = *chptr; |
---|
913 | if ((ch >= 'a' && ch <= 'i') || (ch >= 'j' && ch <= 'r') |
---|
914 | || (ch >= 's' && ch <= 'z')) |
---|
915 | *chptr = ch + 'A' - 'a'; |
---|
916 | } /* uppercase */ |
---|
917 | |
---|
918 | |
---|
919 | int base36 (int ch) |
---|
920 | { /* base36 */ |
---|
921 | if (ch >= '0' && ch <= '9') return (ch - '0'); |
---|
922 | else if (ch >= 'A' && ch <= 'I') return (ch - 'A' + 10); |
---|
923 | else if (ch >= 'J' && ch <= 'R') return (ch - 'J' + 19); |
---|
924 | else if (ch >= 'S' && ch <= 'Z') return (ch - 'S' + 28); |
---|
925 | else if (ch >= 'a' && ch <= 'i') return (ch - 'a' + 10); |
---|
926 | else if (ch >= 'j' && ch <= 'r') return (ch - 'j' + 19); |
---|
927 | else if (ch >= 's' && ch <= 'z') return (ch - 's' + 28); |
---|
928 | else return -1; |
---|
929 | } /* base36 */ |
---|
930 | |
---|
931 | |
---|
932 | int itobase36 (int i) |
---|
933 | { /* itobase36 */ |
---|
934 | if (i < 0) return '?'; |
---|
935 | else if (i < 10) return (i + '0'); |
---|
936 | else if (i < 19) return (i - 10 + 'A'); |
---|
937 | else if (i < 28) return (i - 19 + 'J'); |
---|
938 | else if (i < 36) return (i - 28 + 'S'); |
---|
939 | else return '?'; |
---|
940 | } /* itobase36 */ |
---|
941 | |
---|
942 | |
---|
943 | int findch (int c) |
---|
944 | { /* findch */ |
---|
945 | int ch; |
---|
946 | |
---|
947 | while ((ch = getc(INFILE)) != EOF && ch != c) ; |
---|
948 | return ch; |
---|
949 | } /* findch */ |
---|
950 | |
---|
951 | |
---|
952 | #if Master || Slave |
---|
953 | int str_findch (char **treestrp, int c) |
---|
954 | { /* str_findch */ |
---|
955 | int ch; |
---|
956 | |
---|
957 | while ((ch = *(*treestrp)++) != NULL && ch != c) ; |
---|
958 | return ch; |
---|
959 | } /* str_findch */ |
---|
960 | #endif |
---|
961 | |
---|
962 | |
---|
963 | boolean inputboot(analdef *adef) |
---|
964 | /* read the bootstrap auxilliary info */ |
---|
965 | { /* inputboot */ |
---|
966 | if (! adef->boot) { |
---|
967 | printf("ERROR: Unexpected Bootstrap auxiliary data line\n"); |
---|
968 | return FALSE; |
---|
969 | } |
---|
970 | else if (fscanf(INFILE, "%ld", & adef->boot) != 1 || |
---|
971 | findch('\n') == EOF) { |
---|
972 | printf("ERROR: Problem reading bootstrap random seed value\n"); |
---|
973 | return FALSE; |
---|
974 | } |
---|
975 | |
---|
976 | return TRUE; |
---|
977 | } /* inputboot */ |
---|
978 | |
---|
979 | |
---|
980 | boolean inputcategories (rawdata *rdta) |
---|
981 | /* read the category rates and the categories for each site */ |
---|
982 | { /* inputcategories */ |
---|
983 | int i, j, ch, ci; |
---|
984 | |
---|
985 | if (rdta->categs >= 0) { |
---|
986 | printf("ERROR: Unexpected Categories auxiliary data line\n"); |
---|
987 | return FALSE; |
---|
988 | } |
---|
989 | if (fscanf(INFILE, "%d", & rdta->categs) != 1) { |
---|
990 | printf("ERROR: Problem reading number of rate categories\n"); |
---|
991 | return FALSE; |
---|
992 | } |
---|
993 | if (rdta->categs < 1 || rdta->categs > maxcategories) { |
---|
994 | printf("ERROR: Bad number of categories: %d\n", rdta->categs); |
---|
995 | printf("Must be in range 1 - %d\n", maxcategories); |
---|
996 | return FALSE; |
---|
997 | } |
---|
998 | |
---|
999 | for (j = 1; j <= rdta->categs |
---|
1000 | && fscanf(INFILE, "%lf", &(rdta->catrat[j])) == 1; j++) ; |
---|
1001 | |
---|
1002 | if ((j <= rdta->categs) || (findch('\n') == EOF)) { |
---|
1003 | printf("ERROR: Problem reading rate values\n"); |
---|
1004 | return FALSE; |
---|
1005 | } |
---|
1006 | |
---|
1007 | for (i = 1; i <= nmlngth; i++) (void) getc(INFILE); |
---|
1008 | |
---|
1009 | i = 1; |
---|
1010 | while (i <= rdta->sites) { |
---|
1011 | ch = getc(INFILE); |
---|
1012 | ci = base36(ch); |
---|
1013 | if (ci >= 0 && ci <= rdta->categs) |
---|
1014 | rdta->sitecat[i++] = ci; |
---|
1015 | else if (! whitechar(ch)) { |
---|
1016 | printf("ERROR: Bad category character (%c) at site %d\n", ch, i); |
---|
1017 | return FALSE; |
---|
1018 | } |
---|
1019 | } |
---|
1020 | |
---|
1021 | if (findch('\n') == EOF) { /* skip to end of line */ |
---|
1022 | printf("ERROR: Missing newline at end of category data\n"); |
---|
1023 | return FALSE; |
---|
1024 | } |
---|
1025 | |
---|
1026 | return TRUE; |
---|
1027 | } /* inputcategories */ |
---|
1028 | |
---|
1029 | |
---|
1030 | boolean inputextra (analdef *adef) |
---|
1031 | { /* inputextra */ |
---|
1032 | if (fscanf(INFILE,"%d", & adef->extra) != 1 || |
---|
1033 | findch('\n') == EOF) { |
---|
1034 | printf("ERROR: Problem reading extra info value\n"); |
---|
1035 | return FALSE; |
---|
1036 | } |
---|
1037 | |
---|
1038 | return TRUE; |
---|
1039 | } /* inputextra */ |
---|
1040 | |
---|
1041 | |
---|
1042 | boolean inputfreqs (rawdata *rdta) |
---|
1043 | { /* inputfreqs */ |
---|
1044 | if (fscanf(INFILE, "%lf%lf%lf%lf", |
---|
1045 | & rdta->freqa, & rdta->freqc, |
---|
1046 | & rdta->freqg, & rdta->freqt) != 4 || |
---|
1047 | findch('\n') == EOF) { |
---|
1048 | printf("ERROR: Problem reading user base frequencies data\n"); |
---|
1049 | return FALSE; |
---|
1050 | } |
---|
1051 | |
---|
1052 | rdta->freqread = TRUE; |
---|
1053 | return TRUE; |
---|
1054 | } /* inputfreqs */ |
---|
1055 | |
---|
1056 | |
---|
1057 | boolean inputglobal (tree *tr) |
---|
1058 | /* input the global option information */ |
---|
1059 | { /* inputglobal */ |
---|
1060 | int ch; |
---|
1061 | |
---|
1062 | if (tr->global != -2) { |
---|
1063 | printf("ERROR: Unexpected Global auxiliary data line\n"); |
---|
1064 | return FALSE; |
---|
1065 | } |
---|
1066 | if (fscanf(INFILE, "%d", &(tr->global)) != 1) { |
---|
1067 | printf("ERROR: Problem reading rearrangement region size\n"); |
---|
1068 | return FALSE; |
---|
1069 | } |
---|
1070 | if (tr->global < 0) { |
---|
1071 | printf("WARNING: Global region size too small;\n"); |
---|
1072 | printf(" value reset to local\n\n"); |
---|
1073 | tr->global = 1; |
---|
1074 | } |
---|
1075 | else if (tr->global == 0) tr->partswap = 0; |
---|
1076 | else if (tr->global > tr->mxtips - 3) { |
---|
1077 | tr->global = tr->mxtips - 3; |
---|
1078 | } |
---|
1079 | |
---|
1080 | while ((ch = getc(INFILE)) != '\n') { /* Scan for second value */ |
---|
1081 | if (! whitechar(ch)) { |
---|
1082 | if (ch != EOF) (void) ungetc(ch, INFILE); |
---|
1083 | if (ch == EOF || fscanf(INFILE, "%d", &(tr->partswap)) != 1 |
---|
1084 | || findch('\n') == EOF) { |
---|
1085 | printf("ERROR: Problem reading insert swap region size\n"); |
---|
1086 | return FALSE; |
---|
1087 | } |
---|
1088 | else if (tr->partswap < 0) tr->partswap = 1; |
---|
1089 | else if (tr->partswap > tr->mxtips - 3) { |
---|
1090 | tr->partswap = tr->mxtips - 3; |
---|
1091 | } |
---|
1092 | |
---|
1093 | if (tr->partswap > tr->global) tr->global = tr->partswap; |
---|
1094 | break; /* Break while loop */ |
---|
1095 | } |
---|
1096 | } |
---|
1097 | |
---|
1098 | return TRUE; |
---|
1099 | } /* inputglobal */ |
---|
1100 | |
---|
1101 | |
---|
1102 | boolean inputjumble (analdef *adef) |
---|
1103 | { /* inputjumble */ |
---|
1104 | if (! adef->jumble) { |
---|
1105 | printf("ERROR: Unexpected Jumble auxiliary data line\n"); |
---|
1106 | return FALSE; |
---|
1107 | } |
---|
1108 | else if (fscanf(INFILE, "%ld", & adef->jumble) != 1 || |
---|
1109 | findch('\n') == EOF) { |
---|
1110 | printf("ERROR: Problem reading jumble random seed value\n"); |
---|
1111 | return FALSE; |
---|
1112 | } |
---|
1113 | else if (adef->jumble == 0) { |
---|
1114 | printf("WARNING: Jumble random number seed is zero\n\n"); |
---|
1115 | } |
---|
1116 | |
---|
1117 | return TRUE; |
---|
1118 | } /* inputjumble */ |
---|
1119 | |
---|
1120 | |
---|
1121 | boolean inputkeep (analdef *adef) |
---|
1122 | { /* inputkeep */ |
---|
1123 | if (fscanf(INFILE, "%d", & adef->nkeep) != 1 || |
---|
1124 | findch('\n') == EOF || adef->nkeep < 1) { |
---|
1125 | printf("ERROR: Problem reading number of kept trees\n"); |
---|
1126 | return FALSE; |
---|
1127 | } |
---|
1128 | |
---|
1129 | return TRUE; |
---|
1130 | } /* inputkeep */ |
---|
1131 | |
---|
1132 | |
---|
1133 | boolean inputoutgroup (analdef *adef, tree *tr) |
---|
1134 | { /* inputoutgroup */ |
---|
1135 | if (! adef->root || tr->outgr > 0) { |
---|
1136 | printf("ERROR: Unexpected Outgroup auxiliary data line\n"); |
---|
1137 | return FALSE; |
---|
1138 | } |
---|
1139 | else if (fscanf(INFILE, "%d", &(tr->outgr)) != 1 || |
---|
1140 | findch('\n') == EOF) { |
---|
1141 | printf("ERROR: Problem reading outgroup number\n"); |
---|
1142 | return FALSE; |
---|
1143 | } |
---|
1144 | else if ((tr->outgr < 1) || (tr->outgr > tr->mxtips)) { |
---|
1145 | printf("ERROR: Bad outgroup: '%d'\n", tr->outgr); |
---|
1146 | return FALSE; |
---|
1147 | } |
---|
1148 | |
---|
1149 | return TRUE; |
---|
1150 | } /* inputoutgroup */ |
---|
1151 | |
---|
1152 | |
---|
1153 | boolean inputratio (rawdata *rdta) |
---|
1154 | { /* inputratio */ |
---|
1155 | if (rdta->ttratio >= 0.0) { |
---|
1156 | printf("ERROR: Unexpected Transition/transversion auxiliary data\n"); |
---|
1157 | return FALSE; |
---|
1158 | } |
---|
1159 | else if (fscanf(INFILE,"%lf", & rdta->ttratio)!=1 || |
---|
1160 | findch('\n') == EOF) { |
---|
1161 | printf("ERROR: Problem reading transition/transversion ratio\n"); |
---|
1162 | return FALSE; |
---|
1163 | } |
---|
1164 | |
---|
1165 | return TRUE; |
---|
1166 | } /* inputratio */ |
---|
1167 | |
---|
1168 | |
---|
1169 | /* Y 0 is treeNone (no tree) |
---|
1170 | Y 1 is treeNewick |
---|
1171 | Y 2 is treeProlog |
---|
1172 | Y 3 is treePHYLIP |
---|
1173 | */ |
---|
1174 | |
---|
1175 | boolean inputtreeopt (analdef *adef) |
---|
1176 | { /* inputtreeopt */ |
---|
1177 | if (! adef->trout) { |
---|
1178 | printf("ERROR: Unexpected Treefile auxiliary data\n"); |
---|
1179 | return FALSE; |
---|
1180 | } |
---|
1181 | else if (fscanf(INFILE,"%d", & adef->trout) != 1 || |
---|
1182 | findch('\n') == EOF) { |
---|
1183 | printf("ERROR: Problem reading output tree-type number\n"); |
---|
1184 | return FALSE; |
---|
1185 | } |
---|
1186 | else if ((adef->trout < 0) || (adef->trout > treeMaxType)) { |
---|
1187 | printf("ERROR: Bad output tree-type number: '%d'\n", adef->trout); |
---|
1188 | return FALSE; |
---|
1189 | } |
---|
1190 | |
---|
1191 | return TRUE; |
---|
1192 | } /* inputtreeopt */ |
---|
1193 | |
---|
1194 | |
---|
1195 | boolean inputweights (analdef *adef, rawdata *rdta, cruncheddata *cdta) |
---|
1196 | /* input the character weights 0, 1, 2 ... 9, A, B, ... Y, Z */ |
---|
1197 | { /* inputweights */ |
---|
1198 | int i, ch, wi; |
---|
1199 | |
---|
1200 | if (! adef->userwgt || cdta->wgtsum > 0) { |
---|
1201 | printf("ERROR: Unexpected Weights auxiliary data\n"); |
---|
1202 | return FALSE; |
---|
1203 | } |
---|
1204 | |
---|
1205 | for (i = 2; i <= nmlngth; i++) (void) getc(INFILE); |
---|
1206 | cdta->wgtsum = 0; |
---|
1207 | i = 1; |
---|
1208 | while (i <= rdta->sites) { |
---|
1209 | ch = getc(INFILE); |
---|
1210 | wi = base36(ch); |
---|
1211 | if (wi >= 0) |
---|
1212 | cdta->wgtsum += rdta->wgt[i++] = wi; |
---|
1213 | else if (! whitechar(ch)) { |
---|
1214 | printf("ERROR: Bad weight character: '%c'", ch); |
---|
1215 | printf(" Weights in dnaml must be a digit or a letter.\n"); |
---|
1216 | return FALSE; |
---|
1217 | } |
---|
1218 | } |
---|
1219 | |
---|
1220 | if (findch('\n') == EOF) { /* skip to end of line */ |
---|
1221 | printf("ERROR: Missing newline at end of weight data\n"); |
---|
1222 | return FALSE; |
---|
1223 | } |
---|
1224 | |
---|
1225 | return TRUE; |
---|
1226 | } /* inputweights */ |
---|
1227 | |
---|
1228 | boolean inputdepth (tree *tr) |
---|
1229 | { |
---|
1230 | if (fscanf(INFILE, "%d", & tr->eqDepth) != 1 || |
---|
1231 | findch('\n') == EOF || tr->eqDepth < 0) { |
---|
1232 | printf("Depth (H - option) has to be greater than 0 \n "); |
---|
1233 | |
---|
1234 | return FALSE; |
---|
1235 | } |
---|
1236 | fprintf(stderr,"set to: %d\n", tr->eqDepth); |
---|
1237 | return TRUE; |
---|
1238 | } |
---|
1239 | |
---|
1240 | |
---|
1241 | boolean getoptions (analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr) |
---|
1242 | { /* getoptions */ |
---|
1243 | int ch, i, extranum; |
---|
1244 | |
---|
1245 | adef->boot = 0; /* Don't bootstrap column weights */ |
---|
1246 | adef->empf = TRUE; /* Use empirical base frequencies */ |
---|
1247 | adef->extra = 0; /* No extra runtime info unless requested */ |
---|
1248 | adef->interleaved = TRUE; /* By default, data format is interleaved */ |
---|
1249 | adef->jumble = FALSE; /* Use random addition sequence */ |
---|
1250 | adef->nkeep = 0; /* Keep only the one best tree */ |
---|
1251 | adef->prdata = FALSE; /* Don't echo data to output stream */ |
---|
1252 | adef->qadd = TRUE; /* Smooth branches globally in add */ |
---|
1253 | adef->restart = FALSE; /* Restart from user tree */ |
---|
1254 | adef->root = FALSE; /* User-defined outgroup rooting */ |
---|
1255 | adef->trout = treeDefType; /* Output tree file */ |
---|
1256 | adef->trprint = TRUE; /* Print tree to output stream */ |
---|
1257 | rdta->categs = 0; /* No rate categories */ |
---|
1258 | rdta->catrat[1] = 1.0; /* Rate values */ |
---|
1259 | rdta->freqread = FALSE; /* User-defined frequencies not read yet */ |
---|
1260 | rdta->ttratio = 2.0; /* Transition/transversion rate ratio */ |
---|
1261 | tr->global = -1; /* Default search locale for optimum */ |
---|
1262 | tr->mxtips = rdta->numsp; |
---|
1263 | tr->outgr = 1; /* Outgroup number */ |
---|
1264 | tr->partswap = 1; /* Default to swap locally after insert */ |
---|
1265 | tr->userlen = FALSE; /* User-supplied branch lengths */ |
---|
1266 | tr->eqDepth = 0; |
---|
1267 | adef->usertree = FALSE; /* User-defined tree topologies */ |
---|
1268 | adef->userwgt = FALSE; /* User-defined position weights */ |
---|
1269 | extranum = 0; |
---|
1270 | |
---|
1271 | while ((ch = getc(INFILE)) != '\n' && ch != EOF) { |
---|
1272 | uppercase(& ch); |
---|
1273 | switch (ch) { |
---|
1274 | case '1' : adef->prdata = ! adef->prdata; break; |
---|
1275 | case '3' : adef->trprint = ! adef->trprint; break; |
---|
1276 | case '4' : adef->trout = treeDefType - adef->trout; break; |
---|
1277 | case 'B' : adef->boot = 1; extranum++; break; |
---|
1278 | case 'C' : rdta->categs = -1; extranum++; break; |
---|
1279 | case 'E' : adef->extra = -1; break; |
---|
1280 | case 'F' : adef->empf = ! adef->empf; break; |
---|
1281 | case 'G' : tr->global = -2; break; |
---|
1282 | //stm |
---|
1283 | case 'H' : extranum++;break; |
---|
1284 | //stm |
---|
1285 | |
---|
1286 | |
---|
1287 | case 'I' : adef->interleaved = ! adef->interleaved; break; |
---|
1288 | case 'J' : adef->jumble = 1; extranum++; break; |
---|
1289 | case 'K' : extranum++; break; |
---|
1290 | case 'L' : tr->userlen = TRUE; break; |
---|
1291 | case 'O' : adef->root = TRUE; tr->outgr = 0; extranum++; break; |
---|
1292 | case 'Q' : adef->qadd = FALSE; break; |
---|
1293 | case 'R' : adef->restart = TRUE; break; |
---|
1294 | case 'T' : rdta->ttratio = -1.0; extranum++; break; |
---|
1295 | case 'U' : adef->usertree = TRUE; break; |
---|
1296 | case 'W' : adef->userwgt = TRUE; cdta->wgtsum = 0; extranum++; break; |
---|
1297 | case 'Y' : adef->trout = treeDefType - adef->trout; break; |
---|
1298 | case ' ' : break; |
---|
1299 | case '\t': break; |
---|
1300 | default : |
---|
1301 | printf("ERROR: Bad option character: '%c'\n", ch); |
---|
1302 | return FALSE; |
---|
1303 | } |
---|
1304 | } |
---|
1305 | |
---|
1306 | if (ch == EOF) { |
---|
1307 | printf("ERROR: End-of-file in options list\n"); |
---|
1308 | return FALSE; |
---|
1309 | } |
---|
1310 | |
---|
1311 | if (adef->usertree && adef->restart) { |
---|
1312 | printf("ERROR: The restart and user-tree options conflict:\n"); |
---|
1313 | printf(" Restart adds rest of taxa to a starting tree;\n"); |
---|
1314 | printf(" User-tree does not add any taxa.\n\n"); |
---|
1315 | return FALSE; |
---|
1316 | } |
---|
1317 | |
---|
1318 | if (adef->usertree && adef->jumble) { |
---|
1319 | printf("WARNING: The jumble and user-tree options conflict:\n"); |
---|
1320 | printf(" Jumble adds taxa to a tree in random order;\n"); |
---|
1321 | printf(" User-tree does not use taxa addition.\n"); |
---|
1322 | printf(" Jumble option cancelled for this run.\n\n"); |
---|
1323 | adef->jumble = FALSE; |
---|
1324 | } |
---|
1325 | |
---|
1326 | if (tr->userlen && tr->global != -1) { |
---|
1327 | printf("ERROR: The global and user-lengths options conflict:\n"); |
---|
1328 | printf(" Global optimizes a starting tree;\n"); |
---|
1329 | printf(" User-lengths constrain the starting tree.\n\n"); |
---|
1330 | return FALSE; |
---|
1331 | } |
---|
1332 | |
---|
1333 | if (tr->userlen && ! adef->usertree) { |
---|
1334 | printf("WARNING: User lengths required user tree option.\n"); |
---|
1335 | printf(" User-tree option set for this run.\n\n"); |
---|
1336 | adef->usertree = TRUE; |
---|
1337 | } |
---|
1338 | |
---|
1339 | rdta->wgt = (int *) Malloc((rdta->sites + 1) * sizeof(int)); |
---|
1340 | rdta->wgt2 = (int *) Malloc((rdta->sites + 1) * sizeof(int)); |
---|
1341 | rdta->sitecat = (int *) Malloc((rdta->sites + 1) * sizeof(int)); |
---|
1342 | cdta->alias = (int *) Malloc((rdta->sites + 1) * sizeof(int)); |
---|
1343 | cdta->aliaswgt = (int *) Malloc((rdta->sites + 1) * sizeof(int)); |
---|
1344 | cdta->patcat = (int *) Malloc((rdta->sites + 1) * sizeof(int)); |
---|
1345 | cdta->patrat = (double *) Malloc((rdta->sites + 1) * sizeof(double)); |
---|
1346 | cdta->wr = (double *) Malloc((rdta->sites + 1) * sizeof(double)); |
---|
1347 | cdta->wr2 = (double *) Malloc((rdta->sites + 1) * sizeof(double)); |
---|
1348 | if ( ! rdta->wgt || ! rdta->wgt2 || ! rdta->sitecat || ! cdta->alias |
---|
1349 | || ! cdta->aliaswgt || ! cdta->patcat || ! cdta->patrat |
---|
1350 | || ! cdta->wr || ! cdta->wr2) { |
---|
1351 | fprintf(stderr, "getoptions: Malloc failure\n"); |
---|
1352 | return 0; |
---|
1353 | } |
---|
1354 | |
---|
1355 | /* process lines with auxiliary data */ |
---|
1356 | |
---|
1357 | while (extranum--) { |
---|
1358 | ch = getc(INFILE); |
---|
1359 | uppercase(& ch); |
---|
1360 | switch (ch) { |
---|
1361 | case 'B': if (! inputboot(adef)) return FALSE; break; |
---|
1362 | case 'C': if (! inputcategories(rdta)) return FALSE; break; |
---|
1363 | case 'E': if (! inputextra(adef)) return FALSE; extranum++; break; |
---|
1364 | case 'F': if (! inputfreqs(rdta)) return FALSE; break; |
---|
1365 | case 'G': if (! inputglobal(tr)) return FALSE; extranum++; break; |
---|
1366 | //stm |
---|
1367 | case 'H': if(!inputdepth(tr)) return FALSE; break; |
---|
1368 | //stm |
---|
1369 | case 'J': if (! inputjumble(adef)) return FALSE; break; |
---|
1370 | case 'K': if (! inputkeep(adef)) return FALSE; break; |
---|
1371 | case 'O': if (! inputoutgroup(adef, tr)) return FALSE; break; |
---|
1372 | case 'T': if (! inputratio(rdta)) return FALSE; break; |
---|
1373 | case 'W': if (! inputweights(adef, rdta, cdta)) return FALSE; break; |
---|
1374 | case 'Y': if (! inputtreeopt(adef)) return FALSE; extranum++; break; |
---|
1375 | |
---|
1376 | default: |
---|
1377 | printf("ERROR: Auxiliary options line starts with '%c'\n", ch); |
---|
1378 | return FALSE; |
---|
1379 | } |
---|
1380 | } |
---|
1381 | |
---|
1382 | if (! adef->userwgt) { |
---|
1383 | for (i = 1; i <= rdta->sites; i++) rdta->wgt[i] = 1; |
---|
1384 | cdta->wgtsum = rdta->sites; |
---|
1385 | } |
---|
1386 | |
---|
1387 | if (adef->userwgt && cdta->wgtsum < 1) { |
---|
1388 | printf("ERROR: Missing or bad user-supplied weight data.\n"); |
---|
1389 | return FALSE; |
---|
1390 | } |
---|
1391 | |
---|
1392 | if (adef->boot) { |
---|
1393 | printf("Bootstrap random number seed = %ld\n\n", adef->boot); |
---|
1394 | } |
---|
1395 | |
---|
1396 | if (adef->jumble) { |
---|
1397 | printf("Jumble random number seed = %ld\n\n", adef->jumble); |
---|
1398 | } |
---|
1399 | |
---|
1400 | if (adef->qadd) { |
---|
1401 | printf("Quick add (only local branches initially optimized) in effect\n\n"); |
---|
1402 | } |
---|
1403 | |
---|
1404 | if (rdta->categs > 0) { |
---|
1405 | printf("Site category Rate of change\n\n"); |
---|
1406 | for (i = 1; i <= rdta->categs; i++) |
---|
1407 | printf(" %c%13.3f\n", itobase36(i), rdta->catrat[i]); |
---|
1408 | putchar('\n'); |
---|
1409 | for (i = 1; i <= rdta->sites; i++) { |
---|
1410 | if ((rdta->wgt[i] > 0) && (rdta->sitecat[i] < 1)) { |
---|
1411 | printf("ERROR: Bad category (%c) at site %d\n", |
---|
1412 | itobase36(rdta->sitecat[i]), i); |
---|
1413 | return FALSE; |
---|
1414 | } |
---|
1415 | } |
---|
1416 | } |
---|
1417 | else if (rdta->categs < 0) { |
---|
1418 | printf("ERROR: Category auxiliary data missing from input\n"); |
---|
1419 | return FALSE; |
---|
1420 | } |
---|
1421 | else { /* rdta->categs == 0 */ |
---|
1422 | for (i = 1; i <= rdta->sites; i++) rdta->sitecat[i] = 1; |
---|
1423 | rdta->categs = 1; |
---|
1424 | } |
---|
1425 | |
---|
1426 | if (tr->outgr < 1) { |
---|
1427 | printf("ERROR: Outgroup auxiliary data missing from input\n"); |
---|
1428 | return FALSE; |
---|
1429 | } |
---|
1430 | |
---|
1431 | if (rdta->ttratio < 0.0) { |
---|
1432 | printf("ERROR: Transition/transversion auxiliary data missing from input\n"); |
---|
1433 | return FALSE; |
---|
1434 | } |
---|
1435 | |
---|
1436 | if (tr->global < 0) { |
---|
1437 | if (tr->global == -2) tr->global = tr->mxtips - 3; /* Default global */ |
---|
1438 | else tr->global = adef->usertree ? 0 : 1;/* No global */ |
---|
1439 | } |
---|
1440 | |
---|
1441 | if (adef->restart) { |
---|
1442 | printf("Restart option in effect. "); |
---|
1443 | printf("Sequence addition will start from appended tree.\n\n"); |
---|
1444 | } |
---|
1445 | |
---|
1446 | if (adef->usertree && ! tr->global) { |
---|
1447 | printf("User-supplied tree topology%swill be used.\n\n", |
---|
1448 | tr->userlen ? " and branch lengths " : " "); |
---|
1449 | } |
---|
1450 | else { |
---|
1451 | if (! adef->usertree) { |
---|
1452 | printf("Rearrangements of partial trees may cross %d %s.\n", |
---|
1453 | tr->partswap, tr->partswap == 1 ? "branch" : "branches"); |
---|
1454 | } |
---|
1455 | printf("Rearrangements of full tree may cross %d %s.\n\n", |
---|
1456 | tr->global, tr->global == 1 ? "branch" : "branches"); |
---|
1457 | } |
---|
1458 | |
---|
1459 | if (! adef->usertree && adef->nkeep == 0) adef->nkeep = 1; |
---|
1460 | |
---|
1461 | return TRUE; |
---|
1462 | } /* getoptions */ |
---|
1463 | |
---|
1464 | |
---|
1465 | boolean getbasefreqs (rawdata *rdta) |
---|
1466 | { /* getbasefreqs */ |
---|
1467 | int ch; |
---|
1468 | |
---|
1469 | if (rdta->freqread) return TRUE; |
---|
1470 | |
---|
1471 | ch = getc(INFILE); |
---|
1472 | if (! ((ch == 'F') || (ch == 'f'))) (void) ungetc(ch, INFILE); |
---|
1473 | |
---|
1474 | if (fscanf(INFILE, "%lf%lf%lf%lf", |
---|
1475 | & rdta->freqa, & rdta->freqc, |
---|
1476 | & rdta->freqg, & rdta->freqt) != 4 || |
---|
1477 | findch('\n') == EOF) { |
---|
1478 | printf("ERROR: Problem reading user base frequencies\n"); |
---|
1479 | return FALSE; |
---|
1480 | } |
---|
1481 | |
---|
1482 | return TRUE; |
---|
1483 | } /* getbasefreqs */ |
---|
1484 | |
---|
1485 | |
---|
1486 | boolean getyspace (rawdata *rdta) |
---|
1487 | { /* getyspace */ |
---|
1488 | long size; |
---|
1489 | int i; |
---|
1490 | yType *y0; |
---|
1491 | |
---|
1492 | if (! (rdta->y = (yType **) Malloc((rdta->numsp + 1) * sizeof(yType *)))) { |
---|
1493 | printf("ERROR: Unable to obtain space for data array pointers\n"); |
---|
1494 | return FALSE; |
---|
1495 | } |
---|
1496 | |
---|
1497 | size = 4 * (rdta->sites / 4 + 1); |
---|
1498 | if (! (y0 = (yType *) Malloc((rdta->numsp + 1) * size * sizeof(yType)))) { |
---|
1499 | printf("ERROR: Unable to obtain space for data array\n"); |
---|
1500 | return FALSE; |
---|
1501 | } |
---|
1502 | |
---|
1503 | for (i = 0; i <= rdta->numsp; i++) { |
---|
1504 | rdta->y[i] = y0; |
---|
1505 | y0 += size; |
---|
1506 | } |
---|
1507 | |
---|
1508 | return TRUE; |
---|
1509 | } /* getyspace */ |
---|
1510 | |
---|
1511 | |
---|
1512 | |
---|
1513 | boolean setupTree (tree *tr, int nsites) |
---|
1514 | { /* setupTree */ |
---|
1515 | nodeptr p0, p, q; |
---|
1516 | int i, j, tips, inter; |
---|
1517 | |
---|
1518 | tips = tr->mxtips; |
---|
1519 | inter = tr->mxtips - 1; |
---|
1520 | |
---|
1521 | if (!(p0 = (nodeptr) Malloc((tips + 3*inter) * sizeof(node)))) { |
---|
1522 | printf("ERROR: Unable to obtain sufficient tree memory\n"); |
---|
1523 | return FALSE; |
---|
1524 | } |
---|
1525 | |
---|
1526 | |
---|
1527 | if (!(tr->nodep = (nodeptr *) Malloc((2*tr->mxtips) * sizeof(nodeptr)))) { |
---|
1528 | printf("ERROR: Unable to obtain sufficient tree memory, too\n"); |
---|
1529 | return FALSE; |
---|
1530 | } |
---|
1531 | |
---|
1532 | |
---|
1533 | tr->nodep[0] = (node *) NULL; /* Use as 1-based array */ |
---|
1534 | |
---|
1535 | for (i = 1; i <= tips; i++) { /* Set-up tips */ |
---|
1536 | p = p0++; |
---|
1537 | p->x = (xarray *) NULL; |
---|
1538 | p->tip = (yType *) NULL; |
---|
1539 | |
---|
1540 | /* AxML modification start*/ |
---|
1541 | |
---|
1542 | p->equalityVector = (char *) NULL; /*initialize eq vector with zero */ |
---|
1543 | |
---|
1544 | /* AxML modification end*/ |
---|
1545 | |
---|
1546 | |
---|
1547 | p->number = i; |
---|
1548 | p->next = p; |
---|
1549 | p->back = (node *) NULL; |
---|
1550 | |
---|
1551 | tr->nodep[i] = p; |
---|
1552 | } |
---|
1553 | |
---|
1554 | for (i = tips + 1; i <= tips + inter; i++) { /* Internal nodes */ |
---|
1555 | q = (node *) NULL; |
---|
1556 | for (j = 1; j <= 3; j++) { |
---|
1557 | p = p0++; |
---|
1558 | p->x = (xarray *) NULL; |
---|
1559 | p->tip = (yType *) NULL; |
---|
1560 | p->number = i; |
---|
1561 | |
---|
1562 | /* AxML modification start*/ |
---|
1563 | |
---|
1564 | p->equalityVector = (char *) NULL; /*initialize eq vector with zero */ |
---|
1565 | /* AxML modification end*/ |
---|
1566 | |
---|
1567 | |
---|
1568 | p->next = q; |
---|
1569 | p->back = (node *) NULL; |
---|
1570 | q = p; |
---|
1571 | } |
---|
1572 | p->next->next->next = p; |
---|
1573 | tr->nodep[i] = p; |
---|
1574 | } |
---|
1575 | |
---|
1576 | tr->likelihood = unlikely; |
---|
1577 | tr->start = (node *) NULL; |
---|
1578 | tr->outgrnode = tr->nodep[tr->outgr]; |
---|
1579 | tr->ntips = 0; |
---|
1580 | tr->nextnode = 0; |
---|
1581 | tr->opt_level = 0; |
---|
1582 | tr->prelabeled = TRUE; |
---|
1583 | tr->smoothed = FALSE; |
---|
1584 | tr->log_f_valid = 0; |
---|
1585 | |
---|
1586 | tr->log_f = (double *) Malloc(nsites * sizeof(double)); |
---|
1587 | if (! tr->log_f) { |
---|
1588 | printf("ERROR: Unable to obtain sufficient tree memory, trey\n"); |
---|
1589 | return FALSE; |
---|
1590 | } |
---|
1591 | |
---|
1592 | return TRUE; |
---|
1593 | } /* setupTree */ |
---|
1594 | |
---|
1595 | void freeTreeNode (nodeptr p) /* Free tree node (sector) associated data */ |
---|
1596 | { /* freeTreeNode */ |
---|
1597 | if (p) { |
---|
1598 | if (p->x) { |
---|
1599 | if (p->x->lv) Free(p->x->lv); |
---|
1600 | Free(p->x); |
---|
1601 | } |
---|
1602 | /* AxML modification start*/ |
---|
1603 | |
---|
1604 | Free(p->equalityVector); /*free equality vector of node*/ |
---|
1605 | |
---|
1606 | /* AxML modification end*/ |
---|
1607 | } |
---|
1608 | } /* freeTree */ |
---|
1609 | |
---|
1610 | |
---|
1611 | |
---|
1612 | /* AxML modification start*/ |
---|
1613 | |
---|
1614 | |
---|
1615 | boolean setupEqualityVector(tree *tr) /* new function for initializing and mapping equality vectors */ |
---|
1616 | { |
---|
1617 | int siteNum = tr->cdta->endsite; |
---|
1618 | nodeptr upper = tr->nodep[tr->mxtips * 2 - 1]; |
---|
1619 | nodeptr p = tr->nodep[1]; |
---|
1620 | char *ev, *end; |
---|
1621 | char *tip; |
---|
1622 | |
---|
1623 | likelivector *l; |
---|
1624 | int i, j; |
---|
1625 | homType *mlvP; |
---|
1626 | int ref[EQUALITIES]; |
---|
1627 | int code; |
---|
1628 | |
---|
1629 | for(; p < upper; p++) |
---|
1630 | { |
---|
1631 | if( ! (p->equalityVector = |
---|
1632 | (ev = (char *)malloc(siteNum * sizeof(char))))) |
---|
1633 | { |
---|
1634 | printf("ERROR: Failure to get node equality vector memory\n"); |
---|
1635 | return FALSE; |
---|
1636 | } |
---|
1637 | |
---|
1638 | end = & ev[siteNum]; |
---|
1639 | |
---|
1640 | if(p->tip) |
---|
1641 | { |
---|
1642 | int i; |
---|
1643 | tip = p->tip; |
---|
1644 | |
---|
1645 | for(i = 0; i < equalities; i++) |
---|
1646 | ref[i] = -1; |
---|
1647 | |
---|
1648 | for(i = 0; i < siteNum; i++) |
---|
1649 | { |
---|
1650 | ev[i] = map[tip[i]]; |
---|
1651 | if(ref[ev[i]] == -1) |
---|
1652 | ref[ev[i]] = 1; |
---|
1653 | } |
---|
1654 | for(i = 0; i < equalities; i++) |
---|
1655 | { |
---|
1656 | for(j = 1; j <= tr->rdta->categs; j++) |
---|
1657 | { |
---|
1658 | mlvP = &p->mlv[i][j]; |
---|
1659 | code = map2[i]; |
---|
1660 | // if(ref[i] == 1) |
---|
1661 | //{ |
---|
1662 | mlvP->a = code & 1; |
---|
1663 | mlvP->c = (code >> 1) & 1; |
---|
1664 | mlvP->g = (code >> 2) & 1; |
---|
1665 | mlvP->t = (code >> 3) & 1; |
---|
1666 | mlvP->exp = 0; |
---|
1667 | mlvP->set = TRUE; |
---|
1668 | //} |
---|
1669 | //else |
---|
1670 | //mlvP->set = FALSE; |
---|
1671 | |
---|
1672 | } |
---|
1673 | } |
---|
1674 | |
---|
1675 | } |
---|
1676 | else |
---|
1677 | { |
---|
1678 | for(; ev < end; ev++) |
---|
1679 | { |
---|
1680 | *ev = 0; |
---|
1681 | } |
---|
1682 | } |
---|
1683 | } |
---|
1684 | |
---|
1685 | return TRUE; |
---|
1686 | } |
---|
1687 | |
---|
1688 | /* AxML modification end*/ |
---|
1689 | |
---|
1690 | |
---|
1691 | |
---|
1692 | |
---|
1693 | |
---|
1694 | |
---|
1695 | |
---|
1696 | void freeTree (tree *tr) |
---|
1697 | { /* freeTree */ |
---|
1698 | nodeptr p, q; |
---|
1699 | int i, tips, inter; |
---|
1700 | |
---|
1701 | tips = tr->mxtips; |
---|
1702 | inter = tr->mxtips - 1; |
---|
1703 | |
---|
1704 | for (i = 1; i <= tips; i++) freeTreeNode(tr->nodep[i]); |
---|
1705 | |
---|
1706 | for (i = tips + 1; i <= tips + inter; i++) { |
---|
1707 | if (p = tr->nodep[i]) { |
---|
1708 | if (q = p->next) { |
---|
1709 | freeTreeNode(q->next); |
---|
1710 | freeTreeNode(q); |
---|
1711 | } |
---|
1712 | freeTreeNode(p); |
---|
1713 | } |
---|
1714 | } |
---|
1715 | |
---|
1716 | Free(tr->nodep[1]); /* Free the actual nodes */ |
---|
1717 | } /* freeTree */ |
---|
1718 | |
---|
1719 | |
---|
1720 | boolean getdata (analdef *adef, rawdata *rdta, tree *tr) |
---|
1721 | /* read sequences */ |
---|
1722 | { /* getdata */ |
---|
1723 | int i, j, k, l, basesread, basesnew, ch; |
---|
1724 | int meaning[256]; /* meaning of input characters */ |
---|
1725 | char *nameptr; |
---|
1726 | boolean allread, firstpass; |
---|
1727 | |
---|
1728 | for (i = 0; i <= 255; i++) meaning[i] = 0; |
---|
1729 | meaning['A'] = 1; |
---|
1730 | meaning['B'] = 14; |
---|
1731 | meaning['C'] = 2; |
---|
1732 | meaning['D'] = 13; |
---|
1733 | meaning['G'] = 4; |
---|
1734 | meaning['H'] = 11; |
---|
1735 | meaning['K'] = 12; |
---|
1736 | meaning['M'] = 3; |
---|
1737 | meaning['N'] = 15; |
---|
1738 | meaning['O'] = 15; |
---|
1739 | meaning['R'] = 5; |
---|
1740 | meaning['S'] = 6; |
---|
1741 | meaning['T'] = 8; |
---|
1742 | meaning['U'] = 8; |
---|
1743 | meaning['V'] = 7; |
---|
1744 | meaning['W'] = 9; |
---|
1745 | meaning['X'] = 15; |
---|
1746 | meaning['Y'] = 10; |
---|
1747 | meaning['?'] = 15; |
---|
1748 | meaning['-'] = 15; |
---|
1749 | |
---|
1750 | basesread = basesnew = 0; |
---|
1751 | |
---|
1752 | allread = FALSE; |
---|
1753 | firstpass = TRUE; |
---|
1754 | ch = ' '; |
---|
1755 | |
---|
1756 | while (! allread) { |
---|
1757 | for (i = 1; i <= tr->mxtips; i++) { /* Read data line */ |
---|
1758 | |
---|
1759 | if (firstpass) { /* Read species names */ |
---|
1760 | j = 1; |
---|
1761 | while (whitechar(ch = getc(INFILE))) { /* Skip blank lines */ |
---|
1762 | if (ch == '\n') j = 1; else j++; |
---|
1763 | } |
---|
1764 | |
---|
1765 | if (j > nmlngth) { |
---|
1766 | printf("ERROR: Blank name for species %d; ", i); |
---|
1767 | printf("check number of species,\n"); |
---|
1768 | printf(" number of sites, and interleave option.\n"); |
---|
1769 | return FALSE; |
---|
1770 | } |
---|
1771 | |
---|
1772 | nameptr = tr->nodep[i]->name; |
---|
1773 | for (k = 1; k < j; k++) *nameptr++ = ' '; |
---|
1774 | |
---|
1775 | while (ch != '\n' && ch != EOF) { |
---|
1776 | if (whitechar(ch)) ch = ' '; |
---|
1777 | *nameptr++ = ch; |
---|
1778 | if (++j > nmlngth) break; |
---|
1779 | ch = getc(INFILE); |
---|
1780 | } |
---|
1781 | |
---|
1782 | while (*(--nameptr) == ' ') ; /* remove trailing blanks */ |
---|
1783 | *(++nameptr) = '\0'; /* add null termination */ |
---|
1784 | |
---|
1785 | if (ch == EOF) { |
---|
1786 | printf("ERROR: End-of-file in name of species %d\n", i); |
---|
1787 | return FALSE; |
---|
1788 | } |
---|
1789 | } /* if (firstpass) */ |
---|
1790 | |
---|
1791 | j = basesread; |
---|
1792 | while ((j < rdta->sites) |
---|
1793 | && ((ch = getc(INFILE)) != EOF) |
---|
1794 | && ((! adef->interleaved) || (ch != '\n'))) { |
---|
1795 | uppercase(& ch); |
---|
1796 | if (meaning[ch] || ch == '.') { |
---|
1797 | j++; |
---|
1798 | if (ch == '.') { |
---|
1799 | if (i != 1) ch = rdta->y[1][j]; |
---|
1800 | else { |
---|
1801 | printf("ERROR: Dot (.) found at site %d of sequence 1\n", j); |
---|
1802 | return FALSE; |
---|
1803 | } |
---|
1804 | } |
---|
1805 | rdta->y[i][j] = ch; |
---|
1806 | } |
---|
1807 | else if (whitechar(ch) || digitchar(ch)) ; |
---|
1808 | else { |
---|
1809 | printf("ERROR: Bad base (%c) at site %d of sequence %d\n", |
---|
1810 | ch, j, i); |
---|
1811 | return FALSE; |
---|
1812 | } |
---|
1813 | } |
---|
1814 | |
---|
1815 | if (ch == EOF) { |
---|
1816 | printf("ERROR: End-of-file at site %d of sequence %d\n", j, i); |
---|
1817 | return FALSE; |
---|
1818 | } |
---|
1819 | |
---|
1820 | if (! firstpass && (j == basesread)) i--; /* no data on line */ |
---|
1821 | else if (i == 1) basesnew = j; |
---|
1822 | else if (j != basesnew) { |
---|
1823 | printf("ERROR: Sequences out of alignment\n"); |
---|
1824 | printf("%d (instead of %d) residues read in sequence %d\n", |
---|
1825 | j - basesread, basesnew - basesread, i); |
---|
1826 | return FALSE; |
---|
1827 | } |
---|
1828 | |
---|
1829 | while (ch != '\n' && ch != EOF) ch = getc(INFILE); /* flush line */ |
---|
1830 | } /* next sequence */ |
---|
1831 | firstpass = FALSE; |
---|
1832 | basesread = basesnew; |
---|
1833 | allread = (basesread >= rdta->sites); |
---|
1834 | } |
---|
1835 | |
---|
1836 | /* Print listing of sequence alignment */ |
---|
1837 | |
---|
1838 | if (adef->prdata) { |
---|
1839 | j = nmlngth - 5 + ((rdta->sites + ((rdta->sites - 1)/10))/2); |
---|
1840 | if (j < nmlngth - 1) j = nmlngth - 1; |
---|
1841 | if (j > 37) j = 37; |
---|
1842 | printf("Name"); for (i=1;i<=j;i++) putchar(' '); printf("Sequences\n"); |
---|
1843 | printf("----"); for (i=1;i<=j;i++) putchar(' '); printf("---------\n"); |
---|
1844 | putchar('\n'); |
---|
1845 | |
---|
1846 | for (i = 1; i <= rdta->sites; i += 60) { |
---|
1847 | l = i + 59; |
---|
1848 | if (l > rdta->sites) l = rdta->sites; |
---|
1849 | |
---|
1850 | if (adef->userwgt) { |
---|
1851 | printf("Weights "); |
---|
1852 | for (j = 11; j <= nmlngth+3; j++) putchar(' '); |
---|
1853 | for (k = i; k <= l; k++) { |
---|
1854 | putchar(itobase36(rdta->wgt[k])); |
---|
1855 | if (((k % 10) == 0) && (k < l)) putchar(' '); |
---|
1856 | } |
---|
1857 | putchar('\n'); |
---|
1858 | } |
---|
1859 | |
---|
1860 | if (rdta->categs > 1) { |
---|
1861 | printf("Categories"); |
---|
1862 | for (j = 11; j <= nmlngth+3; j++) putchar(' '); |
---|
1863 | for (k = i; k <= l; k++) { |
---|
1864 | putchar(itobase36(rdta->sitecat[k])); |
---|
1865 | if (((k % 10) == 0) && (k < l)) putchar(' '); |
---|
1866 | } |
---|
1867 | putchar('\n'); |
---|
1868 | } |
---|
1869 | |
---|
1870 | for (j = 1; j <= tr->mxtips; j++) { |
---|
1871 | nameptr = tr->nodep[j]->name; |
---|
1872 | k = nmlngth+3; |
---|
1873 | while (ch = *nameptr++) {putchar(ch); k--;} |
---|
1874 | while (--k >= 0) putchar(' '); |
---|
1875 | |
---|
1876 | for (k = i; k <= l; k++) { |
---|
1877 | ch = rdta->y[j][k]; |
---|
1878 | if ((j > 1) && (ch == rdta->y[1][k])) ch = '.'; |
---|
1879 | putchar(ch); |
---|
1880 | if (((k % 10) == 0) && (k < l)) putchar(' '); |
---|
1881 | } |
---|
1882 | putchar('\n'); |
---|
1883 | } |
---|
1884 | putchar('\n'); |
---|
1885 | } |
---|
1886 | } |
---|
1887 | |
---|
1888 | for (j = 1; j <= tr->mxtips; j++) /* Convert characters to meanings */ |
---|
1889 | for (i = 1; i <= rdta->sites; i++) { |
---|
1890 | rdta->y[j][i] = meaning[rdta->y[j][i]]; |
---|
1891 | } |
---|
1892 | |
---|
1893 | return TRUE; |
---|
1894 | } /* getdata */ |
---|
1895 | |
---|
1896 | |
---|
1897 | boolean getntrees (analdef *adef) |
---|
1898 | { /* getntrees */ |
---|
1899 | |
---|
1900 | if (fscanf(INFILE, "%d", &(adef->numutrees)) != 1 || findch('\n') == EOF) { |
---|
1901 | printf("ERROR: Problem reading number of user trees\n"); |
---|
1902 | return FALSE; |
---|
1903 | } |
---|
1904 | |
---|
1905 | if (adef->nkeep == 0) adef->nkeep = adef->numutrees; |
---|
1906 | |
---|
1907 | return TRUE; |
---|
1908 | } /* getntrees */ |
---|
1909 | |
---|
1910 | |
---|
1911 | boolean getinput (analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr) |
---|
1912 | { /* getinput */ |
---|
1913 | if (! getnums(rdta)) return FALSE; |
---|
1914 | if (! getoptions(adef, rdta, cdta, tr)) return FALSE; |
---|
1915 | if (! adef->empf && ! getbasefreqs(rdta)) return FALSE; |
---|
1916 | if (! getyspace(rdta)) return FALSE; |
---|
1917 | if (! setupTree(tr, rdta->sites)) return FALSE; |
---|
1918 | if (! getdata(adef, rdta, tr)) return FALSE; |
---|
1919 | if (adef->usertree && ! getntrees(adef)) return FALSE; |
---|
1920 | |
---|
1921 | return TRUE; |
---|
1922 | } /* getinput */ |
---|
1923 | |
---|
1924 | |
---|
1925 | void makeboot (analdef *adef, rawdata *rdta, cruncheddata *cdta) |
---|
1926 | { /* makeboot */ |
---|
1927 | int i, j, nonzero; |
---|
1928 | double randum(); |
---|
1929 | |
---|
1930 | nonzero = 0; |
---|
1931 | for (i = 1; i <= rdta->sites; i++) if (rdta->wgt[i] > 0) nonzero++; |
---|
1932 | |
---|
1933 | for (j = 1; j <= nonzero; j++) cdta->aliaswgt[j] = 0; |
---|
1934 | for (j = 1; j <= nonzero; j++) |
---|
1935 | cdta->aliaswgt[(int) (nonzero*randum(& adef->boot)) + 1]++; |
---|
1936 | |
---|
1937 | j = 0; |
---|
1938 | cdta->wgtsum = 0; |
---|
1939 | for (i = 1; i <= rdta->sites; i++) { |
---|
1940 | if (rdta->wgt[i] > 0) |
---|
1941 | cdta->wgtsum += (rdta->wgt2[i] = rdta->wgt[i] * cdta->aliaswgt[++j]); |
---|
1942 | else |
---|
1943 | rdta->wgt2[i] = 0; |
---|
1944 | } |
---|
1945 | } /* makeboot */ |
---|
1946 | |
---|
1947 | |
---|
1948 | void sitesort (rawdata *rdta, cruncheddata *cdta) |
---|
1949 | /* Shell sort keeping sites with identical residues and weights in |
---|
1950 | * the original order (i.e., a stable sort). |
---|
1951 | * The index created in cdta->alias is 1 based. The |
---|
1952 | * sitecombcrunch routine packs it to a 0 based index. |
---|
1953 | */ |
---|
1954 | { /* sitesort */ |
---|
1955 | int gap, i, j, jj, jg, k, n, nsp; |
---|
1956 | int *index, *category; |
---|
1957 | boolean flip, tied; |
---|
1958 | yType **data; |
---|
1959 | |
---|
1960 | index = cdta->alias; |
---|
1961 | category = rdta->sitecat; |
---|
1962 | data = rdta->y; |
---|
1963 | n = rdta->sites; |
---|
1964 | nsp = rdta->numsp; |
---|
1965 | |
---|
1966 | for (gap = n / 2; gap > 0; gap /= 2) { |
---|
1967 | for (i = gap + 1; i <= n; i++) { |
---|
1968 | j = i - gap; |
---|
1969 | |
---|
1970 | do { |
---|
1971 | jj = index[j]; |
---|
1972 | jg = index[j+gap]; |
---|
1973 | flip = (category[jj] > category[jg]); |
---|
1974 | tied = (category[jj] == category[jg]); |
---|
1975 | for (k = 1; (k <= nsp) && tied; k++) { |
---|
1976 | flip = (data[k][jj] > data[k][jg]); |
---|
1977 | tied = (data[k][jj] == data[k][jg]); |
---|
1978 | } |
---|
1979 | if (flip) { |
---|
1980 | index[j] = jg; |
---|
1981 | index[j+gap] = jj; |
---|
1982 | j -= gap; |
---|
1983 | } |
---|
1984 | } while (flip && (j > 0)); |
---|
1985 | |
---|
1986 | } /* for (i ... */ |
---|
1987 | } /* for (gap ... */ |
---|
1988 | } /* sitesort */ |
---|
1989 | |
---|
1990 | |
---|
1991 | void sitecombcrunch (rawdata *rdta, cruncheddata *cdta) |
---|
1992 | /* combine sites that have identical patterns (and nonzero weight) */ |
---|
1993 | { /* sitecombcrunch */ |
---|
1994 | int i, sitei, j, sitej, k; |
---|
1995 | boolean tied; |
---|
1996 | |
---|
1997 | i = 0; |
---|
1998 | cdta->alias[0] = cdta->alias[1]; |
---|
1999 | cdta->aliaswgt[0] = 0; |
---|
2000 | |
---|
2001 | for (j = 1; j <= rdta->sites; j++) { |
---|
2002 | sitei = cdta->alias[i]; |
---|
2003 | sitej = cdta->alias[j]; |
---|
2004 | tied = (rdta->sitecat[sitei] == rdta->sitecat[sitej]); |
---|
2005 | |
---|
2006 | for (k = 1; tied && (k <= rdta->numsp); k++) |
---|
2007 | tied = (rdta->y[k][sitei] == rdta->y[k][sitej]); |
---|
2008 | |
---|
2009 | if (tied) { |
---|
2010 | cdta->aliaswgt[i] += rdta->wgt2[sitej]; |
---|
2011 | } |
---|
2012 | else { |
---|
2013 | if (cdta->aliaswgt[i] > 0) i++; |
---|
2014 | cdta->aliaswgt[i] = rdta->wgt2[sitej]; |
---|
2015 | cdta->alias[i] = sitej; |
---|
2016 | } |
---|
2017 | } |
---|
2018 | |
---|
2019 | cdta->endsite = i; |
---|
2020 | if (cdta->aliaswgt[i] > 0) cdta->endsite++; |
---|
2021 | } /* sitecombcrunch */ |
---|
2022 | |
---|
2023 | |
---|
2024 | boolean makeweights (analdef *adef, rawdata *rdta, cruncheddata *cdta) |
---|
2025 | /* make up weights vector to avoid duplicate computations */ |
---|
2026 | { /* makeweights */ |
---|
2027 | int i; |
---|
2028 | |
---|
2029 | if (adef->boot) makeboot(adef, rdta, cdta); |
---|
2030 | else for (i = 1; i <= rdta->sites; i++) rdta->wgt2[i] = rdta->wgt[i]; |
---|
2031 | |
---|
2032 | for (i = 1; i <= rdta->sites; i++) cdta->alias[i] = i; |
---|
2033 | sitesort(rdta, cdta); |
---|
2034 | sitecombcrunch(rdta, cdta); |
---|
2035 | |
---|
2036 | printf("Total weight of positions in analysis = %d\n", cdta->wgtsum); |
---|
2037 | printf("There are %d distinct data patterns (columns)\n\n", cdta->endsite); |
---|
2038 | |
---|
2039 | return TRUE; |
---|
2040 | } /* makeweights */ |
---|
2041 | |
---|
2042 | |
---|
2043 | |
---|
2044 | |
---|
2045 | |
---|
2046 | |
---|
2047 | boolean makevalues (rawdata *rdta, cruncheddata *cdta) |
---|
2048 | /* set up fractional likelihoods at tips */ |
---|
2049 | { /* makevalues */ |
---|
2050 | double temp, wtemp; |
---|
2051 | int i, j; |
---|
2052 | |
---|
2053 | for (i = 1; i <= rdta->numsp; i++) { /* Pack and move tip data */ |
---|
2054 | for (j = 0; j < cdta->endsite; j++) { |
---|
2055 | rdta->y[i-1][j] = rdta->y[i][cdta->alias[j]]; |
---|
2056 | } |
---|
2057 | } |
---|
2058 | |
---|
2059 | for (j = 0; j < cdta->endsite; j++) { |
---|
2060 | cdta->patcat[j] = i = rdta->sitecat[cdta->alias[j]]; |
---|
2061 | cdta->patrat[j] = temp = rdta->catrat[i]; |
---|
2062 | cdta->wr[j] = wtemp = temp * cdta->aliaswgt[j]; |
---|
2063 | cdta->wr2[j] = temp * wtemp; |
---|
2064 | } |
---|
2065 | |
---|
2066 | |
---|
2067 | |
---|
2068 | return TRUE; |
---|
2069 | } /* makevalues */ |
---|
2070 | |
---|
2071 | |
---|
2072 | boolean empiricalfreqs (rawdata *rdta, cruncheddata *cdta) |
---|
2073 | /* Get empirical base frequencies from the data */ |
---|
2074 | { /* empiricalfreqs */ |
---|
2075 | double sum, suma, sumc, sumg, sumt, wj, fa, fc, fg, ft; |
---|
2076 | int i, j, k, code; |
---|
2077 | yType *yptr; |
---|
2078 | |
---|
2079 | rdta->freqa = 0.25; |
---|
2080 | rdta->freqc = 0.25; |
---|
2081 | rdta->freqg = 0.25; |
---|
2082 | rdta->freqt = 0.25; |
---|
2083 | for (k = 1; k <= 8; k++) { |
---|
2084 | suma = 0.0; |
---|
2085 | sumc = 0.0; |
---|
2086 | sumg = 0.0; |
---|
2087 | sumt = 0.0; |
---|
2088 | for (i = 0; i < rdta->numsp; i++) { |
---|
2089 | yptr = rdta->y[i]; |
---|
2090 | for (j = 0; j < cdta->endsite; j++) { |
---|
2091 | code = *yptr++; |
---|
2092 | fa = rdta->freqa * ( code & 1); |
---|
2093 | fc = rdta->freqc * ((code >> 1) & 1); |
---|
2094 | fg = rdta->freqg * ((code >> 2) & 1); |
---|
2095 | ft = rdta->freqt * ((code >> 3) & 1); |
---|
2096 | wj = cdta->aliaswgt[j] / (fa + fc + fg + ft); |
---|
2097 | suma += wj * fa; |
---|
2098 | sumc += wj * fc; |
---|
2099 | sumg += wj * fg; |
---|
2100 | sumt += wj * ft; |
---|
2101 | } |
---|
2102 | } |
---|
2103 | sum = suma + sumc + sumg + sumt; |
---|
2104 | rdta->freqa = suma / sum; |
---|
2105 | rdta->freqc = sumc / sum; |
---|
2106 | rdta->freqg = sumg / sum; |
---|
2107 | rdta->freqt = sumt / sum; |
---|
2108 | } |
---|
2109 | |
---|
2110 | return TRUE; |
---|
2111 | } /* empiricalfreqs */ |
---|
2112 | |
---|
2113 | |
---|
2114 | void reportfreqs (analdef *adef, rawdata *rdta) |
---|
2115 | { /* reportfreqs */ |
---|
2116 | double suma, sumb; |
---|
2117 | |
---|
2118 | if (adef->empf) printf("Empirical "); |
---|
2119 | printf("Base Frequencies:\n\n"); |
---|
2120 | printf(" A %10.5f\n", rdta->freqa); |
---|
2121 | printf(" C %10.5f\n", rdta->freqc); |
---|
2122 | printf(" G %10.5f\n", rdta->freqg); |
---|
2123 | printf(" T(U) %10.5f\n\n", rdta->freqt); |
---|
2124 | rdta->freqr = rdta->freqa + rdta->freqg; |
---|
2125 | rdta->invfreqr = 1.0/rdta->freqr; |
---|
2126 | rdta->freqar = rdta->freqa * rdta->invfreqr; |
---|
2127 | rdta->freqgr = rdta->freqg * rdta->invfreqr; |
---|
2128 | rdta->freqy = rdta->freqc + rdta->freqt; |
---|
2129 | rdta->invfreqy = 1.0/rdta->freqy; |
---|
2130 | rdta->freqcy = rdta->freqc * rdta->invfreqy; |
---|
2131 | rdta->freqty = rdta->freqt * rdta->invfreqy; |
---|
2132 | printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio); |
---|
2133 | suma = rdta->ttratio*rdta->freqr*rdta->freqy |
---|
2134 | - (rdta->freqa*rdta->freqg + rdta->freqc*rdta->freqt); |
---|
2135 | sumb = rdta->freqa*rdta->freqgr + rdta->freqc*rdta->freqty; |
---|
2136 | rdta->xi = suma/(suma+sumb); |
---|
2137 | rdta->xv = 1.0 - rdta->xi; |
---|
2138 | if (rdta->xi <= 0.0) { |
---|
2139 | printf("WARNING: This transition/transversion ratio\n"); |
---|
2140 | printf(" is impossible with these base frequencies!\n"); |
---|
2141 | printf("Transition/transversion parameter reset\n\n"); |
---|
2142 | rdta->xi = 0.000001; |
---|
2143 | rdta->xv = 1.0 - rdta->xi; |
---|
2144 | rdta->ttratio = (sumb * rdta->xi / rdta->xv |
---|
2145 | + rdta->freqa * rdta->freqg |
---|
2146 | + rdta->freqc * rdta->freqt) |
---|
2147 | / (rdta->freqr * rdta->freqy); |
---|
2148 | printf("Transition/transversion ratio = %10.6f\n\n", rdta->ttratio); |
---|
2149 | } |
---|
2150 | printf("(Transition/transversion parameter = %10.6f)\n\n", |
---|
2151 | rdta->xi/rdta->xv); |
---|
2152 | rdta->fracchange = 2.0 * rdta->xi * (rdta->freqa * rdta->freqgr |
---|
2153 | + rdta->freqc * rdta->freqty) |
---|
2154 | + rdta->xv * (1.0 - rdta->freqa * rdta->freqa |
---|
2155 | - rdta->freqc * rdta->freqc |
---|
2156 | - rdta->freqg * rdta->freqg |
---|
2157 | - rdta->freqt * rdta->freqt); |
---|
2158 | } /* reportfreqs */ |
---|
2159 | |
---|
2160 | |
---|
2161 | boolean linkdata2tree (rawdata *rdta, cruncheddata *cdta, tree *tr) |
---|
2162 | /* Link data array to the tree tips */ |
---|
2163 | { /* linkdata2tree */ |
---|
2164 | int i; |
---|
2165 | int j; |
---|
2166 | yType *tip; |
---|
2167 | int ref[EQUALITIES]; |
---|
2168 | tr->rdta = rdta; |
---|
2169 | tr->cdta = cdta; |
---|
2170 | |
---|
2171 | for(i = 0; i < EQUALITIES; i++) |
---|
2172 | ref[i] = 0; |
---|
2173 | equalities = 0; |
---|
2174 | |
---|
2175 | |
---|
2176 | for (i = 1; i <= tr->mxtips; i++) { /* Associate data with tips */ |
---|
2177 | tr->nodep[i]->tip = &(rdta->y[i-1][0]); |
---|
2178 | tip = tr->nodep[i]->tip; |
---|
2179 | for(j = 0; j < tr->cdta->endsite; j++) |
---|
2180 | { |
---|
2181 | if(ref[tip[j]] == 0) |
---|
2182 | ref[tip[j]] = 1; |
---|
2183 | } |
---|
2184 | } |
---|
2185 | |
---|
2186 | for(i = 0; i < EQUALITIES; i++) |
---|
2187 | { |
---|
2188 | if(ref[i] == 1) |
---|
2189 | { |
---|
2190 | map[i] = equalities++; |
---|
2191 | } |
---|
2192 | else |
---|
2193 | map[i] = -1; |
---|
2194 | } |
---|
2195 | |
---|
2196 | map2 = (char *)malloc(equalities * sizeof(char)); |
---|
2197 | |
---|
2198 | j = 0; |
---|
2199 | |
---|
2200 | for(i = 0; i < EQUALITIES; i++) |
---|
2201 | { |
---|
2202 | if(ref[i] == 1) |
---|
2203 | { |
---|
2204 | map2[j] = i; |
---|
2205 | j++; |
---|
2206 | } |
---|
2207 | } |
---|
2208 | |
---|
2209 | |
---|
2210 | |
---|
2211 | return TRUE; |
---|
2212 | } /* linkdata2tree */ |
---|
2213 | |
---|
2214 | |
---|
2215 | xarray *setupxarray (int npat) |
---|
2216 | { /* setupxarray */ |
---|
2217 | xarray *x; |
---|
2218 | likelivector *data; |
---|
2219 | |
---|
2220 | x = (xarray *) Malloc(sizeof(xarray)); |
---|
2221 | if (x) { |
---|
2222 | data = (likelivector *) Malloc(npat * sizeof(likelivector)); |
---|
2223 | if (data) { |
---|
2224 | x->lv = data; |
---|
2225 | x->prev = x->next = x; |
---|
2226 | x->owner = (node *) NULL; |
---|
2227 | } |
---|
2228 | else { |
---|
2229 | Free(x); |
---|
2230 | return (xarray *) NULL; |
---|
2231 | } |
---|
2232 | } |
---|
2233 | return x; |
---|
2234 | } /* setupxarray */ |
---|
2235 | |
---|
2236 | |
---|
2237 | boolean linkxarray (int req, int min, int npat, |
---|
2238 | xarray **freexptr, xarray **usedxptr) |
---|
2239 | /* Link a set of xarrays */ |
---|
2240 | { /* linkxarray */ |
---|
2241 | xarray *first, *prev, *x; |
---|
2242 | int i; |
---|
2243 | |
---|
2244 | first = prev = (xarray *) NULL; |
---|
2245 | i = 0; |
---|
2246 | |
---|
2247 | do { |
---|
2248 | x = setupxarray(npat); |
---|
2249 | if (x) { |
---|
2250 | if (! first) first = x; |
---|
2251 | else { |
---|
2252 | prev->next = x; |
---|
2253 | x->prev = prev; |
---|
2254 | } |
---|
2255 | prev = x; |
---|
2256 | i++; |
---|
2257 | } |
---|
2258 | else { |
---|
2259 | printf("ERROR: Failure to get requested xarray memory\n"); |
---|
2260 | if (i < min) return FALSE; |
---|
2261 | } |
---|
2262 | } while ((i < req) && x); |
---|
2263 | |
---|
2264 | if (first) { |
---|
2265 | first->prev = prev; |
---|
2266 | prev->next = first; |
---|
2267 | } |
---|
2268 | |
---|
2269 | *freexptr = first; |
---|
2270 | *usedxptr = (xarray *) NULL; |
---|
2271 | |
---|
2272 | return TRUE; |
---|
2273 | } /* linkxarray */ |
---|
2274 | |
---|
2275 | |
---|
2276 | boolean setupnodex (tree *tr) |
---|
2277 | { /* setupnodex */ |
---|
2278 | nodeptr p; |
---|
2279 | int i; |
---|
2280 | |
---|
2281 | for (i = tr->mxtips + 1; (i <= 2*(tr->mxtips) - 2); i++) { |
---|
2282 | p = tr->nodep[i]; |
---|
2283 | if (! (p->x = setupxarray(tr->cdta->endsite))) { |
---|
2284 | printf("ERROR: Failure to get internal node xarray memory\n"); |
---|
2285 | return FALSE; |
---|
2286 | } |
---|
2287 | } |
---|
2288 | |
---|
2289 | return TRUE; |
---|
2290 | } /* setupnodex */ |
---|
2291 | |
---|
2292 | |
---|
2293 | xarray *getxtip (nodeptr p) |
---|
2294 | { /* getxtip */ |
---|
2295 | xarray *new; |
---|
2296 | boolean splice; |
---|
2297 | |
---|
2298 | if (! p) return (xarray *) NULL; |
---|
2299 | |
---|
2300 | splice = FALSE; |
---|
2301 | |
---|
2302 | if (p->x) { /* array is there; move to tail of list */ |
---|
2303 | new = p->x; |
---|
2304 | if (new == new->prev) ; /* linked to self; leave it */ |
---|
2305 | else if (new == usedxtip) usedxtip = usedxtip->next; /* at head */ |
---|
2306 | else if (new == usedxtip->prev) ; /* already at tail */ |
---|
2307 | else { /* move to tail of list */ |
---|
2308 | new->prev->next = new->next; |
---|
2309 | new->next->prev = new->prev; |
---|
2310 | splice = TRUE; |
---|
2311 | } |
---|
2312 | } |
---|
2313 | |
---|
2314 | else if (freextip) { /* take from unused list */ |
---|
2315 | p->x = new = freextip; |
---|
2316 | new->owner = p; |
---|
2317 | if (new->prev != new) { /* not only member of freelist */ |
---|
2318 | new->prev->next = new->next; |
---|
2319 | new->next->prev = new->prev; |
---|
2320 | freextip = new->next; |
---|
2321 | } |
---|
2322 | else |
---|
2323 | freextip = (xarray *) NULL; |
---|
2324 | |
---|
2325 | splice = TRUE; |
---|
2326 | } |
---|
2327 | |
---|
2328 | else if (usedxtip) { /* take from head of used list */ |
---|
2329 | usedxtip->owner->x = (xarray *) NULL; |
---|
2330 | p->x = new = usedxtip; |
---|
2331 | new->owner = p; |
---|
2332 | usedxtip = usedxtip->next; |
---|
2333 | } |
---|
2334 | |
---|
2335 | else { |
---|
2336 | printf("ERROR: Unable to locate memory for tip %d.\n", p->number); |
---|
2337 | return (xarray *) NULL; |
---|
2338 | } |
---|
2339 | |
---|
2340 | if (splice) { |
---|
2341 | if (usedxtip) { /* list is not empty */ |
---|
2342 | usedxtip->prev->next = new; |
---|
2343 | new->prev = usedxtip->prev; |
---|
2344 | usedxtip->prev = new; |
---|
2345 | new->next = usedxtip; |
---|
2346 | } |
---|
2347 | else |
---|
2348 | usedxtip = new->prev = new->next = new; |
---|
2349 | } |
---|
2350 | |
---|
2351 | return new; |
---|
2352 | } /* getxtip */ |
---|
2353 | |
---|
2354 | |
---|
2355 | xarray *getxnode (nodeptr p) |
---|
2356 | /* Ensure that internal node p has memory */ |
---|
2357 | { /* getxnode */ |
---|
2358 | nodeptr s; |
---|
2359 | |
---|
2360 | if (! (p->x)) { /* Move likelihood array on this node to sector p */ |
---|
2361 | if ((s = p->next)->x || (s = s->next)->x) { |
---|
2362 | p->x = s->x; |
---|
2363 | s->x = (xarray *) NULL; |
---|
2364 | } |
---|
2365 | else { |
---|
2366 | printf("ERROR: Unable to locate memory at node %d.\n", p->number); |
---|
2367 | exit(1); |
---|
2368 | } |
---|
2369 | } |
---|
2370 | return p->x; |
---|
2371 | } /* getxnode */ |
---|
2372 | |
---|
2373 | |
---|
2374 | /* AxML modification start*/ |
---|
2375 | /* optimized version of function newview(), implementing homogeneous and */ |
---|
2376 | /* partially heterogeneous subtree equality vectors */ |
---|
2377 | |
---|
2378 | |
---|
2379 | printhq(long *hq, tree *tr) |
---|
2380 | { |
---|
2381 | int i; |
---|
2382 | for(i = 0; i < tr->cdta->endsite; i++) |
---|
2383 | fprintf(stderr,"%ld ", hq[i]); |
---|
2384 | fprintf(stderr,"\n"); |
---|
2385 | } |
---|
2386 | |
---|
2387 | |
---|
2388 | |
---|
2389 | boolean newviewOptimized(tree *tr, nodeptr p) /* Update likelihoods at node */ |
---|
2390 | { /* newviewOptimized */ |
---|
2391 | double zq, lzq, xvlzq, zr, lzr, xvlzr; |
---|
2392 | nodeptr q, r, realP; |
---|
2393 | likelivector *lp, *lq, *lr; |
---|
2394 | homType *mlvP; |
---|
2395 | int i, j; |
---|
2396 | |
---|
2397 | if (p->tip) { /* Make sure that data are at tip */ |
---|
2398 | |
---|
2399 | if (p->x) return TRUE; /* They are already there */ |
---|
2400 | |
---|
2401 | if (! getxtip(p)) return FALSE; /* They are not, so get memory */ |
---|
2402 | |
---|
2403 | return TRUE; |
---|
2404 | } |
---|
2405 | |
---|
2406 | /* Internal node needs update */ |
---|
2407 | |
---|
2408 | q = p->next->back; |
---|
2409 | r = p->next->next->back; |
---|
2410 | |
---|
2411 | while ((! p->x) || (! q->x) || (! r->x)) { |
---|
2412 | /* rekursiver aufruf */ |
---|
2413 | if (! q->x) if (! newviewOptimized(tr, q)) return FALSE; |
---|
2414 | if (! r->x) if (! newviewOptimized(tr, r)) return FALSE; |
---|
2415 | if (! p->x) if (! getxnode(p)) return FALSE; |
---|
2416 | } |
---|
2417 | |
---|
2418 | |
---|
2419 | |
---|
2420 | lp = p->x->lv; |
---|
2421 | |
---|
2422 | lq = q->x->lv; |
---|
2423 | zq = q->z; |
---|
2424 | lzq = (zq > zmin) ? log(zq) : log(zmin); |
---|
2425 | xvlzq = tr->rdta->xv * lzq; |
---|
2426 | |
---|
2427 | lr = r->x->lv; |
---|
2428 | zr = r->z; |
---|
2429 | lzr = (zr > zmin) ? log(zr) : log(zmin); |
---|
2430 | xvlzr = tr->rdta->xv * lzr; |
---|
2431 | |
---|
2432 | { double zzq, zvq, |
---|
2433 | zzr, zvr, rp; |
---|
2434 | double zzqtable[maxcategories+1], zvqtable[maxcategories+1], |
---|
2435 | zzrtable[maxcategories+1], zvrtable[maxcategories+1], |
---|
2436 | *zzqptr, *zvqptr, *zzrptr, *zvrptr, *rptr; |
---|
2437 | double fxqr, fxqy, fxqn, sumaq, sumgq, sumcq, sumtq, |
---|
2438 | fxrr, fxry, fxrn, ki, tempi, tempj; |
---|
2439 | register char *s1; |
---|
2440 | register char *s2; |
---|
2441 | register char *eqptr; |
---|
2442 | int *cptr; |
---|
2443 | int cat; |
---|
2444 | homType *mlvR, *mlvQ; |
---|
2445 | poutsa calc[EQUALITIES][maxcategories],calcR[EQUALITIES][maxcategories], *calcptr, *calcptrR; |
---|
2446 | long ref, ref2, mexp; |
---|
2447 | nodeptr realQ, realR; |
---|
2448 | |
---|
2449 | |
---|
2450 | rptr = &(tr->rdta->catrat[1]); |
---|
2451 | zzqptr = &(zzqtable[1]); |
---|
2452 | zvqptr = &(zvqtable[1]); |
---|
2453 | zzrptr = &(zzrtable[1]); |
---|
2454 | zvrptr = &(zvrtable[1]); |
---|
2455 | /* get equality vectors of p, q, and r */ |
---|
2456 | |
---|
2457 | realP = tr->nodep[p->number]; |
---|
2458 | |
---|
2459 | |
---|
2460 | realQ = tr->nodep[q->number]; |
---|
2461 | realR = tr->nodep[r->number]; |
---|
2462 | |
---|
2463 | eqptr = realP->equalityVector; |
---|
2464 | s1 = realQ->equalityVector; |
---|
2465 | s2 = realR->equalityVector; |
---|
2466 | |
---|
2467 | |
---|
2468 | for (j = 1; j <= tr->rdta->categs; j++) |
---|
2469 | { |
---|
2470 | ki = *rptr++; |
---|
2471 | *zzqptr++ = zzq = exp(ki * lzq); |
---|
2472 | *zvqptr++ = zvq = exp(ki * xvlzq); |
---|
2473 | *zzrptr++ = zzr = exp(ki * lzr); |
---|
2474 | *zvrptr++ = zvr = exp(ki * xvlzr); |
---|
2475 | |
---|
2476 | for(i = 0; i < equalities; i++) |
---|
2477 | { |
---|
2478 | |
---|
2479 | mlvQ = &realQ->mlv[i][j]; |
---|
2480 | |
---|
2481 | if(mlvQ->set) |
---|
2482 | { |
---|
2483 | calcptr = &calc[i][j]; |
---|
2484 | fxqr = tr->rdta->freqa * mlvQ->a + tr->rdta->freqg * mlvQ->g; |
---|
2485 | fxqy = tr->rdta->freqc * mlvQ->c + tr->rdta->freqt * mlvQ->t; |
---|
2486 | fxqn = fxqr + fxqy; |
---|
2487 | tempi = fxqr * tr->rdta->invfreqr; |
---|
2488 | tempj = zvq * (tempi-fxqn) + fxqn; |
---|
2489 | calcptr->sumaq = zzq * (mlvQ->a - tempi) + tempj; |
---|
2490 | calcptr->sumgq = zzq * (mlvQ->g - tempi) + tempj; |
---|
2491 | tempi = fxqy * tr->rdta->invfreqy; |
---|
2492 | tempj = zvq * (tempi-fxqn) + fxqn; |
---|
2493 | calcptr->sumcq = zzq * (mlvQ->c - tempi) + tempj; |
---|
2494 | calcptr->sumtq = zzq * (mlvQ->t - tempi) + tempj; |
---|
2495 | calcptr->exp = mlvQ->exp; |
---|
2496 | } |
---|
2497 | |
---|
2498 | mlvR = &realR->mlv[i][j]; |
---|
2499 | |
---|
2500 | if(mlvR->set) |
---|
2501 | { |
---|
2502 | calcptrR = &calcR[i][j]; |
---|
2503 | fxrr = tr->rdta->freqa * mlvR->a + tr->rdta->freqg * mlvR->g; |
---|
2504 | fxry = tr->rdta->freqc * mlvR->c + tr->rdta->freqt * mlvR->t; |
---|
2505 | fxrn = fxrr + fxry; |
---|
2506 | tempi = fxrr * tr->rdta->invfreqr; |
---|
2507 | tempj = zvr * (tempi-fxrn) + fxrn; |
---|
2508 | calcptrR->sumaq = zzr * (mlvR->a - tempi) + tempj; |
---|
2509 | calcptrR->sumgq = zzr * (mlvR->g - tempi) + tempj; |
---|
2510 | tempi = fxry * tr->rdta->invfreqy; |
---|
2511 | tempj = zvr * (tempi-fxrn) + fxrn; |
---|
2512 | calcptrR->sumcq = zzr * (mlvR->c - tempi) + tempj; |
---|
2513 | calcptrR->sumtq = zzr * (mlvR->t - tempi) + tempj; |
---|
2514 | calcptrR->exp = mlvR->exp; |
---|
2515 | } |
---|
2516 | |
---|
2517 | if(mlvR->set && mlvQ->set) |
---|
2518 | { |
---|
2519 | mlvP = &realP->mlv[i][j]; |
---|
2520 | |
---|
2521 | mlvP->a = calcptrR->sumaq * calcptr->sumaq; |
---|
2522 | mlvP->g = calcptrR->sumgq * calcptr->sumgq; |
---|
2523 | mlvP->c = calcptrR->sumcq * calcptr->sumcq; |
---|
2524 | mlvP->t = calcptrR->sumtq * calcptr->sumtq; |
---|
2525 | mlvP->exp = calcptrR->exp + calcptr->exp; |
---|
2526 | if (mlvP->a < minlikelihood && mlvP->g < minlikelihood |
---|
2527 | && mlvP->c < minlikelihood && mlvP->t < minlikelihood) { |
---|
2528 | mlvP->a *= twotothe256; |
---|
2529 | mlvP->g *= twotothe256; |
---|
2530 | mlvP->c *= twotothe256; |
---|
2531 | mlvP->t *= twotothe256; |
---|
2532 | mlvP->exp += 1; |
---|
2533 | } |
---|
2534 | |
---|
2535 | mlvP->set = TRUE; |
---|
2536 | } |
---|
2537 | else |
---|
2538 | mlvP->set = FALSE; |
---|
2539 | |
---|
2540 | } |
---|
2541 | } |
---|
2542 | |
---|
2543 | cptr = &(tr->cdta->patcat[0]); |
---|
2544 | |
---|
2545 | if(r->tip && q->tip) |
---|
2546 | { |
---|
2547 | for (i = 0; i < tr->cdta->endsite; i++) |
---|
2548 | { |
---|
2549 | ref = *s2++; |
---|
2550 | ref2 = *s1++; |
---|
2551 | cat = *cptr++; |
---|
2552 | |
---|
2553 | |
---|
2554 | if(ref2 != ref) |
---|
2555 | { |
---|
2556 | *eqptr++ = -1; |
---|
2557 | calcptrR = &calcR[ref][cat]; |
---|
2558 | calcptr = &calc[ref2][cat]; |
---|
2559 | lp->a = calcptrR->sumaq * calcptr->sumaq; |
---|
2560 | lp->g = calcptrR->sumgq * calcptr->sumgq; |
---|
2561 | lp->c = calcptrR->sumcq * calcptr->sumcq; |
---|
2562 | lp->t = calcptrR->sumtq * calcptr->sumtq; |
---|
2563 | lp->exp = calcptrR->exp + calcptr->exp; |
---|
2564 | if (lp->a < minlikelihood && lp->g < minlikelihood |
---|
2565 | && lp->c < minlikelihood && lp->t < minlikelihood) { |
---|
2566 | lp->a *= twotothe256; |
---|
2567 | lp->g *= twotothe256; |
---|
2568 | lp->c *= twotothe256; |
---|
2569 | lp->t *= twotothe256; |
---|
2570 | lp->exp += 1; |
---|
2571 | } |
---|
2572 | |
---|
2573 | } |
---|
2574 | else |
---|
2575 | { |
---|
2576 | *eqptr++ = ref; |
---|
2577 | } |
---|
2578 | lp++; |
---|
2579 | |
---|
2580 | } |
---|
2581 | } |
---|
2582 | else |
---|
2583 | { |
---|
2584 | |
---|
2585 | |
---|
2586 | for (i = 0; i < tr->cdta->endsite; i++) |
---|
2587 | { |
---|
2588 | ref = *s2++; |
---|
2589 | ref2 = *s1++; |
---|
2590 | cat = *cptr++; |
---|
2591 | |
---|
2592 | if(ref2 >= 0) |
---|
2593 | { |
---|
2594 | if(ref2 == ref) |
---|
2595 | { |
---|
2596 | *eqptr++ = ref; |
---|
2597 | goto incTip; |
---|
2598 | } |
---|
2599 | calcptr = &calc[ref2][cat]; |
---|
2600 | sumaq = calcptr->sumaq; |
---|
2601 | sumgq = calcptr->sumgq; |
---|
2602 | sumcq = calcptr->sumcq; |
---|
2603 | sumtq = calcptr->sumtq; |
---|
2604 | mexp = calcptr->exp; |
---|
2605 | |
---|
2606 | } |
---|
2607 | else |
---|
2608 | { |
---|
2609 | zzq = zzqtable[cat]; |
---|
2610 | zvq = zvqtable[cat]; |
---|
2611 | fxqr = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g; |
---|
2612 | fxqy = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t; |
---|
2613 | fxqn = fxqr + fxqy; |
---|
2614 | tempi = fxqr * tr->rdta->invfreqr; |
---|
2615 | tempj = zvq * (tempi-fxqn) + fxqn; |
---|
2616 | sumaq = zzq * (lq->a - tempi) + tempj; |
---|
2617 | sumgq = zzq * (lq->g - tempi) + tempj; |
---|
2618 | tempi = fxqy * tr->rdta->invfreqy; |
---|
2619 | tempj = zvq * (tempi-fxqn) + fxqn; |
---|
2620 | sumcq = zzq * (lq->c - tempi) + tempj; |
---|
2621 | sumtq = zzq * (lq->t - tempi) + tempj; |
---|
2622 | mexp = lq->exp; |
---|
2623 | } |
---|
2624 | |
---|
2625 | |
---|
2626 | if(ref >= 0) |
---|
2627 | { |
---|
2628 | calcptr = &calcR[ref][cat]; |
---|
2629 | lp->a = sumaq * calcptr->sumaq; |
---|
2630 | lp->g = sumgq * calcptr->sumgq; |
---|
2631 | lp->c = sumcq * calcptr->sumcq; |
---|
2632 | lp->t = sumtq * calcptr->sumtq; |
---|
2633 | lp->exp = mexp + calcptr->exp; |
---|
2634 | } |
---|
2635 | else |
---|
2636 | { |
---|
2637 | zzr = zzrtable[cat]; |
---|
2638 | zvr = zvrtable[cat]; |
---|
2639 | fxrr = tr->rdta->freqa * lr->a + tr->rdta->freqg * lr->g; |
---|
2640 | fxry = tr->rdta->freqc * lr->c + tr->rdta->freqt * lr->t; |
---|
2641 | fxrn = fxrr + fxry; |
---|
2642 | tempi = fxrr * tr->rdta->invfreqr; |
---|
2643 | tempj = zvr * (tempi-fxrn) + fxrn; |
---|
2644 | lp->a = sumaq * (zzr * (lr->a - tempi) + tempj); |
---|
2645 | lp->g = sumgq * (zzr * (lr->g - tempi) + tempj); |
---|
2646 | tempi = fxry * tr->rdta->invfreqy; |
---|
2647 | tempj = zvr * (tempi-fxrn) + fxrn; |
---|
2648 | lp->c = sumcq * (zzr * (lr->c - tempi) + tempj); |
---|
2649 | lp->t = sumtq * (zzr * (lr->t - tempi) + tempj); |
---|
2650 | lp->exp = mexp + lr->exp; |
---|
2651 | } |
---|
2652 | |
---|
2653 | if (lp->a < minlikelihood && lp->g < minlikelihood |
---|
2654 | && lp->c < minlikelihood && lp->t < minlikelihood) { |
---|
2655 | lp->a *= twotothe256; |
---|
2656 | lp->g *= twotothe256; |
---|
2657 | lp->c *= twotothe256; |
---|
2658 | lp->t *= twotothe256; |
---|
2659 | lp->exp += 1; |
---|
2660 | } |
---|
2661 | |
---|
2662 | *eqptr++ = -1; |
---|
2663 | incTip: |
---|
2664 | lp++; |
---|
2665 | lq++; |
---|
2666 | lr++; |
---|
2667 | } |
---|
2668 | } |
---|
2669 | } |
---|
2670 | |
---|
2671 | |
---|
2672 | return TRUE; |
---|
2673 | } /* newviewOptimized */ |
---|
2674 | |
---|
2675 | /* AxML modification end*/ |
---|
2676 | |
---|
2677 | double evaluate (tree *tr, nodeptr p) |
---|
2678 | { /* evaluate */ |
---|
2679 | double sum, z, lz, xvlz, |
---|
2680 | ki, fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y, |
---|
2681 | suma, sumb, sumc, term; |
---|
2682 | |
---|
2683 | |
---|
2684 | double zz, zv; |
---|
2685 | double zztable[maxcategories+1], zvtable[maxcategories+1], |
---|
2686 | *zzptr, *zvptr; |
---|
2687 | double *log_f, *rptr; |
---|
2688 | likelivector *lp, *lq; |
---|
2689 | homType *mlvP, *mlvQ; |
---|
2690 | nodeptr q, realQ, realP; |
---|
2691 | int i, *wptr, cat, *cptr, j; |
---|
2692 | register char *sQ; |
---|
2693 | register char *sP; |
---|
2694 | long refQ, refP; |
---|
2695 | memP calcP[EQUALITIES][maxcategories], *calcptrP; |
---|
2696 | memQ calcQ[EQUALITIES][maxcategories], *calcptrQ; |
---|
2697 | |
---|
2698 | q = p->back; |
---|
2699 | |
---|
2700 | while ((! p->x) || (! q->x)) { |
---|
2701 | if (! (p->x)) if (! newviewOptimized(tr, p)) return badEval; |
---|
2702 | if (! (q->x)) if (! newviewOptimized(tr, q)) return badEval; |
---|
2703 | } |
---|
2704 | |
---|
2705 | lp = p->x->lv; |
---|
2706 | lq = q->x->lv; |
---|
2707 | |
---|
2708 | z = p->z; |
---|
2709 | if (z < zmin) z = zmin; |
---|
2710 | lz = log(z); |
---|
2711 | xvlz = tr->rdta->xv * lz; |
---|
2712 | |
---|
2713 | rptr = &(tr->rdta->catrat[1]); |
---|
2714 | zzptr = &(zztable[1]); |
---|
2715 | zvptr = &(zvtable[1]); |
---|
2716 | |
---|
2717 | |
---|
2718 | cptr = &(tr->cdta->patcat[0]); |
---|
2719 | wptr = &(tr->cdta->aliaswgt[0]); |
---|
2720 | log_f = tr->log_f; |
---|
2721 | tr->log_f_valid = tr->cdta->endsite; |
---|
2722 | sum = 0.0; |
---|
2723 | |
---|
2724 | realQ = tr->nodep[q->number]; |
---|
2725 | realP = tr->nodep[p->number]; |
---|
2726 | sQ = realQ->equalityVector; |
---|
2727 | sP = realP->equalityVector; |
---|
2728 | |
---|
2729 | |
---|
2730 | for (j = 1; j <= tr->rdta->categs; j++) |
---|
2731 | { |
---|
2732 | ki = *rptr++; |
---|
2733 | *zzptr++ = exp(ki * lz); |
---|
2734 | *zvptr++ = exp(ki * xvlz); |
---|
2735 | |
---|
2736 | for(i = 0; i < equalities; i++) |
---|
2737 | { |
---|
2738 | |
---|
2739 | mlvP = &realP->mlv[i][j]; |
---|
2740 | if(mlvP->set) |
---|
2741 | { |
---|
2742 | calcptrP = &calcP[i][j]; |
---|
2743 | calcptrP->fx1a = tr->rdta->freqa * mlvP->a; |
---|
2744 | calcptrP->fx1g = tr->rdta->freqg * mlvP->g; |
---|
2745 | calcptrP->fx1c = tr->rdta->freqc * mlvP->c; |
---|
2746 | calcptrP->fx1t = tr->rdta->freqt * mlvP->t; |
---|
2747 | calcptrP->sumag = calcptrP->fx1a + calcptrP->fx1g; |
---|
2748 | calcptrP->sumct = calcptrP->fx1c + calcptrP->fx1t; |
---|
2749 | calcptrP->sumagct = calcptrP->sumag + calcptrP->sumct; |
---|
2750 | calcptrP->exp = mlvP->exp; |
---|
2751 | } |
---|
2752 | mlvQ = &realQ->mlv[i][j]; |
---|
2753 | if(mlvQ->set) |
---|
2754 | { |
---|
2755 | calcptrQ = &calcQ[i][j]; |
---|
2756 | calcptrQ->fx2r = tr->rdta->freqa * mlvQ->a + tr->rdta->freqg * mlvQ->g; |
---|
2757 | calcptrQ->fx2y = tr->rdta->freqc * mlvQ->c + tr->rdta->freqt * mlvQ->t; |
---|
2758 | calcptrQ->sumfx2rfx2y = calcptrQ->fx2r + calcptrQ->fx2y; |
---|
2759 | calcptrQ->exp = mlvQ->exp; |
---|
2760 | calcptrQ->a = mlvQ->a; |
---|
2761 | calcptrQ->c = mlvQ->c; |
---|
2762 | calcptrQ->g = mlvQ->g; |
---|
2763 | calcptrQ->t = mlvQ->t; |
---|
2764 | } |
---|
2765 | |
---|
2766 | } |
---|
2767 | } |
---|
2768 | |
---|
2769 | |
---|
2770 | |
---|
2771 | |
---|
2772 | for (i = 0; i < tr->cdta->endsite; i++) { |
---|
2773 | refQ = *sQ++; |
---|
2774 | refP = *sP++; |
---|
2775 | cat = *cptr++; |
---|
2776 | zz = zztable[cat]; |
---|
2777 | zv = zvtable[cat]; |
---|
2778 | if(refP >= 0) |
---|
2779 | { |
---|
2780 | if(refQ >= 0) |
---|
2781 | { |
---|
2782 | calcptrP = &calcP[refP][cat]; |
---|
2783 | calcptrQ = &calcQ[refQ][cat]; |
---|
2784 | suma = calcptrP->fx1a * calcptrQ->a + |
---|
2785 | calcptrP->fx1c * calcptrQ->c + |
---|
2786 | calcptrP->fx1g * calcptrQ->g + calcptrP->fx1t * calcptrQ->t; |
---|
2787 | |
---|
2788 | sumc = calcptrP->sumagct * calcptrQ->sumfx2rfx2y; |
---|
2789 | sumb = calcptrP->sumag * calcptrQ->fx2r * tr->rdta->invfreqr + calcptrP->sumct * calcptrQ->fx2y * tr->rdta->invfreqy; |
---|
2790 | suma -= sumb; |
---|
2791 | sumb -= sumc; |
---|
2792 | term = log(zz * suma + zv * sumb + sumc) + |
---|
2793 | (calcptrP->exp + calcptrQ->exp)*log(minlikelihood); |
---|
2794 | sum += *wptr++ * term; |
---|
2795 | |
---|
2796 | *log_f++ = term; |
---|
2797 | lp++; |
---|
2798 | lq++; |
---|
2799 | } |
---|
2800 | else |
---|
2801 | { |
---|
2802 | calcptrP = &calcP[refP][cat]; |
---|
2803 | suma = calcptrP->fx1a * lq->a + calcptrP->fx1c * lq->c + |
---|
2804 | calcptrP->fx1g * lq->g + calcptrP->fx1t * lq->t; |
---|
2805 | |
---|
2806 | fx2r = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g; |
---|
2807 | fx2y = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t; |
---|
2808 | |
---|
2809 | sumc = calcptrP->sumagct * (fx2r + fx2y); |
---|
2810 | sumb = calcptrP->sumag * fx2r * tr->rdta->invfreqr + calcptrP->sumct * fx2y * tr->rdta->invfreqy; |
---|
2811 | suma -= sumb; |
---|
2812 | sumb -= sumc; |
---|
2813 | term = log(zz * suma + zv * sumb + sumc) + |
---|
2814 | (calcptrP->exp + lq->exp)*log(minlikelihood); |
---|
2815 | sum += *wptr++ * term; |
---|
2816 | |
---|
2817 | *log_f++ = term; |
---|
2818 | lp++; |
---|
2819 | lq++; |
---|
2820 | } |
---|
2821 | } |
---|
2822 | else |
---|
2823 | { |
---|
2824 | if(refQ >= 0) |
---|
2825 | { |
---|
2826 | calcptrQ = &calcQ[refQ][cat]; |
---|
2827 | fx1a = tr->rdta->freqa * lp->a; |
---|
2828 | fx1g = tr->rdta->freqg * lp->g; |
---|
2829 | fx1c = tr->rdta->freqc * lp->c; |
---|
2830 | fx1t = tr->rdta->freqt * lp->t; |
---|
2831 | suma = fx1a * calcptrQ->a + fx1c * calcptrQ->c + |
---|
2832 | fx1g * calcptrQ->g + fx1t * calcptrQ->t; |
---|
2833 | fx1r = fx1a + fx1g; |
---|
2834 | fx1y = fx1c + fx1t; |
---|
2835 | sumc = (fx1r + fx1y) * calcptrQ->sumfx2rfx2y; |
---|
2836 | sumb = fx1r * calcptrQ->fx2r * tr->rdta->invfreqr + fx1y * calcptrQ->fx2y * tr->rdta->invfreqy; |
---|
2837 | suma -= sumb; |
---|
2838 | sumb -= sumc; |
---|
2839 | term = log(zz * suma + zv * sumb + sumc) + |
---|
2840 | (lp->exp + calcptrQ->exp)*log(minlikelihood); |
---|
2841 | sum += *wptr++ * term; |
---|
2842 | *log_f++ = term; |
---|
2843 | lp++; |
---|
2844 | lq++; |
---|
2845 | } |
---|
2846 | else |
---|
2847 | { |
---|
2848 | fx1a = tr->rdta->freqa * lp->a; |
---|
2849 | fx1g = tr->rdta->freqg * lp->g; |
---|
2850 | fx1c = tr->rdta->freqc * lp->c; |
---|
2851 | fx1t = tr->rdta->freqt * lp->t; |
---|
2852 | suma = fx1a * lq->a + fx1c * lq->c + fx1g * lq->g + fx1t * lq->t; |
---|
2853 | fx2r = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g; |
---|
2854 | fx2y = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t; |
---|
2855 | fx1r = fx1a + fx1g; |
---|
2856 | fx1y = fx1c + fx1t; |
---|
2857 | sumc = (fx1r + fx1y) * (fx2r + fx2y); |
---|
2858 | sumb = fx1r * fx2r * tr->rdta->invfreqr + fx1y * fx2y * tr->rdta->invfreqy; |
---|
2859 | suma -= sumb; |
---|
2860 | sumb -= sumc; |
---|
2861 | term = log(zz * suma + zv * sumb + sumc) + |
---|
2862 | (lp->exp + lq->exp)*log(minlikelihood); |
---|
2863 | sum += *wptr++ * term; |
---|
2864 | *log_f++ = term; |
---|
2865 | lp++; |
---|
2866 | lq++; |
---|
2867 | } |
---|
2868 | } |
---|
2869 | } |
---|
2870 | |
---|
2871 | |
---|
2872 | tr->likelihood = sum; |
---|
2873 | return sum; |
---|
2874 | } /* evaluate */ |
---|
2875 | |
---|
2876 | |
---|
2877 | long countEQ = 0, countNEQ = 0; |
---|
2878 | |
---|
2879 | double makenewz (tree *tr, nodeptr p, nodeptr q, double z0, int maxiter) |
---|
2880 | { /* makenewz */ |
---|
2881 | likelivector *lp, *lq, *mlv[EQUALITIES][maxcategories]; |
---|
2882 | double *abi, *bci, *sumci, *abptr, *bcptr, *sumcptr; |
---|
2883 | double dlnLidlz, dlnLdlz, d2lnLdlz2, z, zprev, zstep, lz, xvlz, |
---|
2884 | ki, suma, sumb, sumc, ab, bc, inv_Li, t1, t2, |
---|
2885 | fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y; |
---|
2886 | double zzp, zvp; |
---|
2887 | double *wrptr, w, *wr2ptr, *rptr; |
---|
2888 | int i, curvatOK, j , cat, *cptr; |
---|
2889 | memP calcP[EQUALITIES][maxcategories], *calcptrP; |
---|
2890 | memQ calcQ[EQUALITIES][maxcategories], *calcptrQ; |
---|
2891 | homType *mlvP, *mlvQ; |
---|
2892 | register char *sQ; |
---|
2893 | register char *sP; |
---|
2894 | nodeptr realQ, realP; |
---|
2895 | long refQ, refP; |
---|
2896 | double zztable[maxcategories+1], zvtable[maxcategories+1], |
---|
2897 | *zzptr, *zvptr; |
---|
2898 | |
---|
2899 | |
---|
2900 | while ((! p->x) || (! q->x)) { |
---|
2901 | if (! (p->x)) if (! newviewOptimized(tr, p)) return badZ; |
---|
2902 | if (! (q->x)) if (! newviewOptimized(tr, q)) return badZ; |
---|
2903 | } |
---|
2904 | |
---|
2905 | realQ = tr->nodep[q->number]; |
---|
2906 | realP = tr->nodep[p->number]; |
---|
2907 | sQ = realQ->equalityVector; |
---|
2908 | sP = realP->equalityVector; |
---|
2909 | lp = p->x->lv; |
---|
2910 | lq = q->x->lv; |
---|
2911 | |
---|
2912 | |
---|
2913 | |
---|
2914 | { |
---|
2915 | unsigned scratch_size; |
---|
2916 | scratch_size = sizeof(double) * tr->cdta->endsite; |
---|
2917 | if ((abi = (double *) Malloc(scratch_size)) && |
---|
2918 | (bci = (double *) Malloc(scratch_size)) && |
---|
2919 | (sumci = (double *) Malloc(scratch_size))) ; |
---|
2920 | else { |
---|
2921 | printf("ERROR: makenewz unable to obtain space for arrays\n"); |
---|
2922 | return badZ; |
---|
2923 | } |
---|
2924 | |
---|
2925 | } |
---|
2926 | abptr = abi; |
---|
2927 | bcptr = bci; |
---|
2928 | sumcptr = sumci; |
---|
2929 | |
---|
2930 | # ifdef Vectorize |
---|
2931 | # pragma IVDEP |
---|
2932 | # endif |
---|
2933 | |
---|
2934 | |
---|
2935 | |
---|
2936 | for (j = 1; j <= tr->rdta->categs; j++) |
---|
2937 | { |
---|
2938 | for(i = 0; i < equalities; i++) |
---|
2939 | { |
---|
2940 | mlvP = &realP->mlv[i][j]; |
---|
2941 | if(mlvP->set) |
---|
2942 | { |
---|
2943 | calcptrP = &calcP[i][j]; |
---|
2944 | calcptrP->fx1a = tr->rdta->freqa * mlvP->a; |
---|
2945 | calcptrP->fx1g = tr->rdta->freqg * mlvP->g; |
---|
2946 | calcptrP->fx1c = tr->rdta->freqc * mlvP->c; |
---|
2947 | calcptrP->fx1t = tr->rdta->freqt * mlvP->t; |
---|
2948 | calcptrP->sumag = calcptrP->fx1a + calcptrP->fx1g; |
---|
2949 | calcptrP->sumct = calcptrP->fx1c + calcptrP->fx1t; |
---|
2950 | calcptrP->sumagct = calcptrP->sumag + calcptrP->sumct; |
---|
2951 | } |
---|
2952 | mlvQ = &realQ->mlv[i][j]; |
---|
2953 | if(mlvQ->set) |
---|
2954 | { |
---|
2955 | calcptrQ = &calcQ[i][j]; |
---|
2956 | calcptrQ->fx2r = tr->rdta->freqa * mlvQ->a + tr->rdta->freqg * mlvQ->g; |
---|
2957 | calcptrQ->fx2y = tr->rdta->freqc * mlvQ->c + tr->rdta->freqt * mlvQ->t; |
---|
2958 | calcptrQ->sumfx2rfx2y = calcptrQ->fx2r + calcptrQ->fx2y; |
---|
2959 | calcptrQ->a = mlvQ->a; |
---|
2960 | calcptrQ->c = mlvQ->c; |
---|
2961 | calcptrQ->g = mlvQ->g; |
---|
2962 | calcptrQ->t = mlvQ->t; |
---|
2963 | } |
---|
2964 | } |
---|
2965 | } |
---|
2966 | cptr = &(tr->cdta->patcat[0]); |
---|
2967 | |
---|
2968 | for (i = 0; i < tr->cdta->endsite; i++) { |
---|
2969 | refQ = *sQ++; |
---|
2970 | refP = *sP++; |
---|
2971 | |
---|
2972 | cat = *cptr++; |
---|
2973 | |
---|
2974 | if(refP >= 0) |
---|
2975 | { |
---|
2976 | calcptrP = &calcP[refP][cat]; |
---|
2977 | |
---|
2978 | if(refQ >= 0) |
---|
2979 | { |
---|
2980 | |
---|
2981 | calcptrQ = &calcQ[refQ][cat]; |
---|
2982 | suma = calcptrP->fx1a * calcptrQ->a + calcptrP->fx1c * calcptrQ->c + |
---|
2983 | calcptrP->fx1g * calcptrQ->g + calcptrP->fx1t * calcptrQ->t; |
---|
2984 | |
---|
2985 | |
---|
2986 | *sumcptr++ = sumc = calcptrP->sumagct * calcptrQ->sumfx2rfx2y; |
---|
2987 | sumb = calcptrP->sumag * calcptrQ->fx2r * tr->rdta->invfreqr + calcptrP->sumct * calcptrQ->fx2y * tr->rdta->invfreqy; |
---|
2988 | *abptr++ = suma - sumb; |
---|
2989 | *bcptr++ = sumb - sumc; |
---|
2990 | lp++; |
---|
2991 | lq++; |
---|
2992 | } |
---|
2993 | else |
---|
2994 | { |
---|
2995 | suma = calcptrP->fx1a * lq->a + calcptrP->fx1c * lq->c + |
---|
2996 | calcptrP->fx1g * lq->g + calcptrP->fx1t * lq->t; |
---|
2997 | |
---|
2998 | fx2r = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g; |
---|
2999 | fx2y = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t; |
---|
3000 | |
---|
3001 | *sumcptr++ = sumc = calcptrP->sumagct * (fx2r + fx2y); |
---|
3002 | sumb = calcptrP->sumag * fx2r * tr->rdta->invfreqr + calcptrP->sumct * fx2y * tr->rdta->invfreqy ; |
---|
3003 | *abptr++ = suma - sumb; |
---|
3004 | *bcptr++ = sumb - sumc; |
---|
3005 | lp++; |
---|
3006 | lq++; |
---|
3007 | } |
---|
3008 | } |
---|
3009 | else |
---|
3010 | { |
---|
3011 | if(refQ >= 0) |
---|
3012 | { |
---|
3013 | calcptrQ = &calcQ[refQ][cat]; |
---|
3014 | fx1a = tr->rdta->freqa * lp->a; |
---|
3015 | fx1g = tr->rdta->freqg * lp->g; |
---|
3016 | fx1c = tr->rdta->freqc * lp->c; |
---|
3017 | fx1t = tr->rdta->freqt * lp->t; |
---|
3018 | suma = fx1a * calcptrQ->a + fx1c * calcptrQ->c + |
---|
3019 | fx1g * calcptrQ->g + fx1t * calcptrQ->t; |
---|
3020 | fx1r = fx1a + fx1g; |
---|
3021 | fx1y = fx1c + fx1t; |
---|
3022 | *sumcptr++ = sumc = (fx1r + fx1y) * calcptrQ->sumfx2rfx2y; |
---|
3023 | sumb = fx1r * calcptrQ->fx2r * tr->rdta->invfreqr + fx1y * calcptrQ->fx2y * tr->rdta->invfreqy; |
---|
3024 | *abptr++ = suma - sumb; |
---|
3025 | *bcptr++ = sumb - sumc; |
---|
3026 | lp++; |
---|
3027 | lq++; |
---|
3028 | } |
---|
3029 | else |
---|
3030 | { |
---|
3031 | |
---|
3032 | fx1a = tr->rdta->freqa * lp->a; |
---|
3033 | fx1g = tr->rdta->freqg * lp->g; |
---|
3034 | fx1c = tr->rdta->freqc * lp->c; |
---|
3035 | fx1t = tr->rdta->freqt * lp->t; |
---|
3036 | suma = fx1a * lq->a + fx1c * lq->c + fx1g * lq->g + fx1t * lq->t; |
---|
3037 | fx2r = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g; |
---|
3038 | //nur von q abh |
---|
3039 | fx2y = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t; |
---|
3040 | //nur von q abh |
---|
3041 | fx1r = fx1a + fx1g; |
---|
3042 | fx1y = fx1c + fx1t; |
---|
3043 | *sumcptr++ = sumc = (fx1r + fx1y) * (fx2r + fx2y); |
---|
3044 | //zweiter term nur von q abh |
---|
3045 | sumb = fx1r * fx2r * tr->rdta->invfreqr + fx1y * fx2y * tr->rdta->invfreqy; |
---|
3046 | |
---|
3047 | |
---|
3048 | |
---|
3049 | *abptr++ = suma - sumb; |
---|
3050 | *bcptr++ = sumb - sumc; |
---|
3051 | lp++; |
---|
3052 | lq++; |
---|
3053 | } |
---|
3054 | } |
---|
3055 | |
---|
3056 | } |
---|
3057 | |
---|
3058 | z = z0; |
---|
3059 | do { |
---|
3060 | zprev = z; |
---|
3061 | zstep = (1.0 - zmax) * z + zmin; |
---|
3062 | curvatOK = FALSE; |
---|
3063 | |
---|
3064 | do { |
---|
3065 | if (z < zmin) z = zmin; |
---|
3066 | else if (z > zmax) z = zmax; |
---|
3067 | |
---|
3068 | lz = log(z); |
---|
3069 | xvlz = tr->rdta->xv * lz; |
---|
3070 | rptr = &(tr->rdta->catrat[1]); |
---|
3071 | zzptr = &(zztable[1]); |
---|
3072 | zvptr = &(zvtable[1]); |
---|
3073 | |
---|
3074 | # ifdef Vectorize |
---|
3075 | # pragma IVDEP |
---|
3076 | # endif |
---|
3077 | |
---|
3078 | for (i = 1; i <= tr->rdta->categs; i++) { |
---|
3079 | ki = *rptr++; |
---|
3080 | *zzptr++ = exp(ki * lz); |
---|
3081 | *zvptr++ = exp(ki * xvlz); |
---|
3082 | } |
---|
3083 | |
---|
3084 | abptr = abi; |
---|
3085 | bcptr = bci; |
---|
3086 | sumcptr = sumci; |
---|
3087 | cptr = &(tr->cdta->patcat[0]); |
---|
3088 | wrptr = &(tr->cdta->wr[0]); |
---|
3089 | wr2ptr = &(tr->cdta->wr2[0]); |
---|
3090 | dlnLdlz = 0.0; /* = d(ln(likelihood))/d(lz) */ |
---|
3091 | d2lnLdlz2 = 0.0; /* = d2(ln(likelihood))/d(lz)2 */ |
---|
3092 | |
---|
3093 | # ifdef Vectorize |
---|
3094 | # pragma IVDEP |
---|
3095 | # endif |
---|
3096 | |
---|
3097 | for (i = 0; i < tr->cdta->endsite; i++) { |
---|
3098 | cat = *cptr++; /* ratecategory(i) */ |
---|
3099 | ab = *abptr++ * zztable[cat]; |
---|
3100 | bc = *bcptr++ * zvtable[cat]; |
---|
3101 | sumc = *sumcptr++; |
---|
3102 | inv_Li = 1.0/(ab + bc + sumc); |
---|
3103 | t1 = ab * inv_Li; |
---|
3104 | t2 = tr->rdta->xv * bc * inv_Li; |
---|
3105 | dlnLidlz = t1 + t2; |
---|
3106 | dlnLdlz += *wrptr++ * dlnLidlz; |
---|
3107 | d2lnLdlz2 += *wr2ptr++ * (t1 + tr->rdta->xv * t2 - dlnLidlz * dlnLidlz); |
---|
3108 | } |
---|
3109 | |
---|
3110 | if ((d2lnLdlz2 >= 0.0) && (z < zmax)) |
---|
3111 | zprev = z = 0.37 * z + 0.63; /* Bad curvature, shorten branch */ |
---|
3112 | else |
---|
3113 | curvatOK = TRUE; |
---|
3114 | |
---|
3115 | } while (! curvatOK); |
---|
3116 | |
---|
3117 | if (d2lnLdlz2 < 0.0) { |
---|
3118 | z *= exp(-dlnLdlz / d2lnLdlz2); |
---|
3119 | if (z < zmin) z = zmin; |
---|
3120 | if (z > 0.25 * zprev + 0.75) /* Limit steps toward z = 1.0 */ |
---|
3121 | z = 0.25 * zprev + 0.75; |
---|
3122 | } |
---|
3123 | |
---|
3124 | if (z > zmax) z = zmax; |
---|
3125 | |
---|
3126 | } while ((--maxiter > 0) && (ABS(z - zprev) > zstep)); |
---|
3127 | |
---|
3128 | |
---|
3129 | Free(abi); |
---|
3130 | Free(bci); |
---|
3131 | Free(sumci); |
---|
3132 | |
---|
3133 | |
---|
3134 | |
---|
3135 | return z; |
---|
3136 | } /* makenewz */ |
---|
3137 | |
---|
3138 | |
---|
3139 | boolean update (tree *tr, nodeptr p) |
---|
3140 | { /* update */ |
---|
3141 | nodeptr q; |
---|
3142 | double z0, z; |
---|
3143 | |
---|
3144 | q = p->back; |
---|
3145 | z0 = q->z; |
---|
3146 | if ((z = makenewz(tr, p, q, z0, newzpercycle)) == badZ) return FALSE; |
---|
3147 | p->z = q->z = z; |
---|
3148 | if (ABS(z - z0) > deltaz) tr->smoothed = FALSE; |
---|
3149 | return TRUE; |
---|
3150 | } /* update */ |
---|
3151 | |
---|
3152 | |
---|
3153 | boolean smooth (tree *tr, nodeptr p) |
---|
3154 | { /* smooth */ |
---|
3155 | nodeptr q; |
---|
3156 | |
---|
3157 | if (! update(tr, p)) return FALSE; /* Adjust branch */ |
---|
3158 | if (! p->tip) { /* Adjust descendants */ |
---|
3159 | q = p->next; |
---|
3160 | while (q != p) { |
---|
3161 | if (! smooth(tr, q->back)) return FALSE; |
---|
3162 | q = q->next; |
---|
3163 | } |
---|
3164 | |
---|
3165 | # if ReturnSmoothedView |
---|
3166 | |
---|
3167 | if (! newviewOptimized(tr, p)) return FALSE; |
---|
3168 | # endif |
---|
3169 | } |
---|
3170 | |
---|
3171 | return TRUE; |
---|
3172 | } /* smooth */ |
---|
3173 | |
---|
3174 | |
---|
3175 | boolean smoothTree (tree *tr, int maxtimes) |
---|
3176 | { /* smoothTree */ |
---|
3177 | nodeptr p, q; |
---|
3178 | |
---|
3179 | p = tr->start; |
---|
3180 | |
---|
3181 | while (--maxtimes >= 0) { |
---|
3182 | tr->smoothed = TRUE; |
---|
3183 | if (! smooth(tr, p->back)) return FALSE; |
---|
3184 | if (! p->tip) { |
---|
3185 | q = p->next; |
---|
3186 | while (q != p) { |
---|
3187 | if (! smooth(tr, q->back)) return FALSE; |
---|
3188 | q = q->next; |
---|
3189 | } |
---|
3190 | } |
---|
3191 | if (tr->smoothed) break; |
---|
3192 | } |
---|
3193 | |
---|
3194 | return TRUE; |
---|
3195 | } /* smoothTree */ |
---|
3196 | |
---|
3197 | |
---|
3198 | boolean localSmooth (tree *tr, nodeptr p, int maxtimes) |
---|
3199 | { /* localSmooth -- Smooth branches around p */ |
---|
3200 | nodeptr q; |
---|
3201 | |
---|
3202 | if (p->tip) return FALSE; /* Should be an error */ |
---|
3203 | |
---|
3204 | while (--maxtimes >= 0) { |
---|
3205 | tr->smoothed = TRUE; |
---|
3206 | q = p; |
---|
3207 | do { |
---|
3208 | if (! update(tr, q)) return FALSE; |
---|
3209 | q = q->next; |
---|
3210 | } while (q != p); |
---|
3211 | if (tr->smoothed) break; |
---|
3212 | } |
---|
3213 | |
---|
3214 | tr->smoothed = FALSE; /* Only smooth locally */ |
---|
3215 | return TRUE; |
---|
3216 | } /* localSmooth */ |
---|
3217 | |
---|
3218 | |
---|
3219 | void hookup (nodeptr p, nodeptr q, double z) |
---|
3220 | { /* hookup */ |
---|
3221 | p->back = q; |
---|
3222 | q->back = p; |
---|
3223 | p->z = q->z = z; |
---|
3224 | } /* hookup */ |
---|
3225 | |
---|
3226 | |
---|
3227 | /* Insert node p into branch q <-> q->back */ |
---|
3228 | |
---|
3229 | |
---|
3230 | boolean insert (tree *tr, nodeptr p, nodeptr q, boolean glob) |
---|
3231 | /* glob -- Smooth tree globally? */ |
---|
3232 | |
---|
3233 | /* q |
---|
3234 | /. |
---|
3235 | add/ . |
---|
3236 | / . |
---|
3237 | pn . |
---|
3238 | s ---- p .remove |
---|
3239 | pnn . |
---|
3240 | \ . |
---|
3241 | add\ . |
---|
3242 | \. pn = p->next; |
---|
3243 | r pnn = p->next->next; |
---|
3244 | */ |
---|
3245 | |
---|
3246 | { /* insert */ |
---|
3247 | nodeptr r, s; |
---|
3248 | |
---|
3249 | r = q->back; |
---|
3250 | s = p->back; |
---|
3251 | |
---|
3252 | # if BestInsertAverage && ! Master |
---|
3253 | { double zqr, zqs, zrs, lzqr, lzqs, lzrs, lzsum, lzq, lzr, lzs, lzmax; |
---|
3254 | |
---|
3255 | if ((zqr = makenewz(tr, q, r, q->z, iterations)) == badZ) return FALSE; |
---|
3256 | if ((zqs = makenewz(tr, q, s, defaultz, iterations)) == badZ) return FALSE; |
---|
3257 | if ((zrs = makenewz(tr, r, s, defaultz, iterations)) == badZ) return FALSE; |
---|
3258 | |
---|
3259 | |
---|
3260 | |
---|
3261 | |
---|
3262 | lzqr = (zqr > zmin) ? log(zqr) : log(zmin); /* long branches */ |
---|
3263 | lzqs = (zqs > zmin) ? log(zqs) : log(zmin); |
---|
3264 | lzrs = (zrs > zmin) ? log(zrs) : log(zmin); |
---|
3265 | lzsum = 0.5 * (lzqr + lzqs + lzrs); |
---|
3266 | |
---|
3267 | lzq = lzsum - lzrs; |
---|
3268 | lzr = lzsum - lzqs; |
---|
3269 | lzs = lzsum - lzqr; |
---|
3270 | lzmax = log(zmax); |
---|
3271 | |
---|
3272 | if (lzq > lzmax) {lzq = lzmax; lzr = lzqr; lzs = lzqs;} /* short */ |
---|
3273 | else if (lzr > lzmax) {lzr = lzmax; lzq = lzqr; lzs = lzrs;} |
---|
3274 | else if (lzs > lzmax) {lzs = lzmax; lzq = lzqs; lzr = lzrs;} |
---|
3275 | |
---|
3276 | hookup(p->next, q, exp(lzq)); |
---|
3277 | hookup(p->next->next, r, exp(lzr)); |
---|
3278 | hookup(p, s, exp(lzs)); |
---|
3279 | } |
---|
3280 | |
---|
3281 | # else |
---|
3282 | { double z; |
---|
3283 | z = sqrt(q->z); |
---|
3284 | hookup(p->next, q, z); |
---|
3285 | hookup(p->next->next, r, z); |
---|
3286 | } |
---|
3287 | |
---|
3288 | # endif |
---|
3289 | |
---|
3290 | |
---|
3291 | if (! newviewOptimized(tr, p)) return FALSE; |
---|
3292 | |
---|
3293 | |
---|
3294 | |
---|
3295 | tr->opt_level = 0; |
---|
3296 | |
---|
3297 | # if ! Master /* Smoothings are done by slave */ |
---|
3298 | if (glob) { /* Smooth whole tree */ |
---|
3299 | if (! smoothTree(tr, smoothings)) return FALSE; |
---|
3300 | } |
---|
3301 | else { /* Smooth locale of p */ |
---|
3302 | if (! localSmooth(tr, p, smoothings)) return FALSE; |
---|
3303 | } |
---|
3304 | |
---|
3305 | # else |
---|
3306 | tr->likelihood = unlikely; |
---|
3307 | # endif |
---|
3308 | |
---|
3309 | return TRUE; |
---|
3310 | } /* insert */ |
---|
3311 | |
---|
3312 | |
---|
3313 | nodeptr removeNode (tree *tr, nodeptr p) |
---|
3314 | |
---|
3315 | /* q |
---|
3316 | .| |
---|
3317 | remove. | |
---|
3318 | . | |
---|
3319 | pn | |
---|
3320 | s ---- p |add |
---|
3321 | pnn | |
---|
3322 | . | |
---|
3323 | remove. | |
---|
3324 | .| pn = p->next; |
---|
3325 | r pnn = p->next->next; |
---|
3326 | */ |
---|
3327 | |
---|
3328 | /* remove p and return where it was */ |
---|
3329 | { /* removeNode */ |
---|
3330 | double zqr; |
---|
3331 | nodeptr q, r; |
---|
3332 | |
---|
3333 | q = p->next->back; |
---|
3334 | r = p->next->next->back; |
---|
3335 | zqr = q->z * r->z; |
---|
3336 | # if ! Master |
---|
3337 | if ((zqr = makenewz(tr, q, r, zqr, iterations)) == badZ) return (node *) NULL; |
---|
3338 | # endif |
---|
3339 | hookup(q, r, zqr); |
---|
3340 | |
---|
3341 | p->next->next->back = p->next->back = (node *) NULL; |
---|
3342 | return q; |
---|
3343 | } /* removeNode */ |
---|
3344 | |
---|
3345 | |
---|
3346 | boolean initrav (tree *tr, nodeptr p) |
---|
3347 | { /* initrav */ |
---|
3348 | nodeptr q; |
---|
3349 | |
---|
3350 | if (! p->tip) { |
---|
3351 | q = p->next; |
---|
3352 | do { |
---|
3353 | if (! initrav(tr, q->back)) return FALSE; |
---|
3354 | q = q->next; |
---|
3355 | } while (q != p); |
---|
3356 | |
---|
3357 | if (! newviewOptimized(tr, p)) return FALSE; |
---|
3358 | |
---|
3359 | } |
---|
3360 | |
---|
3361 | return TRUE; |
---|
3362 | } /* initrav */ |
---|
3363 | |
---|
3364 | |
---|
3365 | nodeptr buildNewTip (tree *tr, nodeptr p) |
---|
3366 | { /* buildNewTip */ |
---|
3367 | nodeptr q; |
---|
3368 | |
---|
3369 | q = tr->nodep[(tr->nextnode)++]; |
---|
3370 | hookup(p, q, defaultz); |
---|
3371 | return q; |
---|
3372 | } /* buildNewTip */ |
---|
3373 | |
---|
3374 | |
---|
3375 | boolean buildSimpleTree (tree *tr, int ip, int iq, int ir) |
---|
3376 | { /* buildSimpleTree */ |
---|
3377 | /* p, q and r are tips meeting at s */ |
---|
3378 | nodeptr p, s; |
---|
3379 | int i; |
---|
3380 | |
---|
3381 | i = MIN(ip, iq); |
---|
3382 | if (ir < i) i = ir; |
---|
3383 | tr->start = tr->nodep[i]; |
---|
3384 | tr->ntips = 3; |
---|
3385 | p = tr->nodep[ip]; |
---|
3386 | hookup(p, tr->nodep[iq], defaultz); |
---|
3387 | s = buildNewTip(tr, tr->nodep[ir]); |
---|
3388 | |
---|
3389 | return insert(tr, s, p, FALSE); /* Smoothing is local to s */ |
---|
3390 | } /* buildSimpleTree */ |
---|
3391 | |
---|
3392 | #if 0 |
---|
3393 | char * strchr (char *str, int chr) |
---|
3394 | { /* strchr */ |
---|
3395 | int c; |
---|
3396 | |
---|
3397 | while (c = *str) {if (c == chr) return str; str++;} |
---|
3398 | return (char *) NULL; |
---|
3399 | } /* strchr */ |
---|
3400 | |
---|
3401 | |
---|
3402 | char * strstr (char *str1, char *str2) |
---|
3403 | { /* strstr */ |
---|
3404 | char *s1, *s2; |
---|
3405 | int c; |
---|
3406 | |
---|
3407 | while (*(s1 = str1)) { |
---|
3408 | s2 = str2; |
---|
3409 | do { |
---|
3410 | if (! (c = *s2++)) return str1; |
---|
3411 | } |
---|
3412 | while (*s1++ == c); |
---|
3413 | str1++; |
---|
3414 | } |
---|
3415 | return (char *) NULL; |
---|
3416 | } /* strstr */ |
---|
3417 | #endif |
---|
3418 | |
---|
3419 | boolean readKeyValue (char *string, char *key, char *format, void *value) |
---|
3420 | { /* readKeyValue */ |
---|
3421 | |
---|
3422 | if (! (string = strstr(string, key))) return FALSE; |
---|
3423 | string += strlen(key); |
---|
3424 | if (! (string = strchr(string, '='))) return FALSE; |
---|
3425 | string++; |
---|
3426 | return sscanf(string, format, value); /* 1 if read, otherwise 0 */ |
---|
3427 | } /* readKeyValue */ |
---|
3428 | |
---|
3429 | |
---|
3430 | #if Master || Slave |
---|
3431 | |
---|
3432 | double str_readTreeLikelihood (char *treestr) |
---|
3433 | { /* str_readTreeLikelihood */ |
---|
3434 | double lk1; |
---|
3435 | char *com, *com_end; |
---|
3436 | boolean readKeyValue(); |
---|
3437 | |
---|
3438 | if ((com = strchr(treestr, '[')) && (com < strchr(treestr, '(')) |
---|
3439 | && (com_end = strchr(com, ']'))) { |
---|
3440 | com++; |
---|
3441 | *com_end = 0; |
---|
3442 | if (readKeyValue(com, likelihood_key, "%lg", (void *) &(lk1))) { |
---|
3443 | *com_end = ']'; |
---|
3444 | return lk1; |
---|
3445 | } |
---|
3446 | } |
---|
3447 | |
---|
3448 | fprintf(stderr, "ERROR reading likelihood in receiveTree\n"); |
---|
3449 | return badEval; |
---|
3450 | } /* str_readTreeLikelihood */ |
---|
3451 | |
---|
3452 | |
---|
3453 | boolean sendTree (comm_block *comm, tree *tr) |
---|
3454 | { /* sendTree */ |
---|
3455 | char *treestr; |
---|
3456 | char *treeString(); |
---|
3457 | # if Master |
---|
3458 | void sendTreeNum(); |
---|
3459 | # endif |
---|
3460 | |
---|
3461 | comm->done_flag = tr->likelihood > 0.0; |
---|
3462 | if (comm->done_flag) |
---|
3463 | write_comm_msg(comm, NULL); |
---|
3464 | |
---|
3465 | else { |
---|
3466 | treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256); |
---|
3467 | if (! treestr) { |
---|
3468 | fprintf(stderr, "sendTree: Malloc failure\n"); |
---|
3469 | return 0; |
---|
3470 | } |
---|
3471 | |
---|
3472 | # if Master |
---|
3473 | if (send_ahead >= MAX_SEND_AHEAD) { |
---|
3474 | double new_likelihood; |
---|
3475 | int n_to_get; |
---|
3476 | |
---|
3477 | n_to_get = (send_ahead+1)/2; |
---|
3478 | sendTreeNum(n_to_get); |
---|
3479 | send_ahead -= n_to_get; |
---|
3480 | read_comm_msg(& comm_slave, treestr); |
---|
3481 | new_likelihood = str_readTreeLikelihood(treestr); |
---|
3482 | if (new_likelihood == badEval) return FALSE; |
---|
3483 | if (! best_tr_recv || (new_likelihood > best_lk_recv)) { |
---|
3484 | if (best_tr_recv) Free(best_tr_recv); |
---|
3485 | best_tr_recv = Malloc(strlen(treestr) + 1); |
---|
3486 | strcpy(best_tr_recv, treestr); |
---|
3487 | best_lk_recv = new_likelihood; |
---|
3488 | } |
---|
3489 | } |
---|
3490 | send_ahead++; |
---|
3491 | # endif /* End #if Master */ |
---|
3492 | |
---|
3493 | REPORT_SEND_TREE; |
---|
3494 | (void) treeString(treestr, tr, tr->start->back, 1); |
---|
3495 | write_comm_msg(comm, treestr); |
---|
3496 | |
---|
3497 | Free(treestr); |
---|
3498 | } |
---|
3499 | |
---|
3500 | return TRUE; |
---|
3501 | } /* sendTree */ |
---|
3502 | |
---|
3503 | |
---|
3504 | boolean receiveTree (comm_block *comm, tree *tr) |
---|
3505 | { /* receiveTree */ |
---|
3506 | char *treestr; |
---|
3507 | boolean status; |
---|
3508 | boolean str_treeReadLen(); |
---|
3509 | |
---|
3510 | treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256); |
---|
3511 | if (! treestr) { |
---|
3512 | fprintf(stderr, "receiveTree: Malloc failure\n"); |
---|
3513 | return 0; |
---|
3514 | } |
---|
3515 | |
---|
3516 | read_comm_msg(comm, treestr); |
---|
3517 | if (comm->done_flag) { |
---|
3518 | tr->likelihood = 1.0; |
---|
3519 | status = TRUE; |
---|
3520 | } |
---|
3521 | |
---|
3522 | else { |
---|
3523 | # if Master |
---|
3524 | if (best_tr_recv) { |
---|
3525 | if (str_readTreeLikelihood(treestr) < best_lk_recv) { |
---|
3526 | strcpy(treestr, best_tr_recv); /* Overwrite new tree with best */ |
---|
3527 | } |
---|
3528 | Free(best_tr_recv); |
---|
3529 | best_tr_recv = NULL; |
---|
3530 | } |
---|
3531 | # endif /* End #if Master */ |
---|
3532 | |
---|
3533 | status = str_treeReadLen(treestr, tr); |
---|
3534 | } |
---|
3535 | |
---|
3536 | Free(treestr); |
---|
3537 | return status; |
---|
3538 | } /* receiveTree */ |
---|
3539 | |
---|
3540 | |
---|
3541 | void requestForWork (void) |
---|
3542 | { /* requestForWork */ |
---|
3543 | p4_send(DNAML_REQUEST, DNAML_DISPATCHER_ID, NULL, 0); |
---|
3544 | } /* requestForWork */ |
---|
3545 | #endif /* End #if Master || Slave */ |
---|
3546 | |
---|
3547 | |
---|
3548 | #if Master |
---|
3549 | void sendTreeNum(int n_to_get) |
---|
3550 | { /* sendTreeNum */ |
---|
3551 | char scr[512]; |
---|
3552 | |
---|
3553 | sprintf(scr, "%d", n_to_get); |
---|
3554 | p4_send(DNAML_NUM_TREE, DNAML_MERGER_ID, scr, strlen(scr)+1); |
---|
3555 | } /* sendTreeNum */ |
---|
3556 | |
---|
3557 | |
---|
3558 | boolean getReturnedTrees (tree *tr, bestlist *bt, int n_tree_sent) |
---|
3559 | /* n_tree_sent -- number of trees sent to slaves */ |
---|
3560 | { /* getReturnedTrees */ |
---|
3561 | void sendTreeNum(); |
---|
3562 | boolean receiveTree(); |
---|
3563 | |
---|
3564 | sendTreeNum(send_ahead); |
---|
3565 | send_ahead = 0; |
---|
3566 | |
---|
3567 | if (! receiveTree(& comm_slave, tr)) return FALSE; |
---|
3568 | tr->smoothed = TRUE; |
---|
3569 | (void) saveBestTree(bt, tr); |
---|
3570 | |
---|
3571 | return TRUE; |
---|
3572 | } /* getReturnedTrees */ |
---|
3573 | #endif |
---|
3574 | |
---|
3575 | |
---|
3576 | void cacheZ (tree *tr) |
---|
3577 | { /* cacheZ */ |
---|
3578 | nodeptr p; |
---|
3579 | int nodes; |
---|
3580 | |
---|
3581 | nodes = tr->mxtips + 3 * (tr->mxtips - 2); |
---|
3582 | p = tr->nodep[1]; |
---|
3583 | while (nodes-- > 0) {p->z0 = p->z; p++;} |
---|
3584 | } /* cacheZ */ |
---|
3585 | |
---|
3586 | |
---|
3587 | void restoreZ (tree *tr) |
---|
3588 | { /* restoreZ */ |
---|
3589 | nodeptr p; |
---|
3590 | int nodes; |
---|
3591 | |
---|
3592 | nodes = tr->mxtips + 3 * (tr->mxtips - 2); |
---|
3593 | p = tr->nodep[1]; |
---|
3594 | while (nodes-- > 0) {p->z = p->z0; p++;} |
---|
3595 | } /* restoreZ */ |
---|
3596 | |
---|
3597 | |
---|
3598 | boolean testInsert (tree *tr, nodeptr p, nodeptr q, bestlist *bt, boolean fast) |
---|
3599 | { /* testInsert */ |
---|
3600 | double qz; |
---|
3601 | nodeptr r; |
---|
3602 | |
---|
3603 | r = q->back; /* Save original connection */ |
---|
3604 | qz = q->z; |
---|
3605 | if (! insert(tr, p, q, ! fast)) return FALSE; |
---|
3606 | |
---|
3607 | # if ! Master |
---|
3608 | if (evaluate(tr, fast ? p->next->next : tr->start) == badEval) return FALSE; |
---|
3609 | (void) saveBestTree(bt, tr); |
---|
3610 | # else /* Master */ |
---|
3611 | tr->likelihood = unlikely; |
---|
3612 | if (! sendTree(& comm_slave, tr)) return FALSE; |
---|
3613 | # endif |
---|
3614 | |
---|
3615 | /* remove p from this branch */ |
---|
3616 | |
---|
3617 | hookup(q, r, qz); |
---|
3618 | p->next->next->back = p->next->back = (nodeptr) NULL; |
---|
3619 | if (! fast) { /* With fast add, other values are still OK */ |
---|
3620 | restoreZ(tr); /* Restore branch lengths */ |
---|
3621 | # if ! Master /* Regenerate x values */ |
---|
3622 | if (! initrav(tr, p->back)) return FALSE; |
---|
3623 | if (! initrav(tr, q)) return FALSE; |
---|
3624 | if (! initrav(tr, r)) return FALSE; |
---|
3625 | # endif |
---|
3626 | } |
---|
3627 | |
---|
3628 | return TRUE; |
---|
3629 | } /* testInsert */ |
---|
3630 | |
---|
3631 | |
---|
3632 | int addTraverse (tree *tr, nodeptr p, nodeptr q, |
---|
3633 | int mintrav, int maxtrav, bestlist *bt, boolean fast) |
---|
3634 | { /* addTraverse */ |
---|
3635 | int tested, newtested; |
---|
3636 | |
---|
3637 | tested = 0; |
---|
3638 | if (--mintrav <= 0) { /* Moved minimum distance? */ |
---|
3639 | if (! testInsert(tr, p, q, bt, fast)) return badRear; |
---|
3640 | tested++; |
---|
3641 | } |
---|
3642 | |
---|
3643 | if ((! q->tip) && (--maxtrav > 0)) { /* Continue traverse? */ |
---|
3644 | newtested = addTraverse(tr, p, q->next->back, |
---|
3645 | mintrav, maxtrav, bt, fast); |
---|
3646 | if (newtested == badRear) return badRear; |
---|
3647 | tested += newtested; |
---|
3648 | newtested = addTraverse(tr, p, q->next->next->back, |
---|
3649 | mintrav, maxtrav, bt, fast); |
---|
3650 | if (newtested == badRear) return badRear; |
---|
3651 | tested += newtested; |
---|
3652 | } |
---|
3653 | |
---|
3654 | return tested; |
---|
3655 | } /* addTraverse */ |
---|
3656 | |
---|
3657 | |
---|
3658 | int rearrange (tree *tr, nodeptr p, int mintrav, int maxtrav, bestlist *bt) |
---|
3659 | /* rearranges the tree, globally or locally */ |
---|
3660 | { /* rearrange */ |
---|
3661 | double p1z, p2z, q1z, q2z; |
---|
3662 | nodeptr p1, p2, q, q1, q2; |
---|
3663 | int tested, mintrav2, newtested; |
---|
3664 | |
---|
3665 | tested = 0; |
---|
3666 | if (maxtrav < 1 || mintrav > maxtrav) return tested; |
---|
3667 | |
---|
3668 | /* Moving subtree forward in tree. */ |
---|
3669 | |
---|
3670 | if (! p->tip) { |
---|
3671 | p1 = p->next->back; |
---|
3672 | p2 = p->next->next->back; |
---|
3673 | if (! p1->tip || ! p2->tip) { |
---|
3674 | p1z = p1->z; |
---|
3675 | p2z = p2->z; |
---|
3676 | if (! removeNode(tr, p)) return badRear; |
---|
3677 | cacheZ(tr); |
---|
3678 | if (! p1->tip) { |
---|
3679 | newtested = addTraverse(tr, p, p1->next->back, |
---|
3680 | mintrav, maxtrav, bt, FALSE); |
---|
3681 | if (newtested == badRear) return badRear; |
---|
3682 | tested += newtested; |
---|
3683 | newtested = addTraverse(tr, p, p1->next->next->back, |
---|
3684 | mintrav, maxtrav, bt, FALSE); |
---|
3685 | if (newtested == badRear) return badRear; |
---|
3686 | tested += newtested; |
---|
3687 | } |
---|
3688 | |
---|
3689 | if (! p2->tip) { |
---|
3690 | newtested = addTraverse(tr, p, p2->next->back, |
---|
3691 | mintrav, maxtrav, bt, FALSE); |
---|
3692 | if (newtested == badRear) return badRear; |
---|
3693 | tested += newtested; |
---|
3694 | newtested = addTraverse(tr, p, p2->next->next->back, |
---|
3695 | mintrav, maxtrav, bt, FALSE); |
---|
3696 | if (newtested == badRear) return badRear; |
---|
3697 | tested += newtested; |
---|
3698 | } |
---|
3699 | |
---|
3700 | hookup(p->next, p1, p1z); /* Restore original tree */ |
---|
3701 | hookup(p->next->next, p2, p2z); |
---|
3702 | if (! (initrav(tr, tr->start) |
---|
3703 | && initrav(tr, tr->start->back))) return badRear; |
---|
3704 | } |
---|
3705 | } /* if (! p->tip) */ |
---|
3706 | |
---|
3707 | /* Moving subtree backward in tree. Minimum move is 2 to avoid duplicates */ |
---|
3708 | |
---|
3709 | q = p->back; |
---|
3710 | if (! q->tip && maxtrav > 1) { |
---|
3711 | q1 = q->next->back; |
---|
3712 | q2 = q->next->next->back; |
---|
3713 | if (! q1->tip && (!q1->next->back->tip || !q1->next->next->back->tip) || |
---|
3714 | ! q2->tip && (!q2->next->back->tip || !q2->next->next->back->tip)) { |
---|
3715 | q1z = q1->z; |
---|
3716 | q2z = q2->z; |
---|
3717 | if (! removeNode(tr, q)) return badRear; |
---|
3718 | cacheZ(tr); |
---|
3719 | mintrav2 = mintrav > 2 ? mintrav : 2; |
---|
3720 | |
---|
3721 | if (! q1->tip) { |
---|
3722 | newtested = addTraverse(tr, q, q1->next->back, |
---|
3723 | mintrav2 , maxtrav, bt, FALSE); |
---|
3724 | if (newtested == badRear) return badRear; |
---|
3725 | tested += newtested; |
---|
3726 | newtested = addTraverse(tr, q, q1->next->next->back, |
---|
3727 | mintrav2 , maxtrav, bt, FALSE); |
---|
3728 | if (newtested == badRear) return badRear; |
---|
3729 | tested += newtested; |
---|
3730 | } |
---|
3731 | |
---|
3732 | if (! q2->tip) { |
---|
3733 | newtested = addTraverse(tr, q, q2->next->back, |
---|
3734 | mintrav2 , maxtrav, bt, FALSE); |
---|
3735 | if (newtested == badRear) return badRear; |
---|
3736 | tested += newtested; |
---|
3737 | newtested = addTraverse(tr, q, q2->next->next->back, |
---|
3738 | mintrav2 , maxtrav, bt, FALSE); |
---|
3739 | if (newtested == badRear) return badRear; |
---|
3740 | tested += newtested; |
---|
3741 | } |
---|
3742 | |
---|
3743 | hookup(q->next, q1, q1z); /* Restore original tree */ |
---|
3744 | hookup(q->next->next, q2, q2z); |
---|
3745 | if (! (initrav(tr, tr->start) |
---|
3746 | && initrav(tr, tr->start->back))) return badRear; |
---|
3747 | } |
---|
3748 | } /* if (! q->tip && maxtrav > 1) */ |
---|
3749 | |
---|
3750 | /* Move other subtrees */ |
---|
3751 | |
---|
3752 | if (! p->tip) { |
---|
3753 | newtested = rearrange(tr, p->next->back, mintrav, maxtrav, bt); |
---|
3754 | if (newtested == badRear) return badRear; |
---|
3755 | tested += newtested; |
---|
3756 | newtested = rearrange(tr, p->next->next->back, mintrav, maxtrav, bt); |
---|
3757 | if (newtested == badRear) return badRear; |
---|
3758 | tested += newtested; |
---|
3759 | } |
---|
3760 | |
---|
3761 | return tested; |
---|
3762 | } /* rearrange */ |
---|
3763 | |
---|
3764 | |
---|
3765 | FILE *fopen_pid (char *filenm, char *mode, char *name_pid) |
---|
3766 | { /* fopen_pid */ |
---|
3767 | |
---|
3768 | (void) sprintf(name_pid, "%s.%d", filenm, getpid()); |
---|
3769 | return fopen(name_pid, mode); |
---|
3770 | } /* fopen_pid */ |
---|
3771 | |
---|
3772 | |
---|
3773 | #if DeleteCheckpointFile |
---|
3774 | void unlink_pid (char *filenm) |
---|
3775 | { /* unlink_pid */ |
---|
3776 | char scr[512]; |
---|
3777 | |
---|
3778 | (void) sprintf(scr, "%s.%d", filenm, getpid()); |
---|
3779 | unlink(scr); |
---|
3780 | } /* unlink_pid */ |
---|
3781 | #endif |
---|
3782 | |
---|
3783 | |
---|
3784 | void writeCheckpoint (tree *tr) |
---|
3785 | { /* writeCheckpoint */ |
---|
3786 | char filename[128]; |
---|
3787 | FILE *checkpointf; |
---|
3788 | void treeOut(); |
---|
3789 | |
---|
3790 | checkpointf = fopen_pid(checkpointname, "a", filename); |
---|
3791 | if (checkpointf) { |
---|
3792 | treeOut(checkpointf, tr, treeNewick); |
---|
3793 | (void) fclose(checkpointf); |
---|
3794 | } |
---|
3795 | } /* writeCheckpoint */ |
---|
3796 | |
---|
3797 | |
---|
3798 | node * findAnyTip(nodeptr p) |
---|
3799 | { /* findAnyTip */ |
---|
3800 | return p->tip ? p : findAnyTip(p->next->back); |
---|
3801 | } /* findAnyTip */ |
---|
3802 | |
---|
3803 | |
---|
3804 | boolean optimize (tree *tr, int maxtrav, bestlist *bt) |
---|
3805 | { /* optimize */ |
---|
3806 | nodeptr p; |
---|
3807 | int mintrav, tested; |
---|
3808 | |
---|
3809 | if (tr->ntips < 4) return TRUE; |
---|
3810 | |
---|
3811 | writeCheckpoint(tr); /* checkpoint the starting tree */ |
---|
3812 | |
---|
3813 | if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3; |
---|
3814 | if (maxtrav <= tr->opt_level) return TRUE; |
---|
3815 | |
---|
3816 | printf(" Doing %s rearrangements\n", |
---|
3817 | (maxtrav == 1) ? "local" : |
---|
3818 | (maxtrav < tr->ntips - 3) ? "regional" : "global"); |
---|
3819 | |
---|
3820 | /* loop while tree gets better */ |
---|
3821 | |
---|
3822 | do { |
---|
3823 | (void) startOpt(bt, tr); |
---|
3824 | mintrav = tr->opt_level + 1; |
---|
3825 | |
---|
3826 | /* rearrange must start from a tip or it will miss some trees */ |
---|
3827 | |
---|
3828 | p = findAnyTip(tr->start); |
---|
3829 | tested = rearrange(tr, p->back, mintrav, maxtrav, bt); |
---|
3830 | if (tested == badRear) return FALSE; |
---|
3831 | |
---|
3832 | # if Master |
---|
3833 | if (! getReturnedTrees(tr, bt, tested)) return FALSE; |
---|
3834 | # endif |
---|
3835 | |
---|
3836 | bt->numtrees += tested; |
---|
3837 | (void) setOptLevel(bt, maxtrav); |
---|
3838 | if (! recallBestTree(bt, 1, tr)) return FALSE; /* recover best tree */ |
---|
3839 | |
---|
3840 | printf(" Tested %d alternative trees\n", tested); |
---|
3841 | |
---|
3842 | if (bt->improved) { |
---|
3843 | printf(" Ln Likelihood =%14.5f\n", tr->likelihood); |
---|
3844 | } |
---|
3845 | |
---|
3846 | writeCheckpoint(tr); /* checkpoint the new tree */ |
---|
3847 | } while (maxtrav > tr->opt_level); |
---|
3848 | |
---|
3849 | return TRUE; |
---|
3850 | } /* optimize */ |
---|
3851 | |
---|
3852 | |
---|
3853 | void coordinates (tree *tr, nodeptr p, double lengthsum, drawdata *tdptr) |
---|
3854 | { /* coordinates */ |
---|
3855 | /* establishes coordinates of nodes */ |
---|
3856 | double x, z; |
---|
3857 | nodeptr q, first, last; |
---|
3858 | |
---|
3859 | if (p->tip) { |
---|
3860 | p->xcoord = NINT(over * lengthsum); |
---|
3861 | p->ymax = p->ymin = p->ycoord = tdptr->tipy; |
---|
3862 | tdptr->tipy += down; |
---|
3863 | if (lengthsum > tdptr->tipmax) tdptr->tipmax = lengthsum; |
---|
3864 | } |
---|
3865 | |
---|
3866 | else { |
---|
3867 | q = p->next; |
---|
3868 | do { |
---|
3869 | z = q->z; |
---|
3870 | if (z < zmin) z = zmin; |
---|
3871 | x = lengthsum - tr->rdta->fracchange * log(z); |
---|
3872 | coordinates(tr, q->back, x, tdptr); |
---|
3873 | q = q->next; |
---|
3874 | } while (p == tr->start->back ? q != p->next : q != p); |
---|
3875 | |
---|
3876 | first = p->next->back; |
---|
3877 | q = p; |
---|
3878 | while (q->next != p) q = q->next; |
---|
3879 | last = q->back; |
---|
3880 | p->xcoord = NINT(over * lengthsum); |
---|
3881 | p->ycoord = (first->ycoord + last->ycoord)/2; |
---|
3882 | p->ymin = first->ymin; |
---|
3883 | p->ymax = last->ymax; |
---|
3884 | } |
---|
3885 | } /* coordinates */ |
---|
3886 | |
---|
3887 | |
---|
3888 | void drawline (tree *tr, int i, double scale) |
---|
3889 | /* draws one row of the tree diagram by moving up tree */ |
---|
3890 | /* Modified to handle 1000 taxa, October 16, 1991 */ |
---|
3891 | { /* drawline */ |
---|
3892 | nodeptr p, q, r, first, last; |
---|
3893 | int n, j, k, l, extra; |
---|
3894 | boolean done; |
---|
3895 | |
---|
3896 | p = q = tr->start->back; |
---|
3897 | extra = 0; |
---|
3898 | |
---|
3899 | if (i == p->ycoord) { |
---|
3900 | k = q->number - tr->mxtips; |
---|
3901 | for (j = k; j < 1000; j *= 10) putchar('-'); |
---|
3902 | printf("%d", k); |
---|
3903 | extra = 1; |
---|
3904 | } |
---|
3905 | else printf(" "); |
---|
3906 | |
---|
3907 | do { |
---|
3908 | if (! p->tip) { |
---|
3909 | r = p->next; |
---|
3910 | done = FALSE; |
---|
3911 | do { |
---|
3912 | if ((i >= r->back->ymin) && (i <= r->back->ymax)) { |
---|
3913 | q = r->back; |
---|
3914 | done = TRUE; |
---|
3915 | } |
---|
3916 | r = r->next; |
---|
3917 | } while (! done && (p == tr->start->back ? r != p->next : r != p)); |
---|
3918 | |
---|
3919 | first = p->next->back; |
---|
3920 | r = p; |
---|
3921 | while (r->next != p) r = r->next; |
---|
3922 | last = r->back; |
---|
3923 | if (p == tr->start->back) last = p->back; |
---|
3924 | } |
---|
3925 | |
---|
3926 | done = (p->tip) || (p == q); |
---|
3927 | n = NINT(scale*(q->xcoord - p->xcoord)); |
---|
3928 | if ((n < 3) && (! q->tip)) n = 3; |
---|
3929 | n -= extra; |
---|
3930 | extra = 0; |
---|
3931 | |
---|
3932 | if ((q->ycoord == i) && (! done)) { |
---|
3933 | if (p->ycoord != q->ycoord) putchar('+'); |
---|
3934 | else putchar('-'); |
---|
3935 | |
---|
3936 | if (! q->tip) { |
---|
3937 | k = q->number - tr->mxtips; |
---|
3938 | l = n - 3; |
---|
3939 | for (j = k; j < 100; j *= 10) l++; |
---|
3940 | for (j = 1; j <= l; j++) putchar('-'); |
---|
3941 | printf("%d", k); |
---|
3942 | extra = 1; |
---|
3943 | } |
---|
3944 | else for (j = 1; j <= n-1; j++) putchar('-'); |
---|
3945 | } |
---|
3946 | |
---|
3947 | else if (! p->tip) { |
---|
3948 | if ((last->ycoord > i) && (first->ycoord < i) && (i != p->ycoord)) { |
---|
3949 | putchar('!'); |
---|
3950 | for (j = 1; j <= n-1; j++) putchar(' '); |
---|
3951 | } |
---|
3952 | else for (j = 1; j <= n; j++) putchar(' '); |
---|
3953 | } |
---|
3954 | |
---|
3955 | else |
---|
3956 | for (j = 1; j <= n; j++) putchar(' '); |
---|
3957 | |
---|
3958 | p = q; |
---|
3959 | } while (! done); |
---|
3960 | |
---|
3961 | if ((p->ycoord == i) && p->tip) { |
---|
3962 | printf(" %s", p->name); |
---|
3963 | } |
---|
3964 | |
---|
3965 | putchar('\n'); |
---|
3966 | } /* drawline */ |
---|
3967 | |
---|
3968 | |
---|
3969 | void printTree (tree *tr, analdef *adef) |
---|
3970 | /* prints out diagram of the tree */ |
---|
3971 | { /* printTree */ |
---|
3972 | drawdata tipdata; |
---|
3973 | double scale; |
---|
3974 | int i, imax; |
---|
3975 | |
---|
3976 | if (adef->trprint) { |
---|
3977 | putchar('\n'); |
---|
3978 | tipdata.tipy = 1; |
---|
3979 | tipdata.tipmax = 0.0; |
---|
3980 | coordinates(tr, tr->start->back, (double) 0.0, & tipdata); |
---|
3981 | scale = 1.0 / tipdata.tipmax; |
---|
3982 | imax = tipdata.tipy - down; |
---|
3983 | for (i = 1; i <= imax; i++) drawline(tr, i, scale); |
---|
3984 | printf("\nRemember: "); |
---|
3985 | if (adef->root) printf("(although rooted by outgroup) "); |
---|
3986 | printf("this is an unrooted tree!\n\n"); |
---|
3987 | } |
---|
3988 | } /* printTree */ |
---|
3989 | |
---|
3990 | |
---|
3991 | double sigma (tree *tr, nodeptr p, double *sumlrptr) |
---|
3992 | /* compute standard deviation */ |
---|
3993 | { /* sigma */ |
---|
3994 | likelivector *lp, *lq; |
---|
3995 | homType *mlvP, *mlvQ; |
---|
3996 | double slope, sum, sumlr, z, zv, zz, lz, |
---|
3997 | rat, suma, sumb, sumc, d2, d, li, temp, abzz, bczv, t3, |
---|
3998 | fx1a, fx1c, fx1g, fx1t, fx1r, fx1y, fx2r, fx2y, w; |
---|
3999 | double *rptr; |
---|
4000 | nodeptr q, realQ, realP; |
---|
4001 | int i, *wptr, j, cat, *cptr; |
---|
4002 | register char *sQ; |
---|
4003 | register char *sP; |
---|
4004 | long refQ, refP; |
---|
4005 | memP calcP[EQUALITIES][maxcategories], *calcptrP; |
---|
4006 | memQ calcQ[EQUALITIES][maxcategories], *calcptrQ; |
---|
4007 | |
---|
4008 | q = p->back; |
---|
4009 | while ((! p->x) || (! q->x)) { |
---|
4010 | /*stamatak*/ |
---|
4011 | |
---|
4012 | if (! (p->x)) if (! newviewOptimized(tr, p)) return -1.0; |
---|
4013 | if (! (q->x)) if (! newviewOptimized(tr, q)) return -1.0; |
---|
4014 | } |
---|
4015 | |
---|
4016 | lp = p->x->lv; |
---|
4017 | lq = q->x->lv; |
---|
4018 | |
---|
4019 | z = p->z; |
---|
4020 | if (z < zmin) z = zmin; |
---|
4021 | lz = log(z); |
---|
4022 | cptr = &(tr->cdta->patcat[0]); |
---|
4023 | wptr = &(tr->cdta->aliaswgt[0]); |
---|
4024 | rptr = &(tr->cdta->patrat[0]); |
---|
4025 | sum = sumlr = slope = 0.0; |
---|
4026 | |
---|
4027 | # ifdef Vectorize |
---|
4028 | # pragma IVDEP |
---|
4029 | # endif |
---|
4030 | |
---|
4031 | realQ = tr->nodep[q->number]; |
---|
4032 | realP = tr->nodep[p->number]; |
---|
4033 | sQ = realQ->equalityVector; |
---|
4034 | sP = realP->equalityVector; |
---|
4035 | |
---|
4036 | for (j = 1; j <= tr->rdta->categs; j++) |
---|
4037 | { |
---|
4038 | |
---|
4039 | for(i = 0; i < equalities; i++) |
---|
4040 | { |
---|
4041 | mlvP = &realP->mlv[i][j]; |
---|
4042 | if(mlvP->set) |
---|
4043 | { |
---|
4044 | calcptrP = &calcP[i][j]; |
---|
4045 | calcptrP->fx1a = tr->rdta->freqa * mlvP->a; |
---|
4046 | calcptrP->fx1g = tr->rdta->freqg * mlvP->g; |
---|
4047 | calcptrP->fx1c = tr->rdta->freqc * mlvP->c; |
---|
4048 | calcptrP->fx1t = tr->rdta->freqt * mlvP->t; |
---|
4049 | calcptrP->sumag = calcptrP->fx1a + calcptrP->fx1g; |
---|
4050 | calcptrP->sumct = calcptrP->fx1c + calcptrP->fx1t; |
---|
4051 | calcptrP->sumagct = calcptrP->sumag + calcptrP->sumct; |
---|
4052 | calcptrP->exp = mlvP->exp; |
---|
4053 | } |
---|
4054 | mlvQ = &realQ->mlv[i][j]; |
---|
4055 | if(mlvQ->set) |
---|
4056 | { |
---|
4057 | calcptrQ = &calcQ[i][j]; |
---|
4058 | calcptrQ->fx2r = tr->rdta->freqa * mlvQ->a + tr->rdta->freqg * mlvQ->g; |
---|
4059 | calcptrQ->fx2y = tr->rdta->freqc * mlvQ->c + tr->rdta->freqt * mlvQ->t; |
---|
4060 | calcptrQ->sumfx2rfx2y = calcptrQ->fx2r + calcptrQ->fx2y; |
---|
4061 | calcptrQ->exp = mlvQ->exp; |
---|
4062 | calcptrQ->a = mlvQ->a; |
---|
4063 | calcptrQ->c = mlvQ->c; |
---|
4064 | calcptrQ->g = mlvQ->g; |
---|
4065 | calcptrQ->t = mlvQ->t; |
---|
4066 | } |
---|
4067 | } |
---|
4068 | } |
---|
4069 | |
---|
4070 | |
---|
4071 | |
---|
4072 | |
---|
4073 | |
---|
4074 | for (i = 0; i < tr->cdta->endsite; i++) { |
---|
4075 | rat = *rptr++; |
---|
4076 | cat = *cptr++; |
---|
4077 | refQ = *sQ++; |
---|
4078 | refP = *sP++; |
---|
4079 | |
---|
4080 | zz = exp(rat * lz); |
---|
4081 | zv = exp(rat * tr->rdta->xv * lz); |
---|
4082 | |
---|
4083 | |
---|
4084 | if(refP >= 0) |
---|
4085 | { |
---|
4086 | if(refQ >= 0) |
---|
4087 | { |
---|
4088 | calcptrP = &calcP[refP][cat]; |
---|
4089 | calcptrQ = &calcQ[refQ][cat]; |
---|
4090 | suma = calcptrP->fx1a * calcptrQ->a + |
---|
4091 | calcptrP->fx1c * calcptrQ->c + |
---|
4092 | calcptrP->fx1g * calcptrQ->g + calcptrP->fx1t * calcptrQ->t; |
---|
4093 | |
---|
4094 | |
---|
4095 | sumc = calcptrP->sumagct * calcptrQ->sumfx2rfx2y; |
---|
4096 | sumb = calcptrP->sumag * calcptrQ->fx2r * tr->rdta->invfreqr + calcptrP->sumct * calcptrQ->fx2y * tr->rdta->invfreqy; |
---|
4097 | |
---|
4098 | } |
---|
4099 | else |
---|
4100 | { |
---|
4101 | calcptrP = &calcP[refP][cat]; |
---|
4102 | suma = calcptrP->fx1a * lq->a + calcptrP->fx1c * lq->c + |
---|
4103 | calcptrP->fx1g * lq->g + calcptrP->fx1t * lq->t; |
---|
4104 | |
---|
4105 | fx2r = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g; |
---|
4106 | fx2y = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t; |
---|
4107 | |
---|
4108 | sumc = calcptrP->sumagct * (fx2r + fx2y); |
---|
4109 | sumb = calcptrP->sumag * fx2r * tr->rdta->invfreqr + calcptrP->sumct * fx2y * tr->rdta->invfreqy; |
---|
4110 | |
---|
4111 | } |
---|
4112 | } |
---|
4113 | else |
---|
4114 | { |
---|
4115 | if(refQ >= 0) |
---|
4116 | { |
---|
4117 | calcptrQ = &calcQ[refQ][cat]; |
---|
4118 | fx1a = tr->rdta->freqa * lp->a; |
---|
4119 | fx1g = tr->rdta->freqg * lp->g; |
---|
4120 | fx1c = tr->rdta->freqc * lp->c; |
---|
4121 | fx1t = tr->rdta->freqt * lp->t; |
---|
4122 | suma = fx1a * calcptrQ->a + fx1c * calcptrQ->c + |
---|
4123 | fx1g * calcptrQ->g + fx1t * calcptrQ->t; |
---|
4124 | fx1r = fx1a + fx1g; |
---|
4125 | fx1y = fx1c + fx1t; |
---|
4126 | sumc = (fx1r + fx1y) * calcptrQ->sumfx2rfx2y; |
---|
4127 | sumb = fx1r * calcptrQ->fx2r * tr->rdta->invfreqr + fx1y * calcptrQ->fx2y * tr->rdta->invfreqy; |
---|
4128 | |
---|
4129 | } |
---|
4130 | else |
---|
4131 | { |
---|
4132 | fx1a = tr->rdta->freqa * lp->a; |
---|
4133 | fx1g = tr->rdta->freqg * lp->g; |
---|
4134 | fx1c = tr->rdta->freqc * lp->c; |
---|
4135 | fx1t = tr->rdta->freqt * lp->t; |
---|
4136 | suma = fx1a * lq->a + fx1c * lq->c + fx1g * lq->g + fx1t * lq->t; |
---|
4137 | fx2r = tr->rdta->freqa * lq->a + tr->rdta->freqg * lq->g; |
---|
4138 | fx2y = tr->rdta->freqc * lq->c + tr->rdta->freqt * lq->t; |
---|
4139 | fx1r = fx1a + fx1g; |
---|
4140 | fx1y = fx1c + fx1t; |
---|
4141 | sumc = (fx1r + fx1y) * (fx2r + fx2y); |
---|
4142 | sumb = fx1r * fx2r * tr->rdta->invfreqr + fx1y * fx2y * tr->rdta->invfreqy; |
---|
4143 | |
---|
4144 | } |
---|
4145 | } |
---|
4146 | |
---|
4147 | |
---|
4148 | |
---|
4149 | abzz = zz * (suma - sumb); |
---|
4150 | bczv = zv * (sumb - sumc); |
---|
4151 | li = sumc + abzz + bczv; |
---|
4152 | t3 = tr->rdta->xv * bczv; |
---|
4153 | d = abzz + t3; |
---|
4154 | d2 = rat * (abzz*(rat-1.0) + t3*(rat * tr->rdta->xv - 1.0)); |
---|
4155 | |
---|
4156 | temp = rat * d / li; |
---|
4157 | w = *wptr++; |
---|
4158 | slope += w * temp; |
---|
4159 | sum += w * (temp * temp - d2/li); |
---|
4160 | sumlr += w * log(li / (suma + 1.0E-300)); |
---|
4161 | lp++; |
---|
4162 | lq++; |
---|
4163 | } |
---|
4164 | |
---|
4165 | *sumlrptr = sumlr; |
---|
4166 | return (sum > 1.0E-300) ? z*(-slope + sqrt(slope*slope + 3.841*sum))/sum |
---|
4167 | : 1.0; |
---|
4168 | } /* sigma */ |
---|
4169 | |
---|
4170 | |
---|
4171 | void describe (tree *tr, nodeptr p) |
---|
4172 | /* print out information for one branch */ |
---|
4173 | { /* describe */ |
---|
4174 | double z, s, sumlr; |
---|
4175 | nodeptr q; |
---|
4176 | char *nameptr; |
---|
4177 | int k, ch; |
---|
4178 | |
---|
4179 | q = p->back; |
---|
4180 | printf("%4d ", q->number - tr->mxtips); |
---|
4181 | if (p->tip) { |
---|
4182 | nameptr = p->name; |
---|
4183 | k = nmlngth; |
---|
4184 | while (ch = *nameptr++) {putchar(ch); k--;} |
---|
4185 | while (--k >= 0) putchar(' '); |
---|
4186 | } |
---|
4187 | else { |
---|
4188 | printf("%4d", p->number - tr->mxtips); |
---|
4189 | for (k = 4; k < nmlngth; k++) putchar(' '); |
---|
4190 | } |
---|
4191 | |
---|
4192 | z = q->z; |
---|
4193 | if (z <= zmin) printf(" infinity"); |
---|
4194 | else printf("%15.5f", -log(z) * tr->rdta->fracchange); |
---|
4195 | |
---|
4196 | s = sigma(tr, q, & sumlr); |
---|
4197 | printf(" ("); |
---|
4198 | if (z + s >= zmax) printf(" zero"); |
---|
4199 | else printf("%9.5f", (double) -log(z + s) * tr->rdta->fracchange); |
---|
4200 | putchar(','); |
---|
4201 | if (z - s <= zmin) printf(" infinity"); |
---|
4202 | else printf("%12.5f", (double) -log(z - s) * tr->rdta->fracchange); |
---|
4203 | putchar(')'); |
---|
4204 | |
---|
4205 | if (sumlr > 2.995 ) printf(" **"); |
---|
4206 | else if (sumlr > 1.9205) printf(" *"); |
---|
4207 | putchar('\n'); |
---|
4208 | |
---|
4209 | if (! p->tip) { |
---|
4210 | describe(tr, p->next->back); |
---|
4211 | describe(tr, p->next->next->back); |
---|
4212 | } |
---|
4213 | } /* describe */ |
---|
4214 | |
---|
4215 | |
---|
4216 | void summarize (tree *tr) |
---|
4217 | /* print out branch length information and node numbers */ |
---|
4218 | { /* summarize */ |
---|
4219 | printf("Ln Likelihood =%14.5f\n", tr->likelihood); |
---|
4220 | putchar('\n'); |
---|
4221 | printf(" Between And Length"); |
---|
4222 | printf(" Approx. Confidence Limits\n"); |
---|
4223 | printf(" ------- --- ------"); |
---|
4224 | printf(" ------- ---------- ------\n"); |
---|
4225 | |
---|
4226 | describe(tr, tr->start->back->next->back); |
---|
4227 | describe(tr, tr->start->back->next->next->back); |
---|
4228 | describe(tr, tr->start); |
---|
4229 | putchar('\n'); |
---|
4230 | printf(" * = significantly positive, P < 0.05\n"); |
---|
4231 | printf(" ** = significantly positive, P < 0.01\n\n\n"); |
---|
4232 | globTime = gettime() - globTime; |
---|
4233 | fprintf(stderr, "Acc time global: %f\n", globTime); |
---|
4234 | printf("Acc time global: %f\n", globTime); |
---|
4235 | fprintf(stderr, "FLOPBS: %ld\n", flopBlocks); |
---|
4236 | fprintf(stderr,"depth: %d\n", tr->eqDepth); |
---|
4237 | printf("FLOPPBS: %ld\n", flopBlocks); |
---|
4238 | fprintf(stderr,"countEQ: %ld, countNEQ: %ld\n", countEQ, countNEQ); |
---|
4239 | |
---|
4240 | } /* summarize */ |
---|
4241 | |
---|
4242 | |
---|
4243 | /*=========== This is a problem if tr->start->back is a tip! ===========*/ |
---|
4244 | /* All routines should be contrived so that tr->start->back is not a tip */ |
---|
4245 | |
---|
4246 | char *treeString (char *treestr, tree *tr, nodeptr p, int form) |
---|
4247 | /* write string with representation of tree */ |
---|
4248 | /* form == 1 -> Newick tree */ |
---|
4249 | /* form == 2 -> Prolog fact */ |
---|
4250 | /* form == 3 -> PHYLIP tree */ |
---|
4251 | { /* treeString */ |
---|
4252 | double x, z; |
---|
4253 | char *nameptr; |
---|
4254 | int c; |
---|
4255 | |
---|
4256 | if (p == tr->start->back) { |
---|
4257 | if (form != treePHYLIP) { |
---|
4258 | if (form == treeProlog) { |
---|
4259 | (void) sprintf(treestr, "phylip_tree("); |
---|
4260 | while (*treestr) treestr++; /* move pointer to null */ |
---|
4261 | } |
---|
4262 | |
---|
4263 | (void) sprintf(treestr, "[&&%s: version = '%s'", |
---|
4264 | programName, programVersion); |
---|
4265 | while (*treestr) treestr++; |
---|
4266 | |
---|
4267 | (void) sprintf(treestr, ", %s = %15.13g", |
---|
4268 | likelihood_key, tr->likelihood); |
---|
4269 | while (*treestr) treestr++; |
---|
4270 | |
---|
4271 | (void) sprintf(treestr, ", %s = %d", ntaxa_key, tr->ntips); |
---|
4272 | while (*treestr) treestr++; |
---|
4273 | |
---|
4274 | (void) sprintf(treestr,", %s = %d", opt_level_key, tr->opt_level); |
---|
4275 | while (*treestr) treestr++; |
---|
4276 | |
---|
4277 | (void) sprintf(treestr, ", %s = %d", smoothed_key, tr->smoothed); |
---|
4278 | while (*treestr) treestr++; |
---|
4279 | |
---|
4280 | (void) sprintf(treestr, "]%s", form == treeProlog ? ", " : " "); |
---|
4281 | while (*treestr) treestr++; |
---|
4282 | } |
---|
4283 | } |
---|
4284 | |
---|
4285 | if (p->tip) { |
---|
4286 | if (form != treePHYLIP) *treestr++ = '\''; |
---|
4287 | nameptr = p->name; |
---|
4288 | while (c = *nameptr++) { |
---|
4289 | if (form != treePHYLIP) {if (c == '\'') *treestr++ = '\'';} |
---|
4290 | else if (c == ' ') {c = '_';} |
---|
4291 | *treestr++ = c; |
---|
4292 | } |
---|
4293 | if (form != treePHYLIP) *treestr++ = '\''; |
---|
4294 | } |
---|
4295 | |
---|
4296 | else { |
---|
4297 | *treestr++ = '('; |
---|
4298 | treestr = treeString(treestr, tr, p->next->back, form); |
---|
4299 | *treestr++ = ','; |
---|
4300 | treestr = treeString(treestr, tr, p->next->next->back, form); |
---|
4301 | if (p == tr->start->back) { |
---|
4302 | *treestr++ = ','; |
---|
4303 | treestr = treeString(treestr, tr, p->back, form); |
---|
4304 | } |
---|
4305 | *treestr++ = ')'; |
---|
4306 | } |
---|
4307 | |
---|
4308 | if (p == tr->start->back) { |
---|
4309 | (void) sprintf(treestr, ":0.0%s\n", (form != treeProlog) ? ";" : ")."); |
---|
4310 | } |
---|
4311 | else { |
---|
4312 | z = p->z; |
---|
4313 | if (z < zmin) z = zmin; |
---|
4314 | x = -log(z) * tr->rdta->fracchange; |
---|
4315 | (void) sprintf(treestr, ": %8.6f", x); /* prolog needs the space */ |
---|
4316 | } |
---|
4317 | |
---|
4318 | while (*treestr) treestr++; /* move pointer up to null termination */ |
---|
4319 | return treestr; |
---|
4320 | } /* treeString */ |
---|
4321 | |
---|
4322 | |
---|
4323 | void treeOut (FILE *treefile, tree *tr, int form) |
---|
4324 | /* write out file with representation of final tree */ |
---|
4325 | { /* treeOut */ |
---|
4326 | int c; |
---|
4327 | char *cptr, *treestr; |
---|
4328 | |
---|
4329 | treestr = (char *) Malloc((tr->ntips * (nmlngth+32)) + 256); |
---|
4330 | if (! treestr) { |
---|
4331 | fprintf(stderr, "treeOut: Malloc failure\n"); |
---|
4332 | exit(1); |
---|
4333 | } |
---|
4334 | |
---|
4335 | (void) treeString(treestr, tr, tr->start->back, form); |
---|
4336 | cptr = treestr; |
---|
4337 | while (c = *cptr++) putc(c, treefile); |
---|
4338 | |
---|
4339 | Free(treestr); |
---|
4340 | } /* treeOut */ |
---|
4341 | |
---|
4342 | |
---|
4343 | /*=======================================================================*/ |
---|
4344 | /* Read a tree from a file */ |
---|
4345 | /*=======================================================================*/ |
---|
4346 | |
---|
4347 | |
---|
4348 | /* 1.0.A Processing of quotation marks in comment removed |
---|
4349 | */ |
---|
4350 | |
---|
4351 | int treeFinishCom (FILE *fp, char **strp) |
---|
4352 | { /* treeFinishCom */ |
---|
4353 | int ch; |
---|
4354 | |
---|
4355 | while ((ch = getc(fp)) != EOF && ch != ']') { |
---|
4356 | if (strp != NULL) *(*strp)++ = ch; /* save character */ |
---|
4357 | if (ch == '[') { /* nested comment; find its end */ |
---|
4358 | if ((ch = treeFinishCom(fp, strp)) == EOF) break; |
---|
4359 | if (strp != NULL) *(*strp)++ = ch; /* save closing ] */ |
---|
4360 | } |
---|
4361 | } |
---|
4362 | |
---|
4363 | if (strp != NULL) **strp = '\0'; /* terminate string */ |
---|
4364 | return ch; |
---|
4365 | } /* treeFinishCom */ |
---|
4366 | |
---|
4367 | |
---|
4368 | int treeGetCh (FILE *fp) /* get next nonblank, noncomment character */ |
---|
4369 | { /* treeGetCh */ |
---|
4370 | int ch; |
---|
4371 | |
---|
4372 | while ((ch = getc(fp)) != EOF) { |
---|
4373 | if (whitechar(ch)) ; |
---|
4374 | else if (ch == '[') { /* comment; find its end */ |
---|
4375 | if ((ch = treeFinishCom(fp, (char **) NULL)) == EOF) break; |
---|
4376 | } |
---|
4377 | else break; |
---|
4378 | } |
---|
4379 | |
---|
4380 | return ch; |
---|
4381 | } /* treeGetCh */ |
---|
4382 | |
---|
4383 | |
---|
4384 | boolean treeLabelEnd (int ch) |
---|
4385 | { /* treeLabelEnd */ |
---|
4386 | switch (ch) { |
---|
4387 | case EOF: case '\0': case '\t': case '\n': case ' ': |
---|
4388 | case ':': case ',': case '(': case ')': case '[': |
---|
4389 | case ';': |
---|
4390 | return TRUE; |
---|
4391 | default: |
---|
4392 | break; |
---|
4393 | } |
---|
4394 | return FALSE; |
---|
4395 | } /* treeLabelEnd */ |
---|
4396 | |
---|
4397 | |
---|
4398 | boolean treeGetLabel (FILE *fp, char *lblPtr, int maxlen) |
---|
4399 | { /* treeGetLabel */ |
---|
4400 | int ch; |
---|
4401 | boolean done, quoted, lblfound; |
---|
4402 | |
---|
4403 | if (--maxlen < 0) lblPtr = (char *) NULL; /* reserves space for '\0' */ |
---|
4404 | else if (lblPtr == NULL) maxlen = 0; |
---|
4405 | |
---|
4406 | ch = getc(fp); |
---|
4407 | done = treeLabelEnd(ch); |
---|
4408 | |
---|
4409 | lblfound = ! done; |
---|
4410 | quoted = (ch == '\''); |
---|
4411 | if (quoted && ! done) {ch = getc(fp); done = (ch == EOF);} |
---|
4412 | |
---|
4413 | while (! done) { |
---|
4414 | if (quoted) { |
---|
4415 | if (ch == '\'') {ch = getc(fp); if (ch != '\'') break;} |
---|
4416 | } |
---|
4417 | |
---|
4418 | else if (treeLabelEnd(ch)) break; |
---|
4419 | |
---|
4420 | else if (ch == '_') ch = ' '; /* unquoted _ goes to space */ |
---|
4421 | |
---|
4422 | if (--maxlen >= 0) *lblPtr++ = ch; |
---|
4423 | ch = getc(fp); |
---|
4424 | if (ch == EOF) break; |
---|
4425 | } |
---|
4426 | |
---|
4427 | if (ch != EOF) (void) ungetc(ch, fp); |
---|
4428 | |
---|
4429 | if (lblPtr != NULL) *lblPtr = '\0'; |
---|
4430 | |
---|
4431 | return lblfound; |
---|
4432 | } /* treeGetLabel */ |
---|
4433 | |
---|
4434 | |
---|
4435 | boolean treeFlushLabel (FILE *fp) |
---|
4436 | { /* treeFlushLabel */ |
---|
4437 | return treeGetLabel(fp, (char *) NULL, (int) 0); |
---|
4438 | } /* treeFlushLabel */ |
---|
4439 | |
---|
4440 | |
---|
4441 | int treeFindTipByLabel (char *str, tree *tr) |
---|
4442 | /* str -- label string pointer */ |
---|
4443 | { /* treeFindTipByLabel */ |
---|
4444 | nodeptr q; |
---|
4445 | char *nameptr; |
---|
4446 | int ch, i, n; |
---|
4447 | boolean found; |
---|
4448 | |
---|
4449 | for (n = 1; n <= tr->mxtips; n++) { |
---|
4450 | q = tr->nodep[n]; |
---|
4451 | if (! (q->back)) { /* Only consider unused tips */ |
---|
4452 | i = 0; |
---|
4453 | nameptr = q->name; |
---|
4454 | while ((found = (str[i++] == (ch = *nameptr++))) && ch) ; |
---|
4455 | if (found) return n; |
---|
4456 | } |
---|
4457 | } |
---|
4458 | |
---|
4459 | printf("ERROR: Cannot find tree species: %s\n", str); |
---|
4460 | |
---|
4461 | return 0; |
---|
4462 | } /* treeFindTipByLabel */ |
---|
4463 | |
---|
4464 | |
---|
4465 | int treeFindTipName (FILE *fp, tree *tr) |
---|
4466 | { /* treeFindTipName */ |
---|
4467 | char *nameptr, str[nmlngth+2]; |
---|
4468 | int n; |
---|
4469 | |
---|
4470 | if (tr->prelabeled) { |
---|
4471 | if (treeGetLabel(fp, str, nmlngth+2)) |
---|
4472 | n = treeFindTipByLabel(str, tr); |
---|
4473 | else |
---|
4474 | n = 0; |
---|
4475 | } |
---|
4476 | |
---|
4477 | else if (tr->ntips < tr->mxtips) { |
---|
4478 | n = tr->ntips + 1; |
---|
4479 | nameptr = tr->nodep[n]->name; |
---|
4480 | if (! treeGetLabel(fp, nameptr, nmlngth+1)) n = 0; |
---|
4481 | } |
---|
4482 | |
---|
4483 | else { |
---|
4484 | n = 0; |
---|
4485 | } |
---|
4486 | |
---|
4487 | return n; |
---|
4488 | } /* treeFindTipName */ |
---|
4489 | |
---|
4490 | |
---|
4491 | void treeEchoContext (FILE *fp1, FILE *fp2, int n) |
---|
4492 | { /* treeEchoContext */ |
---|
4493 | int ch; |
---|
4494 | boolean waswhite; |
---|
4495 | |
---|
4496 | waswhite = TRUE; |
---|
4497 | |
---|
4498 | while (n > 0 && ((ch = getc(fp1)) != EOF)) { |
---|
4499 | if (whitechar(ch)) { |
---|
4500 | ch = waswhite ? '\0' : ' '; |
---|
4501 | waswhite = TRUE; |
---|
4502 | } |
---|
4503 | else { |
---|
4504 | waswhite = FALSE; |
---|
4505 | } |
---|
4506 | |
---|
4507 | if (ch > '\0') {putc(ch, fp2); n--;} |
---|
4508 | } |
---|
4509 | } /* treeEchoContext */ |
---|
4510 | |
---|
4511 | |
---|
4512 | boolean treeProcessLength (FILE *fp, double *dptr) |
---|
4513 | { /* treeProcessLength */ |
---|
4514 | int ch; |
---|
4515 | |
---|
4516 | if ((ch = treeGetCh(fp)) == EOF) return FALSE; /* Skip comments */ |
---|
4517 | (void) ungetc(ch, fp); |
---|
4518 | |
---|
4519 | if (fscanf(fp, "%lf", dptr) != 1) { |
---|
4520 | printf("ERROR: treeProcessLength: Problem reading branch length\n"); |
---|
4521 | treeEchoContext(fp, stdout, 40); |
---|
4522 | printf("\n"); |
---|
4523 | return FALSE; |
---|
4524 | } |
---|
4525 | |
---|
4526 | return TRUE; |
---|
4527 | } /* treeProcessLength */ |
---|
4528 | |
---|
4529 | |
---|
4530 | boolean treeFlushLen (FILE *fp) |
---|
4531 | { /* treeFlushLen */ |
---|
4532 | double dummy; |
---|
4533 | int ch; |
---|
4534 | |
---|
4535 | if ((ch = treeGetCh(fp)) == ':') return treeProcessLength(fp, & dummy); |
---|
4536 | |
---|
4537 | if (ch != EOF) (void) ungetc(ch, fp); |
---|
4538 | return TRUE; |
---|
4539 | } /* treeFlushLen */ |
---|
4540 | |
---|
4541 | |
---|
4542 | boolean treeNeedCh (FILE *fp, int c1, char *where) |
---|
4543 | { /* treeNeedCh */ |
---|
4544 | int c2; |
---|
4545 | |
---|
4546 | if ((c2 = treeGetCh(fp)) == c1) return TRUE; |
---|
4547 | |
---|
4548 | printf("ERROR: Expecting '%c' %s tree; found:", c1, where); |
---|
4549 | if (c2 == EOF) { |
---|
4550 | printf("End-of-File"); |
---|
4551 | } |
---|
4552 | else { |
---|
4553 | ungetc(c2, fp); |
---|
4554 | treeEchoContext(fp, stdout, 40); |
---|
4555 | } |
---|
4556 | putchar('\n'); |
---|
4557 | return FALSE; |
---|
4558 | } /* treeNeedCh */ |
---|
4559 | |
---|
4560 | |
---|
4561 | boolean addElementLen (FILE *fp, tree *tr, nodeptr p) |
---|
4562 | { /* addElementLen */ |
---|
4563 | double z, branch; |
---|
4564 | nodeptr q; |
---|
4565 | int n, ch; |
---|
4566 | |
---|
4567 | if ((ch = treeGetCh(fp)) == '(') { /* A new internal node */ |
---|
4568 | n = (tr->nextnode)++; |
---|
4569 | if (n > 2*(tr->mxtips) - 2) { |
---|
4570 | if (tr->rooted || n > 2*(tr->mxtips) - 1) { |
---|
4571 | printf("ERROR: Too many internal nodes. Is tree rooted?\n"); |
---|
4572 | printf(" Deepest splitting should be a trifurcation.\n"); |
---|
4573 | return FALSE; |
---|
4574 | } |
---|
4575 | else { |
---|
4576 | tr->rooted = TRUE; |
---|
4577 | } |
---|
4578 | } |
---|
4579 | q = tr->nodep[n]; |
---|
4580 | if (! addElementLen(fp, tr, q->next)) return FALSE; |
---|
4581 | if (! treeNeedCh(fp, ',', "in")) return FALSE; |
---|
4582 | if (! addElementLen(fp, tr, q->next->next)) return FALSE; |
---|
4583 | if (! treeNeedCh(fp, ')', "in")) return FALSE; |
---|
4584 | (void) treeFlushLabel(fp); |
---|
4585 | } |
---|
4586 | |
---|
4587 | else { /* A new tip */ |
---|
4588 | ungetc(ch, fp); |
---|
4589 | if ((n = treeFindTipName(fp, tr)) <= 0) return FALSE; |
---|
4590 | q = tr->nodep[n]; |
---|
4591 | if (tr->start->number > n) tr->start = q; |
---|
4592 | (tr->ntips)++; |
---|
4593 | } /* End of tip processing */ |
---|
4594 | |
---|
4595 | if (tr->userlen) { |
---|
4596 | if (! treeNeedCh(fp, ':', "in")) return FALSE; |
---|
4597 | if (! treeProcessLength(fp, & branch)) return FALSE; |
---|
4598 | z = exp(-branch / tr->rdta->fracchange); |
---|
4599 | if (z > zmax) z = zmax; |
---|
4600 | hookup(p, q, z); |
---|
4601 | } |
---|
4602 | else { |
---|
4603 | if (! treeFlushLen(fp)) return FALSE; |
---|
4604 | hookup(p, q, defaultz); |
---|
4605 | } |
---|
4606 | |
---|
4607 | return TRUE; |
---|
4608 | } /* addElementLen */ |
---|
4609 | |
---|
4610 | |
---|
4611 | int saveTreeCom (char **comstrp) |
---|
4612 | { /* saveTreeCom */ |
---|
4613 | int ch; |
---|
4614 | boolean inquote; |
---|
4615 | |
---|
4616 | inquote = FALSE; |
---|
4617 | while ((ch = getc(INFILE)) != EOF && (inquote || ch != ']')) { |
---|
4618 | *(*comstrp)++ = ch; /* save character */ |
---|
4619 | if (ch == '[' && ! inquote) { /* comment; find its end */ |
---|
4620 | if ((ch = saveTreeCom(comstrp)) == EOF) break; |
---|
4621 | *(*comstrp)++ = ch; /* add ] */ |
---|
4622 | } |
---|
4623 | else if (ch == '\'') inquote = ! inquote; /* start or end of quote */ |
---|
4624 | } |
---|
4625 | |
---|
4626 | return ch; |
---|
4627 | } /* saveTreeCom */ |
---|
4628 | |
---|
4629 | |
---|
4630 | boolean processTreeCom (FILE *fp, tree *tr) |
---|
4631 | { /* processTreeCom */ |
---|
4632 | int text_started, functor_read, com_open; |
---|
4633 | |
---|
4634 | /* Accept prefatory "phylip_tree(" or "pseudoNewick(" */ |
---|
4635 | |
---|
4636 | functor_read = text_started = 0; |
---|
4637 | (void) fscanf(fp, " p%nhylip_tree(%n", & text_started, & functor_read); |
---|
4638 | if (text_started && ! functor_read) { |
---|
4639 | (void) fscanf(fp, "seudoNewick(%n", & functor_read); |
---|
4640 | if (! functor_read) { |
---|
4641 | printf("Start of tree 'p...' not understood.\n"); |
---|
4642 | return FALSE; |
---|
4643 | } |
---|
4644 | } |
---|
4645 | |
---|
4646 | com_open = 0; |
---|
4647 | (void) fscanf(fp, " [%n", & com_open); |
---|
4648 | |
---|
4649 | if (com_open) { /* comment; read it */ |
---|
4650 | char com[1024], *com_end; |
---|
4651 | |
---|
4652 | com_end = com; |
---|
4653 | if (treeFinishCom(fp, & com_end) == EOF) { /* omits enclosing []s */ |
---|
4654 | printf("Missing end of tree comment\n"); |
---|
4655 | return FALSE; |
---|
4656 | } |
---|
4657 | |
---|
4658 | (void) readKeyValue(com, likelihood_key, "%lg", |
---|
4659 | (void *) &(tr->likelihood)); |
---|
4660 | (void) readKeyValue(com, opt_level_key, "%d", |
---|
4661 | (void *) &(tr->opt_level)); |
---|
4662 | (void) readKeyValue(com, smoothed_key, "%d", |
---|
4663 | (void *) &(tr->smoothed)); |
---|
4664 | |
---|
4665 | if (functor_read) (void) fscanf(fp, " ,"); /* remove trailing comma */ |
---|
4666 | } |
---|
4667 | |
---|
4668 | return (functor_read > 0); |
---|
4669 | } /* processTreeCom */ |
---|
4670 | |
---|
4671 | |
---|
4672 | nodeptr uprootTree (tree *tr, nodeptr p) |
---|
4673 | { /* uprootTree */ |
---|
4674 | nodeptr q, r, s, start; |
---|
4675 | int n; |
---|
4676 | |
---|
4677 | if (p->tip || p->back) { |
---|
4678 | printf("ERROR: Unable to uproot tree.\n"); |
---|
4679 | printf(" Inappropriate node marked for removal.\n"); |
---|
4680 | return (nodeptr) NULL; |
---|
4681 | } |
---|
4682 | |
---|
4683 | n = --(tr->nextnode); /* last internal node added */ |
---|
4684 | if (n != tr->mxtips + tr->ntips - 1) { |
---|
4685 | printf("ERROR: Unable to uproot tree. Inconsistent\n"); |
---|
4686 | printf(" number of tips and nodes for rooted tree.\n"); |
---|
4687 | return (nodeptr) NULL; |
---|
4688 | } |
---|
4689 | |
---|
4690 | q = p->next->back; /* remove p from tree */ |
---|
4691 | r = p->next->next->back; |
---|
4692 | hookup(q, r, tr->userlen ? (q->z * r->z) : defaultz); |
---|
4693 | |
---|
4694 | start = (r->tip || (! q->tip)) ? r : r->next->next->back; |
---|
4695 | |
---|
4696 | if (tr->ntips > 2 && p->number != n) { |
---|
4697 | q = tr->nodep[n]; /* transfer last node's conections to p */ |
---|
4698 | r = q->next; |
---|
4699 | s = q->next->next; |
---|
4700 | hookup(p, q->back, q->z); /* move connections to p */ |
---|
4701 | hookup(p->next, r->back, r->z); |
---|
4702 | hookup(p->next->next, s->back, s->z); |
---|
4703 | if (start->number == q->number) start = start->back->back; |
---|
4704 | q->back = r->back = s->back = (nodeptr) NULL; |
---|
4705 | } |
---|
4706 | else { |
---|
4707 | p->back = p->next->back = p->next->next->back = (nodeptr) NULL; |
---|
4708 | } |
---|
4709 | |
---|
4710 | tr->rooted = FALSE; |
---|
4711 | return start; |
---|
4712 | } /* uprootTree */ |
---|
4713 | |
---|
4714 | |
---|
4715 | boolean treeReadLen (FILE *fp, tree *tr) |
---|
4716 | { /* treeReadLen */ |
---|
4717 | nodeptr p; |
---|
4718 | int i, ch; |
---|
4719 | boolean is_fact; |
---|
4720 | |
---|
4721 | for (i = 1; i <= tr->mxtips; i++) tr->nodep[i]->back = (node *) NULL; |
---|
4722 | tr->start = tr->nodep[tr->mxtips]; |
---|
4723 | tr->ntips = 0; |
---|
4724 | tr->nextnode = tr->mxtips + 1; |
---|
4725 | tr->opt_level = 0; |
---|
4726 | tr->log_f_valid = 0; |
---|
4727 | tr->smoothed = FALSE; |
---|
4728 | tr->rooted = FALSE; |
---|
4729 | |
---|
4730 | is_fact = processTreeCom(fp, tr); |
---|
4731 | |
---|
4732 | p = tr->nodep[(tr->nextnode)++]; |
---|
4733 | if (! treeNeedCh(fp, '(', "at start of")) return FALSE; |
---|
4734 | if (! addElementLen(fp, tr, p)) return FALSE; |
---|
4735 | if (! treeNeedCh(fp, ',', "in")) return FALSE; |
---|
4736 | if (! addElementLen(fp, tr, p->next)) return FALSE; |
---|
4737 | if (! tr->rooted) { |
---|
4738 | if ((ch = treeGetCh(fp)) == ',') { /* An unrooted format */ |
---|
4739 | if (! addElementLen(fp, tr, p->next->next)) return FALSE; |
---|
4740 | } |
---|
4741 | else { /* A rooted format */ |
---|
4742 | tr->rooted = TRUE; |
---|
4743 | if (ch != EOF) (void) ungetc(ch, fp); |
---|
4744 | } |
---|
4745 | } |
---|
4746 | else { |
---|
4747 | p->next->next->back = (nodeptr) NULL; |
---|
4748 | } |
---|
4749 | if (! treeNeedCh(fp, ')', "in")) return FALSE; |
---|
4750 | (void) treeFlushLabel(fp); |
---|
4751 | if (! treeFlushLen(fp)) return FALSE; |
---|
4752 | if (is_fact) { |
---|
4753 | if (! treeNeedCh(fp, ')', "at end of")) return FALSE; |
---|
4754 | if (! treeNeedCh(fp, '.', "at end of")) return FALSE; |
---|
4755 | } |
---|
4756 | else { |
---|
4757 | if (! treeNeedCh(fp, ';', "at end of")) return FALSE; |
---|
4758 | } |
---|
4759 | |
---|
4760 | if (tr->rooted) { |
---|
4761 | p->next->next->back = (nodeptr) NULL; |
---|
4762 | tr->start = uprootTree(tr, p->next->next); |
---|
4763 | if (! tr->start) return FALSE; |
---|
4764 | } |
---|
4765 | else { |
---|
4766 | tr->start = p->next->next->back; /* This is start used by treeString */ |
---|
4767 | } |
---|
4768 | |
---|
4769 | return (initrav(tr, tr->start) && initrav(tr, tr->start->back)); |
---|
4770 | } /* treeReadLen */ |
---|
4771 | |
---|
4772 | |
---|
4773 | /*=======================================================================*/ |
---|
4774 | /* Read a tree from a string */ |
---|
4775 | /*=======================================================================*/ |
---|
4776 | |
---|
4777 | |
---|
4778 | #if Master || Slave |
---|
4779 | int str_treeFinishCom (char **treestrp, char **strp) |
---|
4780 | /* treestrp -- tree string pointer */ |
---|
4781 | /* strp -- comment string pointer */ |
---|
4782 | { /* str_treeFinishCom */ |
---|
4783 | int ch; |
---|
4784 | |
---|
4785 | while ((ch = *(*treestrp)++) != NULL && ch != ']') { |
---|
4786 | if (strp != NULL) *(*strp)++ = ch; /* save character */ |
---|
4787 | if (ch == '[') { /* nested comment; find its end */ |
---|
4788 | if ((ch = str_treeFinishCom(treestrp)) == NULL) break; |
---|
4789 | if (strp != NULL) *(*strp)++ = ch; /* save closing ] */ |
---|
4790 | } |
---|
4791 | } |
---|
4792 | if (strp != NULL) **strp = '\0'; /* terminate string */ |
---|
4793 | return ch; |
---|
4794 | } /* str_treeFinishCom */ |
---|
4795 | |
---|
4796 | |
---|
4797 | int str_treeGetCh (char **treestrp) |
---|
4798 | /* get next nonblank, noncomment character */ |
---|
4799 | { /* str_treeGetCh */ |
---|
4800 | int ch; |
---|
4801 | |
---|
4802 | while ((ch = *(*treestrp)++) != NULL) { |
---|
4803 | if (whitechar(ch)) ; |
---|
4804 | else if (ch == '[') { /* comment; find its end */ |
---|
4805 | if ((ch = str_treeFinishCom(treestrp, (char *) NULL)) == NULL) break; |
---|
4806 | } |
---|
4807 | else break; |
---|
4808 | } |
---|
4809 | |
---|
4810 | return ch; |
---|
4811 | } /* str_treeGetCh */ |
---|
4812 | |
---|
4813 | |
---|
4814 | boolean str_treeGetLabel (char **treestrp, char *lblPtr, int maxlen) |
---|
4815 | { /* str_treeGetLabel */ |
---|
4816 | int ch; |
---|
4817 | boolean done, quoted, lblfound; |
---|
4818 | |
---|
4819 | if (--maxlen < 0) lblPtr = (char *) NULL; /* reserves space for '\0' */ |
---|
4820 | else if (lblPtr == NULL) maxlen = 0; |
---|
4821 | |
---|
4822 | ch = *(*treestrp)++; |
---|
4823 | done = treeLabelEnd(ch); |
---|
4824 | |
---|
4825 | lblfound = ! done; |
---|
4826 | quoted = (ch == '\''); |
---|
4827 | if (quoted && ! done) {ch = *(*treestrp)++; done = (ch == '\0');} |
---|
4828 | |
---|
4829 | while (! done) { |
---|
4830 | if (quoted) { |
---|
4831 | if (ch == '\'') {ch = *(*treestrp)++; if (ch != '\'') break;} |
---|
4832 | } |
---|
4833 | |
---|
4834 | else if (treeLabelEnd(ch)) break; |
---|
4835 | |
---|
4836 | else if (ch == '_') ch = ' '; /* unquoted _ goes to space */ |
---|
4837 | |
---|
4838 | if (--maxlen >= 0) *lblPtr++ = ch; |
---|
4839 | ch = *(*treestrp)++; |
---|
4840 | if (ch == '\0') break; |
---|
4841 | } |
---|
4842 | |
---|
4843 | (*treestrp)--; |
---|
4844 | |
---|
4845 | if (lblPtr != NULL) *lblPtr = '\0'; |
---|
4846 | |
---|
4847 | return lblfound; |
---|
4848 | } /* str_treeGetLabel */ |
---|
4849 | |
---|
4850 | |
---|
4851 | boolean str_treeFlushLabel (char **treestrp) |
---|
4852 | { /* str_treeFlushLabel */ |
---|
4853 | return str_treeGetLabel(treestrp, (char *) NULL, (int) 0); |
---|
4854 | } /* str_treeFlushLabel */ |
---|
4855 | |
---|
4856 | |
---|
4857 | int str_treeFindTipName (char **treestrp, tree *tr) |
---|
4858 | { /* str_treeFindTipName */ |
---|
4859 | nodeptr q; |
---|
4860 | char *nameptr, str[nmlngth+2]; |
---|
4861 | int ch, i, n; |
---|
4862 | |
---|
4863 | if (tr->prelabeled) { |
---|
4864 | if (str_treeGetLabel(treestrp, str, nmlngth+2)) |
---|
4865 | n = treeFindTipByLabel(str, tr); |
---|
4866 | else |
---|
4867 | n = 0; |
---|
4868 | } |
---|
4869 | |
---|
4870 | else if (tr->ntips < tr->mxtips) { |
---|
4871 | n = tr->ntips + 1; |
---|
4872 | nameptr = tr->nodep[n]->name; |
---|
4873 | if (! str_treeGetLabel(treestrp, nameptr, nmlngth+1)) n = 0; |
---|
4874 | } |
---|
4875 | |
---|
4876 | else { |
---|
4877 | n = 0; |
---|
4878 | } |
---|
4879 | |
---|
4880 | return n; |
---|
4881 | } /* str_treeFindTipName */ |
---|
4882 | |
---|
4883 | |
---|
4884 | boolean str_treeProcessLength (char **treestrp, double *dptr) |
---|
4885 | { /* str_treeProcessLength */ |
---|
4886 | int used; |
---|
4887 | |
---|
4888 | if(! str_treeGetCh(treestrp)) return FALSE; /* Skip comments */ |
---|
4889 | (*treestrp)--; |
---|
4890 | |
---|
4891 | if (sscanf(*treestrp, "%lf%n", dptr, & used) != 1) { |
---|
4892 | printf("ERROR: str_treeProcessLength: Problem reading branch length\n"); |
---|
4893 | printf("%40s\n", *treestrp); |
---|
4894 | *dptr = 0.0; |
---|
4895 | return FALSE; |
---|
4896 | } |
---|
4897 | else { |
---|
4898 | *treestrp += used; |
---|
4899 | } |
---|
4900 | |
---|
4901 | return TRUE; |
---|
4902 | } /* str_treeProcessLength */ |
---|
4903 | |
---|
4904 | |
---|
4905 | boolean str_treeFlushLen (char **treestrp) |
---|
4906 | { /* str_treeFlushLen */ |
---|
4907 | int ch; |
---|
4908 | |
---|
4909 | if ((ch = str_treeGetCh(treestrp)) == ':') |
---|
4910 | return str_treeProcessLength(treestrp, (double *) NULL); |
---|
4911 | else { |
---|
4912 | (*treestrp)--; |
---|
4913 | return TRUE; |
---|
4914 | } |
---|
4915 | } /* str_treeFlushLen */ |
---|
4916 | |
---|
4917 | |
---|
4918 | boolean str_treeNeedCh (char **treestrp, int c1, char *where) |
---|
4919 | { /* str_treeNeedCh */ |
---|
4920 | int c2, i; |
---|
4921 | |
---|
4922 | if ((c2 = str_treeGetCh(treestrp)) == c1) return TRUE; |
---|
4923 | |
---|
4924 | printf("ERROR: Missing '%c' %s tree; ", c1, where); |
---|
4925 | if (c2 == '\0') |
---|
4926 | printf("end-of-string"); |
---|
4927 | else { |
---|
4928 | putchar('"'); |
---|
4929 | for (i = 24; i-- && (c2 != '\0'); c2 = *(*treestrp)++) putchar(c2); |
---|
4930 | putchar('"'); |
---|
4931 | } |
---|
4932 | |
---|
4933 | printf(" found instead\n"); |
---|
4934 | return FALSE; |
---|
4935 | } /* str_treeNeedCh */ |
---|
4936 | |
---|
4937 | |
---|
4938 | boolean str_addElementLen (char **treestrp, tree *tr, nodeptr p) |
---|
4939 | { /* str_addElementLen */ |
---|
4940 | double z, branch; |
---|
4941 | nodeptr q; |
---|
4942 | int n, ch; |
---|
4943 | |
---|
4944 | if ((ch = str_treeGetCh(treestrp)) == '(') { /* A new internal node */ |
---|
4945 | n = (tr->nextnode)++; |
---|
4946 | if (n > 2*(tr->mxtips) - 2) { |
---|
4947 | if (tr->rooted || n > 2*(tr->mxtips) - 1) { |
---|
4948 | printf("ERROR: too many internal nodes. Is tree rooted?\n"); |
---|
4949 | printf("Deepest splitting should be a trifurcation.\n"); |
---|
4950 | return FALSE; |
---|
4951 | } |
---|
4952 | else { |
---|
4953 | tr->rooted = TRUE; |
---|
4954 | } |
---|
4955 | } |
---|
4956 | q = tr->nodep[n]; |
---|
4957 | if (! str_addElementLen(treestrp, tr, q->next)) return FALSE; |
---|
4958 | if (! str_treeNeedCh(treestrp, ',', "in")) return FALSE; |
---|
4959 | if (! str_addElementLen(treestrp, tr, q->next->next)) return FALSE; |
---|
4960 | if (! str_treeNeedCh(treestrp, ')', "in")) return FALSE; |
---|
4961 | if (! str_treeFlushLabel(treestrp)) return FALSE; |
---|
4962 | } |
---|
4963 | |
---|
4964 | else { /* A new tip */ |
---|
4965 | n = str_treeFindTipName(treestrp, tr, ch); |
---|
4966 | if (n <= 0) return FALSE; |
---|
4967 | q = tr->nodep[n]; |
---|
4968 | if (tr->start->number > n) tr->start = q; |
---|
4969 | (tr->ntips)++; |
---|
4970 | } /* End of tip processing */ |
---|
4971 | |
---|
4972 | /* Master and Slave always use lengths */ |
---|
4973 | |
---|
4974 | if (! str_treeNeedCh(treestrp, ':', "in")) return FALSE; |
---|
4975 | if (! str_treeProcessLength(treestrp, & branch)) return FALSE; |
---|
4976 | z = exp(-branch / tr->rdta->fracchange); |
---|
4977 | if (z > zmax) z = zmax; |
---|
4978 | hookup(p, q, z); |
---|
4979 | |
---|
4980 | return TRUE; |
---|
4981 | } /* str_addElementLen */ |
---|
4982 | |
---|
4983 | |
---|
4984 | boolean str_processTreeCom(tree *tr, char **treestrp) |
---|
4985 | { /* str_processTreeCom */ |
---|
4986 | char *com, *com_end; |
---|
4987 | int text_started, functor_read, com_open; |
---|
4988 | |
---|
4989 | com = *treestrp; |
---|
4990 | |
---|
4991 | functor_read = text_started = 0; |
---|
4992 | sscanf(com, " p%nhylip_tree(%n", & text_started, & functor_read); |
---|
4993 | if (functor_read) { |
---|
4994 | com += functor_read; |
---|
4995 | } |
---|
4996 | else if (text_started) { |
---|
4997 | com += text_started; |
---|
4998 | sscanf(com, "seudoNewick(%n", & functor_read); |
---|
4999 | if (! functor_read) { |
---|
5000 | printf("Start of tree 'p...' not understood.\n"); |
---|
5001 | return FALSE; |
---|
5002 | } |
---|
5003 | else { |
---|
5004 | com += functor_read; |
---|
5005 | } |
---|
5006 | } |
---|
5007 | |
---|
5008 | com_open = 0; |
---|
5009 | sscanf(com, " [%n", & com_open); |
---|
5010 | com += com_open; |
---|
5011 | |
---|
5012 | if (com_open) { /* comment; read it */ |
---|
5013 | if (!(com_end = strchr(com, ']'))) { |
---|
5014 | printf("Missing end of tree comment.\n"); |
---|
5015 | return FALSE; |
---|
5016 | } |
---|
5017 | |
---|
5018 | *com_end = 0; |
---|
5019 | (void) readKeyValue(com, likelihood_key, "%lg", |
---|
5020 | (void *) &(tr->likelihood)); |
---|
5021 | (void) readKeyValue(com, opt_level_key, "%d", |
---|
5022 | (void *) &(tr->opt_level)); |
---|
5023 | (void) readKeyValue(com, smoothed_key, "%d", |
---|
5024 | (void *) &(tr->smoothed)); |
---|
5025 | *com_end = ']'; |
---|
5026 | com_end++; |
---|
5027 | |
---|
5028 | if (functor_read) { /* remove trailing comma */ |
---|
5029 | text_started = 0; |
---|
5030 | sscanf(com_end, " ,%n", & text_started); |
---|
5031 | com_end += text_started; |
---|
5032 | } |
---|
5033 | |
---|
5034 | *treestrp = com_end; |
---|
5035 | } |
---|
5036 | |
---|
5037 | return (functor_read > 0); |
---|
5038 | } /* str_processTreeCom */ |
---|
5039 | |
---|
5040 | |
---|
5041 | boolean str_treeReadLen (char *treestr, tree *tr) |
---|
5042 | /* read string with representation of tree */ |
---|
5043 | { /* str_treeReadLen */ |
---|
5044 | nodeptr p; |
---|
5045 | int i; |
---|
5046 | boolean is_fact, found; |
---|
5047 | |
---|
5048 | for (i = 1; i <= tr->mxtips; i++) tr->nodep[i]->back = (node *) NULL; |
---|
5049 | tr->start = tr->nodep[tr->mxtips]; |
---|
5050 | tr->ntips = 0; |
---|
5051 | tr->nextnode = tr->mxtips + 1; |
---|
5052 | tr->opt_level = 0; |
---|
5053 | tr->log_f_valid = 0; |
---|
5054 | tr->smoothed = Master; |
---|
5055 | tr->rooted = FALSE; |
---|
5056 | |
---|
5057 | is_fact = str_processTreeCom(tr, & treestr); |
---|
5058 | |
---|
5059 | p = tr->nodep[(tr->nextnode)++]; |
---|
5060 | if (! str_treeNeedCh(& treestr, '(', "at start of")) return FALSE; |
---|
5061 | if (! str_addElementLen(& treestr, tr, p)) return FALSE; |
---|
5062 | if (! str_treeNeedCh(& treestr, ',', "in")) return FALSE; |
---|
5063 | if (! str_addElementLen(& treestr, tr, p->next)) return FALSE; |
---|
5064 | if (! tr->rooted) { |
---|
5065 | if (str_treeGetCh(& treestr) == ',') { /* An unrooted format */ |
---|
5066 | if (! str_addElementLen(& treestr, tr, p->next->next)) return FALSE; |
---|
5067 | } |
---|
5068 | else { /* A rooted format */ |
---|
5069 | p->next->next->back = (nodeptr) NULL; |
---|
5070 | tr->rooted = TRUE; |
---|
5071 | treestr--; |
---|
5072 | } |
---|
5073 | } |
---|
5074 | if (! str_treeNeedCh(& treestr, ')', "in")) return FALSE; |
---|
5075 | if (! str_treeFlushLabel(& treestr)) return FALSE; |
---|
5076 | if (! str_treeFlushLen(& treestr)) return FALSE; |
---|
5077 | if (is_fact) { |
---|
5078 | if (! str_treeNeedCh(& treestr, ')', "at end of")) return FALSE; |
---|
5079 | if (! str_treeNeedCh(& treestr, '.', "at end of")) return FALSE; |
---|
5080 | } |
---|
5081 | else { |
---|
5082 | if (! str_treeNeedCh(& treestr, ';', "at end of")) return FALSE; |
---|
5083 | } |
---|
5084 | |
---|
5085 | if (tr->rooted) if (! uprootTree(tr, p->next->next)) return FALSE; |
---|
5086 | tr->start = p->next->next->back; /* This is start used by treeString */ |
---|
5087 | |
---|
5088 | return (initrav(tr, tr->start) && initrav(tr, tr->start->back)); |
---|
5089 | } /* str_treeReadLen */ |
---|
5090 | #endif |
---|
5091 | |
---|
5092 | |
---|
5093 | boolean treeEvaluate (tree *tr, bestlist *bt) /* Evaluate a user tree */ |
---|
5094 | { /* treeEvaluate */ |
---|
5095 | |
---|
5096 | if (Slave || ! tr->userlen) { |
---|
5097 | if (! smoothTree(tr, 4 * smoothings)) return FALSE; |
---|
5098 | } |
---|
5099 | |
---|
5100 | if (evaluate(tr, tr->start) == badEval) return FALSE; |
---|
5101 | |
---|
5102 | # if ! Slave |
---|
5103 | (void) saveBestTree(bt, tr); |
---|
5104 | # endif |
---|
5105 | return TRUE; |
---|
5106 | } /* treeEvaluate */ |
---|
5107 | |
---|
5108 | |
---|
5109 | #if Master || Slave |
---|
5110 | FILE *freopen_pid (char *filenm, char *mode, FILE *stream) |
---|
5111 | { /* freopen_pid */ |
---|
5112 | char scr[512]; |
---|
5113 | |
---|
5114 | (void) sprintf(scr, "%s.%d", filenm, getpid()); |
---|
5115 | return freopen(scr, mode, stream); |
---|
5116 | } /* freopen_pid */ |
---|
5117 | #endif |
---|
5118 | |
---|
5119 | |
---|
5120 | boolean showBestTrees (bestlist *bt, tree *tr, analdef *adef, FILE *treefile) |
---|
5121 | { /* showBestTrees */ |
---|
5122 | int rank; |
---|
5123 | |
---|
5124 | for (rank = 1; rank <= bt->nvalid; rank++) { |
---|
5125 | if (rank > 1) { |
---|
5126 | if (rank != recallBestTree(bt, rank, tr)) break; |
---|
5127 | } |
---|
5128 | if (evaluate(tr, tr->start) == badEval) return FALSE; |
---|
5129 | if (tr->outgrnode->back) tr->start = tr->outgrnode; |
---|
5130 | printTree(tr, adef); |
---|
5131 | summarize(tr); |
---|
5132 | if (treefile) treeOut(treefile, tr, adef->trout); |
---|
5133 | } |
---|
5134 | |
---|
5135 | return TRUE; |
---|
5136 | } /* showBestTrees */ |
---|
5137 | |
---|
5138 | |
---|
5139 | boolean cmpBestTrees (bestlist *bt, tree *tr) |
---|
5140 | { /* cmpBestTrees */ |
---|
5141 | double sum, sum2, sd, temp, wtemp, bestscore; |
---|
5142 | double *log_f0, *log_f0_ptr; /* Save a copy of best log_f */ |
---|
5143 | double *log_f_ptr; |
---|
5144 | int i, j, num, besttips; |
---|
5145 | |
---|
5146 | num = bt->nvalid; |
---|
5147 | if ((num <= 1) || (tr->cdta->wgtsum <= 1)) return TRUE; |
---|
5148 | |
---|
5149 | if (! (log_f0 = (double *) Malloc(sizeof(double) * tr->cdta->endsite))) { |
---|
5150 | printf("ERROR: cmpBestTrees unable to obtain space for log_f0\n"); |
---|
5151 | return FALSE; |
---|
5152 | } |
---|
5153 | |
---|
5154 | printf("Tree Ln L Diff Ln L Its S.D."); |
---|
5155 | printf(" Significantly worse?\n\n"); |
---|
5156 | |
---|
5157 | for (i = 1; i <= num; i++) { |
---|
5158 | if (i != recallBestTree(bt, i, tr)) break; |
---|
5159 | if (! (tr->log_f_valid)) { |
---|
5160 | if (evaluate(tr, tr->start) == badEval) return FALSE; |
---|
5161 | } |
---|
5162 | |
---|
5163 | printf("%3d%14.5f", i, tr->likelihood); |
---|
5164 | if (i == 1) { |
---|
5165 | printf(" <------ best\n"); |
---|
5166 | besttips = tr->ntips; |
---|
5167 | bestscore = tr->likelihood; |
---|
5168 | log_f0_ptr = log_f0; |
---|
5169 | log_f_ptr = tr->log_f; |
---|
5170 | for (j = 0; j < tr->cdta->endsite; j++) *log_f0_ptr++ = *log_f_ptr++; |
---|
5171 | } |
---|
5172 | else if (tr->ntips != besttips) |
---|
5173 | printf(" (different number of species)\n"); |
---|
5174 | else { |
---|
5175 | sum = sum2 = 0.0; |
---|
5176 | log_f0_ptr = log_f0; |
---|
5177 | log_f_ptr = tr->log_f; |
---|
5178 | for (j = 0; j < tr->cdta->endsite; j++) { |
---|
5179 | temp = *log_f0_ptr++ - *log_f_ptr++; |
---|
5180 | wtemp = tr->cdta->aliaswgt[j] * temp; |
---|
5181 | sum += wtemp; |
---|
5182 | sum2 += wtemp * temp; |
---|
5183 | } |
---|
5184 | sd = sqrt( tr->cdta->wgtsum * (sum2 - sum*sum / tr->cdta->wgtsum) |
---|
5185 | / (tr->cdta->wgtsum - 1) ); |
---|
5186 | printf("%14.5f%14.4f", tr->likelihood - bestscore, sd); |
---|
5187 | printf(" %s\n", (sum > 1.95996 * sd) ? "Yes" : " No"); |
---|
5188 | } |
---|
5189 | } |
---|
5190 | |
---|
5191 | Free(log_f0); |
---|
5192 | printf("\n\n"); |
---|
5193 | |
---|
5194 | return TRUE; |
---|
5195 | } /* cmpBestTrees */ |
---|
5196 | |
---|
5197 | |
---|
5198 | boolean makeUserTree (tree *tr, bestlist *bt, analdef *adef) |
---|
5199 | { /* makeUserTree */ |
---|
5200 | char filename[128]; |
---|
5201 | FILE *treefile; |
---|
5202 | int nusertrees, which; |
---|
5203 | |
---|
5204 | nusertrees = adef->numutrees; |
---|
5205 | |
---|
5206 | printf("User-defined %s:\n\n", (nusertrees == 1) ? "tree" : "trees"); |
---|
5207 | |
---|
5208 | treefile = adef->trout ? fopen_pid("treefile", "w", filename) : (FILE *) NULL; |
---|
5209 | |
---|
5210 | for (which = 1; which <= nusertrees; which++) { |
---|
5211 | if (! treeReadLen(INFILE, tr)) return FALSE; |
---|
5212 | if (! treeEvaluate(tr, bt)) return FALSE; |
---|
5213 | if (tr->global <= 0) { |
---|
5214 | if (tr->outgrnode->back) tr->start = tr->outgrnode; |
---|
5215 | printTree(tr, adef); |
---|
5216 | summarize(tr); |
---|
5217 | if (treefile) treeOut(treefile, tr, adef->trout); |
---|
5218 | } |
---|
5219 | else { |
---|
5220 | printf("%6d: Ln Likelihood =%14.5f\n", which, tr->likelihood); |
---|
5221 | } |
---|
5222 | } |
---|
5223 | |
---|
5224 | if (tr->global > 0) { |
---|
5225 | putchar('\n'); |
---|
5226 | if (! recallBestTree(bt, 1, tr)) return FALSE; |
---|
5227 | printf(" Ln Likelihood =%14.5f\n", tr->likelihood); |
---|
5228 | if (! optimize(tr, tr->global, bt)) return FALSE; |
---|
5229 | if (tr->outgrnode->back) tr->start = tr->outgrnode; |
---|
5230 | printTree(tr, adef); |
---|
5231 | summarize(tr); |
---|
5232 | if (treefile) treeOut(treefile, tr, adef->trout); |
---|
5233 | } |
---|
5234 | |
---|
5235 | if (treefile) { |
---|
5236 | (void) fclose(treefile); |
---|
5237 | printf("Tree also written to %s\n", filename); |
---|
5238 | } |
---|
5239 | |
---|
5240 | putchar('\n'); |
---|
5241 | |
---|
5242 | (void) cmpBestTrees(bt, tr); |
---|
5243 | return TRUE; |
---|
5244 | } /* makeUserTree */ |
---|
5245 | |
---|
5246 | |
---|
5247 | #if Slave |
---|
5248 | boolean slaveTreeEvaluate (tree *tr, bestlist *bt) |
---|
5249 | { /* slaveTreeEvaluate */ |
---|
5250 | boolean done; |
---|
5251 | |
---|
5252 | do { |
---|
5253 | requestForWork(); |
---|
5254 | if (! receiveTree(& comm_master, tr)) return FALSE; |
---|
5255 | done = tr->likelihood > 0.0; |
---|
5256 | if (! done) { |
---|
5257 | if (! treeEvaluate(tr, bt)) return FALSE; |
---|
5258 | if (! sendTree(& comm_master, tr)) return FALSE; |
---|
5259 | } |
---|
5260 | } while (! done); |
---|
5261 | |
---|
5262 | return TRUE; |
---|
5263 | } /* slaveTreeEvaluate */ |
---|
5264 | #endif |
---|
5265 | |
---|
5266 | |
---|
5267 | double randum (long *seed) |
---|
5268 | /* random number generator, modified to use 12 bit chunks */ |
---|
5269 | { /* randum */ |
---|
5270 | long sum, mult0, mult1, seed0, seed1, seed2, newseed0, newseed1, newseed2; |
---|
5271 | |
---|
5272 | mult0 = 1549; |
---|
5273 | seed0 = *seed & 4095; |
---|
5274 | sum = mult0 * seed0; |
---|
5275 | newseed0 = sum & 4095; |
---|
5276 | sum >>= 12; |
---|
5277 | seed1 = (*seed >> 12) & 4095; |
---|
5278 | mult1 = 406; |
---|
5279 | sum += mult0 * seed1 + mult1 * seed0; |
---|
5280 | newseed1 = sum & 4095; |
---|
5281 | sum >>= 12; |
---|
5282 | seed2 = (*seed >> 24) & 255; |
---|
5283 | sum += mult0 * seed2 + mult1 * seed1; |
---|
5284 | newseed2 = sum & 255; |
---|
5285 | |
---|
5286 | *seed = newseed2 << 24 | newseed1 << 12 | newseed0; |
---|
5287 | return 0.00390625 * (newseed2 |
---|
5288 | + 0.000244140625 * (newseed1 |
---|
5289 | + 0.000244140625 * newseed0)); |
---|
5290 | } /* randum */ |
---|
5291 | |
---|
5292 | |
---|
5293 | boolean makeDenovoTree (tree *tr, bestlist *bt, analdef *adef) |
---|
5294 | { /* makeDenovoTree */ |
---|
5295 | char filename[128]; |
---|
5296 | FILE *treefile; |
---|
5297 | nodeptr p; |
---|
5298 | int *enterorder; /* random entry order */ |
---|
5299 | int i, j, k, nextsp, newsp, maxtrav, tested; |
---|
5300 | |
---|
5301 | double randum(); |
---|
5302 | |
---|
5303 | |
---|
5304 | enterorder = (int *) Malloc(sizeof(int) * (tr->mxtips + 1)); |
---|
5305 | if (! enterorder) { |
---|
5306 | fprintf(stderr, "makeDenovoTree: Malloc failure for enterorder\n"); |
---|
5307 | return 0; |
---|
5308 | } |
---|
5309 | |
---|
5310 | if (adef->restart) { |
---|
5311 | printf("Restarting from tree with the following sequences:\n"); |
---|
5312 | tr->userlen = TRUE; |
---|
5313 | if (! treeReadLen(INFILE, tr)) return FALSE; |
---|
5314 | if (! smoothTree(tr, smoothings)) return FALSE; |
---|
5315 | if (evaluate(tr, tr->start) == badEval) return FALSE; |
---|
5316 | if (saveBestTree(bt, tr) < 1) return FALSE; |
---|
5317 | |
---|
5318 | for (i = 1, j = tr->ntips; i <= tr->mxtips; i++) { /* find loose tips */ |
---|
5319 | if (! tr->nodep[i]->back) { |
---|
5320 | enterorder[++j] = i; |
---|
5321 | } |
---|
5322 | else { |
---|
5323 | printf(" %s\n", tr->nodep[i]->name); |
---|
5324 | |
---|
5325 | # if Master |
---|
5326 | if (i>3) REPORT_ADD_SPECS; |
---|
5327 | # endif |
---|
5328 | } |
---|
5329 | } |
---|
5330 | putchar('\n'); |
---|
5331 | } |
---|
5332 | |
---|
5333 | else { /* start from scratch */ |
---|
5334 | tr->ntips = 0; |
---|
5335 | for (i = 1; i <= tr->mxtips; i++) enterorder[i] = i; |
---|
5336 | } |
---|
5337 | |
---|
5338 | if (adef->jumble) for (i = tr->ntips + 1; i <= tr->mxtips; i++) { |
---|
5339 | j = randum(&(adef->jumble))*(tr->mxtips - tr->ntips) + tr->ntips + 1; |
---|
5340 | k = enterorder[j]; |
---|
5341 | enterorder[j] = enterorder[i]; |
---|
5342 | enterorder[i] = k; |
---|
5343 | } |
---|
5344 | |
---|
5345 | bt->numtrees = 1; |
---|
5346 | if (tr->ntips < tr->mxtips) printf("Adding species:\n"); |
---|
5347 | |
---|
5348 | if (tr->ntips == 0) { |
---|
5349 | for (i = 1; i <= 3; i++) { |
---|
5350 | printf(" %s\n", tr->nodep[enterorder[i]]->name); |
---|
5351 | } |
---|
5352 | tr->nextnode = tr->mxtips + 1; |
---|
5353 | if (! buildSimpleTree(tr, enterorder[1], enterorder[2], enterorder[3])) |
---|
5354 | return FALSE; |
---|
5355 | } |
---|
5356 | |
---|
5357 | while (tr->ntips < tr->mxtips || tr->opt_level < tr->global) { |
---|
5358 | maxtrav = (tr->ntips == tr->mxtips) ? tr->global : tr->partswap; |
---|
5359 | if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3; |
---|
5360 | |
---|
5361 | if (tr->opt_level >= maxtrav) { |
---|
5362 | nextsp = ++(tr->ntips); |
---|
5363 | newsp = enterorder[nextsp]; |
---|
5364 | p = tr->nodep[newsp]; |
---|
5365 | printf(" %s\n", p->name); |
---|
5366 | |
---|
5367 | # if Master |
---|
5368 | if (nextsp % DNAML_STEP_TIME_COUNT == 1) { |
---|
5369 | REPORT_STEP_TIME; |
---|
5370 | } |
---|
5371 | REPORT_ADD_SPECS; |
---|
5372 | # endif |
---|
5373 | |
---|
5374 | (void) buildNewTip(tr, p); |
---|
5375 | |
---|
5376 | resetBestTree(bt); |
---|
5377 | cacheZ(tr); |
---|
5378 | tested = addTraverse(tr, p->back, findAnyTip(tr->start)->back, |
---|
5379 | 1, tr->ntips - 2, bt, adef->qadd); |
---|
5380 | if (tested == badRear) return FALSE; |
---|
5381 | bt->numtrees += tested; |
---|
5382 | |
---|
5383 | # if Master |
---|
5384 | getReturnedTrees(tr, bt, tested); |
---|
5385 | # endif |
---|
5386 | |
---|
5387 | printf(" Tested %d alternative trees\n", tested); |
---|
5388 | |
---|
5389 | |
---|
5390 | |
---|
5391 | (void) recallBestTree(bt, 1, tr); |
---|
5392 | if (! tr->smoothed) { |
---|
5393 | if (! smoothTree(tr, smoothings)) return FALSE; |
---|
5394 | if (evaluate(tr, tr->start) == badEval) return FALSE; |
---|
5395 | (void) saveBestTree(bt, tr); |
---|
5396 | } |
---|
5397 | |
---|
5398 | if (tr->ntips == 4) tr->opt_level = 1; /* All 4 taxon trees done */ |
---|
5399 | maxtrav = (tr->ntips == tr->mxtips) ? tr->global : tr->partswap; |
---|
5400 | if (maxtrav > tr->ntips - 3) maxtrav = tr->ntips - 3; |
---|
5401 | } |
---|
5402 | |
---|
5403 | printf(" Ln Likelihood =%14.5f\n", tr->likelihood); |
---|
5404 | if (! optimize(tr, maxtrav, bt)) return FALSE; |
---|
5405 | } |
---|
5406 | |
---|
5407 | printf("\nExamined %d %s\n", bt->numtrees, |
---|
5408 | bt->numtrees != 1 ? "trees" : "tree"); |
---|
5409 | |
---|
5410 | treefile = adef->trout ? fopen_pid("treefile", "w", filename) : (FILE *) NULL; |
---|
5411 | (void) showBestTrees(bt, tr, adef, treefile); |
---|
5412 | if (treefile) { |
---|
5413 | (void) fclose(treefile); |
---|
5414 | printf("Tree also written to %s\n\n", filename); |
---|
5415 | } |
---|
5416 | |
---|
5417 | (void) cmpBestTrees(bt, tr); |
---|
5418 | |
---|
5419 | # if DeleteCheckpointFile |
---|
5420 | unlink_pid(checkpointname); |
---|
5421 | # endif |
---|
5422 | |
---|
5423 | Free(enterorder); |
---|
5424 | |
---|
5425 | return TRUE; |
---|
5426 | } /* makeDenovoTree */ |
---|
5427 | |
---|
5428 | /*==========================================================================*/ |
---|
5429 | /* "main" routine */ |
---|
5430 | /*==========================================================================*/ |
---|
5431 | |
---|
5432 | #if Sequential |
---|
5433 | main () |
---|
5434 | #else |
---|
5435 | slave () |
---|
5436 | #endif |
---|
5437 | { /* DNA Maximum Likelihood */ |
---|
5438 | # if Master |
---|
5439 | int starttime, inputtime, endtime; |
---|
5440 | # endif |
---|
5441 | |
---|
5442 | # if Master || Slave |
---|
5443 | int my_id, nprocs, type, from, sz; |
---|
5444 | char *msg; |
---|
5445 | # endif |
---|
5446 | |
---|
5447 | analdef *adef; |
---|
5448 | rawdata *rdta; |
---|
5449 | cruncheddata *cdta; |
---|
5450 | tree *tr; /* current tree */ |
---|
5451 | bestlist *bt; /* topology of best found tree */ |
---|
5452 | |
---|
5453 | globTime = gettime(); |
---|
5454 | |
---|
5455 | # if Debug |
---|
5456 | { |
---|
5457 | char debugfilename[128]; |
---|
5458 | debug = fopen_pid("dnaml_debug", "w", debugfilename); |
---|
5459 | } |
---|
5460 | # endif |
---|
5461 | |
---|
5462 | # if Master |
---|
5463 | starttime = p4_clock(); |
---|
5464 | nprocs = p4_num_total_slaves(); |
---|
5465 | |
---|
5466 | if ((OUTFILE = freopen_pid("master.out", "w", stdout)) == NULL) { |
---|
5467 | fprintf(stderr, "Could not open output file\n"); |
---|
5468 | exit(1); |
---|
5469 | } |
---|
5470 | |
---|
5471 | /* Receive input file name from host */ |
---|
5472 | type = DNAML_FILE_NAME; |
---|
5473 | from = DNAML_HOST_ID; |
---|
5474 | msg = NULL; |
---|
5475 | p4_recv(& type, & from, & msg, & sz); |
---|
5476 | if ((INFILE = fopen(msg, "r")) == NULL) { |
---|
5477 | fprintf(stderr, "master could not open input file %s\n", msg); |
---|
5478 | exit(1); |
---|
5479 | } |
---|
5480 | p4_msg_free(msg); |
---|
5481 | |
---|
5482 | open_link(& comm_slave); |
---|
5483 | # endif |
---|
5484 | |
---|
5485 | # if DNAML_STEP |
---|
5486 | begin_step_time = starttime; |
---|
5487 | # endif |
---|
5488 | |
---|
5489 | # if Slave |
---|
5490 | my_id = p4_get_my_id(); |
---|
5491 | nprocs = p4_num_total_slaves(); |
---|
5492 | |
---|
5493 | /* Receive input file name from host */ |
---|
5494 | type = DNAML_FILE_NAME; |
---|
5495 | from = DNAML_HOST_ID; |
---|
5496 | msg = NULL; |
---|
5497 | p4_recv(& type, & from, & msg, & sz); |
---|
5498 | if ((INFILE = fopen(msg, "r")) == NULL) { |
---|
5499 | fprintf(stderr, "slave could not open input file %s\n",msg); |
---|
5500 | exit(1); |
---|
5501 | } |
---|
5502 | p4_msg_free(msg); |
---|
5503 | |
---|
5504 | # ifdef P4DEBUG |
---|
5505 | if ((OUTFILE = freopen_pid("slave.out", "w", stdout)) == NULL) { |
---|
5506 | fprintf(stderr, "Could not open output file\n"); |
---|
5507 | exit(1); |
---|
5508 | } |
---|
5509 | # else |
---|
5510 | if ((OUTFILE = freopen("/dev/null", "w", stdout)) == NULL) { |
---|
5511 | fprintf(stderr, "Could not open output file\n"); |
---|
5512 | exit(1); |
---|
5513 | } |
---|
5514 | # endif |
---|
5515 | |
---|
5516 | open_link(& comm_master); |
---|
5517 | # endif |
---|
5518 | |
---|
5519 | |
---|
5520 | /* Get data structure memory */ |
---|
5521 | |
---|
5522 | if (! (adef = (analdef *) Malloc(sizeof(analdef)))) { |
---|
5523 | printf("ERROR: Unable to get memory for analysis definition\n\n"); |
---|
5524 | return 1; |
---|
5525 | } |
---|
5526 | |
---|
5527 | if (! (rdta = (rawdata *) Malloc(sizeof(rawdata)))) { |
---|
5528 | printf("ERROR: Unable to get memory for raw DNA\n\n"); |
---|
5529 | return 1; |
---|
5530 | } |
---|
5531 | |
---|
5532 | if (! (cdta = (cruncheddata *) Malloc(sizeof(cruncheddata)))) { |
---|
5533 | printf("ERROR: Unable to get memory for crunched DNA\n\n"); |
---|
5534 | return 1; |
---|
5535 | } |
---|
5536 | |
---|
5537 | if ((tr = (tree *) Malloc(sizeof(tree))) && |
---|
5538 | (bt = (bestlist *) Malloc(sizeof(bestlist)))) ; |
---|
5539 | else { |
---|
5540 | printf("ERROR: Unable to get memory for trees\n\n"); |
---|
5541 | return 1; |
---|
5542 | } |
---|
5543 | bt->ninit = 0; |
---|
5544 | |
---|
5545 | if (! getinput(adef, rdta, cdta, tr)) return 1; |
---|
5546 | |
---|
5547 | # if Master |
---|
5548 | inputtime = p4_clock(); |
---|
5549 | printf("Input time %d milliseconds\n", inputtime - starttime); |
---|
5550 | REPORT_STEP_TIME; |
---|
5551 | # endif |
---|
5552 | |
---|
5553 | # if Slave |
---|
5554 | (void) fclose(INFILE); |
---|
5555 | # endif |
---|
5556 | |
---|
5557 | /* The material below would be a loop over jumbles and/or boots */ |
---|
5558 | |
---|
5559 | if (! makeweights(adef, rdta, cdta)) return 1; |
---|
5560 | if (! makevalues(rdta, cdta)) return 1; |
---|
5561 | if (adef->empf && ! empiricalfreqs(rdta, cdta)) return 1; |
---|
5562 | reportfreqs(adef, rdta); |
---|
5563 | if (! linkdata2tree(rdta, cdta, tr)) return 1; |
---|
5564 | |
---|
5565 | if (! linkxarray(3, 3, cdta->endsite, & freextip, & usedxtip)) return 1; |
---|
5566 | if (! setupnodex(tr)) return 1; |
---|
5567 | |
---|
5568 | |
---|
5569 | /* AxML modification start*/ |
---|
5570 | |
---|
5571 | if( ! setupEqualityVector(tr)) return 1; |
---|
5572 | |
---|
5573 | /* initialize equality vectors */ |
---|
5574 | |
---|
5575 | |
---|
5576 | /* AxML modification end*/ |
---|
5577 | |
---|
5578 | # if Slave |
---|
5579 | if (! slaveTreeEvaluate(tr, bt)) return 1; |
---|
5580 | # else |
---|
5581 | if (! initBestTree(bt, adef->nkeep, tr->mxtips, tr->cdta->endsite)) return 1; |
---|
5582 | if (! adef->usertree) { |
---|
5583 | if (! makeDenovoTree(tr, bt, adef)) return 1; |
---|
5584 | } |
---|
5585 | else { |
---|
5586 | if (! makeUserTree(tr, bt, adef)) return 1; |
---|
5587 | } |
---|
5588 | if (! freeBestTree(bt)) return 1; |
---|
5589 | # endif |
---|
5590 | |
---|
5591 | /* Endpoint for jumble and/or boot loop */ |
---|
5592 | |
---|
5593 | # if Master |
---|
5594 | tr->likelihood = 1.0; /* terminate slaves */ |
---|
5595 | (void) sendTree(& comm_slave, tr); |
---|
5596 | # endif |
---|
5597 | |
---|
5598 | freeTree(tr); |
---|
5599 | |
---|
5600 | # if Master |
---|
5601 | close_link(& comm_slave); |
---|
5602 | (void) fclose(INFILE); |
---|
5603 | |
---|
5604 | REPORT_STEP_TIME; |
---|
5605 | endtime = p4_clock(); |
---|
5606 | printf("Execution time %d milliseconds\n", endtime - inputtime); |
---|
5607 | (void) fclose(OUTFILE); |
---|
5608 | # endif |
---|
5609 | |
---|
5610 | # if Slave |
---|
5611 | close_link(& comm_master); |
---|
5612 | (void) fclose(OUTFILE); |
---|
5613 | # endif |
---|
5614 | |
---|
5615 | # if Debug |
---|
5616 | (void) fclose(debug); |
---|
5617 | # endif |
---|
5618 | |
---|
5619 | # if Master || Slave |
---|
5620 | p4_send(DNAML_DONE, DNAML_HOST_ID, NULL, 0); |
---|
5621 | # else |
---|
5622 | return 0; |
---|
5623 | # endif |
---|
5624 | } /* DNA Maximum Likelihood */ |
---|