1 | /* RAxML-VI-HPC (version 2.2) a program for sequential and parallel estimation of phylogenetic trees |
---|
2 | * Copyright August 2006 by Alexandros Stamatakis |
---|
3 | * |
---|
4 | * Partially derived from |
---|
5 | * fastDNAml, a program for estimation of phylogenetic trees from sequences by Gary J. Olsen |
---|
6 | * |
---|
7 | * and |
---|
8 | * |
---|
9 | * Programs of the PHYLIP package by Joe Felsenstein. |
---|
10 | * |
---|
11 | * This program is free software; you may redistribute it and/or modify its |
---|
12 | * under the terms of the GNU General Public License as published by the Free |
---|
13 | * Software Foundation; either version 2 of the License, or (at your option) |
---|
14 | * any later version. |
---|
15 | * |
---|
16 | * This program is distributed in the hope that it will be useful, but |
---|
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
18 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
---|
19 | * for more details. |
---|
20 | * |
---|
21 | * |
---|
22 | * For any other enquiries send an Email to Alexandros Stamatakis |
---|
23 | * Alexandros.Stamatakis@epfl.ch |
---|
24 | * |
---|
25 | * When publishing work that is based on the results from RAxML-VI-HPC please cite: |
---|
26 | * |
---|
27 | * Alexandros Stamatakis:"RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands of taxa and mixed models". |
---|
28 | * Bioinformatics 2006; doi: 10.1093/bioinformatics/btl446 |
---|
29 | */ |
---|
30 | |
---|
31 | #ifndef WIN32 |
---|
32 | #include <sys/times.h> |
---|
33 | #include <sys/types.h> |
---|
34 | #include <sys/time.h> |
---|
35 | #include <unistd.h> |
---|
36 | #endif |
---|
37 | |
---|
38 | #include <limits.h> |
---|
39 | #include <math.h> |
---|
40 | #include <time.h> |
---|
41 | #include <stdlib.h> |
---|
42 | #include <stdio.h> |
---|
43 | #include <ctype.h> |
---|
44 | #include <string.h> |
---|
45 | |
---|
46 | #include "axml.h" |
---|
47 | |
---|
48 | extern char **globalArgv; |
---|
49 | extern int globalArgc; |
---|
50 | extern char workdir[1024]; |
---|
51 | extern char run_id[128]; |
---|
52 | extern double masterTime; |
---|
53 | |
---|
54 | |
---|
55 | #ifdef _USE_PTHREADS |
---|
56 | extern volatile int NumberOfThreads; |
---|
57 | extern volatile int NumberOfJobs; |
---|
58 | #endif |
---|
59 | |
---|
60 | static double getBranch(tree *tr, double *b, double *bb) |
---|
61 | { |
---|
62 | double z = 0.0; |
---|
63 | |
---|
64 | if(!tr->multiBranch) |
---|
65 | { |
---|
66 | assert(tr->fracchange != -1.0); |
---|
67 | assert(b[0] == bb[0]); |
---|
68 | z = b[0]; |
---|
69 | if (z < zmin) |
---|
70 | z = zmin; |
---|
71 | if(z > zmax) |
---|
72 | z = zmax; |
---|
73 | z = -log(z) * tr->fracchange; |
---|
74 | return z; |
---|
75 | } |
---|
76 | else |
---|
77 | { |
---|
78 | int i; |
---|
79 | double x = 0; |
---|
80 | |
---|
81 | for(i = 0; i < tr->numBranches; i++) |
---|
82 | { |
---|
83 | assert(b[i] == bb[i]); |
---|
84 | assert(tr->partitionContributions[i] != -1.0); |
---|
85 | assert(tr->fracchanges[i] != -1.0); |
---|
86 | x = b[i]; |
---|
87 | if (x < zmin) |
---|
88 | x = zmin; |
---|
89 | if(x > zmax) |
---|
90 | x = zmax; |
---|
91 | x = -log(x) * tr->fracchanges[i]; |
---|
92 | |
---|
93 | z += x * tr->partitionContributions[i]; |
---|
94 | } |
---|
95 | |
---|
96 | return z; |
---|
97 | } |
---|
98 | |
---|
99 | } |
---|
100 | |
---|
101 | static double getBranchPerPartition(tree *tr, double *b, double *bb, int j) |
---|
102 | { |
---|
103 | double z = 0.0; |
---|
104 | |
---|
105 | if(!tr->multiBranch) |
---|
106 | { |
---|
107 | assert(tr->fracchange != -1.0); |
---|
108 | assert(b[0] == bb[0]); |
---|
109 | z = b[0]; |
---|
110 | if (z < zmin) |
---|
111 | z = zmin; |
---|
112 | if(z > zmax) |
---|
113 | z = zmax; |
---|
114 | z = -log(z) * tr->fracchange; |
---|
115 | return z; |
---|
116 | } |
---|
117 | else |
---|
118 | { |
---|
119 | int |
---|
120 | i = tr->readPartition[j]; |
---|
121 | |
---|
122 | assert(b[i] == bb[i]); |
---|
123 | assert(tr->fracchanges[i] != -1.0); |
---|
124 | z = b[i]; |
---|
125 | if (z < zmin) |
---|
126 | z = zmin; |
---|
127 | if(z > zmax) |
---|
128 | z = zmax; |
---|
129 | z = -log(z) * tr->fracchanges[i]; |
---|
130 | |
---|
131 | return z; |
---|
132 | } |
---|
133 | |
---|
134 | } |
---|
135 | |
---|
136 | |
---|
137 | static char *Tree2StringClassifyRec(char *treestr, tree *tr, nodeptr p, int *countBranches, |
---|
138 | int *inserts, boolean originalTree, boolean jointLabels, boolean likelihood) |
---|
139 | { |
---|
140 | branchInfo *bInf = p->bInf; |
---|
141 | int i, countQuery = 0; |
---|
142 | |
---|
143 | *countBranches = *countBranches + 1; |
---|
144 | |
---|
145 | |
---|
146 | |
---|
147 | if(!originalTree) |
---|
148 | { |
---|
149 | for(i = 0; i < tr->numberOfTipsForInsertion; i++) |
---|
150 | if(bInf->epa->countThem[i] > 0) |
---|
151 | countQuery++; |
---|
152 | |
---|
153 | if(countQuery > 0) |
---|
154 | { |
---|
155 | int |
---|
156 | localCounter = 0; |
---|
157 | |
---|
158 | *treestr++ = '('; |
---|
159 | |
---|
160 | if(countQuery > 1) |
---|
161 | *treestr++ = '('; |
---|
162 | |
---|
163 | for(i = 0; i < tr->numberOfTipsForInsertion; i++) |
---|
164 | { |
---|
165 | if(bInf->epa->countThem[i] > 0) |
---|
166 | { |
---|
167 | if(likelihood) |
---|
168 | { |
---|
169 | char |
---|
170 | branchLength[128]; |
---|
171 | |
---|
172 | sprintf(branchLength, "%f", bInf->epa->branches[i]); |
---|
173 | sprintf(treestr,"QUERY___%s:%s", tr->nameList[inserts[i]], branchLength); |
---|
174 | } |
---|
175 | else |
---|
176 | sprintf(treestr,"QUERY___%s", tr->nameList[inserts[i]]); |
---|
177 | |
---|
178 | while (*treestr) treestr++; |
---|
179 | if(localCounter < countQuery - 1) |
---|
180 | *treestr++ = ','; |
---|
181 | localCounter++; |
---|
182 | } |
---|
183 | } |
---|
184 | |
---|
185 | if(countQuery > 1) |
---|
186 | { |
---|
187 | sprintf(treestr,"):0.0,"); |
---|
188 | while (*treestr) treestr++; |
---|
189 | } |
---|
190 | else |
---|
191 | *treestr++ = ','; |
---|
192 | |
---|
193 | } |
---|
194 | } |
---|
195 | |
---|
196 | if(isTip(p->number, tr->rdta->numsp)) |
---|
197 | { |
---|
198 | char *nameptr = tr->nameList[p->number]; |
---|
199 | |
---|
200 | sprintf(treestr, "%s", nameptr); |
---|
201 | while (*treestr) treestr++; |
---|
202 | } |
---|
203 | else |
---|
204 | { |
---|
205 | *treestr++ = '('; |
---|
206 | treestr = Tree2StringClassifyRec(treestr, tr, p->next->back, |
---|
207 | countBranches, inserts, originalTree, jointLabels, likelihood); |
---|
208 | *treestr++ = ','; |
---|
209 | treestr = Tree2StringClassifyRec(treestr, tr, p->next->next->back, |
---|
210 | countBranches, inserts, originalTree, jointLabels, likelihood); |
---|
211 | *treestr++ = ')'; |
---|
212 | } |
---|
213 | |
---|
214 | if(countQuery > 0) |
---|
215 | { |
---|
216 | sprintf(treestr, ":%8.20f[%s]", 0.5 * p->bInf->epa->originalBranchLength, p->bInf->epa->branchLabel); |
---|
217 | while (*treestr) treestr++; |
---|
218 | *treestr++ = ')'; |
---|
219 | } |
---|
220 | |
---|
221 | if(originalTree) |
---|
222 | { |
---|
223 | if(jointLabels) |
---|
224 | { |
---|
225 | if(tr->wasRooted) |
---|
226 | { |
---|
227 | if(p == tr->leftRootNode) |
---|
228 | { |
---|
229 | sprintf(treestr, ":%8.20f{%d", p->bInf->epa->originalBranchLength * 0.5, p->bInf->epa->jointLabel); |
---|
230 | assert(tr->rootLabel == p->bInf->epa->jointLabel); |
---|
231 | } |
---|
232 | else |
---|
233 | { |
---|
234 | if(p == tr->rightRootNode) |
---|
235 | { |
---|
236 | sprintf(treestr, ":%8.20f{%d", p->bInf->epa->originalBranchLength * 0.5, tr->numberOfBranches); |
---|
237 | assert(tr->rootLabel == p->bInf->epa->jointLabel); |
---|
238 | } |
---|
239 | else |
---|
240 | sprintf(treestr, ":%8.20f{%d", p->bInf->epa->originalBranchLength, p->bInf->epa->jointLabel); |
---|
241 | } |
---|
242 | } |
---|
243 | else |
---|
244 | sprintf(treestr, ":%8.20f{%d", p->bInf->epa->originalBranchLength, p->bInf->epa->jointLabel); |
---|
245 | } |
---|
246 | else |
---|
247 | sprintf(treestr, ":%8.20f[%s", p->bInf->epa->originalBranchLength, p->bInf->epa->branchLabel); |
---|
248 | } |
---|
249 | else |
---|
250 | { |
---|
251 | if(countQuery > 0) |
---|
252 | sprintf(treestr, ":%8.20f[%s", 0.5 * p->bInf->epa->originalBranchLength, p->bInf->epa->branchLabel); |
---|
253 | else |
---|
254 | sprintf(treestr, ":%8.20f[%s", p->bInf->epa->originalBranchLength, p->bInf->epa->branchLabel); |
---|
255 | } |
---|
256 | |
---|
257 | while (*treestr) treestr++; |
---|
258 | |
---|
259 | assert(!(countQuery > 0 && originalTree == TRUE)); |
---|
260 | |
---|
261 | if(jointLabels) |
---|
262 | sprintf(treestr, "}"); |
---|
263 | else |
---|
264 | sprintf(treestr, "]"); |
---|
265 | |
---|
266 | while (*treestr) treestr++; |
---|
267 | |
---|
268 | return treestr; |
---|
269 | } |
---|
270 | |
---|
271 | |
---|
272 | |
---|
273 | |
---|
274 | char *Tree2StringClassify(char *treestr, tree *tr, int *inserts, |
---|
275 | boolean originalTree, boolean jointLabels, boolean likelihood) |
---|
276 | { |
---|
277 | nodeptr |
---|
278 | p; |
---|
279 | |
---|
280 | int |
---|
281 | countBranches = 0; |
---|
282 | |
---|
283 | |
---|
284 | if(jointLabels && tr->wasRooted) |
---|
285 | { |
---|
286 | assert(originalTree); |
---|
287 | |
---|
288 | *treestr++ = '('; |
---|
289 | treestr = Tree2StringClassifyRec(treestr, tr, tr->leftRootNode, &countBranches, |
---|
290 | inserts, originalTree, jointLabels, likelihood); |
---|
291 | *treestr++ = ','; |
---|
292 | treestr = Tree2StringClassifyRec(treestr, tr, tr->rightRootNode, &countBranches, |
---|
293 | inserts, originalTree, jointLabels, likelihood); |
---|
294 | *treestr++ = ')'; |
---|
295 | *treestr++ = ';'; |
---|
296 | |
---|
297 | assert(countBranches == 2 * tr->ntips - 2); |
---|
298 | |
---|
299 | *treestr++ = '\0'; |
---|
300 | while (*treestr) treestr++; |
---|
301 | return treestr; |
---|
302 | } |
---|
303 | else |
---|
304 | { |
---|
305 | if(jointLabels) |
---|
306 | p = tr->nodep[tr->mxtips + 1]; |
---|
307 | else |
---|
308 | p = tr->start->back; |
---|
309 | |
---|
310 | assert(!isTip(p->number, tr->mxtips)); |
---|
311 | |
---|
312 | *treestr++ = '('; |
---|
313 | treestr = Tree2StringClassifyRec(treestr, tr, p->back, &countBranches, |
---|
314 | inserts, originalTree, jointLabels, likelihood); |
---|
315 | *treestr++ = ','; |
---|
316 | treestr = Tree2StringClassifyRec(treestr, tr, p->next->back, &countBranches, |
---|
317 | inserts, originalTree, jointLabels, likelihood); |
---|
318 | *treestr++ = ','; |
---|
319 | treestr = Tree2StringClassifyRec(treestr, tr, p->next->next->back, &countBranches, |
---|
320 | inserts, originalTree, jointLabels, likelihood); |
---|
321 | *treestr++ = ')'; |
---|
322 | *treestr++ = ';'; |
---|
323 | |
---|
324 | assert(countBranches == 2 * tr->ntips - 3); |
---|
325 | |
---|
326 | *treestr++ = '\0'; |
---|
327 | while (*treestr) treestr++; |
---|
328 | return treestr; |
---|
329 | } |
---|
330 | } |
---|
331 | |
---|
332 | |
---|
333 | |
---|
334 | |
---|
335 | void markTips(nodeptr p, int *perm, int maxTips) |
---|
336 | { |
---|
337 | if(isTip(p->number, maxTips)) |
---|
338 | { |
---|
339 | perm[p->number] = 1; |
---|
340 | return; |
---|
341 | } |
---|
342 | else |
---|
343 | { |
---|
344 | nodeptr q = p->next; |
---|
345 | while(q != p) |
---|
346 | { |
---|
347 | markTips(q->back, perm, maxTips); |
---|
348 | q = q->next; |
---|
349 | } |
---|
350 | } |
---|
351 | } |
---|
352 | |
---|
353 | |
---|
354 | static boolean containsRoot(nodeptr p, tree *tr, int rootNumber) |
---|
355 | { |
---|
356 | |
---|
357 | if(isTip(p->number, tr->mxtips)) |
---|
358 | { |
---|
359 | if(p->number == rootNumber) |
---|
360 | return TRUE; |
---|
361 | else |
---|
362 | return FALSE; |
---|
363 | } |
---|
364 | else |
---|
365 | { |
---|
366 | if(p->number == rootNumber) |
---|
367 | return TRUE; |
---|
368 | else |
---|
369 | { |
---|
370 | if(containsRoot(p->next->back, tr, rootNumber)) |
---|
371 | return TRUE; |
---|
372 | else |
---|
373 | { |
---|
374 | if(containsRoot(p->next->next->back, tr, rootNumber)) |
---|
375 | return TRUE; |
---|
376 | else |
---|
377 | return FALSE; |
---|
378 | } |
---|
379 | } |
---|
380 | |
---|
381 | } |
---|
382 | } |
---|
383 | |
---|
384 | static nodeptr findRootDirection(nodeptr p, tree *tr, int rootNumber) |
---|
385 | { |
---|
386 | if(containsRoot(p, tr, rootNumber)) |
---|
387 | return p; |
---|
388 | |
---|
389 | if(containsRoot(p->back, tr, rootNumber)) |
---|
390 | return (p->back); |
---|
391 | |
---|
392 | /* one of the two subtrees must contain the root */ |
---|
393 | |
---|
394 | assert(0); |
---|
395 | } |
---|
396 | |
---|
397 | |
---|
398 | void setPartitionMask(tree *tr, int i, boolean *executeModel) |
---|
399 | { |
---|
400 | int |
---|
401 | model = 0; |
---|
402 | |
---|
403 | if(tr->perPartitionEPA) |
---|
404 | { |
---|
405 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
406 | executeModel[model] = FALSE; |
---|
407 | |
---|
408 | executeModel[tr->readPartition[i]] = TRUE; |
---|
409 | } |
---|
410 | else |
---|
411 | { |
---|
412 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
413 | executeModel[model] = TRUE; |
---|
414 | } |
---|
415 | } |
---|
416 | |
---|
417 | void resetPartitionMask(tree *tr, boolean *executeModel) |
---|
418 | { |
---|
419 | int |
---|
420 | model = 0; |
---|
421 | |
---|
422 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
423 | executeModel[model] = TRUE; |
---|
424 | } |
---|
425 | |
---|
426 | |
---|
427 | |
---|
428 | static boolean allSmoothedEPA(tree *tr) |
---|
429 | { |
---|
430 | int i; |
---|
431 | boolean result = TRUE; |
---|
432 | |
---|
433 | for(i = 0; i < tr->numBranches; i++) |
---|
434 | { |
---|
435 | if(tr->partitionSmoothed[i] == FALSE) |
---|
436 | result = FALSE; |
---|
437 | else |
---|
438 | tr->partitionConverged[i] = TRUE; |
---|
439 | } |
---|
440 | |
---|
441 | return result; |
---|
442 | } |
---|
443 | |
---|
444 | static boolean updateEPA(tree *tr, nodeptr p, int j) |
---|
445 | { |
---|
446 | nodeptr q; |
---|
447 | boolean smoothedPartitions[NUM_BRANCHES]; |
---|
448 | int i; |
---|
449 | double z[NUM_BRANCHES], z0[NUM_BRANCHES]; |
---|
450 | double _deltaz; |
---|
451 | |
---|
452 | q = p->back; |
---|
453 | |
---|
454 | for(i = 0; i < tr->numBranches; i++) |
---|
455 | z0[i] = q->z[i]; |
---|
456 | |
---|
457 | setPartitionMask(tr, j, tr->executeModel); |
---|
458 | makenewzGeneric(tr, p, q, z0, newzpercycle, z, FALSE); |
---|
459 | |
---|
460 | for(i = 0; i < tr->numBranches; i++) |
---|
461 | smoothedPartitions[i] = tr->partitionSmoothed[i]; |
---|
462 | |
---|
463 | for(i = 0; i < tr->numBranches; i++) |
---|
464 | { |
---|
465 | if(!tr->partitionConverged[i]) |
---|
466 | { |
---|
467 | _deltaz = deltaz; |
---|
468 | |
---|
469 | if(ABS(z[i] - z0[i]) > _deltaz) |
---|
470 | { |
---|
471 | smoothedPartitions[i] = FALSE; |
---|
472 | } |
---|
473 | |
---|
474 | p->z[i] = q->z[i] = z[i]; |
---|
475 | } |
---|
476 | } |
---|
477 | |
---|
478 | for(i = 0; i < tr->numBranches; i++) |
---|
479 | tr->partitionSmoothed[i] = smoothedPartitions[i]; |
---|
480 | |
---|
481 | return TRUE; |
---|
482 | } |
---|
483 | |
---|
484 | static boolean localSmoothEPA(tree *tr, nodeptr p, int maxtimes, int j) |
---|
485 | { |
---|
486 | nodeptr q; |
---|
487 | int i; |
---|
488 | |
---|
489 | if (isTip(p->number, tr->rdta->numsp)) return FALSE; |
---|
490 | |
---|
491 | for(i = 0; i < tr->numBranches; i++) |
---|
492 | tr->partitionConverged[i] = FALSE; |
---|
493 | |
---|
494 | while (--maxtimes >= 0) |
---|
495 | { |
---|
496 | for(i = 0; i < tr->numBranches; i++) |
---|
497 | tr->partitionSmoothed[i] = TRUE; |
---|
498 | |
---|
499 | q = p; |
---|
500 | do |
---|
501 | { |
---|
502 | if (! updateEPA(tr, q, j)) return FALSE; |
---|
503 | q = q->next; |
---|
504 | } |
---|
505 | while (q != p); |
---|
506 | |
---|
507 | if (allSmoothedEPA(tr)) |
---|
508 | break; |
---|
509 | } |
---|
510 | |
---|
511 | for(i = 0; i < tr->numBranches; i++) |
---|
512 | { |
---|
513 | tr->partitionSmoothed[i] = FALSE; |
---|
514 | tr->partitionConverged[i] = FALSE; |
---|
515 | } |
---|
516 | |
---|
517 | return TRUE; |
---|
518 | } |
---|
519 | |
---|
520 | |
---|
521 | static void testInsertThorough(tree *tr, nodeptr r, nodeptr q) |
---|
522 | { |
---|
523 | double |
---|
524 | originalBranchLength = getBranch(tr, q->z, q->back->z), |
---|
525 | result, |
---|
526 | qz[NUM_BRANCHES], |
---|
527 | z[NUM_BRANCHES]; |
---|
528 | |
---|
529 | nodeptr |
---|
530 | root = (nodeptr)NULL, |
---|
531 | x = q->back; |
---|
532 | |
---|
533 | int |
---|
534 | *inserts = tr->inserts, |
---|
535 | j; |
---|
536 | |
---|
537 | boolean |
---|
538 | atRoot = FALSE; |
---|
539 | |
---|
540 | if(!tr->wasRooted) |
---|
541 | root = findRootDirection(q, tr, tr->mxtips + 1); |
---|
542 | else |
---|
543 | { |
---|
544 | if((q == tr->leftRootNode && x == tr->rightRootNode) || |
---|
545 | (x == tr->leftRootNode && q == tr->rightRootNode)) |
---|
546 | atRoot = TRUE; |
---|
547 | else |
---|
548 | { |
---|
549 | nodeptr |
---|
550 | r1 = findRootDirection(q, tr, tr->leftRootNode->number), |
---|
551 | r2 = findRootDirection(q, tr, tr->rightRootNode->number); |
---|
552 | |
---|
553 | assert(r1 == r2); |
---|
554 | |
---|
555 | root = r1; |
---|
556 | } |
---|
557 | } |
---|
558 | |
---|
559 | for(j = 0; j < tr->numBranches; j++) |
---|
560 | { |
---|
561 | qz[j] = q->z[j]; |
---|
562 | z[j] = sqrt(qz[j]); |
---|
563 | |
---|
564 | if(z[j] < zmin) |
---|
565 | z[j] = zmin; |
---|
566 | |
---|
567 | if(z[j] > zmax) |
---|
568 | z[j] = zmax; |
---|
569 | } |
---|
570 | |
---|
571 | |
---|
572 | |
---|
573 | for(j = 0; j < tr->numberOfTipsForInsertion; j++) |
---|
574 | { |
---|
575 | if(q->bInf->epa->executeThem[j]) |
---|
576 | { |
---|
577 | nodeptr |
---|
578 | s = tr->nodep[inserts[j]]; |
---|
579 | |
---|
580 | double |
---|
581 | ratio, |
---|
582 | modifiedBranchLength, |
---|
583 | distalLength; |
---|
584 | |
---|
585 | hookup(r->next, q, z, tr->numBranches); |
---|
586 | hookup(r->next->next, x, z, tr->numBranches); |
---|
587 | hookupDefault(r, s, tr->numBranches); |
---|
588 | |
---|
589 | |
---|
590 | if(tr->perPartitionEPA) |
---|
591 | { |
---|
592 | setPartitionMask(tr, j, tr->executeModel); |
---|
593 | newviewGenericMasked(tr, r); |
---|
594 | |
---|
595 | setPartitionMask(tr, j, tr->executeModel); |
---|
596 | localSmoothEPA(tr, r, smoothings, j); |
---|
597 | |
---|
598 | setPartitionMask(tr, j, tr->executeModel); |
---|
599 | evaluateGeneric(tr, r); |
---|
600 | |
---|
601 | result = tr->perPartitionLH[tr->readPartition[j]]; |
---|
602 | |
---|
603 | resetPartitionMask(tr, tr->executeModel); |
---|
604 | } |
---|
605 | else |
---|
606 | { |
---|
607 | newviewGeneric(tr, r); |
---|
608 | localSmooth(tr, r, smoothings); |
---|
609 | result = evaluateGeneric(tr, r); |
---|
610 | } |
---|
611 | |
---|
612 | |
---|
613 | if(tr->perPartitionEPA) |
---|
614 | tr->bInf[q->bInf->epa->branchNumber].epa->branches[j] = getBranchPerPartition(tr, r->z, r->back->z, j); |
---|
615 | else |
---|
616 | tr->bInf[q->bInf->epa->branchNumber].epa->branches[j] = getBranch(tr, r->z, r->back->z); |
---|
617 | |
---|
618 | tr->bInf[q->bInf->epa->branchNumber].epa->likelihoods[j] = result; |
---|
619 | |
---|
620 | if(tr->perPartitionEPA) |
---|
621 | modifiedBranchLength = getBranchPerPartition(tr, q->z, q->back->z, j) + getBranchPerPartition(tr, x->z, x->back->z, j); |
---|
622 | else |
---|
623 | modifiedBranchLength = getBranch(tr, q->z, q->back->z) + getBranch(tr, x->z, x->back->z); |
---|
624 | |
---|
625 | ratio = originalBranchLength / modifiedBranchLength; |
---|
626 | |
---|
627 | if(tr->wasRooted && atRoot) |
---|
628 | { |
---|
629 | /* always take distal length from left root node and then fix this later */ |
---|
630 | |
---|
631 | if(x == tr->leftRootNode) |
---|
632 | { |
---|
633 | if(tr->perPartitionEPA) |
---|
634 | distalLength = getBranchPerPartition(tr, x->z, x->back->z, j); |
---|
635 | else |
---|
636 | distalLength = getBranch(tr, x->z, x->back->z); |
---|
637 | } |
---|
638 | else |
---|
639 | { |
---|
640 | assert(x == tr->rightRootNode); |
---|
641 | if(tr->perPartitionEPA) |
---|
642 | distalLength = getBranchPerPartition(tr, q->z, q->back->z, j); |
---|
643 | else |
---|
644 | distalLength = getBranch(tr, q->z, q->back->z); |
---|
645 | } |
---|
646 | } |
---|
647 | else |
---|
648 | { |
---|
649 | if(root == x) |
---|
650 | { |
---|
651 | if(tr->perPartitionEPA) |
---|
652 | distalLength = getBranchPerPartition(tr, x->z, x->back->z, j); |
---|
653 | else |
---|
654 | distalLength = getBranch(tr, x->z, x->back->z); |
---|
655 | } |
---|
656 | else |
---|
657 | { |
---|
658 | assert(root == q); |
---|
659 | if(tr->perPartitionEPA) |
---|
660 | distalLength = getBranchPerPartition(tr, q->z, q->back->z, j); |
---|
661 | else |
---|
662 | distalLength = getBranch(tr, q->z, q->back->z); |
---|
663 | } |
---|
664 | } |
---|
665 | |
---|
666 | distalLength *= ratio; |
---|
667 | |
---|
668 | assert(distalLength <= originalBranchLength); |
---|
669 | |
---|
670 | tr->bInf[q->bInf->epa->branchNumber].epa->distalBranches[j] = distalLength; |
---|
671 | } |
---|
672 | } |
---|
673 | |
---|
674 | |
---|
675 | |
---|
676 | hookup(q, x, qz, tr->numBranches); |
---|
677 | |
---|
678 | r->next->next->back = r->next->back = (nodeptr) NULL; |
---|
679 | } |
---|
680 | |
---|
681 | |
---|
682 | |
---|
683 | static void testInsertFast(tree *tr, nodeptr r, nodeptr q) |
---|
684 | { |
---|
685 | double |
---|
686 | result, |
---|
687 | qz[NUM_BRANCHES], |
---|
688 | z[NUM_BRANCHES]; |
---|
689 | |
---|
690 | nodeptr |
---|
691 | x = q->back; |
---|
692 | |
---|
693 | int |
---|
694 | i, |
---|
695 | *inserts = tr->inserts; |
---|
696 | |
---|
697 | |
---|
698 | assert(!tr->grouped); |
---|
699 | |
---|
700 | for(i = 0; i < tr->numBranches; i++) |
---|
701 | { |
---|
702 | qz[i] = q->z[i]; |
---|
703 | z[i] = sqrt(q->z[i]); |
---|
704 | |
---|
705 | if(z[i] < zmin) |
---|
706 | z[i] = zmin; |
---|
707 | if(z[i] > zmax) |
---|
708 | z[i] = zmax; |
---|
709 | } |
---|
710 | |
---|
711 | hookup(r->next, q, z, tr->numBranches); |
---|
712 | hookup(r->next->next, x, z, tr->numBranches); |
---|
713 | |
---|
714 | newviewGeneric(tr, r); |
---|
715 | |
---|
716 | for(i = 0; i < tr->numberOfTipsForInsertion; i++) |
---|
717 | { |
---|
718 | if(q->bInf->epa->executeThem[i]) |
---|
719 | { |
---|
720 | hookupDefault(r, tr->nodep[inserts[i]], tr->numBranches); |
---|
721 | |
---|
722 | if(!tr->perPartitionEPA) |
---|
723 | { |
---|
724 | result = evaluateGeneric (tr, r); |
---|
725 | } |
---|
726 | else |
---|
727 | { |
---|
728 | setPartitionMask(tr, i, tr->executeModel); |
---|
729 | evaluateGeneric(tr, r); |
---|
730 | |
---|
731 | result = tr->perPartitionLH[tr->readPartition[i]]; |
---|
732 | |
---|
733 | resetPartitionMask(tr, tr->executeModel); |
---|
734 | } |
---|
735 | |
---|
736 | |
---|
737 | r->back = (nodeptr) NULL; |
---|
738 | tr->nodep[inserts[i]]->back = (nodeptr) NULL; |
---|
739 | |
---|
740 | tr->bInf[q->bInf->epa->branchNumber].epa->likelihoods[i] = result; |
---|
741 | } |
---|
742 | } |
---|
743 | |
---|
744 | hookup(q, x, qz, tr->numBranches); |
---|
745 | |
---|
746 | r->next->next->back = r->next->back = (nodeptr) NULL; |
---|
747 | } |
---|
748 | |
---|
749 | |
---|
750 | |
---|
751 | |
---|
752 | static void addTraverseRob(tree *tr, nodeptr r, nodeptr q, |
---|
753 | boolean thorough) |
---|
754 | { |
---|
755 | if(thorough) |
---|
756 | testInsertThorough(tr, r, q); |
---|
757 | else |
---|
758 | testInsertFast(tr, r, q); |
---|
759 | |
---|
760 | if(!isTip(q->number, tr->rdta->numsp)) |
---|
761 | { |
---|
762 | nodeptr a = q->next; |
---|
763 | |
---|
764 | while(a != q) |
---|
765 | { |
---|
766 | addTraverseRob(tr, r, a->back, thorough); |
---|
767 | a = a->next; |
---|
768 | } |
---|
769 | } |
---|
770 | } |
---|
771 | |
---|
772 | #ifdef _USE_PTHREADS |
---|
773 | |
---|
774 | size_t getContiguousVectorLength(tree *tr) |
---|
775 | { |
---|
776 | size_t length = 0; |
---|
777 | int model; |
---|
778 | |
---|
779 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
780 | { |
---|
781 | size_t |
---|
782 | realWidth = tr->partitionData[model].upper - tr->partitionData[model].lower; |
---|
783 | |
---|
784 | int |
---|
785 | states = tr->partitionData[model].states; |
---|
786 | |
---|
787 | length += (realWidth * (size_t)states * (size_t)(tr->discreteRateCategories)); |
---|
788 | } |
---|
789 | |
---|
790 | return length; |
---|
791 | } |
---|
792 | |
---|
793 | static size_t getContiguousScalingLength(tree *tr) |
---|
794 | { |
---|
795 | size_t |
---|
796 | length = 0; |
---|
797 | |
---|
798 | int |
---|
799 | model; |
---|
800 | |
---|
801 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
802 | length += tr->partitionData[model].upper - tr->partitionData[model].lower; |
---|
803 | |
---|
804 | return length; |
---|
805 | } |
---|
806 | |
---|
807 | static void allocBranchX(tree *tr) |
---|
808 | { |
---|
809 | int |
---|
810 | i = 0; |
---|
811 | |
---|
812 | for(i = 0; i < tr->numberOfBranches; i++) |
---|
813 | { |
---|
814 | branchInfo |
---|
815 | *b = &(tr->bInf[i]); |
---|
816 | |
---|
817 | b->epa->left = (double*)rax_malloc(sizeof(double) * tr->contiguousVectorLength); |
---|
818 | b->epa->leftScaling = (int*)rax_malloc(sizeof(int) * tr->contiguousScalingLength); |
---|
819 | |
---|
820 | b->epa->right = (double*)rax_malloc(sizeof(double) * tr->contiguousVectorLength); |
---|
821 | b->epa->rightScaling = (int*)rax_malloc(sizeof(int) * tr->contiguousScalingLength); |
---|
822 | } |
---|
823 | } |
---|
824 | |
---|
825 | static void updateClassify(tree *tr, double *z, boolean *partitionSmoothed, boolean *partitionConverged, double *x1, double *x2, unsigned char *tipX1, unsigned char *tipX2, int tipCase, int insertions) |
---|
826 | { |
---|
827 | int i; |
---|
828 | |
---|
829 | boolean smoothedPartitions[NUM_BRANCHES]; |
---|
830 | |
---|
831 | double |
---|
832 | newz[NUM_BRANCHES], |
---|
833 | z0[NUM_BRANCHES]; |
---|
834 | |
---|
835 | for(i = 0; i < tr->numBranches; i++) |
---|
836 | z0[i] = z[i]; |
---|
837 | |
---|
838 | makenewzClassify(tr, newzpercycle, newz, z0, x1, x2, tipX1, tipX2, tipCase, partitionConverged, insertions); |
---|
839 | |
---|
840 | for(i = 0; i < tr->numBranches; i++) |
---|
841 | smoothedPartitions[i] = partitionSmoothed[i]; |
---|
842 | |
---|
843 | for(i = 0; i < tr->numBranches; i++) |
---|
844 | { |
---|
845 | if(!partitionConverged[i]) |
---|
846 | { |
---|
847 | if(ABS(newz[i] - z0[i]) > deltaz) |
---|
848 | smoothedPartitions[i] = FALSE; |
---|
849 | |
---|
850 | z[i] = newz[i]; |
---|
851 | } |
---|
852 | } |
---|
853 | |
---|
854 | for(i = 0; i < tr->numBranches; i++) |
---|
855 | partitionSmoothed[i] = smoothedPartitions[i]; |
---|
856 | } |
---|
857 | |
---|
858 | |
---|
859 | static double localSmoothClassify (tree *tr, int maxtimes, int leftNodeNumber, int rightNodeNumber, int insertionNodeNumber, double *e1, double *e2, double *e3, |
---|
860 | branchInfo *b, int insertions) |
---|
861 | { |
---|
862 | int tipCase; |
---|
863 | |
---|
864 | boolean |
---|
865 | allSmoothed, |
---|
866 | partitionSmoothed[NUM_BRANCHES], |
---|
867 | partitionConverged[NUM_BRANCHES]; |
---|
868 | |
---|
869 | double |
---|
870 | result, |
---|
871 | *x1 = (double*)NULL, |
---|
872 | *x2 = (double*)NULL, |
---|
873 | *x3 = (double*)NULL; |
---|
874 | |
---|
875 | int |
---|
876 | i, |
---|
877 | *ex1 = (int*)NULL, |
---|
878 | *ex2 = (int*)NULL, |
---|
879 | *ex3 = (int*)NULL; |
---|
880 | |
---|
881 | unsigned char |
---|
882 | *tipX1 = (unsigned char *)NULL, |
---|
883 | *tipX2 = (unsigned char *)NULL; |
---|
884 | |
---|
885 | x3 = tr->temporaryVector; |
---|
886 | ex3 = tr->temporaryScaling; |
---|
887 | |
---|
888 | |
---|
889 | for(i = 0; i < tr->numBranches; i++) |
---|
890 | partitionConverged[i] = FALSE; |
---|
891 | |
---|
892 | while (--maxtimes >= 0) |
---|
893 | { |
---|
894 | for(i = 0; i < tr->numBranches; i++) |
---|
895 | partitionSmoothed[i] = TRUE; |
---|
896 | |
---|
897 | /* e3 */ |
---|
898 | if(isTip(leftNodeNumber, tr->mxtips) && isTip(rightNodeNumber, tr->mxtips)) |
---|
899 | { |
---|
900 | tipCase = TIP_TIP; |
---|
901 | |
---|
902 | tipX1 = tr->contiguousTips[leftNodeNumber]; |
---|
903 | tipX2 = tr->contiguousTips[rightNodeNumber]; |
---|
904 | |
---|
905 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); |
---|
906 | } |
---|
907 | else |
---|
908 | { |
---|
909 | if (isTip(leftNodeNumber, tr->mxtips)) |
---|
910 | { |
---|
911 | tipCase = TIP_INNER; |
---|
912 | |
---|
913 | tipX1 = tr->contiguousTips[leftNodeNumber]; |
---|
914 | |
---|
915 | x2 = b->epa->right; |
---|
916 | ex2 = b->epa->rightScaling; |
---|
917 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); |
---|
918 | } |
---|
919 | else |
---|
920 | { |
---|
921 | if(isTip(rightNodeNumber, tr->mxtips)) |
---|
922 | { |
---|
923 | tipCase = TIP_INNER; |
---|
924 | |
---|
925 | tipX1 = tr->contiguousTips[rightNodeNumber]; |
---|
926 | |
---|
927 | x2 = b->epa->left; |
---|
928 | ex2 = b->epa->leftScaling; |
---|
929 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e2, e1, insertions); |
---|
930 | } |
---|
931 | else |
---|
932 | { |
---|
933 | tipCase = INNER_INNER; |
---|
934 | |
---|
935 | x1 = b->epa->left; |
---|
936 | ex1 = b->epa->leftScaling; |
---|
937 | |
---|
938 | x2 = b->epa->right; |
---|
939 | ex2 = b->epa->rightScaling; |
---|
940 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); |
---|
941 | } |
---|
942 | } |
---|
943 | } |
---|
944 | |
---|
945 | |
---|
946 | |
---|
947 | tipCase = TIP_INNER; |
---|
948 | |
---|
949 | x2 = tr->temporaryVector; |
---|
950 | tipX1 = tr->contiguousTips[insertionNodeNumber]; |
---|
951 | |
---|
952 | updateClassify(tr, e3, partitionSmoothed, partitionConverged, x1, x2, tipX1, tipX2, tipCase, insertions); |
---|
953 | |
---|
954 | /* e1 **********************************************************/ |
---|
955 | |
---|
956 | tipX1 = tr->contiguousTips[insertionNodeNumber]; |
---|
957 | |
---|
958 | if(isTip(rightNodeNumber, tr->mxtips)) |
---|
959 | { |
---|
960 | tipCase = TIP_TIP; |
---|
961 | |
---|
962 | tipX2 = tr->contiguousTips[rightNodeNumber]; |
---|
963 | } |
---|
964 | else |
---|
965 | { |
---|
966 | tipCase = TIP_INNER; |
---|
967 | |
---|
968 | x2 = b->epa->right; |
---|
969 | ex2 = b->epa->rightScaling; |
---|
970 | } |
---|
971 | |
---|
972 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e3, e2, insertions); |
---|
973 | |
---|
974 | if(isTip(leftNodeNumber, tr->mxtips)) |
---|
975 | { |
---|
976 | tipCase = TIP_INNER; |
---|
977 | |
---|
978 | tipX1 = tr->contiguousTips[leftNodeNumber]; |
---|
979 | } |
---|
980 | else |
---|
981 | { |
---|
982 | tipCase = INNER_INNER; |
---|
983 | |
---|
984 | x1 = b->epa->left; |
---|
985 | } |
---|
986 | |
---|
987 | updateClassify(tr, e1, partitionSmoothed, partitionConverged, x1, x3, tipX1, (unsigned char*)NULL, tipCase, insertions); |
---|
988 | |
---|
989 | /* e2 *********************************************************/ |
---|
990 | |
---|
991 | tipX1 = tr->contiguousTips[insertionNodeNumber]; |
---|
992 | |
---|
993 | if(isTip(leftNodeNumber, tr->mxtips)) |
---|
994 | { |
---|
995 | tipCase = TIP_TIP; |
---|
996 | |
---|
997 | tipX2 = tr->contiguousTips[leftNodeNumber]; |
---|
998 | } |
---|
999 | else |
---|
1000 | { |
---|
1001 | tipCase = TIP_INNER; |
---|
1002 | |
---|
1003 | x2 = b->epa->left; |
---|
1004 | ex2 = b->epa->leftScaling; |
---|
1005 | } |
---|
1006 | |
---|
1007 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e3, e1, insertions); |
---|
1008 | |
---|
1009 | if(isTip(rightNodeNumber, tr->mxtips)) |
---|
1010 | { |
---|
1011 | tipCase = TIP_INNER; |
---|
1012 | |
---|
1013 | tipX1 = tr->contiguousTips[rightNodeNumber]; |
---|
1014 | } |
---|
1015 | else |
---|
1016 | { |
---|
1017 | tipCase = INNER_INNER; |
---|
1018 | |
---|
1019 | x1 = b->epa->right; |
---|
1020 | } |
---|
1021 | |
---|
1022 | updateClassify(tr, e2, partitionSmoothed, partitionConverged, x1, x3, tipX1, (unsigned char*)NULL, tipCase, insertions); |
---|
1023 | |
---|
1024 | |
---|
1025 | allSmoothed = TRUE; |
---|
1026 | for(i = 0; i < tr->numBranches; i++) |
---|
1027 | { |
---|
1028 | if(partitionSmoothed[i] == FALSE) |
---|
1029 | allSmoothed = FALSE; |
---|
1030 | else |
---|
1031 | partitionConverged[i] = TRUE; |
---|
1032 | } |
---|
1033 | |
---|
1034 | if(allSmoothed) |
---|
1035 | break; |
---|
1036 | } |
---|
1037 | |
---|
1038 | |
---|
1039 | if(isTip(leftNodeNumber, tr->mxtips) && isTip(rightNodeNumber, tr->mxtips)) |
---|
1040 | { |
---|
1041 | tipCase = TIP_TIP; |
---|
1042 | |
---|
1043 | tipX1 = tr->contiguousTips[leftNodeNumber]; |
---|
1044 | tipX2 = tr->contiguousTips[rightNodeNumber]; |
---|
1045 | |
---|
1046 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); |
---|
1047 | } |
---|
1048 | else |
---|
1049 | { |
---|
1050 | if (isTip(leftNodeNumber, tr->mxtips)) |
---|
1051 | { |
---|
1052 | tipCase = TIP_INNER; |
---|
1053 | |
---|
1054 | tipX1 = tr->contiguousTips[leftNodeNumber]; |
---|
1055 | |
---|
1056 | x2 = b->epa->right; |
---|
1057 | ex2 = b->epa->rightScaling; |
---|
1058 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); |
---|
1059 | } |
---|
1060 | else |
---|
1061 | { |
---|
1062 | if(isTip(rightNodeNumber, tr->mxtips)) |
---|
1063 | { |
---|
1064 | tipCase = TIP_INNER; |
---|
1065 | |
---|
1066 | tipX1 = tr->contiguousTips[rightNodeNumber]; |
---|
1067 | |
---|
1068 | x2 = b->epa->left; |
---|
1069 | ex2 = b->epa->leftScaling; |
---|
1070 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e2, e1, insertions); |
---|
1071 | } |
---|
1072 | else |
---|
1073 | { |
---|
1074 | tipCase = INNER_INNER; |
---|
1075 | |
---|
1076 | x1 = b->epa->left; |
---|
1077 | ex1 = b->epa->leftScaling; |
---|
1078 | |
---|
1079 | x2 = b->epa->right; |
---|
1080 | ex2 = b->epa->rightScaling; |
---|
1081 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); |
---|
1082 | } |
---|
1083 | } |
---|
1084 | } |
---|
1085 | |
---|
1086 | |
---|
1087 | tipCase = TIP_INNER; |
---|
1088 | |
---|
1089 | x2 = tr->temporaryVector; |
---|
1090 | tipX1 = tr->contiguousTips[insertionNodeNumber]; |
---|
1091 | |
---|
1092 | result = evalCL(tr, x2, ex3, tipX1, e3, insertions); |
---|
1093 | |
---|
1094 | return result; |
---|
1095 | } |
---|
1096 | |
---|
1097 | |
---|
1098 | |
---|
1099 | |
---|
1100 | |
---|
1101 | void testInsertThoroughIterative(tree *tr, int branchNumber) |
---|
1102 | { |
---|
1103 | double |
---|
1104 | result, |
---|
1105 | z, |
---|
1106 | e1[NUM_BRANCHES], |
---|
1107 | e2[NUM_BRANCHES], |
---|
1108 | e3[NUM_BRANCHES], |
---|
1109 | *x3 = tr->temporaryVector; |
---|
1110 | |
---|
1111 | branchInfo |
---|
1112 | *b = &(tr->bInf[branchNumber]); |
---|
1113 | |
---|
1114 | nodeptr |
---|
1115 | root = (nodeptr)NULL, |
---|
1116 | x = b->oP, |
---|
1117 | q = b->oQ; |
---|
1118 | |
---|
1119 | boolean |
---|
1120 | atRoot = FALSE; |
---|
1121 | |
---|
1122 | int |
---|
1123 | tipCase, |
---|
1124 | model, |
---|
1125 | insertions, |
---|
1126 | leftNodeNumber = b->epa->leftNodeNumber, |
---|
1127 | rightNodeNumber = b->epa->rightNodeNumber, |
---|
1128 | *ex3 = tr->temporaryScaling; |
---|
1129 | |
---|
1130 | assert(x->number == leftNodeNumber); |
---|
1131 | assert(q->number == rightNodeNumber); |
---|
1132 | |
---|
1133 | if(!tr->wasRooted) |
---|
1134 | root = findRootDirection(q, tr, tr->mxtips + 1); |
---|
1135 | else |
---|
1136 | { |
---|
1137 | if((q == tr->leftRootNode && x == tr->rightRootNode) || |
---|
1138 | (x == tr->leftRootNode && q == tr->rightRootNode)) |
---|
1139 | atRoot = TRUE; |
---|
1140 | else |
---|
1141 | { |
---|
1142 | nodeptr |
---|
1143 | r1 = findRootDirection(q, tr, tr->leftRootNode->number), |
---|
1144 | r2 = findRootDirection(q, tr, tr->rightRootNode->number); |
---|
1145 | |
---|
1146 | assert(r1 == r2); |
---|
1147 | |
---|
1148 | root = r1; |
---|
1149 | } |
---|
1150 | } |
---|
1151 | |
---|
1152 | for(insertions = 0; insertions < tr->numberOfTipsForInsertion; insertions++) |
---|
1153 | { |
---|
1154 | if(b->epa->executeThem[insertions]) |
---|
1155 | { |
---|
1156 | double |
---|
1157 | *x1 = (double*)NULL, |
---|
1158 | *x2 = (double*)NULL; |
---|
1159 | |
---|
1160 | int |
---|
1161 | *ex1 = (int*)NULL, |
---|
1162 | *ex2 = (int*)NULL; |
---|
1163 | |
---|
1164 | unsigned char |
---|
1165 | *tipX1 = (unsigned char *)NULL, |
---|
1166 | *tipX2 = (unsigned char *)NULL; |
---|
1167 | |
---|
1168 | double |
---|
1169 | ratio, |
---|
1170 | modifiedBranchLength, |
---|
1171 | distalLength; |
---|
1172 | |
---|
1173 | for(model = 0; model < tr->numBranches; model++) |
---|
1174 | { |
---|
1175 | z = sqrt(b->epa->branchLengths[model]); |
---|
1176 | if(z < zmin) |
---|
1177 | z = zmin; |
---|
1178 | if(z > zmax) |
---|
1179 | z = zmax; |
---|
1180 | |
---|
1181 | e1[model] = z; |
---|
1182 | e2[model] = z; |
---|
1183 | e3[model] = defaultz; |
---|
1184 | } |
---|
1185 | |
---|
1186 | if(isTip(leftNodeNumber, tr->mxtips) && isTip(rightNodeNumber, tr->mxtips)) |
---|
1187 | { |
---|
1188 | tipCase = TIP_TIP; |
---|
1189 | |
---|
1190 | tipX1 = tr->contiguousTips[leftNodeNumber]; |
---|
1191 | tipX2 = tr->contiguousTips[rightNodeNumber]; |
---|
1192 | |
---|
1193 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); |
---|
1194 | } |
---|
1195 | else |
---|
1196 | { |
---|
1197 | if (isTip(leftNodeNumber, tr->mxtips)) |
---|
1198 | { |
---|
1199 | tipCase = TIP_INNER; |
---|
1200 | |
---|
1201 | tipX1 = tr->contiguousTips[leftNodeNumber]; |
---|
1202 | |
---|
1203 | x2 = b->epa->right; |
---|
1204 | ex2 = b->epa->rightScaling; |
---|
1205 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); |
---|
1206 | } |
---|
1207 | else |
---|
1208 | { |
---|
1209 | if(isTip(rightNodeNumber, tr->mxtips)) |
---|
1210 | { |
---|
1211 | tipCase = TIP_INNER; |
---|
1212 | |
---|
1213 | tipX1 = tr->contiguousTips[rightNodeNumber]; |
---|
1214 | |
---|
1215 | x2 = b->epa->left; |
---|
1216 | ex2 = b->epa->leftScaling; |
---|
1217 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e2, e1, insertions); |
---|
1218 | } |
---|
1219 | else |
---|
1220 | { |
---|
1221 | tipCase = INNER_INNER; |
---|
1222 | |
---|
1223 | x1 = b->epa->left; |
---|
1224 | ex1 = b->epa->leftScaling; |
---|
1225 | |
---|
1226 | x2 = b->epa->right; |
---|
1227 | ex2 = b->epa->rightScaling; |
---|
1228 | newviewMultiGrain(tr, x1, x2, x3, ex1, ex2, ex3, tipX1, tipX2, tipCase, e1, e2, insertions); |
---|
1229 | } |
---|
1230 | } |
---|
1231 | } |
---|
1232 | |
---|
1233 | result = localSmoothClassify(tr, smoothings, leftNodeNumber, rightNodeNumber, tr->inserts[insertions], e1, e2, e3, b, insertions); |
---|
1234 | |
---|
1235 | b->epa->likelihoods[insertions] = result; |
---|
1236 | |
---|
1237 | if(tr->perPartitionEPA) |
---|
1238 | b->epa->branches[insertions] = getBranchPerPartition(tr, e3, e3, insertions); |
---|
1239 | else |
---|
1240 | b->epa->branches[insertions] = getBranch(tr, e3, e3); |
---|
1241 | |
---|
1242 | if(tr->perPartitionEPA) |
---|
1243 | modifiedBranchLength = getBranchPerPartition(tr, e1, e1, insertions) + getBranchPerPartition(tr, e2, e2, insertions); |
---|
1244 | else |
---|
1245 | modifiedBranchLength = getBranch(tr, e1, e1) + getBranch(tr, e2, e2); |
---|
1246 | |
---|
1247 | ratio = b->epa->originalBranchLength / modifiedBranchLength; |
---|
1248 | |
---|
1249 | if(tr->wasRooted && atRoot) |
---|
1250 | { |
---|
1251 | /* always take distal length from left root node and then fix this later */ |
---|
1252 | |
---|
1253 | if(x == tr->leftRootNode) |
---|
1254 | { |
---|
1255 | if(tr->perPartitionEPA) |
---|
1256 | distalLength = getBranchPerPartition(tr, e1, e1, insertions); |
---|
1257 | else |
---|
1258 | distalLength = getBranch(tr, e1, e1); |
---|
1259 | } |
---|
1260 | else |
---|
1261 | { |
---|
1262 | assert(x == tr->rightRootNode); |
---|
1263 | if(tr->perPartitionEPA) |
---|
1264 | distalLength = getBranchPerPartition(tr, e2, e2, insertions); |
---|
1265 | else |
---|
1266 | distalLength = getBranch(tr, e2, e2); |
---|
1267 | } |
---|
1268 | } |
---|
1269 | else |
---|
1270 | { |
---|
1271 | if(root == x) |
---|
1272 | { |
---|
1273 | if(tr->perPartitionEPA) |
---|
1274 | distalLength = getBranchPerPartition(tr, e1, e1, insertions); |
---|
1275 | else |
---|
1276 | distalLength = getBranch(tr, e1, e1); |
---|
1277 | } |
---|
1278 | else |
---|
1279 | { |
---|
1280 | assert(root == q); |
---|
1281 | if(tr->perPartitionEPA) |
---|
1282 | distalLength = getBranchPerPartition(tr, e2, e2, insertions); |
---|
1283 | else |
---|
1284 | distalLength = getBranch(tr, e2, e2); |
---|
1285 | } |
---|
1286 | } |
---|
1287 | |
---|
1288 | distalLength *= ratio; |
---|
1289 | |
---|
1290 | assert(distalLength <= b->epa->originalBranchLength); |
---|
1291 | |
---|
1292 | b->epa->distalBranches[insertions] = distalLength; |
---|
1293 | } |
---|
1294 | } |
---|
1295 | } |
---|
1296 | |
---|
1297 | |
---|
1298 | |
---|
1299 | |
---|
1300 | |
---|
1301 | |
---|
1302 | |
---|
1303 | void addTraverseRobIterative(tree *tr, int branchNumber) |
---|
1304 | { |
---|
1305 | int |
---|
1306 | inserts; |
---|
1307 | |
---|
1308 | branchInfo |
---|
1309 | *b = &(tr->bInf[branchNumber]); |
---|
1310 | |
---|
1311 | double |
---|
1312 | result, |
---|
1313 | z[NUM_BRANCHES], |
---|
1314 | defaultArray[NUM_BRANCHES]; |
---|
1315 | |
---|
1316 | |
---|
1317 | assert(!tr->useFastScaling); |
---|
1318 | |
---|
1319 | for(inserts = 0; inserts < tr->numBranches; inserts++) |
---|
1320 | { |
---|
1321 | z[inserts] = sqrt(b->epa->branchLengths[inserts]); |
---|
1322 | defaultArray[inserts] = defaultz; |
---|
1323 | |
---|
1324 | if(z[inserts] < zmin) |
---|
1325 | z[inserts] = zmin; |
---|
1326 | if(z[inserts] > zmax) |
---|
1327 | z[inserts] = zmax; |
---|
1328 | } |
---|
1329 | |
---|
1330 | newviewClassify(tr, b, z, inserts); |
---|
1331 | |
---|
1332 | for(inserts = 0; inserts < tr->numberOfTipsForInsertion; inserts++) |
---|
1333 | { |
---|
1334 | result = evalCL(tr, tr->temporaryVector, tr->temporaryScaling, tr->contiguousTips[tr->inserts[inserts]], defaultArray, inserts); |
---|
1335 | |
---|
1336 | b->epa->likelihoods[inserts] = result; |
---|
1337 | } |
---|
1338 | } |
---|
1339 | |
---|
1340 | |
---|
1341 | |
---|
1342 | |
---|
1343 | |
---|
1344 | #endif |
---|
1345 | |
---|
1346 | |
---|
1347 | |
---|
1348 | |
---|
1349 | |
---|
1350 | |
---|
1351 | static void setupBranchMetaInfo(tree *tr, nodeptr p, int nTips, branchInfo *bInf) |
---|
1352 | { |
---|
1353 | int |
---|
1354 | i, |
---|
1355 | countBranches = tr->branchCounter; |
---|
1356 | |
---|
1357 | if(isTip(p->number, tr->mxtips)) |
---|
1358 | { |
---|
1359 | p->bInf = &bInf[countBranches]; |
---|
1360 | p->back->bInf = &bInf[countBranches]; |
---|
1361 | |
---|
1362 | bInf[countBranches].oP = p; |
---|
1363 | bInf[countBranches].oQ = p->back; |
---|
1364 | |
---|
1365 | bInf[countBranches].epa->leftNodeNumber = p->number; |
---|
1366 | bInf[countBranches].epa->rightNodeNumber = p->back->number; |
---|
1367 | |
---|
1368 | bInf[countBranches].epa->originalBranchLength = getBranch(tr, p->z, p->back->z); |
---|
1369 | bInf[countBranches].epa->branchNumber = countBranches; |
---|
1370 | |
---|
1371 | for(i = 0; i < tr->numBranches; i++) |
---|
1372 | bInf[countBranches].epa->branchLengths[i] = p->z[i]; |
---|
1373 | |
---|
1374 | #ifdef _USE_PTHREADS |
---|
1375 | if(!p->back->x) |
---|
1376 | newviewGeneric(tr, p->back); |
---|
1377 | masterBarrier(THREAD_GATHER_LIKELIHOOD, tr); |
---|
1378 | #endif |
---|
1379 | |
---|
1380 | tr->branchCounter = tr->branchCounter + 1; |
---|
1381 | return; |
---|
1382 | } |
---|
1383 | else |
---|
1384 | { |
---|
1385 | nodeptr q; |
---|
1386 | assert(p == p->next->next->next); |
---|
1387 | |
---|
1388 | p->bInf = &bInf[countBranches]; |
---|
1389 | p->back->bInf = &bInf[countBranches]; |
---|
1390 | |
---|
1391 | bInf[countBranches].oP = p; |
---|
1392 | bInf[countBranches].oQ = p->back; |
---|
1393 | |
---|
1394 | bInf[countBranches].epa->leftNodeNumber = p->number; |
---|
1395 | bInf[countBranches].epa->rightNodeNumber = p->back->number; |
---|
1396 | |
---|
1397 | bInf[countBranches].epa->originalBranchLength = getBranch(tr, p->z, p->back->z); |
---|
1398 | bInf[countBranches].epa->branchNumber = countBranches; |
---|
1399 | |
---|
1400 | for(i = 0; i < tr->numBranches; i++) |
---|
1401 | bInf[countBranches].epa->branchLengths[i] = p->z[i]; |
---|
1402 | |
---|
1403 | |
---|
1404 | #ifdef _USE_PTHREADS |
---|
1405 | if(!p->x) |
---|
1406 | newviewGeneric(tr, p); |
---|
1407 | |
---|
1408 | if(!isTip(p->back->number, tr->mxtips)) |
---|
1409 | { |
---|
1410 | if(!p->back->x) |
---|
1411 | newviewGeneric(tr, p->back); |
---|
1412 | } |
---|
1413 | |
---|
1414 | masterBarrier(THREAD_GATHER_LIKELIHOOD, tr); |
---|
1415 | #endif |
---|
1416 | |
---|
1417 | tr->branchCounter = tr->branchCounter + 1; |
---|
1418 | |
---|
1419 | q = p->next; |
---|
1420 | |
---|
1421 | while(q != p) |
---|
1422 | { |
---|
1423 | setupBranchMetaInfo(tr, q->back, nTips, bInf); |
---|
1424 | q = q->next; |
---|
1425 | } |
---|
1426 | |
---|
1427 | return; |
---|
1428 | } |
---|
1429 | } |
---|
1430 | |
---|
1431 | |
---|
1432 | |
---|
1433 | static void setupJointFormat(tree *tr, nodeptr p, int ntips, branchInfo *bInf, int *count) |
---|
1434 | { |
---|
1435 | if(isTip(p->number, tr->mxtips)) |
---|
1436 | { |
---|
1437 | p->bInf->epa->jointLabel = *count; |
---|
1438 | *count = *count + 1; |
---|
1439 | |
---|
1440 | return; |
---|
1441 | } |
---|
1442 | else |
---|
1443 | { |
---|
1444 | setupJointFormat(tr, p->next->back, ntips, bInf, count); |
---|
1445 | setupJointFormat(tr, p->next->next->back, ntips, bInf, count); |
---|
1446 | |
---|
1447 | p->bInf->epa->jointLabel = *count; |
---|
1448 | *count = *count + 1; |
---|
1449 | |
---|
1450 | return; |
---|
1451 | } |
---|
1452 | } |
---|
1453 | |
---|
1454 | |
---|
1455 | |
---|
1456 | |
---|
1457 | |
---|
1458 | |
---|
1459 | static void setupBranchInfo(tree *tr, nodeptr q) |
---|
1460 | { |
---|
1461 | nodeptr |
---|
1462 | originalNode = tr->nodep[tr->mxtips + 1]; |
---|
1463 | |
---|
1464 | int |
---|
1465 | count = 0; |
---|
1466 | |
---|
1467 | tr->branchCounter = 0; |
---|
1468 | |
---|
1469 | setupBranchMetaInfo(tr, q, tr->ntips, tr->bInf); |
---|
1470 | |
---|
1471 | assert(tr->branchCounter == tr->numberOfBranches); |
---|
1472 | |
---|
1473 | if(tr->wasRooted) |
---|
1474 | { |
---|
1475 | assert(tr->leftRootNode->back == tr->rightRootNode); |
---|
1476 | assert(tr->leftRootNode == tr->rightRootNode->back); |
---|
1477 | |
---|
1478 | if(!isTip(tr->leftRootNode->number, tr->mxtips)) |
---|
1479 | { |
---|
1480 | setupJointFormat(tr, tr->leftRootNode->next->back, tr->ntips, tr->bInf, &count); |
---|
1481 | setupJointFormat(tr, tr->leftRootNode->next->next->back, tr->ntips, tr->bInf, &count); |
---|
1482 | } |
---|
1483 | |
---|
1484 | tr->leftRootNode->bInf->epa->jointLabel = count; |
---|
1485 | tr->rootLabel = count; |
---|
1486 | count = count + 1; |
---|
1487 | |
---|
1488 | if(!isTip(tr->rightRootNode->number, tr->mxtips)) |
---|
1489 | { |
---|
1490 | setupJointFormat(tr, tr->rightRootNode->next->back, tr->ntips, tr->bInf, &count); |
---|
1491 | setupJointFormat(tr, tr->rightRootNode->next->next->back, tr->ntips, tr->bInf, &count); |
---|
1492 | } |
---|
1493 | } |
---|
1494 | else |
---|
1495 | { |
---|
1496 | setupJointFormat(tr, originalNode->back, tr->ntips, tr->bInf, &count); |
---|
1497 | setupJointFormat(tr, originalNode->next->back, tr->ntips, tr->bInf, &count); |
---|
1498 | setupJointFormat(tr, originalNode->next->next->back, tr->ntips, tr->bInf, &count); |
---|
1499 | } |
---|
1500 | |
---|
1501 | assert(count == tr->numberOfBranches); |
---|
1502 | } |
---|
1503 | |
---|
1504 | |
---|
1505 | |
---|
1506 | |
---|
1507 | static int infoCompare(const void *p1, const void *p2) |
---|
1508 | { |
---|
1509 | info *rc1 = (info *)p1; |
---|
1510 | info *rc2 = (info *)p2; |
---|
1511 | |
---|
1512 | double i = rc1->lh; |
---|
1513 | double j = rc2->lh; |
---|
1514 | |
---|
1515 | if (i > j) |
---|
1516 | return (-1); |
---|
1517 | if (i < j) |
---|
1518 | return (1); |
---|
1519 | return (0); |
---|
1520 | } |
---|
1521 | |
---|
1522 | static void consolidateInfoMLHeuristics(tree *tr, int throwAwayStart) |
---|
1523 | { |
---|
1524 | int |
---|
1525 | i, |
---|
1526 | j; |
---|
1527 | |
---|
1528 | info |
---|
1529 | *inf = (info*)rax_malloc(sizeof(info) * tr->numberOfBranches); |
---|
1530 | |
---|
1531 | assert(tr->useEpaHeuristics); |
---|
1532 | |
---|
1533 | for(j = 0; j < tr->numberOfTipsForInsertion; j++) |
---|
1534 | { |
---|
1535 | for(i = 0; i < tr->numberOfBranches; i++) |
---|
1536 | { |
---|
1537 | inf[i].lh = tr->bInf[i].epa->likelihoods[j]; |
---|
1538 | inf[i].number = i; |
---|
1539 | } |
---|
1540 | |
---|
1541 | qsort(inf, tr->numberOfBranches, sizeof(info), infoCompare); |
---|
1542 | |
---|
1543 | for(i = throwAwayStart; i < tr->numberOfBranches; i++) |
---|
1544 | tr->bInf[inf[i].number].epa->executeThem[j] = 0; |
---|
1545 | } |
---|
1546 | |
---|
1547 | for(i = 0; i < tr->numberOfBranches; i++) |
---|
1548 | for(j = 0; j < tr->numberOfTipsForInsertion; j++) |
---|
1549 | tr->bInf[i].epa->likelihoods[j] = unlikely; |
---|
1550 | |
---|
1551 | |
---|
1552 | rax_free(inf); |
---|
1553 | } |
---|
1554 | |
---|
1555 | |
---|
1556 | |
---|
1557 | |
---|
1558 | |
---|
1559 | static void consolidateInfo(tree *tr) |
---|
1560 | { |
---|
1561 | int i, j; |
---|
1562 | |
---|
1563 | for(j = 0; j < tr->numberOfTipsForInsertion; j++) |
---|
1564 | { |
---|
1565 | double |
---|
1566 | max = unlikely; |
---|
1567 | |
---|
1568 | int |
---|
1569 | max_index = -1; |
---|
1570 | |
---|
1571 | for(i = 0; i < tr->numberOfBranches; i++) |
---|
1572 | { |
---|
1573 | if(tr->bInf[i].epa->likelihoods[j] > max) |
---|
1574 | { |
---|
1575 | max = tr->bInf[i].epa->likelihoods[j]; |
---|
1576 | max_index = i; |
---|
1577 | } |
---|
1578 | } |
---|
1579 | |
---|
1580 | assert(max_index >= 0); |
---|
1581 | |
---|
1582 | tr->bInf[max_index].epa->countThem[j] = tr->bInf[max_index].epa->countThem[j] + 1; |
---|
1583 | } |
---|
1584 | } |
---|
1585 | |
---|
1586 | |
---|
1587 | |
---|
1588 | |
---|
1589 | |
---|
1590 | |
---|
1591 | #ifdef _PAVLOS |
---|
1592 | |
---|
1593 | static void printPerBranchReadAlignments(tree *tr) |
---|
1594 | { |
---|
1595 | int |
---|
1596 | i, |
---|
1597 | j; |
---|
1598 | |
---|
1599 | for(j = 0; j < tr->numberOfBranches; j++) |
---|
1600 | { |
---|
1601 | int |
---|
1602 | readCounter = 0; |
---|
1603 | |
---|
1604 | for(i = 0; i < tr->numberOfTipsForInsertion; i++) |
---|
1605 | if(tr->bInf[j].epa->countThem[i] > 0) |
---|
1606 | readCounter++; |
---|
1607 | |
---|
1608 | if(readCounter > 0) |
---|
1609 | { |
---|
1610 | int l, w; |
---|
1611 | |
---|
1612 | char |
---|
1613 | alignmentFileName[2048] = "", |
---|
1614 | buf[64] = ""; |
---|
1615 | |
---|
1616 | FILE |
---|
1617 | *af; |
---|
1618 | |
---|
1619 | strcpy(alignmentFileName, workdir); |
---|
1620 | strcat(alignmentFileName, "RAxML_BranchAlignment.I"); |
---|
1621 | |
---|
1622 | sprintf(buf, "%d", j); |
---|
1623 | |
---|
1624 | strcat(alignmentFileName, buf); |
---|
1625 | |
---|
1626 | af = myfopen(alignmentFileName, "wb"); |
---|
1627 | |
---|
1628 | |
---|
1629 | fprintf(af, "%d %d\n", readCounter, tr->rdta->sites); |
---|
1630 | |
---|
1631 | for(i = 0; i < tr->numberOfTipsForInsertion; i++) |
---|
1632 | { |
---|
1633 | if(tr->bInf[j].epa->countThem[i] > 0) |
---|
1634 | { |
---|
1635 | unsigned char *tip = tr->yVector[tr->inserts[i]]; |
---|
1636 | |
---|
1637 | fprintf(af, "%s ", tr->nameList[tr->inserts[i]]); |
---|
1638 | |
---|
1639 | for(l = 0; l < tr->cdta->endsite; l++) |
---|
1640 | { |
---|
1641 | for(w = 0; w < tr->cdta->aliaswgt[l]; w++) |
---|
1642 | fprintf(af, "%c", getInverseMeaning(tr->dataVector[l], tip[l])); |
---|
1643 | } |
---|
1644 | |
---|
1645 | fprintf(af, "\n"); |
---|
1646 | } |
---|
1647 | } |
---|
1648 | fclose(af); |
---|
1649 | |
---|
1650 | printBothOpen("Branch read alignment for branch %d written to file %s\n", j, alignmentFileName); |
---|
1651 | } |
---|
1652 | } |
---|
1653 | } |
---|
1654 | |
---|
1655 | |
---|
1656 | #endif |
---|
1657 | |
---|
1658 | static void analyzeReads(tree *tr) |
---|
1659 | { |
---|
1660 | int |
---|
1661 | i, |
---|
1662 | *inserts = tr->inserts; |
---|
1663 | |
---|
1664 | tr->readPartition = (int *)rax_malloc(sizeof(int) * (size_t)tr->numberOfTipsForInsertion); |
---|
1665 | |
---|
1666 | for(i = 0; i < tr->numberOfTipsForInsertion; i++) |
---|
1667 | { |
---|
1668 | size_t |
---|
1669 | j; |
---|
1670 | |
---|
1671 | int |
---|
1672 | whichPartition = -1, |
---|
1673 | partitionCount = 0, |
---|
1674 | model, |
---|
1675 | nodeNumber = tr->nodep[inserts[i]]->number; |
---|
1676 | |
---|
1677 | unsigned char |
---|
1678 | *tipX1 = tr->yVector[nodeNumber]; |
---|
1679 | |
---|
1680 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
1681 | { |
---|
1682 | int |
---|
1683 | nonGap = 0; |
---|
1684 | |
---|
1685 | unsigned char |
---|
1686 | undetermined = getUndetermined(tr->partitionData[model].dataType); |
---|
1687 | |
---|
1688 | for(j = tr->partitionData[model].lower; j < tr->partitionData[model].upper; j++) |
---|
1689 | { |
---|
1690 | if(tipX1[j] != undetermined) |
---|
1691 | nonGap++; |
---|
1692 | } |
---|
1693 | |
---|
1694 | if(nonGap > 0) |
---|
1695 | { |
---|
1696 | partitionCount++; |
---|
1697 | whichPartition = model; |
---|
1698 | } |
---|
1699 | } |
---|
1700 | |
---|
1701 | assert(partitionCount == 1); |
---|
1702 | assert(whichPartition >= 0 && whichPartition < tr->NumberOfModels); |
---|
1703 | |
---|
1704 | tr->readPartition[i] = whichPartition; |
---|
1705 | } |
---|
1706 | } |
---|
1707 | |
---|
1708 | |
---|
1709 | void classifyML(tree *tr, analdef *adef) |
---|
1710 | { |
---|
1711 | int |
---|
1712 | i, |
---|
1713 | j, |
---|
1714 | *perm; |
---|
1715 | |
---|
1716 | |
---|
1717 | nodeptr |
---|
1718 | r, |
---|
1719 | q; |
---|
1720 | |
---|
1721 | char |
---|
1722 | entropyFileName[1024], |
---|
1723 | jointFormatTreeFileName[1024], |
---|
1724 | labelledTreeFileName[1024], |
---|
1725 | originalLabelledTreeFileName[1024], |
---|
1726 | classificationFileName[1024]; |
---|
1727 | |
---|
1728 | FILE |
---|
1729 | *entropyFile, |
---|
1730 | *treeFile, |
---|
1731 | *classificationFile; |
---|
1732 | |
---|
1733 | adef->outgroup = FALSE; |
---|
1734 | tr->doCutoff = FALSE; |
---|
1735 | |
---|
1736 | assert(adef->restart); |
---|
1737 | |
---|
1738 | tr->numberOfBranches = 2 * tr->ntips - 3; |
---|
1739 | |
---|
1740 | if(tr->perPartitionEPA) |
---|
1741 | printBothOpen("\nRAxML Evolutionary Placement Algorithm for partitioned multi-gene datasets (experimental version)\n"); |
---|
1742 | else |
---|
1743 | printBothOpen("\nRAxML Evolutionary Placement Algorithm\n"); |
---|
1744 | |
---|
1745 | if(adef->useBinaryModelFile) |
---|
1746 | { |
---|
1747 | evaluateGenericInitrav(tr, tr->start); |
---|
1748 | // treeEvaluate(tr, 2); |
---|
1749 | } |
---|
1750 | else |
---|
1751 | { |
---|
1752 | evaluateGenericInitrav(tr, tr->start); |
---|
1753 | |
---|
1754 | modOpt(tr, adef, TRUE, 1.0); |
---|
1755 | } |
---|
1756 | |
---|
1757 | printBothOpen("\nLikelihood of reference tree: %f\n\n", tr->likelihood); |
---|
1758 | |
---|
1759 | perm = (int *)rax_calloc(tr->mxtips + 1, sizeof(int)); |
---|
1760 | tr->inserts = (int *)rax_calloc(tr->mxtips, sizeof(int)); |
---|
1761 | |
---|
1762 | markTips(tr->start, perm, tr->mxtips); |
---|
1763 | markTips(tr->start->back, perm ,tr->mxtips); |
---|
1764 | |
---|
1765 | tr->numberOfTipsForInsertion = 0; |
---|
1766 | |
---|
1767 | for(i = 1; i <= tr->mxtips; i++) |
---|
1768 | { |
---|
1769 | if(perm[i] == 0) |
---|
1770 | { |
---|
1771 | tr->inserts[tr->numberOfTipsForInsertion] = i; |
---|
1772 | tr->numberOfTipsForInsertion = tr->numberOfTipsForInsertion + 1; |
---|
1773 | } |
---|
1774 | } |
---|
1775 | |
---|
1776 | rax_free(perm); |
---|
1777 | |
---|
1778 | printBothOpen("RAxML will place %d Query Sequences into the %d branches of the reference tree with %d taxa\n\n", tr->numberOfTipsForInsertion, (2 * tr->ntips - 3), tr->ntips); |
---|
1779 | |
---|
1780 | assert(tr->numberOfTipsForInsertion == (tr->mxtips - tr->ntips)); |
---|
1781 | |
---|
1782 | tr->bInf = (branchInfo*)rax_malloc(tr->numberOfBranches * sizeof(branchInfo)); |
---|
1783 | |
---|
1784 | for(i = 0; i < tr->numberOfBranches; i++) |
---|
1785 | { |
---|
1786 | tr->bInf[i].epa = (epaBranchData*)rax_malloc(sizeof(epaBranchData)); |
---|
1787 | |
---|
1788 | tr->bInf[i].epa->countThem = (int*)rax_calloc(tr->numberOfTipsForInsertion, sizeof(int)); |
---|
1789 | |
---|
1790 | tr->bInf[i].epa->executeThem = (int*)rax_calloc(tr->numberOfTipsForInsertion, sizeof(int)); |
---|
1791 | |
---|
1792 | for(j = 0; j < tr->numberOfTipsForInsertion; j++) |
---|
1793 | tr->bInf[i].epa->executeThem[j] = 1; |
---|
1794 | |
---|
1795 | tr->bInf[i].epa->branches = (double*)rax_calloc(tr->numberOfTipsForInsertion, sizeof(double)); |
---|
1796 | tr->bInf[i].epa->distalBranches = (double*)rax_calloc(tr->numberOfTipsForInsertion, sizeof(double)); |
---|
1797 | |
---|
1798 | tr->bInf[i].epa->likelihoods = (double*)rax_calloc(tr->numberOfTipsForInsertion, sizeof(double)); |
---|
1799 | tr->bInf[i].epa->branchNumber = i; |
---|
1800 | |
---|
1801 | sprintf(tr->bInf[i].epa->branchLabel, "I%d", i); |
---|
1802 | } |
---|
1803 | |
---|
1804 | r = tr->nodep[(tr->nextnode)++]; |
---|
1805 | |
---|
1806 | |
---|
1807 | q = findAnyTip(tr->start, tr->rdta->numsp); |
---|
1808 | |
---|
1809 | assert(isTip(q->number, tr->rdta->numsp)); |
---|
1810 | assert(!isTip(q->back->number, tr->rdta->numsp)); |
---|
1811 | |
---|
1812 | q = q->back; |
---|
1813 | |
---|
1814 | if(tr->perPartitionEPA) |
---|
1815 | analyzeReads(tr); |
---|
1816 | |
---|
1817 | #ifdef _USE_PTHREADS |
---|
1818 | tr->contiguousVectorLength = getContiguousVectorLength(tr); |
---|
1819 | tr->contiguousScalingLength = getContiguousScalingLength(tr); |
---|
1820 | allocBranchX(tr); |
---|
1821 | masterBarrier(THREAD_INIT_EPA, tr); |
---|
1822 | #endif |
---|
1823 | |
---|
1824 | setupBranchInfo(tr, q); |
---|
1825 | |
---|
1826 | if(tr->useEpaHeuristics) |
---|
1827 | { |
---|
1828 | int |
---|
1829 | heuristicInsertions = MAX(5, (int)(0.5 + (double)(tr->numberOfBranches) * tr->fastEPAthreshold)); |
---|
1830 | |
---|
1831 | printBothOpen("EPA heuristics: determining %d out of %d most promising insertion branches\n", heuristicInsertions, tr->numberOfBranches); |
---|
1832 | |
---|
1833 | #ifdef _USE_PTHREADS |
---|
1834 | NumberOfJobs = tr->numberOfBranches; |
---|
1835 | masterBarrier(THREAD_INSERT_CLASSIFY, tr); |
---|
1836 | #else |
---|
1837 | addTraverseRob(tr, r, q, FALSE); |
---|
1838 | #endif |
---|
1839 | |
---|
1840 | consolidateInfoMLHeuristics(tr, heuristicInsertions); |
---|
1841 | } |
---|
1842 | |
---|
1843 | #ifdef _USE_PTHREADS |
---|
1844 | NumberOfJobs = tr->numberOfBranches; |
---|
1845 | masterBarrier(THREAD_INSERT_CLASSIFY_THOROUGH, tr); |
---|
1846 | #else |
---|
1847 | addTraverseRob(tr, r, q, TRUE); |
---|
1848 | #endif |
---|
1849 | consolidateInfo(tr); |
---|
1850 | |
---|
1851 | printBothOpen("Overall Classification time: %f\n\n", gettime() - masterTime); |
---|
1852 | |
---|
1853 | |
---|
1854 | #ifdef _PAVLOS |
---|
1855 | assert(adef->compressPatterns == FALSE); |
---|
1856 | printPerBranchReadAlignments(tr); |
---|
1857 | #endif |
---|
1858 | |
---|
1859 | strcpy(entropyFileName, workdir); |
---|
1860 | strcpy(jointFormatTreeFileName, workdir); |
---|
1861 | strcpy(labelledTreeFileName, workdir); |
---|
1862 | strcpy(originalLabelledTreeFileName, workdir); |
---|
1863 | strcpy(classificationFileName, workdir); |
---|
1864 | |
---|
1865 | strcat(entropyFileName, "RAxML_entropy."); |
---|
1866 | strcat(jointFormatTreeFileName, "RAxML_portableTree."); |
---|
1867 | strcat(labelledTreeFileName, "RAxML_labelledTree."); |
---|
1868 | strcat(originalLabelledTreeFileName, "RAxML_originalLabelledTree."); |
---|
1869 | strcat(classificationFileName, "RAxML_classification."); |
---|
1870 | |
---|
1871 | strcat(entropyFileName, run_id); |
---|
1872 | strcat(jointFormatTreeFileName, run_id); |
---|
1873 | strcat(labelledTreeFileName, run_id); |
---|
1874 | strcat(originalLabelledTreeFileName, run_id); |
---|
1875 | strcat(classificationFileName, run_id); |
---|
1876 | |
---|
1877 | strcat(jointFormatTreeFileName, ".jplace"); |
---|
1878 | |
---|
1879 | rax_free(tr->tree_string); |
---|
1880 | tr->treeStringLength *= 16; |
---|
1881 | |
---|
1882 | tr->tree_string = (char*)rax_calloc(tr->treeStringLength, sizeof(char)); |
---|
1883 | |
---|
1884 | |
---|
1885 | treeFile = myfopen(labelledTreeFileName, "wb"); |
---|
1886 | Tree2StringClassify(tr->tree_string, tr, tr->inserts, FALSE, FALSE, TRUE); |
---|
1887 | fprintf(treeFile, "%s\n", tr->tree_string); |
---|
1888 | fclose(treeFile); |
---|
1889 | |
---|
1890 | |
---|
1891 | treeFile = myfopen(originalLabelledTreeFileName, "wb"); |
---|
1892 | Tree2StringClassify(tr->tree_string, tr, tr->inserts, TRUE, FALSE, TRUE); |
---|
1893 | fprintf(treeFile, "%s\n", tr->tree_string); |
---|
1894 | fclose(treeFile); |
---|
1895 | |
---|
1896 | /* |
---|
1897 | JSON format only works for sequential version |
---|
1898 | porting this to Pthreads will be a pain in the ass |
---|
1899 | |
---|
1900 | */ |
---|
1901 | |
---|
1902 | treeFile = myfopen(jointFormatTreeFileName, "wb"); |
---|
1903 | Tree2StringClassify(tr->tree_string, tr, tr->inserts, TRUE, TRUE, TRUE); |
---|
1904 | |
---|
1905 | fprintf(treeFile, "{\n"); |
---|
1906 | fprintf(treeFile, "\t\"tree\": \"%s\", \n", tr->tree_string); |
---|
1907 | fprintf(treeFile, "\t\"placements\": [\n"); |
---|
1908 | |
---|
1909 | { |
---|
1910 | info |
---|
1911 | *inf = (info*)rax_malloc(sizeof(info) * tr->numberOfBranches); |
---|
1912 | |
---|
1913 | for(i = 0; i < tr->numberOfTipsForInsertion; i++) |
---|
1914 | { |
---|
1915 | double |
---|
1916 | maxprob = 0.0, |
---|
1917 | lmax = 0.0, |
---|
1918 | acc = 0.0; |
---|
1919 | |
---|
1920 | int |
---|
1921 | validEntries = 0; |
---|
1922 | |
---|
1923 | for(j = 0; j < tr->numberOfBranches; j++) |
---|
1924 | { |
---|
1925 | inf[j].lh = tr->bInf[j].epa->likelihoods[i]; |
---|
1926 | inf[j].pendantBranch = tr->bInf[j].epa->branches[i]; |
---|
1927 | inf[j].distalBranch = tr->bInf[j].epa->distalBranches[i]; |
---|
1928 | inf[j].number = tr->bInf[j].epa->jointLabel; |
---|
1929 | } |
---|
1930 | |
---|
1931 | qsort(inf, tr->numberOfBranches, sizeof(info), infoCompare); |
---|
1932 | |
---|
1933 | for(j = 0; j < tr->numberOfBranches; j++) |
---|
1934 | if(inf[j].lh == unlikely) |
---|
1935 | break; |
---|
1936 | else |
---|
1937 | validEntries++; |
---|
1938 | |
---|
1939 | assert(validEntries > 0); |
---|
1940 | |
---|
1941 | j = 0; |
---|
1942 | |
---|
1943 | lmax = inf[0].lh; |
---|
1944 | |
---|
1945 | fprintf(treeFile, "\t{\"p\":["); |
---|
1946 | |
---|
1947 | /* |
---|
1948 | Erick's cutoff: |
---|
1949 | |
---|
1950 | I keep at most 7 placements and throw away anything that has less than |
---|
1951 | 0.01*best_ml_ratio. |
---|
1952 | |
---|
1953 | my old cutoff was at 0.95 accumulated likelihood weight: |
---|
1954 | |
---|
1955 | while(acc <= 0.95) |
---|
1956 | */ |
---|
1957 | |
---|
1958 | /*#define _ALL_ENTRIES*/ |
---|
1959 | |
---|
1960 | #ifdef _ALL_ENTRIES |
---|
1961 | assert(validEntries == tr->numberOfBranches); |
---|
1962 | while(j < validEntries) |
---|
1963 | #else |
---|
1964 | while(j < validEntries && j < 7) |
---|
1965 | #endif |
---|
1966 | { |
---|
1967 | int |
---|
1968 | k; |
---|
1969 | |
---|
1970 | double |
---|
1971 | all = 0.0, |
---|
1972 | prob = 0.0; |
---|
1973 | |
---|
1974 | for(k = 0; k < validEntries; k++) |
---|
1975 | all += exp(inf[k].lh - lmax); |
---|
1976 | |
---|
1977 | acc += (prob = (exp(inf[j].lh - lmax) / all)); |
---|
1978 | |
---|
1979 | if(j == 0) |
---|
1980 | maxprob = prob; |
---|
1981 | #ifndef _ALL_ENTRIES |
---|
1982 | if(prob >= maxprob * 0.01) |
---|
1983 | #endif |
---|
1984 | { |
---|
1985 | if(j > 0) |
---|
1986 | { |
---|
1987 | if(tr->wasRooted && inf[j].number == tr->rootLabel) |
---|
1988 | { |
---|
1989 | double |
---|
1990 | b = getBranch(tr, tr->leftRootNode->z, tr->rightRootNode->z); |
---|
1991 | |
---|
1992 | if(inf[j].distalBranch > 0.5 * b) |
---|
1993 | fprintf(treeFile, ",[%d, %f, %f, %f, %f]", tr->numberOfBranches, inf[j].lh, prob, inf[j].distalBranch - 0.5 * b, inf[j].pendantBranch); |
---|
1994 | else |
---|
1995 | fprintf(treeFile, ",[%d, %f, %f, %f, %f]", inf[j].number, inf[j].lh, prob, 0.5 * b - inf[j].distalBranch, inf[j].pendantBranch); |
---|
1996 | } |
---|
1997 | else |
---|
1998 | fprintf(treeFile, ",[%d, %f, %f, %f, %f]", inf[j].number, inf[j].lh, prob, inf[j].distalBranch, inf[j].pendantBranch); |
---|
1999 | } |
---|
2000 | else |
---|
2001 | { |
---|
2002 | if(tr->wasRooted && inf[j].number == tr->rootLabel) |
---|
2003 | { |
---|
2004 | double |
---|
2005 | b = getBranch(tr, tr->leftRootNode->z, tr->rightRootNode->z); |
---|
2006 | |
---|
2007 | if(inf[j].distalBranch > 0.5 * b) |
---|
2008 | fprintf(treeFile, "[%d, %f, %f, %f, %f]", tr->numberOfBranches, inf[j].lh, prob, inf[j].distalBranch - 0.5 * b, inf[j].pendantBranch); |
---|
2009 | else |
---|
2010 | fprintf(treeFile, "[%d, %f, %f, %f, %f]", inf[j].number, inf[j].lh, prob, 0.5 * b - inf[j].distalBranch, inf[j].pendantBranch); |
---|
2011 | } |
---|
2012 | else |
---|
2013 | fprintf(treeFile, "[%d, %f, %f, %f, %f]", inf[j].number, inf[j].lh, prob, inf[j].distalBranch, inf[j].pendantBranch); |
---|
2014 | } |
---|
2015 | } |
---|
2016 | |
---|
2017 | j++; |
---|
2018 | } |
---|
2019 | |
---|
2020 | if(i == tr->numberOfTipsForInsertion - 1) |
---|
2021 | fprintf(treeFile, "], \"n\":[\"%s\"]}\n", tr->nameList[tr->inserts[i]]); |
---|
2022 | else |
---|
2023 | fprintf(treeFile, "], \"n\":[\"%s\"]},\n", tr->nameList[tr->inserts[i]]); |
---|
2024 | } |
---|
2025 | |
---|
2026 | rax_free(inf); |
---|
2027 | } |
---|
2028 | |
---|
2029 | #ifdef _ALL_ENTRIES |
---|
2030 | assert(j == tr->numberOfBranches); |
---|
2031 | #endif |
---|
2032 | |
---|
2033 | fprintf(treeFile, "\t ],\n"); |
---|
2034 | fprintf(treeFile, "\t\"metadata\": {\"invocation\": "); |
---|
2035 | |
---|
2036 | fprintf(treeFile, "\""); |
---|
2037 | |
---|
2038 | { |
---|
2039 | int i; |
---|
2040 | |
---|
2041 | for(i = 0; i < globalArgc; i++) |
---|
2042 | fprintf(treeFile,"%s ", globalArgv[i]); |
---|
2043 | } |
---|
2044 | fprintf(treeFile, "\", \"raxml_version\": \"%s\"", programVersion); |
---|
2045 | fprintf(treeFile,"},\n"); |
---|
2046 | |
---|
2047 | fprintf(treeFile, "\t\"version\": 2,\n"); |
---|
2048 | fprintf(treeFile, "\t\"fields\": [\n"); |
---|
2049 | fprintf(treeFile, "\t\"edge_num\", \"likelihood\", \"like_weight_ratio\", \"distal_length\", \n"); |
---|
2050 | fprintf(treeFile, "\t\"pendant_length\"\n"); |
---|
2051 | fprintf(treeFile, "\t]\n"); |
---|
2052 | fprintf(treeFile, "}\n"); |
---|
2053 | |
---|
2054 | fclose(treeFile); |
---|
2055 | |
---|
2056 | /* JSON format end */ |
---|
2057 | |
---|
2058 | classificationFile = myfopen(classificationFileName, "wb"); |
---|
2059 | |
---|
2060 | for(i = 0; i < tr->numberOfTipsForInsertion; i++) |
---|
2061 | for(j = 0; j < tr->numberOfBranches; j++) |
---|
2062 | { |
---|
2063 | if(tr->bInf[j].epa->countThem[i] > 0) |
---|
2064 | fprintf(classificationFile, "%s I%d %d %8.20f\n", tr->nameList[tr->inserts[i]], j, tr->bInf[j].epa->countThem[i], |
---|
2065 | tr->bInf[j].epa->branches[i] / (double)(tr->bInf[j].epa->countThem[i])); |
---|
2066 | } |
---|
2067 | |
---|
2068 | fclose(classificationFile); |
---|
2069 | |
---|
2070 | |
---|
2071 | printBothOpen("\n\nLabelled reference tree including branch labels and query sequences written to file: %s\n\n", labelledTreeFileName); |
---|
2072 | printBothOpen("Labelled reference tree with branch labels (without query sequences) written to file: %s\n\n", originalLabelledTreeFileName); |
---|
2073 | printBothOpen("Labelled reference tree with branch labels in portable pplacer/EPA format (without query sequences) written to file: %s\n\n", jointFormatTreeFileName); |
---|
2074 | printBothOpen("Classification result file written to file: %s\n\n", classificationFileName); |
---|
2075 | |
---|
2076 | |
---|
2077 | |
---|
2078 | { |
---|
2079 | info |
---|
2080 | *inf = (info*)rax_malloc(sizeof(info) * tr->numberOfBranches); |
---|
2081 | |
---|
2082 | FILE |
---|
2083 | *likelihoodWeightsFile; |
---|
2084 | |
---|
2085 | char |
---|
2086 | likelihoodWeightsFileName[1024]; |
---|
2087 | |
---|
2088 | strcpy(likelihoodWeightsFileName, workdir); |
---|
2089 | strcat(likelihoodWeightsFileName, "RAxML_classificationLikelihoodWeights."); |
---|
2090 | strcat(likelihoodWeightsFileName, run_id); |
---|
2091 | |
---|
2092 | likelihoodWeightsFile = myfopen(likelihoodWeightsFileName, "wb"); |
---|
2093 | entropyFile = myfopen(entropyFileName, "wb"); |
---|
2094 | |
---|
2095 | for(i = 0; i < tr->numberOfTipsForInsertion; i++) |
---|
2096 | { |
---|
2097 | double |
---|
2098 | entropy = 0.0, |
---|
2099 | lmax = 0.0, |
---|
2100 | acc = 0.0; |
---|
2101 | |
---|
2102 | int |
---|
2103 | validEntries = 0; |
---|
2104 | |
---|
2105 | for(j = 0; j < tr->numberOfBranches; j++) |
---|
2106 | { |
---|
2107 | inf[j].lh = tr->bInf[j].epa->likelihoods[i]; |
---|
2108 | inf[j].number = j; |
---|
2109 | } |
---|
2110 | |
---|
2111 | qsort(inf, tr->numberOfBranches, sizeof(info), infoCompare); |
---|
2112 | |
---|
2113 | for(j = 0; j < tr->numberOfBranches; j++) |
---|
2114 | if(inf[j].lh == unlikely) |
---|
2115 | break; |
---|
2116 | else |
---|
2117 | validEntries++; |
---|
2118 | |
---|
2119 | assert(validEntries > 0); |
---|
2120 | |
---|
2121 | j = 0; |
---|
2122 | |
---|
2123 | lmax = inf[0].lh; |
---|
2124 | |
---|
2125 | while(acc <= 0.95 && j < validEntries) |
---|
2126 | { |
---|
2127 | int |
---|
2128 | k; |
---|
2129 | |
---|
2130 | double |
---|
2131 | all = 0.0, |
---|
2132 | prob = 0.0; |
---|
2133 | |
---|
2134 | for(k = 0; k < validEntries; k++) |
---|
2135 | all += exp(inf[k].lh - lmax); |
---|
2136 | |
---|
2137 | acc += (prob = (exp(inf[j].lh - lmax) / all)); |
---|
2138 | |
---|
2139 | fprintf(likelihoodWeightsFile, "%s I%d %f %f\n", tr->nameList[tr->inserts[i]], inf[j].number, prob, acc); |
---|
2140 | |
---|
2141 | j++; |
---|
2142 | } |
---|
2143 | |
---|
2144 | j = 0; |
---|
2145 | |
---|
2146 | while(j < validEntries) |
---|
2147 | { |
---|
2148 | int |
---|
2149 | k; |
---|
2150 | |
---|
2151 | double |
---|
2152 | all = 0.0, |
---|
2153 | prob = 0.0; |
---|
2154 | |
---|
2155 | for(k = 0; k < validEntries; k++) |
---|
2156 | all += exp(inf[k].lh - lmax); |
---|
2157 | |
---|
2158 | prob = exp(inf[j].lh - lmax) / all; |
---|
2159 | |
---|
2160 | if(prob > 0) |
---|
2161 | entropy -= ( prob * log(prob) ); /*pp 20110531 */ |
---|
2162 | |
---|
2163 | j++; |
---|
2164 | } |
---|
2165 | |
---|
2166 | /* normalize entropy by dividing with the log(validEntries) which is the maximum Entropy possible */ |
---|
2167 | |
---|
2168 | fprintf(entropyFile, "%s\t%f\n", tr->nameList[tr->inserts[i]], entropy / log((double)validEntries)); |
---|
2169 | } |
---|
2170 | |
---|
2171 | rax_free(inf); |
---|
2172 | |
---|
2173 | fclose(entropyFile); |
---|
2174 | fclose(likelihoodWeightsFile); |
---|
2175 | |
---|
2176 | printBothOpen("Classification result file using likelihood weights written to file: %s\n\n", likelihoodWeightsFileName); |
---|
2177 | printBothOpen("Classification entropy result file using likelihood weights written to file: %s\n\n", entropyFileName); |
---|
2178 | } |
---|
2179 | |
---|
2180 | exit(0); |
---|
2181 | } |
---|
2182 | |
---|
2183 | |
---|