1 | /* RAxML-VI-HPC (version 2.2) a program for sequential and parallel estimation of phylogenetic trees |
---|
2 | * Copyright August 2006 by Alexandros Stamatakis |
---|
3 | * |
---|
4 | * Partially derived from |
---|
5 | * fastDNAml, a program for estimation of phylogenetic trees from sequences by Gary J. Olsen |
---|
6 | * |
---|
7 | * and |
---|
8 | * |
---|
9 | * Programs of the PHYLIP package by Joe Felsenstein. |
---|
10 | * |
---|
11 | * This program is free software; you may redistribute it and/or modify its |
---|
12 | * under the terms of the GNU General Public License as published by the Free |
---|
13 | * Software Foundation; either version 2 of the License, or (at your option) |
---|
14 | * any later version. |
---|
15 | * |
---|
16 | * This program is distributed in the hope that it will be useful, but |
---|
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
18 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
---|
19 | * for more details. |
---|
20 | * |
---|
21 | * |
---|
22 | * For any other enquiries send an Email to Alexandros Stamatakis |
---|
23 | * Alexandros.Stamatakis@epfl.ch |
---|
24 | * |
---|
25 | * When publishing work that is based on the results from RAxML-VI-HPC please cite: |
---|
26 | * |
---|
27 | * Alexandros Stamatakis:"RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands of taxa and mixed models". |
---|
28 | * Bioinformatics 2006; doi: 10.1093/bioinformatics/btl446 |
---|
29 | */ |
---|
30 | |
---|
31 | #ifndef WIN32 |
---|
32 | #include <sys/times.h> |
---|
33 | #include <sys/types.h> |
---|
34 | #include <sys/time.h> |
---|
35 | #include <unistd.h> |
---|
36 | #endif |
---|
37 | |
---|
38 | #include <math.h> |
---|
39 | #include <time.h> |
---|
40 | #include <stdlib.h> |
---|
41 | #include <stdio.h> |
---|
42 | #include <ctype.h> |
---|
43 | #include <string.h> |
---|
44 | |
---|
45 | #include "axml.h" |
---|
46 | |
---|
47 | #define POSTERIOR_THRESHOLD 0.90 |
---|
48 | |
---|
49 | extern int Thorough; |
---|
50 | extern double masterTime; |
---|
51 | |
---|
52 | |
---|
53 | extern char permFileName[1024], resultFileName[1024], |
---|
54 | logFileName[1024], checkpointFileName[1024], infoFileName[1024], run_id[128], workdir[1024], bootStrapFile[1024], bootstrapFileName[1024], |
---|
55 | bipartitionsFileName[1024],bipartitionsFileNameBranchLabels[1024]; |
---|
56 | |
---|
57 | |
---|
58 | |
---|
59 | typedef struct |
---|
60 | { |
---|
61 | nodeptr p; |
---|
62 | double lh; |
---|
63 | } scores; |
---|
64 | |
---|
65 | |
---|
66 | typedef struct |
---|
67 | { |
---|
68 | scores *s; |
---|
69 | int count; |
---|
70 | int maxCount; |
---|
71 | } insertions; |
---|
72 | |
---|
73 | static void addInsertion(nodeptr p, double lh, insertions *ins) |
---|
74 | { |
---|
75 | if(ins->count < ins->maxCount) |
---|
76 | { |
---|
77 | ins->s[ins->count].lh = lh; |
---|
78 | ins->s[ins->count].p = p; |
---|
79 | ins->count = ins->count + 1; |
---|
80 | } |
---|
81 | else |
---|
82 | { |
---|
83 | ins->s = rax_realloc(ins->s, sizeof(scores) * ins->maxCount * 2, FALSE); |
---|
84 | |
---|
85 | ins->maxCount *= 2; |
---|
86 | |
---|
87 | ins->s[ins->count].lh = lh; |
---|
88 | ins->s[ins->count].p = p; |
---|
89 | ins->count = ins->count + 1; |
---|
90 | } |
---|
91 | } |
---|
92 | |
---|
93 | |
---|
94 | /* the two functions below just compute the subtree insertion likelihood score into |
---|
95 | a branch using the thorough method. |
---|
96 | */ |
---|
97 | |
---|
98 | static void insertFast (tree *tr, nodeptr p, nodeptr q, int numBranches) |
---|
99 | { |
---|
100 | nodeptr r, s; |
---|
101 | int i; |
---|
102 | |
---|
103 | r = q->back; |
---|
104 | s = p->back; |
---|
105 | |
---|
106 | for(i = 0; i < numBranches; i++) |
---|
107 | tr->lzi[i] = q->z[i]; |
---|
108 | |
---|
109 | if(Thorough) |
---|
110 | { |
---|
111 | double zqr[NUM_BRANCHES], zqs[NUM_BRANCHES], zrs[NUM_BRANCHES], lzqr, lzqs, lzrs, lzsum, lzq, lzr, lzs, lzmax; |
---|
112 | double defaultArray[NUM_BRANCHES]; |
---|
113 | double e1[NUM_BRANCHES], e2[NUM_BRANCHES], e3[NUM_BRANCHES]; |
---|
114 | double *qz; |
---|
115 | |
---|
116 | qz = q->z; |
---|
117 | |
---|
118 | for(i = 0; i < numBranches; i++) |
---|
119 | defaultArray[i] = defaultz; |
---|
120 | |
---|
121 | makenewzGeneric(tr, q, r, qz, iterations, zqr, FALSE); |
---|
122 | makenewzGeneric(tr, q, s, defaultArray, iterations, zqs, FALSE); |
---|
123 | makenewzGeneric(tr, r, s, defaultArray, iterations, zrs, FALSE); |
---|
124 | |
---|
125 | |
---|
126 | for(i = 0; i < numBranches; i++) |
---|
127 | { |
---|
128 | lzqr = (zqr[i] > zmin) ? log(zqr[i]) : log(zmin); |
---|
129 | lzqs = (zqs[i] > zmin) ? log(zqs[i]) : log(zmin); |
---|
130 | lzrs = (zrs[i] > zmin) ? log(zrs[i]) : log(zmin); |
---|
131 | lzsum = 0.5 * (lzqr + lzqs + lzrs); |
---|
132 | |
---|
133 | lzq = lzsum - lzrs; |
---|
134 | lzr = lzsum - lzqs; |
---|
135 | lzs = lzsum - lzqr; |
---|
136 | lzmax = log(zmax); |
---|
137 | |
---|
138 | if (lzq > lzmax) {lzq = lzmax; lzr = lzqr; lzs = lzqs;} |
---|
139 | else if (lzr > lzmax) {lzr = lzmax; lzq = lzqr; lzs = lzrs;} |
---|
140 | else if (lzs > lzmax) {lzs = lzmax; lzq = lzqs; lzr = lzrs;} |
---|
141 | |
---|
142 | e1[i] = exp(lzq); |
---|
143 | e2[i] = exp(lzr); |
---|
144 | e3[i] = exp(lzs); |
---|
145 | } |
---|
146 | hookup(p->next, q, e1, numBranches); |
---|
147 | hookup(p->next->next, r, e2, numBranches); |
---|
148 | hookup(p, s, e3, numBranches); |
---|
149 | } |
---|
150 | else |
---|
151 | { |
---|
152 | double z[NUM_BRANCHES]; |
---|
153 | |
---|
154 | for(i = 0; i < numBranches; i++) |
---|
155 | { |
---|
156 | z[i] = sqrt(q->z[i]); |
---|
157 | |
---|
158 | if(z[i] < zmin) |
---|
159 | z[i] = zmin; |
---|
160 | if(z[i] > zmax) |
---|
161 | z[i] = zmax; |
---|
162 | } |
---|
163 | |
---|
164 | hookup(p->next, q, z, tr->numBranches); |
---|
165 | hookup(p->next->next, r, z, tr->numBranches); |
---|
166 | } |
---|
167 | |
---|
168 | newviewGeneric(tr, p); |
---|
169 | |
---|
170 | if(Thorough) |
---|
171 | { |
---|
172 | localSmooth(tr, p, smoothings); |
---|
173 | |
---|
174 | for(i = 0; i < numBranches; i++) |
---|
175 | { |
---|
176 | tr->lzq[i] = p->next->z[i]; |
---|
177 | tr->lzr[i] = p->next->next->z[i]; |
---|
178 | tr->lzs[i] = p->z[i]; |
---|
179 | } |
---|
180 | } |
---|
181 | } |
---|
182 | |
---|
183 | |
---|
184 | |
---|
185 | |
---|
186 | static double testInsertFast (tree *tr, nodeptr p, nodeptr q, insertions *ins, boolean veryFast) |
---|
187 | { |
---|
188 | double qz[NUM_BRANCHES], pz[NUM_BRANCHES]; |
---|
189 | nodeptr r, s; |
---|
190 | double LH; |
---|
191 | int i; |
---|
192 | |
---|
193 | r = q->back; |
---|
194 | |
---|
195 | for(i = 0; i < tr->numBranches; i++) |
---|
196 | { |
---|
197 | qz[i] = q->z[i]; |
---|
198 | pz[i] = p->z[i]; |
---|
199 | } |
---|
200 | |
---|
201 | insertFast(tr, p, q, tr->numBranches); |
---|
202 | |
---|
203 | evaluateGeneric(tr, p->next->next); |
---|
204 | |
---|
205 | addInsertion(q, tr->likelihood, ins); |
---|
206 | |
---|
207 | |
---|
208 | if(veryFast) |
---|
209 | if(tr->likelihood > tr->endLH) |
---|
210 | { |
---|
211 | tr->insertNode = q; |
---|
212 | tr->removeNode = p; |
---|
213 | for(i = 0; i < tr->numBranches; i++) |
---|
214 | tr->currentZQR[i] = tr->zqr[i]; |
---|
215 | tr->endLH = tr->likelihood; |
---|
216 | } |
---|
217 | |
---|
218 | LH = tr->likelihood; |
---|
219 | |
---|
220 | hookup(q, r, qz, tr->numBranches); |
---|
221 | |
---|
222 | p->next->next->back = p->next->back = (nodeptr) NULL; |
---|
223 | |
---|
224 | if(Thorough) |
---|
225 | { |
---|
226 | s = p->back; |
---|
227 | hookup(p, s, pz, tr->numBranches); |
---|
228 | } |
---|
229 | |
---|
230 | return LH; |
---|
231 | } |
---|
232 | |
---|
233 | static double testInsertCandidates(tree *tr, nodeptr p, nodeptr q) |
---|
234 | { |
---|
235 | double qz[NUM_BRANCHES], pz[NUM_BRANCHES]; |
---|
236 | nodeptr r, s; |
---|
237 | double LH; |
---|
238 | int i; |
---|
239 | |
---|
240 | r = q->back; |
---|
241 | |
---|
242 | for(i = 0; i < tr->numBranches; i++) |
---|
243 | { |
---|
244 | qz[i] = q->z[i]; |
---|
245 | pz[i] = p->z[i]; |
---|
246 | } |
---|
247 | |
---|
248 | insertFast(tr, p, q, tr->numBranches); |
---|
249 | |
---|
250 | evaluateGeneric(tr, p->next->next); |
---|
251 | |
---|
252 | if(tr->likelihood > tr->endLH) |
---|
253 | { |
---|
254 | tr->insertNode = q; |
---|
255 | tr->removeNode = p; |
---|
256 | for(i = 0; i < tr->numBranches; i++) |
---|
257 | tr->currentZQR[i] = tr->zqr[i]; |
---|
258 | tr->endLH = tr->likelihood; |
---|
259 | } |
---|
260 | |
---|
261 | LH = tr->likelihood; |
---|
262 | |
---|
263 | hookup(q, r, qz, tr->numBranches); |
---|
264 | |
---|
265 | p->next->next->back = p->next->back = (nodeptr) NULL; |
---|
266 | |
---|
267 | if(Thorough) |
---|
268 | { |
---|
269 | s = p->back; |
---|
270 | hookup(p, s, pz, tr->numBranches); |
---|
271 | } |
---|
272 | |
---|
273 | return LH; |
---|
274 | } |
---|
275 | |
---|
276 | |
---|
277 | /* |
---|
278 | function below just computes all subtree roots, |
---|
279 | we go to every inner node and store the three outgoing subtree |
---|
280 | roots. Assumes evidently that nodeptr p that is passed to the first |
---|
281 | instance of this recursion is not a tip ;-) |
---|
282 | */ |
---|
283 | |
---|
284 | static void getSubtreeRoots(nodeptr p, nodeptr *ptr, int *count, int mxtips) |
---|
285 | { |
---|
286 | if(isTip(p->number, mxtips)) |
---|
287 | return; |
---|
288 | |
---|
289 | ptr[*count] = p; |
---|
290 | *count = *count + 1; |
---|
291 | |
---|
292 | ptr[*count] = p->next; |
---|
293 | *count = *count + 1; |
---|
294 | |
---|
295 | ptr[*count] = p->next->next; |
---|
296 | *count = *count + 1; |
---|
297 | |
---|
298 | getSubtreeRoots(p->next->back, ptr, count, mxtips); |
---|
299 | getSubtreeRoots(p->next->next->back, ptr, count, mxtips); |
---|
300 | } |
---|
301 | |
---|
302 | |
---|
303 | /* |
---|
304 | conducts linear SPRs, computes the approximate likelihood score |
---|
305 | fo an insertion of the subtree being rearranged into the left and |
---|
306 | right branch. Depending on the score it then descends into |
---|
307 | either the right or the left tree. |
---|
308 | |
---|
309 | I also added a radius that limits the number of branches away from the original pruning position into which the tree |
---|
310 | will be inserted. |
---|
311 | |
---|
312 | We actually need to test empirically what a good setting may be ! |
---|
313 | */ |
---|
314 | |
---|
315 | |
---|
316 | static void insertBeyond(tree *tr, nodeptr p, nodeptr q, int radius, insertions *ins, boolean veryFast) |
---|
317 | { |
---|
318 | if(radius > 0) |
---|
319 | { |
---|
320 | int count = 0; |
---|
321 | |
---|
322 | double |
---|
323 | twoScores[2]; |
---|
324 | |
---|
325 | twoScores[0] = unlikely; |
---|
326 | twoScores[1] = unlikely; |
---|
327 | |
---|
328 | if(isTip(q->next->back->number, tr->mxtips) && isTip(q->next->next->back->number, tr->mxtips)) |
---|
329 | return; |
---|
330 | |
---|
331 | if(!isTip(q->next->back->number, tr->mxtips)) |
---|
332 | { |
---|
333 | twoScores[0] = testInsertFast(tr, p, q->next->back, ins, veryFast); |
---|
334 | count++; |
---|
335 | } |
---|
336 | |
---|
337 | if(!isTip(q->next->next->back->number, tr->mxtips)) |
---|
338 | { |
---|
339 | twoScores[1] = testInsertFast(tr, p, q->next->next->back, ins, veryFast); |
---|
340 | count++; |
---|
341 | } |
---|
342 | |
---|
343 | if(count == 2 && !veryFast) |
---|
344 | { |
---|
345 | double |
---|
346 | weight; |
---|
347 | |
---|
348 | if(twoScores[0] > twoScores[1]) |
---|
349 | { |
---|
350 | weight = exp(twoScores[0] - twoScores[0]) / (exp(twoScores[0] - twoScores[0]) + exp(twoScores[1] - twoScores[0])); |
---|
351 | |
---|
352 | if(weight >= POSTERIOR_THRESHOLD) |
---|
353 | insertBeyond(tr, p, q->next->back, radius - 1, ins, veryFast); |
---|
354 | else |
---|
355 | { |
---|
356 | insertBeyond(tr, p, q->next->back, radius - 1, ins, veryFast); |
---|
357 | insertBeyond(tr, p, q->next->next->back, radius - 1, ins, veryFast); |
---|
358 | } |
---|
359 | } |
---|
360 | else |
---|
361 | { |
---|
362 | weight = exp(twoScores[1] - twoScores[1]) / (exp(twoScores[1] - twoScores[0]) + exp(twoScores[1] - twoScores[1])); |
---|
363 | |
---|
364 | if(weight >= POSTERIOR_THRESHOLD) |
---|
365 | insertBeyond(tr, p, q->next->next->back, radius - 1, ins, veryFast); |
---|
366 | else |
---|
367 | { |
---|
368 | insertBeyond(tr, p, q->next->back, radius - 1, ins, veryFast); |
---|
369 | insertBeyond(tr, p, q->next->next->back, radius - 1, ins, veryFast); |
---|
370 | } |
---|
371 | } |
---|
372 | } |
---|
373 | else |
---|
374 | { |
---|
375 | if(twoScores[0] > twoScores[1]) |
---|
376 | insertBeyond(tr, p, q->next->back, radius - 1, ins, veryFast); |
---|
377 | else |
---|
378 | insertBeyond(tr, p, q->next->next->back, radius - 1, ins, veryFast); |
---|
379 | } |
---|
380 | } |
---|
381 | |
---|
382 | } |
---|
383 | |
---|
384 | |
---|
385 | |
---|
386 | typedef struct |
---|
387 | { |
---|
388 | int direction; |
---|
389 | double likelihood; |
---|
390 | |
---|
391 | } fourLikelihoods; |
---|
392 | |
---|
393 | |
---|
394 | |
---|
395 | |
---|
396 | |
---|
397 | |
---|
398 | static int fourCompare(const void *p1, const void *p2) |
---|
399 | { |
---|
400 | fourLikelihoods *rc1 = (fourLikelihoods *)p1; |
---|
401 | fourLikelihoods *rc2 = (fourLikelihoods *)p2; |
---|
402 | |
---|
403 | double i = rc1->likelihood; |
---|
404 | double j = rc2->likelihood; |
---|
405 | |
---|
406 | if (i > j) |
---|
407 | return (-1); |
---|
408 | if (i < j) |
---|
409 | return (1); |
---|
410 | return (0); |
---|
411 | } |
---|
412 | |
---|
413 | static int scoreCompare(const void *p1, const void *p2) |
---|
414 | { |
---|
415 | scores *rc1 = (scores *)p1; |
---|
416 | scores *rc2 = (scores *)p2; |
---|
417 | |
---|
418 | double i = rc1->lh; |
---|
419 | double j = rc2->lh; |
---|
420 | |
---|
421 | if (i > j) |
---|
422 | return (-1); |
---|
423 | if (i < j) |
---|
424 | return (1); |
---|
425 | return (0); |
---|
426 | } |
---|
427 | |
---|
428 | |
---|
429 | |
---|
430 | |
---|
431 | static double linearSPRs(tree *tr, int radius, boolean veryFast) |
---|
432 | { |
---|
433 | int |
---|
434 | numberOfSubtrees = (tr->mxtips - 2) * 3, |
---|
435 | count = 0, |
---|
436 | k, |
---|
437 | i; |
---|
438 | |
---|
439 | double |
---|
440 | fourScores[4]; |
---|
441 | |
---|
442 | nodeptr |
---|
443 | *ptr = (nodeptr *)rax_malloc(sizeof(nodeptr) * numberOfSubtrees); |
---|
444 | |
---|
445 | fourLikelihoods |
---|
446 | *fourLi = (fourLikelihoods *)rax_malloc(sizeof(fourLikelihoods) * 4); |
---|
447 | |
---|
448 | insertions |
---|
449 | *ins = (insertions*)rax_malloc(sizeof(insertions)); |
---|
450 | |
---|
451 | |
---|
452 | |
---|
453 | ins->count = 0; |
---|
454 | ins->maxCount = 2048; |
---|
455 | |
---|
456 | ins->s = (scores *)rax_malloc(sizeof(scores) * ins->maxCount); |
---|
457 | |
---|
458 | |
---|
459 | /* recursively compute the roots of all subtrees in the current tree |
---|
460 | and store them in ptr */ |
---|
461 | |
---|
462 | getSubtreeRoots(tr->start->back, ptr, &count, tr->mxtips); |
---|
463 | |
---|
464 | assert(count == numberOfSubtrees); |
---|
465 | |
---|
466 | tr->startLH = tr->endLH = tr->likelihood; |
---|
467 | |
---|
468 | /* loop over subtrees, i.e., execute a full SPR cycle */ |
---|
469 | |
---|
470 | for(i = 0; i < numberOfSubtrees; i++) |
---|
471 | { |
---|
472 | nodeptr |
---|
473 | p = ptr[i], |
---|
474 | p1 = p->next->back, |
---|
475 | p2 = p->next->next->back; |
---|
476 | |
---|
477 | double |
---|
478 | p1z[NUM_BRANCHES], |
---|
479 | p2z[NUM_BRANCHES]; |
---|
480 | |
---|
481 | ins->count = 0; |
---|
482 | |
---|
483 | /*printf("Node %d %d\n", p->number, i);*/ |
---|
484 | |
---|
485 | tr->bestOfNode = unlikely; |
---|
486 | |
---|
487 | for(k = 0; k < 4; k++) |
---|
488 | fourScores[k] = unlikely; |
---|
489 | |
---|
490 | assert(!isTip(p->number, tr->rdta->numsp)); |
---|
491 | |
---|
492 | if(!isTip(p1->number, tr->rdta->numsp) || !isTip(p2->number, tr->rdta->numsp)) |
---|
493 | { |
---|
494 | double |
---|
495 | max = unlikely; |
---|
496 | |
---|
497 | int |
---|
498 | maxInt = -1; |
---|
499 | |
---|
500 | for(k = 0; k < tr->numBranches; k++) |
---|
501 | { |
---|
502 | p1z[k] = p1->z[k]; |
---|
503 | p2z[k] = p2->z[k]; |
---|
504 | } |
---|
505 | |
---|
506 | /* remove the current subtree */ |
---|
507 | |
---|
508 | removeNodeBIG(tr, p, tr->numBranches); |
---|
509 | |
---|
510 | /* pre score with fast insertions */ |
---|
511 | if(veryFast) |
---|
512 | Thorough = 1; |
---|
513 | else |
---|
514 | Thorough = 0; |
---|
515 | |
---|
516 | if (!isTip(p1->number, tr->rdta->numsp)) |
---|
517 | { |
---|
518 | fourScores[0] = testInsertFast(tr, p, p1->next->back, ins, veryFast); |
---|
519 | fourScores[1] = testInsertFast(tr, p, p1->next->next->back, ins, veryFast); |
---|
520 | } |
---|
521 | |
---|
522 | if (!isTip(p2->number, tr->rdta->numsp)) |
---|
523 | { |
---|
524 | fourScores[2] = testInsertFast(tr, p, p2->next->back, ins, veryFast); |
---|
525 | fourScores[3] = testInsertFast(tr, p, p2->next->next->back, ins, veryFast); |
---|
526 | } |
---|
527 | |
---|
528 | if(veryFast) |
---|
529 | Thorough = 1; |
---|
530 | else |
---|
531 | Thorough = 0; |
---|
532 | |
---|
533 | /* find the most promising direction */ |
---|
534 | |
---|
535 | |
---|
536 | if(!veryFast) |
---|
537 | { |
---|
538 | int |
---|
539 | j = 0, |
---|
540 | validEntries = 0; |
---|
541 | |
---|
542 | double |
---|
543 | lmax = unlikely, |
---|
544 | posterior = 0.0; |
---|
545 | |
---|
546 | for(k = 0; k < 4; k++) |
---|
547 | { |
---|
548 | fourLi[k].direction = k; |
---|
549 | fourLi[k].likelihood = fourScores[k]; |
---|
550 | } |
---|
551 | |
---|
552 | qsort(fourLi, 4, sizeof(fourLikelihoods), fourCompare); |
---|
553 | |
---|
554 | for(k = 0; k < 4; k++) |
---|
555 | if(fourLi[k].likelihood > unlikely) |
---|
556 | validEntries++; |
---|
557 | |
---|
558 | lmax = fourLi[0].likelihood; |
---|
559 | |
---|
560 | while(posterior <= POSTERIOR_THRESHOLD && j < validEntries) |
---|
561 | { |
---|
562 | double |
---|
563 | all = 0.0, |
---|
564 | prob = 0.0; |
---|
565 | |
---|
566 | for(k = 0; k < validEntries; k++) |
---|
567 | all += exp(fourLi[k].likelihood - lmax); |
---|
568 | |
---|
569 | posterior += (prob = (exp(fourLi[j].likelihood - lmax) / all)); |
---|
570 | |
---|
571 | switch(fourLi[j].direction) |
---|
572 | { |
---|
573 | case 0: |
---|
574 | insertBeyond(tr, p, p1->next->back, radius, ins, veryFast); |
---|
575 | break; |
---|
576 | case 1: |
---|
577 | insertBeyond(tr, p, p1->next->next->back, radius, ins, veryFast); |
---|
578 | break; |
---|
579 | case 2: |
---|
580 | insertBeyond(tr, p, p2->next->back, radius, ins, veryFast); |
---|
581 | break; |
---|
582 | case 3: |
---|
583 | insertBeyond(tr, p, p2->next->next->back, radius, ins, veryFast); |
---|
584 | break; |
---|
585 | default: |
---|
586 | assert(0); |
---|
587 | } |
---|
588 | |
---|
589 | j++; |
---|
590 | } |
---|
591 | |
---|
592 | qsort(ins->s, ins->count, sizeof(scores), scoreCompare); |
---|
593 | |
---|
594 | Thorough = 1; |
---|
595 | |
---|
596 | for(k = 0; k < MIN(ins->count, 20); k++) |
---|
597 | testInsertCandidates(tr, p, ins->s[k].p); |
---|
598 | } |
---|
599 | else |
---|
600 | { |
---|
601 | Thorough = 1; |
---|
602 | |
---|
603 | |
---|
604 | for(k = 0; k < 4; k++) |
---|
605 | { |
---|
606 | if(max < fourScores[k]) |
---|
607 | { |
---|
608 | max = fourScores[k]; |
---|
609 | maxInt = k; |
---|
610 | } |
---|
611 | } |
---|
612 | |
---|
613 | /* descend into this direction and re-insert subtree there */ |
---|
614 | |
---|
615 | if(maxInt >= 0) |
---|
616 | { |
---|
617 | switch(maxInt) |
---|
618 | { |
---|
619 | case 0: |
---|
620 | insertBeyond(tr, p, p1->next->back, radius, ins, veryFast); |
---|
621 | break; |
---|
622 | case 1: |
---|
623 | insertBeyond(tr, p, p1->next->next->back, radius, ins, veryFast); |
---|
624 | break; |
---|
625 | case 2: |
---|
626 | insertBeyond(tr, p, p2->next->back, radius, ins, veryFast); |
---|
627 | break; |
---|
628 | case 3: |
---|
629 | insertBeyond(tr, p, p2->next->next->back, radius, ins, veryFast); |
---|
630 | break; |
---|
631 | default: |
---|
632 | assert(0); |
---|
633 | } |
---|
634 | } |
---|
635 | } |
---|
636 | |
---|
637 | /* repair branch and reconnect subtree to its original position from which it was pruned */ |
---|
638 | |
---|
639 | hookup(p->next, p1, p1z, tr->numBranches); |
---|
640 | hookup(p->next->next, p2, p2z, tr->numBranches); |
---|
641 | |
---|
642 | /* repair likelihood vectors */ |
---|
643 | |
---|
644 | newviewGeneric(tr, p); |
---|
645 | |
---|
646 | /* if the rearrangement of subtree rooted at p yielded a better likelihood score |
---|
647 | restore the altered topology and use it from now on |
---|
648 | */ |
---|
649 | |
---|
650 | if(tr->endLH > tr->startLH) |
---|
651 | { |
---|
652 | restoreTreeFast(tr); |
---|
653 | tr->startLH = tr->endLH = tr->likelihood; |
---|
654 | } |
---|
655 | |
---|
656 | } |
---|
657 | } |
---|
658 | |
---|
659 | return tr->startLH; |
---|
660 | } |
---|
661 | |
---|
662 | |
---|
663 | |
---|
664 | |
---|
665 | static boolean allSmoothed(tree *tr) |
---|
666 | { |
---|
667 | int i; |
---|
668 | boolean result = TRUE; |
---|
669 | |
---|
670 | for(i = 0; i < tr->numBranches; i++) |
---|
671 | { |
---|
672 | if(tr->partitionSmoothed[i] == FALSE) |
---|
673 | result = FALSE; |
---|
674 | else |
---|
675 | tr->partitionConverged[i] = TRUE; |
---|
676 | } |
---|
677 | |
---|
678 | return result; |
---|
679 | } |
---|
680 | |
---|
681 | void nniSmooth(tree *tr, nodeptr p, int maxtimes) |
---|
682 | { |
---|
683 | int |
---|
684 | i; |
---|
685 | |
---|
686 | for(i = 0; i < tr->numBranches; i++) |
---|
687 | tr->partitionConverged[i] = FALSE; |
---|
688 | |
---|
689 | |
---|
690 | |
---|
691 | while (--maxtimes >= 0) |
---|
692 | { |
---|
693 | |
---|
694 | |
---|
695 | for(i = 0; i < tr->numBranches; i++) |
---|
696 | tr->partitionSmoothed[i] = TRUE; |
---|
697 | |
---|
698 | |
---|
699 | |
---|
700 | assert(!isTip(p->number, tr->mxtips)); |
---|
701 | |
---|
702 | |
---|
703 | |
---|
704 | assert(!isTip(p->back->number, tr->mxtips)); |
---|
705 | |
---|
706 | update(tr, p); |
---|
707 | |
---|
708 | update(tr, p->next); |
---|
709 | |
---|
710 | update(tr, p->next->next); |
---|
711 | |
---|
712 | update(tr, p->back->next); |
---|
713 | |
---|
714 | update(tr, p->back->next->next); |
---|
715 | |
---|
716 | if (allSmoothed(tr)) |
---|
717 | break; |
---|
718 | |
---|
719 | } |
---|
720 | |
---|
721 | |
---|
722 | |
---|
723 | for(i = 0; i < tr->numBranches; i++) |
---|
724 | { |
---|
725 | tr->partitionSmoothed[i] = FALSE; |
---|
726 | tr->partitionConverged[i] = FALSE; |
---|
727 | } |
---|
728 | |
---|
729 | |
---|
730 | } |
---|
731 | |
---|
732 | |
---|
733 | static void storeBranches(tree *tr, nodeptr p, double *pqz, double *pz1, double *pz2, double *qz1, double *qz2) |
---|
734 | { |
---|
735 | int |
---|
736 | i; |
---|
737 | |
---|
738 | nodeptr |
---|
739 | q = p->back; |
---|
740 | |
---|
741 | for(i = 0; i < tr->numBranches; i++) |
---|
742 | { |
---|
743 | pqz[i] = p->z[i]; |
---|
744 | pz1[i] = p->next->z[i]; |
---|
745 | pz2[i] = p->next->next->z[i]; |
---|
746 | qz1[i] = q->next->z[i]; |
---|
747 | qz2[i] = q->next->next->z[i]; |
---|
748 | } |
---|
749 | } |
---|
750 | |
---|
751 | |
---|
752 | static int SHSupport(int nPos, int nBootstrap, int *col, double loglk[3], double *siteloglk[3], int lower, int upper, boolean perPartition) |
---|
753 | { |
---|
754 | double |
---|
755 | resampleDelta, |
---|
756 | resample1, |
---|
757 | resample2, |
---|
758 | support = 0.0, |
---|
759 | delta1 = loglk[0] - loglk[1], |
---|
760 | delta2 = loglk[0] - loglk[2], |
---|
761 | delta = delta1 < delta2 ? delta1 : delta2, |
---|
762 | diff; |
---|
763 | |
---|
764 | int |
---|
765 | iBest, |
---|
766 | i, |
---|
767 | j, |
---|
768 | nSupport = 0, |
---|
769 | iBoot; |
---|
770 | |
---|
771 | boolean |
---|
772 | shittySplit = FALSE; |
---|
773 | |
---|
774 | |
---|
775 | if(loglk[1] >= loglk[0]) |
---|
776 | { |
---|
777 | diff = ABS(loglk[1] - loglk[0]); |
---|
778 | /*printf("%1.80f %1.80f\n", loglk[0], loglk[1]);*/ |
---|
779 | shittySplit = TRUE; |
---|
780 | if(!perPartition) |
---|
781 | assert(diff < 0.1); |
---|
782 | } |
---|
783 | |
---|
784 | if(loglk[2] >= loglk[0]) |
---|
785 | { |
---|
786 | diff = ABS(loglk[2] - loglk[0]); |
---|
787 | /*printf("%1.80f %1.80f\n", loglk[0], loglk[2]);*/ |
---|
788 | shittySplit = TRUE; |
---|
789 | if(!perPartition) |
---|
790 | assert(diff < 0.1); |
---|
791 | } |
---|
792 | |
---|
793 | if(loglk[0] > loglk[2] && loglk[0] > loglk[1]) |
---|
794 | { |
---|
795 | if(loglk[2] > loglk[1]) |
---|
796 | diff = ABS(loglk[2] - loglk[0]); |
---|
797 | else |
---|
798 | diff = ABS(loglk[1] - loglk[0]); |
---|
799 | if(diff < 0.1) |
---|
800 | shittySplit = TRUE; |
---|
801 | } |
---|
802 | |
---|
803 | if(shittySplit) |
---|
804 | return 0; |
---|
805 | |
---|
806 | |
---|
807 | /* |
---|
808 | printf("%f %f %f\n", loglk[0], loglk[1], loglk[2]); |
---|
809 | assert(loglk[0] >= loglk[1] && loglk[0] >= loglk[2]); |
---|
810 | */ |
---|
811 | |
---|
812 | for(iBoot = 0; iBoot < nBootstrap; iBoot++) |
---|
813 | { |
---|
814 | double resampled[3]; |
---|
815 | |
---|
816 | for (i = 0; i < 3; i++) |
---|
817 | resampled[i] = -loglk[i]; |
---|
818 | |
---|
819 | for (j = lower; j < upper; j++) |
---|
820 | { |
---|
821 | int |
---|
822 | pos = col[iBoot * nPos + j]; |
---|
823 | |
---|
824 | for (i = 0; i < 3; i++) |
---|
825 | resampled[i] += pos * siteloglk[i][j]; |
---|
826 | } |
---|
827 | |
---|
828 | iBest = 0; |
---|
829 | |
---|
830 | /*printf("%d %f %f %f\n", iBoot, resampled[0], resampled[1], resampled[2]);*/ |
---|
831 | |
---|
832 | for (i = 1; i < 3; i++) |
---|
833 | if (resampled[i] > resampled[iBest]) |
---|
834 | iBest = i; |
---|
835 | |
---|
836 | resample1 = resampled[iBest] - resampled[(iBest+1)%3]; |
---|
837 | resample2 = resampled[iBest] - resampled[(iBest+2)%3]; |
---|
838 | resampleDelta = resample1 < resample2 ? resample1 : resample2; |
---|
839 | |
---|
840 | if(resampleDelta < delta) |
---|
841 | nSupport++; |
---|
842 | } |
---|
843 | |
---|
844 | support = (nSupport/(double)nBootstrap); |
---|
845 | |
---|
846 | return ((int)((support * 100.0) + 0.5)); |
---|
847 | } |
---|
848 | |
---|
849 | static void setupBranchInfo(nodeptr p, tree *tr, int *counter) |
---|
850 | { |
---|
851 | if(!isTip(p->number, tr->mxtips)) |
---|
852 | { |
---|
853 | nodeptr q; |
---|
854 | |
---|
855 | if(!(isTip(p->back->number, tr->mxtips))) |
---|
856 | { |
---|
857 | p->bInf = p->back->bInf = &(tr->bInf[*counter]); |
---|
858 | |
---|
859 | p->bInf->oP = p; |
---|
860 | p->bInf->oQ = p->back; |
---|
861 | |
---|
862 | *counter = *counter + 1; |
---|
863 | } |
---|
864 | |
---|
865 | q = p->next; |
---|
866 | |
---|
867 | while(q != p) |
---|
868 | { |
---|
869 | setupBranchInfo(q->back, tr, counter); |
---|
870 | q = q->next; |
---|
871 | } |
---|
872 | |
---|
873 | return; |
---|
874 | } |
---|
875 | } |
---|
876 | |
---|
877 | |
---|
878 | |
---|
879 | |
---|
880 | static void doNNIs(tree *tr, nodeptr p, double *lhVectors[3], boolean shSupport, int *interchanges, int *innerBranches, |
---|
881 | double *pqz_0, double *pz1_0, double *pz2_0, double *qz1_0, double *qz2_0, double *pqz_1, double *pz1_1, double *pz2_1, |
---|
882 | double *qz1_1, double *qz2_1, double *pqz_2, double *pz1_2, double *pz2_2, double *qz1_2, double *qz2_2) |
---|
883 | { |
---|
884 | nodeptr |
---|
885 | q = p->back, |
---|
886 | pb1 = p->next->back, |
---|
887 | pb2 = p->next->next->back; |
---|
888 | |
---|
889 | assert(!isTip(p->number, tr->mxtips)); |
---|
890 | |
---|
891 | if(!isTip(q->number, tr->mxtips)) |
---|
892 | { |
---|
893 | int |
---|
894 | model, |
---|
895 | whichNNI = 0; |
---|
896 | |
---|
897 | nodeptr |
---|
898 | qb1 = q->next->back, |
---|
899 | qb2 = q->next->next->back; |
---|
900 | |
---|
901 | double |
---|
902 | lh[3], |
---|
903 | *lhv[3]; |
---|
904 | |
---|
905 | lhv[0] = (double*)rax_malloc(sizeof(double) * tr->NumberOfModels); |
---|
906 | lhv[1] = (double*)rax_malloc(sizeof(double) * tr->NumberOfModels); |
---|
907 | lhv[2] = (double*)rax_malloc(sizeof(double) * tr->NumberOfModels); |
---|
908 | |
---|
909 | *innerBranches = *innerBranches + 1; |
---|
910 | |
---|
911 | nniSmooth(tr, p, 16); |
---|
912 | |
---|
913 | if(shSupport) |
---|
914 | { |
---|
915 | evaluateGenericVector(tr, p); |
---|
916 | memcpy(lhVectors[0], tr->perSiteLL, sizeof(double) * tr->cdta->endsite); |
---|
917 | } |
---|
918 | else |
---|
919 | evaluateGeneric(tr, p); |
---|
920 | |
---|
921 | lh[0] = tr->likelihood; |
---|
922 | |
---|
923 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
924 | lhv[0][model] = tr->perPartitionLH[model]; |
---|
925 | |
---|
926 | storeBranches(tr, p, pqz_0, pz1_0, pz2_0, qz1_0, qz2_0); |
---|
927 | |
---|
928 | /*******************************************/ |
---|
929 | |
---|
930 | hookup(p, q, pqz_0, tr->numBranches); |
---|
931 | |
---|
932 | hookup(p->next, qb1, qz1_0, tr->numBranches); |
---|
933 | hookup(p->next->next, pb2, pz2_0, tr->numBranches); |
---|
934 | |
---|
935 | hookup(q->next, pb1, pz1_0, tr->numBranches); |
---|
936 | hookup(q->next->next, qb2, qz2_0, tr->numBranches); |
---|
937 | |
---|
938 | newviewGeneric(tr, p); |
---|
939 | newviewGeneric(tr, p->back); |
---|
940 | |
---|
941 | nniSmooth(tr, p, 16); |
---|
942 | |
---|
943 | if(shSupport) |
---|
944 | { |
---|
945 | evaluateGenericVector(tr, p); |
---|
946 | memcpy(lhVectors[1], tr->perSiteLL, sizeof(double) * tr->cdta->endsite); |
---|
947 | } |
---|
948 | else |
---|
949 | evaluateGeneric(tr, p); |
---|
950 | |
---|
951 | lh[1] = tr->likelihood; |
---|
952 | |
---|
953 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
954 | lhv[1][model] = tr->perPartitionLH[model]; |
---|
955 | |
---|
956 | storeBranches(tr, p, pqz_1, pz1_1, pz2_1, qz1_1, qz2_1); |
---|
957 | |
---|
958 | if(lh[1] > lh[0]) |
---|
959 | whichNNI = 1; |
---|
960 | |
---|
961 | /*******************************************/ |
---|
962 | |
---|
963 | hookup(p, q, pqz_0, tr->numBranches); |
---|
964 | |
---|
965 | hookup(p->next, qb1, qz1_0, tr->numBranches); |
---|
966 | hookup(p->next->next, pb1, pz1_0, tr->numBranches); |
---|
967 | |
---|
968 | hookup(q->next, pb2, pz2_0, tr->numBranches); |
---|
969 | hookup(q->next->next, qb2, qz2_0, tr->numBranches); |
---|
970 | |
---|
971 | newviewGeneric(tr, p); |
---|
972 | newviewGeneric(tr, p->back); |
---|
973 | |
---|
974 | nniSmooth(tr, p, 16); |
---|
975 | |
---|
976 | if(shSupport) |
---|
977 | { |
---|
978 | evaluateGenericVector(tr, p); |
---|
979 | memcpy(lhVectors[2], tr->perSiteLL, sizeof(double) * tr->cdta->endsite); |
---|
980 | } |
---|
981 | else |
---|
982 | evaluateGeneric(tr, p); |
---|
983 | |
---|
984 | lh[2] = tr->likelihood; |
---|
985 | |
---|
986 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
987 | lhv[2][model] = tr->perPartitionLH[model]; |
---|
988 | |
---|
989 | storeBranches(tr, p, pqz_2, pz1_2, pz2_2, qz1_2, qz2_2); |
---|
990 | |
---|
991 | if(lh[2] > lh[0] && lh[2] > lh[1]) |
---|
992 | whichNNI = 2; |
---|
993 | |
---|
994 | /*******************************************/ |
---|
995 | |
---|
996 | if(shSupport) |
---|
997 | whichNNI = 0; |
---|
998 | |
---|
999 | switch(whichNNI) |
---|
1000 | { |
---|
1001 | case 0: |
---|
1002 | hookup(p, q, pqz_0, tr->numBranches); |
---|
1003 | |
---|
1004 | hookup(p->next, pb1, pz1_0, tr->numBranches); |
---|
1005 | hookup(p->next->next, pb2, pz2_0, tr->numBranches); |
---|
1006 | |
---|
1007 | hookup(q->next, qb1, qz1_0, tr->numBranches); |
---|
1008 | hookup(q->next->next, qb2, qz2_0, tr->numBranches); |
---|
1009 | break; |
---|
1010 | case 1: |
---|
1011 | hookup(p, q, pqz_1, tr->numBranches); |
---|
1012 | |
---|
1013 | hookup(p->next, qb1, pz1_1, tr->numBranches); |
---|
1014 | hookup(p->next->next, pb2, pz2_1, tr->numBranches); |
---|
1015 | |
---|
1016 | hookup(q->next, pb1, qz1_1, tr->numBranches); |
---|
1017 | hookup(q->next->next, qb2, qz2_1, tr->numBranches); |
---|
1018 | break; |
---|
1019 | case 2: |
---|
1020 | hookup(p, q, pqz_2, tr->numBranches); |
---|
1021 | |
---|
1022 | hookup(p->next, qb1, pz1_2, tr->numBranches); |
---|
1023 | hookup(p->next->next, pb1, pz2_2, tr->numBranches); |
---|
1024 | |
---|
1025 | hookup(q->next, pb2, qz1_2, tr->numBranches); |
---|
1026 | hookup(q->next->next, qb2, qz2_2, tr->numBranches); |
---|
1027 | break; |
---|
1028 | default: |
---|
1029 | assert(0); |
---|
1030 | } |
---|
1031 | |
---|
1032 | newviewGeneric(tr, p); |
---|
1033 | newviewGeneric(tr, q); |
---|
1034 | |
---|
1035 | if(whichNNI > 0) |
---|
1036 | *interchanges = *interchanges + 1; |
---|
1037 | |
---|
1038 | if(shSupport) |
---|
1039 | { |
---|
1040 | p->bInf->support = SHSupport(tr->cdta->endsite, 1000, tr->resample, lh, lhVectors, 0, tr->cdta->endsite, FALSE); |
---|
1041 | |
---|
1042 | //printf("ALL: %f %f %f ", lh[0], lh[1], lh[2]); |
---|
1043 | |
---|
1044 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
1045 | { |
---|
1046 | double |
---|
1047 | perPartitionLH[3]; |
---|
1048 | |
---|
1049 | perPartitionLH[0] = lhv[0][model]; |
---|
1050 | perPartitionLH[1] = lhv[1][model]; |
---|
1051 | perPartitionLH[2] = lhv[2][model]; |
---|
1052 | |
---|
1053 | p->bInf->supports[model] = SHSupport(tr->cdta->endsite, 1000, tr->resample, perPartitionLH, lhVectors, tr->partitionData[model].lower, tr->partitionData[model].upper, TRUE); |
---|
1054 | |
---|
1055 | //printf(" p%d %f %f %f ", model, perPartitionLH[0], perPartitionLH[1], perPartitionLH[2]); |
---|
1056 | } |
---|
1057 | |
---|
1058 | //printf("\n"); |
---|
1059 | } |
---|
1060 | |
---|
1061 | rax_free(lhv[0]); |
---|
1062 | rax_free(lhv[1]); |
---|
1063 | rax_free(lhv[2]); |
---|
1064 | } |
---|
1065 | |
---|
1066 | |
---|
1067 | if(!isTip(pb1->number, tr->mxtips)) |
---|
1068 | doNNIs(tr, pb1, lhVectors, shSupport, interchanges, innerBranches, |
---|
1069 | pqz_0, pz1_0, pz2_0, qz1_0, qz2_0, pqz_1, pz1_1, pz2_1, |
---|
1070 | qz1_1, qz2_1, pqz_2, pz1_2, pz2_2, qz1_2, qz2_2); |
---|
1071 | |
---|
1072 | if(!isTip(pb2->number, tr->mxtips)) |
---|
1073 | doNNIs(tr, pb2, lhVectors, shSupport, interchanges, innerBranches, |
---|
1074 | pqz_0, pz1_0, pz2_0, qz1_0, qz2_0, pqz_1, pz1_1, pz2_1, |
---|
1075 | qz1_1, qz2_1, pqz_2, pz1_2, pz2_2, qz1_2, qz2_2); |
---|
1076 | |
---|
1077 | |
---|
1078 | return; |
---|
1079 | } |
---|
1080 | |
---|
1081 | |
---|
1082 | static int encapsulateNNIs(tree *tr, double *lhVectors[3], boolean shSupport) |
---|
1083 | { |
---|
1084 | int |
---|
1085 | innerBranches = 0, |
---|
1086 | interchanges = 0; |
---|
1087 | |
---|
1088 | double |
---|
1089 | pqz_0[NUM_BRANCHES], |
---|
1090 | pz1_0[NUM_BRANCHES], |
---|
1091 | pz2_0[NUM_BRANCHES], |
---|
1092 | qz1_0[NUM_BRANCHES], |
---|
1093 | qz2_0[NUM_BRANCHES], |
---|
1094 | pqz_1[NUM_BRANCHES], |
---|
1095 | pz1_1[NUM_BRANCHES], |
---|
1096 | pz2_1[NUM_BRANCHES], |
---|
1097 | qz1_1[NUM_BRANCHES], |
---|
1098 | qz2_1[NUM_BRANCHES], |
---|
1099 | pqz_2[NUM_BRANCHES], |
---|
1100 | pz1_2[NUM_BRANCHES], |
---|
1101 | pz2_2[NUM_BRANCHES], |
---|
1102 | qz1_2[NUM_BRANCHES], |
---|
1103 | qz2_2[NUM_BRANCHES]; |
---|
1104 | |
---|
1105 | |
---|
1106 | doNNIs(tr, tr->start->back, lhVectors, shSupport, &interchanges, &innerBranches, |
---|
1107 | pqz_0, pz1_0, pz2_0, qz1_0, qz2_0, pqz_1, pz1_1, pz2_1, |
---|
1108 | qz1_1, qz2_1, pqz_2, pz1_2, pz2_2, qz1_2, qz2_2); |
---|
1109 | |
---|
1110 | assert(innerBranches == (tr->mxtips - 3)); |
---|
1111 | |
---|
1112 | return interchanges; |
---|
1113 | } |
---|
1114 | |
---|
1115 | int *permutationSH(tree *tr, int nBootstrap, long _randomSeed) |
---|
1116 | { |
---|
1117 | int |
---|
1118 | replicate, |
---|
1119 | model, |
---|
1120 | maxNonZero = 0, |
---|
1121 | *weightBuffer, |
---|
1122 | *col = (int*)rax_calloc(((size_t)tr->cdta->endsite) * ((size_t)nBootstrap), sizeof(int)), |
---|
1123 | *nonzero = (int*)rax_calloc(tr->NumberOfModels, sizeof(int)); |
---|
1124 | |
---|
1125 | long |
---|
1126 | randomSeed = _randomSeed; |
---|
1127 | |
---|
1128 | size_t |
---|
1129 | bufferSize; |
---|
1130 | |
---|
1131 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
1132 | { |
---|
1133 | int |
---|
1134 | j; |
---|
1135 | |
---|
1136 | for (j = 0; j < tr->cdta->endsite; j++) |
---|
1137 | { |
---|
1138 | if(tr->originalModel[j] == model) |
---|
1139 | nonzero[model] += tr->originalWeights[j]; |
---|
1140 | } |
---|
1141 | |
---|
1142 | if(nonzero[model] > maxNonZero) |
---|
1143 | maxNonZero = nonzero[model]; |
---|
1144 | } |
---|
1145 | |
---|
1146 | bufferSize = ((size_t)maxNonZero) * sizeof(int); |
---|
1147 | weightBuffer = (int*)rax_malloc(bufferSize); |
---|
1148 | |
---|
1149 | for(replicate = 0; replicate < nBootstrap; replicate++) |
---|
1150 | { |
---|
1151 | int |
---|
1152 | j, |
---|
1153 | *wgt = &col[((size_t)tr->cdta->endsite) * ((size_t)replicate)]; |
---|
1154 | |
---|
1155 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
1156 | { |
---|
1157 | int |
---|
1158 | pos, |
---|
1159 | nonz = nonzero[model]; |
---|
1160 | |
---|
1161 | memset(weightBuffer, 0, bufferSize); |
---|
1162 | |
---|
1163 | for(j = 0; j < nonz; j++) |
---|
1164 | weightBuffer[(int) (nonz * randum(&randomSeed))]++; |
---|
1165 | |
---|
1166 | for(j = 0, pos = 0; j < tr->cdta->endsite; j++) |
---|
1167 | { |
---|
1168 | if(model == tr->originalModel[j]) |
---|
1169 | { |
---|
1170 | int |
---|
1171 | w; |
---|
1172 | |
---|
1173 | for(w = 0; w < tr->originalWeights[j]; w++) |
---|
1174 | { |
---|
1175 | wgt[j] += weightBuffer[pos]; |
---|
1176 | pos++; |
---|
1177 | } |
---|
1178 | } |
---|
1179 | } |
---|
1180 | } |
---|
1181 | } |
---|
1182 | |
---|
1183 | |
---|
1184 | rax_free(weightBuffer); |
---|
1185 | rax_free(nonzero); |
---|
1186 | |
---|
1187 | return col; |
---|
1188 | } |
---|
1189 | |
---|
1190 | |
---|
1191 | |
---|
1192 | |
---|
1193 | |
---|
1194 | |
---|
1195 | |
---|
1196 | void fastSearch(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta) |
---|
1197 | { |
---|
1198 | double |
---|
1199 | likelihood, |
---|
1200 | startLikelihood, |
---|
1201 | *lhVectors[3]; |
---|
1202 | |
---|
1203 | char |
---|
1204 | bestTreeFileName[1024]; |
---|
1205 | |
---|
1206 | FILE |
---|
1207 | *f; |
---|
1208 | |
---|
1209 | int |
---|
1210 | model; |
---|
1211 | |
---|
1212 | |
---|
1213 | |
---|
1214 | lhVectors[0] = (double *)NULL; |
---|
1215 | lhVectors[1] = (double *)NULL; |
---|
1216 | lhVectors[2] = (double *)NULL; |
---|
1217 | |
---|
1218 | /* initialize model parameters with standard starting values */ |
---|
1219 | |
---|
1220 | initModel(tr, rdta, cdta, adef); |
---|
1221 | |
---|
1222 | printBothOpen("Time after init : %f\n", gettime() - masterTime); |
---|
1223 | |
---|
1224 | /* |
---|
1225 | compute starting tree, either by reading in a tree specified via -t |
---|
1226 | or by building one |
---|
1227 | */ |
---|
1228 | |
---|
1229 | getStartingTree(tr, adef); |
---|
1230 | |
---|
1231 | printBothOpen("Time after init and starting tree: %f\n", gettime() - masterTime); |
---|
1232 | |
---|
1233 | /* |
---|
1234 | rough model parameter optimization, the log likelihood epsilon should |
---|
1235 | actually be determined based on the initial tree score and not be hard-coded |
---|
1236 | */ |
---|
1237 | |
---|
1238 | if(adef->useBinaryModelFile) |
---|
1239 | { |
---|
1240 | readBinaryModel(tr); |
---|
1241 | evaluateGenericInitrav(tr, tr->start); |
---|
1242 | treeEvaluate(tr, 2); |
---|
1243 | } |
---|
1244 | else |
---|
1245 | modOpt(tr, adef, FALSE, 10.0); |
---|
1246 | |
---|
1247 | printBothOpen("Time after init, starting tree, mod opt: %f\n", gettime() - masterTime); |
---|
1248 | |
---|
1249 | /* print out the number of rate categories used for the CAT model, one should |
---|
1250 | use less then the default, e.g., -c 16 works quite well */ |
---|
1251 | |
---|
1252 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
1253 | printBothOpen("Partion %d number of Cats: %d\n", model, tr->partitionData[model].numberOfCategories); |
---|
1254 | |
---|
1255 | /* |
---|
1256 | means that we are going to do thorough insertions |
---|
1257 | with real newton-raphson based br-len opt at the three branches |
---|
1258 | adjactent to every insertion point |
---|
1259 | */ |
---|
1260 | |
---|
1261 | Thorough = 1; |
---|
1262 | |
---|
1263 | |
---|
1264 | /* |
---|
1265 | loop over SPR cycles until the likelihood difference |
---|
1266 | before and after the SPR cycle is <= 0.5 log likelihood units. |
---|
1267 | Rather than being hard-coded this should also be determined based on the |
---|
1268 | actual likelihood of the tree |
---|
1269 | */ |
---|
1270 | |
---|
1271 | do |
---|
1272 | { |
---|
1273 | startLikelihood = tr->likelihood; |
---|
1274 | |
---|
1275 | /* conduct a cycle of linear SPRs */ |
---|
1276 | |
---|
1277 | |
---|
1278 | |
---|
1279 | likelihood = linearSPRs(tr, 20, adef->veryFast); |
---|
1280 | |
---|
1281 | evaluateGeneric(tr, tr->start); |
---|
1282 | |
---|
1283 | /* the NNIs also optimize br-lens of resulting topology a bit */ |
---|
1284 | encapsulateNNIs(tr, lhVectors, FALSE); |
---|
1285 | |
---|
1286 | printBothOpen("LH after SPRs %f, after NNI %f\n", likelihood, tr->likelihood); |
---|
1287 | } |
---|
1288 | while(ABS(tr->likelihood - startLikelihood) > 0.5); |
---|
1289 | |
---|
1290 | |
---|
1291 | |
---|
1292 | |
---|
1293 | |
---|
1294 | |
---|
1295 | /* print out the resulting tree to the RAxML_bestTree. file. |
---|
1296 | note that boosttrapping or doing multiple inferences won't work. |
---|
1297 | This thing computes a single tree and that's it */ |
---|
1298 | |
---|
1299 | strcpy(bestTreeFileName, workdir); |
---|
1300 | strcat(bestTreeFileName, "RAxML_fastTree."); |
---|
1301 | strcat(bestTreeFileName, run_id); |
---|
1302 | |
---|
1303 | |
---|
1304 | |
---|
1305 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
1306 | |
---|
1307 | f = myfopen(bestTreeFileName, "wb"); |
---|
1308 | fprintf(f, "%s", tr->tree_string); |
---|
1309 | fclose(f); |
---|
1310 | |
---|
1311 | |
---|
1312 | |
---|
1313 | printBothOpen("RAxML fast tree written to file: %s\n", bestTreeFileName); |
---|
1314 | |
---|
1315 | writeBinaryModel(tr); |
---|
1316 | |
---|
1317 | printBothOpen("Total execution time: %f\n", gettime() - masterTime); |
---|
1318 | |
---|
1319 | printBothOpen("Good bye ... \n"); |
---|
1320 | } |
---|
1321 | |
---|
1322 | |
---|
1323 | void shSupports(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta) |
---|
1324 | { |
---|
1325 | double |
---|
1326 | diff, |
---|
1327 | *lhVectors[3]; |
---|
1328 | |
---|
1329 | char |
---|
1330 | bestTreeFileName[1024], |
---|
1331 | shSupportFileName[1024]; |
---|
1332 | |
---|
1333 | FILE |
---|
1334 | *f; |
---|
1335 | |
---|
1336 | int |
---|
1337 | i, |
---|
1338 | interchanges = 0, |
---|
1339 | counter = 0; |
---|
1340 | |
---|
1341 | assert(adef->restart); |
---|
1342 | |
---|
1343 | tr->resample = permutationSH(tr, 1000, 12345); |
---|
1344 | |
---|
1345 | //a tiny bit dirty here, all allocs should be cleaned up! |
---|
1346 | |
---|
1347 | lhVectors[0] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite); |
---|
1348 | lhVectors[1] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite); |
---|
1349 | lhVectors[2] = (double *)rax_malloc(sizeof(double) * tr->cdta->endsite); |
---|
1350 | tr->bInf = (branchInfo*)rax_malloc(sizeof(branchInfo) * (tr->mxtips - 3)); |
---|
1351 | |
---|
1352 | for(i = 0; i < tr->mxtips - 3; i++) |
---|
1353 | tr->bInf[i].supports = (int*)rax_malloc(sizeof(int) * tr->NumberOfModels); |
---|
1354 | |
---|
1355 | initModel(tr, rdta, cdta, adef); |
---|
1356 | |
---|
1357 | |
---|
1358 | getStartingTree(tr, adef); |
---|
1359 | |
---|
1360 | if(adef->useBinaryModelFile) |
---|
1361 | { |
---|
1362 | readBinaryModel(tr); |
---|
1363 | evaluateGenericInitrav(tr, tr->start); |
---|
1364 | treeEvaluate(tr, 2); |
---|
1365 | } |
---|
1366 | else |
---|
1367 | { |
---|
1368 | evaluateGenericInitrav(tr, tr->start); |
---|
1369 | modOpt(tr, adef, FALSE, 1.0); |
---|
1370 | } |
---|
1371 | |
---|
1372 | printBothOpen("Time after model optimization: %f\n", gettime() - masterTime); |
---|
1373 | |
---|
1374 | printBothOpen("Initial Likelihood %f\n\n", tr->likelihood); |
---|
1375 | |
---|
1376 | do |
---|
1377 | { |
---|
1378 | double |
---|
1379 | lh1, |
---|
1380 | lh2; |
---|
1381 | |
---|
1382 | lh1 = tr->likelihood; |
---|
1383 | |
---|
1384 | interchanges = encapsulateNNIs(tr, lhVectors, FALSE); |
---|
1385 | |
---|
1386 | evaluateGeneric(tr, tr->start); |
---|
1387 | |
---|
1388 | lh2 = tr->likelihood; |
---|
1389 | |
---|
1390 | diff = ABS(lh1 - lh2); |
---|
1391 | |
---|
1392 | printBothOpen("NNI interchanges %d Likelihood %f\n", interchanges, tr->likelihood); |
---|
1393 | } |
---|
1394 | while(diff > 0.01); |
---|
1395 | |
---|
1396 | printBothOpen("\nFinal Likelihood of NNI-optimized tree: %f\n\n", tr->likelihood); |
---|
1397 | |
---|
1398 | setupBranchInfo(tr->start->back, tr, &counter); |
---|
1399 | assert(counter == tr->mxtips - 3); |
---|
1400 | |
---|
1401 | interchanges = encapsulateNNIs(tr, lhVectors, TRUE); |
---|
1402 | |
---|
1403 | strcpy(bestTreeFileName, workdir); |
---|
1404 | strcat(bestTreeFileName, "RAxML_fastTree."); |
---|
1405 | strcat(bestTreeFileName, run_id); |
---|
1406 | |
---|
1407 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
1408 | |
---|
1409 | f = myfopen(bestTreeFileName, "wb"); |
---|
1410 | fprintf(f, "%s", tr->tree_string); |
---|
1411 | fclose(f); |
---|
1412 | |
---|
1413 | |
---|
1414 | strcpy(shSupportFileName, workdir); |
---|
1415 | strcat(shSupportFileName, "RAxML_fastTreeSH_Support."); |
---|
1416 | strcat(shSupportFileName, run_id); |
---|
1417 | |
---|
1418 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, TRUE, FALSE, FALSE); |
---|
1419 | |
---|
1420 | f = myfopen(shSupportFileName, "wb"); |
---|
1421 | fprintf(f, "%s", tr->tree_string); |
---|
1422 | fclose(f); |
---|
1423 | |
---|
1424 | printBothOpen("RAxML NNI-optimized tree written to file: %s\n", bestTreeFileName); |
---|
1425 | |
---|
1426 | printBothOpen("\nSame tree with SH-like supports written to file: %s\n", shSupportFileName); |
---|
1427 | |
---|
1428 | if(tr->NumberOfModels > 1) |
---|
1429 | { |
---|
1430 | char |
---|
1431 | shSupportPerPartitionFileName[1024]; |
---|
1432 | |
---|
1433 | strcpy(shSupportPerPartitionFileName, workdir); |
---|
1434 | strcat(shSupportPerPartitionFileName, "RAxML_fastTree_perPartition_SH_Support."); |
---|
1435 | strcat(shSupportPerPartitionFileName, run_id); |
---|
1436 | |
---|
1437 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, FALSE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, TRUE); |
---|
1438 | |
---|
1439 | f = myfopen(shSupportPerPartitionFileName, "wb"); |
---|
1440 | fprintf(f, "%s", tr->tree_string); |
---|
1441 | fclose(f); |
---|
1442 | |
---|
1443 | printBothOpen("\nSame tree with SH-like support for each partition written to file: %s\n", shSupportPerPartitionFileName); |
---|
1444 | } |
---|
1445 | |
---|
1446 | printBothOpen("\nTotal execution time: %f\n", gettime() - masterTime); |
---|
1447 | |
---|
1448 | exit(0); |
---|
1449 | } |
---|