1 | /* |
---|
2 | * Best 2.2 |
---|
3 | * |
---|
4 | * This file contains the functions |
---|
5 | * for calculating the probability of |
---|
6 | * gene trees given the species tree |
---|
7 | * and the prior probability of the |
---|
8 | * species tree |
---|
9 | * |
---|
10 | * Liang Liu |
---|
11 | * Department of Statistics |
---|
12 | * The Ohio State University |
---|
13 | * Columbus, Ohio |
---|
14 | * |
---|
15 | * liuliang@stat.ohio-state.edu |
---|
16 | */ |
---|
17 | |
---|
18 | #include <assert.h> |
---|
19 | |
---|
20 | #include "best.h" |
---|
21 | #include "command.h" |
---|
22 | #include "globals.h" |
---|
23 | #include "mb.h" |
---|
24 | #include "mbmath.h" |
---|
25 | #include "mcmc.h" |
---|
26 | #include "model.h" |
---|
27 | #include "sumt.h" |
---|
28 | #include "tree.h" |
---|
29 | #include "utils.h" |
---|
30 | |
---|
31 | const char* const svnRevisionBestC="$Rev: 420 $"; /* Revision keyword which is expended/updated by svn on each commit/update*/ |
---|
32 | |
---|
33 | /****************************** Local functions converted by Fredrik from BEST code *****************************/ |
---|
34 | int CompareDepths (const void *x, const void *y); |
---|
35 | int CompareDoubles (const void *x, const void *y); |
---|
36 | int CompareNodes (const void *x, const void *y); |
---|
37 | int CompareNodesByX (const void *x, const void *y); |
---|
38 | int GetSpeciesTreeFromMinDepths (Tree* speciesTree, double *depthMatrix); |
---|
39 | int GetDepthMatrix(Tree * speciesTree, double *depthMatrix); |
---|
40 | int GetMeanDist (Tree *speciesTree, double *depthMatrix, double *mean); |
---|
41 | int GetMinDepthMatrix (Tree **geneTrees, int numGeneTrees, double *depthMatrix); |
---|
42 | void LineagesIn (TreeNode* geneTreeNode, TreeNode* speciesTreeNode); |
---|
43 | double LnPriorProbGeneTree (Tree *geneTree, double mu, Tree *speciesTree, double *popSizePtr); |
---|
44 | double LnProposalProbSpeciesTree (Tree *speciesTree, double *depthMatrix, double expRate); |
---|
45 | void MapGeneTreeToSpeciesTree (Tree *geneTree, Tree *speciesTree); |
---|
46 | int ModifyDepthMatrix (double expRate, double *depthMatrix, SafeLong *seed); |
---|
47 | |
---|
48 | |
---|
49 | /* Global BEST variables */ |
---|
50 | SafeLong **speciesPairSets; |
---|
51 | double *depthMatrix; |
---|
52 | |
---|
53 | |
---|
54 | /* Allocate variables used by best code during mcmc */ |
---|
55 | void AllocateBestChainVariables (void) |
---|
56 | { |
---|
57 | int i, j, index, numUpperTriang, nLongsNeeded; |
---|
58 | |
---|
59 | // Free if by mistake variables are already allocated |
---|
60 | if (memAllocs[ALLOC_BEST] == YES) |
---|
61 | FreeBestChainVariables (); |
---|
62 | |
---|
63 | // Allocate space for upper triangular pair sets |
---|
64 | numUpperTriang = (numSpecies * (numSpecies-1)) / 2; |
---|
65 | nLongsNeeded = ((numSpecies - 1) / nBitsInALong) + 1; |
---|
66 | speciesPairSets = (SafeLong **) SafeCalloc (numUpperTriang, sizeof(SafeLong *)); |
---|
67 | speciesPairSets[0] = (SafeLong *) SafeCalloc (numUpperTriang*nLongsNeeded, sizeof(SafeLong)); |
---|
68 | for (i=1; i<numUpperTriang; i++) |
---|
69 | speciesPairSets[i] = speciesPairSets[0] + i*nLongsNeeded; |
---|
70 | |
---|
71 | // Set upper triangular pair partitions once and for all |
---|
72 | index = 0; |
---|
73 | for (i=0; i<numSpecies; i++) { |
---|
74 | for (j=i+1; j<numSpecies; j++) { |
---|
75 | SetBit(i, speciesPairSets[index]); |
---|
76 | SetBit(j, speciesPairSets[index]); |
---|
77 | index++; |
---|
78 | } |
---|
79 | } |
---|
80 | |
---|
81 | /* allocate species for depthMatrix */ |
---|
82 | depthMatrix = SafeCalloc (numUpperTriang, sizeof(double)); |
---|
83 | |
---|
84 | memAllocs[ALLOC_BEST] = YES; |
---|
85 | } |
---|
86 | |
---|
87 | |
---|
88 | |
---|
89 | |
---|
90 | |
---|
91 | /** Compare function (Depth struct) for qsort */ |
---|
92 | int CompareDepths (const void *x, const void *y) { |
---|
93 | |
---|
94 | if ((*((Depth *)(x))).depth < (*((Depth *)(y))).depth) |
---|
95 | return -1; |
---|
96 | else if ((*((Depth *)(x))).depth > (*((Depth *)(y))).depth) |
---|
97 | return 1; |
---|
98 | else |
---|
99 | return 0; |
---|
100 | } |
---|
101 | |
---|
102 | |
---|
103 | |
---|
104 | |
---|
105 | |
---|
106 | /** Compare function (doubles) for qsort */ |
---|
107 | int CompareDoubles (const void *x, const void *y) { |
---|
108 | |
---|
109 | if (*((double *)(x)) < *((double *)(y))) |
---|
110 | return -1; |
---|
111 | else if (*((double *)(x)) > *((double *)(y))) |
---|
112 | return 1; |
---|
113 | else |
---|
114 | return 0; |
---|
115 | } |
---|
116 | |
---|
117 | |
---|
118 | /** Compare function (TreeNode struct) for qsort */ |
---|
119 | int CompareNodes (const void *x, const void *y) { |
---|
120 | |
---|
121 | if ((*((TreeNode **)(x)))->nodeDepth < (*((TreeNode**)(y)))->nodeDepth) |
---|
122 | return -1; |
---|
123 | else if ((*((TreeNode **)(x)))->nodeDepth > (*((TreeNode**)(y)))->nodeDepth) |
---|
124 | return 1; |
---|
125 | else |
---|
126 | return 0; |
---|
127 | } |
---|
128 | |
---|
129 | |
---|
130 | /** Compare function (TreeNode struct; sort by x, then by nodeDepth) for qsort */ |
---|
131 | int CompareNodesByX (const void *x, const void *y) { |
---|
132 | |
---|
133 | if ((*((TreeNode **)(x)))->x < (*((TreeNode**)(y)))->x) |
---|
134 | return -1; |
---|
135 | else if ((*((TreeNode **)(x)))->x > (*((TreeNode**)(y)))->x) |
---|
136 | return 1; |
---|
137 | else { |
---|
138 | if ((*((TreeNode **)(x)))->nodeDepth < (*((TreeNode**)(y)))->nodeDepth) |
---|
139 | return -1; |
---|
140 | else if ((*((TreeNode **)(x)))->nodeDepth > (*((TreeNode**)(y)))->nodeDepth) |
---|
141 | return 1; |
---|
142 | else |
---|
143 | return 0; |
---|
144 | } |
---|
145 | } |
---|
146 | |
---|
147 | |
---|
148 | |
---|
149 | |
---|
150 | |
---|
151 | /**----------------------------------------------------------------- |
---|
152 | | |
---|
153 | | FillSpeciesTreeParams: Fill in species trees (start value) |
---|
154 | | |
---|
155 | ------------------------------------------------------------------*/ |
---|
156 | int FillSpeciesTreeParams (SafeLong *seed, int fromChain, int toChain) |
---|
157 | |
---|
158 | { |
---|
159 | int i, k, chn, numGeneTrees, freeBestChainVars; |
---|
160 | Param *p; |
---|
161 | Tree *speciesTree, **geneTrees; |
---|
162 | |
---|
163 | // Allocate space for global best model variables used in this function, in case they are not allocated |
---|
164 | if (memAllocs[ALLOC_BEST] == NO) |
---|
165 | { |
---|
166 | freeBestChainVars = YES; |
---|
167 | AllocateBestChainVariables(); |
---|
168 | } |
---|
169 | else |
---|
170 | freeBestChainVars = NO; |
---|
171 | |
---|
172 | // Use global variable numTopologies to calculate number of gene trees |
---|
173 | // There is one topology for the species tree, the other ones are gene trees |
---|
174 | // The number of current divisions is not safe because one gene tree can have |
---|
175 | // several partitions, for instance if we assign the different genes on the |
---|
176 | // mitochondrion different substitution models, or if we assign different rates |
---|
177 | // to the codon site positions in a sequence |
---|
178 | numGeneTrees = numTopologies - 1; |
---|
179 | geneTrees = (Tree **) SafeCalloc (numGeneTrees, sizeof(Tree*)); |
---|
180 | |
---|
181 | // Build species trees for state 0 |
---|
182 | for (chn=fromChain; chn<toChain; chn++) |
---|
183 | { |
---|
184 | for (k=0; k<numParams; k++) |
---|
185 | { |
---|
186 | p = ¶ms[k]; |
---|
187 | if (p->paramType == P_SPECIESTREE) |
---|
188 | { |
---|
189 | // Find species tree and gene trees |
---|
190 | speciesTree = GetTree(p, chn, 0); |
---|
191 | for (i=0; i<p->nSubParams; i++) |
---|
192 | geneTrees[i] = GetTree(p->subParams[i], chn, 0); |
---|
193 | |
---|
194 | // Get minimum depth matrix for species tree |
---|
195 | GetMinDepthMatrix (geneTrees, numGeneTrees, depthMatrix); |
---|
196 | |
---|
197 | // Get a species tree from min depth matrix |
---|
198 | GetSpeciesTreeFromMinDepths(speciesTree, depthMatrix); |
---|
199 | |
---|
200 | assert (IsSpeciesTreeConsistent(speciesTree, chn) == YES); |
---|
201 | |
---|
202 | // Label the tips |
---|
203 | if (LabelTree (speciesTree, speciesNameSets[speciespartitionNum].names) == ERROR) |
---|
204 | { |
---|
205 | FreeBestChainVariables(); |
---|
206 | return (ERROR); |
---|
207 | } |
---|
208 | } |
---|
209 | } |
---|
210 | } |
---|
211 | |
---|
212 | // Free gene trees |
---|
213 | free (geneTrees); |
---|
214 | |
---|
215 | // Free best model variables if appropriate |
---|
216 | if (freeBestChainVars == YES) |
---|
217 | FreeBestChainVariables(); |
---|
218 | |
---|
219 | return (NO_ERROR); |
---|
220 | } |
---|
221 | |
---|
222 | |
---|
223 | |
---|
224 | |
---|
225 | |
---|
226 | /**----------------------------------------------------------------- |
---|
227 | | |
---|
228 | | FreeBestChainVariables: Free best variables used during an mcmc |
---|
229 | | run. |
---|
230 | | |
---|
231 | ------------------------------------------------------------------*/ |
---|
232 | void FreeBestChainVariables(void) |
---|
233 | { |
---|
234 | |
---|
235 | if (memAllocs[ALLOC_BEST] == YES) { |
---|
236 | free (speciesPairSets[0]); |
---|
237 | free (speciesPairSets); |
---|
238 | speciesPairSets = NULL; |
---|
239 | } |
---|
240 | |
---|
241 | free (depthMatrix); |
---|
242 | depthMatrix = NULL; |
---|
243 | |
---|
244 | memAllocs[ALLOC_BEST] = NO; |
---|
245 | } |
---|
246 | |
---|
247 | |
---|
248 | |
---|
249 | |
---|
250 | |
---|
251 | /**--------------------------------------------------------------------- |
---|
252 | | |
---|
253 | | GetDepthMatrix: |
---|
254 | | |
---|
255 | | This algorithm calculates the upper triangular depth matrix for the |
---|
256 | | species tree. Time complexity O(n^2). |
---|
257 | | |
---|
258 | | @param speciesTree The species tree (in) |
---|
259 | | @param depthMatrix The minimum depth matrix, upper triangular array (out) |
---|
260 | | @returns Returns ERROR or NO_ERROR |
---|
261 | ----------------------------------------------------------------------*/ |
---|
262 | int GetDepthMatrix (Tree *speciesTree, double *depthMatrix) { |
---|
263 | |
---|
264 | int i, left, right, numUpperTriang, index, nLongsNeeded, freeBitsets; |
---|
265 | double maxDepth; |
---|
266 | TreeNode *p; |
---|
267 | |
---|
268 | // Make sure we have bitfields allocated and set |
---|
269 | freeBitsets = NO; |
---|
270 | if (speciesTree->bitsets == NULL) |
---|
271 | { |
---|
272 | AllocateTreePartitions(speciesTree); |
---|
273 | freeBitsets = YES; |
---|
274 | } |
---|
275 | else |
---|
276 | { |
---|
277 | ResetTreePartitions(speciesTree); // just in case |
---|
278 | freeBitsets = NO; |
---|
279 | } |
---|
280 | |
---|
281 | // Calculate number of values in the upper triangular matrix |
---|
282 | numUpperTriang = numSpecies * (numSpecies - 1) / 2; |
---|
283 | |
---|
284 | // Number of longs needed in a bitfield representing a species set |
---|
285 | nLongsNeeded = ((numSpecies -1) / nBitsInALong) + 1; |
---|
286 | |
---|
287 | // Set all cells to max |
---|
288 | maxDepth = speciesTree->root->left->nodeDepth; // root depth |
---|
289 | for (i=0; i<numUpperTriang; i++) |
---|
290 | depthMatrix[i] = maxDepth; |
---|
291 | |
---|
292 | // Loop over interior nodes |
---|
293 | for (i=0; i<speciesTree->nIntNodes; i++) |
---|
294 | { |
---|
295 | p = speciesTree->intDownPass[i]; |
---|
296 | for (left = FirstTaxonInPartition(p->left->partition, nLongsNeeded); left < numSpecies; left = NextTaxonInPartition(left, p->left->partition, nLongsNeeded)) |
---|
297 | { |
---|
298 | for (right = FirstTaxonInPartition(p->right->partition, nLongsNeeded); right < numSpecies; right = NextTaxonInPartition(right, p->right->partition, nLongsNeeded)) |
---|
299 | { |
---|
300 | index = UpperTriangIndex(left, right, numSpecies); |
---|
301 | depthMatrix[index] = p->nodeDepth; |
---|
302 | } |
---|
303 | } |
---|
304 | } |
---|
305 | |
---|
306 | // Free partitions if appropriate |
---|
307 | if (freeBitsets == YES) |
---|
308 | FreeTreePartitions(speciesTree); |
---|
309 | |
---|
310 | return (NO_ERROR); |
---|
311 | } |
---|
312 | |
---|
313 | |
---|
314 | |
---|
315 | |
---|
316 | |
---|
317 | /**--------------------------------------------------------------------- |
---|
318 | | |
---|
319 | | GetMeanDist |
---|
320 | | |
---|
321 | | This algorithm calculates the mean distance between a distance matrix |
---|
322 | | and the minimum depths that define a species tree. |
---|
323 | | |
---|
324 | | @param speciesTree The species tree (in) |
---|
325 | | @param minDepthMatrix The minimum depth matrix, upper triangular array (in) |
---|
326 | | @param mean The mean distance (out) |
---|
327 | | @returns Returns ERROR or NO_ERROR |
---|
328 | ----------------------------------------------------------------------*/ |
---|
329 | int GetMeanDist (Tree *speciesTree, double *minDepthMatrix, double *mean) { |
---|
330 | |
---|
331 | int i, left, right, numUpperTriang, index, nLongsNeeded, freeBitsets; |
---|
332 | double dist, minDist=0.0, distSum; |
---|
333 | TreeNode *p; |
---|
334 | |
---|
335 | // Make sure we have bitfields allocated and set |
---|
336 | freeBitsets = NO; |
---|
337 | if (speciesTree->bitsets == NULL) |
---|
338 | { |
---|
339 | AllocateTreePartitions(speciesTree); |
---|
340 | freeBitsets = YES; |
---|
341 | } |
---|
342 | else |
---|
343 | { |
---|
344 | ResetTreePartitions(speciesTree); // just in case |
---|
345 | freeBitsets = NO; |
---|
346 | } |
---|
347 | |
---|
348 | // Calculate number of values in the upper triangular matrix |
---|
349 | numUpperTriang = numSpecies * (numSpecies - 1) / 2; |
---|
350 | |
---|
351 | // Number of longs needed in a bitfield representing a species set |
---|
352 | nLongsNeeded = ((numSpecies -1) / nBitsInALong) + 1; |
---|
353 | |
---|
354 | // Loop over interior nodes |
---|
355 | distSum = 0.0; |
---|
356 | for (i=0; i<speciesTree->nIntNodes; i++) |
---|
357 | { |
---|
358 | p = speciesTree->intDownPass[i]; |
---|
359 | p->x = 0; |
---|
360 | while ((left=FirstTaxonInPartition(p->left->partition, nLongsNeeded)) < numSpecies) |
---|
361 | { |
---|
362 | while ((right=FirstTaxonInPartition(p->right->partition, nLongsNeeded)) < numSpecies) |
---|
363 | { |
---|
364 | p->x++; |
---|
365 | index = UpperTriangIndex(left, right, numSpecies); |
---|
366 | dist = depthMatrix[index] - p->nodeDepth; |
---|
367 | if (p->x == 1) |
---|
368 | minDist = dist; |
---|
369 | else if (dist < minDist) |
---|
370 | minDist = dist; |
---|
371 | ClearBit(right, p->right->partition); |
---|
372 | } |
---|
373 | ClearBit(left, p->left->partition); |
---|
374 | } |
---|
375 | distSum += minDist; |
---|
376 | } |
---|
377 | |
---|
378 | (*mean) = distSum / speciesTree->nIntNodes; |
---|
379 | |
---|
380 | // Reset partitions that were destroyed above or free partitions, as appropriate |
---|
381 | if (freeBitsets == YES) |
---|
382 | FreeTreePartitions(speciesTree); |
---|
383 | else |
---|
384 | ResetTreePartitions(speciesTree); |
---|
385 | |
---|
386 | return (NO_ERROR); |
---|
387 | } |
---|
388 | |
---|
389 | |
---|
390 | |
---|
391 | |
---|
392 | |
---|
393 | /**--------------------------------------------------------------------- |
---|
394 | | |
---|
395 | | GetMinDepthMatrix: converted from GetMinDists. |
---|
396 | | |
---|
397 | | This algorithm scans the gene trees and calculates the minimum depth |
---|
398 | | (height) separating species across gene trees. The complexity of the |
---|
399 | | original algorithm was O(mn^3), where m is the number of gene trees and |
---|
400 | | n is the number of taxa in each gene tree. I think this algorithm has |
---|
401 | | complexity that is better on average, but the difference is small. |
---|
402 | | |
---|
403 | | I have rewritten the algorithm also to show alternative techniques that |
---|
404 | | could be used in this and other BEST algorithms. |
---|
405 | | |
---|
406 | | @param geneTrees The gene trees (in) |
---|
407 | | @param depthMatrix The minimum depth matrix, upper triangular array (out) |
---|
408 | | @returns Returns ERROR or NO_ERROR |
---|
409 | ----------------------------------------------------------------------*/ |
---|
410 | int GetMinDepthMatrix (Tree **geneTrees, int numGeneTrees, double *depthMatrix) { |
---|
411 | |
---|
412 | int i, j, w, nLongsNeeded, numUpperTriang, index, trace=0; |
---|
413 | double maxDepth; |
---|
414 | TreeNode *p; |
---|
415 | SafeLong **speciesSets; |
---|
416 | |
---|
417 | // Allocate space for species partitions |
---|
418 | nLongsNeeded = ((numSpecies -1) / nBitsInALong) + 1; // number of longs needed in a bitfield representing a species set |
---|
419 | speciesSets = (SafeLong **) SafeCalloc (2*numLocalTaxa-1, sizeof(SafeLong *)); |
---|
420 | speciesSets[0] = (SafeLong *) SafeCalloc ((2*numLocalTaxa-1)*nLongsNeeded, sizeof(int)); |
---|
421 | for (i=1; i<2*numLocalTaxa-1; i++) |
---|
422 | speciesSets[i] = speciesSets[0] + i*nLongsNeeded; |
---|
423 | |
---|
424 | // Set tip species partitions once and for all |
---|
425 | for (i=0; i<numLocalTaxa; i++) |
---|
426 | SetBit(speciespartitionId[i][speciespartitionNum]-1, speciesSets[i]); |
---|
427 | |
---|
428 | // Set initial max depth for upper triangular matrix |
---|
429 | numUpperTriang = (numSpecies * (numSpecies - 1)) / 2; |
---|
430 | maxDepth = geneTrees[0]->root->left->nodeDepth; |
---|
431 | for (i=0; i<numUpperTriang; i++) |
---|
432 | depthMatrix[i] = maxDepth; |
---|
433 | |
---|
434 | // Now we are ready to cycle over gene trees |
---|
435 | for (w=0; w<numGeneTrees; w++) { |
---|
436 | if (trace) { |
---|
437 | printf("\nGene %d\n",w); |
---|
438 | ShowTree(geneTrees[w]); |
---|
439 | } |
---|
440 | |
---|
441 | // Set species sets for interior nodes. O(n) |
---|
442 | for (i=0; i<geneTrees[w]->nIntNodes; i++) { |
---|
443 | p = geneTrees[w]->intDownPass[i]; |
---|
444 | for (j=0; j<nLongsNeeded; j++) |
---|
445 | speciesSets[p->index][j] = speciesSets[p->left->index][j] | speciesSets[p->right->index][j]; |
---|
446 | } |
---|
447 | |
---|
448 | // Now order the interior nodes in terms of node depth. We rely on the fact that the |
---|
449 | // ordered sequence is a valid downpass sequence. O(log n). |
---|
450 | qsort((void *)(geneTrees[w]->intDownPass), (size_t) geneTrees[w]->nIntNodes, sizeof(TreeNode *), CompareNodes); |
---|
451 | |
---|
452 | // Finally find the minimum for each cell in the upper triangular matrix |
---|
453 | // This is the time critical step with complexity O(n^3) in the simplest |
---|
454 | // algorithm version. This algorithm should do a little better in most cases. |
---|
455 | for (i=0; i<numUpperTriang; i++) { |
---|
456 | |
---|
457 | // Find shallowest node that has the pair |
---|
458 | for (j=0; j<geneTrees[w]->nIntNodes; j++) { |
---|
459 | p = geneTrees[w]->intDownPass[j]; |
---|
460 | |
---|
461 | // Because nodes are ordered in time, if this test is true then we cannot beat the minimum |
---|
462 | if (p->nodeDepth > depthMatrix[i]) |
---|
463 | break; |
---|
464 | |
---|
465 | // Check whether the node is a candidate minimum for the species pair |
---|
466 | // If the test is true, we know from the test above that p->nodeDepth is |
---|
467 | // either a tie or the new minimum |
---|
468 | if (IsPartNested(speciesPairSets[i], speciesSets[p->index], nLongsNeeded) == YES) { |
---|
469 | depthMatrix[i] = p->nodeDepth; |
---|
470 | break; |
---|
471 | } |
---|
472 | } |
---|
473 | } |
---|
474 | } // Next gene tree |
---|
475 | |
---|
476 | if(trace) { |
---|
477 | index = 0; |
---|
478 | printf ("Mindepth matrix\n"); |
---|
479 | for(i=0;i<numSpecies;i++) { |
---|
480 | for (j=0; j<i; j++) |
---|
481 | printf(" "); |
---|
482 | for(j=i+1;j<numSpecies;j++) { |
---|
483 | printf("%.6f ",depthMatrix[index]); |
---|
484 | index++; |
---|
485 | } |
---|
486 | printf("\n"); |
---|
487 | } |
---|
488 | printf("\n"); |
---|
489 | } |
---|
490 | |
---|
491 | free (speciesSets[0]); |
---|
492 | free (speciesSets); |
---|
493 | |
---|
494 | return (NO_ERROR); |
---|
495 | } |
---|
496 | |
---|
497 | |
---|
498 | |
---|
499 | |
---|
500 | |
---|
501 | /**--------------------------------------------------------------------- |
---|
502 | | |
---|
503 | | GetSpeciesTreeFromMinDepths: converted from GetConstraints, Startsptree, |
---|
504 | | and MaximumTree. |
---|
505 | | |
---|
506 | | This is a clustering algorithm based on minimum depths for species pairs. |
---|
507 | | It reduces an n choose 2 upper triangular min depth matrix to an array |
---|
508 | | of n-1 node depths, which fit onto a tree. |
---|
509 | | |
---|
510 | | @param speciesTree The species tree to be filled (out) |
---|
511 | | @param depthMatrix The min depth matrix, upper triangular array (in) |
---|
512 | | @returns Returns NO_ERROR |
---|
513 | ----------------------------------------------------------------------*/ |
---|
514 | int GetSpeciesTreeFromMinDepths (Tree* speciesTree, double *depthMatrix) { |
---|
515 | |
---|
516 | int i, j, numUpperTriang, nLongsNeeded, index, nextNodeIndex; |
---|
517 | Depth *minDepth; |
---|
518 | PolyTree *polyTree; |
---|
519 | PolyNode *p, *q, *r, *u, *qPrev, *rPrev; |
---|
520 | |
---|
521 | nLongsNeeded = ((numSpecies - 1) / nBitsInALong) + 1; |
---|
522 | numUpperTriang = numSpecies*(numSpecies - 1) / 2; |
---|
523 | minDepth = (Depth *) SafeCalloc (numUpperTriang, sizeof(Depth)); |
---|
524 | |
---|
525 | // Convert depthMatrix to an array of Depth structs |
---|
526 | index = 0; |
---|
527 | for(i=0; i<numSpecies; i++) { |
---|
528 | for(j=i+1; j<numSpecies; j++) { |
---|
529 | minDepth[index].depth = depthMatrix[index]; |
---|
530 | minDepth[index].pairSet = speciesPairSets[index]; |
---|
531 | index++; |
---|
532 | } |
---|
533 | } |
---|
534 | |
---|
535 | // Sort the array of distance structs (O(log n^2)) |
---|
536 | qsort((void *)(minDepth), (size_t)(numUpperTriang), sizeof(Depth), CompareDepths); |
---|
537 | |
---|
538 | // The algorithm below reduces the upper triangular matrix (n choose 2) to an n-1 |
---|
539 | // array in O(n^2log(n)) time. We build the tree at the same time, since we can |
---|
540 | // find included pairs in the tree in log(n) time. We use a polytomous tree for this. |
---|
541 | |
---|
542 | // Allocate space for polytomous tree and set up partitions |
---|
543 | polyTree = AllocatePolyTree(numSpecies); |
---|
544 | AllocatePolyTreePartitions(polyTree); |
---|
545 | |
---|
546 | // Build initial tree (a bush) |
---|
547 | polyTree->isRooted = YES; |
---|
548 | polyTree->isClock = YES; |
---|
549 | polyTree->root = &polyTree->nodes[2*numSpecies-2]; |
---|
550 | for (i=0; i<numSpecies; i++) { |
---|
551 | p = &polyTree->nodes[i]; |
---|
552 | p->index = i; |
---|
553 | p->depth = 0.0; |
---|
554 | p->left = NULL; |
---|
555 | if (i<numSpecies-1) |
---|
556 | p->sib = &polyTree->nodes[i+1]; |
---|
557 | else |
---|
558 | p->sib = NULL; |
---|
559 | p->anc = polyTree->root; |
---|
560 | } |
---|
561 | p = polyTree->root; |
---|
562 | p->index = 2*numSpecies - 2; |
---|
563 | p->left = &polyTree->nodes[0]; |
---|
564 | p->sib = NULL; |
---|
565 | p->anc = NULL; |
---|
566 | p->depth = -1.0; |
---|
567 | polyTree->nNodes = numSpecies + 1; |
---|
568 | polyTree->nIntNodes = 1; |
---|
569 | GetPolyDownPass(polyTree); |
---|
570 | ResetPolyTreePartitions(polyTree); /* set bitsets (partitions) for initial tree */ |
---|
571 | |
---|
572 | // Resolve bush using sorted depth structs |
---|
573 | nextNodeIndex = numSpecies; |
---|
574 | for(i=0; i<numUpperTriang; i++) { |
---|
575 | |
---|
576 | // Find tip corresponding to first taxon in pair |
---|
577 | p = &polyTree->nodes[FirstTaxonInPartition(minDepth[i].pairSet, nLongsNeeded)]; |
---|
578 | |
---|
579 | // Descend tree until we find a node within which the pair set is nested |
---|
580 | do { |
---|
581 | p = p->anc; |
---|
582 | } while (!IsPartNested(minDepth[i].pairSet, p->partition, nLongsNeeded)); |
---|
583 | |
---|
584 | if (p->left->sib->sib != NULL) { |
---|
585 | |
---|
586 | // This node is still a polytomy |
---|
587 | |
---|
588 | // Find left and right descendants of new node |
---|
589 | qPrev = NULL; |
---|
590 | for (q=p->left; IsSectionEmpty(q->partition, minDepth[i].pairSet, nLongsNeeded); q=q->sib) |
---|
591 | qPrev = q; |
---|
592 | rPrev = q; |
---|
593 | for (r=q->sib; IsSectionEmpty(r->partition, minDepth[i].pairSet, nLongsNeeded); r=r->sib) |
---|
594 | rPrev = r; |
---|
595 | |
---|
596 | // Introduce the new node |
---|
597 | u = &polyTree->nodes[nextNodeIndex]; |
---|
598 | u->index = nextNodeIndex; |
---|
599 | nextNodeIndex++; |
---|
600 | polyTree->nIntNodes++; |
---|
601 | polyTree->nNodes++; |
---|
602 | u->left = q; |
---|
603 | u->anc = p; |
---|
604 | if (p->left == q) |
---|
605 | p->left = u; |
---|
606 | else |
---|
607 | qPrev->sib = u; |
---|
608 | // former upstream sibling to r should point to r->sib |
---|
609 | if (rPrev == q) |
---|
610 | u->sib = r->sib; |
---|
611 | else |
---|
612 | rPrev->sib = r->sib; |
---|
613 | if (q->sib == r) |
---|
614 | u->sib = r->sib; |
---|
615 | else |
---|
616 | u->sib = q->sib; |
---|
617 | u->depth = minDepth[i].depth; // because minDepth structs are sorted, we know this is the min depth |
---|
618 | |
---|
619 | // Create new taxon set with bitfield operations |
---|
620 | for (j=0; j<nLongsNeeded; j++) |
---|
621 | u->partition[j] = q->partition[j] | r->partition[j]; |
---|
622 | |
---|
623 | // Patch the tree together with the new node added |
---|
624 | q->sib = r; |
---|
625 | r->sib = NULL; |
---|
626 | q->anc = u; |
---|
627 | r->anc = u; |
---|
628 | } |
---|
629 | else if (p == polyTree->root && p->depth < 0.0) { |
---|
630 | |
---|
631 | // This is the first time we hit the root of the tree && it is resolved |
---|
632 | p->depth = minDepth[i].depth; |
---|
633 | |
---|
634 | } |
---|
635 | // other cases should not be added to tree |
---|
636 | } |
---|
637 | |
---|
638 | // Make sure we have a complete species tree |
---|
639 | assert (polyTree->nIntNodes == numSpecies - 1); |
---|
640 | |
---|
641 | // Set traversal sequences |
---|
642 | GetPolyDownPass(polyTree); |
---|
643 | |
---|
644 | // If we have ties, we might have zero-length branches; we ensure a minimum positive length here |
---|
645 | for (i=polyTree->nNodes-2; i>=0; i--) { |
---|
646 | p = polyTree->allDownPass[i]; |
---|
647 | if (p->anc->depth - p->depth < BRLENS_MIN) |
---|
648 | p->depth = p->anc->depth - BRLENS_MIN; |
---|
649 | } |
---|
650 | |
---|
651 | // Set branch lengths from node depths (not done automatically for us) |
---|
652 | for (i=0; i<polyTree->nNodes; i++) { |
---|
653 | p = polyTree->allDownPass[i]; |
---|
654 | if (p->anc == NULL) |
---|
655 | p->length = 0.0; |
---|
656 | else |
---|
657 | p->length = p->anc->depth - p->depth; |
---|
658 | } |
---|
659 | |
---|
660 | // Copy to species tree from polytomous tree |
---|
661 | CopyToSpeciesTreeFromPolyTree (speciesTree, polyTree); |
---|
662 | |
---|
663 | // Free locally allocated variables |
---|
664 | FreePolyTree(polyTree); |
---|
665 | free (minDepth); |
---|
666 | |
---|
667 | return(NO_ERROR); |
---|
668 | } |
---|
669 | |
---|
670 | |
---|
671 | |
---|
672 | |
---|
673 | |
---|
674 | /**--------------------------------------------------------------------------------------- |
---|
675 | | |
---|
676 | | IsSpeciesTreeConsistent: Called when user tries to set a species tree or when |
---|
677 | | attempting to use a species tree from a check point as starting value. |
---|
678 | | |
---|
679 | -----------------------------------------------------------------------------------------*/ |
---|
680 | |
---|
681 | int IsSpeciesTreeConsistent (Tree *speciesTree, int chain) |
---|
682 | { |
---|
683 | int i, answer, numGeneTrees, numUpperTriang, freeBestVars; |
---|
684 | double *speciesTreeDepthMatrix; |
---|
685 | Tree **geneTrees; |
---|
686 | |
---|
687 | answer = NO; |
---|
688 | |
---|
689 | freeBestVars = NO; |
---|
690 | if (memAllocs[ALLOC_BEST] == NO) |
---|
691 | { |
---|
692 | AllocateBestChainVariables(); |
---|
693 | freeBestVars = YES; |
---|
694 | } |
---|
695 | |
---|
696 | numGeneTrees = numTrees - 1; |
---|
697 | geneTrees = (Tree **) SafeCalloc (numGeneTrees, sizeof(Tree *)); |
---|
698 | for (i=0; i<numTrees-1; i++) |
---|
699 | geneTrees[i] = GetTreeFromIndex(i, chain, state[chain]); |
---|
700 | |
---|
701 | numUpperTriang = numSpecies * (numSpecies - 1) / 2; |
---|
702 | speciesTreeDepthMatrix = (double *) SafeCalloc (numUpperTriang, sizeof(double)); |
---|
703 | |
---|
704 | GetMinDepthMatrix(geneTrees, numGeneTrees, depthMatrix); |
---|
705 | GetDepthMatrix(speciesTree, speciesTreeDepthMatrix); |
---|
706 | |
---|
707 | for (i=0; i<numUpperTriang; i++) |
---|
708 | { |
---|
709 | if (depthMatrix[i] < speciesTreeDepthMatrix[i]) |
---|
710 | break; |
---|
711 | } |
---|
712 | |
---|
713 | if (i == numUpperTriang) |
---|
714 | answer = YES; |
---|
715 | else |
---|
716 | answer = NO; |
---|
717 | |
---|
718 | if (answer == NO) |
---|
719 | ShowNodes(speciesTree->root, 0, YES); |
---|
720 | |
---|
721 | if (freeBestVars == YES) |
---|
722 | FreeBestChainVariables(); |
---|
723 | |
---|
724 | free (speciesTreeDepthMatrix); |
---|
725 | free (geneTrees); |
---|
726 | |
---|
727 | return answer; |
---|
728 | } |
---|
729 | |
---|
730 | |
---|
731 | |
---|
732 | |
---|
733 | |
---|
734 | /**--------------------------------------------------------------------------------------- |
---|
735 | | |
---|
736 | | LineagesIn: Recursive function to get number of gene tree lineages coming into each |
---|
737 | | branch of the species tree (in ->x of speciestree nodes). We also assign each gene |
---|
738 | | tree lineage to the corresponding species tree lineage (in ->x of genetree nodes). |
---|
739 | | Finally, number of coalescent events is recorded (in ->y of speciestree nodes). |
---|
740 | | Time complexity is O(n). |
---|
741 | | |
---|
742 | -----------------------------------------------------------------------------------------*/ |
---|
743 | void LineagesIn (TreeNode *geneTreeNode, TreeNode *speciesTreeNode) |
---|
744 | { |
---|
745 | int nLongsNeeded; |
---|
746 | |
---|
747 | if (geneTreeNode->nodeDepth < speciesTreeNode->nodeDepth) { |
---|
748 | // climb up species tree |
---|
749 | if (speciesTreeNode->left == NULL) { |
---|
750 | assert (geneTreeNode->left == NULL); |
---|
751 | speciesTreeNode->x++; |
---|
752 | } |
---|
753 | else { |
---|
754 | nLongsNeeded = (numSpecies - 1) / nBitsInALong + 1; |
---|
755 | speciesTreeNode->x++; |
---|
756 | if (IsPartNested(geneTreeNode->partition, speciesTreeNode->left->partition, nLongsNeeded) == YES) |
---|
757 | LineagesIn (geneTreeNode, speciesTreeNode->left); |
---|
758 | else if (IsPartNested(geneTreeNode->partition, speciesTreeNode->right->partition, nLongsNeeded) == YES) |
---|
759 | LineagesIn (geneTreeNode, speciesTreeNode->right); |
---|
760 | } |
---|
761 | } |
---|
762 | else { |
---|
763 | // climb up gene tree |
---|
764 | if (geneTreeNode->left != NULL) |
---|
765 | LineagesIn(geneTreeNode->left, speciesTreeNode); |
---|
766 | if (geneTreeNode->right != NULL) |
---|
767 | LineagesIn(geneTreeNode->right, speciesTreeNode); |
---|
768 | if (geneTreeNode->left == NULL) { |
---|
769 | speciesTreeNode->x++; |
---|
770 | assert (speciesTreeNode->left == NULL); |
---|
771 | } |
---|
772 | else { |
---|
773 | speciesTreeNode->y++; |
---|
774 | } |
---|
775 | geneTreeNode->x = speciesTreeNode->index; |
---|
776 | } |
---|
777 | } |
---|
778 | |
---|
779 | |
---|
780 | |
---|
781 | |
---|
782 | |
---|
783 | /**----------------------------------------------------------------- |
---|
784 | | |
---|
785 | | LnSpeciesTreeProb: Wrapper for LnJointGeneTreeSpeciesTreePr to |
---|
786 | | free calling functions from retrieving gene and species trees. |
---|
787 | | |
---|
788 | ------------------------------------------------------------------*/ |
---|
789 | double LnSpeciesTreeProb(int chain) |
---|
790 | { |
---|
791 | int i, numGeneTrees; |
---|
792 | double lnProb; |
---|
793 | Tree **geneTrees, *speciesTree; |
---|
794 | ModelInfo *m; |
---|
795 | |
---|
796 | m = &modelSettings[0]; |
---|
797 | |
---|
798 | speciesTree = GetTree(m->speciesTree, chain, state[chain]); |
---|
799 | |
---|
800 | numGeneTrees = m->speciesTree->nSubParams; |
---|
801 | geneTrees = (Tree **) SafeCalloc (numGeneTrees, sizeof(Tree *)); |
---|
802 | |
---|
803 | for (i=0; i<m->speciesTree->nSubParams; i++) |
---|
804 | geneTrees[i] = GetTree(m->speciesTree->subParams[i], chain, state[chain]); |
---|
805 | |
---|
806 | lnProb = LnJointGeneTreeSpeciesTreePr(geneTrees, numGeneTrees, speciesTree, chain); |
---|
807 | |
---|
808 | free (geneTrees); |
---|
809 | |
---|
810 | return lnProb; |
---|
811 | } |
---|
812 | |
---|
813 | |
---|
814 | |
---|
815 | |
---|
816 | |
---|
817 | /**----------------------------------------------------------------- |
---|
818 | | |
---|
819 | | LnJointGeneTreeSpeciesTreePr: Converted from LnJointGenetreePr, |
---|
820 | | SPLogLike, SPLogPrior. |
---|
821 | | |
---|
822 | | In this function we calculate the entire probability of the species |
---|
823 | | tree, including its probability given its priors, and the probability |
---|
824 | | of the gene trees given the species tree. |
---|
825 | | |
---|
826 | ------------------------------------------------------------------*/ |
---|
827 | double LnJointGeneTreeSpeciesTreePr(Tree **geneTrees, int numGeneTrees, Tree *speciesTree, int chain) |
---|
828 | { |
---|
829 | double lnPrior, lnLike, clockRate, mu, *popSizePtr, sR, eR, sF; |
---|
830 | int i; |
---|
831 | ModelInfo *m; |
---|
832 | ModelParams *mp; |
---|
833 | |
---|
834 | // Get model info for species tree |
---|
835 | m = &modelSettings[speciesTree->relParts[0]]; |
---|
836 | |
---|
837 | // Get model params for species tree |
---|
838 | mp = &modelParams[speciesTree->relParts[0]]; |
---|
839 | |
---|
840 | // Get popSize ptr |
---|
841 | popSizePtr = GetParamVals(m->popSize, chain, state[chain]); |
---|
842 | |
---|
843 | // Get clock rate |
---|
844 | if (speciesTree->isCalibrated == YES) |
---|
845 | clockRate = *GetParamVals(m->clockRate, chain, state[chain]); |
---|
846 | else |
---|
847 | clockRate = 1.0; |
---|
848 | |
---|
849 | // Calculate probability of gene trees given species tree |
---|
850 | // ShowNodes(speciesTree->root, 0, YES); |
---|
851 | lnLike = 0.0; |
---|
852 | mu = clockRate; |
---|
853 | for (i=0; i<numGeneTrees; i++) { |
---|
854 | lnLike += LnPriorProbGeneTree(geneTrees[i], mu, speciesTree, popSizePtr); |
---|
855 | } |
---|
856 | |
---|
857 | // Calculate probability of species tree given its priors |
---|
858 | if (strcmp(mp->speciesTreeBrlensPr, "Birthdeath") == 0) { |
---|
859 | sR = *GetParamVals(m->speciationRates, chain, state[chain]); |
---|
860 | eR = *GetParamVals(m->extinctionRates, chain, state[chain]); |
---|
861 | // sS = mp->sampleStrat; |
---|
862 | sF = mp->sampleProb; |
---|
863 | lnPrior = 0.0; |
---|
864 | // LnBirthDeathPriorPr(speciesTree, clockRate, &lnPrior, sR, eR, sS, sF); |
---|
865 | LnBirthDeathPriorPr(speciesTree, clockRate, &lnPrior, sR, eR, mp->sampleStrat, sF); |
---|
866 | } |
---|
867 | else |
---|
868 | lnPrior = 0.0; |
---|
869 | |
---|
870 | // The population size is taken care of elsewhere |
---|
871 | |
---|
872 | return lnLike + lnPrior; |
---|
873 | } |
---|
874 | |
---|
875 | |
---|
876 | |
---|
877 | |
---|
878 | |
---|
879 | /**----------------------------------------------------------------- |
---|
880 | | |
---|
881 | | LnPriorProbGeneTree: Calculate the prior probability of a gene |
---|
882 | | tree. |
---|
883 | | |
---|
884 | ------------------------------------------------------------------*/ |
---|
885 | double LnPriorProbGeneTree (Tree *geneTree, double mu, Tree *speciesTree, double *popSizePtr) |
---|
886 | { |
---|
887 | int i, k, index, nEvents, trace=0; |
---|
888 | double N, lnProb, ploidyFactor, theta, timeInterval; |
---|
889 | TreeNode *p, *q=NULL, *r; |
---|
890 | ModelInfo *m; |
---|
891 | ModelParams *mp; |
---|
892 | |
---|
893 | // Get model info |
---|
894 | m = &modelSettings[speciesTree->relParts[0]]; |
---|
895 | |
---|
896 | // Get model params |
---|
897 | mp = &modelParams[speciesTree->relParts[0]]; |
---|
898 | |
---|
899 | // Find ploidy setting |
---|
900 | if (strcmp(mp->ploidy, "Diploid") == 0) |
---|
901 | ploidyFactor = 4.0; |
---|
902 | else if (strcmp(mp->ploidy, "Haploid") == 0) |
---|
903 | ploidyFactor = 2.0; |
---|
904 | else /* if (strcmp(mp->ploidy, "Zlinked") == 0) */ |
---|
905 | ploidyFactor = 3.0; |
---|
906 | |
---|
907 | // Initialize species tree with theta in d |
---|
908 | for (i=0; i<speciesTree->nNodes-1; i++) { |
---|
909 | p = speciesTree->allDownPass[i]; |
---|
910 | if (strcmp(mp->popVarPr, "Equal") != 0) |
---|
911 | N = popSizePtr[p->index]; |
---|
912 | else |
---|
913 | N = popSizePtr[0]; |
---|
914 | p->d = ploidyFactor * N * mu; |
---|
915 | } |
---|
916 | |
---|
917 | // Map gene tree to species tree |
---|
918 | MapGeneTreeToSpeciesTree(geneTree, speciesTree); |
---|
919 | |
---|
920 | // Sort gene tree interior nodes first by speciestree branch on which they coalesce, then in time order |
---|
921 | qsort((void *)(geneTree->intDownPass), (size_t) geneTree->nIntNodes, sizeof(TreeNode *), CompareNodesByX); |
---|
922 | |
---|
923 | // Debug output of qsort result |
---|
924 | if (trace) { |
---|
925 | printf ("index -- x -- nodeDepth for gene tree\n"); |
---|
926 | for (i=0; i<geneTree->nIntNodes; i++) |
---|
927 | printf ("%d -- %d -- %e\n", geneTree->intDownPass[i]->index, geneTree->intDownPass[i]->x, geneTree->intDownPass[i]->nodeDepth); |
---|
928 | } |
---|
929 | |
---|
930 | // Now calculate probability after making sure species tree nodes appear in index order |
---|
931 | // (the order does not have to be a correct downpass sequence) |
---|
932 | for (i=0; i<speciesTree->memNodes; i++) |
---|
933 | { |
---|
934 | p = &(speciesTree->nodes[i]); |
---|
935 | speciesTree->allDownPass[p->index] = p; |
---|
936 | } |
---|
937 | index = 0; |
---|
938 | lnProb = 0.0; |
---|
939 | for (i=0; i<speciesTree->nNodes-1; i++) { |
---|
940 | |
---|
941 | p = speciesTree->allDownPass[i]; |
---|
942 | |
---|
943 | // Get theta |
---|
944 | theta = p->d; |
---|
945 | |
---|
946 | // Get number of events |
---|
947 | nEvents = p->y; |
---|
948 | |
---|
949 | // Calculate probability |
---|
950 | lnProb += nEvents * log (2.0 / theta); |
---|
951 | |
---|
952 | for (k=p->x; k > p->x - p->y; k--) { |
---|
953 | |
---|
954 | q = geneTree->intDownPass[index]; |
---|
955 | assert (q->x == p->index); |
---|
956 | |
---|
957 | if (k == p->x) |
---|
958 | timeInterval = (q->nodeDepth - p->nodeDepth) / mu; |
---|
959 | else { |
---|
960 | r = geneTree->intDownPass[index-1]; |
---|
961 | timeInterval = (q->nodeDepth - r->nodeDepth) / mu; |
---|
962 | } |
---|
963 | |
---|
964 | lnProb -= (k * (k - 1) * timeInterval) / theta; |
---|
965 | index++; |
---|
966 | } |
---|
967 | |
---|
968 | if (p->x - p->y > 1) { |
---|
969 | |
---|
970 | if (nEvents == 0) |
---|
971 | timeInterval = p->anc->nodeDepth - p->nodeDepth; |
---|
972 | else |
---|
973 | timeInterval = p->anc->nodeDepth - q->nodeDepth; |
---|
974 | |
---|
975 | assert (p->anc->anc != NULL); |
---|
976 | assert(timeInterval > 0.0); |
---|
977 | |
---|
978 | k = p->x - p->y; |
---|
979 | lnProb -= (k * (k - 1) * timeInterval) / theta; |
---|
980 | } |
---|
981 | } |
---|
982 | |
---|
983 | // Restore downpass sequences (probably not necessary for gene tree, but may be if some |
---|
984 | // code relies on intDownPass and allDownPass to be in same order) |
---|
985 | GetDownPass(speciesTree); |
---|
986 | GetDownPass(geneTree); |
---|
987 | |
---|
988 | // Free space |
---|
989 | FreeTreePartitions(speciesTree); |
---|
990 | FreeTreePartitions(geneTree); |
---|
991 | |
---|
992 | return lnProb; |
---|
993 | } |
---|
994 | |
---|
995 | |
---|
996 | |
---|
997 | |
---|
998 | |
---|
999 | /**--------------------------------------------------------------------- |
---|
1000 | | |
---|
1001 | | LnProposalProbSpeciesTree: |
---|
1002 | | |
---|
1003 | | This algorithm calculates the probability of proposing a particular |
---|
1004 | | species tree given a distance matrix modified using a scheme based on |
---|
1005 | | truncated exponential distributions with rate expRate. |
---|
1006 | | |
---|
1007 | | @param speciesTree The species tree (in) |
---|
1008 | | @param depthMatrix The minimum depth matrix, upper triangular array (in) |
---|
1009 | | @param expRate Rate of truncated exponential distribution |
---|
1010 | | @returns Returns probability of proposing the species tree |
---|
1011 | ----------------------------------------------------------------------*/ |
---|
1012 | double LnProposalProbSpeciesTree (Tree *speciesTree, double *depthMatrix, double expRate) { |
---|
1013 | |
---|
1014 | int i, left, right, numUpperTriang, index, nLongsNeeded, freeBitsets; |
---|
1015 | double dist, normConst, negLambdaX, eNegLambdaX, density, prob, |
---|
1016 | sumDensRatio, prodProb, lnProb; |
---|
1017 | TreeNode *p; |
---|
1018 | |
---|
1019 | // Make sure we have bitfields allocated and set |
---|
1020 | freeBitsets = NO; |
---|
1021 | if (speciesTree->bitsets == NULL) |
---|
1022 | freeBitsets = YES; |
---|
1023 | else |
---|
1024 | freeBitsets = NO; |
---|
1025 | AllocateTreePartitions(speciesTree); |
---|
1026 | |
---|
1027 | // Calculate number of values in the upper triangular matrix |
---|
1028 | numUpperTriang = numSpecies * (numSpecies - 1) / 2; |
---|
1029 | |
---|
1030 | // Number of longs needed in a bitfield representing a species set |
---|
1031 | nLongsNeeded = ((numSpecies -1) / nBitsInALong) + 1; |
---|
1032 | |
---|
1033 | // Loop over interior nodes |
---|
1034 | lnProb = 0.0; |
---|
1035 | for (i=0; i<speciesTree->nIntNodes; i++) |
---|
1036 | { |
---|
1037 | p = speciesTree->intDownPass[i]; |
---|
1038 | p->x = 0; |
---|
1039 | sumDensRatio = 0.0; |
---|
1040 | prodProb = 1.0; |
---|
1041 | for (left = FirstTaxonInPartition(p->left->partition, nLongsNeeded); left < numSpecies; left = NextTaxonInPartition(left, p->left->partition, nLongsNeeded)) |
---|
1042 | { |
---|
1043 | for (right = FirstTaxonInPartition(p->right->partition, nLongsNeeded); right < numSpecies; right = NextTaxonInPartition(right, p->right->partition, nLongsNeeded)) |
---|
1044 | { |
---|
1045 | p->x++; |
---|
1046 | index = UpperTriangIndex(left, right, numSpecies); |
---|
1047 | assert (index < numUpperTriang); |
---|
1048 | dist = depthMatrix[index] - p->nodeDepth; // distance between depth matrix entry and actual species-tree node |
---|
1049 | normConst = 1.0 - exp(-expRate * depthMatrix[index]); // normalization constant because of truncation of exp distribution |
---|
1050 | negLambdaX = - expRate * dist; |
---|
1051 | eNegLambdaX = exp(negLambdaX); |
---|
1052 | density = expRate * eNegLambdaX / normConst; // density for x == dist, f(dist) |
---|
1053 | prob = 1.0 - eNegLambdaX / normConst; // cumulative prob for x <= dist, F(dist) |
---|
1054 | sumDensRatio += density / prob; |
---|
1055 | prodProb *= prob; |
---|
1056 | } |
---|
1057 | } |
---|
1058 | if (p->x == 1) |
---|
1059 | lnProb += log(expRate) + negLambdaX - log(normConst); |
---|
1060 | else |
---|
1061 | lnProb += log (sumDensRatio * prodProb); |
---|
1062 | } |
---|
1063 | |
---|
1064 | // Free partitions if appropriate |
---|
1065 | if (freeBitsets == YES) |
---|
1066 | FreeTreePartitions(speciesTree); |
---|
1067 | |
---|
1068 | return (NO_ERROR); |
---|
1069 | } |
---|
1070 | |
---|
1071 | |
---|
1072 | |
---|
1073 | |
---|
1074 | |
---|
1075 | /**----------------------------------------------------------------- |
---|
1076 | | |
---|
1077 | | MapGeneTreeToSpeciesTree: Fold gene tree into species tree. We |
---|
1078 | | are going to use ->x of gene tree to give index of the |
---|
1079 | | corresponding node in the species tree. ->x in the species |
---|
1080 | | tree will give the number of lineages into the corresponding |
---|
1081 | | branch, and ->y will give the number of coalescent events on |
---|
1082 | | that branch. |
---|
1083 | | |
---|
1084 | ------------------------------------------------------------------*/ |
---|
1085 | void MapGeneTreeToSpeciesTree (Tree *geneTree, Tree *speciesTree) |
---|
1086 | { |
---|
1087 | int i, j, nLongsNeeded, trace=0; |
---|
1088 | TreeNode *p; |
---|
1089 | |
---|
1090 | // Initialize species partitions for both gene tree and species tree |
---|
1091 | // This will set the partitions to reflect the partitions in the tree itself, |
---|
1092 | // which is OK for the species tree, but we want the gene tree partitions to |
---|
1093 | // reflect the species partitions and not the gene partitions, so we need to |
---|
1094 | // set them here |
---|
1095 | AllocateTreePartitions(geneTree); |
---|
1096 | AllocateTreePartitions(speciesTree); |
---|
1097 | nLongsNeeded = (numSpecies - 1) / nBitsInALong + 1; |
---|
1098 | for (i=0; i<geneTree->nNodes-1; i++) { |
---|
1099 | p = geneTree->allDownPass[i]; |
---|
1100 | ClearBits(p->partition, nLongsNeeded); |
---|
1101 | if (p->left == NULL) |
---|
1102 | SetBit(speciespartitionId[p->index][speciespartitionNum]-1, p->partition); |
---|
1103 | else { |
---|
1104 | for (j=0; j<nLongsNeeded; j++) |
---|
1105 | p->partition[j] = p->left->partition[j] | p->right->partition[j]; |
---|
1106 | } |
---|
1107 | } |
---|
1108 | // Species tree partitions already set by call to AllocateTreePartitions |
---|
1109 | |
---|
1110 | // Reset ->x and ->y of species tree (->x of gene tree does not need to be initialized) |
---|
1111 | for (i=0; i<speciesTree->nNodes; i++) |
---|
1112 | { |
---|
1113 | p = speciesTree->allDownPass[i]; |
---|
1114 | p->x = 0; |
---|
1115 | p->y = 0; |
---|
1116 | } |
---|
1117 | |
---|
1118 | // Call recursive function to match gene tree and species tree |
---|
1119 | LineagesIn(geneTree->root->left, speciesTree->root->left); |
---|
1120 | |
---|
1121 | if (trace) { |
---|
1122 | printf ("index -- x -- y for species tree\n"); |
---|
1123 | for (i=0; i<speciesTree->nNodes-1; i++) |
---|
1124 | printf ("%-2d -- %d -- %d\n", speciesTree->allDownPass[i]->index, speciesTree->allDownPass[i]->x, speciesTree->allDownPass[i]->y); |
---|
1125 | } |
---|
1126 | |
---|
1127 | if (trace) { |
---|
1128 | printf ("index -- x -- nodeDepth for gene tree\n"); |
---|
1129 | for (i=0; i<geneTree->nIntNodes; i++) |
---|
1130 | printf ("%-2d -- %d -- %e\n", geneTree->intDownPass[i]->index, geneTree->intDownPass[i]->x, geneTree->intDownPass[i]->nodeDepth); |
---|
1131 | } |
---|
1132 | |
---|
1133 | // Free space |
---|
1134 | FreeTreePartitions(speciesTree); |
---|
1135 | FreeTreePartitions(geneTree); |
---|
1136 | } |
---|
1137 | |
---|
1138 | |
---|
1139 | |
---|
1140 | |
---|
1141 | |
---|
1142 | /**--------------------------------------------------------------------- |
---|
1143 | | |
---|
1144 | | ModifyDepthMatrix: |
---|
1145 | | |
---|
1146 | | This algorithm uses a truncated exponential distribution to modify |
---|
1147 | | a depth matrix. |
---|
1148 | | |
---|
1149 | | @param expRate The rate of the exponential distribution (in) |
---|
1150 | | @param depthMatrix The minimum depth matrix to be modified, upper triangular array (in/out) |
---|
1151 | | @param seed Pointer to seed for random number generator (in/ut) |
---|
1152 | | @returns Returns ERROR or NO_ERROR |
---|
1153 | ----------------------------------------------------------------------*/ |
---|
1154 | int ModifyDepthMatrix (double expRate, double *depthMatrix, SafeLong *seed) |
---|
1155 | { |
---|
1156 | int i, numUpperTriang; |
---|
1157 | double u, interval, delta; |
---|
1158 | |
---|
1159 | numUpperTriang = numSpecies * (numSpecies - 1) / 2; |
---|
1160 | for (i=0; i<numUpperTriang; i++) |
---|
1161 | { |
---|
1162 | interval = depthMatrix[i]; |
---|
1163 | u = RandomNumber (seed); |
---|
1164 | delta = log (1.0 - u*(1.0 - exp(-expRate*interval))) / (-expRate); |
---|
1165 | assert (delta <= interval); |
---|
1166 | depthMatrix[i] -= delta; |
---|
1167 | } |
---|
1168 | |
---|
1169 | return (NO_ERROR); |
---|
1170 | } |
---|
1171 | |
---|
1172 | |
---|
1173 | |
---|
1174 | |
---|
1175 | /**----------------------------------------------------------------- |
---|
1176 | | |
---|
1177 | | Move_GeneTree1: Propose a new gene tree using ExtSPRClock |
---|
1178 | | |
---|
1179 | | @param param The parameter (gene tree) to change |
---|
1180 | | @param chain The chain number |
---|
1181 | | @param seed Pointer to the seed of the random number gen. |
---|
1182 | | @param lnPriorRatio Pointer to the log prior ratio (out) |
---|
1183 | | @param lnProposalRatio Pointer to the log proposal (Hastings) ratio (out) |
---|
1184 | | @param mvp Pointer to tuning parameter(s) |
---|
1185 | ------------------------------------------------------------------*/ |
---|
1186 | int Move_GeneTree1 (Param *param, int chain, SafeLong *seed, MrBFlt *lnPriorRatio, MrBFlt *lnProposalRatio, MrBFlt *mvp) |
---|
1187 | |
---|
1188 | { |
---|
1189 | int i, numGeneTrees, numUpperTriang; |
---|
1190 | double newLnProb, oldLnProb, backwardLnProposalProb, forwardLnProposalProb, |
---|
1191 | *oldMinDepths, *modMinDepths, forwardLambda, backwardLambda, mean; |
---|
1192 | Tree *newSpeciesTree, *oldSpeciesTree, **geneTrees; |
---|
1193 | ModelInfo *m; |
---|
1194 | ModelParams *mp; |
---|
1195 | |
---|
1196 | // Calculate number of gene trees |
---|
1197 | numGeneTrees = numTopologies - 1; |
---|
1198 | |
---|
1199 | // Get model params |
---|
1200 | mp = &modelParams[param->relParts[0]]; |
---|
1201 | |
---|
1202 | // Get model settings |
---|
1203 | m = &modelSettings[param->relParts[0]]; |
---|
1204 | |
---|
1205 | // Get species tree (this trick is possible because we always copy tree params) |
---|
1206 | newSpeciesTree = GetTree (m->speciesTree, chain, state[chain]); |
---|
1207 | oldSpeciesTree = GetTree (m->speciesTree, chain, state[chain] ^ 1); |
---|
1208 | |
---|
1209 | // Get gene trees |
---|
1210 | geneTrees = (Tree **) SafeCalloc (2*numGeneTrees, sizeof(Tree *)); |
---|
1211 | for (i=0; i<m->speciesTree->nSubParams; i++) { |
---|
1212 | geneTrees[i] = GetTree(m->speciesTree->subParams[i], chain, state[chain]); |
---|
1213 | } |
---|
1214 | |
---|
1215 | // Allocate space for depth matrix copy |
---|
1216 | numUpperTriang = numSpecies * (numSpecies - 1) / 2; |
---|
1217 | oldMinDepths = (double *) SafeCalloc (2*numUpperTriang, sizeof(double)); |
---|
1218 | modMinDepths = oldMinDepths + numUpperTriang; |
---|
1219 | |
---|
1220 | // Get min depth matrix for old gene trees |
---|
1221 | GetMinDepthMatrix(geneTrees, numTopologies-1, depthMatrix); |
---|
1222 | |
---|
1223 | // Save a copy |
---|
1224 | for (i=0; i<numUpperTriang; i++) |
---|
1225 | oldMinDepths[i] = depthMatrix[i]; |
---|
1226 | |
---|
1227 | // Get forward lambda |
---|
1228 | GetMeanDist(oldSpeciesTree, depthMatrix, &mean); |
---|
1229 | forwardLambda = 1.0 / mean; |
---|
1230 | |
---|
1231 | // Calculate joint probability of old gene trees and old species tree |
---|
1232 | oldLnProb = LnJointGeneTreeSpeciesTreePr(geneTrees, numGeneTrees, oldSpeciesTree, chain); |
---|
1233 | |
---|
1234 | // Modify the picked gene tree using code from a regular MrBayes move |
---|
1235 | Move_ExtSPRClock(param, chain, seed, lnPriorRatio, lnProposalRatio, mvp); |
---|
1236 | |
---|
1237 | // Update the min depth matrix |
---|
1238 | GetMinDepthMatrix(geneTrees, numTopologies-1, depthMatrix); |
---|
1239 | |
---|
1240 | // Copy the min depth matrix |
---|
1241 | for (i=0; i<numUpperTriang; i++) |
---|
1242 | modMinDepths[i] = depthMatrix[i]; |
---|
1243 | |
---|
1244 | // Modify the min depth matrix |
---|
1245 | ModifyDepthMatrix (forwardLambda, modMinDepths, seed); |
---|
1246 | |
---|
1247 | // Get a new species tree |
---|
1248 | GetSpeciesTreeFromMinDepths (newSpeciesTree, modMinDepths); |
---|
1249 | |
---|
1250 | // Calculate joint probability of new gene trees and new species tree |
---|
1251 | newLnProb = LnJointGeneTreeSpeciesTreePr(geneTrees, numGeneTrees, newSpeciesTree, chain); |
---|
1252 | |
---|
1253 | // Get backward lambda |
---|
1254 | GetMeanDist(newSpeciesTree, depthMatrix, &mean); |
---|
1255 | backwardLambda = 1.0 / mean; |
---|
1256 | |
---|
1257 | // Get proposal probability of old species tree |
---|
1258 | backwardLnProposalProb = LnProposalProbSpeciesTree (oldSpeciesTree, oldMinDepths, backwardLambda); |
---|
1259 | |
---|
1260 | // Get proposal probability of new species tree |
---|
1261 | forwardLnProposalProb = LnProposalProbSpeciesTree (newSpeciesTree, depthMatrix, forwardLambda); |
---|
1262 | |
---|
1263 | // Update prior ratio taking species tree into account |
---|
1264 | (*lnPriorRatio) += (newLnProb - oldLnProb); |
---|
1265 | |
---|
1266 | // Update proposal ratio based on this move |
---|
1267 | (*lnProposalRatio) += (backwardLnProposalProb - forwardLnProposalProb); |
---|
1268 | |
---|
1269 | // Free allocated memory |
---|
1270 | free (geneTrees); |
---|
1271 | free (oldMinDepths); |
---|
1272 | |
---|
1273 | return (NO_ERROR); |
---|
1274 | } |
---|
1275 | |
---|
1276 | |
---|
1277 | |
---|
1278 | |
---|
1279 | |
---|
1280 | /**----------------------------------------------------------------- |
---|
1281 | | |
---|
1282 | | Move_GeneTree2: Propose a new gene tree using NNIClock |
---|
1283 | | |
---|
1284 | | @param param The parameter to change |
---|
1285 | | @param chain The chain number |
---|
1286 | | @param seed Pointer to the seed of the random number gen. |
---|
1287 | | @param lnPriorRatio Pointer to the log prior ratio (out) |
---|
1288 | | @param lnProposalRatio Pointer to the log proposal (Hastings) ratio (out) |
---|
1289 | | @param mvp Pointer to tuning parameter(s) |
---|
1290 | ------------------------------------------------------------------*/ |
---|
1291 | int Move_GeneTree2 (Param *param, int chain, SafeLong *seed, MrBFlt *lnPriorRatio, MrBFlt *lnProposalRatio, MrBFlt *mvp) |
---|
1292 | |
---|
1293 | { |
---|
1294 | int i, numGeneTrees, numUpperTriang; |
---|
1295 | double newLnProb, oldLnProb, backwardLnProposalProb, forwardLnProposalProb, |
---|
1296 | *oldMinDepths, *modMinDepths, forwardLambda, backwardLambda, mean; |
---|
1297 | Tree *newSpeciesTree, *oldSpeciesTree, **geneTrees; |
---|
1298 | ModelInfo *m; |
---|
1299 | ModelParams *mp; |
---|
1300 | |
---|
1301 | // Calculate number of gene trees |
---|
1302 | numGeneTrees = numTopologies - 1; |
---|
1303 | |
---|
1304 | // Get model params |
---|
1305 | mp = &modelParams[param->relParts[0]]; |
---|
1306 | |
---|
1307 | // Get model settings |
---|
1308 | m = &modelSettings[param->relParts[0]]; |
---|
1309 | |
---|
1310 | // Get species tree (this trick is possible because we always copy tree params) |
---|
1311 | newSpeciesTree = GetTree (m->speciesTree, chain, state[chain]); |
---|
1312 | oldSpeciesTree = GetTree (m->speciesTree, chain, state[chain] ^ 1); |
---|
1313 | |
---|
1314 | // Get gene trees |
---|
1315 | geneTrees = (Tree **) SafeCalloc (2*numGeneTrees, sizeof(Tree *)); |
---|
1316 | for (i=0; i<m->speciesTree->nSubParams; i++) { |
---|
1317 | geneTrees[i] = GetTree(m->speciesTree->subParams[i], chain, state[chain]); |
---|
1318 | } |
---|
1319 | |
---|
1320 | // Allocate space for depth matrix copy |
---|
1321 | numUpperTriang = numSpecies * (numSpecies - 1) / 2; |
---|
1322 | oldMinDepths = (double *) SafeCalloc (2*numUpperTriang, sizeof(double)); |
---|
1323 | modMinDepths = oldMinDepths + numUpperTriang; |
---|
1324 | |
---|
1325 | // Get min depth matrix for old gene trees |
---|
1326 | GetMinDepthMatrix(geneTrees, numTopologies-1, depthMatrix); |
---|
1327 | |
---|
1328 | // Save a copy |
---|
1329 | for (i=0; i<numUpperTriang; i++) |
---|
1330 | oldMinDepths[i] = depthMatrix[i]; |
---|
1331 | |
---|
1332 | // Get forward lambda |
---|
1333 | GetMeanDist(oldSpeciesTree, depthMatrix, &mean); |
---|
1334 | forwardLambda = 1.0 / mean; |
---|
1335 | |
---|
1336 | // Calculate joint probability of old gene trees and old species tree |
---|
1337 | oldLnProb = LnJointGeneTreeSpeciesTreePr(geneTrees, numGeneTrees, oldSpeciesTree, chain); |
---|
1338 | |
---|
1339 | // Modify the picked gene tree using code from a regular MrBayes move (no tuning parameter, so passing on mvp is OK) |
---|
1340 | Move_NNIClock(param, chain, seed, lnPriorRatio, lnProposalRatio, mvp); |
---|
1341 | |
---|
1342 | // Update the min depth matrix |
---|
1343 | GetMinDepthMatrix(geneTrees, numTopologies-1, depthMatrix); |
---|
1344 | |
---|
1345 | // Copy the min depth matrix |
---|
1346 | for (i=0; i<numUpperTriang; i++) |
---|
1347 | modMinDepths[i] = depthMatrix[i]; |
---|
1348 | |
---|
1349 | // Modify the min depth matrix |
---|
1350 | ModifyDepthMatrix (forwardLambda, modMinDepths, seed); |
---|
1351 | |
---|
1352 | // Get a new species tree |
---|
1353 | GetSpeciesTreeFromMinDepths (newSpeciesTree, modMinDepths); |
---|
1354 | |
---|
1355 | // Calculate joint probability of new gene trees and new species tree |
---|
1356 | newLnProb = LnJointGeneTreeSpeciesTreePr(geneTrees, numGeneTrees, newSpeciesTree, chain); |
---|
1357 | |
---|
1358 | // Get backward lambda |
---|
1359 | GetMeanDist(newSpeciesTree, depthMatrix, &mean); |
---|
1360 | backwardLambda = 1.0 / mean; |
---|
1361 | |
---|
1362 | // Get proposal probability of old species tree |
---|
1363 | backwardLnProposalProb = LnProposalProbSpeciesTree (oldSpeciesTree, oldMinDepths, backwardLambda); |
---|
1364 | |
---|
1365 | // Get proposal probability of new species tree |
---|
1366 | forwardLnProposalProb = LnProposalProbSpeciesTree (newSpeciesTree, depthMatrix, forwardLambda); |
---|
1367 | |
---|
1368 | // Update prior ratio taking species tree into account |
---|
1369 | (*lnPriorRatio) += (newLnProb - oldLnProb); |
---|
1370 | |
---|
1371 | // Update proposal ratio based on this move |
---|
1372 | (*lnProposalRatio) += (backwardLnProposalProb - forwardLnProposalProb); |
---|
1373 | |
---|
1374 | // Free allocated memory |
---|
1375 | free (geneTrees); |
---|
1376 | free (oldMinDepths); |
---|
1377 | |
---|
1378 | return (NO_ERROR); |
---|
1379 | } |
---|
1380 | |
---|
1381 | |
---|
1382 | |
---|
1383 | |
---|
1384 | |
---|
1385 | /**----------------------------------------------------------------- |
---|
1386 | | |
---|
1387 | | Move_GeneTree3: Propose a new gene tree using ParsSPRClock |
---|
1388 | | |
---|
1389 | | @param param The parameter to change |
---|
1390 | | @param chain The chain number |
---|
1391 | | @param seed Pointer to the seed of the random number gen. |
---|
1392 | | @param lnPriorRatio Pointer to the log prior ratio (out) |
---|
1393 | | @param lnProposalRatio Pointer to the log proposal (Hastings) ratio (out) |
---|
1394 | | @param mvp Pointer to tuning parameter(s) |
---|
1395 | ------------------------------------------------------------------*/ |
---|
1396 | int Move_GeneTree3 (Param *param, int chain, SafeLong *seed, MrBFlt *lnPriorRatio, MrBFlt *lnProposalRatio, MrBFlt *mvp) |
---|
1397 | |
---|
1398 | { |
---|
1399 | int i, numGeneTrees, numUpperTriang; |
---|
1400 | double newLnProb, oldLnProb, backwardLnProposalProb, forwardLnProposalProb, |
---|
1401 | *oldMinDepths, *modMinDepths, forwardLambda, backwardLambda, mean; |
---|
1402 | Tree *newSpeciesTree, *oldSpeciesTree, **geneTrees; |
---|
1403 | ModelInfo *m; |
---|
1404 | ModelParams *mp; |
---|
1405 | |
---|
1406 | // Calculate number of gene trees |
---|
1407 | numGeneTrees = numTopologies - 1; |
---|
1408 | |
---|
1409 | // Get model params |
---|
1410 | mp = &modelParams[param->relParts[0]]; |
---|
1411 | |
---|
1412 | // Get model settings |
---|
1413 | m = &modelSettings[param->relParts[0]]; |
---|
1414 | |
---|
1415 | // Get species tree (this trick is possible because we always copy tree params) |
---|
1416 | newSpeciesTree = GetTree (m->speciesTree, chain, state[chain]); |
---|
1417 | oldSpeciesTree = GetTree (m->speciesTree, chain, state[chain] ^ 1); |
---|
1418 | |
---|
1419 | // Get gene trees |
---|
1420 | geneTrees = (Tree **) SafeCalloc (2*numGeneTrees, sizeof(Tree *)); |
---|
1421 | for (i=0; i<m->speciesTree->nSubParams; i++) { |
---|
1422 | geneTrees[i] = GetTree(m->speciesTree->subParams[i], chain, state[chain]); |
---|
1423 | } |
---|
1424 | |
---|
1425 | // Allocate space for depth matrix copy |
---|
1426 | numUpperTriang = numSpecies * (numSpecies - 1) / 2; |
---|
1427 | oldMinDepths = (double *) SafeCalloc (2*numUpperTriang, sizeof(double)); |
---|
1428 | modMinDepths = oldMinDepths + numUpperTriang; |
---|
1429 | |
---|
1430 | // Get min depth matrix for old gene trees |
---|
1431 | GetMinDepthMatrix(geneTrees, numTopologies-1, depthMatrix); |
---|
1432 | |
---|
1433 | // Save a copy |
---|
1434 | for (i=0; i<numUpperTriang; i++) |
---|
1435 | oldMinDepths[i] = depthMatrix[i]; |
---|
1436 | |
---|
1437 | // Get forward lambda |
---|
1438 | GetMeanDist(oldSpeciesTree, depthMatrix, &mean); |
---|
1439 | forwardLambda = 1.0 / mean; |
---|
1440 | |
---|
1441 | // Calculate joint probability of old gene trees and old species tree |
---|
1442 | oldLnProb = LnJointGeneTreeSpeciesTreePr(geneTrees, numGeneTrees, oldSpeciesTree, chain); |
---|
1443 | |
---|
1444 | // Modify the picked gene tree using code from a regular MrBayes move (no tuning parameter here, so passing on mvp is OK) |
---|
1445 | Move_ParsSPRClock(param, chain, seed, lnPriorRatio, lnProposalRatio, mvp); |
---|
1446 | |
---|
1447 | // Update the min depth matrix |
---|
1448 | GetMinDepthMatrix(geneTrees, numTopologies-1, depthMatrix); |
---|
1449 | |
---|
1450 | // Copy the min depth matrix |
---|
1451 | for (i=0; i<numUpperTriang; i++) |
---|
1452 | modMinDepths[i] = depthMatrix[i]; |
---|
1453 | |
---|
1454 | // Modify the min depth matrix |
---|
1455 | ModifyDepthMatrix (forwardLambda, modMinDepths, seed); |
---|
1456 | |
---|
1457 | // Get a new species tree |
---|
1458 | GetSpeciesTreeFromMinDepths (newSpeciesTree, modMinDepths); |
---|
1459 | |
---|
1460 | // Calculate joint probability of new gene trees and new species tree |
---|
1461 | newLnProb = LnJointGeneTreeSpeciesTreePr(geneTrees, numGeneTrees, newSpeciesTree, chain); |
---|
1462 | |
---|
1463 | // Get backward lambda |
---|
1464 | GetMeanDist(newSpeciesTree, depthMatrix, &mean); |
---|
1465 | backwardLambda = 1.0 / mean; |
---|
1466 | |
---|
1467 | // Get proposal probability of old species tree |
---|
1468 | backwardLnProposalProb = LnProposalProbSpeciesTree (oldSpeciesTree, oldMinDepths, backwardLambda); |
---|
1469 | |
---|
1470 | // Get proposal probability of new species tree |
---|
1471 | forwardLnProposalProb = LnProposalProbSpeciesTree (newSpeciesTree, depthMatrix, forwardLambda); |
---|
1472 | |
---|
1473 | // Update prior ratio taking species tree into account |
---|
1474 | (*lnPriorRatio) += (newLnProb - oldLnProb); |
---|
1475 | |
---|
1476 | // Update proposal ratio based on this move |
---|
1477 | (*lnProposalRatio) += (backwardLnProposalProb - forwardLnProposalProb); |
---|
1478 | |
---|
1479 | // Free allocated memory |
---|
1480 | free (geneTrees); |
---|
1481 | free (oldMinDepths); |
---|
1482 | |
---|
1483 | return (NO_ERROR); |
---|
1484 | } |
---|
1485 | |
---|
1486 | |
---|
1487 | |
---|
1488 | |
---|
1489 | |
---|
1490 | /*----------------------------------------------------------------------------------- |
---|
1491 | | |
---|
1492 | | Move_NodeSliderGeneTree: Move the position of one (root or nonroot) node in a |
---|
1493 | | gene tree inside a species tree. |
---|
1494 | | |
---|
1495 | -------------------------------------------------------------------------------------*/ |
---|
1496 | |
---|
1497 | int Move_NodeSliderGeneTree (Param *param, int chain, SafeLong *seed, MrBFlt *lnPriorRatio, MrBFlt *lnProposalRatio, MrBFlt *mvp) |
---|
1498 | |
---|
1499 | { |
---|
1500 | int i, *nEvents; |
---|
1501 | MrBFlt window, minDepth, maxDepth, oldDepth, newDepth, |
---|
1502 | oldLeftLength=0.0, oldRightLength=0.0, clockRate, |
---|
1503 | oldPLength=0.0, lambda=0.0, nu=0.0, igrvar=0.0, |
---|
1504 | *brlens=NULL, *tk02Rate=NULL, *igrRate=NULL, *popSizePtr; |
---|
1505 | TreeNode *p, *q; |
---|
1506 | ModelParams *mp; |
---|
1507 | ModelInfo *m; |
---|
1508 | Tree *geneTree, *speciesTree; |
---|
1509 | Param *subParm; |
---|
1510 | |
---|
1511 | window = mvp[0]; /* window size */ |
---|
1512 | |
---|
1513 | m = &modelSettings[param->relParts[0]]; |
---|
1514 | mp = &modelParams[param->relParts[0]]; |
---|
1515 | |
---|
1516 | /* get gene tree and species tree */ |
---|
1517 | geneTree = GetTree (param, chain, state[chain]); |
---|
1518 | speciesTree = GetTree (m->speciesTree, chain, state[chain]); |
---|
1519 | |
---|
1520 | /* get population size(s) */ |
---|
1521 | popSizePtr = GetParamVals(m->popSize, chain, state[chain]); |
---|
1522 | |
---|
1523 | /* get clock rate */ |
---|
1524 | if (m->clockRate == NULL) |
---|
1525 | clockRate = 1.0; |
---|
1526 | else |
---|
1527 | clockRate = *GetParamVals(m->clockRate, chain, state[chain]); |
---|
1528 | |
---|
1529 | /* pick a node to be changed */ |
---|
1530 | p = geneTree->intDownPass[(int)(RandomNumber(seed)*geneTree->nIntNodes)]; |
---|
1531 | |
---|
1532 | #if defined (DEBUG_CSLIDER) |
---|
1533 | printf ("Before node slider (gene tree):\n"); |
---|
1534 | printf ("Picked branch with index %d and depth %f\n", p->index, p->nodeDepth); |
---|
1535 | if (p->anc->anc == NULL) |
---|
1536 | printf ("Old clock rate: %f\n", clockRate); |
---|
1537 | ShowNodes (t->root, 0, t->isRooted); |
---|
1538 | getchar(); |
---|
1539 | #endif |
---|
1540 | |
---|
1541 | /* get gene tree prior prob before move */ |
---|
1542 | (*lnPriorRatio) -= LnPriorProbGeneTree(geneTree, clockRate, speciesTree, popSizePtr); |
---|
1543 | |
---|
1544 | /* store values needed later for prior calculation (relaxed clocks) */ |
---|
1545 | oldPLength = p->length; |
---|
1546 | if (p->left != NULL) |
---|
1547 | { |
---|
1548 | oldLeftLength = p->left->length; |
---|
1549 | oldRightLength = p->right->length; |
---|
1550 | } |
---|
1551 | else |
---|
1552 | oldLeftLength = oldRightLength = 0.0; |
---|
1553 | |
---|
1554 | /* find species tree branch to which the gene tree node belongs */ |
---|
1555 | MapGeneTreeToSpeciesTree(geneTree, speciesTree); |
---|
1556 | q = NULL; |
---|
1557 | for (i=0; i<speciesTree->nNodes-1; i++) |
---|
1558 | { |
---|
1559 | q = speciesTree->allDownPass[i]; |
---|
1560 | if (p->x == q->index) |
---|
1561 | break; |
---|
1562 | } |
---|
1563 | assert (q != NULL && p->x == q->index); |
---|
1564 | |
---|
1565 | /* determine lower and upper bound */ |
---|
1566 | minDepth = p->left->nodeDepth + POS_MIN; |
---|
1567 | if (p->right->nodeDepth + POS_MIN > minDepth) |
---|
1568 | minDepth = p->right->nodeDepth + POS_MIN; |
---|
1569 | if (q->nodeDepth + POS_MIN > minDepth) |
---|
1570 | minDepth = q->nodeDepth + POS_MIN; |
---|
1571 | if (p->anc->anc == NULL) |
---|
1572 | maxDepth = TREEHEIGHT_MAX; |
---|
1573 | else |
---|
1574 | maxDepth = p->anc->nodeDepth - POS_MIN; |
---|
1575 | |
---|
1576 | /* abort if impossible */ |
---|
1577 | if (minDepth >= maxDepth) |
---|
1578 | { |
---|
1579 | abortMove = YES; |
---|
1580 | return (NO_ERROR); |
---|
1581 | } |
---|
1582 | |
---|
1583 | if( maxDepth-minDepth < window ) |
---|
1584 | { |
---|
1585 | window = maxDepth-minDepth; |
---|
1586 | } |
---|
1587 | |
---|
1588 | /* pick the new node depth */ |
---|
1589 | oldDepth = p->nodeDepth; |
---|
1590 | newDepth = oldDepth + (RandomNumber (seed) - 0.5) * window; |
---|
1591 | |
---|
1592 | /* reflect the new node depth */ |
---|
1593 | while (newDepth < minDepth || newDepth > maxDepth) |
---|
1594 | { |
---|
1595 | if (newDepth < minDepth) |
---|
1596 | newDepth = 2.0 * minDepth - newDepth; |
---|
1597 | if (newDepth > maxDepth) |
---|
1598 | newDepth = 2.0 * maxDepth - newDepth; |
---|
1599 | } |
---|
1600 | p->nodeDepth = newDepth; |
---|
1601 | |
---|
1602 | /* determine new branch lengths around p and set update of transition probabilities */ |
---|
1603 | if (p->left != NULL) |
---|
1604 | { |
---|
1605 | p->left->length = p->nodeDepth - p->left->nodeDepth; |
---|
1606 | assert (p->left->length >= POS_MIN); |
---|
1607 | p->left->upDateTi = YES; |
---|
1608 | p->right->length = p->nodeDepth - p->right->nodeDepth; |
---|
1609 | assert (p->right->length >= POS_MIN); |
---|
1610 | p->right->upDateTi = YES; |
---|
1611 | } |
---|
1612 | if (p->anc->anc != NULL) |
---|
1613 | { |
---|
1614 | p->length = p->anc->nodeDepth - p->nodeDepth; |
---|
1615 | assert (p->length >= POS_MIN); |
---|
1616 | p->upDateTi = YES; |
---|
1617 | } |
---|
1618 | |
---|
1619 | /* set flags for update of cond likes from p and down to root */ |
---|
1620 | q = p; |
---|
1621 | while (q->anc != NULL) |
---|
1622 | { |
---|
1623 | q->upDateCl = YES; |
---|
1624 | q = q->anc; |
---|
1625 | } |
---|
1626 | |
---|
1627 | /* calculate proposal ratio */ |
---|
1628 | (*lnProposalRatio) = 0.0; |
---|
1629 | |
---|
1630 | /* calculate prior ratio */ |
---|
1631 | (*lnPriorRatio) += LnPriorProbGeneTree (geneTree, clockRate, speciesTree, popSizePtr); |
---|
1632 | |
---|
1633 | /* adjust proposal and prior ratio for relaxed clock models */ |
---|
1634 | for (i=0; i<param->nSubParams; i++) |
---|
1635 | { |
---|
1636 | subParm = param->subParams[i]; |
---|
1637 | if (subParm->paramType == P_CPPEVENTS) |
---|
1638 | { |
---|
1639 | nEvents = subParm->nEvents[2*chain+state[chain]]; |
---|
1640 | lambda = *GetParamVals (modelSettings[subParm->relParts[0]].cppRate, chain, state[chain]); |
---|
1641 | |
---|
1642 | /* proposal ratio */ |
---|
1643 | if (p->left != NULL) |
---|
1644 | { |
---|
1645 | (*lnProposalRatio) += nEvents[p->left->index ] * log (p->left->length / oldLeftLength); |
---|
1646 | (*lnProposalRatio) += nEvents[p->right->index] * log (p->right->length / oldRightLength); |
---|
1647 | } |
---|
1648 | if (p->anc->anc != NULL) |
---|
1649 | (*lnProposalRatio) += nEvents[p->index] * log (p->length / oldPLength); |
---|
1650 | |
---|
1651 | /* prior ratio */ |
---|
1652 | if (p->anc->anc == NULL) // two branches changed in same direction |
---|
1653 | (*lnPriorRatio) += lambda * (2.0 * (oldDepth - newDepth)); |
---|
1654 | else if (p->left != NULL) // two branches changed in one direction, one branch in the other direction |
---|
1655 | (*lnPriorRatio) += lambda * (oldDepth - newDepth); |
---|
1656 | else /* if (p->left == NULL) */ // one branch changed |
---|
1657 | (*lnPriorRatio) += lambda * (newDepth - oldDepth); |
---|
1658 | |
---|
1659 | /* update effective evolutionary lengths */ |
---|
1660 | if (UpdateCppEvolLengths (subParm, p, chain) == ERROR) |
---|
1661 | { |
---|
1662 | abortMove = YES; |
---|
1663 | return (NO_ERROR); |
---|
1664 | } |
---|
1665 | } |
---|
1666 | else if (subParm->paramType == P_TK02BRANCHRATES) |
---|
1667 | { |
---|
1668 | nu = *GetParamVals (modelSettings[subParm->relParts[0]].tk02var, chain, state[chain]); |
---|
1669 | tk02Rate = GetParamVals (subParm, chain, state[chain]); |
---|
1670 | brlens = GetParamSubVals (subParm, chain, state[chain]); |
---|
1671 | |
---|
1672 | /* no proposal ratio effect */ |
---|
1673 | |
---|
1674 | /* prior ratio */ |
---|
1675 | if (p->left != NULL) |
---|
1676 | { |
---|
1677 | (*lnPriorRatio) -= LnProbTK02LogNormal (tk02Rate[p->index], nu*oldLeftLength, tk02Rate[p->left->index]); |
---|
1678 | (*lnPriorRatio) -= LnProbTK02LogNormal (tk02Rate[p->index], nu*oldRightLength, tk02Rate[p->right->index]); |
---|
1679 | (*lnPriorRatio) += LnProbTK02LogNormal (tk02Rate[p->index], nu*p->left->length, tk02Rate[p->left->index]); |
---|
1680 | (*lnPriorRatio) += LnProbTK02LogNormal (tk02Rate[p->index], nu*p->right->length, tk02Rate[p->right->index]); |
---|
1681 | } |
---|
1682 | if (p->anc->anc != NULL) |
---|
1683 | { |
---|
1684 | (*lnPriorRatio) -= LnProbTK02LogNormal (tk02Rate[p->anc->index], nu*oldPLength, tk02Rate[p->index]); |
---|
1685 | (*lnPriorRatio) += LnProbTK02LogNormal (tk02Rate[p->anc->index], nu*p->length, tk02Rate[p->index]); |
---|
1686 | } |
---|
1687 | |
---|
1688 | /* update effective evolutionary lengths */ |
---|
1689 | if (p->left != NULL) |
---|
1690 | { |
---|
1691 | brlens[p->left->index] = p->left->length * (tk02Rate[p->left->index]+tk02Rate[p->index])/2.0; |
---|
1692 | brlens[p->right->index] = p->right->length * (tk02Rate[p->right->index]+tk02Rate[p->index])/2.0; |
---|
1693 | } |
---|
1694 | if (p->anc->anc != NULL) |
---|
1695 | brlens[p->index] = p->length * (tk02Rate[p->index]+tk02Rate[p->anc->index])/2.0; |
---|
1696 | } |
---|
1697 | else if (subParm->paramType == P_IGRBRANCHLENS) |
---|
1698 | { |
---|
1699 | igrvar = *GetParamVals (modelSettings[subParm->relParts[0]].igrvar, chain, state[chain]); |
---|
1700 | igrRate = GetParamVals (subParm, chain, state[chain]); |
---|
1701 | brlens = GetParamSubVals (subParm, chain, state[chain]); |
---|
1702 | |
---|
1703 | if (p->left != NULL) |
---|
1704 | { |
---|
1705 | (*lnPriorRatio) -= LnProbGamma (oldLeftLength /igrvar, 1.0/igrvar, brlens[p->left->index ]); |
---|
1706 | (*lnPriorRatio) -= LnProbGamma (oldRightLength /igrvar, 1.0/igrvar, brlens[p->right->index]); |
---|
1707 | } |
---|
1708 | if (p->anc->anc != NULL) |
---|
1709 | (*lnPriorRatio) -= LnProbGamma (oldPLength/igrvar, 1.0/igrvar, brlens[p->index]); |
---|
1710 | |
---|
1711 | if (p->left != NULL) |
---|
1712 | { |
---|
1713 | brlens[p->left->index ] = igrRate[p->left->index ] * p->left->length; |
---|
1714 | brlens[p->right->index] = igrRate[p->right->index] * p->right->length; |
---|
1715 | if (brlens[p->left->index] <= 0.0 || brlens[p->right->index] <= 0.0) |
---|
1716 | { |
---|
1717 | abortMove = YES; |
---|
1718 | return (NO_ERROR); |
---|
1719 | } |
---|
1720 | (*lnProposalRatio) += log(p->left->length / oldLeftLength); |
---|
1721 | (*lnProposalRatio) += log(p->right->length / oldRightLength); |
---|
1722 | } |
---|
1723 | if (p->anc->anc != NULL) |
---|
1724 | { |
---|
1725 | brlens[p->index] = igrRate[p->index] * p->length; |
---|
1726 | if (brlens[p->index] <= 0.0) |
---|
1727 | { |
---|
1728 | abortMove = YES; |
---|
1729 | return (NO_ERROR); |
---|
1730 | } |
---|
1731 | (*lnProposalRatio) += log(p->length / oldPLength); |
---|
1732 | } |
---|
1733 | |
---|
1734 | if (p->left != NULL) |
---|
1735 | { |
---|
1736 | (*lnPriorRatio) += LnProbGamma (p->left->length /igrvar, 1.0/igrvar, brlens[p->left->index ]); |
---|
1737 | (*lnPriorRatio) += LnProbGamma (p->right->length/igrvar, 1.0/igrvar, brlens[p->right->index]); |
---|
1738 | } |
---|
1739 | if (p->anc->anc != NULL) |
---|
1740 | (*lnPriorRatio) += LnProbGamma (p->length /igrvar, 1.0/igrvar, brlens[p->index]); |
---|
1741 | } |
---|
1742 | } |
---|
1743 | |
---|
1744 | #if defined (DEBUG_CSLIDER) |
---|
1745 | printf ("After node slider (gene tree):\n"); |
---|
1746 | printf ("Old depth: %f -- New depth: %f -- LnPriorRatio %f -- LnProposalRatio %f\n", |
---|
1747 | oldDepth, newDepth, (*lnPriorRatio), (*lnProposalRatio)); |
---|
1748 | ShowNodes (t->root, 0, t->isRooted); |
---|
1749 | getchar(); |
---|
1750 | #endif |
---|
1751 | |
---|
1752 | return (NO_ERROR); |
---|
1753 | |
---|
1754 | } |
---|
1755 | |
---|
1756 | |
---|
1757 | |
---|
1758 | |
---|
1759 | |
---|
1760 | /*------------------------------------------------------------------ |
---|
1761 | | |
---|
1762 | | Move_SpeciesTree: Propose a new species tree |
---|
1763 | | |
---|
1764 | ------------------------------------------------------------------*/ |
---|
1765 | int Move_SpeciesTree (Param *param, int chain, SafeLong *seed, MrBFlt *lnPriorRatio, MrBFlt *lnProposalRatio, MrBFlt *mvp) |
---|
1766 | { |
---|
1767 | int i, numGeneTrees, numUpperTriang; |
---|
1768 | double newLnProb, oldLnProb, backwardLnProposalProb, forwardLnProposalProb, *modMinDepths, |
---|
1769 | forwardLambda, backwardLambda, lambdaDiv, mean; |
---|
1770 | Tree *newSpeciesTree, *oldSpeciesTree, **geneTrees; |
---|
1771 | ModelInfo *m; |
---|
1772 | ModelParams *mp; |
---|
1773 | |
---|
1774 | /* get tuning parameter (lambda divider) */ |
---|
1775 | lambdaDiv = mvp[0]; |
---|
1776 | |
---|
1777 | /* calculate number of gene trees */ |
---|
1778 | numGeneTrees = param->nSubParams; |
---|
1779 | |
---|
1780 | /* get model params */ |
---|
1781 | mp = &modelParams[param->relParts[0]]; |
---|
1782 | |
---|
1783 | /* get model settings */ |
---|
1784 | m = &modelSettings[param->relParts[0]]; |
---|
1785 | |
---|
1786 | /* get new and old species trees */ |
---|
1787 | newSpeciesTree = GetTree (m->speciesTree, chain, state[chain]); |
---|
1788 | oldSpeciesTree = GetTree (m->speciesTree, chain, state[chain] ^ 1); |
---|
1789 | |
---|
1790 | /* get gene trees */ |
---|
1791 | geneTrees = (Tree **) SafeCalloc (numGeneTrees, sizeof(Tree*)); |
---|
1792 | for (i=0; i<param->nSubParams; i++) |
---|
1793 | geneTrees[i] = GetTree(param->subParams[i], chain, state[chain]); |
---|
1794 | |
---|
1795 | /* get minimum depth matrix */ |
---|
1796 | GetMinDepthMatrix(geneTrees, numGeneTrees, depthMatrix); |
---|
1797 | |
---|
1798 | /* get forward lambda */ |
---|
1799 | GetMeanDist(oldSpeciesTree, depthMatrix, &mean); |
---|
1800 | forwardLambda = 1.0 / (mean * lambdaDiv); |
---|
1801 | |
---|
1802 | /* make a copy for modification */ |
---|
1803 | numUpperTriang = numSpecies * (numSpecies - 1) / 2; |
---|
1804 | modMinDepths = (double *) SafeCalloc (numUpperTriang, sizeof(double)); |
---|
1805 | for (i=0; i<numUpperTriang; i++) |
---|
1806 | modMinDepths[i] = depthMatrix[i]; |
---|
1807 | |
---|
1808 | /* modify minimum depth matrix */ |
---|
1809 | ModifyDepthMatrix (forwardLambda, modMinDepths, seed); |
---|
1810 | |
---|
1811 | /* construct a new species tree from the modified constraints */ |
---|
1812 | GetSpeciesTreeFromMinDepths(newSpeciesTree, modMinDepths); |
---|
1813 | |
---|
1814 | /* get lambda for back move */ |
---|
1815 | GetMeanDist(newSpeciesTree, depthMatrix, &mean); |
---|
1816 | backwardLambda = 1.0 / (mean * lambdaDiv); |
---|
1817 | |
---|
1818 | /* calculate proposal ratio */ |
---|
1819 | backwardLnProposalProb = LnProposalProbSpeciesTree (oldSpeciesTree, depthMatrix, backwardLambda); |
---|
1820 | forwardLnProposalProb = LnProposalProbSpeciesTree (newSpeciesTree, depthMatrix, forwardLambda ); |
---|
1821 | (*lnProposalRatio) = backwardLnProposalProb - forwardLnProposalProb; |
---|
1822 | |
---|
1823 | #if defined (BEST_MPI_ENABLED) |
---|
1824 | // Broadcast the proposed species tree to all processors if MPI version |
---|
1825 | #endif |
---|
1826 | |
---|
1827 | #if defined (BEST_MPI_ENABLED) |
---|
1828 | // Let each processor calculate the ln probability ratio of its current gene tree(s) |
---|
1829 | // given the new and old species tree in the MPI version |
---|
1830 | |
---|
1831 | // Assemble the ln probability ratios across the processors and to lnPriorRatio |
---|
1832 | #else |
---|
1833 | /* calculate the ln probability ratio of the current gene trees |
---|
1834 | given the new and old species trees */ |
---|
1835 | newLnProb = LnJointGeneTreeSpeciesTreePr(geneTrees, numGeneTrees, newSpeciesTree, chain); |
---|
1836 | oldLnProb = LnJointGeneTreeSpeciesTreePr(geneTrees, numGeneTrees, oldSpeciesTree, chain); |
---|
1837 | #endif |
---|
1838 | |
---|
1839 | /* set (*lnPriorRatio) to ln probability ratio */ |
---|
1840 | (*lnPriorRatio) = (newLnProb - oldLnProb); |
---|
1841 | |
---|
1842 | /* free allocated space */ |
---|
1843 | free (modMinDepths); |
---|
1844 | free (geneTrees); |
---|
1845 | |
---|
1846 | return (NO_ERROR); |
---|
1847 | } |
---|
1848 | |
---|
1849 | |
---|
1850 | |
---|
1851 | |
---|
1852 | |
---|
1853 | /** Show upper triangular matrix */ |
---|
1854 | void ShowUpperTriangMatrix (double *values, int squareSize) |
---|
1855 | { |
---|
1856 | int i, j, index; |
---|
1857 | |
---|
1858 | index = 0; |
---|
1859 | printf ("Upper triang matrix:\n"); |
---|
1860 | for(i=0; i<squareSize; i++) { |
---|
1861 | for (j=0; j<i; j++) |
---|
1862 | printf(" "); |
---|
1863 | for(j=i+1; j<squareSize; j++) { |
---|
1864 | printf("%.6f ", values[index]); |
---|
1865 | index++; |
---|
1866 | } |
---|
1867 | printf("\n"); |
---|
1868 | } |
---|
1869 | printf("\n"); |
---|
1870 | } |
---|
1871 | |
---|
1872 | |
---|
1873 | |
---|
1874 | |
---|
1875 | |
---|