| 1 | /* |
|---|
| 2 | |
|---|
| 3 | PhyML: a program that computes maximum likelihood phylogenies from |
|---|
| 4 | DNA or AA homologous sequences. |
|---|
| 5 | |
|---|
| 6 | Copyright (C) Stephane Guindon. Oct 2003 onward. |
|---|
| 7 | |
|---|
| 8 | All parts of the source except where indicated are distributed under |
|---|
| 9 | the GNU public licence. See http://www.opensource.org for details. |
|---|
| 10 | |
|---|
| 11 | */ |
|---|
| 12 | |
|---|
| 13 | |
|---|
| 14 | #include "geo.h" |
|---|
| 15 | |
|---|
| 16 | ////////////////////////////////////////////////////////////// |
|---|
| 17 | ////////////////////////////////////////////////////////////// |
|---|
| 18 | |
|---|
| 19 | int GEO_Main(int argc, char **argv) |
|---|
| 20 | { |
|---|
| 21 | GEO_Simulate_Estimate(argc,argv); |
|---|
| 22 | /* GEO_Estimate(argc,argv); */ |
|---|
| 23 | return(1); |
|---|
| 24 | } |
|---|
| 25 | |
|---|
| 26 | ////////////////////////////////////////////////////////////// |
|---|
| 27 | ////////////////////////////////////////////////////////////// |
|---|
| 28 | |
|---|
| 29 | int GEO_Estimate(int argc, char **argv) |
|---|
| 30 | { |
|---|
| 31 | t_geo *t; |
|---|
| 32 | int seed; |
|---|
| 33 | FILE *fp; |
|---|
| 34 | t_tree *tree; |
|---|
| 35 | phydbl *ldscp; |
|---|
| 36 | int *loc_hash; |
|---|
| 37 | int i; |
|---|
| 38 | phydbl *probs; |
|---|
| 39 | phydbl sum; |
|---|
| 40 | |
|---|
| 41 | // geo ./ban |
|---|
| 42 | |
|---|
| 43 | |
|---|
| 44 | seed = getpid(); |
|---|
| 45 | /* seed = 28224; */ |
|---|
| 46 | /* seed = 21249; */ |
|---|
| 47 | /* seed = 21596; */ |
|---|
| 48 | |
|---|
| 49 | printf("\n. Seed = %d",seed); |
|---|
| 50 | srand(seed); |
|---|
| 51 | |
|---|
| 52 | t = GEO_Make_Geo_Basic(); |
|---|
| 53 | GEO_Init_Geo_Struct(t); |
|---|
| 54 | |
|---|
| 55 | fp = Openfile(argv[1],0); /* Open tree file */ |
|---|
| 56 | |
|---|
| 57 | tree = Read_Tree_File_Phylip(fp); /* Read it */ |
|---|
| 58 | Update_Ancestors(tree->n_root,tree->n_root->v[2],tree); |
|---|
| 59 | Update_Ancestors(tree->n_root,tree->n_root->v[1],tree); |
|---|
| 60 | tree->rates = RATES_Make_Rate_Struct(tree->n_otu); |
|---|
| 61 | RATES_Init_Rate_Struct(tree->rates,NULL,tree->n_otu); |
|---|
| 62 | Branch_To_Time(tree); |
|---|
| 63 | tree->geo = t; |
|---|
| 64 | |
|---|
| 65 | GEO_Read_In_Landscape(argv[2],t,&ldscp,&loc_hash,tree); |
|---|
| 66 | |
|---|
| 67 | GEO_Make_Geo_Complete(t->ldscape_sz,t->n_dim,tree->n_otu,t); |
|---|
| 68 | |
|---|
| 69 | For(i,t->ldscape_sz*t->n_dim) t->ldscape[i] = ldscp[i]; |
|---|
| 70 | For(i,tree->n_otu) t->loc[i] = loc_hash[i]; |
|---|
| 71 | |
|---|
| 72 | t->cov[0*t->n_dim+0] = t->sigma; |
|---|
| 73 | t->cov[1*t->n_dim+1] = t->sigma; |
|---|
| 74 | t->cov[0*t->n_dim+1] = 0.0; |
|---|
| 75 | t->cov[1*t->n_dim+0] = 0.0; |
|---|
| 76 | |
|---|
| 77 | GEO_Get_Sigma_Max(t); |
|---|
| 78 | |
|---|
| 79 | t->max_sigma = t->sigma_thresh * 2.; |
|---|
| 80 | |
|---|
| 81 | PhyML_Printf("\n. Sigma max: %f",t->sigma_thresh); |
|---|
| 82 | |
|---|
| 83 | GEO_Get_Locations_Beneath(t,tree); |
|---|
| 84 | |
|---|
| 85 | probs = (phydbl *)mCalloc(tree->geo->ldscape_sz,sizeof(phydbl)); |
|---|
| 86 | |
|---|
| 87 | sum = 0.0; |
|---|
| 88 | For(i,tree->geo->ldscape_sz) sum += tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]; |
|---|
| 89 | For(i,tree->geo->ldscape_sz) probs[i] = tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]/sum; |
|---|
| 90 | tree->geo->loc[tree->n_root->num] = Sample_i_With_Proba_pi(probs,tree->geo->ldscape_sz); |
|---|
| 91 | Free(probs); |
|---|
| 92 | |
|---|
| 93 | GEO_Randomize_Locations(tree->n_root,t,tree); |
|---|
| 94 | |
|---|
| 95 | |
|---|
| 96 | GEO_Update_Occup(t,tree); |
|---|
| 97 | GEO_Lk(t,tree); |
|---|
| 98 | |
|---|
| 99 | PhyML_Printf("\n. Init loglk: %f",tree->geo->c_lnL); |
|---|
| 100 | |
|---|
| 101 | DR_Draw_Tree("essai.ps",tree); |
|---|
| 102 | |
|---|
| 103 | GEO_MCMC(tree); |
|---|
| 104 | |
|---|
| 105 | fclose(fp); |
|---|
| 106 | Free(ldscp); |
|---|
| 107 | Free(loc_hash); |
|---|
| 108 | |
|---|
| 109 | return(1); |
|---|
| 110 | } |
|---|
| 111 | |
|---|
| 112 | ////////////////////////////////////////////////////////////// |
|---|
| 113 | ////////////////////////////////////////////////////////////// |
|---|
| 114 | |
|---|
| 115 | int GEO_Simulate_Estimate(int argc, char **argv) |
|---|
| 116 | { |
|---|
| 117 | t_geo *t; |
|---|
| 118 | int n_tax; |
|---|
| 119 | t_tree *tree,*ori_tree; |
|---|
| 120 | int seed; |
|---|
| 121 | phydbl *res; |
|---|
| 122 | FILE *fp; |
|---|
| 123 | int pid; |
|---|
| 124 | char *s; |
|---|
| 125 | int rand_loc; |
|---|
| 126 | phydbl *probs,sum; |
|---|
| 127 | int i; |
|---|
| 128 | phydbl mantel_p_val; |
|---|
| 129 | phydbl rf; |
|---|
| 130 | |
|---|
| 131 | s = (char *)mCalloc(T_MAX_NAME,sizeof(char)); |
|---|
| 132 | |
|---|
| 133 | strcpy(s,"geo.out"); |
|---|
| 134 | pid = getpid(); |
|---|
| 135 | sprintf(s+strlen(s),".%d",pid); |
|---|
| 136 | |
|---|
| 137 | fp = fopen(s,"w"); |
|---|
| 138 | |
|---|
| 139 | seed = getpid(); |
|---|
| 140 | /* seed = 15520; */ |
|---|
| 141 | seed = 5023; |
|---|
| 142 | printf("\n. Seed = %d",seed); |
|---|
| 143 | srand(seed); |
|---|
| 144 | |
|---|
| 145 | t = GEO_Make_Geo_Basic(); |
|---|
| 146 | GEO_Init_Geo_Struct(t); |
|---|
| 147 | |
|---|
| 148 | |
|---|
| 149 | /* t->tau = 3.0; */ |
|---|
| 150 | /* t->lbda = 0.02; */ |
|---|
| 151 | /* t->sigma = 10.; */ |
|---|
| 152 | |
|---|
| 153 | |
|---|
| 154 | t->ldscape_sz = (int)atoi(argv[1]); |
|---|
| 155 | t->n_dim = 2; |
|---|
| 156 | n_tax = (int)atoi(argv[2]); |
|---|
| 157 | |
|---|
| 158 | |
|---|
| 159 | GEO_Make_Geo_Complete(t->ldscape_sz,t->n_dim,n_tax,t); |
|---|
| 160 | |
|---|
| 161 | |
|---|
| 162 | t->cov[0*t->n_dim+0] = t->sigma; |
|---|
| 163 | t->cov[1*t->n_dim+1] = t->sigma; |
|---|
| 164 | t->cov[0*t->n_dim+1] = 0.0; |
|---|
| 165 | t->cov[1*t->n_dim+0] = 0.0; |
|---|
| 166 | |
|---|
| 167 | GEO_Simulate_Coordinates(t->ldscape_sz,t); |
|---|
| 168 | |
|---|
| 169 | GEO_Get_Sigma_Max(t); |
|---|
| 170 | |
|---|
| 171 | t->max_sigma = t->sigma_thresh * 2.; |
|---|
| 172 | |
|---|
| 173 | PhyML_Printf("\n. sigma max: %f threshold: %f",t->max_sigma,t->sigma_thresh); |
|---|
| 174 | |
|---|
| 175 | t->tau = Uni()*(t->max_tau/100.-t->min_tau*10.) + t->min_tau*10.; |
|---|
| 176 | t->lbda = EXP(Uni()*(LOG(t->max_lbda/100.)-LOG(t->min_lbda*10.)) + LOG(t->min_lbda*10.)); |
|---|
| 177 | t->sigma = Uni()*(t->max_sigma-t->min_sigma) + t->min_sigma; |
|---|
| 178 | |
|---|
| 179 | |
|---|
| 180 | PhyML_Fprintf(fp,"\n# SigmaTrue\t SigmaThresh\t LbdaTrue\t TauTrue\txTrue\t yTrue\t xRand\t yRand\t RF\t Sigma5\t Sigma50\t Sigma95\t ProbSigmaInfThresh\t Lbda5\t Lbda50\t Lbda95\t ProbLbdaInf1\t Tau5\t Tau50\t Tau95\t X5\t X50\t X95\t Y5\t Y50\t Y95\t RandX5\t RandX50\t RandX95\t RandY5\t RandY50\t RandY95\t Mantel\t"); |
|---|
| 181 | PhyML_Fprintf(fp,"\n"); |
|---|
| 182 | |
|---|
| 183 | tree = GEO_Simulate(t,n_tax); |
|---|
| 184 | |
|---|
| 185 | ori_tree = Make_Tree_From_Scratch(tree->n_otu,NULL); |
|---|
| 186 | Copy_Tree(tree,ori_tree); |
|---|
| 187 | |
|---|
| 188 | Random_SPRs_On_Rooted_Tree(tree); |
|---|
| 189 | |
|---|
| 190 | Free_Bip(ori_tree); |
|---|
| 191 | Alloc_Bip(ori_tree); |
|---|
| 192 | Get_Bip(ori_tree->a_nodes[0],ori_tree->a_nodes[0]->v[0],ori_tree); |
|---|
| 193 | |
|---|
| 194 | Free_Bip(tree); |
|---|
| 195 | Alloc_Bip(tree); |
|---|
| 196 | Get_Bip(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree); |
|---|
| 197 | Match_Tip_Numbers(tree,ori_tree); |
|---|
| 198 | |
|---|
| 199 | rf = (phydbl)Compare_Bip(ori_tree,tree,NO)/(tree->n_otu-3); |
|---|
| 200 | PhyML_Printf("\n. rf: %f",rf); |
|---|
| 201 | |
|---|
| 202 | phydbl scale; |
|---|
| 203 | scale = EXP(Rnorm(0,0.2)); |
|---|
| 204 | PhyML_Printf("\n. Scale: %f",scale); |
|---|
| 205 | For(i,2*tree->n_otu-1) tree->rates->nd_t[i] *= scale; |
|---|
| 206 | |
|---|
| 207 | |
|---|
| 208 | phydbl *tree_dist,*geo_dist; |
|---|
| 209 | int j; |
|---|
| 210 | |
|---|
| 211 | Time_To_Branch(tree); |
|---|
| 212 | tree_dist = Dist_Btw_Tips(tree); |
|---|
| 213 | |
|---|
| 214 | geo_dist = (phydbl *)mCalloc(tree->n_otu*tree->n_otu,sizeof(phydbl)); |
|---|
| 215 | |
|---|
| 216 | For(i,tree->n_otu) |
|---|
| 217 | { |
|---|
| 218 | For(j,tree->n_otu) |
|---|
| 219 | { |
|---|
| 220 | geo_dist[i*tree->n_otu+j] = |
|---|
| 221 | FABS(t->ldscape[t->loc[i]*t->n_dim+0] - |
|---|
| 222 | t->ldscape[t->loc[j]*t->n_dim+0])+ |
|---|
| 223 | FABS(t->ldscape[t->loc[i]*t->n_dim+1] - |
|---|
| 224 | t->ldscape[t->loc[j]*t->n_dim+1]); |
|---|
| 225 | |
|---|
| 226 | } |
|---|
| 227 | } |
|---|
| 228 | |
|---|
| 229 | printf("\n. tau: %f lbda: %f sigma: %f",t->tau,t->lbda,t->sigma); |
|---|
| 230 | mantel_p_val = Mantel(tree_dist,geo_dist,tree->n_otu,tree->n_otu); |
|---|
| 231 | printf("\n. Mantel test p-value: %f",mantel_p_val); |
|---|
| 232 | |
|---|
| 233 | Free(tree_dist); |
|---|
| 234 | Free(geo_dist); |
|---|
| 235 | |
|---|
| 236 | rand_loc = Rand_Int(0,t->ldscape_sz-1); |
|---|
| 237 | |
|---|
| 238 | PhyML_Printf("\n. sigma: %f\t lbda: %f\t tau:%f\t x:%f\t y:%f rand.x:%f\t rand.y:%f\t", |
|---|
| 239 | t->sigma, |
|---|
| 240 | t->lbda, |
|---|
| 241 | t->tau, |
|---|
| 242 | t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0], |
|---|
| 243 | t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1], |
|---|
| 244 | t->ldscape[rand_loc*t->n_dim+0], |
|---|
| 245 | t->ldscape[rand_loc*t->n_dim+1]); |
|---|
| 246 | |
|---|
| 247 | PhyML_Fprintf(fp,"%f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t", |
|---|
| 248 | t->sigma, |
|---|
| 249 | t->sigma_thresh, |
|---|
| 250 | t->lbda, |
|---|
| 251 | t->tau, |
|---|
| 252 | t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0], |
|---|
| 253 | t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1], |
|---|
| 254 | t->ldscape[rand_loc*t->n_dim+0], |
|---|
| 255 | t->ldscape[rand_loc*t->n_dim+1], |
|---|
| 256 | rf); |
|---|
| 257 | |
|---|
| 258 | GEO_Get_Locations_Beneath(t,tree); |
|---|
| 259 | |
|---|
| 260 | probs = (phydbl *)mCalloc(tree->geo->ldscape_sz,sizeof(phydbl)); |
|---|
| 261 | |
|---|
| 262 | sum = 0.0; |
|---|
| 263 | For(i,tree->geo->ldscape_sz) sum += tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]; |
|---|
| 264 | For(i,tree->geo->ldscape_sz) probs[i] = tree->geo->loc_beneath[tree->n_root->num * tree->geo->ldscape_sz + i]/sum; |
|---|
| 265 | tree->geo->loc[tree->n_root->num] = Sample_i_With_Proba_pi(probs,tree->geo->ldscape_sz); |
|---|
| 266 | Free(probs); |
|---|
| 267 | GEO_Randomize_Locations(tree->n_root,tree->geo,tree); |
|---|
| 268 | |
|---|
| 269 | GEO_Update_Occup(t,tree); |
|---|
| 270 | GEO_Lk(t,tree); |
|---|
| 271 | PhyML_Printf("\n. Init loglk: %f",tree->geo->c_lnL); |
|---|
| 272 | |
|---|
| 273 | |
|---|
| 274 | res = GEO_MCMC(tree); |
|---|
| 275 | |
|---|
| 276 | PhyML_Fprintf(fp,"%f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f\t %f \n", |
|---|
| 277 | |
|---|
| 278 | /* Sigma5 */ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025), |
|---|
| 279 | /* Sigma50*/ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50), |
|---|
| 280 | /* Sigma95*/ Quantile(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975), |
|---|
| 281 | |
|---|
| 282 | /* ProbInfThesh */ Prob(res+0*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval,t->sigma_thresh), |
|---|
| 283 | |
|---|
| 284 | /* Lbda5 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025), |
|---|
| 285 | /* Lbda50 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50), |
|---|
| 286 | /* Lbda95 */ Quantile(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975), |
|---|
| 287 | |
|---|
| 288 | /* ProbInf1 */ Prob(res+1*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval,1.0), |
|---|
| 289 | |
|---|
| 290 | /* Tau5 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025), |
|---|
| 291 | /* Tau50 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50), |
|---|
| 292 | /* Tau 95 */ Quantile(res+2*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975), |
|---|
| 293 | |
|---|
| 294 | /* X5 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025), |
|---|
| 295 | /* X50 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50), |
|---|
| 296 | /* X95 */ Quantile(res+4*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975), |
|---|
| 297 | |
|---|
| 298 | /* Y5 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025), |
|---|
| 299 | /* Y50 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50), |
|---|
| 300 | /* Y95 */ Quantile(res+5*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975), |
|---|
| 301 | |
|---|
| 302 | /* RandX5 */ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025), |
|---|
| 303 | /* RandX50*/ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50), |
|---|
| 304 | /* RandX95*/ Quantile(res+6*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975), |
|---|
| 305 | |
|---|
| 306 | /* RandY5 */ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.025), |
|---|
| 307 | /* RandY50*/ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.50), |
|---|
| 308 | /* RandY95*/ Quantile(res+7*tree->mcmc->chain_len / tree->mcmc->sample_interval,tree->mcmc->run / tree->mcmc->sample_interval+1,0.975), |
|---|
| 309 | |
|---|
| 310 | mantel_p_val |
|---|
| 311 | ); |
|---|
| 312 | fflush(NULL); |
|---|
| 313 | |
|---|
| 314 | Free(s); |
|---|
| 315 | Free(res); |
|---|
| 316 | |
|---|
| 317 | fclose(fp); |
|---|
| 318 | |
|---|
| 319 | return 1; |
|---|
| 320 | } |
|---|
| 321 | |
|---|
| 322 | ////////////////////////////////////////////////////////////// |
|---|
| 323 | ////////////////////////////////////////////////////////////// |
|---|
| 324 | |
|---|
| 325 | phydbl *GEO_MCMC(t_tree *tree) |
|---|
| 326 | { |
|---|
| 327 | phydbl *res; |
|---|
| 328 | int n_vars; |
|---|
| 329 | int rand_loc; |
|---|
| 330 | t_geo *t; |
|---|
| 331 | |
|---|
| 332 | t = tree->geo; |
|---|
| 333 | |
|---|
| 334 | tree->mcmc = MCMC_Make_MCMC_Struct(); |
|---|
| 335 | MCMC_Complete_MCMC(tree->mcmc,tree); |
|---|
| 336 | |
|---|
| 337 | tree->mcmc->io = NULL; |
|---|
| 338 | tree->mcmc->is = NO; |
|---|
| 339 | tree->mcmc->use_data = YES; |
|---|
| 340 | tree->mcmc->run = 0; |
|---|
| 341 | tree->mcmc->sample_interval = 1E+3; |
|---|
| 342 | tree->mcmc->chain_len = 1E+6; |
|---|
| 343 | tree->mcmc->chain_len_burnin = 1E+5; |
|---|
| 344 | tree->mcmc->randomize = YES; |
|---|
| 345 | tree->mcmc->norm_freq = 1E+3; |
|---|
| 346 | tree->mcmc->max_tune = 1.E+20; |
|---|
| 347 | tree->mcmc->min_tune = 1.E-10; |
|---|
| 348 | tree->mcmc->print_every = 2; |
|---|
| 349 | tree->mcmc->is_burnin = NO; |
|---|
| 350 | tree->mcmc->nd_t_digits = 1; |
|---|
| 351 | |
|---|
| 352 | t->tau = 1.0; |
|---|
| 353 | t->lbda = 1.0; |
|---|
| 354 | t->sigma = 1.0; |
|---|
| 355 | |
|---|
| 356 | tree->mcmc->chain_len = 1.E+8; |
|---|
| 357 | tree->mcmc->sample_interval = 50; |
|---|
| 358 | |
|---|
| 359 | MCMC_Complete_MCMC(tree->mcmc,tree); |
|---|
| 360 | |
|---|
| 361 | GEO_Update_Occup(t,tree); |
|---|
| 362 | GEO_Lk(t,tree); |
|---|
| 363 | |
|---|
| 364 | PhyML_Printf("\n. Init loglk: %f",t->c_lnL); |
|---|
| 365 | |
|---|
| 366 | tree->mcmc->start_ess[tree->mcmc->num_move_geo_sigma] = YES; |
|---|
| 367 | tree->mcmc->start_ess[tree->mcmc->num_move_geo_lambda] = YES; |
|---|
| 368 | tree->mcmc->start_ess[tree->mcmc->num_move_geo_tau] = YES; |
|---|
| 369 | |
|---|
| 370 | n_vars = 10; |
|---|
| 371 | res = (phydbl *)mCalloc(tree->mcmc->chain_len / tree->mcmc->sample_interval * n_vars,sizeof(phydbl)); |
|---|
| 372 | |
|---|
| 373 | |
|---|
| 374 | tree->mcmc->run = 0; |
|---|
| 375 | do |
|---|
| 376 | { |
|---|
| 377 | MCMC_Geo_Lbda(tree); |
|---|
| 378 | MCMC_Geo_Tau(tree); |
|---|
| 379 | MCMC_Geo_Loc(tree); |
|---|
| 380 | |
|---|
| 381 | t->update_fmat = YES; |
|---|
| 382 | MCMC_Geo_Sigma(tree); |
|---|
| 383 | t->update_fmat = NO; |
|---|
| 384 | |
|---|
| 385 | |
|---|
| 386 | /* t->update_fmat = YES; */ |
|---|
| 387 | /* MCMC_Geo_Updown_Lbda_Sigma(tree); */ |
|---|
| 388 | /* t->update_fmat = NO; */ |
|---|
| 389 | |
|---|
| 390 | |
|---|
| 391 | /* MCMC_Geo_Updown_Tau_Lbda(tree); */ |
|---|
| 392 | /* MCMC_Geo_Updown_Tau_Lbda(tree); */ |
|---|
| 393 | /* MCMC_Geo_Updown_Tau_Lbda(tree); */ |
|---|
| 394 | |
|---|
| 395 | |
|---|
| 396 | /* printf("\n"); */ |
|---|
| 397 | /* int i; */ |
|---|
| 398 | /* For(i,2*tree->n_otu-1) */ |
|---|
| 399 | /* { */ |
|---|
| 400 | /* if(tree->a_nodes[i]->tax == NO) */ |
|---|
| 401 | /* { */ |
|---|
| 402 | /* printf("%2d ",tree->geo->loc[i]); */ |
|---|
| 403 | /* } */ |
|---|
| 404 | /* } */ |
|---|
| 405 | |
|---|
| 406 | |
|---|
| 407 | if(tree->mcmc->run%tree->mcmc->sample_interval == 0) |
|---|
| 408 | { |
|---|
| 409 | MCMC_Copy_To_New_Param_Val(tree->mcmc,tree); |
|---|
| 410 | |
|---|
| 411 | MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_lambda,tree->mcmc,tree); |
|---|
| 412 | MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_sigma,tree->mcmc,tree); |
|---|
| 413 | MCMC_Update_Effective_Sample_Size(tree->mcmc->num_move_geo_tau,tree->mcmc,tree); |
|---|
| 414 | |
|---|
| 415 | /* printf("\n. lbda:%f,%f tau:%f,%f sigma:%f,%f", */ |
|---|
| 416 | /* tree->mcmc->acc_rate[tree->mcmc->num_move_geo_lambda], */ |
|---|
| 417 | /* tree->mcmc->tune_move[tree->mcmc->num_move_geo_lambda], */ |
|---|
| 418 | /* tree->mcmc->acc_rate[tree->mcmc->num_move_geo_tau], */ |
|---|
| 419 | /* tree->mcmc->tune_move[tree->mcmc->num_move_geo_tau], */ |
|---|
| 420 | /* tree->mcmc->acc_rate[tree->mcmc->num_move_geo_sigma], */ |
|---|
| 421 | /* tree->mcmc->tune_move[tree->mcmc->num_move_geo_sigma]); */ |
|---|
| 422 | |
|---|
| 423 | PhyML_Printf("\n. Run %6d Sigma: %12f [%4.0f] Lambda: %12f [%4.0f] Tau: %12f [%4.0f] LogLk: %12f x: %12f y:%12f", |
|---|
| 424 | tree->mcmc->run, |
|---|
| 425 | |
|---|
| 426 | t->sigma, |
|---|
| 427 | tree->mcmc->ess[tree->mcmc->num_move_geo_sigma], |
|---|
| 428 | |
|---|
| 429 | t->lbda, |
|---|
| 430 | tree->mcmc->ess[tree->mcmc->num_move_geo_lambda], |
|---|
| 431 | |
|---|
| 432 | t->tau, |
|---|
| 433 | tree->mcmc->ess[tree->mcmc->num_move_geo_tau], |
|---|
| 434 | |
|---|
| 435 | t->c_lnL, |
|---|
| 436 | |
|---|
| 437 | t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0], |
|---|
| 438 | t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1]); |
|---|
| 439 | |
|---|
| 440 | rand_loc = Rand_Int(0,t->ldscape_sz-1); |
|---|
| 441 | |
|---|
| 442 | res[0 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->sigma; |
|---|
| 443 | res[1 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->lbda; |
|---|
| 444 | res[2 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->tau; |
|---|
| 445 | res[3 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->c_lnL; |
|---|
| 446 | |
|---|
| 447 | res[4 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[t->loc[tree->n_root->num]*t->n_dim+0]; |
|---|
| 448 | res[5 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[t->loc[tree->n_root->num]*t->n_dim+1]; |
|---|
| 449 | |
|---|
| 450 | res[6 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[rand_loc*t->n_dim+0]; |
|---|
| 451 | res[7 * tree->mcmc->chain_len / tree->mcmc->sample_interval + tree->mcmc->run / tree->mcmc->sample_interval] = t->ldscape[rand_loc*t->n_dim+1]; |
|---|
| 452 | } |
|---|
| 453 | |
|---|
| 454 | tree->mcmc->run++; |
|---|
| 455 | |
|---|
| 456 | if(tree->mcmc->ess[tree->mcmc->num_move_geo_sigma] > 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_sigma] = NO; |
|---|
| 457 | if(tree->mcmc->ess[tree->mcmc->num_move_geo_tau] > 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_tau] = NO; |
|---|
| 458 | if(tree->mcmc->ess[tree->mcmc->num_move_geo_lambda]> 200.) tree->mcmc->adjust_tuning[tree->mcmc->num_move_geo_lambda] = NO; |
|---|
| 459 | |
|---|
| 460 | MCMC_Get_Acc_Rates(tree->mcmc); |
|---|
| 461 | |
|---|
| 462 | if(tree->mcmc->ess[tree->mcmc->num_move_geo_sigma] > 1000. && |
|---|
| 463 | tree->mcmc->ess[tree->mcmc->num_move_geo_tau] > 1000. && |
|---|
| 464 | tree->mcmc->ess[tree->mcmc->num_move_geo_lambda]> 1000.) break; |
|---|
| 465 | |
|---|
| 466 | |
|---|
| 467 | } |
|---|
| 468 | while(tree->mcmc->run < tree->mcmc->chain_len); |
|---|
| 469 | |
|---|
| 470 | return(res); |
|---|
| 471 | |
|---|
| 472 | } |
|---|
| 473 | |
|---|
| 474 | ////////////////////////////////////////////////////////////// |
|---|
| 475 | ////////////////////////////////////////////////////////////// |
|---|
| 476 | |
|---|
| 477 | t_geo *GEO_Make_Geo_Basic() |
|---|
| 478 | { |
|---|
| 479 | t_geo *t; |
|---|
| 480 | t = (t_geo *)mCalloc(1,sizeof(t_geo)); |
|---|
| 481 | return(t); |
|---|
| 482 | } |
|---|
| 483 | |
|---|
| 484 | ////////////////////////////////////////////////////////////// |
|---|
| 485 | ////////////////////////////////////////////////////////////// |
|---|
| 486 | |
|---|
| 487 | void GEO_Make_Geo_Complete(int ldscape_sz, int n_dim, int n_tax, t_geo *t) |
|---|
| 488 | { |
|---|
| 489 | |
|---|
| 490 | // F matrix |
|---|
| 491 | t->f_mat = (phydbl *)mCalloc(ldscape_sz*ldscape_sz,sizeof(phydbl)); |
|---|
| 492 | |
|---|
| 493 | // R matrix |
|---|
| 494 | t->r_mat = (phydbl *)mCalloc(ldscape_sz*ldscape_sz,sizeof(phydbl)); |
|---|
| 495 | |
|---|
| 496 | // Occupation vectors: one vector for each node |
|---|
| 497 | t->occup = (int *)mCalloc((2*n_tax-1)*ldscape_sz,sizeof(int)); |
|---|
| 498 | |
|---|
| 499 | // Locations |
|---|
| 500 | t->ldscape = (phydbl *)mCalloc((ldscape_sz*n_dim),sizeof(phydbl)); |
|---|
| 501 | |
|---|
| 502 | // Lineage locations |
|---|
| 503 | t->loc = (int *)mCalloc((int)(2*n_tax-1),sizeof(int)); |
|---|
| 504 | |
|---|
| 505 | // Sorted node heights |
|---|
| 506 | t->sorted_nd = (t_node **)mCalloc((int)(2*n_tax-1),sizeof(t_node *)); |
|---|
| 507 | |
|---|
| 508 | // Covariance matrix |
|---|
| 509 | t->cov = (phydbl *)mCalloc((int)(n_dim*n_dim),sizeof(phydbl)); |
|---|
| 510 | |
|---|
| 511 | // gives the location occupied beneath each node in the tree |
|---|
| 512 | t->loc_beneath = (int *)mCalloc((int)(2*n_tax-1)*ldscape_sz,sizeof(int)); |
|---|
| 513 | } |
|---|
| 514 | |
|---|
| 515 | ////////////////////////////////////////////////////////////// |
|---|
| 516 | ////////////////////////////////////////////////////////////// |
|---|
| 517 | |
|---|
| 518 | void Free_Geo(t_geo *t) |
|---|
| 519 | { |
|---|
| 520 | Free(t->f_mat); |
|---|
| 521 | Free(t->r_mat); |
|---|
| 522 | Free(t->occup); |
|---|
| 523 | Free(t->loc); |
|---|
| 524 | Free(t->ldscape); |
|---|
| 525 | Free(t->sorted_nd); |
|---|
| 526 | Free(t->cov); |
|---|
| 527 | Free(t->loc_beneath); |
|---|
| 528 | } |
|---|
| 529 | |
|---|
| 530 | ////////////////////////////////////////////////////////////// |
|---|
| 531 | ////////////////////////////////////////////////////////////// |
|---|
| 532 | |
|---|
| 533 | // Update F matrix. Assume a diagonal covariance matrix. |
|---|
| 534 | void GEO_Update_Fmat(t_geo *t) |
|---|
| 535 | { |
|---|
| 536 | phydbl *loc1, *loc2; |
|---|
| 537 | int i,j,k; |
|---|
| 538 | int err; |
|---|
| 539 | phydbl lognloc; |
|---|
| 540 | |
|---|
| 541 | For(i,t->n_dim) t->cov[i*t->n_dim+i] = t->sigma; // Diagonal covariance matrix. Same variance in every direction |
|---|
| 542 | |
|---|
| 543 | lognloc = LOG(t->ldscape_sz); |
|---|
| 544 | |
|---|
| 545 | // Fill in F matrix; |
|---|
| 546 | for(i=0;i<t->ldscape_sz;i++) |
|---|
| 547 | { |
|---|
| 548 | loc1 = t->ldscape + i*t->n_dim; |
|---|
| 549 | |
|---|
| 550 | for(j=i;j<t->ldscape_sz;j++) |
|---|
| 551 | { |
|---|
| 552 | loc2 = t->ldscape + j*t->n_dim; |
|---|
| 553 | |
|---|
| 554 | t->f_mat[i*t->ldscape_sz+j] = .0; |
|---|
| 555 | |
|---|
| 556 | // Calculate log(f(l_i,l_j)) - log(f(l_i,l_i) - log(m) |
|---|
| 557 | For(k,t->n_dim) t->f_mat[i*t->ldscape_sz+j] += Log_Dnorm(loc2[k],loc1[k],t->cov[k*t->n_dim+k],&err); |
|---|
| 558 | t->f_mat[i*t->ldscape_sz+j] -= lognloc; |
|---|
| 559 | For(k,t->n_dim) t->f_mat[i*t->ldscape_sz+j] -= Log_Dnorm(loc1[k],loc1[k],t->cov[k*t->n_dim+k],&err); |
|---|
| 560 | |
|---|
| 561 | // Take the exponential |
|---|
| 562 | t->f_mat[i*t->ldscape_sz+j] = EXP(t->f_mat[i*t->ldscape_sz+j]); |
|---|
| 563 | |
|---|
| 564 | // Matrix is symmetric |
|---|
| 565 | t->f_mat[j*t->ldscape_sz+i] = t->f_mat[i*t->ldscape_sz+j]; |
|---|
| 566 | |
|---|
| 567 | /* printf("\n. f[%d,%d] = %f (1:[%f;%f] 2:[%f;%f]) sigma=%f",i,j,t->f_mat[i*t->ldscape_sz+j],loc1[0],loc1[1],loc2[0],loc2[1],SQRT(t->cov[0*t->n_dim+0])); */ |
|---|
| 568 | } |
|---|
| 569 | } |
|---|
| 570 | } |
|---|
| 571 | |
|---|
| 572 | ////////////////////////////////////////////////////////////// |
|---|
| 573 | ////////////////////////////////////////////////////////////// |
|---|
| 574 | |
|---|
| 575 | // Sort node heights from oldest to youngest age. |
|---|
| 576 | void GEO_Update_Sorted_Nd(t_geo *t, t_tree *tree) |
|---|
| 577 | { |
|---|
| 578 | int i; |
|---|
| 579 | int swap; |
|---|
| 580 | t_node *buff; |
|---|
| 581 | |
|---|
| 582 | buff = NULL; |
|---|
| 583 | |
|---|
| 584 | For(i,2*tree->n_otu-1) t->sorted_nd[i] = tree->a_nodes[i]; |
|---|
| 585 | |
|---|
| 586 | // Bubble sort of the node heights |
|---|
| 587 | do |
|---|
| 588 | { |
|---|
| 589 | swap = NO; |
|---|
| 590 | For(i,2*tree->n_otu-2) |
|---|
| 591 | { |
|---|
| 592 | if(tree->rates->nd_t[t->sorted_nd[i+1]->num] < tree->rates->nd_t[t->sorted_nd[i]->num]) |
|---|
| 593 | { |
|---|
| 594 | buff = t->sorted_nd[i]; |
|---|
| 595 | t->sorted_nd[i] = t->sorted_nd[i+1]; |
|---|
| 596 | t->sorted_nd[i+1] = buff; |
|---|
| 597 | |
|---|
| 598 | swap = YES; |
|---|
| 599 | } |
|---|
| 600 | } |
|---|
| 601 | } |
|---|
| 602 | while(swap == YES); |
|---|
| 603 | } |
|---|
| 604 | |
|---|
| 605 | ////////////////////////////////////////////////////////////// |
|---|
| 606 | ////////////////////////////////////////////////////////////// |
|---|
| 607 | |
|---|
| 608 | // Update the set of vectors of occupation along the tree |
|---|
| 609 | void GEO_Update_Occup(t_geo *t, t_tree *tree) |
|---|
| 610 | { |
|---|
| 611 | int i,j; |
|---|
| 612 | t_node *v1, *v2; |
|---|
| 613 | |
|---|
| 614 | GEO_Update_Sorted_Nd(t,tree); |
|---|
| 615 | |
|---|
| 616 | For(i,t->ldscape_sz*(2*tree->n_otu-1)) t->occup[i] = 0; |
|---|
| 617 | |
|---|
| 618 | t->occup[tree->n_root->num*t->ldscape_sz + t->loc[tree->n_root->num]] = 1; |
|---|
| 619 | |
|---|
| 620 | for(i=1;i<2*tree->n_otu-1;i++) |
|---|
| 621 | { |
|---|
| 622 | For(j,t->ldscape_sz) |
|---|
| 623 | { |
|---|
| 624 | t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j] = |
|---|
| 625 | t->occup[t->sorted_nd[i-1]->num*t->ldscape_sz + j]; |
|---|
| 626 | } |
|---|
| 627 | |
|---|
| 628 | |
|---|
| 629 | if(t->sorted_nd[i-1]->tax == NO) |
|---|
| 630 | { |
|---|
| 631 | v1 = v2 = NULL; |
|---|
| 632 | if(t->sorted_nd[i-1] == tree->n_root) |
|---|
| 633 | { |
|---|
| 634 | v1 = tree->n_root->v[1]; |
|---|
| 635 | v2 = tree->n_root->v[2]; |
|---|
| 636 | } |
|---|
| 637 | else |
|---|
| 638 | { |
|---|
| 639 | For(j,3) |
|---|
| 640 | { |
|---|
| 641 | if(t->sorted_nd[i-1]->v[j] != t->sorted_nd[i-1]->anc && |
|---|
| 642 | t->sorted_nd[i-1]->b[j] != tree->e_root) |
|---|
| 643 | { |
|---|
| 644 | if(!v1) v1 = t->sorted_nd[i-1]->v[j]; |
|---|
| 645 | else v2 = t->sorted_nd[i-1]->v[j]; |
|---|
| 646 | } |
|---|
| 647 | } |
|---|
| 648 | } |
|---|
| 649 | |
|---|
| 650 | |
|---|
| 651 | if(t->loc[v1->num] != t->loc[t->sorted_nd[i-1]->num]) |
|---|
| 652 | { |
|---|
| 653 | t->occup[t->sorted_nd[i]->num * t->ldscape_sz + t->loc[v1->num]]++; |
|---|
| 654 | } |
|---|
| 655 | else |
|---|
| 656 | { |
|---|
| 657 | t->occup[t->sorted_nd[i]->num * t->ldscape_sz + t->loc[v2->num]]++; |
|---|
| 658 | } |
|---|
| 659 | } |
|---|
| 660 | } |
|---|
| 661 | |
|---|
| 662 | /* printf("\n"); */ |
|---|
| 663 | /* For(i,2*tree->n_otu-1) */ |
|---|
| 664 | /* { */ |
|---|
| 665 | /* printf("\n. Node %3d: ",t->sorted_nd[i]->num); */ |
|---|
| 666 | /* For(j,t->ldscape_sz) */ |
|---|
| 667 | /* { */ |
|---|
| 668 | /* /\* printf("%3d [%12f;%12f] ", *\/ */ |
|---|
| 669 | /* /\* t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j], *\/ */ |
|---|
| 670 | /* /\* t->ldscape[j*t->n_dim+0],t->ldscape[j*t->n_dim+1]); *\/ */ |
|---|
| 671 | /* /\* printf("%3d ", *\/ */ |
|---|
| 672 | /* /\* t->occup[t->sorted_nd[i]->num*t->ldscape_sz + j]); *\/ */ |
|---|
| 673 | /* } */ |
|---|
| 674 | /* } */ |
|---|
| 675 | } |
|---|
| 676 | |
|---|
| 677 | ////////////////////////////////////////////////////////////// |
|---|
| 678 | ////////////////////////////////////////////////////////////// |
|---|
| 679 | |
|---|
| 680 | // Calculate R mat at node n |
|---|
| 681 | void GEO_Update_Rmat(t_node *n, t_geo *t, t_tree *tree) |
|---|
| 682 | { |
|---|
| 683 | int i,j; |
|---|
| 684 | phydbl lbda_j; |
|---|
| 685 | |
|---|
| 686 | For(i,t->ldscape_sz) |
|---|
| 687 | { |
|---|
| 688 | For(j,t->ldscape_sz) |
|---|
| 689 | { |
|---|
| 690 | lbda_j = ((t->occup[n->num*t->ldscape_sz + j]==0) ? (1.0) : (t->lbda)); |
|---|
| 691 | t->r_mat[i*t->ldscape_sz+j] = t->f_mat[i*t->ldscape_sz+j] * lbda_j * t->tau; |
|---|
| 692 | } |
|---|
| 693 | } |
|---|
| 694 | } |
|---|
| 695 | |
|---|
| 696 | ////////////////////////////////////////////////////////////// |
|---|
| 697 | ////////////////////////////////////////////////////////////// |
|---|
| 698 | |
|---|
| 699 | phydbl GEO_Lk(t_geo *t, t_tree *tree) |
|---|
| 700 | { |
|---|
| 701 | int i,j; |
|---|
| 702 | phydbl loglk; |
|---|
| 703 | phydbl R; |
|---|
| 704 | int dep,arr; // departure and arrival location indices; |
|---|
| 705 | t_node *curr_n,*prev_n,*v1,*v2; |
|---|
| 706 | phydbl sum; |
|---|
| 707 | |
|---|
| 708 | GEO_Update_Occup(t,tree); // Same here. |
|---|
| 709 | |
|---|
| 710 | if(t->update_fmat == YES) GEO_Update_Fmat(t); |
|---|
| 711 | |
|---|
| 712 | prev_n = NULL; |
|---|
| 713 | curr_n = NULL; |
|---|
| 714 | loglk = .0; |
|---|
| 715 | for(i=1;i<tree->n_otu-1;i++) // Consider all the time slices, from oldest to youngest. |
|---|
| 716 | // Start at first node below root |
|---|
| 717 | { |
|---|
| 718 | prev_n = t->sorted_nd[i-1]; // node just above |
|---|
| 719 | curr_n = t->sorted_nd[i]; // current node |
|---|
| 720 | |
|---|
| 721 | GEO_Update_Rmat(curr_n,t,tree); // NOTE: don't need to do that every time. Add check later. |
|---|
| 722 | |
|---|
| 723 | R = GEO_Total_Migration_Rate(curr_n,t); // Total migration rate calculated at node n |
|---|
| 724 | |
|---|
| 725 | /* if(i < 2) */ |
|---|
| 726 | /* { */ |
|---|
| 727 | /* printf("\n. t0: %f t1: %f R: %f tau: %f sigma: %f lbda: %f x1: %f y1: %f x2: %f y2: %f log0: %d loc1: %d rij: %G fij: %G", */ |
|---|
| 728 | /* tree->rates->nd_t[t->sorted_nd[i-1]->num], */ |
|---|
| 729 | /* tree->rates->nd_t[t->sorted_nd[i]->num], */ |
|---|
| 730 | /* R, */ |
|---|
| 731 | /* t->tau, */ |
|---|
| 732 | /* t->lbda, */ |
|---|
| 733 | /* t->sigma, */ |
|---|
| 734 | /* t->ldscape[t->loc[tree->geo->sorted_nd[i-1]->num]*t->n_dim+0], */ |
|---|
| 735 | /* t->ldscape[t->loc[tree->geo->sorted_nd[i-1]->num]*t->n_dim+1], */ |
|---|
| 736 | /* t->ldscape[t->loc[tree->geo->sorted_nd[i]->num]*t->n_dim+0], */ |
|---|
| 737 | /* t->ldscape[t->loc[tree->geo->sorted_nd[i]->num]*t->n_dim+1], */ |
|---|
| 738 | /* t->loc[tree->geo->sorted_nd[i-1]->num], */ |
|---|
| 739 | /* t->loc[tree->geo->sorted_nd[i]->num], */ |
|---|
| 740 | /* t->r_mat[t->loc[tree->geo->sorted_nd[i-1]->num] * t->ldscape_sz + t->loc[tree->geo->sorted_nd[i]->num]], */ |
|---|
| 741 | /* t->f_mat[t->loc[tree->geo->sorted_nd[i-1]->num] * t->ldscape_sz + t->loc[tree->geo->sorted_nd[i]->num]] */ |
|---|
| 742 | /* ); */ |
|---|
| 743 | |
|---|
| 744 | /* printf("\n >> "); */ |
|---|
| 745 | /* int j; */ |
|---|
| 746 | /* For(j,t->ldscape_sz) */ |
|---|
| 747 | /* if(t->occup[t->sorted_nd[i]->num * t->ldscape_sz + j] > 0) */ |
|---|
| 748 | /* printf("%f %f -- ", */ |
|---|
| 749 | /* t->ldscape[j*t->n_dim+0], */ |
|---|
| 750 | /* t->ldscape[j*t->n_dim+1]); */ |
|---|
| 751 | /* } */ |
|---|
| 752 | |
|---|
| 753 | |
|---|
| 754 | /* printf("\n. %d %d (%d) %f %p %p \n",i,curr_n->num,curr_n->tax,tree->rates->nd_t[curr_n->num],curr_n->v[1],curr_n->v[2]); */ |
|---|
| 755 | |
|---|
| 756 | v1 = v2 = NULL; |
|---|
| 757 | For(j,3) |
|---|
| 758 | if(curr_n->v[j] != curr_n->anc && curr_n->b[j] != tree->e_root) |
|---|
| 759 | { |
|---|
| 760 | if(!v1) v1 = curr_n->v[j]; |
|---|
| 761 | else v2 = curr_n->v[j]; |
|---|
| 762 | } |
|---|
| 763 | |
|---|
| 764 | dep = t->loc[curr_n->num]; // departure location |
|---|
| 765 | arr = // arrival location |
|---|
| 766 | (t->loc[v1->num] == t->loc[curr_n->num] ? |
|---|
| 767 | t->loc[v2->num] : |
|---|
| 768 | t->loc[v1->num]); |
|---|
| 769 | |
|---|
| 770 | /* printf("\n%f\t%f", */ |
|---|
| 771 | /* t->ldscape[arr*t->n_dim+0]-t->ldscape[dep*t->n_dim+0], */ |
|---|
| 772 | /* t->ldscape[arr*t->n_dim+1]-t->ldscape[dep*t->n_dim+1]); */ |
|---|
| 773 | |
|---|
| 774 | loglk -= R * FABS(tree->rates->nd_t[curr_n->num] - tree->rates->nd_t[prev_n->num]); |
|---|
| 775 | loglk += LOG(t->r_mat[dep * t->ldscape_sz + arr]); |
|---|
| 776 | |
|---|
| 777 | /* printf("\n <> %d %f %f %d %d v1:%d v2:%d anc:%d %d %d %d", */ |
|---|
| 778 | /* curr_n->num, */ |
|---|
| 779 | /* R * FABS(tree->rates->nd_t[curr_n->num] - tree->rates->nd_t[prev_n->num]), */ |
|---|
| 780 | /* LOG(t->r_mat[dep * t->ldscape_sz + arr]), */ |
|---|
| 781 | /* dep,arr,v1->num,v2->num,curr_n->anc->num,curr_n->v[0]->num,curr_n->v[1]->num,curr_n->v[2]->num); */ |
|---|
| 782 | |
|---|
| 783 | /* if(i<2) */ |
|---|
| 784 | /* { */ |
|---|
| 785 | /* printf("\n. R = %f r_mat = %f f_mat = %f dt = %f loglk = %f", */ |
|---|
| 786 | /* R, */ |
|---|
| 787 | /* t->r_mat[dep * t->ldscape_sz + arr], */ |
|---|
| 788 | /* t->f_mat[dep * t->ldscape_sz + arr], */ |
|---|
| 789 | /* FABS(t->sorted_nd[i] - t->sorted_nd[i-1]),loglk); */ |
|---|
| 790 | /* Exit("\n"); */ |
|---|
| 791 | /* } */ |
|---|
| 792 | |
|---|
| 793 | } |
|---|
| 794 | |
|---|
| 795 | |
|---|
| 796 | // Likelihood for the first 'slice' (i.e., the part just below the root down to |
|---|
| 797 | // the next node) |
|---|
| 798 | GEO_Update_Rmat(tree->n_root,t,tree); |
|---|
| 799 | |
|---|
| 800 | loglk -= LOG(t->ldscape_sz); |
|---|
| 801 | dep = t->loc[tree->n_root->num]; |
|---|
| 802 | arr = |
|---|
| 803 | (t->loc[tree->n_root->num] != t->loc[tree->n_root->v[1]->num] ? |
|---|
| 804 | t->loc[tree->n_root->v[1]->num] : |
|---|
| 805 | t->loc[tree->n_root->v[2]->num]); |
|---|
| 806 | |
|---|
| 807 | /* printf("\n %f %f",t->ldscape[dep],t->ldscape[arr]); */ |
|---|
| 808 | |
|---|
| 809 | loglk += LOG(t->r_mat[dep * t->ldscape_sz + arr]); |
|---|
| 810 | |
|---|
| 811 | sum = .0; |
|---|
| 812 | For(i,t->ldscape_sz) sum += t->r_mat[dep * t->ldscape_sz + i]; |
|---|
| 813 | |
|---|
| 814 | loglk -= LOG(sum); |
|---|
| 815 | |
|---|
| 816 | tree->geo->c_lnL = loglk; |
|---|
| 817 | |
|---|
| 818 | return loglk; |
|---|
| 819 | } |
|---|
| 820 | |
|---|
| 821 | ////////////////////////////////////////////////////////////// |
|---|
| 822 | ////////////////////////////////////////////////////////////// |
|---|
| 823 | |
|---|
| 824 | void GEO_Init_Tloc_Tips(t_geo *t, t_tree *tree) |
|---|
| 825 | { |
|---|
| 826 | int i; |
|---|
| 827 | |
|---|
| 828 | // TO DO |
|---|
| 829 | For(i,tree->n_otu) |
|---|
| 830 | { |
|---|
| 831 | t->loc[i] = i; |
|---|
| 832 | } |
|---|
| 833 | } |
|---|
| 834 | |
|---|
| 835 | ////////////////////////////////////////////////////////////// |
|---|
| 836 | ////////////////////////////////////////////////////////////// |
|---|
| 837 | |
|---|
| 838 | // Do not forget to call GEO_Update_Rmat (with node n) before calling this function |
|---|
| 839 | phydbl GEO_Total_Migration_Rate(t_node *n, t_geo *t) |
|---|
| 840 | { |
|---|
| 841 | phydbl R; |
|---|
| 842 | int i,j; |
|---|
| 843 | |
|---|
| 844 | R = .0; |
|---|
| 845 | |
|---|
| 846 | For(i,t->ldscape_sz) |
|---|
| 847 | { |
|---|
| 848 | For(j,t->ldscape_sz) |
|---|
| 849 | { |
|---|
| 850 | R += |
|---|
| 851 | t->r_mat[i * t->ldscape_sz + j] * |
|---|
| 852 | t->occup[n->num * t->ldscape_sz + i]; |
|---|
| 853 | } |
|---|
| 854 | } |
|---|
| 855 | |
|---|
| 856 | return R; |
|---|
| 857 | } |
|---|
| 858 | |
|---|
| 859 | ////////////////////////////////////////////////////////////// |
|---|
| 860 | ////////////////////////////////////////////////////////////// |
|---|
| 861 | |
|---|
| 862 | // Find the arrival location for the migration leaving from n |
|---|
| 863 | int GEO_Get_Arrival_Location(t_node *n, t_geo *t, t_tree *tree) |
|---|
| 864 | { |
|---|
| 865 | int i; |
|---|
| 866 | t_node *v1, *v2; // the two daughters of n |
|---|
| 867 | |
|---|
| 868 | v1 = v2 = NULL; |
|---|
| 869 | |
|---|
| 870 | For(i,3) |
|---|
| 871 | { |
|---|
| 872 | if(n->v[i] && n->v[i] != n->anc) |
|---|
| 873 | { |
|---|
| 874 | if(!v1) v1 = n->v[i]; |
|---|
| 875 | else v2 = n->v[i]; |
|---|
| 876 | } |
|---|
| 877 | } |
|---|
| 878 | |
|---|
| 879 | if(t->loc[v1->num] == t->loc[v2->num]) // Migrated to the same location as that of n |
|---|
| 880 | { |
|---|
| 881 | if(t->loc[n->num] != t->loc[v1->num]) |
|---|
| 882 | { |
|---|
| 883 | PhyML_Printf("\n== Error detected in location labeling."); |
|---|
| 884 | PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__); |
|---|
| 885 | Exit("\n"); |
|---|
| 886 | } |
|---|
| 887 | else |
|---|
| 888 | return t->loc[n->num]; |
|---|
| 889 | } |
|---|
| 890 | else // Migrated to a different spot |
|---|
| 891 | { |
|---|
| 892 | if((t->loc[v1->num] != t->loc[n->num]) && (t->loc[v2->num] != t->loc[n->num])) |
|---|
| 893 | { |
|---|
| 894 | PhyML_Printf("\n== Error detected in location labeling."); |
|---|
| 895 | PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__); |
|---|
| 896 | Exit("\n"); |
|---|
| 897 | } |
|---|
| 898 | else |
|---|
| 899 | { |
|---|
| 900 | if(t->loc[v1->num] == t->loc[n->num]) return t->loc[v2->num]; // v2 gets the new location, v1 inheritates from n |
|---|
| 901 | else return t->loc[v1->num]; // v1 gets the new location, v2 inheritates from n |
|---|
| 902 | } |
|---|
| 903 | } |
|---|
| 904 | return -1; |
|---|
| 905 | } |
|---|
| 906 | |
|---|
| 907 | ////////////////////////////////////////////////////////////// |
|---|
| 908 | ////////////////////////////////////////////////////////////// |
|---|
| 909 | |
|---|
| 910 | t_tree *GEO_Simulate(t_geo *t, int n_otu) |
|---|
| 911 | { |
|---|
| 912 | t_tree *tree; |
|---|
| 913 | int n_branching_nodes; |
|---|
| 914 | t_node **branching_nodes; // vector of nodes available for branching out |
|---|
| 915 | phydbl *p_branch; // p_branch[i]: proba that the i-th node in branching_nodes branches out (p_x vector in the article) |
|---|
| 916 | phydbl *p_mig; // p_branch[i]: proba of migrating to location i from the location of the edge branching out (q_i vector in the article) |
|---|
| 917 | int hit; |
|---|
| 918 | phydbl time; |
|---|
| 919 | int dep, arr; |
|---|
| 920 | int i,j; |
|---|
| 921 | phydbl sum; |
|---|
| 922 | phydbl R; |
|---|
| 923 | int *occup; // occupation vector. Updated as we move down the tree |
|---|
| 924 | int nd_idx; |
|---|
| 925 | t_node *buff_nd; |
|---|
| 926 | phydbl buff_t; |
|---|
| 927 | int buff_l; |
|---|
| 928 | int swap; |
|---|
| 929 | |
|---|
| 930 | tree = Make_Tree_From_Scratch(n_otu,NULL); |
|---|
| 931 | tree->rates = RATES_Make_Rate_Struct(tree->n_otu); |
|---|
| 932 | RATES_Init_Rate_Struct(tree->rates,NULL,tree->n_otu); |
|---|
| 933 | tree->n_root = tree->a_nodes[2*tree->n_otu-2]; // Set the root node to the last element in the list of nodes |
|---|
| 934 | tree->geo = t; |
|---|
| 935 | |
|---|
| 936 | |
|---|
| 937 | For(i,2*tree->n_otu-2) tree->rates->nd_t[i] = -1.; |
|---|
| 938 | |
|---|
| 939 | occup = (int *)mCalloc(t->ldscape_sz,sizeof(int)); |
|---|
| 940 | |
|---|
| 941 | GEO_Update_Fmat(t); |
|---|
| 942 | |
|---|
| 943 | branching_nodes = (t_node **)mCalloc(tree->n_otu,sizeof(t_node *)); |
|---|
| 944 | branching_nodes[0] = tree->n_root; |
|---|
| 945 | n_branching_nodes = 1; |
|---|
| 946 | nd_idx = 0; |
|---|
| 947 | |
|---|
| 948 | |
|---|
| 949 | p_branch = (phydbl *)mCalloc(tree->n_otu,sizeof(phydbl )); |
|---|
| 950 | p_branch[0] = 1.0; |
|---|
| 951 | |
|---|
| 952 | p_mig = (phydbl *)mCalloc(t->ldscape_sz,sizeof(phydbl )); |
|---|
| 953 | |
|---|
| 954 | time = 0.0; // Time at the root node (this will be changed afterward) |
|---|
| 955 | |
|---|
| 956 | // Sample a location uniformly for the root |
|---|
| 957 | t->loc[tree->n_root->num] = Rand_Int(0,t->ldscape_sz-1); |
|---|
| 958 | |
|---|
| 959 | // Update the occupancy vector |
|---|
| 960 | occup[t->loc[tree->n_root->num]] = 1; |
|---|
| 961 | |
|---|
| 962 | dep = arr = -1; |
|---|
| 963 | |
|---|
| 964 | |
|---|
| 965 | // total migration rate |
|---|
| 966 | R = 0.0; |
|---|
| 967 | For(i,t->ldscape_sz) |
|---|
| 968 | { |
|---|
| 969 | R += |
|---|
| 970 | t->f_mat[t->loc[tree->n_root->num]*t->ldscape_sz+i] * |
|---|
| 971 | ((occup[i] == 0) ? (1.0) : (t->lbda)) * |
|---|
| 972 | t->tau; |
|---|
| 973 | } |
|---|
| 974 | |
|---|
| 975 | do |
|---|
| 976 | { |
|---|
| 977 | // Select the node that branches out |
|---|
| 978 | hit = Sample_i_With_Proba_pi(p_branch,n_branching_nodes); |
|---|
| 979 | |
|---|
| 980 | /* printf("\n. [%d] Select node %d (location %d)",n_branching_nodes,branching_nodes[hit]->num,t->loc[branching_nodes[hit]->num]); */ |
|---|
| 981 | |
|---|
| 982 | // Set the time for the branching node |
|---|
| 983 | tree->rates->nd_t[branching_nodes[hit]->num] = time; |
|---|
| 984 | |
|---|
| 985 | |
|---|
| 986 | /* printf("\n. Set its time to %f",time); */ |
|---|
| 987 | |
|---|
| 988 | // Select the destination location |
|---|
| 989 | dep = t->loc[branching_nodes[hit]->num]; // Departure point |
|---|
| 990 | |
|---|
| 991 | sum = .0; |
|---|
| 992 | For(i,t->ldscape_sz) // Total rate of migration out of departure point |
|---|
| 993 | { |
|---|
| 994 | p_mig[i] = |
|---|
| 995 | t->f_mat[dep*t->ldscape_sz+i] * |
|---|
| 996 | ((occup[i] == 0) ? (1.0) : (t->lbda)) * |
|---|
| 997 | t->tau; |
|---|
| 998 | |
|---|
| 999 | sum += p_mig[i]; |
|---|
| 1000 | } |
|---|
| 1001 | For(i,t->ldscape_sz) p_mig[i] /= sum; |
|---|
| 1002 | |
|---|
| 1003 | arr = Sample_i_With_Proba_pi(p_mig,t->ldscape_sz); |
|---|
| 1004 | |
|---|
| 1005 | /* printf("\n. Migrate from %d [%5.2f,%5.2f] to %d [%5.2f,%5.2f]", */ |
|---|
| 1006 | /* dep, */ |
|---|
| 1007 | /* t->ldscape[dep*t->n_dim+0], */ |
|---|
| 1008 | /* t->ldscape[dep*t->n_dim+1], */ |
|---|
| 1009 | /* arr, */ |
|---|
| 1010 | /* t->ldscape[arr*t->n_dim+0], */ |
|---|
| 1011 | /* t->ldscape[arr*t->n_dim+1]); */ |
|---|
| 1012 | |
|---|
| 1013 | /* printf("\n%f\t%f", */ |
|---|
| 1014 | /* t->ldscape[arr*t->n_dim+0]-t->ldscape[dep*t->n_dim+0], */ |
|---|
| 1015 | /* t->ldscape[arr*t->n_dim+1]-t->ldscape[dep*t->n_dim+1]); */ |
|---|
| 1016 | |
|---|
| 1017 | |
|---|
| 1018 | // Update vector of occupation |
|---|
| 1019 | occup[arr]++; |
|---|
| 1020 | |
|---|
| 1021 | /* printf("\n. Remove %d. Add %d and %d",branching_nodes[hit]->num,tree->a_nodes[nd_idx]->num,tree->a_nodes[nd_idx+1]->num); */ |
|---|
| 1022 | // Connect two new nodes to the node undergoing a branching event |
|---|
| 1023 | tree->a_nodes[nd_idx]->v[0] = branching_nodes[hit]; |
|---|
| 1024 | tree->a_nodes[nd_idx+1]->v[0] = branching_nodes[hit]; |
|---|
| 1025 | branching_nodes[hit]->v[1] = tree->a_nodes[nd_idx]; |
|---|
| 1026 | branching_nodes[hit]->v[2] = tree->a_nodes[nd_idx+1]; |
|---|
| 1027 | |
|---|
| 1028 | // update branching_nodes vector. Element 'hit' is being replaced so that the corresponding node can no longer branch out |
|---|
| 1029 | branching_nodes[hit] = tree->a_nodes[nd_idx]; |
|---|
| 1030 | branching_nodes[n_branching_nodes] = tree->a_nodes[nd_idx+1]; |
|---|
| 1031 | |
|---|
| 1032 | // Update t_loc vector. |
|---|
| 1033 | t->loc[tree->a_nodes[nd_idx]->num] = dep; |
|---|
| 1034 | t->loc[tree->a_nodes[nd_idx+1]->num] = arr; |
|---|
| 1035 | |
|---|
| 1036 | // Update total migration rate |
|---|
| 1037 | R = .0; |
|---|
| 1038 | For(i,t->ldscape_sz) |
|---|
| 1039 | { |
|---|
| 1040 | if(occup[i] > 0) |
|---|
| 1041 | { |
|---|
| 1042 | For(j,t->ldscape_sz) |
|---|
| 1043 | { |
|---|
| 1044 | R += |
|---|
| 1045 | occup[i] * |
|---|
| 1046 | t->f_mat[i*t->ldscape_sz+j] * |
|---|
| 1047 | ((occup[j] == 0) ? (1.0) : (t->lbda)) * |
|---|
| 1048 | t->tau; |
|---|
| 1049 | } |
|---|
| 1050 | } |
|---|
| 1051 | } |
|---|
| 1052 | |
|---|
| 1053 | // Set the time until next branching event |
|---|
| 1054 | time = time + Rexp(R); |
|---|
| 1055 | |
|---|
| 1056 | // Update p_branch vector |
|---|
| 1057 | For(i,n_branching_nodes+1) |
|---|
| 1058 | { |
|---|
| 1059 | dep = t->loc[branching_nodes[i]->num]; |
|---|
| 1060 | p_branch[i] = 0.0; |
|---|
| 1061 | For(j,t->ldscape_sz) |
|---|
| 1062 | { |
|---|
| 1063 | p_branch[i] += |
|---|
| 1064 | t->f_mat[dep*t->ldscape_sz+j] * |
|---|
| 1065 | ((occup[j] == 0) ? (1.0) : (t->lbda)) * |
|---|
| 1066 | t->tau / R; |
|---|
| 1067 | |
|---|
| 1068 | /* printf("\n. %f %f %f %f", */ |
|---|
| 1069 | /* R, */ |
|---|
| 1070 | /* t->f_mat[dep*t->ldscape_sz+j], */ |
|---|
| 1071 | /* ((occup[j]>0) ? (t->lbda) : (1.0)), */ |
|---|
| 1072 | /* t->tau); */ |
|---|
| 1073 | } |
|---|
| 1074 | /* printf("\n. %f ",p_branch[i]); */ |
|---|
| 1075 | } |
|---|
| 1076 | |
|---|
| 1077 | |
|---|
| 1078 | // Increase the number of branching nodes by one (you just added 2 new and removed 1 old) |
|---|
| 1079 | n_branching_nodes++; |
|---|
| 1080 | nd_idx += 2; |
|---|
| 1081 | |
|---|
| 1082 | } |
|---|
| 1083 | while(n_branching_nodes < n_otu); |
|---|
| 1084 | |
|---|
| 1085 | |
|---|
| 1086 | // Set the times at the tips |
|---|
| 1087 | For(i,2*tree->n_otu-1) if(tree->rates->nd_t[i] < 0.0) tree->rates->nd_t[i] = time; |
|---|
| 1088 | |
|---|
| 1089 | // Reverse time scale |
|---|
| 1090 | For(i,2*tree->n_otu-1) tree->rates->nd_t[i] -= time; |
|---|
| 1091 | /* For(i,2*tree->n_otu-1) tree->rates->nd_t[i] = FABS(tree->rates->nd_t[i]); */ |
|---|
| 1092 | |
|---|
| 1093 | // Bubble sort to put all the tips at the top of the tree->a_nodes array |
|---|
| 1094 | do |
|---|
| 1095 | { |
|---|
| 1096 | swap = NO; |
|---|
| 1097 | For(i,2*tree->n_otu-2) |
|---|
| 1098 | { |
|---|
| 1099 | if(!tree->a_nodes[i+1]->v[1] && tree->a_nodes[i]->v[1]) |
|---|
| 1100 | { |
|---|
| 1101 | buff_nd = tree->a_nodes[i+1]; |
|---|
| 1102 | tree->a_nodes[i+1] = tree->a_nodes[i]; |
|---|
| 1103 | tree->a_nodes[i] = buff_nd; |
|---|
| 1104 | |
|---|
| 1105 | buff_t = tree->rates->nd_t[i+1]; |
|---|
| 1106 | tree->rates->nd_t[i+1] = tree->rates->nd_t[i]; |
|---|
| 1107 | tree->rates->nd_t[i] = buff_t; |
|---|
| 1108 | |
|---|
| 1109 | buff_l = t->loc[i+1]; |
|---|
| 1110 | t->loc[i+1] = t->loc[i]; |
|---|
| 1111 | t->loc[i] = buff_l; |
|---|
| 1112 | |
|---|
| 1113 | swap = YES; |
|---|
| 1114 | } |
|---|
| 1115 | } |
|---|
| 1116 | } |
|---|
| 1117 | while(swap == YES); |
|---|
| 1118 | |
|---|
| 1119 | |
|---|
| 1120 | // The rest below is just bookeeping... |
|---|
| 1121 | |
|---|
| 1122 | |
|---|
| 1123 | For(i,2*tree->n_otu-1) tree->a_nodes[i]->num = i; |
|---|
| 1124 | For(i,2*tree->n_otu-1) |
|---|
| 1125 | { |
|---|
| 1126 | if(i < tree->n_otu) tree->a_nodes[i]->tax = YES; |
|---|
| 1127 | else tree->a_nodes[i]->tax = NO; |
|---|
| 1128 | } |
|---|
| 1129 | |
|---|
| 1130 | /* printf("\n++++++++++++++++++\n"); */ |
|---|
| 1131 | /* For(i,2*tree->n_otu-1) */ |
|---|
| 1132 | /* { */ |
|---|
| 1133 | /* printf("\n. Node %3d [%p] anc:%3d v1:%3d v2:%3d time: %f", */ |
|---|
| 1134 | /* tree->a_nodes[i]->num, */ |
|---|
| 1135 | /* (void *)tree->a_nodes[i], */ |
|---|
| 1136 | /* tree->a_nodes[i]->v[0] ? tree->a_nodes[i]->v[0]->num : -1, */ |
|---|
| 1137 | /* tree->a_nodes[i]->v[1] ? tree->a_nodes[i]->v[1]->num : -1, */ |
|---|
| 1138 | /* tree->a_nodes[i]->v[2] ? tree->a_nodes[i]->v[2]->num : -1, */ |
|---|
| 1139 | /* tree->rates->nd_t[i]); */ |
|---|
| 1140 | /* } */ |
|---|
| 1141 | |
|---|
| 1142 | |
|---|
| 1143 | For(i,tree->n_otu) |
|---|
| 1144 | { |
|---|
| 1145 | if(!tree->a_nodes[i]->name) tree->a_nodes[i]->name = (char *)mCalloc(T_MAX_NAME,sizeof(char)); |
|---|
| 1146 | strcpy(tree->a_nodes[i]->name,"x"); |
|---|
| 1147 | sprintf(tree->a_nodes[i]->name+1,"%d",i); |
|---|
| 1148 | } |
|---|
| 1149 | |
|---|
| 1150 | tree->n_root->v[1]->v[0] = tree->n_root->v[2]; |
|---|
| 1151 | tree->n_root->v[2]->v[0] = tree->n_root->v[1]; |
|---|
| 1152 | |
|---|
| 1153 | tree->num_curr_branch_available = 0; |
|---|
| 1154 | Connect_Edges_To_Nodes_Recur(tree->a_nodes[0],tree->a_nodes[0]->v[0],tree); |
|---|
| 1155 | |
|---|
| 1156 | tree->e_root = tree->n_root->v[1]->b[0]; |
|---|
| 1157 | |
|---|
| 1158 | For(i,2*tree->n_otu-3) |
|---|
| 1159 | { |
|---|
| 1160 | tree->a_edges[i]->l->v = FABS(tree->rates->nd_t[tree->a_edges[i]->left->num] - |
|---|
| 1161 | tree->rates->nd_t[tree->a_edges[i]->rght->num]); |
|---|
| 1162 | } |
|---|
| 1163 | |
|---|
| 1164 | tree->e_root->l->v = |
|---|
| 1165 | FABS(tree->rates->nd_t[tree->n_root->v[1]->num] - |
|---|
| 1166 | tree->rates->nd_t[tree->n_root->num]) + |
|---|
| 1167 | FABS(tree->rates->nd_t[tree->n_root->v[2]->num] - |
|---|
| 1168 | tree->rates->nd_t[tree->n_root->num]); |
|---|
| 1169 | |
|---|
| 1170 | tree->n_root->l[1] = FABS(tree->rates->nd_t[tree->n_root->v[1]->num] - |
|---|
| 1171 | tree->rates->nd_t[tree->n_root->num]); |
|---|
| 1172 | |
|---|
| 1173 | tree->n_root->l[2] = FABS(tree->rates->nd_t[tree->n_root->v[2]->num] - |
|---|
| 1174 | tree->rates->nd_t[tree->n_root->num]); |
|---|
| 1175 | |
|---|
| 1176 | tree->n_root_pos = |
|---|
| 1177 | FABS(tree->rates->nd_t[tree->n_root->v[2]->num] - |
|---|
| 1178 | tree->rates->nd_t[tree->n_root->num]) / tree->e_root->l->v; |
|---|
| 1179 | |
|---|
| 1180 | /* printf("\n. %s ",Write_Tree(tree,NO)); */ |
|---|
| 1181 | |
|---|
| 1182 | DR_Draw_Tree("essai.ps",tree); |
|---|
| 1183 | |
|---|
| 1184 | /* For(i,tree->n_otu) */ |
|---|
| 1185 | /* printf("\n. %4s %4d [%5.2f %5.2f]",tree->a_nodes[i]->name, */ |
|---|
| 1186 | /* t->loc[i], */ |
|---|
| 1187 | /* t->ldscape[t->loc[i]*t->n_dim+0], */ |
|---|
| 1188 | /* t->ldscape[t->loc[i]*t->n_dim+1]); */ |
|---|
| 1189 | |
|---|
| 1190 | |
|---|
| 1191 | Update_Ancestors(tree->n_root,tree->n_root->v[2],tree); |
|---|
| 1192 | Update_Ancestors(tree->n_root,tree->n_root->v[1],tree); |
|---|
| 1193 | |
|---|
| 1194 | Free(branching_nodes); |
|---|
| 1195 | Free(p_branch); |
|---|
| 1196 | Free(p_mig); |
|---|
| 1197 | |
|---|
| 1198 | return(tree); |
|---|
| 1199 | } |
|---|
| 1200 | |
|---|
| 1201 | ////////////////////////////////////////////////////////////// |
|---|
| 1202 | ////////////////////////////////////////////////////////////// |
|---|
| 1203 | |
|---|
| 1204 | // Simualte n coordinates (in 2D) |
|---|
| 1205 | void GEO_Simulate_Coordinates(int n, t_geo *t) |
|---|
| 1206 | { |
|---|
| 1207 | int i; |
|---|
| 1208 | phydbl width; |
|---|
| 1209 | |
|---|
| 1210 | width = 5.; |
|---|
| 1211 | |
|---|
| 1212 | For(i,n) |
|---|
| 1213 | { |
|---|
| 1214 | t->ldscape[i*t->n_dim+0] = -width/2. + Uni()*width; |
|---|
| 1215 | t->ldscape[i*t->n_dim+1] = -width/2. + Uni()*width; |
|---|
| 1216 | } |
|---|
| 1217 | |
|---|
| 1218 | |
|---|
| 1219 | /* t->ldscape[0*t->n_dim+0] = 0.0; */ |
|---|
| 1220 | /* t->ldscape[0*t->n_dim+1] = 0.0; */ |
|---|
| 1221 | |
|---|
| 1222 | /* t->ldscape[1*t->n_dim+0] = 0.1; */ |
|---|
| 1223 | /* t->ldscape[1*t->n_dim+1] = 0.1; */ |
|---|
| 1224 | |
|---|
| 1225 | } |
|---|
| 1226 | |
|---|
| 1227 | ////////////////////////////////////////////////////////////// |
|---|
| 1228 | ////////////////////////////////////////////////////////////// |
|---|
| 1229 | |
|---|
| 1230 | void GEO_Optimize_Sigma(t_geo *t, t_tree *tree) |
|---|
| 1231 | { |
|---|
| 1232 | Generic_Brent_Lk(&(t->sigma), |
|---|
| 1233 | t->min_sigma, |
|---|
| 1234 | t->max_sigma, |
|---|
| 1235 | 1.E-5, |
|---|
| 1236 | 1000, |
|---|
| 1237 | NO, |
|---|
| 1238 | GEO_Wrap_Lk,NULL,tree,NULL); |
|---|
| 1239 | } |
|---|
| 1240 | |
|---|
| 1241 | ////////////////////////////////////////////////////////////// |
|---|
| 1242 | ////////////////////////////////////////////////////////////// |
|---|
| 1243 | |
|---|
| 1244 | void GEO_Optimize_Lambda(t_geo *t, t_tree *tree) |
|---|
| 1245 | { |
|---|
| 1246 | Generic_Brent_Lk(&(t->lbda), |
|---|
| 1247 | t->min_lbda, |
|---|
| 1248 | t->max_lbda, |
|---|
| 1249 | 1.E-5, |
|---|
| 1250 | 1000, |
|---|
| 1251 | NO, |
|---|
| 1252 | GEO_Wrap_Lk,NULL,tree,NULL); |
|---|
| 1253 | } |
|---|
| 1254 | |
|---|
| 1255 | ////////////////////////////////////////////////////////////// |
|---|
| 1256 | ////////////////////////////////////////////////////////////// |
|---|
| 1257 | |
|---|
| 1258 | void GEO_Optimize_Tau(t_geo *t, t_tree *tree) |
|---|
| 1259 | { |
|---|
| 1260 | Generic_Brent_Lk(&(t->tau), |
|---|
| 1261 | t->min_tau, |
|---|
| 1262 | t->max_tau, |
|---|
| 1263 | 1.E-5, |
|---|
| 1264 | 1000, |
|---|
| 1265 | NO, |
|---|
| 1266 | GEO_Wrap_Lk,NULL,tree,NULL); |
|---|
| 1267 | } |
|---|
| 1268 | |
|---|
| 1269 | ////////////////////////////////////////////////////////////// |
|---|
| 1270 | ////////////////////////////////////////////////////////////// |
|---|
| 1271 | |
|---|
| 1272 | phydbl GEO_Wrap_Lk(t_edge *b, t_tree *tree, supert_tree *stree) |
|---|
| 1273 | { |
|---|
| 1274 | return GEO_Lk(tree->geo,tree); |
|---|
| 1275 | } |
|---|
| 1276 | |
|---|
| 1277 | ////////////////////////////////////////////////////////////// |
|---|
| 1278 | ////////////////////////////////////////////////////////////// |
|---|
| 1279 | |
|---|
| 1280 | void GEO_Init_Geo_Struct(t_geo *t) |
|---|
| 1281 | { |
|---|
| 1282 | t->c_lnL = UNLIKELY; |
|---|
| 1283 | |
|---|
| 1284 | t->sigma = 1.0; |
|---|
| 1285 | t->min_sigma = 1.E-3; |
|---|
| 1286 | t->max_sigma = 1.E+3; |
|---|
| 1287 | t->sigma_thresh = t->max_sigma; |
|---|
| 1288 | |
|---|
| 1289 | t->lbda = 1.0; |
|---|
| 1290 | t->min_lbda = 1.E-3; |
|---|
| 1291 | t->max_lbda = 1.E+3; |
|---|
| 1292 | |
|---|
| 1293 | t->tau = 1.0; |
|---|
| 1294 | t->min_tau = 1.E-3; |
|---|
| 1295 | t->max_tau = 1.E+3; |
|---|
| 1296 | |
|---|
| 1297 | t->tau = 1.0; |
|---|
| 1298 | |
|---|
| 1299 | t->n_dim = 2; |
|---|
| 1300 | t->ldscape_sz = 1; |
|---|
| 1301 | |
|---|
| 1302 | t->update_fmat = YES; |
|---|
| 1303 | } |
|---|
| 1304 | |
|---|
| 1305 | ////////////////////////////////////////////////////////////// |
|---|
| 1306 | ////////////////////////////////////////////////////////////// |
|---|
| 1307 | |
|---|
| 1308 | void GEO_Randomize_Locations(t_node *n, t_geo *t, t_tree *tree) |
|---|
| 1309 | { |
|---|
| 1310 | if(n->tax == YES) return; |
|---|
| 1311 | else |
|---|
| 1312 | { |
|---|
| 1313 | t_node *v1, *v2; |
|---|
| 1314 | int i; |
|---|
| 1315 | phydbl *probs; // vector of probability of picking each location |
|---|
| 1316 | phydbl sum; |
|---|
| 1317 | phydbl u; |
|---|
| 1318 | |
|---|
| 1319 | |
|---|
| 1320 | probs = (phydbl *)mCalloc(t->ldscape_sz,sizeof(phydbl)); |
|---|
| 1321 | |
|---|
| 1322 | v1 = v2 = NULL; |
|---|
| 1323 | For(i,3) |
|---|
| 1324 | { |
|---|
| 1325 | if(n->v[i] != n->anc && n->b[i] != tree->e_root) |
|---|
| 1326 | { |
|---|
| 1327 | if(!v1) v1 = n->v[i]; |
|---|
| 1328 | else v2 = n->v[i]; |
|---|
| 1329 | } |
|---|
| 1330 | } |
|---|
| 1331 | |
|---|
| 1332 | if(v1->tax && v2->tax) |
|---|
| 1333 | { |
|---|
| 1334 | Free(probs); |
|---|
| 1335 | return; |
|---|
| 1336 | } |
|---|
| 1337 | else if(v1->tax && !v2->tax && t->loc[v1->num] != t->loc[n->num]) |
|---|
| 1338 | { |
|---|
| 1339 | t->loc[v2->num] = t->loc[n->num]; |
|---|
| 1340 | } |
|---|
| 1341 | else if(v2->tax && !v1->tax && t->loc[v2->num] != t->loc[n->num]) |
|---|
| 1342 | { |
|---|
| 1343 | t->loc[v1->num] = t->loc[n->num]; |
|---|
| 1344 | } |
|---|
| 1345 | else if(v1->tax && !v2->tax && t->loc[v1->num] == t->loc[n->num]) |
|---|
| 1346 | { |
|---|
| 1347 | sum = 0.0; |
|---|
| 1348 | For(i,t->ldscape_sz) sum += t->loc_beneath[v2->num * t->ldscape_sz + i]; |
|---|
| 1349 | For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v2->num * t->ldscape_sz + i]/sum; |
|---|
| 1350 | |
|---|
| 1351 | t->loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz); |
|---|
| 1352 | } |
|---|
| 1353 | else if(v2->tax && !v1->tax && t->loc[v2->num] == t->loc[n->num]) |
|---|
| 1354 | { |
|---|
| 1355 | sum = 0.0; |
|---|
| 1356 | For(i,t->ldscape_sz) sum += t->loc_beneath[v1->num * t->ldscape_sz + i]; |
|---|
| 1357 | For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v1->num * t->ldscape_sz + i]/sum; |
|---|
| 1358 | |
|---|
| 1359 | t->loc[v1->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz); |
|---|
| 1360 | } |
|---|
| 1361 | else |
|---|
| 1362 | { |
|---|
| 1363 | int n_v1, n_v2; |
|---|
| 1364 | phydbl p; |
|---|
| 1365 | |
|---|
| 1366 | n_v1 = t->loc_beneath[v1->num * t->ldscape_sz + t->loc[n->num]]; |
|---|
| 1367 | n_v2 = t->loc_beneath[v2->num * t->ldscape_sz + t->loc[n->num]]; |
|---|
| 1368 | |
|---|
| 1369 | if(n_v1 + n_v2 < 1) |
|---|
| 1370 | { |
|---|
| 1371 | PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__); |
|---|
| 1372 | Exit("\n"); |
|---|
| 1373 | } |
|---|
| 1374 | |
|---|
| 1375 | |
|---|
| 1376 | p = n_v1 / (n_v1 + n_v2); |
|---|
| 1377 | |
|---|
| 1378 | u = Uni(); |
|---|
| 1379 | |
|---|
| 1380 | if(u < p) |
|---|
| 1381 | { |
|---|
| 1382 | sum = 0.0; |
|---|
| 1383 | For(i,t->ldscape_sz) sum += t->loc_beneath[v2->num * t->ldscape_sz + i]; |
|---|
| 1384 | For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v2->num * t->ldscape_sz + i]/sum; |
|---|
| 1385 | |
|---|
| 1386 | t->loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz); |
|---|
| 1387 | t->loc[v1->num] = t->loc[n->num]; |
|---|
| 1388 | } |
|---|
| 1389 | else |
|---|
| 1390 | { |
|---|
| 1391 | if(t->loc_beneath[v2->num * t->ldscape_sz + t->loc[n->num]] > 0) |
|---|
| 1392 | { |
|---|
| 1393 | sum = 0.0; |
|---|
| 1394 | For(i,t->ldscape_sz) sum += t->loc_beneath[v1->num * t->ldscape_sz + i]; |
|---|
| 1395 | For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v1->num * t->ldscape_sz + i]/sum; |
|---|
| 1396 | |
|---|
| 1397 | t->loc[v1->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz); |
|---|
| 1398 | t->loc[v2->num] = t->loc[n->num]; |
|---|
| 1399 | } |
|---|
| 1400 | else |
|---|
| 1401 | { |
|---|
| 1402 | sum = 0.0; |
|---|
| 1403 | For(i,t->ldscape_sz) sum += t->loc_beneath[v2->num * t->ldscape_sz + i]; |
|---|
| 1404 | For(i,t->ldscape_sz) probs[i] = t->loc_beneath[v2->num * t->ldscape_sz + i]/sum; |
|---|
| 1405 | |
|---|
| 1406 | t->loc[v2->num] = Sample_i_With_Proba_pi(probs,t->ldscape_sz); |
|---|
| 1407 | t->loc[v1->num] = t->loc[n->num]; |
|---|
| 1408 | } |
|---|
| 1409 | } |
|---|
| 1410 | |
|---|
| 1411 | if(t->loc[v1->num] != t->loc[n->num] && t->loc[v2->num] != t->loc[n->num]) |
|---|
| 1412 | { |
|---|
| 1413 | PhyML_Printf("\n. %d %d %d",t->loc[v1->num],t->loc[v2->num],t->loc[n->num]); |
|---|
| 1414 | PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__); |
|---|
| 1415 | Exit("\n"); |
|---|
| 1416 | } |
|---|
| 1417 | |
|---|
| 1418 | if(t->loc_beneath[v1->num * t->ldscape_sz + t->loc[v1->num]] < 1) |
|---|
| 1419 | { |
|---|
| 1420 | PhyML_Printf("\n. %d %d %d",t->loc[v1->num],t->loc[v2->num],t->loc[n->num]); |
|---|
| 1421 | PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__); |
|---|
| 1422 | Exit("\n"); |
|---|
| 1423 | } |
|---|
| 1424 | |
|---|
| 1425 | if(t->loc_beneath[v2->num * t->ldscape_sz + t->loc[v2->num]] < 1) |
|---|
| 1426 | { |
|---|
| 1427 | PhyML_Printf("\n. %d %d %d",t->loc[v1->num],t->loc[v2->num],t->loc[n->num]); |
|---|
| 1428 | PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__); |
|---|
| 1429 | Exit("\n"); |
|---|
| 1430 | } |
|---|
| 1431 | |
|---|
| 1432 | } |
|---|
| 1433 | |
|---|
| 1434 | Free(probs); |
|---|
| 1435 | |
|---|
| 1436 | For(i,3) |
|---|
| 1437 | if(n->v[i] != n->anc && n->b[i] != tree->e_root) |
|---|
| 1438 | GEO_Randomize_Locations(n->v[i],t,tree); |
|---|
| 1439 | |
|---|
| 1440 | } |
|---|
| 1441 | } |
|---|
| 1442 | |
|---|
| 1443 | ////////////////////////////////////////////////////////////// |
|---|
| 1444 | ////////////////////////////////////////////////////////////// |
|---|
| 1445 | |
|---|
| 1446 | void GEO_Get_Locations_Beneath(t_geo *t, t_tree *tree) |
|---|
| 1447 | { |
|---|
| 1448 | int i; |
|---|
| 1449 | GEO_Get_Locations_Beneath_Post(tree->n_root,tree->n_root->v[1],t,tree); |
|---|
| 1450 | GEO_Get_Locations_Beneath_Post(tree->n_root,tree->n_root->v[2],t,tree); |
|---|
| 1451 | |
|---|
| 1452 | For(i,t->ldscape_sz) |
|---|
| 1453 | { |
|---|
| 1454 | t->loc_beneath[tree->n_root->num*t->ldscape_sz+i] = |
|---|
| 1455 | t->loc_beneath[tree->n_root->v[1]->num*t->ldscape_sz+i] + |
|---|
| 1456 | t->loc_beneath[tree->n_root->v[2]->num*t->ldscape_sz+i]; |
|---|
| 1457 | } |
|---|
| 1458 | |
|---|
| 1459 | |
|---|
| 1460 | } |
|---|
| 1461 | |
|---|
| 1462 | ////////////////////////////////////////////////////////////// |
|---|
| 1463 | ////////////////////////////////////////////////////////////// |
|---|
| 1464 | |
|---|
| 1465 | void GEO_Get_Locations_Beneath_Post(t_node *a, t_node *d, t_geo *t, t_tree *tree) |
|---|
| 1466 | { |
|---|
| 1467 | |
|---|
| 1468 | if(d->tax) |
|---|
| 1469 | { |
|---|
| 1470 | t->loc_beneath[d->num*t->ldscape_sz+t->loc[d->num]] = 1; |
|---|
| 1471 | return; |
|---|
| 1472 | } |
|---|
| 1473 | else |
|---|
| 1474 | { |
|---|
| 1475 | int i; |
|---|
| 1476 | t_node *v1, *v2; |
|---|
| 1477 | |
|---|
| 1478 | For(i,3) |
|---|
| 1479 | { |
|---|
| 1480 | if(d->v[i] != a && d->b[i] != tree->e_root) |
|---|
| 1481 | { |
|---|
| 1482 | GEO_Get_Locations_Beneath_Post(d,d->v[i],t,tree); |
|---|
| 1483 | } |
|---|
| 1484 | } |
|---|
| 1485 | |
|---|
| 1486 | |
|---|
| 1487 | v1 = v2 = NULL; |
|---|
| 1488 | For(i,3) |
|---|
| 1489 | { |
|---|
| 1490 | if(d->v[i] != a && d->b[i] != tree->e_root) |
|---|
| 1491 | { |
|---|
| 1492 | if(!v1) v1 = d->v[i]; |
|---|
| 1493 | else v2 = d->v[i]; |
|---|
| 1494 | } |
|---|
| 1495 | } |
|---|
| 1496 | |
|---|
| 1497 | |
|---|
| 1498 | For(i,t->ldscape_sz) |
|---|
| 1499 | { |
|---|
| 1500 | t->loc_beneath[ d->num*t->ldscape_sz+i] = |
|---|
| 1501 | t->loc_beneath[v1->num*t->ldscape_sz+i] + |
|---|
| 1502 | t->loc_beneath[v2->num*t->ldscape_sz+i] ; |
|---|
| 1503 | } |
|---|
| 1504 | } |
|---|
| 1505 | } |
|---|
| 1506 | |
|---|
| 1507 | ////////////////////////////////////////////////////////////// |
|---|
| 1508 | ////////////////////////////////////////////////////////////// |
|---|
| 1509 | |
|---|
| 1510 | void GEO_Get_Sigma_Max(t_geo *t) |
|---|
| 1511 | { |
|---|
| 1512 | int i,j; |
|---|
| 1513 | phydbl mean_dist,inv_mean_dist; |
|---|
| 1514 | phydbl sigma_a, sigma_b, sigma_c; |
|---|
| 1515 | phydbl overlap_a, overlap_b, overlap_c; |
|---|
| 1516 | phydbl d_intersect; |
|---|
| 1517 | phydbl overlap_target; |
|---|
| 1518 | phydbl eps; |
|---|
| 1519 | int n_iter,n_iter_max; |
|---|
| 1520 | |
|---|
| 1521 | eps = 1.E-6; |
|---|
| 1522 | overlap_target = 0.95; |
|---|
| 1523 | n_iter_max = 100; |
|---|
| 1524 | |
|---|
| 1525 | mean_dist = -1.; |
|---|
| 1526 | inv_mean_dist = -1.; |
|---|
| 1527 | For(i,t->ldscape_sz-1) |
|---|
| 1528 | { |
|---|
| 1529 | for(j=i+1;j<t->ldscape_sz;j++) |
|---|
| 1530 | { |
|---|
| 1531 | /* dist = POW(t->ldscape[i*t->n_dim+0] - t->ldscape[j*t->n_dim+0],1); */ |
|---|
| 1532 | /* if(dist > mean_dist) mean_dist = dist; */ |
|---|
| 1533 | /* dist = POW(t->ldscape[i*t->n_dim+1] - t->ldscape[j*t->n_dim+1],1); */ |
|---|
| 1534 | /* if(dist > mean_dist) mean_dist = dist; */ |
|---|
| 1535 | mean_dist += FABS(t->ldscape[i*t->n_dim+0] - t->ldscape[j*t->n_dim+0]); |
|---|
| 1536 | mean_dist += FABS(t->ldscape[i*t->n_dim+1] - t->ldscape[j*t->n_dim+1]); |
|---|
| 1537 | } |
|---|
| 1538 | } |
|---|
| 1539 | |
|---|
| 1540 | mean_dist /= t->ldscape_sz*(t->ldscape_sz-1)/2.; |
|---|
| 1541 | inv_mean_dist = 1./mean_dist; |
|---|
| 1542 | |
|---|
| 1543 | |
|---|
| 1544 | PhyML_Printf("\n. mean_dist = %f",mean_dist); |
|---|
| 1545 | |
|---|
| 1546 | sigma_a = t->min_sigma; sigma_b = 1.0; sigma_c = t->max_sigma; |
|---|
| 1547 | /* sigma_a = t->min_sigma; sigma_b = 1.0; sigma_c = 10.; */ |
|---|
| 1548 | n_iter = 0; |
|---|
| 1549 | do |
|---|
| 1550 | { |
|---|
| 1551 | d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_a,0.0,mean_dist); |
|---|
| 1552 | overlap_a = |
|---|
| 1553 | (Pnorm(mean_dist,0.0,sigma_a) - Pnorm(d_intersect,0.0,sigma_a))/ |
|---|
| 1554 | (Pnorm(mean_dist,0.0,sigma_a) - Pnorm(0.0,0.0,sigma_a)) + |
|---|
| 1555 | d_intersect / mean_dist; |
|---|
| 1556 | /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */ |
|---|
| 1557 | |
|---|
| 1558 | d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_b,0.0,mean_dist); |
|---|
| 1559 | overlap_b = |
|---|
| 1560 | (Pnorm(mean_dist,0.0,sigma_b) - Pnorm(d_intersect,0.0,sigma_b))/ |
|---|
| 1561 | (Pnorm(mean_dist,0.0,sigma_b) - Pnorm(0.0,0.0,sigma_b)) + |
|---|
| 1562 | d_intersect / mean_dist; |
|---|
| 1563 | /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */ |
|---|
| 1564 | |
|---|
| 1565 | d_intersect = Inverse_Truncated_Normal(inv_mean_dist,0.0,sigma_c,0.0,mean_dist); |
|---|
| 1566 | overlap_c = |
|---|
| 1567 | (Pnorm(mean_dist,0.0,sigma_c) - Pnorm(d_intersect,0.0,sigma_c))/ |
|---|
| 1568 | (Pnorm(mean_dist,0.0,sigma_c) - Pnorm(0.0,0.0,sigma_c)) + |
|---|
| 1569 | d_intersect / mean_dist; |
|---|
| 1570 | |
|---|
| 1571 | /* printf("\n. inter: %f %f [%f]",d_intersect,mean_dist,d_intersect / mean_dist); */ |
|---|
| 1572 | |
|---|
| 1573 | /* printf("\n. sigma_a:%f overlap_a:%f sigma_b:%f overlap_b:%f sigma_c:%f overlap_c:%f", */ |
|---|
| 1574 | /* sigma_a,overlap_a, */ |
|---|
| 1575 | /* sigma_b,overlap_b, */ |
|---|
| 1576 | /* sigma_c,overlap_c); */ |
|---|
| 1577 | |
|---|
| 1578 | if(overlap_target > overlap_a && overlap_target < overlap_b) |
|---|
| 1579 | { |
|---|
| 1580 | sigma_c = sigma_b; |
|---|
| 1581 | sigma_b = sigma_a + (sigma_c - sigma_a)/2.; |
|---|
| 1582 | } |
|---|
| 1583 | else if(overlap_target > overlap_b && overlap_target < overlap_c) |
|---|
| 1584 | { |
|---|
| 1585 | sigma_a = sigma_b; |
|---|
| 1586 | sigma_b = sigma_a + (sigma_c - sigma_a)/2.; |
|---|
| 1587 | } |
|---|
| 1588 | else if(overlap_target < overlap_a) |
|---|
| 1589 | { |
|---|
| 1590 | sigma_a /= 2.; |
|---|
| 1591 | } |
|---|
| 1592 | else if(overlap_target > overlap_c) |
|---|
| 1593 | { |
|---|
| 1594 | sigma_c *= 2.; |
|---|
| 1595 | } |
|---|
| 1596 | |
|---|
| 1597 | n_iter++; |
|---|
| 1598 | |
|---|
| 1599 | } |
|---|
| 1600 | while(sigma_c - sigma_a > eps && n_iter < n_iter_max); |
|---|
| 1601 | |
|---|
| 1602 | /* if(sigma_c - sigma_a > eps) */ |
|---|
| 1603 | /* { */ |
|---|
| 1604 | /* PhyML_Printf("\n== Error detected in getting maximum value of sigma."); */ |
|---|
| 1605 | /* PhyML_Printf("\n== Err in file %s at line %d\n",__FILE__,__LINE__); */ |
|---|
| 1606 | /* Exit("\n"); */ |
|---|
| 1607 | /* } */ |
|---|
| 1608 | /* else */ |
|---|
| 1609 | /* { */ |
|---|
| 1610 | /* PhyML_Printf("\n== Threshold for sigma: %f",sigma_b); */ |
|---|
| 1611 | /* } */ |
|---|
| 1612 | |
|---|
| 1613 | t->sigma_thresh = sigma_b; |
|---|
| 1614 | } |
|---|
| 1615 | |
|---|
| 1616 | ////////////////////////////////////////////////////////////// |
|---|
| 1617 | ////////////////////////////////////////////////////////////// |
|---|
| 1618 | |
|---|
| 1619 | void MCMC_Geo_Updown_Tau_Lbda(t_tree *tree) |
|---|
| 1620 | { |
|---|
| 1621 | phydbl K,mult,u,alpha,ratio; |
|---|
| 1622 | phydbl cur_lnL,new_lnL; |
|---|
| 1623 | phydbl cur_tau,new_tau; |
|---|
| 1624 | phydbl cur_lbda,new_lbda; |
|---|
| 1625 | |
|---|
| 1626 | K = tree->mcmc->tune_move[tree->mcmc->num_move_geo_updown_tau_lbda]; |
|---|
| 1627 | cur_lnL = tree->geo->c_lnL; |
|---|
| 1628 | new_lnL = tree->geo->c_lnL; |
|---|
| 1629 | cur_tau = tree->geo->tau; |
|---|
| 1630 | new_tau = tree->geo->tau; |
|---|
| 1631 | cur_lbda = tree->geo->lbda; |
|---|
| 1632 | new_lbda = tree->geo->lbda; |
|---|
| 1633 | |
|---|
| 1634 | u = Uni(); |
|---|
| 1635 | mult = EXP(K*(u-0.5)); |
|---|
| 1636 | |
|---|
| 1637 | /* Multiply tau by K */ |
|---|
| 1638 | new_tau = cur_tau * K; |
|---|
| 1639 | |
|---|
| 1640 | /* Divide lbda by same amount */ |
|---|
| 1641 | new_lbda = cur_lbda / K; |
|---|
| 1642 | |
|---|
| 1643 | |
|---|
| 1644 | if( |
|---|
| 1645 | new_lbda < tree->geo->min_lbda || new_lbda > tree->geo->max_lbda || |
|---|
| 1646 | new_tau < tree->geo->min_tau || new_tau > tree->geo->max_tau |
|---|
| 1647 | ) |
|---|
| 1648 | { |
|---|
| 1649 | tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_tau_lbda]++; |
|---|
| 1650 | return; |
|---|
| 1651 | } |
|---|
| 1652 | |
|---|
| 1653 | tree->geo->tau = new_tau; |
|---|
| 1654 | tree->geo->lbda = new_lbda; |
|---|
| 1655 | |
|---|
| 1656 | if(tree->mcmc->use_data) new_lnL = GEO_Lk(tree->geo,tree); |
|---|
| 1657 | |
|---|
| 1658 | ratio = 0.0; |
|---|
| 1659 | /* Proposal ratio: 2n-2=> number of multiplications, 1=>number of divisions */ |
|---|
| 1660 | ratio += 0.0*LOG(mult); /* (1-1)*LOG(mult); */ |
|---|
| 1661 | /* Likelihood density ratio */ |
|---|
| 1662 | ratio += (new_lnL - cur_lnL); |
|---|
| 1663 | |
|---|
| 1664 | /* printf("\n. new_tau: %f new_lbda:%f cur_lnL:%f new_lnL:%f",new_tau,new_lbda,cur_lnL,new_lnL); */ |
|---|
| 1665 | |
|---|
| 1666 | |
|---|
| 1667 | ratio = EXP(ratio); |
|---|
| 1668 | alpha = MIN(1.,ratio); |
|---|
| 1669 | u = Uni(); |
|---|
| 1670 | |
|---|
| 1671 | if(u > alpha) /* Reject */ |
|---|
| 1672 | { |
|---|
| 1673 | tree->geo->tau = cur_tau; |
|---|
| 1674 | tree->geo->lbda = cur_lbda; |
|---|
| 1675 | tree->geo->c_lnL = cur_lnL; |
|---|
| 1676 | } |
|---|
| 1677 | else |
|---|
| 1678 | { |
|---|
| 1679 | tree->mcmc->acc_move[tree->mcmc->num_move_geo_updown_tau_lbda]++; |
|---|
| 1680 | } |
|---|
| 1681 | |
|---|
| 1682 | tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_tau_lbda]++; |
|---|
| 1683 | } |
|---|
| 1684 | |
|---|
| 1685 | ////////////////////////////////////////////////////////////// |
|---|
| 1686 | ////////////////////////////////////////////////////////////// |
|---|
| 1687 | |
|---|
| 1688 | |
|---|
| 1689 | void MCMC_Geo_Updown_Lbda_Sigma(t_tree *tree) |
|---|
| 1690 | { |
|---|
| 1691 | phydbl K,mult,u,alpha,ratio; |
|---|
| 1692 | phydbl cur_lnL,new_lnL; |
|---|
| 1693 | phydbl cur_lbda,new_lbda; |
|---|
| 1694 | phydbl cur_sigma,new_sigma; |
|---|
| 1695 | |
|---|
| 1696 | K = tree->mcmc->tune_move[tree->mcmc->num_move_geo_updown_lbda_sigma]; |
|---|
| 1697 | cur_lnL = tree->geo->c_lnL; |
|---|
| 1698 | new_lnL = tree->geo->c_lnL; |
|---|
| 1699 | cur_lbda = tree->geo->lbda; |
|---|
| 1700 | new_lbda = tree->geo->lbda; |
|---|
| 1701 | cur_sigma = tree->geo->sigma; |
|---|
| 1702 | new_sigma = tree->geo->sigma; |
|---|
| 1703 | |
|---|
| 1704 | u = Uni(); |
|---|
| 1705 | mult = EXP(K*(u-0.5)); |
|---|
| 1706 | |
|---|
| 1707 | /* Multiply lbda by K */ |
|---|
| 1708 | new_lbda = cur_lbda * K; |
|---|
| 1709 | |
|---|
| 1710 | /* Divide sigma by same amount */ |
|---|
| 1711 | new_sigma = cur_sigma / K; |
|---|
| 1712 | |
|---|
| 1713 | |
|---|
| 1714 | if( |
|---|
| 1715 | new_sigma < tree->geo->min_sigma || new_sigma > tree->geo->max_sigma || |
|---|
| 1716 | new_lbda < tree->geo->min_lbda || new_lbda > tree->geo->max_lbda |
|---|
| 1717 | ) |
|---|
| 1718 | { |
|---|
| 1719 | tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++; |
|---|
| 1720 | return; |
|---|
| 1721 | } |
|---|
| 1722 | |
|---|
| 1723 | tree->geo->lbda = new_lbda; |
|---|
| 1724 | tree->geo->sigma = new_sigma; |
|---|
| 1725 | |
|---|
| 1726 | if(tree->mcmc->use_data) new_lnL = GEO_Lk(tree->geo,tree); |
|---|
| 1727 | |
|---|
| 1728 | ratio = 0.0; |
|---|
| 1729 | /* Proposal ratio: 2n-2=> number of multiplications, 1=>number of divisions */ |
|---|
| 1730 | ratio += 0.0*LOG(mult); /* (1-1)*LOG(mult); */ |
|---|
| 1731 | /* Likelihood density ratio */ |
|---|
| 1732 | ratio += (new_lnL - cur_lnL); |
|---|
| 1733 | |
|---|
| 1734 | /* printf("\n. new_lbda: %f new_sigma:%f cur_lnL:%f new_lnL:%f",new_lbda,new_sigma,cur_lnL,new_lnL); */ |
|---|
| 1735 | |
|---|
| 1736 | |
|---|
| 1737 | ratio = EXP(ratio); |
|---|
| 1738 | alpha = MIN(1.,ratio); |
|---|
| 1739 | u = Uni(); |
|---|
| 1740 | |
|---|
| 1741 | if(u > alpha) /* Reject */ |
|---|
| 1742 | { |
|---|
| 1743 | tree->geo->lbda = cur_lbda; |
|---|
| 1744 | tree->geo->sigma = cur_sigma; |
|---|
| 1745 | tree->geo->c_lnL = cur_lnL; |
|---|
| 1746 | } |
|---|
| 1747 | else |
|---|
| 1748 | { |
|---|
| 1749 | tree->mcmc->acc_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++; |
|---|
| 1750 | } |
|---|
| 1751 | |
|---|
| 1752 | tree->mcmc->run_move[tree->mcmc->num_move_geo_updown_lbda_sigma]++; |
|---|
| 1753 | } |
|---|
| 1754 | |
|---|
| 1755 | ////////////////////////////////////////////////////////////// |
|---|
| 1756 | ////////////////////////////////////////////////////////////// |
|---|
| 1757 | |
|---|
| 1758 | void GEO_Read_In_Landscape(char *file_name, t_geo *t, phydbl **ldscape, int **loc_hash, t_tree *tree) |
|---|
| 1759 | { |
|---|
| 1760 | FILE *fp; |
|---|
| 1761 | char *s,*line; |
|---|
| 1762 | phydbl longitude, lattitude; |
|---|
| 1763 | int i, pos, tax,loc; |
|---|
| 1764 | |
|---|
| 1765 | PhyML_Printf("\n"); |
|---|
| 1766 | PhyML_Printf("\n. Reading landscape file '%s'.\n",file_name); |
|---|
| 1767 | |
|---|
| 1768 | line = (char *)mCalloc(T_MAX_LINE,sizeof(char)); |
|---|
| 1769 | s = (char *)mCalloc(T_MAX_LINE,sizeof(char)); |
|---|
| 1770 | (*ldscape) = (phydbl *)mCalloc(10*t->n_dim,sizeof(phydbl)); |
|---|
| 1771 | (*loc_hash) = (int *)mCalloc(tree->n_otu,sizeof(int)); |
|---|
| 1772 | |
|---|
| 1773 | fp = Openfile(file_name,0); |
|---|
| 1774 | |
|---|
| 1775 | tax = loc = -1; |
|---|
| 1776 | |
|---|
| 1777 | t->ldscape_sz = 0; |
|---|
| 1778 | |
|---|
| 1779 | do |
|---|
| 1780 | { |
|---|
| 1781 | if(!fgets(line,T_MAX_LINE,fp)) break; |
|---|
| 1782 | |
|---|
| 1783 | // Read in taxon name |
|---|
| 1784 | pos = 0; |
|---|
| 1785 | do |
|---|
| 1786 | { |
|---|
| 1787 | while(line[pos] == ' ') pos++; |
|---|
| 1788 | |
|---|
| 1789 | i = 0; |
|---|
| 1790 | s[0] = '\0'; |
|---|
| 1791 | while((line[pos] != ' ') && (line[pos] != '\n') && (line[pos] != '\t')) |
|---|
| 1792 | { |
|---|
| 1793 | s[i] = line[pos]; |
|---|
| 1794 | i++; |
|---|
| 1795 | pos++; |
|---|
| 1796 | } |
|---|
| 1797 | s[i] = '\0'; |
|---|
| 1798 | |
|---|
| 1799 | if(line[pos] == '\n' || line[pos] == ' ') break; |
|---|
| 1800 | pos++; |
|---|
| 1801 | }while(1); |
|---|
| 1802 | |
|---|
| 1803 | if(strlen(s) > 0) For(tax,tree->n_otu) if(!strcmp(tree->a_nodes[tax]->name,s)) break; |
|---|
| 1804 | |
|---|
| 1805 | if(tax == tree->n_otu) |
|---|
| 1806 | { |
|---|
| 1807 | PhyML_Printf("\n== Could not find a taxon with name '%s' in the tree provided.",s); |
|---|
| 1808 | /* PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__); */ |
|---|
| 1809 | /* Exit("\n"); */ |
|---|
| 1810 | continue; |
|---|
| 1811 | } |
|---|
| 1812 | |
|---|
| 1813 | |
|---|
| 1814 | sscanf(line+pos,"%lf %lf",&longitude,&lattitude); |
|---|
| 1815 | |
|---|
| 1816 | For(loc,t->ldscape_sz) |
|---|
| 1817 | { |
|---|
| 1818 | if(FABS(longitude-(*ldscape)[loc*t->n_dim+0]) < 1.E-10 && |
|---|
| 1819 | FABS(lattitude-(*ldscape)[loc*t->n_dim+1]) < 1.E-10) |
|---|
| 1820 | { |
|---|
| 1821 | break; |
|---|
| 1822 | } |
|---|
| 1823 | } |
|---|
| 1824 | |
|---|
| 1825 | if(loc == t->ldscape_sz) |
|---|
| 1826 | { |
|---|
| 1827 | t->ldscape_sz++; |
|---|
| 1828 | (*ldscape)[(t->ldscape_sz-1)*t->n_dim+0] = longitude; |
|---|
| 1829 | (*ldscape)[(t->ldscape_sz-1)*t->n_dim+1] = lattitude; |
|---|
| 1830 | |
|---|
| 1831 | printf("\n. new loc: %f %f",longitude,lattitude); |
|---|
| 1832 | if(!(t->ldscape_sz%10)) |
|---|
| 1833 | { |
|---|
| 1834 | (*ldscape) = (phydbl *)mRealloc((*ldscape),(t->ldscape_sz+10)*t->n_dim,sizeof(phydbl)); |
|---|
| 1835 | } |
|---|
| 1836 | } |
|---|
| 1837 | |
|---|
| 1838 | (*loc_hash)[tax] = loc; |
|---|
| 1839 | |
|---|
| 1840 | } |
|---|
| 1841 | while(1); |
|---|
| 1842 | |
|---|
| 1843 | For(tax,tree->n_otu) |
|---|
| 1844 | PhyML_Printf("\n. Taxon %30s, longitude: %12f, lattitude: %12f [%4d]", |
|---|
| 1845 | tree->a_nodes[tax]->name, |
|---|
| 1846 | (*ldscape)[(*loc_hash)[tax]*t->n_dim+0], |
|---|
| 1847 | (*ldscape)[(*loc_hash)[tax]*t->n_dim+1], |
|---|
| 1848 | (*loc_hash)[tax]); |
|---|
| 1849 | |
|---|
| 1850 | |
|---|
| 1851 | } |
|---|
| 1852 | |
|---|
| 1853 | ////////////////////////////////////////////////////////////// |
|---|
| 1854 | ////////////////////////////////////////////////////////////// |
|---|
| 1855 | |
|---|
| 1856 | |
|---|
| 1857 | |
|---|
| 1858 | |
|---|
| 1859 | ////////////////////////////////////////////////////////////// |
|---|
| 1860 | ////////////////////////////////////////////////////////////// |
|---|
| 1861 | |
|---|
| 1862 | |
|---|
| 1863 | ////////////////////////////////////////////////////////////// |
|---|
| 1864 | ////////////////////////////////////////////////////////////// |
|---|
| 1865 | |
|---|
| 1866 | |
|---|