1 | /* |
---|
2 | |
---|
3 | PHYML : a program that computes maximum likelihood phylogenies from |
---|
4 | DNA or AA homologous sequences |
---|
5 | |
---|
6 | Copyright (C) Stephane Guindon. Oct 2003 onward |
---|
7 | |
---|
8 | All parts of the source except where indicated are distributed under |
---|
9 | the GNU public licence. See http://www.opensource.org for details. |
---|
10 | |
---|
11 | */ |
---|
12 | |
---|
13 | #include "utilities.h" |
---|
14 | #include "lk.h" |
---|
15 | #include "optimiz.h" |
---|
16 | #include "models.h" |
---|
17 | #include "free.h" |
---|
18 | |
---|
19 | /* int LIM_SCALE; */ |
---|
20 | /* double LIM_SCALE_VAL; */ |
---|
21 | /* double MDBL_MAX; */ |
---|
22 | /* double MDBL_MIN; */ |
---|
23 | |
---|
24 | /*********************************************************/ |
---|
25 | |
---|
26 | void Init_Tips_At_One_Site_Nucleotides(char state, double **p_lk) |
---|
27 | { |
---|
28 | switch(state){ |
---|
29 | case 'A' : (*p_lk)[0]=1.; (*p_lk)[1]=(*p_lk)[2]=(*p_lk)[3]=.0; |
---|
30 | break; |
---|
31 | case 'C' : (*p_lk)[1]=1.; (*p_lk)[0]=(*p_lk)[2]=(*p_lk)[3]=.0; |
---|
32 | break; |
---|
33 | case 'G' : (*p_lk)[2]=1.; (*p_lk)[1]=(*p_lk)[0]=(*p_lk)[3]=.0; |
---|
34 | break; |
---|
35 | case 'T' : (*p_lk)[3]=1.; (*p_lk)[1]=(*p_lk)[2]=(*p_lk)[0]=.0; |
---|
36 | break; |
---|
37 | case 'U' : (*p_lk)[3]=1.; (*p_lk)[1]=(*p_lk)[2]=(*p_lk)[0]=.0; |
---|
38 | break; |
---|
39 | case 'M' : (*p_lk)[0]=(*p_lk)[1]=1.; (*p_lk)[2]=(*p_lk)[3]=.0; |
---|
40 | break; |
---|
41 | case 'R' : (*p_lk)[0]=(*p_lk)[2]=1.; (*p_lk)[1]=(*p_lk)[3]=.0; |
---|
42 | break; |
---|
43 | case 'W' : (*p_lk)[0]=(*p_lk)[3]=1.; (*p_lk)[1]=(*p_lk)[2]=.0; |
---|
44 | break; |
---|
45 | case 'S' : (*p_lk)[1]=(*p_lk)[2]=1.; (*p_lk)[0]=(*p_lk)[3]=.0; |
---|
46 | break; |
---|
47 | case 'Y' : (*p_lk)[1]=(*p_lk)[3]=1.; (*p_lk)[0]=(*p_lk)[2]=.0; |
---|
48 | break; |
---|
49 | case 'K' : (*p_lk)[2]=(*p_lk)[3]=1.; (*p_lk)[0]=(*p_lk)[1]=.0; |
---|
50 | break; |
---|
51 | case 'B' : (*p_lk)[1]=(*p_lk)[2]=(*p_lk)[3]=1.; (*p_lk)[0]=.0; |
---|
52 | break; |
---|
53 | case 'D' : (*p_lk)[0]=(*p_lk)[2]=(*p_lk)[3]=1.; (*p_lk)[1]=.0; |
---|
54 | break; |
---|
55 | case 'H' : (*p_lk)[0]=(*p_lk)[1]=(*p_lk)[3]=1.; (*p_lk)[2]=.0; |
---|
56 | break; |
---|
57 | case 'V' : (*p_lk)[0]=(*p_lk)[1]=(*p_lk)[2]=1.; (*p_lk)[3]=.0; |
---|
58 | break; |
---|
59 | case 'N' : case 'X' : case '?' : case 'O' : case '-' : |
---|
60 | (*p_lk)[0]=(*p_lk)[1]=(*p_lk)[2]=(*p_lk)[3]=1.;break; |
---|
61 | default : |
---|
62 | { |
---|
63 | printf("\n. Unknown character state : %c\n",state); |
---|
64 | Exit("\n. Init failed (check the data type)\n"); |
---|
65 | break; |
---|
66 | } |
---|
67 | } |
---|
68 | } |
---|
69 | |
---|
70 | /*********************************************************/ |
---|
71 | |
---|
72 | void Init_Tips_At_One_Site_AA(char aa, double **p_lk) |
---|
73 | { |
---|
74 | int i; |
---|
75 | |
---|
76 | For(i,20) (*p_lk)[i] = .0; |
---|
77 | |
---|
78 | switch(aa){ |
---|
79 | case 'A' : (*p_lk)[0]= 1.; break;/* Alanine */ |
---|
80 | case 'R' : (*p_lk)[1]= 1.; break;/* Arginine */ |
---|
81 | case 'N' : (*p_lk)[2]= 1.; break;/* Asparagine */ |
---|
82 | case 'D' : (*p_lk)[3]= 1.; break;/* Aspartic acid */ |
---|
83 | case 'C' : (*p_lk)[4]= 1.; break;/* Cysteine */ |
---|
84 | case 'Q' : (*p_lk)[5]= 1.; break;/* Glutamine */ |
---|
85 | case 'E' : (*p_lk)[6]= 1.; break;/* Glutamic acid */ |
---|
86 | case 'G' : (*p_lk)[7]= 1.; break;/* Glycine */ |
---|
87 | case 'H' : (*p_lk)[8]= 1.; break;/* Histidine */ |
---|
88 | case 'I' : (*p_lk)[9]= 1.; break;/* Isoleucine */ |
---|
89 | case 'L' : (*p_lk)[10]=1.; break;/* Leucine */ |
---|
90 | case 'K' : (*p_lk)[11]=1.; break;/* Lysine */ |
---|
91 | case 'M' : (*p_lk)[12]=1.; break;/* Methionine */ |
---|
92 | case 'F' : (*p_lk)[13]=1.; break;/* Phenylalanin */ |
---|
93 | case 'P' : (*p_lk)[14]=1.; break;/* Proline */ |
---|
94 | case 'S' : (*p_lk)[15]=1.; break;/* Serine */ |
---|
95 | case 'T' : (*p_lk)[16]=1.; break;/* Threonine */ |
---|
96 | case 'W' : (*p_lk)[17]=1.; break;/* Tryptophan */ |
---|
97 | case 'Y' : (*p_lk)[18]=1.; break;/* Tyrosine */ |
---|
98 | case 'V' : (*p_lk)[19]=1.; break;/* Valine */ |
---|
99 | |
---|
100 | case 'B' : (*p_lk)[2]= 1.; break;/* Asparagine */ |
---|
101 | case 'Z' : (*p_lk)[5]= 1.; break;/* Glutamine */ |
---|
102 | |
---|
103 | case 'X' : case '?' : case '-' : For(i,20) (*p_lk)[i] = 1.; break; |
---|
104 | default : |
---|
105 | { |
---|
106 | printf("\n. Unknown character state : %c\n",aa); |
---|
107 | Exit("\n. Init failed (check the data type)\n"); |
---|
108 | break; |
---|
109 | } |
---|
110 | } |
---|
111 | } |
---|
112 | |
---|
113 | /*********************************************************/ |
---|
114 | |
---|
115 | void Get_All_Partial_Lk(arbre *tree, edge *b_fcus, node *a, node *d) |
---|
116 | { |
---|
117 | int i,j; |
---|
118 | double p1_lk1,p2_lk2; |
---|
119 | double ***p_lk,***p_lk_v1,***p_lk_v2; |
---|
120 | int catg,site; |
---|
121 | double ***Pij1,***Pij2; |
---|
122 | |
---|
123 | if(d->tax) return; |
---|
124 | else |
---|
125 | { |
---|
126 | int dir1,dir2; |
---|
127 | |
---|
128 | dir1=dir2=-1; |
---|
129 | For(i,3) if(d->v[i] != a) (dir1<0)?(dir1=i):(dir2=i); |
---|
130 | |
---|
131 | if(b_fcus->l < BL_MIN) b_fcus->l = BL_MIN; |
---|
132 | |
---|
133 | p_lk = |
---|
134 | (d == b_fcus->left)? |
---|
135 | (b_fcus->p_lk_left): |
---|
136 | (b_fcus->p_lk_rght); |
---|
137 | |
---|
138 | p_lk_v1 = |
---|
139 | (d == d->b[dir1]->left)? |
---|
140 | (d->b[dir1]->p_lk_rght): |
---|
141 | (d->b[dir1]->p_lk_left); |
---|
142 | |
---|
143 | p_lk_v2 = |
---|
144 | (d == d->b[dir2]->left)? |
---|
145 | (d->b[dir2]->p_lk_rght): |
---|
146 | (d->b[dir2]->p_lk_left); |
---|
147 | |
---|
148 | Pij1 = d->b[dir1]->Pij_rr; |
---|
149 | Pij2 = d->b[dir2]->Pij_rr; |
---|
150 | |
---|
151 | For(catg,tree->mod->n_catg) |
---|
152 | { |
---|
153 | For(site,tree->n_pattern) |
---|
154 | { |
---|
155 | For(i,tree->mod->ns) /*sort sum terms ? No global effect*/ |
---|
156 | { |
---|
157 | p1_lk1 = p2_lk2 = .0; |
---|
158 | For(j,tree->mod->ns) |
---|
159 | { |
---|
160 | p1_lk1 += (Pij1[catg][i][j] * p_lk_v1[site][catg][j]); |
---|
161 | p2_lk2 += (Pij2[catg][i][j] * p_lk_v2[site][catg][j]); |
---|
162 | } |
---|
163 | |
---|
164 | p_lk[site][catg][i] = p1_lk1*p2_lk2; |
---|
165 | |
---|
166 | |
---|
167 | if(p_lk[site][catg][i] < MDBL_MIN) |
---|
168 | { |
---|
169 | printf("\nWARNING : scaling is required at site %d\n",site); |
---|
170 | /* printf("Alpha = %f\n",tree->mod->alpha); */ |
---|
171 | /* Exit(""); */ |
---|
172 | } |
---|
173 | } |
---|
174 | } |
---|
175 | } |
---|
176 | |
---|
177 | } |
---|
178 | } |
---|
179 | |
---|
180 | /*********************************************************/ |
---|
181 | |
---|
182 | void Get_All_Partial_Lk_Scale(arbre *tree, edge *b_fcus, node *a, node *d) |
---|
183 | { |
---|
184 | int i,j; |
---|
185 | double p1_lk1,p2_lk2; |
---|
186 | double ***p_lk,***p_lk_v1,***p_lk_v2; |
---|
187 | int catg,site; |
---|
188 | double ***Pij1,***Pij2; |
---|
189 | double max_p_lk; |
---|
190 | /* double min_p_lk; */ |
---|
191 | double sum_scale_d1,sum_scale_d2; |
---|
192 | double try; |
---|
193 | |
---|
194 | p1_lk1 = p2_lk2 = .0; |
---|
195 | if(d->tax) return; |
---|
196 | else |
---|
197 | { |
---|
198 | int dir1,dir2; |
---|
199 | |
---|
200 | dir1=dir2=-1; |
---|
201 | For(i,3) if(d->v[i] != a) (dir1<0)?(dir1=i):(dir2=i); |
---|
202 | |
---|
203 | if(b_fcus->l < BL_MIN) b_fcus->l = BL_MIN; |
---|
204 | |
---|
205 | p_lk = |
---|
206 | (d == b_fcus->left)? |
---|
207 | (b_fcus->p_lk_left): |
---|
208 | (b_fcus->p_lk_rght); |
---|
209 | |
---|
210 | p_lk_v1 = |
---|
211 | (d == d->b[dir1]->left)? |
---|
212 | (d->b[dir1]->p_lk_rght): |
---|
213 | (d->b[dir1]->p_lk_left); |
---|
214 | |
---|
215 | p_lk_v2 = |
---|
216 | (d == d->b[dir2]->left)? |
---|
217 | (d->b[dir2]->p_lk_rght): |
---|
218 | (d->b[dir2]->p_lk_left); |
---|
219 | |
---|
220 | Pij1 = d->b[dir1]->Pij_rr; |
---|
221 | Pij2 = d->b[dir2]->Pij_rr; |
---|
222 | |
---|
223 | |
---|
224 | |
---|
225 | For(site,tree->n_pattern) |
---|
226 | { |
---|
227 | sum_scale_d1 = sum_scale_d2 = .0; |
---|
228 | |
---|
229 | (d == d->b[dir1]->left)? |
---|
230 | (sum_scale_d1 = d->b[dir1]->sum_scale_f_rght[site]): |
---|
231 | (sum_scale_d1 = d->b[dir1]->sum_scale_f_left[site]); |
---|
232 | |
---|
233 | (d == d->b[dir2]->left)? |
---|
234 | (sum_scale_d2 = d->b[dir2]->sum_scale_f_rght[site]): |
---|
235 | (sum_scale_d2 = d->b[dir2]->sum_scale_f_left[site]); |
---|
236 | |
---|
237 | (d == b_fcus->left)? |
---|
238 | (b_fcus->sum_scale_f_left[site] = sum_scale_d1 + sum_scale_d2): |
---|
239 | (b_fcus->sum_scale_f_rght[site] = sum_scale_d1 + sum_scale_d2); |
---|
240 | |
---|
241 | /* min_p_lk = MDBL_MAX; */ |
---|
242 | max_p_lk = MDBL_MIN; |
---|
243 | For(catg,tree->mod->n_catg) |
---|
244 | { |
---|
245 | For(i,tree->mod->ns) /*sort sum terms ? No global effect*/ |
---|
246 | { |
---|
247 | p1_lk1 = p2_lk2 = .0; |
---|
248 | For(j,tree->mod->ns) |
---|
249 | { |
---|
250 | p1_lk1 += (Pij1[catg][i][j] * p_lk_v1[site][catg][j]); |
---|
251 | p2_lk2 += (Pij2[catg][i][j] * p_lk_v2[site][catg][j]); |
---|
252 | } |
---|
253 | |
---|
254 | try = p1_lk1*p2_lk2; |
---|
255 | |
---|
256 | p_lk[site][catg][i]=try; |
---|
257 | |
---|
258 | if((p_lk[site][catg][i] > max_p_lk)) max_p_lk = p_lk[site][catg][i]; |
---|
259 | |
---|
260 | /* if((p_lk[site][catg][i] < min_p_lk)) min_p_lk = p_lk[site][catg][i]; */ |
---|
261 | } |
---|
262 | } |
---|
263 | |
---|
264 | |
---|
265 | if(max_p_lk < LIM_SCALE_VAL) |
---|
266 | { |
---|
267 | For(catg,tree->mod->n_catg) |
---|
268 | { |
---|
269 | For(i,tree->mod->ns) |
---|
270 | { |
---|
271 | p_lk[site][catg][i] /= max_p_lk; |
---|
272 | |
---|
273 | if(p_lk[site][catg][i] > MDBL_MAX) |
---|
274 | { |
---|
275 | Exit("\n. Numerical underflow ! (send me an e-mail : s.guindon@auckland.ac.nz)\n"); |
---|
276 | /* p_lk[site][catg][i] = p_lk[site][catg-1][i] ; */ |
---|
277 | } |
---|
278 | } |
---|
279 | } |
---|
280 | |
---|
281 | (d == b_fcus->left)? |
---|
282 | (b_fcus->sum_scale_f_left[site] += log(max_p_lk)): |
---|
283 | (b_fcus->sum_scale_f_rght[site] += log(max_p_lk)); |
---|
284 | } |
---|
285 | |
---|
286 | if(max_p_lk > (1./LIM_SCALE_VAL)) |
---|
287 | { |
---|
288 | For(catg,tree->mod->n_catg) |
---|
289 | { |
---|
290 | For(i,tree->mod->ns) |
---|
291 | { |
---|
292 | p_lk[site][catg][i] /= max_p_lk; |
---|
293 | |
---|
294 | if(p_lk[site][catg][i] < MDBL_MIN) |
---|
295 | { |
---|
296 | Exit("\n. Numerical overflow ! (send me an e-mail : s.guindon@auckland.ac.nz)\n"); |
---|
297 | /* p_lk[site][catg][i] = p_lk[site][catg-1][i] ; */ |
---|
298 | } |
---|
299 | } |
---|
300 | } |
---|
301 | |
---|
302 | (d == b_fcus->left)? |
---|
303 | (b_fcus->sum_scale_f_left[site] += log(max_p_lk)): |
---|
304 | (b_fcus->sum_scale_f_rght[site] += log(max_p_lk)); |
---|
305 | } |
---|
306 | } |
---|
307 | } |
---|
308 | } |
---|
309 | |
---|
310 | |
---|
311 | /*********************************************************/ |
---|
312 | |
---|
313 | void Post_Order_Lk(node *pere, node *fils, arbre *tree) |
---|
314 | { |
---|
315 | int i,dir1,dir2,dir3; |
---|
316 | |
---|
317 | dir1 = dir2 = dir3 = -1; |
---|
318 | |
---|
319 | if(fils->tax) return; |
---|
320 | else |
---|
321 | { |
---|
322 | For(i,3) |
---|
323 | { |
---|
324 | if(fils->v[i] != pere) |
---|
325 | { |
---|
326 | Post_Order_Lk(fils,fils->v[i],tree); |
---|
327 | if(dir1 < 0) dir1 = i; |
---|
328 | else dir2 = i; |
---|
329 | } |
---|
330 | else dir3 = i; |
---|
331 | } |
---|
332 | |
---|
333 | (tree->n_otu > LIM_SCALE)? |
---|
334 | (Get_All_Partial_Lk_Scale(tree,fils->b[dir3],pere,fils)): |
---|
335 | (Get_All_Partial_Lk(tree,fils->b[dir3],pere,fils)); |
---|
336 | } |
---|
337 | } |
---|
338 | |
---|
339 | /*********************************************************/ |
---|
340 | |
---|
341 | void Pre_Order_Lk(node *pere, node *fils, arbre *tree) |
---|
342 | { |
---|
343 | int i,j,dir1,dir2; |
---|
344 | |
---|
345 | dir1 = dir2 = -1; |
---|
346 | |
---|
347 | if(fils->tax) return; |
---|
348 | else |
---|
349 | { |
---|
350 | For(i,3) |
---|
351 | { |
---|
352 | if(fils->v[i] != pere) |
---|
353 | { |
---|
354 | For(j,3) |
---|
355 | { |
---|
356 | if(j != i) |
---|
357 | { |
---|
358 | if(dir1 < 0) dir1 = j; |
---|
359 | else dir2 = j; |
---|
360 | } |
---|
361 | } |
---|
362 | |
---|
363 | (tree->n_otu > LIM_SCALE)? |
---|
364 | (Get_All_Partial_Lk_Scale(tree,fils->b[i],fils->v[i],fils)): |
---|
365 | (Get_All_Partial_Lk(tree,fils->b[i],fils->v[i],fils)); |
---|
366 | dir1 = dir2 = -1; |
---|
367 | Pre_Order_Lk(fils,fils->v[i],tree); |
---|
368 | } |
---|
369 | } |
---|
370 | } |
---|
371 | } |
---|
372 | |
---|
373 | /*********************************************************/ |
---|
374 | |
---|
375 | void Lk(arbre *tree,allseq *alldata) |
---|
376 | { |
---|
377 | int br,site,catg; |
---|
378 | double len; |
---|
379 | |
---|
380 | Set_Model_Parameters(tree); |
---|
381 | |
---|
382 | For(br,2*tree->n_otu-3) |
---|
383 | { |
---|
384 | For(site,tree->n_pattern) |
---|
385 | { |
---|
386 | tree->t_edges[br]->sum_scale_f_rght[site] = .0; |
---|
387 | tree->t_edges[br]->sum_scale_f_left[site] = .0; |
---|
388 | } |
---|
389 | |
---|
390 | |
---|
391 | if(tree->t_edges[br]->l < BL_MIN) tree->t_edges[br]->l = BL_MIN; |
---|
392 | |
---|
393 | if(tree->t_edges[br]->l > BL_MAX) tree->t_edges[br]->l = BL_MAX; |
---|
394 | |
---|
395 | |
---|
396 | |
---|
397 | For(catg,tree->mod->n_catg) |
---|
398 | { |
---|
399 | len = tree->t_edges[br]->l*tree->mod->rr[catg]; |
---|
400 | if(len < BL_MIN) len = BL_MIN; |
---|
401 | |
---|
402 | PMat(len, |
---|
403 | tree->mod, |
---|
404 | &tree->t_edges[br]->Pij_rr[catg]); |
---|
405 | |
---|
406 | } |
---|
407 | } |
---|
408 | |
---|
409 | Post_Order_Lk(tree->noeud[0],tree->noeud[0]->v[0],tree); |
---|
410 | if(tree->both_sides) |
---|
411 | Pre_Order_Lk(tree->noeud[0], |
---|
412 | tree->noeud[0]->v[0], |
---|
413 | tree); |
---|
414 | |
---|
415 | tree->tot_loglk = .0; |
---|
416 | tree->curr_catg = 0; |
---|
417 | tree->curr_site = 0; |
---|
418 | |
---|
419 | For(site,tree->n_pattern) |
---|
420 | { |
---|
421 | tree->tot_loglk_sorted[site] = .0; |
---|
422 | tree->site_lk[site] = .0; |
---|
423 | tree->curr_site = site; |
---|
424 | Site_Lk(tree,alldata); |
---|
425 | } |
---|
426 | |
---|
427 | qksort(tree->tot_loglk_sorted, 0, tree->n_pattern-1); |
---|
428 | /* qsort(tree->tot_loglk_sorted,tree->n_pattern,sizeof(double),Sort_Double_Decrease); */ |
---|
429 | |
---|
430 | tree->tot_loglk = .0; |
---|
431 | For(site, tree->n_pattern) |
---|
432 | { |
---|
433 | if(tree->tot_loglk_sorted[site] < .0) /* WARNING : change cautiously */ |
---|
434 | tree->tot_loglk += tree->tot_loglk_sorted[site]; |
---|
435 | } |
---|
436 | |
---|
437 | |
---|
438 | For(br,2*tree->n_otu-3) |
---|
439 | { |
---|
440 | if(tree->t_edges[br]->get_p_lk_left) tree->t_edges[br]->ud_p_lk_left = 1; |
---|
441 | if(tree->t_edges[br]->get_p_lk_rght) tree->t_edges[br]->ud_p_lk_rght = 1; |
---|
442 | } |
---|
443 | } |
---|
444 | |
---|
445 | /*********************************************************/ |
---|
446 | |
---|
447 | void Site_Lk(arbre *tree, allseq *alldata) |
---|
448 | { |
---|
449 | int j,k,l,m; |
---|
450 | double *site_dlk=NULL, *site_d2lk=NULL; |
---|
451 | double log_site_lk,site_lk,site_lk_previous,aux; |
---|
452 | int left; |
---|
453 | edge *eroot; |
---|
454 | int is_ambigu; |
---|
455 | int state_root, state_elsewhere, site = tree->curr_site; |
---|
456 | |
---|
457 | |
---|
458 | if(alldata->wght[site] < MDBL_MIN) |
---|
459 | { |
---|
460 | tree->tot_loglk_sorted[tree->curr_site] = 1.; /* WARNING : change cautiously */ |
---|
461 | return; |
---|
462 | } |
---|
463 | |
---|
464 | |
---|
465 | if(tree->mod->s_opt->opt_bl) |
---|
466 | { |
---|
467 | site_dlk = (double *)mCalloc(2*tree->n_otu-3,sizeof(double)); |
---|
468 | site_d2lk = (double *)mCalloc(2*tree->n_otu-3,sizeof(double)); |
---|
469 | } |
---|
470 | |
---|
471 | |
---|
472 | eroot = tree->noeud[0]->b[0]; |
---|
473 | (eroot->rght->tax)?(left=1):(left=0); |
---|
474 | |
---|
475 | /* state = tree->data->invar[site]; */ |
---|
476 | is_ambigu = alldata->ambigu[site]; |
---|
477 | |
---|
478 | state_root = -1; |
---|
479 | state_root = Assign_State(alldata->c_seq[eroot->rght->num]->state + site, |
---|
480 | tree->mod->datatype, |
---|
481 | tree->mod->stepsize); |
---|
482 | |
---|
483 | state_elsewhere = -1; |
---|
484 | state_elsewhere = tree->data->invar[site]; |
---|
485 | |
---|
486 | /**/ |
---|
487 | /* is_ambigu = 1; */ |
---|
488 | /**/ |
---|
489 | |
---|
490 | |
---|
491 | if(tree->mod->s_opt->opt_bl) |
---|
492 | For(j,2*tree->n_otu-3) site_dlk[j] = site_d2lk[j] = 0.0; |
---|
493 | |
---|
494 | log_site_lk = site_lk = .0; |
---|
495 | site_lk_previous = .0; |
---|
496 | |
---|
497 | For(j,tree->mod->n_catg) |
---|
498 | { |
---|
499 | if(is_ambigu) |
---|
500 | { |
---|
501 | For(k,tree->mod->ns) /*sort sum terms ? No global effect*/ |
---|
502 | { |
---|
503 | For(l,tree->mod->ns) |
---|
504 | { |
---|
505 | site_lk += |
---|
506 | tree->mod->r_proba[j] * |
---|
507 | tree->mod->pi[k] * |
---|
508 | eroot->p_lk_left[site][j][k] * |
---|
509 | eroot->Pij_rr[j][k][l] * |
---|
510 | eroot->p_lk_rght[site][j][l]; |
---|
511 | } |
---|
512 | } |
---|
513 | } |
---|
514 | else |
---|
515 | { |
---|
516 | For(k,tree->mod->ns) /*sort sum terms ? No global effect*/ |
---|
517 | { |
---|
518 | site_lk += |
---|
519 | tree->mod->r_proba[j] * |
---|
520 | tree->mod->pi[k] * |
---|
521 | eroot->p_lk_left[site][j][k] * |
---|
522 | eroot->Pij_rr[j][k][state_root]; |
---|
523 | } |
---|
524 | } |
---|
525 | |
---|
526 | tree->log_site_lk_cat[j][tree->curr_site] = (site_lk/tree->mod->r_proba[j] - site_lk_previous); |
---|
527 | site_lk_previous = site_lk/tree->mod->r_proba[j]; |
---|
528 | |
---|
529 | if(tree->mod->s_opt->opt_bl) |
---|
530 | { |
---|
531 | For(k,2*tree->n_otu-3) |
---|
532 | { |
---|
533 | tree->t_edges[k]->site_dlk_rr[j] = .0; |
---|
534 | tree->t_edges[k]->site_d2lk_rr[j] = .0; |
---|
535 | |
---|
536 | For(l,tree->mod->ns) /*sort sum terms ? No global effect*/ |
---|
537 | { |
---|
538 | For(m,tree->mod->ns) |
---|
539 | { |
---|
540 | tree->t_edges[k]->site_dlk_rr[j] += |
---|
541 | tree->mod->pi[l] * |
---|
542 | tree->t_edges[k]->p_lk_left[site][j][l]* |
---|
543 | tree->t_edges[k]->p_lk_rght[site][j][m]* |
---|
544 | tree->t_edges[k]->dPij_rr[j][l][m]; |
---|
545 | |
---|
546 | |
---|
547 | tree->t_edges[k]->site_d2lk_rr[j] += |
---|
548 | tree->mod->pi[l] * |
---|
549 | tree->t_edges[k]->p_lk_left[site][j][l]* |
---|
550 | tree->t_edges[k]->p_lk_rght[site][j][m]* |
---|
551 | tree->t_edges[k]->d2Pij_rr[j][l][m]; |
---|
552 | } |
---|
553 | } |
---|
554 | |
---|
555 | site_dlk[k] += tree->t_edges[k]->site_dlk_rr[j]* |
---|
556 | tree->mod->r_proba[j]; |
---|
557 | |
---|
558 | site_d2lk[k] += tree->t_edges[k]->site_d2lk_rr[j]* |
---|
559 | tree->mod->r_proba[j]; |
---|
560 | } |
---|
561 | } |
---|
562 | } |
---|
563 | |
---|
564 | |
---|
565 | |
---|
566 | /* code 2.3 begin*/ |
---|
567 | |
---|
568 | if (!tree->mod->invar) |
---|
569 | { |
---|
570 | log_site_lk += log(site_lk) |
---|
571 | + eroot->sum_scale_f_left[site] + eroot->sum_scale_f_rght[site]; |
---|
572 | } |
---|
573 | else |
---|
574 | { |
---|
575 | if ((double)tree->data->invar[site] > -0.5) |
---|
576 | { |
---|
577 | if (!(eroot->sum_scale_f_left[site] + eroot->sum_scale_f_rght[site]==0.0)) |
---|
578 | site_lk *= exp(eroot->sum_scale_f_left[site] + eroot->sum_scale_f_rght[site]); |
---|
579 | |
---|
580 | log_site_lk = log(site_lk*(1.0-tree->mod->pinvar) + tree->mod->pinvar*tree->mod->pi[state_elsewhere]); |
---|
581 | } |
---|
582 | else |
---|
583 | { |
---|
584 | log_site_lk = log(site_lk*(1.0-tree->mod->pinvar)) |
---|
585 | + eroot->sum_scale_f_left[site] + eroot->sum_scale_f_rght[site]; |
---|
586 | } |
---|
587 | } |
---|
588 | |
---|
589 | For(j,tree->mod->n_catg) |
---|
590 | tree->log_site_lk_cat[j][tree->curr_site] = log(tree->log_site_lk_cat[j][tree->curr_site]) + |
---|
591 | + eroot->sum_scale_f_left[site] + eroot->sum_scale_f_rght[site]; |
---|
592 | |
---|
593 | |
---|
594 | /* code 2.3 end*/ |
---|
595 | |
---|
596 | if(log_site_lk < -MDBL_MAX) |
---|
597 | { |
---|
598 | printf("%d %E %f %f %f %f\n", |
---|
599 | site, |
---|
600 | log_site_lk, |
---|
601 | tree->mod->alpha, |
---|
602 | eroot->sum_scale_f_left[site], |
---|
603 | eroot->sum_scale_f_rght[site], |
---|
604 | tree->mod->pinvar); |
---|
605 | Exit("\nlog_site_lk < -MDBL_MAX\n"); |
---|
606 | } |
---|
607 | |
---|
608 | |
---|
609 | tree->site_lk[site] = log_site_lk; |
---|
610 | |
---|
611 | |
---|
612 | if(tree->mod->s_opt->opt_bl) |
---|
613 | { |
---|
614 | For(k,2*tree->n_otu-3) |
---|
615 | { |
---|
616 | aux = exp(tree->t_edges[k]->sum_scale_f_rght[site]+ |
---|
617 | tree->t_edges[k]->sum_scale_f_left[site]); |
---|
618 | site_dlk[k] *= aux; |
---|
619 | site_d2lk[k] *= aux; |
---|
620 | |
---|
621 | tree->t_edges[k]->site_dlk = site_dlk[k]; |
---|
622 | tree->t_edges[k]->site_d2lk = site_d2lk[k]; |
---|
623 | } |
---|
624 | } |
---|
625 | |
---|
626 | /* tree->tot_loglk += alldata->wght[site]*log_site_lk; */ |
---|
627 | tree->tot_loglk_sorted[site] = alldata->wght[site]*log_site_lk; |
---|
628 | |
---|
629 | |
---|
630 | if((tree->mod->s_opt->opt_bl) && |
---|
631 | (fabs(site_lk = exp(log_site_lk)) > sqrt(MDBL_MIN))) |
---|
632 | { |
---|
633 | For(k,2*tree->n_otu-3) |
---|
634 | { |
---|
635 | aux = site_dlk[k] / site_lk; |
---|
636 | tree->tot_dloglk[k] += alldata->wght[site] |
---|
637 | * aux; |
---|
638 | tree->tot_d2loglk[k] += alldata->wght[site] |
---|
639 | * (site_d2lk[k]/site_lk |
---|
640 | - aux*aux); |
---|
641 | } |
---|
642 | } |
---|
643 | |
---|
644 | if(tree->mod->s_opt->opt_bl) |
---|
645 | { |
---|
646 | Free(site_dlk); |
---|
647 | Free(site_d2lk); |
---|
648 | } |
---|
649 | } |
---|
650 | |
---|
651 | /*********************************************************/ |
---|
652 | |
---|
653 | double Lk_At_Given_Edge(arbre *tree, edge *b_fcus) |
---|
654 | { |
---|
655 | int site,catg,k,l,edge_num, ns = tree->mod->ns; |
---|
656 | double site_lk,log_site_lk,site_dlk,site_d2lk,aux; |
---|
657 | |
---|
658 | edge_num = b_fcus->num; |
---|
659 | tree->tot_loglk = log_site_lk = .0; |
---|
660 | tree->tot_dloglk[edge_num] = tree->tot_d2loglk[edge_num] = .0; |
---|
661 | tree->n_pattern = tree->data->crunch_len/tree->mod->stepsize; |
---|
662 | |
---|
663 | |
---|
664 | if(b_fcus->l < BL_MIN) b_fcus->l = BL_MIN; |
---|
665 | if(b_fcus->l > BL_MAX) b_fcus->l = BL_MAX; |
---|
666 | |
---|
667 | For(catg,tree->mod->n_catg) |
---|
668 | { |
---|
669 | aux = b_fcus->l*tree->mod->rr[catg]; |
---|
670 | if(aux < BL_MIN) aux = BL_MIN; |
---|
671 | PMat(aux, tree->mod,&b_fcus->Pij_rr[catg]); |
---|
672 | } |
---|
673 | |
---|
674 | |
---|
675 | if((tree->mod->s_opt->opt_bl) && (tree->mod->datatype == NT)) |
---|
676 | { |
---|
677 | For(catg,tree->mod->n_catg) |
---|
678 | { |
---|
679 | dPMat(tree->t_edges[edge_num]->l, |
---|
680 | tree->mod->rr[catg], |
---|
681 | tree->mod, |
---|
682 | &b_fcus->dPij_rr[catg]); |
---|
683 | d2PMat(tree->t_edges[edge_num]->l, |
---|
684 | tree->mod->rr[catg], |
---|
685 | tree->mod, |
---|
686 | &b_fcus->d2Pij_rr[catg]); |
---|
687 | } |
---|
688 | } |
---|
689 | |
---|
690 | |
---|
691 | For(site,tree->data->crunch_len) |
---|
692 | { |
---|
693 | if(tree->data->wght[site]) |
---|
694 | { |
---|
695 | log_site_lk = site_lk = .0; |
---|
696 | /* see equation (2) in phyml_tech_doc.pdf */ |
---|
697 | For(catg,tree->mod->n_catg) |
---|
698 | { |
---|
699 | For(k,ns) /*sort sum terms ? No global effect*/ |
---|
700 | { |
---|
701 | For(l,ns) |
---|
702 | { |
---|
703 | site_lk += |
---|
704 | tree->mod->r_proba[catg] * |
---|
705 | tree->mod->pi[k] * |
---|
706 | b_fcus->p_lk_left[site][catg][k] * |
---|
707 | b_fcus->p_lk_rght[site][catg][l] * |
---|
708 | b_fcus->Pij_rr[catg][k][l]; |
---|
709 | } |
---|
710 | } |
---|
711 | } |
---|
712 | |
---|
713 | /* code 2.3 begin*//* see equations in phyml_tech_doc.pdf */ |
---|
714 | /* compute log_site_lk */ |
---|
715 | if (!tree->mod->invar) |
---|
716 | { |
---|
717 | log_site_lk += log(site_lk) |
---|
718 | + b_fcus->sum_scale_f_left[site] + b_fcus->sum_scale_f_rght[site]; |
---|
719 | } |
---|
720 | else |
---|
721 | { |
---|
722 | if ((double)tree->data->invar[site] > -0.5) |
---|
723 | { |
---|
724 | if (!(b_fcus->sum_scale_f_left[site] + b_fcus->sum_scale_f_rght[site]==0.0)) |
---|
725 | site_lk *= exp(b_fcus->sum_scale_f_left[site] + b_fcus->sum_scale_f_rght[site]); |
---|
726 | |
---|
727 | log_site_lk = log(site_lk*(1.0-tree->mod->pinvar) + tree->mod->pinvar*tree->mod->pi[tree->data->invar[site]]); |
---|
728 | } |
---|
729 | else |
---|
730 | { |
---|
731 | log_site_lk = log(site_lk*(1.0-tree->mod->pinvar)) |
---|
732 | + b_fcus->sum_scale_f_left[site] + b_fcus->sum_scale_f_rght[site]; |
---|
733 | } |
---|
734 | } |
---|
735 | /* code 2.3 end*/ |
---|
736 | |
---|
737 | if(log_site_lk < -MDBL_MAX) Exit("\nlog_site_lk < -MDBL_MAX\n"); |
---|
738 | |
---|
739 | tree->site_lk[site] = log_site_lk; |
---|
740 | |
---|
741 | /*tree->tot_loglk += *//* old code */ |
---|
742 | tree->tot_loglk_sorted[site] = /* code 2.3 */ |
---|
743 | tree->data->wght[site]*log_site_lk; |
---|
744 | } |
---|
745 | else tree->tot_loglk_sorted[site] = 1.; /* WARNING : change cautiously */ |
---|
746 | } |
---|
747 | |
---|
748 | /* code 2.3 begin*/ |
---|
749 | /* sort and add numbers from smallest to biggest */ |
---|
750 | |
---|
751 | qksort(tree->tot_loglk_sorted, 0, tree->n_pattern-1); |
---|
752 | /* qsort(tree->tot_loglk_sorted,tree->n_pattern,sizeof(double),Sort_Double_Decrease); */ |
---|
753 | |
---|
754 | tree->tot_loglk = .0; |
---|
755 | For(k, tree->data->crunch_len) |
---|
756 | if(tree->tot_loglk_sorted[k] < .0) /* WARNING : change cautiously */ |
---|
757 | tree->tot_loglk += tree->tot_loglk_sorted[k]; |
---|
758 | /* code 2.3 end*/ |
---|
759 | |
---|
760 | if((tree->mod->s_opt->opt_bl) && (tree->mod->datatype == NT)) |
---|
761 | { |
---|
762 | For(site,tree->n_pattern) |
---|
763 | { |
---|
764 | if(tree->data->wght[site]) |
---|
765 | { |
---|
766 | site_dlk = site_d2lk = .0; |
---|
767 | b_fcus->site_dlk = b_fcus->site_d2lk = .0; |
---|
768 | |
---|
769 | For(catg,tree->mod->n_catg) |
---|
770 | { |
---|
771 | For(k,ns) /*sort sum terms ? No global effect*/ |
---|
772 | { |
---|
773 | For(l,ns) |
---|
774 | { |
---|
775 | aux = tree->mod->r_proba[catg] * |
---|
776 | tree->mod->pi[k] * |
---|
777 | b_fcus->p_lk_left[site][catg][k]; |
---|
778 | |
---|
779 | site_dlk += |
---|
780 | aux * |
---|
781 | b_fcus->p_lk_rght[site][catg][l] * |
---|
782 | b_fcus->dPij_rr[catg][k][l]; |
---|
783 | |
---|
784 | site_d2lk += |
---|
785 | aux * |
---|
786 | b_fcus->p_lk_rght[site][catg][l] * |
---|
787 | b_fcus->d2Pij_rr[catg][k][l]; |
---|
788 | } |
---|
789 | } |
---|
790 | } |
---|
791 | |
---|
792 | if(tree->n_otu > LIM_SCALE) |
---|
793 | { |
---|
794 | aux = exp(b_fcus->sum_scale_f_rght[site]+ |
---|
795 | b_fcus->sum_scale_f_left[site]); |
---|
796 | site_dlk *= aux; |
---|
797 | site_d2lk *= aux; |
---|
798 | } |
---|
799 | |
---|
800 | b_fcus->site_dlk = site_dlk; |
---|
801 | b_fcus->site_d2lk = site_d2lk; |
---|
802 | |
---|
803 | if(fabs(exp(log_site_lk)) > sqrt(MDBL_MIN)) |
---|
804 | { |
---|
805 | aux = site_dlk / tree->site_lk[site]; |
---|
806 | tree->tot_dloglk[edge_num] += tree->data->wght[site] |
---|
807 | * aux; |
---|
808 | tree->tot_d2loglk[edge_num] += tree->data->wght[site] |
---|
809 | * (site_d2lk/tree->site_lk[site] |
---|
810 | - aux*aux); |
---|
811 | } |
---|
812 | } |
---|
813 | } |
---|
814 | } |
---|
815 | |
---|
816 | return tree->tot_loglk; |
---|
817 | } |
---|
818 | |
---|
819 | /*********************************************************/ |
---|
820 | |
---|
821 | void Update_P(arbre *tree, int t_edge_num) |
---|
822 | { |
---|
823 | int i; |
---|
824 | double len; |
---|
825 | |
---|
826 | len = -1.0; |
---|
827 | For(i,tree->mod->n_catg) |
---|
828 | { |
---|
829 | tree->curr_catg = i; |
---|
830 | len = tree->t_edges[t_edge_num]->l*tree->mod->rr[i]; |
---|
831 | if(len < BL_MIN) len = BL_MIN; |
---|
832 | tree->mod->update_eigen = 0; |
---|
833 | PMat(len,tree->mod,&tree->t_edges[t_edge_num]->Pij_rr[i]); |
---|
834 | /* Derivatives(tree->t_edges[t_edge_num],tree); */ |
---|
835 | } |
---|
836 | } |
---|
837 | |
---|
838 | /*********************************************************/ |
---|
839 | |
---|
840 | double Return_Lk(arbre *tree) |
---|
841 | { |
---|
842 | Lk(tree,tree->data); |
---|
843 | return tree->tot_loglk; |
---|
844 | } |
---|
845 | |
---|
846 | /*********************************************************/ |
---|
847 | |
---|
848 | double Return_Abs_Lk(arbre *tree) |
---|
849 | { |
---|
850 | Lk(tree,tree->data); |
---|
851 | return fabs(tree->tot_loglk); |
---|
852 | } |
---|
853 | |
---|
854 | /*********************************************************/ |
---|
855 | |
---|
856 | matrix *ML_Dist(allseq *data, model *mod) |
---|
857 | { |
---|
858 | int j,k,l; |
---|
859 | double init; |
---|
860 | int n_catg; |
---|
861 | double d_max; |
---|
862 | matrix *mat; |
---|
863 | allseq *twodata,*tmpdata; |
---|
864 | |
---|
865 | tmpdata = (allseq *)mCalloc(1,sizeof(allseq)); |
---|
866 | tmpdata->n_otu=2; |
---|
867 | tmpdata->c_seq = (seq **)mCalloc(2,sizeof(seq *)); |
---|
868 | tmpdata->b_frq = (double *)mCalloc(mod->ns,sizeof(double)); |
---|
869 | tmpdata->ambigu = (int *)mCalloc(data->crunch_len,sizeof(int)); |
---|
870 | |
---|
871 | tmpdata->crunch_len = tmpdata->init_len = data->crunch_len; |
---|
872 | |
---|
873 | |
---|
874 | mat = |
---|
875 | (mod->datatype == NT) ? |
---|
876 | ((mod->whichmodel < 10)?(K2P_dist(data,2000)):(JC69_Dist(data,mod))): |
---|
877 | (JC69_Dist(data,mod)); |
---|
878 | |
---|
879 | n_catg = -1; |
---|
880 | |
---|
881 | |
---|
882 | For(j,data->n_otu-1) |
---|
883 | { |
---|
884 | tmpdata->c_seq[0]=data->c_seq[j]; |
---|
885 | tmpdata->c_seq[0]->name=data->c_seq[j]->name; |
---|
886 | tmpdata->wght = data->wght; |
---|
887 | |
---|
888 | for(k=j+1;k<data->n_otu;k++) |
---|
889 | { |
---|
890 | tmpdata->c_seq[1]=data->c_seq[k]; |
---|
891 | tmpdata->c_seq[1]->name=data->c_seq[k]->name; |
---|
892 | |
---|
893 | twodata = Compact_CSeq(tmpdata,mod); |
---|
894 | For(l,mod->ns) twodata->b_frq[l]=data->b_frq[l]; |
---|
895 | |
---|
896 | Check_Ambiguities(twodata,mod->datatype,1); |
---|
897 | |
---|
898 | Hide_Ambiguities(twodata); |
---|
899 | |
---|
900 | init = mat->dist[j][k]; |
---|
901 | if((init == DIST_MAX) || (init < .0)) init = 0.1; |
---|
902 | |
---|
903 | |
---|
904 | n_catg = mod->n_catg; |
---|
905 | mod->n_catg = 1; |
---|
906 | |
---|
907 | |
---|
908 | /* BRENT */ |
---|
909 | d_max = Optimize_Dist(mod,init,twodata); |
---|
910 | |
---|
911 | /* d_max = init; */ |
---|
912 | |
---|
913 | /* NEWTON-RAPHSON */ |
---|
914 | /* if(d_max < .0) */ |
---|
915 | /* { */ |
---|
916 | /* d_max = Optimize_One_Dist(twodata,0,1,init,mod); */ |
---|
917 | /* d_max = init; */ |
---|
918 | |
---|
919 | /* } */ |
---|
920 | |
---|
921 | mod->n_catg = n_catg; |
---|
922 | |
---|
923 | |
---|
924 | if(d_max >= DIST_MAX) |
---|
925 | { |
---|
926 | printf("\n. Large distance encountered between %s and %s sequences\n", |
---|
927 | tmpdata->c_seq[1]->name, |
---|
928 | tmpdata->c_seq[0]->name); |
---|
929 | d_max = DIST_MAX; |
---|
930 | } |
---|
931 | |
---|
932 | mat->dist[j][k] = d_max; |
---|
933 | mat->dist[k][j] = mat->dist[j][k]; |
---|
934 | Free_Cseq(twodata); |
---|
935 | } |
---|
936 | } |
---|
937 | |
---|
938 | |
---|
939 | Free(tmpdata->ambigu); |
---|
940 | Free(tmpdata->b_frq); |
---|
941 | Free(tmpdata->c_seq); |
---|
942 | free(tmpdata); |
---|
943 | |
---|
944 | return mat; |
---|
945 | } |
---|
946 | |
---|
947 | /*********************************************************/ |
---|
948 | |
---|
949 | double Lk_Given_Two_Seq(allseq *data, int numseq1, int numseq2, double dist, model *mod, |
---|
950 | double *loglk, double *dloglk, double *d2loglk) |
---|
951 | { |
---|
952 | seq *seq1,*seq2; |
---|
953 | double site_lk,site_dlk,site_d2lk,log_site_lk; |
---|
954 | int i,j,k,l; |
---|
955 | double **p_lk_l,**p_lk_r; |
---|
956 | double len; |
---|
957 | |
---|
958 | DiscreteGamma (mod->r_proba, mod->rr, mod->alpha, |
---|
959 | mod->alpha,mod->n_catg,0); |
---|
960 | |
---|
961 | |
---|
962 | |
---|
963 | seq1 = data->c_seq[numseq1]; |
---|
964 | seq2 = data->c_seq[numseq2]; |
---|
965 | |
---|
966 | p_lk_l = (double **)mCalloc(data->c_seq[0]->len,sizeof(double *)); |
---|
967 | p_lk_r = (double **)mCalloc(data->c_seq[0]->len,sizeof(double *)); |
---|
968 | |
---|
969 | For(i,data->c_seq[0]->len) |
---|
970 | { |
---|
971 | p_lk_l[i] = (double *)mCalloc(mod->ns,sizeof(double)); |
---|
972 | p_lk_r[i] = (double *)mCalloc(mod->ns,sizeof(double)); |
---|
973 | } |
---|
974 | |
---|
975 | |
---|
976 | if(dist < BL_MIN) dist = BL_START; |
---|
977 | |
---|
978 | For(i,mod->n_catg) |
---|
979 | { |
---|
980 | len = dist*mod->rr[i]; |
---|
981 | if(len < BL_MIN) len = BL_MIN; |
---|
982 | PMat(len,mod,&(mod->Pij_rr[i])); |
---|
983 | } |
---|
984 | |
---|
985 | |
---|
986 | if(mod->datatype == NT) |
---|
987 | { |
---|
988 | For(i,mod->n_catg) |
---|
989 | { |
---|
990 | dPMat(dist,mod->rr[i],mod,&(mod->dPij_rr[i])); |
---|
991 | d2PMat(dist,mod->rr[i],mod,&(mod->d2Pij_rr[i])); |
---|
992 | } |
---|
993 | |
---|
994 | For(i,data->c_seq[0]->len) |
---|
995 | { |
---|
996 | Init_Tips_At_One_Site_Nucleotides(seq1->state[i], |
---|
997 | &p_lk_l[i]); |
---|
998 | Init_Tips_At_One_Site_Nucleotides(seq2->state[i], |
---|
999 | &p_lk_r[i]); |
---|
1000 | } |
---|
1001 | } |
---|
1002 | else |
---|
1003 | { |
---|
1004 | For(i,data->c_seq[0]->len) |
---|
1005 | { |
---|
1006 | Init_Tips_At_One_Site_AA(seq1->state[i], |
---|
1007 | &p_lk_l[i]); |
---|
1008 | Init_Tips_At_One_Site_AA(seq2->state[i], |
---|
1009 | &p_lk_r[i]); |
---|
1010 | } |
---|
1011 | } |
---|
1012 | |
---|
1013 | |
---|
1014 | site_lk = site_dlk = site_d2lk = .0; |
---|
1015 | *loglk = *dloglk = *d2loglk = 0; |
---|
1016 | |
---|
1017 | For(i,data->c_seq[0]->len) |
---|
1018 | { |
---|
1019 | if(data->wght[i]) |
---|
1020 | { |
---|
1021 | site_lk = log_site_lk = .0; |
---|
1022 | if(!data->ambigu[i]) |
---|
1023 | { |
---|
1024 | For(k,mod->ns) {if(p_lk_l[i][k] > .0001) break;} |
---|
1025 | For(l,mod->ns) {if(p_lk_r[i][l] > .0001) break;} |
---|
1026 | For(j,mod->n_catg) |
---|
1027 | { |
---|
1028 | site_lk += |
---|
1029 | mod->r_proba[j] * |
---|
1030 | mod->pi[k] * |
---|
1031 | p_lk_l[i][k] * |
---|
1032 | mod->Pij_rr[j][k][l] * |
---|
1033 | p_lk_r[i][l]; |
---|
1034 | } |
---|
1035 | } |
---|
1036 | else |
---|
1037 | { |
---|
1038 | For(j,mod->n_catg) |
---|
1039 | { |
---|
1040 | For(k,mod->ns) /*sort sum terms ? No global effect*/ |
---|
1041 | { |
---|
1042 | For(l,mod->ns) |
---|
1043 | { |
---|
1044 | site_lk += |
---|
1045 | mod->r_proba[j] * |
---|
1046 | mod->pi[k] * |
---|
1047 | p_lk_l[i][k] * |
---|
1048 | mod->Pij_rr[j][k][l] * |
---|
1049 | p_lk_r[i][l]; |
---|
1050 | } |
---|
1051 | } |
---|
1052 | } |
---|
1053 | } |
---|
1054 | |
---|
1055 | /* printf("'%c' '%c' -> %f\n",seq1->state[i],seq2->state[i],site_lk); */ |
---|
1056 | |
---|
1057 | if(site_lk <= .0) |
---|
1058 | { |
---|
1059 | printf("'%c' '%c'\n",seq1->state[i],seq2->state[i]); |
---|
1060 | Exit("\n. Err: site lk <= 0\n"); |
---|
1061 | } |
---|
1062 | |
---|
1063 | log_site_lk += log(site_lk); |
---|
1064 | |
---|
1065 | *loglk += data->wght[i] * log_site_lk;/* sort sum terms ? No global effect*/ |
---|
1066 | } |
---|
1067 | } |
---|
1068 | |
---|
1069 | For(i,data->c_seq[0]->len) |
---|
1070 | { |
---|
1071 | Free(p_lk_l[i]); |
---|
1072 | Free(p_lk_r[i]); |
---|
1073 | } |
---|
1074 | Free(p_lk_l); Free(p_lk_r); |
---|
1075 | return *loglk; |
---|
1076 | } |
---|
1077 | |
---|
1078 | /*********************************************************/ |
---|
1079 | |
---|
1080 | double ***Get_Partial_Lk_Struct(arbre *tree, int len, int n_catg) |
---|
1081 | { |
---|
1082 | double ***p_lk; |
---|
1083 | int j,k; |
---|
1084 | |
---|
1085 | p_lk = (double ***)mCalloc(len,sizeof(double **)); |
---|
1086 | For(j,len) |
---|
1087 | { |
---|
1088 | p_lk[j] = (double **)mCalloc((int)n_catg,sizeof(double *)); |
---|
1089 | For(k,n_catg) p_lk[j][k] = (double *)mCalloc(tree->mod->ns,sizeof(double )); |
---|
1090 | } |
---|
1091 | return p_lk; |
---|
1092 | } |
---|
1093 | |
---|
1094 | /*********************************************************/ |
---|
1095 | |
---|
1096 | void Unconstraint_Lk(arbre *tree) |
---|
1097 | { |
---|
1098 | int i; |
---|
1099 | |
---|
1100 | tree->unconstraint_lk = .0; |
---|
1101 | |
---|
1102 | For(i,tree->data->crunch_len) |
---|
1103 | { |
---|
1104 | tree->unconstraint_lk += |
---|
1105 | tree->data->wght[i]*log(tree->data->wght[i]); |
---|
1106 | } |
---|
1107 | tree->unconstraint_lk -= |
---|
1108 | tree->data->init_len*log(tree->data->init_len); |
---|
1109 | } |
---|
1110 | |
---|
1111 | /*********************************************************/ |
---|
1112 | |
---|
1113 | void Update_P_Lk(arbre *tree, edge *b_fcus, node *n) |
---|
1114 | { |
---|
1115 | /* |
---|
1116 | | |
---|
1117 | |<- b_cus |
---|
1118 | | |
---|
1119 | n |
---|
1120 | / \ |
---|
1121 | / \ |
---|
1122 | / \ |
---|
1123 | */ |
---|
1124 | |
---|
1125 | int k,l; |
---|
1126 | int site, catg; |
---|
1127 | double ***p_lk, ***p_lk_v1, ***p_lk_v2; |
---|
1128 | double **Pij1, **Pij2; |
---|
1129 | double *n_scale_f, *d1_scale_f, *d2_scale_f; |
---|
1130 | double p1_lk1,p2_lk2; |
---|
1131 | double max_p_lk; |
---|
1132 | edge *b1, *b2; |
---|
1133 | |
---|
1134 | |
---|
1135 | b1 = b2 = NULL; |
---|
1136 | p_lk = p_lk_v1 = p_lk_v2 = NULL; |
---|
1137 | max_p_lk = MDBL_MIN; |
---|
1138 | |
---|
1139 | |
---|
1140 | if(n == b_fcus->left) |
---|
1141 | { |
---|
1142 | /* if(b_fcus->ud_p_lk_left) { printf("This p_lk is up to date\n"); return;} */ |
---|
1143 | p_lk = b_fcus->p_lk_left; |
---|
1144 | |
---|
1145 | p_lk_v1 = |
---|
1146 | (n == n->b[b_fcus->l_v1]->left)? |
---|
1147 | (n->b[b_fcus->l_v1]->p_lk_rght): |
---|
1148 | (n->b[b_fcus->l_v1]->p_lk_left); |
---|
1149 | |
---|
1150 | p_lk_v2 = |
---|
1151 | (n == n->b[b_fcus->l_v2]->left)? |
---|
1152 | (n->b[b_fcus->l_v2]->p_lk_rght): |
---|
1153 | (n->b[b_fcus->l_v2]->p_lk_left); |
---|
1154 | |
---|
1155 | |
---|
1156 | n_scale_f = b_fcus->sum_scale_f_left; |
---|
1157 | |
---|
1158 | d1_scale_f = |
---|
1159 | (n == n->b[b_fcus->l_v1]->left)? |
---|
1160 | (n->b[b_fcus->l_v1]->sum_scale_f_rght): |
---|
1161 | (n->b[b_fcus->l_v1]->sum_scale_f_left); |
---|
1162 | |
---|
1163 | d2_scale_f = |
---|
1164 | (n == n->b[b_fcus->l_v2]->left)? |
---|
1165 | (n->b[b_fcus->l_v2]->sum_scale_f_rght): |
---|
1166 | (n->b[b_fcus->l_v2]->sum_scale_f_left); |
---|
1167 | |
---|
1168 | b_fcus->get_p_lk_left = 1; |
---|
1169 | b_fcus->ud_p_lk_left = 1; |
---|
1170 | } |
---|
1171 | |
---|
1172 | else |
---|
1173 | { |
---|
1174 | p_lk = b_fcus->p_lk_rght; |
---|
1175 | |
---|
1176 | p_lk_v1 = |
---|
1177 | (n == n->b[b_fcus->r_v1]->left)? |
---|
1178 | (n->b[b_fcus->r_v1]->p_lk_rght): |
---|
1179 | (n->b[b_fcus->r_v1]->p_lk_left); |
---|
1180 | |
---|
1181 | p_lk_v2 = |
---|
1182 | (n == n->b[b_fcus->r_v2]->left)? |
---|
1183 | (n->b[b_fcus->r_v2]->p_lk_rght): |
---|
1184 | (n->b[b_fcus->r_v2]->p_lk_left); |
---|
1185 | |
---|
1186 | n_scale_f = b_fcus->sum_scale_f_rght; |
---|
1187 | |
---|
1188 | d1_scale_f = |
---|
1189 | (n == n->b[b_fcus->r_v1]->left)? |
---|
1190 | (n->b[b_fcus->r_v1]->sum_scale_f_rght): |
---|
1191 | (n->b[b_fcus->r_v1]->sum_scale_f_left); |
---|
1192 | |
---|
1193 | d2_scale_f = |
---|
1194 | (n == n->b[b_fcus->r_v2]->left)? |
---|
1195 | (n->b[b_fcus->r_v2]->sum_scale_f_rght): |
---|
1196 | (n->b[b_fcus->r_v2]->sum_scale_f_left); |
---|
1197 | |
---|
1198 | b_fcus->get_p_lk_rght = 1; |
---|
1199 | b_fcus->ud_p_lk_rght = 1; |
---|
1200 | } |
---|
1201 | |
---|
1202 | if(b_fcus->l < BL_MIN) b_fcus->l = BL_MIN; |
---|
1203 | |
---|
1204 | |
---|
1205 | |
---|
1206 | if(n == b_fcus->left) |
---|
1207 | { |
---|
1208 | b1 = n->b[b_fcus->l_v1]; |
---|
1209 | b2 = n->b[b_fcus->l_v2]; |
---|
1210 | } |
---|
1211 | else |
---|
1212 | { |
---|
1213 | b1 = n->b[b_fcus->r_v1]; |
---|
1214 | b2 = n->b[b_fcus->r_v2]; |
---|
1215 | } |
---|
1216 | |
---|
1217 | |
---|
1218 | if(tree->n_otu <= LIM_SCALE) |
---|
1219 | { |
---|
1220 | /* NO SCALE */ |
---|
1221 | |
---|
1222 | For(site,tree->n_pattern) |
---|
1223 | { |
---|
1224 | if(tree->data->wght[site]) |
---|
1225 | { |
---|
1226 | For(catg,tree->mod->n_catg) |
---|
1227 | { |
---|
1228 | |
---|
1229 | Pij1 = b1->Pij_rr[catg]; |
---|
1230 | Pij2 = b2->Pij_rr[catg]; |
---|
1231 | |
---|
1232 | |
---|
1233 | For(k,tree->mod->ns) /*sort sum terms ? No global effect*/ |
---|
1234 | { |
---|
1235 | p1_lk1 = p2_lk2 = .0; |
---|
1236 | |
---|
1237 | For(l,tree->mod->ns) |
---|
1238 | { |
---|
1239 | p1_lk1 += Pij1[k][l] * p_lk_v1[site][catg][l]; |
---|
1240 | p2_lk2 += Pij2[k][l] * p_lk_v2[site][catg][l]; |
---|
1241 | } |
---|
1242 | p_lk[site][catg][k] = p1_lk1 * p2_lk2; |
---|
1243 | } |
---|
1244 | } |
---|
1245 | } |
---|
1246 | } |
---|
1247 | } |
---|
1248 | else |
---|
1249 | { |
---|
1250 | /* SCALE */ |
---|
1251 | |
---|
1252 | For(site,tree->n_pattern) |
---|
1253 | { |
---|
1254 | if(tree->data->wght[site]) |
---|
1255 | { |
---|
1256 | For(catg,tree->mod->n_catg) |
---|
1257 | { |
---|
1258 | |
---|
1259 | Pij1 = b1->Pij_rr[catg]; |
---|
1260 | Pij2 = b2->Pij_rr[catg]; |
---|
1261 | |
---|
1262 | if(!catg) |
---|
1263 | { |
---|
1264 | n_scale_f[site] = d1_scale_f[site] + d2_scale_f[site]; |
---|
1265 | max_p_lk = -MDBL_MAX; |
---|
1266 | } |
---|
1267 | |
---|
1268 | For(k,tree->mod->ns) /*sort sum terms ? No global effect*/ |
---|
1269 | { |
---|
1270 | p_lk[site][catg][k] = .0; |
---|
1271 | p1_lk1 = p2_lk2 = .0; |
---|
1272 | |
---|
1273 | For(l,tree->mod->ns) |
---|
1274 | { |
---|
1275 | p1_lk1 += Pij1[k][l] * p_lk_v1[site][catg][l]; |
---|
1276 | p2_lk2 += Pij2[k][l] * p_lk_v2[site][catg][l]; |
---|
1277 | } |
---|
1278 | |
---|
1279 | p_lk[site][catg][k] = p1_lk1 * p2_lk2; |
---|
1280 | |
---|
1281 | |
---|
1282 | if((p_lk[site][catg][k] > max_p_lk)) max_p_lk = p_lk[site][catg][k]; |
---|
1283 | |
---|
1284 | } |
---|
1285 | } |
---|
1286 | |
---|
1287 | if(max_p_lk < LIM_SCALE_VAL) |
---|
1288 | { |
---|
1289 | For(catg,tree->mod->n_catg) |
---|
1290 | { |
---|
1291 | For(k,tree->mod->ns) |
---|
1292 | { |
---|
1293 | p_lk[site][catg][k] /= max_p_lk; |
---|
1294 | |
---|
1295 | if(p_lk[site][catg][k] > MDBL_MAX) |
---|
1296 | { |
---|
1297 | Exit("\n. Numerical overflow ! (send me an e-mail : s.guindon@auckland.ac.nz)\n"); |
---|
1298 | } |
---|
1299 | } |
---|
1300 | } |
---|
1301 | n_scale_f[site] += log(max_p_lk); |
---|
1302 | } |
---|
1303 | |
---|
1304 | if(max_p_lk > (1./LIM_SCALE_VAL)) |
---|
1305 | { |
---|
1306 | For(catg,tree->mod->n_catg) |
---|
1307 | { |
---|
1308 | For(k,tree->mod->ns) |
---|
1309 | { |
---|
1310 | p_lk[site][catg][k] /= max_p_lk; |
---|
1311 | if(p_lk[site][catg][k] < MDBL_MIN) |
---|
1312 | { |
---|
1313 | printf("\n. Numerical underflow ! (send me an e-mail : s.guindon@auckland.ac.nz)\n"); |
---|
1314 | } |
---|
1315 | } |
---|
1316 | } |
---|
1317 | n_scale_f[site] += log(max_p_lk); |
---|
1318 | } |
---|
1319 | } |
---|
1320 | } |
---|
1321 | } |
---|
1322 | } |
---|
1323 | |
---|
1324 | /*********************************************************/ |
---|
1325 | |
---|
1326 | void Make_Tree_4_Lk(arbre *tree, allseq *alldata, int n_site) |
---|
1327 | { |
---|
1328 | int i; |
---|
1329 | |
---|
1330 | if(!tree->tot_loglk_sorted) tree->tot_loglk_sorted = (double *)mCalloc(tree->n_pattern, sizeof(double)); |
---|
1331 | if(!tree->tot_dloglk) tree->tot_dloglk = (double *)mCalloc(2*tree->n_otu-3,sizeof(double)); |
---|
1332 | if(!tree->tot_d2loglk) tree->tot_d2loglk = (double *)mCalloc(2*tree->n_otu-3,sizeof(double)); |
---|
1333 | if(!tree->site_lk) tree->site_lk = (double *)mCalloc(alldata->crunch_len,sizeof(double)); |
---|
1334 | if(!tree->log_site_lk_cat) |
---|
1335 | { |
---|
1336 | tree->log_site_lk_cat = (double **)mCalloc(tree->mod->n_catg,sizeof(double *)); |
---|
1337 | For(i,tree->mod->n_catg) |
---|
1338 | tree->log_site_lk_cat[i] = (double *)mCalloc(alldata->crunch_len,sizeof(double)); |
---|
1339 | } |
---|
1340 | |
---|
1341 | tree->root = tree->noeud[0]; |
---|
1342 | |
---|
1343 | For(i,2*tree->n_otu-3) |
---|
1344 | { |
---|
1345 | Make_Edge_Lk(tree->t_edges[i]->left, |
---|
1346 | tree->t_edges[i]->rght, |
---|
1347 | tree); |
---|
1348 | } |
---|
1349 | |
---|
1350 | For(i,2*tree->n_otu-2) |
---|
1351 | Make_Node_Lk(tree->noeud[i]); |
---|
1352 | |
---|
1353 | Alloc_All_P_Lk(tree); |
---|
1354 | /* Make_P_Lk_Struct(tree); */ |
---|
1355 | |
---|
1356 | Init_P_Lk_Tips(tree); |
---|
1357 | } |
---|
1358 | |
---|
1359 | /*********************************************************/ |
---|
1360 | |
---|
1361 | void Init_P_Lk_Tips(arbre *tree) |
---|
1362 | { |
---|
1363 | int curr_site,i,j,k; |
---|
1364 | |
---|
1365 | |
---|
1366 | Fors(curr_site,tree->data->crunch_len,tree->mod->stepsize) |
---|
1367 | { |
---|
1368 | For(i,tree->n_otu) |
---|
1369 | { |
---|
1370 | if (tree->mod->datatype == NT) |
---|
1371 | { |
---|
1372 | Init_Tips_At_One_Site_Nucleotides(tree->data->c_seq[i]->state[curr_site], |
---|
1373 | &tree->noeud[i]->b[0]->p_lk_rght[curr_site][0]); |
---|
1374 | |
---|
1375 | } |
---|
1376 | else |
---|
1377 | Init_Tips_At_One_Site_AA(tree->data->c_seq[i]->state[curr_site], |
---|
1378 | &tree->noeud[i]->b[0]->p_lk_rght[curr_site][0]); |
---|
1379 | |
---|
1380 | |
---|
1381 | |
---|
1382 | if((tree->noeud[i]->b[0]->p_lk_rght) && (tree->noeud[i]->b[0]->get_p_lk_rght)) |
---|
1383 | { |
---|
1384 | for(j=1;j<tree->mod->n_catg;j++) |
---|
1385 | { |
---|
1386 | For(k,tree->mod->ns) |
---|
1387 | { |
---|
1388 | tree->noeud[i]->b[0]->p_lk_rght[curr_site][j][k]= |
---|
1389 | tree->noeud[i]->b[0]->p_lk_rght[curr_site][0][k]; |
---|
1390 | } |
---|
1391 | } |
---|
1392 | } |
---|
1393 | else |
---|
1394 | { |
---|
1395 | for(j=1;j<tree->mod->n_catg;j++) |
---|
1396 | For(k,tree->mod->ns) |
---|
1397 | tree->noeud[i]->b[0]->p_lk_rght[curr_site][j][k]= |
---|
1398 | tree->noeud[i]->b[0]->p_lk_rght[curr_site][0][k]; |
---|
1399 | } |
---|
1400 | } |
---|
1401 | } |
---|
1402 | } |
---|
1403 | |
---|
1404 | /*********************************************************/ |
---|