1 | /* |
---|
2 | |
---|
3 | PHYML : a program that computes maximum likelihood phylogenies from |
---|
4 | DNA or AA homologous sequences |
---|
5 | |
---|
6 | Copyright (C) Stephane Guindon. Oct 2003 onward |
---|
7 | |
---|
8 | All parts of the source except where indicated are distributed under |
---|
9 | the GNU public licence. See http://www.opensource.org for details. |
---|
10 | |
---|
11 | */ |
---|
12 | |
---|
13 | #ifndef UTILITIES_H |
---|
14 | #define UTILITIES_H |
---|
15 | |
---|
16 | |
---|
17 | #include <stdio.h> |
---|
18 | #include <stdlib.h> |
---|
19 | #include <math.h> |
---|
20 | #include <ctype.h> |
---|
21 | #include <string.h> |
---|
22 | #include <time.h> |
---|
23 | |
---|
24 | #define VERSION "v2.4.5" |
---|
25 | |
---|
26 | extern int NODE_DEG_MAX; |
---|
27 | extern int BRENT_ITMAX; |
---|
28 | extern double BRENT_CGOLD; |
---|
29 | extern double BRENT_ZEPS; |
---|
30 | extern double MNBRAK_GOLD; |
---|
31 | extern double MNBRAK_GLIMIT; |
---|
32 | extern double MNBRAK_TINY; |
---|
33 | extern double ALPHA_MIN; |
---|
34 | extern double ALPHA_MAX; |
---|
35 | extern double BL_MIN; |
---|
36 | extern double BL_START; |
---|
37 | extern double BL_MAX; |
---|
38 | extern double MIN_DIFF_LK; |
---|
39 | extern double GOLDEN_R; |
---|
40 | extern double GOLDEN_C; |
---|
41 | extern int T_MAX_FILE; |
---|
42 | extern int T_MAX_LINE; |
---|
43 | extern int T_MAX_NAME; |
---|
44 | extern int T_MAX_SEQ; |
---|
45 | extern int N_MAX_INSERT; |
---|
46 | extern int N_MAX_OTU; |
---|
47 | extern double UNLIKELY; |
---|
48 | extern double NJ_SEUIL; |
---|
49 | extern int MAX_TOPO_DIST; |
---|
50 | extern double ROUND_MAX; |
---|
51 | extern double DIST_MAX; |
---|
52 | extern int LIM_SCALE; |
---|
53 | extern double LIM_SCALE_VAL; |
---|
54 | extern double AROUND_LK; |
---|
55 | extern double PROP_STEP; |
---|
56 | extern int T_MAX_ALPHABET; |
---|
57 | extern double MDBL_MAX; |
---|
58 | extern double MDBL_MIN; |
---|
59 | extern int POWELL_ITMAX; |
---|
60 | extern double LINMIN_TOL; |
---|
61 | |
---|
62 | #define For(i,n) for(i=0; i<n; i++) |
---|
63 | #define Fors(i,n,s) for(i=0; i<n; i+=s) |
---|
64 | #define PointGamma(prob,alpha,beta) PointChi2(prob,2.0*(alpha))/(2.0*(beta)) |
---|
65 | #define SHFT2(a,b,c) (a)=(b);(b)=(c); |
---|
66 | #define SHFT3(a,b,c,d) (a)=(b);(b)=(c);(c)=(d); |
---|
67 | #define MAX(a,b) ((a)>(b)?(a):(b)) |
---|
68 | #define MIN(a,b) ((a)<(b)?(a):(b)) |
---|
69 | #define SIGN(a,b) ((b) > 0.0 ? fabs(a) : -fabs(a)) |
---|
70 | #define SHFT(a,b,c,d) (a)=(b);(b)=(c);(c)=(d); |
---|
71 | |
---|
72 | #define NT 0 /* nucleotides */ |
---|
73 | #define AA 1 /* amino acids */ |
---|
74 | |
---|
75 | #define ACGT 0 /* A,G,G,T encoding */ |
---|
76 | #define RY 1 /* R,Y encoding */ |
---|
77 | |
---|
78 | |
---|
79 | #define NODE_DEG_MAX 50 |
---|
80 | #define BRENT_ITMAX 100 |
---|
81 | #define BRENT_CGOLD 0.3819660 |
---|
82 | #define BRENT_ZEPS 1.e-10 |
---|
83 | #define MNBRAK_GOLD 1.618034 |
---|
84 | #define MNBRAK_GLIMIT 100.0 |
---|
85 | #define MNBRAK_TINY 1.e-20 |
---|
86 | #define ALPHA_MIN 0.04 |
---|
87 | #define ALPHA_MAX 100 |
---|
88 | #define BL_MIN 1.e-10 |
---|
89 | #define BL_START 1.e-03 |
---|
90 | #define BL_MAX 1.e+05 |
---|
91 | #define MIN_DIFF_LK 1.e-06 |
---|
92 | #define GOLDEN_R 0.61803399 |
---|
93 | #define GOLDEN_C (1.0-GOLDEN_R) |
---|
94 | #define T_MAX_FILE 200 |
---|
95 | #define T_MAX_LINE 100000 |
---|
96 | #define T_MAX_NAME 100 |
---|
97 | #define T_MAX_SEQ 1000000 |
---|
98 | #define N_MAX_INSERT 20 |
---|
99 | #define N_MAX_OTU 4000 |
---|
100 | #define UNLIKELY -1.e10 |
---|
101 | #define NJ_SEUIL 0.1 |
---|
102 | #define ROUND_MAX 100 |
---|
103 | #define DIST_MAX 2.00 |
---|
104 | #define AROUND_LK 50.0 |
---|
105 | #define PROP_STEP 1.0 |
---|
106 | #define T_MAX_ALPHABET 100 |
---|
107 | #define MDBL_MIN 2.225074E-308 |
---|
108 | #define MDBL_MAX 1.797693E+308 |
---|
109 | #define POWELL_ITMAX 200 |
---|
110 | #define LINMIN_TOL 2.0E-04 |
---|
111 | #define LIM_SCALE 3 |
---|
112 | #define LIM_SCALE_VAL 1.E-50 |
---|
113 | /* LIM_SCALE = 300; */ |
---|
114 | /* LIM_SCALE_VAL = 1.E-500; */ |
---|
115 | |
---|
116 | |
---|
117 | /*********************************************************/ |
---|
118 | |
---|
119 | typedef struct __Node { |
---|
120 | struct __Node **v; /* table of pointers to neighbor nodes. Dimension = 2 x n_otu - 3 */ |
---|
121 | struct __Node ***bip_node; /* three lists of pointer to tip nodes. One list for each direction */ |
---|
122 | struct __Edge **b; /* table of pointers to neighbor branches */ |
---|
123 | char ***bip_name; /* three lists of tip node names. One list for each direction */ |
---|
124 | int *bip_size; /* Size of each of the three lists from bip_node */ |
---|
125 | double *l; /* lengths of the (three or one) branches connected to one internal node */ |
---|
126 | int num; /* node number */ |
---|
127 | char *name; /* taxon name (if exists) */ |
---|
128 | int tax; /* tax = 1 -> external node, else -> internal node */ |
---|
129 | int check_branch; /* check_branch=1 is the corresponding branch is labelled with '*' */ |
---|
130 | double *score; /* score used in BIONJ to determine the best pair of nodes to agglomerate */ |
---|
131 | }node; |
---|
132 | |
---|
133 | |
---|
134 | /*********************************************************/ |
---|
135 | |
---|
136 | typedef struct __Edge { |
---|
137 | /* |
---|
138 | syntax : (node) [edge] |
---|
139 | (left_1) . .(right_1) |
---|
140 | \ (left) (right) / |
---|
141 | \._____________./ |
---|
142 | / [b_fcus] \ |
---|
143 | / \ |
---|
144 | (left_2) . .(right_2) |
---|
145 | |
---|
146 | */ |
---|
147 | |
---|
148 | struct __Node *left,*rght; /* node on the left/right side of the edge */ |
---|
149 | int l_r,r_l,l_v1,l_v2,r_v1,r_v2; |
---|
150 | /* these are directions (i.e., 0, 1 or 2): */ |
---|
151 | /* l_r (left to right) -> left[b_fcus->l_r] = right */ |
---|
152 | /* r_l (right to left) -> right[b_fcus->r_l] = left */ |
---|
153 | /* l_v1 (left node to first node != from right) -> left[b_fcus->l_v1] = left_1 */ |
---|
154 | /* l_v2 (left node to secnd node != from right) -> left[b_fcus->l_v2] = left_2 */ |
---|
155 | /* r_v1 (right node to first node != from left) -> right[b_fcus->r_v1] = right_1 */ |
---|
156 | /* r_v2 (right node to secnd node != from left) -> right[b_fcus->r_v2] = right_2 */ |
---|
157 | |
---|
158 | int num; /* branch number */ |
---|
159 | double l; /* branch length */ |
---|
160 | double best_l; /* best branch length found so far */ |
---|
161 | double l_old; /* old branch length */ |
---|
162 | |
---|
163 | int bip_score; /* score of the bipartition generated by the corresponding edge |
---|
164 | bip_score = 1 iif the branch is fond in both trees to be compared, |
---|
165 | bip_score = 0 otherwise. */ |
---|
166 | double nj_score; /* score of the agglomeration that generated that branch in BIONJ */ |
---|
167 | double diff_lk; /* difference of likelihood between the current topological |
---|
168 | configuration at this branch and the best alternative one */ |
---|
169 | |
---|
170 | int get_p_lk_left; /* 1 if the likelihood of the subtree on the left has to be computed */ |
---|
171 | int get_p_lk_rght; /* 1 if the likelihood of the subtree on the right has to be computed */ |
---|
172 | int ud_p_lk_left; /* 1 if the likelihood of the subtree on the left is up to date */ |
---|
173 | int ud_p_lk_rght; /* 1 if the likelihood of the subtree on the right is up to date */ |
---|
174 | double site_dlk; /* derivative of the likelihood (deprecated) */ |
---|
175 | double site_d2lk; /* 2nd derivative of the likelihood (deprecated) */ |
---|
176 | double *site_dlk_rr; /* derivative of the likelihood conditional on the current relative rate */ |
---|
177 | double *site_d2lk_rr; /* 2nd derivative of the likelihood conditional on the current relative rate */ |
---|
178 | double ***p_lk_left,***p_lk_rght; /* likelihoods of the subtree on the left and |
---|
179 | right side (for each site and each relative rate category) */ |
---|
180 | double **site_p_lk_rght, **site_p_lk_left; /* deprecated */ |
---|
181 | double ***Pij_rr,***dPij_rr,***d2Pij_rr; /* matrix of change probabilities and its first and secnd derivates */ |
---|
182 | |
---|
183 | double *ql; /* ql[0], ql[1], ql[2] give the likelihood of the three topological |
---|
184 | configurations around that branch */ |
---|
185 | int best_conf; /* best topological configuration : |
---|
186 | ((left_1,left_2),right_1,right_2) or |
---|
187 | ((left_1,right_2),right_1,left_2) or |
---|
188 | ((left_1,right_1),right_1,left_2) */ |
---|
189 | |
---|
190 | int num_st_left; /* number of the subtree on the left side */ |
---|
191 | int num_st_rght; /* number of the subtree on the right side */ |
---|
192 | |
---|
193 | |
---|
194 | /* Below are the likelihood scaling factors (used in functions |
---|
195 | `Get_All_Partial_Lk_Scale' in lk.c */ |
---|
196 | int scale_left; |
---|
197 | int scale_rght; |
---|
198 | double site_sum_scale_f_left; |
---|
199 | double site_sum_scale_f_rght; |
---|
200 | double site_scale_f_left; |
---|
201 | double site_scale_f_rght; |
---|
202 | double *sum_scale_f_left; |
---|
203 | double *sum_scale_f_rght; |
---|
204 | |
---|
205 | |
---|
206 | double bootval; /* bootstrap value (if exists) */ |
---|
207 | }edge; |
---|
208 | |
---|
209 | /*********************************************************/ |
---|
210 | |
---|
211 | typedef struct __Arbre { |
---|
212 | struct __Node *root; /* root node */ |
---|
213 | struct __Node **noeud; /* array of nodes that defines the tree topology */ |
---|
214 | struct __Edge **t_edges; /* array of edges */ |
---|
215 | struct __Arbre *old_tree; /* old copy of the tree */ |
---|
216 | struct __Arbre *best_tree; /* best tree found so far */ |
---|
217 | struct __Model *mod; /* substitution model */ |
---|
218 | struct __AllSeq *data; /* sequences */ |
---|
219 | struct __Option *input; /* input parameters */ |
---|
220 | struct __Matrix *mat; /* pairwise distance matrix */ |
---|
221 | |
---|
222 | int has_bip; /*if has_bip=1, then the structure to compare |
---|
223 | tree topologies is allocated, has_bip=0 otherwise */ |
---|
224 | double min_diff_lk; /* min_diff_lk is the minimum taken among the 2n-3 |
---|
225 | diff_lk values */ |
---|
226 | |
---|
227 | int both_sides; /* both_sides=1 -> a pre-order and a post-order tree |
---|
228 | traversals are required to compute the likelihood |
---|
229 | of every subtree in the phylogeny*/ |
---|
230 | int n_otu; /* number of taxa */ |
---|
231 | |
---|
232 | int curr_site; /* current site of the alignment to be processed */ |
---|
233 | int curr_catg; /* current class of the discrete gamma rate distribution */ |
---|
234 | double best_loglk; /* highest value of the loglikelihood found so far */ |
---|
235 | |
---|
236 | double tot_loglk; /* loglikelihood */ |
---|
237 | double *tot_loglk_sorted; /* used to compute tot_loglk by adding sorted terms to minimize CPU errors */ |
---|
238 | double *tot_dloglk; /* first derivative of the likelihood with respect to |
---|
239 | branch lengths */ |
---|
240 | double *tot_d2loglk; /* second derivative of the likelihood with respect to |
---|
241 | branch lengths */ |
---|
242 | double *site_lk; /* vector of likelihoods at individual sites */ |
---|
243 | |
---|
244 | double **log_site_lk_cat; /* loglikelihood at individual sites and for each class of rate*/ |
---|
245 | |
---|
246 | double unconstraint_lk; /* unconstrained (or multinomial) likelihood */ |
---|
247 | |
---|
248 | int n_swap; /* number of NNIs performed */ |
---|
249 | int n_pattern; /* number of distinct site patterns */ |
---|
250 | int has_branch_lengths; /* =1 iff input tree displays branch lengths */ |
---|
251 | int print_boot_val; /* if print_boot_val=1, the bootstrap values are printed */ |
---|
252 | }arbre; |
---|
253 | |
---|
254 | |
---|
255 | /*********************************************************/ |
---|
256 | |
---|
257 | typedef struct __Seq { |
---|
258 | char *name; /* sequence name */ |
---|
259 | int len; /* sequence length */ |
---|
260 | char *state; /* sequence itself */ |
---|
261 | }seq; |
---|
262 | |
---|
263 | /*********************************************************/ |
---|
264 | |
---|
265 | |
---|
266 | typedef struct __AllSeq { |
---|
267 | seq **c_seq; /* compressed sequences */ |
---|
268 | int *invar; /* 1 -> states are identical, 0 states vary */ |
---|
269 | double *wght; /* # of each site in c_seq */ |
---|
270 | int n_otu; /* number of taxa */ |
---|
271 | int clean_len; /* uncrunched sequences lenghts without gaps */ |
---|
272 | int crunch_len; /* crunched sequences lengths */ |
---|
273 | double *b_frq; /* observed state frequencies */ |
---|
274 | int init_len; /* length of the uncompressed sequences */ |
---|
275 | int *ambigu; /* ambigu[i]=1 is one or more of the sequences at site |
---|
276 | i display an ambiguous character */ |
---|
277 | int *sitepatt; /* this array maps the position of the patterns in the |
---|
278 | compressed alignment to the positions in the uncompressed |
---|
279 | one */ |
---|
280 | }allseq; |
---|
281 | |
---|
282 | /*********************************************************/ |
---|
283 | |
---|
284 | typedef struct __Matrix { /* mostly used in BIONJ */ |
---|
285 | double **P,**Q,**dist; /* observed proportions of transition, transverion and distances |
---|
286 | between pairs of sequences */ |
---|
287 | arbre *tree; /* tree... */ |
---|
288 | int *on_off; /* on_off[i]=1 if column/line i corresponds to a node that has not |
---|
289 | been agglomerated yet */ |
---|
290 | int n_otu; /* number of taxa */ |
---|
291 | char **name; /* sequence names */ |
---|
292 | int r; /* number of nodes that have not been agglomerated yet */ |
---|
293 | struct __Node **tip_node; /* array of pointer to the leaves of the tree */ |
---|
294 | int curr_int; /* used in the NJ/BIONJ algorithms */ |
---|
295 | int method; /* if method=1->NJ method is used, BIONJ otherwise */ |
---|
296 | }matrix; |
---|
297 | |
---|
298 | /*********************************************************/ |
---|
299 | |
---|
300 | typedef struct __Model { |
---|
301 | int whichmodel; |
---|
302 | /* |
---|
303 | 1 => JC69 |
---|
304 | 2 => K2P |
---|
305 | 3 => F81 |
---|
306 | 4 => HKY85 |
---|
307 | 5 => F84 |
---|
308 | 6 => TN93 |
---|
309 | 7 => GTR |
---|
310 | 11 => Dayhoff |
---|
311 | 12 => JTT |
---|
312 | 13 => MtREV |
---|
313 | */ |
---|
314 | int ns; /* number of states (4 for ADN, 20 for AA) */ |
---|
315 | double *pi; /* states frequencies */ |
---|
316 | int datatype; /* 0->DNA, 1->AA */ |
---|
317 | |
---|
318 | /* ADN parameters */ |
---|
319 | double kappa; /* transition/transversion rate */ |
---|
320 | double lambda; /* parameter used to define the ts/tv ratios in the F84 and TN93 models */ |
---|
321 | double alpha; /* gamma shapa parameter */ |
---|
322 | double *r_proba; /* probabilities of the substitution rates defined by the discrete gamma distribution */ |
---|
323 | double *rr; /* substitution rates defined by the discrete gamma distribution */ |
---|
324 | int n_catg; /* number of categories in the discrete gamma distribution */ |
---|
325 | double pinvar; /* proportion of invariable sites */ |
---|
326 | int invar; /* =1 iff the substitution model takes into account invariable sites */ |
---|
327 | |
---|
328 | /* Below are 'old' values of some substitution parameters (see the comments above) */ |
---|
329 | double alpha_old; |
---|
330 | double kappa_old; |
---|
331 | double lambda_old; |
---|
332 | double pinvar_old; |
---|
333 | |
---|
334 | char *custom_mod_string; /* string of characters used to define custom |
---|
335 | models of substitution */ |
---|
336 | double **rr_param; /* table of pointers to relative rate parameters of the GTR or custom model */ |
---|
337 | double *rr_param_values; /* relative rate parameters of the GTR or custom model */ |
---|
338 | int **rr_param_num; /* each line of this 2d table gives a serie of equal relative rate parameter number */ |
---|
339 | /* A<->C : number 0 */ |
---|
340 | /* A<->G : number 1 */ |
---|
341 | /* A<->T : number 2 */ |
---|
342 | /* C<->G : number 3 */ |
---|
343 | /* C<->T : number 4 */ |
---|
344 | /* G<->T : number 5 */ |
---|
345 | /* For example, [0][2][3] |
---|
346 | [1] |
---|
347 | [4][5] |
---|
348 | corresponds to the model 010022, i.e., |
---|
349 | (A<->C = A<->T = C<->T) != (A<->G) != (C<->T = G<->T) |
---|
350 | */ |
---|
351 | int *n_rr_param_per_cat; /* [3][1][2] for the previous example */ |
---|
352 | int n_diff_rr_param; /* number of different relative substitution rates in the custom model */ |
---|
353 | |
---|
354 | int update_eigen; /* update_eigen=1-> eigen values/vectors need to be updated */ |
---|
355 | |
---|
356 | double ***Pij_rr; /* matrix of change probabilities */ |
---|
357 | double ***dPij_rr; /* first derivative of the change probabilities with respect to branch length */ |
---|
358 | double ***d2Pij_rr; /* second derivative of the change probabilities with respect to branch length */ |
---|
359 | |
---|
360 | |
---|
361 | int seq_len; /* sequence length */ |
---|
362 | /* AA parameters */ |
---|
363 | /* see PMat_Empirical in models.c for AA algorithm explanation */ |
---|
364 | double *mat_Q; /* 20x20 amino-acids substitution rates matrix */ |
---|
365 | double *mat_Vr; /* 20x20 right eigenvectors of mat_Q */ |
---|
366 | double *mat_Vi; /* 20x20 inverse matrix of mat_Vr */ |
---|
367 | double *vct_ev; /* eigen values */ |
---|
368 | double mr; /* mean rate = branch length/time interval */ |
---|
369 | /* mr = -sum(i)(vct_pi[i].mat_Q[ii]) */ |
---|
370 | double *vct_eDmr; /* diagonal terms of a 20x20 diagonal matrix */ |
---|
371 | /* term n = exp(nth eigenvalue of mat_Q / mr) */ |
---|
372 | int stepsize; /* stepsize=1 for nucleotide models, 3 for codon models */ |
---|
373 | int n_otu; /* number of taxa */ |
---|
374 | struct __Optimiz *s_opt; /* pointer to parameters to optimize */ |
---|
375 | int bootstrap; /* bootstrap values are computed if bootstrap > 0. |
---|
376 | The value give the number of replicates */ |
---|
377 | double *user_b_freq; /* user-defined nucleotide frequencies */ |
---|
378 | |
---|
379 | |
---|
380 | }model; |
---|
381 | |
---|
382 | /*********************************************************/ |
---|
383 | |
---|
384 | typedef struct __Option { /* mostly used in 'options.c' */ |
---|
385 | char *seqfile; /* sequence file name */ |
---|
386 | char *modelname; /* name of the model */ |
---|
387 | struct __Model *mod; /* substitution model */ |
---|
388 | int interleaved; /* interleaved or sequential sequence file format ? */ |
---|
389 | int inputtree; /* =1 iff a user input tree is used as input */ |
---|
390 | struct __Arbre *tree; /* pointer to the current tree */ |
---|
391 | char *inputtreefile; /* input tree file name */ |
---|
392 | FILE *fp_seq; /* pointer to the sequence file */ |
---|
393 | FILE *fp_input_tree; /* pointer to the input tree file */ |
---|
394 | FILE *fp_boot_tree; /* pointer to the bootstrap tree file */ |
---|
395 | FILE *fp_boot_stats; /* pointer to the statistics file */ |
---|
396 | int print_boot_trees; /* =1 if the bootstrapped trees are printed in output */ |
---|
397 | char *phyml_stat_file; /* name of the statistics file */ |
---|
398 | char *phyml_tree_file; /* name of the tree file */ |
---|
399 | char *phyml_lk_file; /* name of the file in which the likelihood of the model is written */ |
---|
400 | int phyml_stat_file_open_mode; /* opening file mode for statistics file */ |
---|
401 | int phyml_tree_file_open_mode; /* opening file mode for tree file */ |
---|
402 | int n_data_sets; /* number of data sets to be analysed */ |
---|
403 | int n_trees; /* number of trees */ |
---|
404 | int seq_len; /* sequence length */ |
---|
405 | int n_data_set_asked; /* number of bootstrap replicates */ |
---|
406 | struct __Seq **data; /* pointer to the uncompressed sequences */ |
---|
407 | struct __AllSeq *alldata; /* pointer to the compressed sequences */ |
---|
408 | char *nt_or_cd; /* nucleotide or codon data ? (not used) */ |
---|
409 | }option; |
---|
410 | |
---|
411 | /*********************************************************/ |
---|
412 | |
---|
413 | typedef struct __Optimiz { /* parameters to be optimised (mostly used in 'optimiz.c') */ |
---|
414 | int print; /* =1 -> verbose mode */ |
---|
415 | |
---|
416 | int opt_alpha; /* =1 -> the gamma shape parameter is optimised */ |
---|
417 | int opt_kappa; /* =1 -> the ts/tv ratio parameter is optimised */ |
---|
418 | int opt_lambda; /* =1 -> the F84|TN93 model specific parameter is optimised */ |
---|
419 | int opt_pinvar; /* =1 -> the proportion of invariants is optimised */ |
---|
420 | int opt_bfreq; /* =1 -> the nucleotide frequencies are optimised */ |
---|
421 | int opt_rr_param; /* =1 -> the relative rate parameters of the GTR or the customn model are optimised */ |
---|
422 | int opt_free_param; /* if opt_topo=0 and opt_free_param=1 -> the numerical parameters of the |
---|
423 | model are optimised. if opt_topo=0 and opt_free_param=0 -> no parameter is |
---|
424 | optimised */ |
---|
425 | int opt_bl; /* =1 -> the branch lengths are optimised */ |
---|
426 | int opt_topo; /* =1 -> the tree topology is optimised */ |
---|
427 | double init_lk; /* initial loglikelihood value */ |
---|
428 | int n_it_max; /* maximum bnumber of iteration during an optimisation step */ |
---|
429 | int last_opt; /* =1 -> the numerical parameters are optimised further while the |
---|
430 | tree topology remains fixed */ |
---|
431 | }optimiz; |
---|
432 | |
---|
433 | /*********************************************************/ |
---|
434 | |
---|
435 | typedef struct __Qmat{ |
---|
436 | double **u_mat; /* right eigen vectors */ |
---|
437 | double **v_mat; /* left eigen vectors = inv(u_mat) */ |
---|
438 | double *root_vct; /* eigen values */ |
---|
439 | double *q; /* instantaneous rate matrix */ |
---|
440 | }qmat; |
---|
441 | |
---|
442 | /*********************************************************/ |
---|
443 | |
---|
444 | double bico(int n,int k); |
---|
445 | double factln(int n); |
---|
446 | double gammln(double xx); |
---|
447 | double Pbinom(int N,int ni,double p); |
---|
448 | void Plim_Binom(double pH0,int N,double *pinf,double *psup); |
---|
449 | double LnGamma(double alpha); |
---|
450 | double IncompleteGamma(double x,double alpha,double ln_gamma_alpha); |
---|
451 | double PointChi2(double prob,double v); |
---|
452 | double PointNormal(double prob); |
---|
453 | int DiscreteGamma(double freqK[],double rK[],double alfa,double beta,int K,int median); |
---|
454 | arbre *Read_Tree(char *s_tree); |
---|
455 | void Make_All_Edges_Light(node *a,node *d); |
---|
456 | void Make_All_Edges_Lk(node *a,node *d,arbre *tree); |
---|
457 | void R_rtree(char *s_tree,node *pere,arbre *tree,int *n_int,int *n_ext); |
---|
458 | void Clean_Multifurcation(char **subtrees,int current_deg,int end_deg); |
---|
459 | char **Sub_Trees(char *tree,int *degree); |
---|
460 | int Next_Par(char *s,int pos); |
---|
461 | char *Write_Tree(arbre *tree); |
---|
462 | void R_wtree(node *pere,node *fils,char *s_tree,arbre *tree); |
---|
463 | void Init_Tree(arbre *tree); |
---|
464 | void Make_Edge_Light(node *a,node *d); |
---|
465 | void Init_Edge_Light(edge *b); |
---|
466 | void Make_Edge_Dirs(edge *b,node *a,node *d); |
---|
467 | void Make_Edge_Lk(node *a,node *d,arbre *tree); |
---|
468 | void Make_Node_Light(node *n); |
---|
469 | void Init_Node_Light(node *n); |
---|
470 | seq **Get_Seq(option *input); |
---|
471 | seq **Read_Seq_Sequential(FILE *in,int *n_otu); |
---|
472 | seq **Read_Seq_Interleaved(FILE *in,int *n_otu); |
---|
473 | int Read_One_Line_Seq(seq ***data,int num_otu,FILE *in); |
---|
474 | void Uppercase(char *ch); |
---|
475 | allseq *Compact_Seq(seq **data,option *input); |
---|
476 | allseq *Compact_CSeq(allseq *data,model *mod); |
---|
477 | void Get_Base_Freqs(allseq *data); |
---|
478 | void Get_AA_Freqs(allseq *data); |
---|
479 | arbre *Read_Tree_File(FILE *fp_input_tree); |
---|
480 | void Init_Tree_Edges(node *a,node *d,arbre *tree,int *cur); |
---|
481 | void Exit(const char *message); |
---|
482 | void *mCalloc(int nb,size_t size); |
---|
483 | void *mRealloc(void *p,int nb,size_t size); |
---|
484 | arbre *Make_Light_Tree_Struct(int n_otu); |
---|
485 | int Sort_Double_Decrease(const void *a,const void *b); |
---|
486 | void qksort(double *A,int ilo,int ihi); |
---|
487 | void Print_Site(allseq *alldata,int num,int n_otu,char *sep,int stepsize); |
---|
488 | void Print_Seq(seq **data,int n_otu); |
---|
489 | void Print_CSeq(FILE *fp,allseq *alldata); |
---|
490 | void Order_Tree_Seq(arbre *tree,seq **data); |
---|
491 | void Order_Tree_CSeq(arbre *tree,allseq *data); |
---|
492 | matrix *Make_Mat(int n_otu); |
---|
493 | void Init_Mat(matrix *mat,allseq *data); |
---|
494 | arbre *Make_Tree(allseq *data); |
---|
495 | void Print_Dist(matrix *mat); |
---|
496 | void Print_Node(node *a,node *d,arbre *tree); |
---|
497 | void Share_Lk_Struct(arbre *t_full,arbre *t_empt); |
---|
498 | void Init_Constant(); |
---|
499 | void Print_Mat(matrix *mat); |
---|
500 | int Sort_Edges_Diff_Lk(edge **sorted_edges,int n_elem); |
---|
501 | void NNI(arbre *tree,edge *b_fcus,int do_swap); |
---|
502 | void Swap(node *a,node *b,node *c,node *d); |
---|
503 | void Update_All_Partial_Lk(edge *b_fcus,arbre *tree); |
---|
504 | void Update_SubTree_Partial_Lk(edge *b_fcus,node *a,node *d,arbre *tree); |
---|
505 | double Update_Lk_At_Given_Edge(edge *b_fcus,arbre *tree); |
---|
506 | void Update_PMat_At_Given_Edge(edge *b_fcus,arbre *tree); |
---|
507 | allseq *Make_Seq(int n_otu,int len,char **sp_names); |
---|
508 | allseq *Copy_CData(allseq *ori,model *mod); |
---|
509 | optimiz *Alloc_Optimiz(); |
---|
510 | void Init_Optimiz(optimiz *s_opt); |
---|
511 | int Filexists(char *filename); |
---|
512 | FILE *Openfile(char *filename,int mode); |
---|
513 | void Print_Fp_Out(FILE *fp_out,time_t t_beg,time_t t_end,arbre *tree,option *input,int n_data_set); |
---|
514 | void Print_Fp_Out_Lines(FILE *fp_out,time_t t_beg,time_t t_end,arbre *tree,option *input,int n_data_set); |
---|
515 | void Alloc_All_P_Lk(arbre *tree); |
---|
516 | matrix *K2P_dist(allseq *data,double g_shape); |
---|
517 | matrix *JC69_Dist(allseq *data,model *mod); |
---|
518 | matrix *Hamming_Dist(allseq *data,model *mod); |
---|
519 | int Is_Ambigu(char *state,int datatype,int stepsize); |
---|
520 | void Check_Ambiguities(allseq *data,int datatype,int stepsize); |
---|
521 | int Assign_State(char *c,int datatype,int stepsize); |
---|
522 | void Bootstrap(arbre *tree); |
---|
523 | void Update_BrLen_Invar(arbre *tree); |
---|
524 | void Getstring_Stdin(char *file_name); |
---|
525 | void Print_Freq(arbre *tree); |
---|
526 | double Num_Derivatives_One_Param(double(*func)(arbre *tree),arbre *tree,double f0,double *param,double stepsize,double *err,int precise); |
---|
527 | void Num_Derivative_Several_Param(arbre *tree,double *param,int n_param,double stepsize,double(*func)(arbre *tree),double *derivatives); |
---|
528 | int Compare_Two_States(char *state1,char *state2,int state_size); |
---|
529 | void Copy_One_State(char *from,char *to,int state_size); |
---|
530 | model *Make_Model_Basic(); |
---|
531 | void Make_Model_Complete(model *mod); |
---|
532 | model *Copy_Model(model *ori); |
---|
533 | void Set_Defaults_Input(option *input); |
---|
534 | void Set_Defaults_Model(model *mod); |
---|
535 | void Set_Defaults_Optimiz(optimiz *s_opt); |
---|
536 | void Copy_Optimiz(optimiz *ori,optimiz *cpy); |
---|
537 | void Get_Bip(node *a,node *d,arbre *tree); |
---|
538 | void Alloc_Bip(arbre *tree); |
---|
539 | int Sort_Double_Increase(const void *a,const void *b); |
---|
540 | int Sort_String(const void *a,const void *b); |
---|
541 | void Compare_Bip(arbre *tree1,arbre *tree2); |
---|
542 | void Test_Multiple_Data_Set_Format(option *input); |
---|
543 | int Are_Compatible(char *statea,char *stateb,int stepsize,int datatype); |
---|
544 | void Hide_Ambiguities(allseq *data); |
---|
545 | void Print_Site_Lk(arbre *tree, FILE *fp); |
---|
546 | #endif |
---|
547 | |
---|
548 | |
---|
549 | |
---|
550 | |
---|