1 | /* |
---|
2 | * ml.h |
---|
3 | * |
---|
4 | * |
---|
5 | * Part of TREE-PUZZLE 5.0 (June 2000) |
---|
6 | * |
---|
7 | * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, |
---|
8 | * M. Vingron, and Arndt von Haeseler |
---|
9 | * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler |
---|
10 | * |
---|
11 | * All parts of the source except where indicated are distributed under |
---|
12 | * the GNU public licence. See http://www.opensource.org for details. |
---|
13 | */ |
---|
14 | |
---|
15 | |
---|
16 | #ifndef _ML_ |
---|
17 | #define _ML_ |
---|
18 | |
---|
19 | /* definitions */ |
---|
20 | |
---|
21 | #define MINTS 0.20 /* Ts/Tv parameter */ |
---|
22 | #define MAXTS 30.0 |
---|
23 | #define MINYR 0.10 /* Y/R Ts parameter */ |
---|
24 | #define MAXYR 6.00 |
---|
25 | #define MINFI 0.00 /* fraction invariable sites */ |
---|
26 | #define MAXFI 0.99 /* only for input */ |
---|
27 | #define MINGE 0.01 /* rate heterogeneity parameter */ |
---|
28 | #define MAXGE 0.99 |
---|
29 | #define MINCAT 4 /* discrete Gamma categories */ |
---|
30 | #define MAXCAT 16 |
---|
31 | |
---|
32 | #define RMHROOT 5.0 /* upper relative bound for height of root */ |
---|
33 | #define MAXARC 900.0 /* upper limit on branch length (PAM) = 6.0 */ |
---|
34 | #define MINARC 0.001 /* lower limit on branch length (PAM) = 0.00001 */ |
---|
35 | #define EPSILON 0.0001 /* error in branch length (PAM) = 0.000001 */ |
---|
36 | #define HEPSILON 0.0001 /* error in node and root heights */ |
---|
37 | #define MAXIT 100 /* maximum number of iterates of smoothing */ |
---|
38 | #define MINFDIFF 0.00002 /* lower limit on base frequency differences */ |
---|
39 | #define MINFREQ 0.0001 /* lower limit on base frequencies = 0.01% */ |
---|
40 | #define NUMQBRNCH 5 /* number of branches in a quartet */ |
---|
41 | #define NUMQIBRNCH 1 /* number of internal branches in a quartet */ |
---|
42 | #define NUMQSPC 4 /* number of sequences in a quartet */ |
---|
43 | |
---|
44 | /* 2D minimisation */ |
---|
45 | #define PEPS1 0.01 /* epsilon substitution process estimation */ |
---|
46 | #define PEPS2 0.01 /* epsilon rate heterogeneity estimation */ |
---|
47 | |
---|
48 | /* quartet series */ |
---|
49 | #define MINPERTAXUM 2 |
---|
50 | #define MAXPERTAXUM 6 |
---|
51 | #define TSDIFF 0.20 |
---|
52 | #define YRDIFF 0.10 |
---|
53 | |
---|
54 | /* type definitions */ |
---|
55 | |
---|
56 | typedef struct node |
---|
57 | { |
---|
58 | struct node *isop; |
---|
59 | struct node *kinp; |
---|
60 | int descen; |
---|
61 | int number; |
---|
62 | double length; |
---|
63 | double lengthc; |
---|
64 | double varlen; |
---|
65 | double height; |
---|
66 | double varheight; |
---|
67 | ivector paths; |
---|
68 | cvector eprob; |
---|
69 | dcube partials; /* partial likelihoods */ |
---|
70 | char *label; /* internal labels */ |
---|
71 | } Node; |
---|
72 | |
---|
73 | typedef struct tree |
---|
74 | { |
---|
75 | Node *rootp; |
---|
76 | Node **ebrnchp; /* list of pointers to external branches */ |
---|
77 | Node **ibrnchp; /* list of pointers to internal branches */ |
---|
78 | double lklhd; /* total log-likelihood */ |
---|
79 | double lklhdc; /* total log-likelihood clock */ |
---|
80 | dmatrix condlkl; /* likelihoods for each pattern and non-zero rate */ |
---|
81 | double rssleast; |
---|
82 | } Tree; |
---|
83 | |
---|
84 | |
---|
85 | /* global variables */ |
---|
86 | |
---|
87 | EXTERN Node *chep; /* pointer to current height node */ |
---|
88 | EXTERN Node *rootbr; /* pointer to root branch */ |
---|
89 | EXTERN Node **heights; /* pointer to height nodes in unrooted tree */ |
---|
90 | EXTERN int Numhts; /* number of height nodes in unrooted tree */ |
---|
91 | EXTERN double hroot; /* height of root */ |
---|
92 | EXTERN double varhroot; /* variance of height of root */ |
---|
93 | EXTERN double maxhroot; /* maximal height of root */ |
---|
94 | EXTERN int locroot; /* location of root */ |
---|
95 | EXTERN int numbestroot; /* number of best locations for root */ |
---|
96 | EXTERN int clockmode; /* clocklike vs. nonclocklike computation */ |
---|
97 | EXTERN cmatrix Identif; /* sequence names */ |
---|
98 | EXTERN cmatrix Seqchar; /* ML sequence data */ |
---|
99 | EXTERN cmatrix Seqpat; /* ordered site patterns */ |
---|
100 | EXTERN ivector constpat; /* indicates constant site patterns */ |
---|
101 | EXTERN cvector seqchi; |
---|
102 | EXTERN cvector seqchj; |
---|
103 | EXTERN dcube partiali; |
---|
104 | EXTERN dcube partialj; |
---|
105 | EXTERN dcube ltprobr; /* transition probabilites (for all non-zero rates */ |
---|
106 | EXTERN dmatrix Distanmat; /* matrix with maximum likelihood distances */ |
---|
107 | EXTERN dmatrix Evec; /* Eigenvectors */ |
---|
108 | EXTERN dmatrix Ievc; /* Inverse eigenvectors */ |
---|
109 | EXTERN double TSparam; /* Ts/Tv parameter */ |
---|
110 | EXTERN double tsmean, yrmean; |
---|
111 | EXTERN double YRparam; /* Y/R Ts parameter */ |
---|
112 | EXTERN double geerr; /* estimated error of rate heterogeneity */ |
---|
113 | EXTERN double Geta; /* rate heterogeneity parameter */ |
---|
114 | EXTERN double fracconst; /* fraction of constant sites */ |
---|
115 | EXTERN double fracconstpat;/* fraction of constant patterns */ |
---|
116 | EXTERN double Proportion; /* for tree drawing */ |
---|
117 | EXTERN double tserr; /* estimated error of TSparam */ |
---|
118 | EXTERN double yrerr; /* estimated error of YRparam */ |
---|
119 | EXTERN double fracinv; /* fraction of invariable sites */ |
---|
120 | EXTERN double fierr; /* estimated error of fracinv */ |
---|
121 | EXTERN dvector Brnlength; |
---|
122 | EXTERN dvector Distanvec; |
---|
123 | EXTERN dvector Eval; /* Eigenvalues of 1 PAM rate matrix */ |
---|
124 | EXTERN dvector Freqtpm; /* base frequencies */ |
---|
125 | EXTERN dvector Rates; /* rate of each of the categories */ |
---|
126 | EXTERN dmatrix iexp; |
---|
127 | EXTERN imatrix Basecomp; /* base composition of each taxon */ |
---|
128 | EXTERN ivector usedtaxa; /* list needed in the input treefile procedure */ |
---|
129 | EXTERN int numtc; /* auxiliary variable for printing rooted tree */ |
---|
130 | EXTERN int qcalg_optn; /* use quartet subsampling algorithm */ |
---|
131 | EXTERN int approxp_optn; /* approximate parameter estimation */ |
---|
132 | EXTERN int chi2fail; /* flag for chi2 test */ |
---|
133 | EXTERN int Converg; /* flag for ML convergence (no clock) */ |
---|
134 | EXTERN int Convergc; /* flag for ML convergence (clock) */ |
---|
135 | EXTERN int data_optn; /* type of sequence input data */ |
---|
136 | EXTERN int Dayhf_optn; /* Dayhoff model */ |
---|
137 | EXTERN int HKY_optn; /* use HKY model */ |
---|
138 | EXTERN int Jtt_optn; /* JTT model */ |
---|
139 | EXTERN int blosum62_optn; /* BLOSUM 62 model */ |
---|
140 | EXTERN int mtrev_optn; /* mtREV model */ |
---|
141 | EXTERN int cprev_optn; /* cpREV model */ |
---|
142 | EXTERN int vtmv_optn; /* VT model */ |
---|
143 | EXTERN int wag_optn; /* WAG model */ |
---|
144 | EXTERN int Maxsite; /* number of ML characters per taxum */ |
---|
145 | EXTERN int Maxspc; /* number of sequences */ |
---|
146 | EXTERN int mlmode; /* quartet ML or user defined tree ML */ |
---|
147 | EXTERN int nuc_optn; /* nucleotide (4x4) models */ |
---|
148 | EXTERN int Numbrnch; /* number of branches of current tree */ |
---|
149 | EXTERN int numcats; /* number of rate categories */ |
---|
150 | EXTERN int Numconst; /* number of constant sites */ |
---|
151 | EXTERN int Numconstpat; /* number of constant patterns */ |
---|
152 | EXTERN int Numibrnch; /* number of internal branches of current tree */ |
---|
153 | EXTERN int Numitc; /* number of ML iterations assumning clock */ |
---|
154 | EXTERN int Numit; /* number of ML iterations if there is convergence */ |
---|
155 | EXTERN int Numptrn; /* number of site patterns */ |
---|
156 | EXTERN int Numspc; /* number of sequences of current tree */ |
---|
157 | EXTERN int optim_optn; /* optimize model parameters */ |
---|
158 | EXTERN int grate_optim; /* optimize Gamma rate heterogeneity parameter */ |
---|
159 | EXTERN int SH_optn; /* SH nucleotide (16x16) model */ |
---|
160 | EXTERN int TN_optn; /* use TN model */ |
---|
161 | EXTERN int tpmradix; /* number of different states */ |
---|
162 | EXTERN int fracinv_optim; /* optimize fraction of invariable sites */ |
---|
163 | EXTERN int typ_optn; /* type of PUZZLE analysis */ |
---|
164 | EXTERN ivector Weight; /* weight of each site pattern */ |
---|
165 | EXTERN Tree *Ctree; /* pointer to current tree */ |
---|
166 | EXTERN ulivector badtaxon; /* involment of each taxon in a bad quartet */ |
---|
167 | EXTERN int qca, qcb, qcc, qcd; /* quartet currently optimized */ |
---|
168 | EXTERN ivector Alias; /* link site -> corresponding site pattern */ |
---|
169 | EXTERN ivector bestrate; /* optimal assignment of rates to sequence sites */ |
---|
170 | |
---|
171 | EXTERN int bestratefound; |
---|
172 | |
---|
173 | /* function prototypes of all ml function */ |
---|
174 | |
---|
175 | void convfreq(dvector); |
---|
176 | void a_radixsort(cmatrix, ivector, int, int, int *); |
---|
177 | void condenceseq(cmatrix, ivector, cmatrix, ivector, int, int, int); |
---|
178 | void countconstantsites(cmatrix, ivector, int, int, int *, int*); |
---|
179 | void evaluateseqs(void); |
---|
180 | void elmhes(dmatrix, ivector, int); |
---|
181 | void eltran(dmatrix, dmatrix, ivector, int); |
---|
182 | void mcdiv(double, double, double, double, double *, double *); |
---|
183 | void hqr2(int, int, int, dmatrix, dmatrix, dvector, dvector); |
---|
184 | void onepamratematrix(dmatrix); |
---|
185 | void eigensystem(dvector, dmatrix); |
---|
186 | void luinverse(dmatrix, dmatrix, int); |
---|
187 | void checkevector(dmatrix, dmatrix, int); |
---|
188 | void tranprobmat(void); |
---|
189 | void tprobmtrx(double, dmatrix); |
---|
190 | double comptotloglkl(dmatrix); |
---|
191 | void allsitelkl(dmatrix, dvector); |
---|
192 | double pairlkl(double); |
---|
193 | double mldistance(int, int); |
---|
194 | void initdistan(void); |
---|
195 | void computedistan(void); |
---|
196 | void productpartials(Node *); |
---|
197 | void partialsinternal(Node *); |
---|
198 | void partialsexternal(Node *); |
---|
199 | void initpartials(Tree *); |
---|
200 | double intlkl(double); |
---|
201 | void optinternalbranch(Node *); |
---|
202 | double extlkl(double); |
---|
203 | void optexternalbranch(Node *); |
---|
204 | void finishlkl(Node *); |
---|
205 | double optlkl(Tree *); |
---|
206 | double treelkl(Tree *); |
---|
207 | void luequation(dmatrix, dvector, int); |
---|
208 | void lslength(Tree *, dvector, int, int, dvector); |
---|
209 | |
---|
210 | void getusertree(FILE *, cvector, int); |
---|
211 | Node *internalnode(Tree *, char **, int *); |
---|
212 | void constructtree(Tree *, cvector); |
---|
213 | void removebasalbif(cvector); |
---|
214 | void makeusertree(FILE *); |
---|
215 | Tree *new_tree(int, int, cmatrix); |
---|
216 | Tree *new_quartet(int, cmatrix); |
---|
217 | void free_tree(Tree *, int); |
---|
218 | void make_quartet(int, int, int, int); |
---|
219 | void changedistan(dmatrix, dvector, int); |
---|
220 | double quartet_lklhd(int, int, int, int); |
---|
221 | double quartet_alklhd(int, int, int, int); |
---|
222 | void readusertree(FILE *); |
---|
223 | double usertree_lklhd(void); |
---|
224 | double usertree_alklhd(void); |
---|
225 | void mlstart(void); |
---|
226 | void distupdate(int, int, int, int); |
---|
227 | void mlfinish(void); |
---|
228 | void prbranch(Node *, int, int, int, ivector, ivector, FILE *); |
---|
229 | void getproportion(double *, dvector, int); |
---|
230 | void prtopology(FILE *); |
---|
231 | void fputphylogeny(FILE *); |
---|
232 | void resulttree(FILE *); |
---|
233 | void njtree(FILE *); |
---|
234 | void njdistantree(Tree *); |
---|
235 | void findbestratecombination(void); |
---|
236 | void printbestratecombination(FILE *); |
---|
237 | int checkedge(int); |
---|
238 | void fputsubstree(FILE *, Node *); |
---|
239 | void fputrooted(FILE *, int); |
---|
240 | void findheights(Node *); |
---|
241 | void initclock(int); |
---|
242 | double clock_alklhd(int); |
---|
243 | double heightlkl(double); |
---|
244 | void optheight(void); |
---|
245 | double rheightlkl(double); |
---|
246 | void optrheight(void); |
---|
247 | double clock_lklhd(int); |
---|
248 | int findrootedge(void); |
---|
249 | void resultheights(FILE *); |
---|
250 | |
---|
251 | double homogentest(int); |
---|
252 | void YangDiscreteGamma(double, int, double *); |
---|
253 | void updaterates(void); |
---|
254 | void computestat(double *, int, double *, double *); |
---|
255 | double quartetml(int, int, int, int); |
---|
256 | double opttsq(double); |
---|
257 | double optyrq(double); |
---|
258 | void optimseqevolparamsq(void); |
---|
259 | double opttst(double); |
---|
260 | double optyrt(double); |
---|
261 | void optimseqevolparamst(void); |
---|
262 | double optfi(double); |
---|
263 | double optge(double); |
---|
264 | void optimrateparams(void); |
---|
265 | |
---|
266 | int gettpmradix(void); |
---|
267 | void rtfdata(dmatrix, double *); |
---|
268 | int code2int(cvector); |
---|
269 | char *int2code(int); |
---|
270 | |
---|
271 | void jttdata(dmatrix, double *); |
---|
272 | void dyhfdata(dmatrix, double *); |
---|
273 | void mtrevdata(dmatrix, double *); |
---|
274 | void cprev45data(dmatrix, double *); |
---|
275 | void blosum62data(dmatrix, double *); |
---|
276 | void vtmvdata(dmatrix, double *); |
---|
277 | void wagdata(dmatrix, double *); |
---|
278 | |
---|
279 | #endif |
---|