| 1 | /* |
|---|
| 2 | * ml.h |
|---|
| 3 | * |
|---|
| 4 | * |
|---|
| 5 | * Part of TREE-PUZZLE 5.0 (June 2000) |
|---|
| 6 | * |
|---|
| 7 | * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, |
|---|
| 8 | * M. Vingron, and Arndt von Haeseler |
|---|
| 9 | * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler |
|---|
| 10 | * |
|---|
| 11 | * All parts of the source except where indicated are distributed under |
|---|
| 12 | * the GNU public licence. See http://www.opensource.org for details. |
|---|
| 13 | */ |
|---|
| 14 | |
|---|
| 15 | |
|---|
| 16 | #ifndef _ML_ |
|---|
| 17 | #define _ML_ |
|---|
| 18 | |
|---|
| 19 | /* definitions */ |
|---|
| 20 | |
|---|
| 21 | #define MINTS 0.20 /* Ts/Tv parameter */ |
|---|
| 22 | #define MAXTS 30.0 |
|---|
| 23 | #define MINYR 0.10 /* Y/R Ts parameter */ |
|---|
| 24 | #define MAXYR 6.00 |
|---|
| 25 | #define MINFI 0.00 /* fraction invariable sites */ |
|---|
| 26 | #define MAXFI 0.99 /* only for input */ |
|---|
| 27 | #define MINGE 0.01 /* rate heterogeneity parameter */ |
|---|
| 28 | #define MAXGE 0.99 |
|---|
| 29 | #define MINCAT 4 /* discrete Gamma categories */ |
|---|
| 30 | #define MAXCAT 16 |
|---|
| 31 | |
|---|
| 32 | #define RMHROOT 5.0 /* upper relative bound for height of root */ |
|---|
| 33 | #define MAXARC 900.0 /* upper limit on branch length (PAM) = 6.0 */ |
|---|
| 34 | #define MINARC 0.001 /* lower limit on branch length (PAM) = 0.00001 */ |
|---|
| 35 | #define EPSILON 0.0001 /* error in branch length (PAM) = 0.000001 */ |
|---|
| 36 | #define HEPSILON 0.0001 /* error in node and root heights */ |
|---|
| 37 | #define MAXIT 100 /* maximum number of iterates of smoothing */ |
|---|
| 38 | #define MINFDIFF 0.00002 /* lower limit on base frequency differences */ |
|---|
| 39 | #define MINFREQ 0.0001 /* lower limit on base frequencies = 0.01% */ |
|---|
| 40 | #define NUMQBRNCH 5 /* number of branches in a quartet */ |
|---|
| 41 | #define NUMQIBRNCH 1 /* number of internal branches in a quartet */ |
|---|
| 42 | #define NUMQSPC 4 /* number of sequences in a quartet */ |
|---|
| 43 | |
|---|
| 44 | /* 2D minimisation */ |
|---|
| 45 | #define PEPS1 0.01 /* epsilon substitution process estimation */ |
|---|
| 46 | #define PEPS2 0.01 /* epsilon rate heterogeneity estimation */ |
|---|
| 47 | |
|---|
| 48 | /* quartet series */ |
|---|
| 49 | #define MINPERTAXUM 2 |
|---|
| 50 | #define MAXPERTAXUM 6 |
|---|
| 51 | #define TSDIFF 0.20 |
|---|
| 52 | #define YRDIFF 0.10 |
|---|
| 53 | |
|---|
| 54 | /* type definitions */ |
|---|
| 55 | |
|---|
| 56 | typedef struct node |
|---|
| 57 | { |
|---|
| 58 | struct node *isop; |
|---|
| 59 | struct node *kinp; |
|---|
| 60 | int descen; |
|---|
| 61 | int number; |
|---|
| 62 | double length; |
|---|
| 63 | double lengthc; |
|---|
| 64 | double varlen; |
|---|
| 65 | double height; |
|---|
| 66 | double varheight; |
|---|
| 67 | ivector paths; |
|---|
| 68 | cvector eprob; |
|---|
| 69 | dcube partials; /* partial likelihoods */ |
|---|
| 70 | char *label; /* internal labels */ |
|---|
| 71 | } Node; |
|---|
| 72 | |
|---|
| 73 | typedef struct tree |
|---|
| 74 | { |
|---|
| 75 | Node *rootp; |
|---|
| 76 | Node **ebrnchp; /* list of pointers to external branches */ |
|---|
| 77 | Node **ibrnchp; /* list of pointers to internal branches */ |
|---|
| 78 | double lklhd; /* total log-likelihood */ |
|---|
| 79 | double lklhdc; /* total log-likelihood clock */ |
|---|
| 80 | dmatrix condlkl; /* likelihoods for each pattern and non-zero rate */ |
|---|
| 81 | double rssleast; |
|---|
| 82 | } Tree; |
|---|
| 83 | |
|---|
| 84 | |
|---|
| 85 | /* global variables */ |
|---|
| 86 | |
|---|
| 87 | EXTERN Node *chep; /* pointer to current height node */ |
|---|
| 88 | EXTERN Node *rootbr; /* pointer to root branch */ |
|---|
| 89 | EXTERN Node **heights; /* pointer to height nodes in unrooted tree */ |
|---|
| 90 | EXTERN int Numhts; /* number of height nodes in unrooted tree */ |
|---|
| 91 | EXTERN double hroot; /* height of root */ |
|---|
| 92 | EXTERN double varhroot; /* variance of height of root */ |
|---|
| 93 | EXTERN double maxhroot; /* maximal height of root */ |
|---|
| 94 | EXTERN int locroot; /* location of root */ |
|---|
| 95 | EXTERN int numbestroot; /* number of best locations for root */ |
|---|
| 96 | EXTERN int clockmode; /* clocklike vs. nonclocklike computation */ |
|---|
| 97 | EXTERN cmatrix Identif; /* sequence names */ |
|---|
| 98 | EXTERN cmatrix Seqchar; /* ML sequence data */ |
|---|
| 99 | EXTERN cmatrix Seqpat; /* ordered site patterns */ |
|---|
| 100 | EXTERN ivector constpat; /* indicates constant site patterns */ |
|---|
| 101 | EXTERN cvector seqchi; |
|---|
| 102 | EXTERN cvector seqchj; |
|---|
| 103 | EXTERN dcube partiali; |
|---|
| 104 | EXTERN dcube partialj; |
|---|
| 105 | EXTERN dcube ltprobr; /* transition probabilites (for all non-zero rates */ |
|---|
| 106 | EXTERN dmatrix Distanmat; /* matrix with maximum likelihood distances */ |
|---|
| 107 | EXTERN dmatrix Evec; /* Eigenvectors */ |
|---|
| 108 | EXTERN dmatrix Ievc; /* Inverse eigenvectors */ |
|---|
| 109 | EXTERN double TSparam; /* Ts/Tv parameter */ |
|---|
| 110 | EXTERN double tsmean, yrmean; |
|---|
| 111 | EXTERN double YRparam; /* Y/R Ts parameter */ |
|---|
| 112 | EXTERN double geerr; /* estimated error of rate heterogeneity */ |
|---|
| 113 | EXTERN double Geta; /* rate heterogeneity parameter */ |
|---|
| 114 | EXTERN double fracconst; /* fraction of constant sites */ |
|---|
| 115 | EXTERN double fracconstpat;/* fraction of constant patterns */ |
|---|
| 116 | EXTERN double Proportion; /* for tree drawing */ |
|---|
| 117 | EXTERN double tserr; /* estimated error of TSparam */ |
|---|
| 118 | EXTERN double yrerr; /* estimated error of YRparam */ |
|---|
| 119 | EXTERN double fracinv; /* fraction of invariable sites */ |
|---|
| 120 | EXTERN double fierr; /* estimated error of fracinv */ |
|---|
| 121 | EXTERN dvector Brnlength; |
|---|
| 122 | EXTERN dvector Distanvec; |
|---|
| 123 | EXTERN dvector Eval; /* Eigenvalues of 1 PAM rate matrix */ |
|---|
| 124 | EXTERN dvector Freqtpm; /* base frequencies */ |
|---|
| 125 | EXTERN dvector Rates; /* rate of each of the categories */ |
|---|
| 126 | EXTERN dmatrix iexp; |
|---|
| 127 | EXTERN imatrix Basecomp; /* base composition of each taxon */ |
|---|
| 128 | EXTERN ivector usedtaxa; /* list needed in the input treefile procedure */ |
|---|
| 129 | EXTERN int numtc; /* auxiliary variable for printing rooted tree */ |
|---|
| 130 | EXTERN int qcalg_optn; /* use quartet subsampling algorithm */ |
|---|
| 131 | EXTERN int approxp_optn; /* approximate parameter estimation */ |
|---|
| 132 | EXTERN int chi2fail; /* flag for chi2 test */ |
|---|
| 133 | EXTERN int Converg; /* flag for ML convergence (no clock) */ |
|---|
| 134 | EXTERN int Convergc; /* flag for ML convergence (clock) */ |
|---|
| 135 | EXTERN int data_optn; /* type of sequence input data */ |
|---|
| 136 | EXTERN int Dayhf_optn; /* Dayhoff model */ |
|---|
| 137 | EXTERN int HKY_optn; /* use HKY model */ |
|---|
| 138 | EXTERN int Jtt_optn; /* JTT model */ |
|---|
| 139 | EXTERN int blosum62_optn; /* BLOSUM 62 model */ |
|---|
| 140 | EXTERN int mtrev_optn; /* mtREV model */ |
|---|
| 141 | EXTERN int cprev_optn; /* cpREV model */ |
|---|
| 142 | EXTERN int vtmv_optn; /* VT model */ |
|---|
| 143 | EXTERN int wag_optn; /* WAG model */ |
|---|
| 144 | EXTERN int Maxsite; /* number of ML characters per taxum */ |
|---|
| 145 | EXTERN int Maxspc; /* number of sequences */ |
|---|
| 146 | EXTERN int mlmode; /* quartet ML or user defined tree ML */ |
|---|
| 147 | EXTERN int nuc_optn; /* nucleotide (4x4) models */ |
|---|
| 148 | EXTERN int Numbrnch; /* number of branches of current tree */ |
|---|
| 149 | EXTERN int numcats; /* number of rate categories */ |
|---|
| 150 | EXTERN int Numconst; /* number of constant sites */ |
|---|
| 151 | EXTERN int Numconstpat; /* number of constant patterns */ |
|---|
| 152 | EXTERN int Numibrnch; /* number of internal branches of current tree */ |
|---|
| 153 | EXTERN int Numitc; /* number of ML iterations assumning clock */ |
|---|
| 154 | EXTERN int Numit; /* number of ML iterations if there is convergence */ |
|---|
| 155 | EXTERN int Numptrn; /* number of site patterns */ |
|---|
| 156 | EXTERN int Numspc; /* number of sequences of current tree */ |
|---|
| 157 | EXTERN int optim_optn; /* optimize model parameters */ |
|---|
| 158 | EXTERN int grate_optim; /* optimize Gamma rate heterogeneity parameter */ |
|---|
| 159 | EXTERN int SH_optn; /* SH nucleotide (16x16) model */ |
|---|
| 160 | EXTERN int TN_optn; /* use TN model */ |
|---|
| 161 | EXTERN int tpmradix; /* number of different states */ |
|---|
| 162 | EXTERN int fracinv_optim; /* optimize fraction of invariable sites */ |
|---|
| 163 | EXTERN int typ_optn; /* type of PUZZLE analysis */ |
|---|
| 164 | EXTERN ivector Weight; /* weight of each site pattern */ |
|---|
| 165 | EXTERN Tree *Ctree; /* pointer to current tree */ |
|---|
| 166 | EXTERN ulivector badtaxon; /* involment of each taxon in a bad quartet */ |
|---|
| 167 | EXTERN int qca, qcb, qcc, qcd; /* quartet currently optimized */ |
|---|
| 168 | EXTERN ivector Alias; /* link site -> corresponding site pattern */ |
|---|
| 169 | EXTERN ivector bestrate; /* optimal assignment of rates to sequence sites */ |
|---|
| 170 | |
|---|
| 171 | EXTERN int bestratefound; |
|---|
| 172 | |
|---|
| 173 | /* function prototypes of all ml function */ |
|---|
| 174 | |
|---|
| 175 | void convfreq(dvector); |
|---|
| 176 | void a_radixsort(cmatrix, ivector, int, int, int *); |
|---|
| 177 | void condenceseq(cmatrix, ivector, cmatrix, ivector, int, int, int); |
|---|
| 178 | void countconstantsites(cmatrix, ivector, int, int, int *, int*); |
|---|
| 179 | void evaluateseqs(void); |
|---|
| 180 | void elmhes(dmatrix, ivector, int); |
|---|
| 181 | void eltran(dmatrix, dmatrix, ivector, int); |
|---|
| 182 | void mcdiv(double, double, double, double, double *, double *); |
|---|
| 183 | void hqr2(int, int, int, dmatrix, dmatrix, dvector, dvector); |
|---|
| 184 | void onepamratematrix(dmatrix); |
|---|
| 185 | void eigensystem(dvector, dmatrix); |
|---|
| 186 | void luinverse(dmatrix, dmatrix, int); |
|---|
| 187 | void checkevector(dmatrix, dmatrix, int); |
|---|
| 188 | void tranprobmat(void); |
|---|
| 189 | void tprobmtrx(double, dmatrix); |
|---|
| 190 | double comptotloglkl(dmatrix); |
|---|
| 191 | void allsitelkl(dmatrix, dvector); |
|---|
| 192 | double pairlkl(double); |
|---|
| 193 | double mldistance(int, int); |
|---|
| 194 | void initdistan(void); |
|---|
| 195 | void computedistan(void); |
|---|
| 196 | void productpartials(Node *); |
|---|
| 197 | void partialsinternal(Node *); |
|---|
| 198 | void partialsexternal(Node *); |
|---|
| 199 | void initpartials(Tree *); |
|---|
| 200 | double intlkl(double); |
|---|
| 201 | void optinternalbranch(Node *); |
|---|
| 202 | double extlkl(double); |
|---|
| 203 | void optexternalbranch(Node *); |
|---|
| 204 | void finishlkl(Node *); |
|---|
| 205 | double optlkl(Tree *); |
|---|
| 206 | double treelkl(Tree *); |
|---|
| 207 | void luequation(dmatrix, dvector, int); |
|---|
| 208 | void lslength(Tree *, dvector, int, int, dvector); |
|---|
| 209 | |
|---|
| 210 | void getusertree(FILE *, cvector, int); |
|---|
| 211 | Node *internalnode(Tree *, char **, int *); |
|---|
| 212 | void constructtree(Tree *, cvector); |
|---|
| 213 | void removebasalbif(cvector); |
|---|
| 214 | void makeusertree(FILE *); |
|---|
| 215 | Tree *new_tree(int, int, cmatrix); |
|---|
| 216 | Tree *new_quartet(int, cmatrix); |
|---|
| 217 | void free_tree(Tree *, int); |
|---|
| 218 | void make_quartet(int, int, int, int); |
|---|
| 219 | void changedistan(dmatrix, dvector, int); |
|---|
| 220 | double quartet_lklhd(int, int, int, int); |
|---|
| 221 | double quartet_alklhd(int, int, int, int); |
|---|
| 222 | void readusertree(FILE *); |
|---|
| 223 | double usertree_lklhd(void); |
|---|
| 224 | double usertree_alklhd(void); |
|---|
| 225 | void mlstart(void); |
|---|
| 226 | void distupdate(int, int, int, int); |
|---|
| 227 | void mlfinish(void); |
|---|
| 228 | void prbranch(Node *, int, int, int, ivector, ivector, FILE *); |
|---|
| 229 | void getproportion(double *, dvector, int); |
|---|
| 230 | void prtopology(FILE *); |
|---|
| 231 | void fputphylogeny(FILE *); |
|---|
| 232 | void resulttree(FILE *); |
|---|
| 233 | void njtree(FILE *); |
|---|
| 234 | void njdistantree(Tree *); |
|---|
| 235 | void findbestratecombination(void); |
|---|
| 236 | void printbestratecombination(FILE *); |
|---|
| 237 | int checkedge(int); |
|---|
| 238 | void fputsubstree(FILE *, Node *); |
|---|
| 239 | void fputrooted(FILE *, int); |
|---|
| 240 | void findheights(Node *); |
|---|
| 241 | void initclock(int); |
|---|
| 242 | double clock_alklhd(int); |
|---|
| 243 | double heightlkl(double); |
|---|
| 244 | void optheight(void); |
|---|
| 245 | double rheightlkl(double); |
|---|
| 246 | void optrheight(void); |
|---|
| 247 | double clock_lklhd(int); |
|---|
| 248 | int findrootedge(void); |
|---|
| 249 | void resultheights(FILE *); |
|---|
| 250 | |
|---|
| 251 | double homogentest(int); |
|---|
| 252 | void YangDiscreteGamma(double, int, double *); |
|---|
| 253 | void updaterates(void); |
|---|
| 254 | void computestat(double *, int, double *, double *); |
|---|
| 255 | double quartetml(int, int, int, int); |
|---|
| 256 | double opttsq(double); |
|---|
| 257 | double optyrq(double); |
|---|
| 258 | void optimseqevolparamsq(void); |
|---|
| 259 | double opttst(double); |
|---|
| 260 | double optyrt(double); |
|---|
| 261 | void optimseqevolparamst(void); |
|---|
| 262 | double optfi(double); |
|---|
| 263 | double optge(double); |
|---|
| 264 | void optimrateparams(void); |
|---|
| 265 | |
|---|
| 266 | int gettpmradix(void); |
|---|
| 267 | void rtfdata(dmatrix, double *); |
|---|
| 268 | int code2int(cvector); |
|---|
| 269 | const char *int2code(int); |
|---|
| 270 | |
|---|
| 271 | void jttdata(dmatrix, double *); |
|---|
| 272 | void dyhfdata(dmatrix, double *); |
|---|
| 273 | void mtrevdata(dmatrix, double *); |
|---|
| 274 | void cprev45data(dmatrix, double *); |
|---|
| 275 | void blosum62data(dmatrix, double *); |
|---|
| 276 | void vtmvdata(dmatrix, double *); |
|---|
| 277 | void wagdata(dmatrix, double *); |
|---|
| 278 | |
|---|
| 279 | #endif |
|---|