source: trunk/GDE/TREEPUZZLE/src/puzzle.h

Last change on this file was 19480, checked in by westram, 15 months ago
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 16.7 KB
Line 
/*
 * puzzle.h
 *
 *
 * Part of TREE-PUZZLE 5.0 (June 2000)
 *
 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
 *                  M. Vingron, and Arndt von Haeseler
 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
 *
 * All parts of the source except where indicated are distributed under
 * the GNU public licence.  See http://www.opensource.org for details.
 */
14
15
16#ifndef _PUZZLE_
17#define _PUZZLE_
18
/*
 * Program identification strings.
 * PACKAGE and VERSION are only defined here when nothing defined them
 * earlier (each is wrapped in #ifndef); DATE is always defined here.
 */
#ifndef PACKAGE
#  define PACKAGE    "tree-puzzle"
#endif
#ifndef VERSION
#  define VERSION    "5.0"
#endif
#define DATE       "October 2000"
26
/* standard library headers, then project headers */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <ctype.h>
#include <string.h>
#include <limits.h>
#include <float.h>
#include "util.h"
#include "ml.h"
#ifdef PARALLEL
#  include "ppuzzle.h"
#endif
41
/*
 * Console output macros.
 *
 * Intended call shape:  FPRINTF(STDOUTFILE "format", args...);
 *   - without PARALLEL this expands to  printf("format", args...)
 *   - with PARALLEL it expands to       fprintf(stdout, "format", args...)
 * STDOUTFILE therefore carries its own trailing comma in the PARALLEL case
 * and is empty otherwise.
 */
#define STDOUT stdout
#ifndef PARALLEL        /* because printf() runs significantly faster */
                        /* than fprintf(stdout) on an Apple Macintosh */
                        /* (HS) */
#  define FPRINTF    printf
#  define STDOUTFILE
#else
#  define FPRINTF    fprintf
#  define STDOUTFILE STDOUT,
#endif
52
/* filenames */

/* size in bytes of each file name buffer declared in this header
 * (the spelling FILENAMELENTH is the established macro name) */
#define FILENAMELENTH 2048

/* default file names */
#define INFILEDEFAULT     "infile"
#define OUTFILEDEFAULT    "outfile"
#define TREEFILEDEFAULT   "outtree"
#define INTREEDEFAULT     "intree"
#define DISTANCESDEFAULT  "outdist"
#define TRIANGLEDEFAULT   "outlm.eps"
#define UNRESOLVEDDEFAULT "outqlist"
#define ALLQUARTDEFAULT   "outallquart"
#define ALLQUARTLHDEFAULT "outallquartlh"
#define OUTPTLISTDEFAULT  "outpstep"
#define OUTPTORDERDEFAULT "outptorder"

/* symbolic file names: each one is an alias for the file name buffer
 * variable of the same purpose */
#define INFILE     infilename
#define OUTFILE    outfilename
#define TREEFILE   outtreename
#define INTREE     intreename
#define DISTANCES  outdistname
#define TRIANGLE   outlmname
#define UNRESOLVED outqlistname
#define ALLQUART   outallquartname
#define ALLQUARTLH outallquartlhname
#define OUTPTLIST  outpstepname
#define OUTPTORDER outptordername
80
81EXTERN char infilename        [FILENAMELENTH];
82EXTERN char outfilename       [FILENAMELENTH];
83EXTERN char outtreename       [FILENAMELENTH];
84EXTERN char intreename        [FILENAMELENTH];
85EXTERN char outdistname       [FILENAMELENTH];
86EXTERN char outlmname         [FILENAMELENTH];
87EXTERN char outqlistname      [FILENAMELENTH];
88EXTERN char outallquartname   [FILENAMELENTH];
89EXTERN char outallquartlhname [FILENAMELENTH];
90EXTERN char outpstepname      [FILENAMELENTH];
91EXTERN char outptordername    [FILENAMELENTH];
92
/*
 * Extension strings, one per output file kind.
 * NOTE(review): presumably appended to the input file name to build the
 * output file names - how they are used is not visible in this header.
 */
#define OUTFILEEXT    "puzzle"
#define TREEFILEEXT   "tree"
#define DISTANCESEXT  "dist"
#define TRIANGLEEXT   "eps"
#define UNRESOLVEDEXT "qlist"
#define ALLQUARTEXT   "allquart"
#define ALLQUARTLHEXT "allquartlh"
#define OUTPTLISTEXT  "pstep"
#define OUTPTORDEREXT "ptorder"
102
/*
 * Fallback definition of the console output macros.
 *
 * The same macros are already defined, with identical replacement text,
 * right after the #include section of this header.  The #ifndef guards
 * make it explicit that this copy only takes effect when the macros are
 * still undefined, instead of silently redefining them.
 *
 * printf() is used without PARALLEL because it runs significantly faster
 * than fprintf(stdout) on an Apple Macintosh (HS).
 */
#ifndef FPRINTF
#  ifndef PARALLEL
#    define FPRINTF    printf
#    define STDOUTFILE
#  else
#    define FPRINTF    fprintf
#    ifndef STDOUT
#      define STDOUT   stdout
#    endif
#    define STDOUTFILE STDOUT,
#  endif
#endif
113
114
/* auto_aamodel/auto_datatype values  (xxx) */
#define AUTO_OFF      0
#define AUTO_GUESS    1
#define AUTO_DEFAULT  2


/* qptlist values  (xxx) */
#define PSTOUT_NONE      0
#define PSTOUT_ORDER     1
#define PSTOUT_LISTORDER 2
#define PSTOUT_LIST      3

/* data type values  (xxx)
 * (the original comment named the variable "dtat_optn", apparently a
 * typo for data_optn - the variable itself is not declared in this header) */
#define NUCLEOTIDE 0
#define AMINOACID  1
#define BINARY     2

/* typ_optn values  (xxx) */
#define LIKMAPING_OPTN 1
#define TREERECON_OPTN 0

/* puzzlemodes (xxx) */
#define QUARTPUZ 0
#define USERTREE 1
#define PAIRDIST 2

/* rhetmodes (xxx) Modes of rate heterogeneity */
#define UNIFORMRATE 0
#define GAMMARATE   1
#define TWORATE     2
#define MIXEDRATE   3

/* defines for types of quartet likelihood computation (xxx) */
#define EXACT  0
#define APPROX 1
150
151/* tree structure */
152typedef struct oneedge {
153        /* pointer to other three edges */
154        struct oneedge *up;
155        struct oneedge *downleft;
156        struct oneedge *downright;
157        int numedge;    /* number of edge */
158        uli edgeinfo;   /* value of this edge */
159        int *edgemap;   /* pointer to the local edgemap */
160} ONEEDGE;
161
162
163/* variables */
164EXTERN cmatrix biparts;      /* bipartitions of tree of current puzzling step */
165EXTERN cmatrix consbiparts;  /* bipartitions of majority rule consensus tree */
166EXTERN cmatrix seqchars;     /* characters contained in data set */
167EXTERN cmatrix treepict;     /* picture of consensus tree */
168EXTERN double minscore;      /* value of edgescore on minedge */
169EXTERN double tstvf84;       /* F84 transition/transversion ratio */
170EXTERN double tstvratio;     /* expected transition/transversion ratio */
171EXTERN double yrtsratio;     /* expected pyrimidine/purine transition ratio */
172EXTERN dvector ulkl;         /* log L of user trees */
173EXTERN dmatrix allsites;     /* log L per sites of user trees */
174EXTERN dvector ulklc;        /* log L of user trees (clock) */
175EXTERN dmatrix allsitesc;    /* log L per sites of user trees (clock) */
176EXTERN FILE *utfp;           /* pointer to user tree file */
177EXTERN FILE *seqfp;          /* pointer to sequence input file */
178EXTERN FILE *tfp;            /* pointer to tree file */
179EXTERN FILE *dfp;            /* pointer to distance file */
180EXTERN FILE *trifp;          /* pointer to triangle file */
181EXTERN FILE *unresfp;        /* pointer to file with unresolved quartets */
182EXTERN FILE *tmpfp;          /* pointer to temporary file */
183EXTERN FILE *qptlist;        /* pointer to file with puzzling step trees */
184EXTERN FILE *qptorder;       /* pointer to file with unique puzzling step trees */
185EXTERN int SHcodon;          /* whether SH should be applied to 1st, 2nd codon positions */
186EXTERN int utree_optn;       /* use first user tree for estimation */
187EXTERN int listqptrees;      /* list puzzling step trees */
188EXTERN int approxqp;         /* approximate QP quartets */
189EXTERN int *edgeofleaf;      /* vector with edge number of all leaves */
190EXTERN int codon_optn;       /* declares what positions in a codon should be used */
191EXTERN int compclock;        /* computation of clocklike branch lengths */
192EXTERN int chooseA;          /* leaf variable */
193EXTERN int chooseB;          /* leaf variable */
194EXTERN int clustA, clustB, clustC, clustD; /* number of members of LM clusters */
195EXTERN int column;           /* used for breaking lines (writing tree to treefile) */
196EXTERN int Frequ_optn;       /* use empirical base frequencies */
197EXTERN int Maxbrnch;         /* 2*Maxspc - 3 */
198EXTERN int Maxseqc;          /* number of sequence characters per taxum */
199EXTERN int mflag;            /* flag used for correct printing of runtime messages */
200EXTERN int minedge;          /* edge with minimum edgeinfo */
201EXTERN int nextedge;         /* number of edges in the current tree */
202EXTERN int nextleaf;         /* next leaf to add to tree */
203EXTERN int numclust;         /* number of clusters in LM analysis */
204EXTERN int outgroup;         /* outgroup */
205EXTERN int puzzlemode;       /* computation of QP tree and/or ML distances */
206EXTERN int rootsearch;       /* how location of root is found */
207EXTERN int rhetmode;         /* model of rate heterogeneity */
208EXTERN int splitlength;      /* length of one entry in splitpatterns */
209EXTERN int *splitsizes;      /* size of all different splits of all trees */
210EXTERN int usebestq_optn;    /* use only best quartet topology, no bayesian weights */
211EXTERN int show_optn;        /* show unresolved quartets                        */
212EXTERN int savequart_optn;   /* save memory block which quartets to file */
213EXTERN int savequartlh_optn; /* save quartet likelihoods to file */
214EXTERN int saveqlhbin_optn;  /* save quartet likelihoods binary */
215EXTERN int readquart_optn;   /* read memory block which quartets from file */
216EXTERN int sym_optn;         /* symmetrize doublet frequencies */
217EXTERN int xsize;            /* depth of consensus tree picture */
218EXTERN int ytaxcounter;      /* counter for establishing y-coordinates of all taxa */
219EXTERN int numutrees;        /* number of users trees in input tree file */
220EXTERN ivector clusterA, clusterB, clusterC, clusterD;  /* clusters for LM analysis */
221EXTERN ivector consconfid;   /* confidence values of majority rule consensus tree */
222EXTERN ivector conssizes;    /* partition sizes of majority rule consensus tree */
223EXTERN ivector trueID;       /* leaf -> taxon on this leaf */
224EXTERN ivector xcor;         /* x-coordinates of consensus tree nodes */
225EXTERN ivector ycor;         /* y-coordinates of consensus tree nodes */
226EXTERN ivector ycormax;      /* maximal y-coordinates of consensus tree nodes */
227EXTERN ivector ycormin;      /* minimal y-coordinates of consensus tree nodes */
228EXTERN ivector ycortax;      /* y-coordinates of all taxa */
229EXTERN ONEEDGE *edge;        /* vector with all the edges of the tree */
230EXTERN uli *splitcomp;       /* bipartition storage */
231EXTERN uli *splitfreqs;      /* frequencies of all different splits of all trees */
232EXTERN uli *splitpatterns;   /* all different splits of all trees */
233EXTERN uli badqs;            /* number of bad quartets */
234EXTERN uli consincluded;     /* number of included biparts in the consensus tree */
235EXTERN uli Currtrial;        /* counter for puzzling steps */
236EXTERN uli maxbiparts;       /* space is reserved for that many bipartitions */
237EXTERN uli mininfo;          /* value of edgeinfo on minedge */
238EXTERN uli numbiparts;       /* number of different bipartitions */
239EXTERN uli Numquartets;      /* number of quartets */
240EXTERN uli Numtrial;         /* number of puzzling steps */
241EXTERN uli lmqts;            /* quartets investigated in LM analysis (0 = ALL) */
242
243EXTERN int auto_datatype;       /* guess datatype ? */
244EXTERN int guessdata_optn;      /* guessed datatype */
245
246EXTERN int auto_aamodel;        /* guess amino acid modell ? */
247EXTERN int guessauto_aamodel;   /* guessed amino acid modell ? */
248EXTERN int guessDayhf_optn;     /* guessed Dayhoff model option */
249EXTERN int guessJtt_optn;       /* guessed JTT model option */
250EXTERN int guessblosum62_optn;  /* guessed BLOSUM 62 model option */
251EXTERN int guessmtrev_optn;     /* guessed mtREV model option */
252EXTERN int guesscprev_optn;     /* guessed cpREV model option */
253EXTERN int guessvtmv_optn;      /* guessed VT model option */
254EXTERN int guesswag_optn;       /* guessed WAG model option */
255
256/* counter variables needed in likelihood mapping analysis */
257EXTERN uli ar1, ar2, ar3;
258EXTERN uli reg1, reg2, reg3, reg4, reg5, reg6, reg7;
259EXTERN uli reg1l, reg1r, reg2u, reg2d, reg3u, reg3d,
260 reg4u, reg4d, reg5l, reg5r, reg6u, reg6d;
261EXTERN unsigned char *quartetinfo; /* place where quartets are stored */
262EXTERN dvector qweight; /* for use in QP and LM analysis */
263EXTERN dvector sqdiff;
264EXTERN ivector qworder;
265EXTERN ivector sqorder;
266
267EXTERN int randseed;
268EXTERN int psteptreestrlen;
269
/*
 * One entry of a list of trees.
 *
 * The entry carries four links to other entries (pred/succ and
 * sortnext/sortlast), a tree string and three integers.
 * NOTE(review): from the names, pred/succ presumably form the list in
 * insertion order and sortnext/sortlast a second, sorted chain; `count`
 * presumably is how often the tree was found - confirm against
 * addtree2list() and sortbynum().
 */
typedef struct treelistitemtypedummy {
        struct treelistitemtypedummy *pred;
        struct treelistitemtypedummy *succ;
        struct treelistitemtypedummy *sortnext;
        struct treelistitemtypedummy *sortlast;
        char  *tree;
        int    count;
        int    id;
        int    idx;
} treelistitemtype;
280
281EXTERN treelistitemtype *psteptreelist;
282EXTERN treelistitemtype *psteptreesortlist;
283EXTERN int               psteptreenum;
284EXTERN int               psteptreesum;
285
286
/* prototypes */
void makeF84model(void);
void compnumqts(void);
void setoptions(void);
void openfiletoread(FILE **, const char[], const char[]);
void openfiletowrite(FILE **, const char[], const char[]);
void openfiletoappend(FILE **, const char[], const char[]);
void closefile(FILE *);
void symdoublets(void);
void computeexpectations(void);
void putdistance(FILE *);
void findidenticals(FILE *);
double averagedist(void);
void initps(FILE *);
void plotlmpoint(FILE *, double, double);
void finishps(FILE *);
void makelmpoint(FILE *, double, double, double);
void printtreestats(FILE *);
void timestamp(FILE *);
/* second argument: presumably one of the WRITE* constants defined
 * directly below these prototypes - confirm */
void writeoutputfile(FILE *, int);
307
/* definitions for writing output: selects which part of the output is
 * written (all of it, the parameters only, or the rest) */
#define WRITEALL    0
#define WRITEPARAMS 1
#define WRITEREST   2
312
/* output of timing statistics and user trees, timer control,
 * parameter estimation, program entry point */
void writetimesstat(FILE *ofp);
void writecutree(FILE *, int);
void starttimer(void);
void checktimer(uli);
void estimateparametersnotree(void);
void estimateparameterstree(void);
int main(int, char *[]);
/* comparison callbacks with the signature qsort()/bsearch() expect;
 * from the names presumably for uli and int elements - confirm */
int ulicmp(const void *, const void *);
int intcmp(const void *, const void *);
322
/* reading the data set */
void readid(FILE *, int);
char readnextcharacter(FILE *, int, int);
void skiprestofline(FILE *, int, int);
void skipcntrl(FILE *, int, int);
void getseqs(FILE *);
void initid(int);
void fputid10(FILE *, int);
int fputid(FILE *, int);
void getsizesites(FILE *);
void getdataset(FILE *);
int guessdatatype(void);
void translatedataset(void);
void estimatebasefreqs(void);
void guessmodel(void);

/* building and writing the tree */
void inittree(void);
void addnextleaf(int);
void freetree(void);
void writeOTU(FILE *, int);
void writetree(FILE *);

/* ctree handling.
 * initctree is declared with (void) so that it is a real prototype:
 * an empty parameter list "()" would leave the arguments unchecked. */
int *initctree(void);
void copytree(int *ctree);
void freectree(int **snodes);
void printctree(int *ctree);
char *sprintfctree(int *ctree, int strlen);
void fprintffullpstree(FILE *outf, char *treestr);
int printfsortctree(int *ctree);
int sortctree(int *ctree);
int ct_1stedge(int node);
int ct_2ndedge(int node);
int ct_3rdedge(int node);
353
354void printfpstrees(treelistitemtype *list);
355void printfsortedpstrees(treelistitemtype *list);
356void fprintfsortedpstrees(FILE *output, treelistitemtype *list, int itemnum, int itemsum, int comment, float cutoff);
357
358void sortbynum(treelistitemtype *list, treelistitemtype **sortlist);
359treelistitemtype *addtree2list(char             **tree,
360                               int                numtrees,
361                               treelistitemtype **list,
362                               int               *numitems,
363                               int               *numsum);
364void freetreelist(treelistitemtype **list,
365                  int               *numitems,
366                  int               *numsum);
367void resetedgeinfo(void);
368void incrementedgeinfo(int, int);
369void minimumedgeinfo(void);
370void initconsensus(void);
371void makepart(int, int);
372void computebiparts(void);
373void printsplit(FILE *, uli);
374void makenewsplitentries(void);
375void copysplit(uli, int);
376void makeconsensus(void);
377void writenode(FILE *, int);
378void writeconsensustree(FILE *);
379void nodecoordinates(int);
380void drawnode(int, int);
381void plotconsensustree(FILE *);
382unsigned char *mallocquartets(int);
383void freequartets(void);
384unsigned char readquartet(int, int, int, int);
385void writequartet(int, int, int, int, unsigned char);
386void sort3doubles(dvector, ivector);
387void computeallquartets(void);
388void checkquartet(int, int, int, int);
389void num2quart(uli qnum, int *a, int *b, int *c, int *d);
390uli numquarts(int maxspc);
391uli quart2num (int a, int b, int c, int d);
392
393void writetpqfheader(int nspec, FILE *ofp, int flag);
394
395
/* extracted from main (xxx)
 * a, b, c, d identify the quartet; d1, d2, d3 are output pointers.
 * NOTE(review): `approx` presumably takes EXACT or APPROX, and d1..d3
 * presumably receive the three quartet likelihoods - confirm in the
 * implementation. */
void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx);
398
399
/* definitions for timing
 * NOTE(review): presumably the job type codes passed to addtimes() and
 * stored in timearray_t.currentjob - confirm */

#define OVERALL   0
#define GENERAL   1
#define OPTIONS   2
#define PARAMEST  3
#define QUARTETS  4
#define PUZZLING  5
#define TREEEVAL  6
409
/*
 * Bookkeeping record for CPU time (clock_t) and wall-clock time (time_t)
 * measurements, with accumulated values stored as double.
 * NOTE(review): the field meanings below are read off the field names
 * only; confirm against inittimearr()/addtimes() before relying on them.
 */
typedef struct {
        int      currentjob;      /* presumably one of the timing job codes */

        /* raw clock()/time() samples */
        clock_t  tempcpu;
        clock_t  tempfullcpu;
        clock_t  tempcpustart;
        time_t   temptime;
        time_t   tempfulltime;
        time_t   temptimestart;

        /* extreme values of the raw samples */
        clock_t  maxcpu;
        clock_t  mincpu;
        time_t   maxtime;
        time_t   mintime;

        /* per-block extremes and tick information */
        double   maxcpublock;
        double   mincpublock;
        double   mincputick;
        double   mincputicktime;
        double   maxtimeblock;
        double   mintimeblock;

        /* CPU time per job type (general, options, parameter estimation,
         * quartets, puzzling, tree evaluation) and totals */
        double   generalcpu;
        double   optionscpu;
        double   paramestcpu;
        double   quartcpu;
        double   quartblockcpu;
        double   quartmaxcpu;
        double   quartmincpu;
        double   puzzcpu;
        double   puzzblockcpu;
        double   puzzmaxcpu;
        double   puzzmincpu;
        double   treecpu;
        double   treeblockcpu;
        double   treemaxcpu;
        double   treemincpu;
        double   cpu;
        double   fullcpu;

        /* wall-clock time per job type and totals */
        double   generaltime;
        double   optionstime;
        double   paramesttime;
        double   quarttime;
        double   quartblocktime;
        double   quartmaxtime;
        double   quartmintime;
        double   puzztime;
        double   puzzblocktime;
        double   puzzmaxtime;
        double   puzzmintime;
        double   treetime;
        double   treeblocktime;
        double   treemaxtime;
        double   treemintime;
        double   time;
        double   fulltime;
} timearray_t;
467
468EXTERN double cputime, walltime;
469EXTERN double fullcpu, fulltime;
470EXTERN double fullcputime, fullwalltime;
471EXTERN double altcputime, altwalltime;
472EXTERN clock_t cputimestart,  cputimestop, cputimedummy;
473EXTERN time_t  walltimestart, walltimestop, walltimedummy;
474EXTERN clock_t Startcpu;     /* start cpu time */
475EXTERN clock_t Stopcpu;      /* stop cpu time */
476EXTERN time_t Starttime;     /* start time */
477EXTERN time_t Stoptime;      /* stop time */
478EXTERN time_t time0;         /* timer variable */
479EXTERN time_t time1;         /* yet another timer */
480EXTERN time_t time2;         /* yet another timer */
481EXTERN timearray_t tarr;
482
483void resetqblocktime(timearray_t *ta);
484void resetpblocktime(timearray_t *ta);
485void inittimearr(timearray_t *ta);
486void addtimes(int jobtype, timearray_t *ta);
487#ifdef TIMEDEBUG
488  void printtimearr(timearray_t *ta);
489#endif /* TIMEDEBUG */
490
491#endif /* _PUZZLE_ */
492
Note: See TracBrowser for help on using the repository browser.