/*
 * source: branches/items/GDE/TREEPUZZLE/src/puzzle.h
 *
 * Last change on this file was 191, checked in by jobb, 24 years ago
 *
 * treepuzzle
 *
 *   - Property svn:eol-style set to native
 *   - Property svn:keywords set to Author Date Id Revision
 * File size: 16.7 KB
 */
/*
 * puzzle.h
 *
 *
 * Part of TREE-PUZZLE 5.0 (June 2000)
 *
 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
 *                  M. Vingron, and Arndt von Haeseler
 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
 *
 * All parts of the source except where indicated are distributed under
 * the GNU public licence.  See http://www.opensource.org for details.
 */
14
15
16#ifndef _PUZZLE_
17#define _PUZZLE_
18
/*
 * Package identification strings.
 * PACKAGE and VERSION may be supplied from outside (e.g. on the compiler
 * command line); the values below are only fallbacks.  DATE is always
 * defined here.
 */
#ifndef PACKAGE
#  define PACKAGE    "tree-puzzle"
#endif
#ifndef VERSION
#  define VERSION    "5.0"
#endif
#define DATE       "October 2000"
26
/* standard library and project headers */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <ctype.h>
#include <string.h>
#include <limits.h>
#include <float.h>
#include "util.h"
#include "ml.h"
#ifdef PARALLEL
#  include "ppuzzle.h"
#endif
41
/*
 * Console output macros.
 *
 * Intended call shape:   FPRINTF(STDOUTFILE "format", args...);
 *   - without PARALLEL this expands to  printf("format", args...)
 *   - with PARALLEL it expands to       fprintf(stdout, "format", args...)
 * Note that STDOUTFILE carries its own trailing comma in the PARALLEL case,
 * so no comma is written between STDOUTFILE and the format string.
 */
#define STDOUT stdout
#ifndef PARALLEL        /* because printf() runs significantly faster */
                        /* than fprintf(stdout) on an Apple Macintosh */
                        /* (HS) */
#       define FPRINTF    printf
#       define STDOUTFILE
#else
#       define FPRINTF    fprintf
#       define STDOUTFILE STDOUT,
#endif
52
/* filenames */

/* size in bytes of each file name buffer declared in this header */
#  define FILENAMELENTH 2048


/* default names for the input and output files */
#  define INFILEDEFAULT     "infile"
#  define OUTFILEDEFAULT    "outfile"
#  define TREEFILEDEFAULT   "outtree"
#  define INTREEDEFAULT     "intree"
#  define DISTANCESDEFAULT  "outdist"
#  define TRIANGLEDEFAULT   "outlm.eps"
#  define UNRESOLVEDDEFAULT "outqlist"
#  define ALLQUARTDEFAULT   "outallquart"
#  define ALLQUARTLHDEFAULT "outallquartlh"
#  define OUTPTLISTDEFAULT  "outpstep"
#  define OUTPTORDERDEFAULT "outptorder"

/* short aliases that expand to the global file name buffers */
#  define INFILE     infilename
#  define OUTFILE    outfilename
#  define TREEFILE   outtreename
#  define INTREE     intreename
#  define DISTANCES  outdistname
#  define TRIANGLE   outlmname
#  define UNRESOLVED outqlistname
#  define ALLQUART   outallquartname
#  define ALLQUARTLH outallquartlhname
#  define OUTPTLIST  outpstepname
#  define OUTPTORDER outptordername
80
81EXTERN char infilename        [FILENAMELENTH];
82EXTERN char outfilename       [FILENAMELENTH];
83EXTERN char outtreename       [FILENAMELENTH];
84EXTERN char intreename        [FILENAMELENTH];
85EXTERN char outdistname       [FILENAMELENTH];
86EXTERN char outlmname         [FILENAMELENTH];
87EXTERN char outqlistname      [FILENAMELENTH];
88EXTERN char outallquartname   [FILENAMELENTH];
89EXTERN char outallquartlhname [FILENAMELENTH];
90EXTERN char outpstepname      [FILENAMELENTH];
91EXTERN char outptordername    [FILENAMELENTH];
92
/* file name extensions, one per kind of output file (no leading dot) */
#define OUTFILEEXT    "puzzle"
#define TREEFILEEXT   "tree"
#define DISTANCESEXT  "dist"
#define TRIANGLEEXT   "eps"
#define UNRESOLVEDEXT "qlist"
#define ALLQUARTEXT   "allquart"
#define ALLQUARTLHEXT "allquartlh"
#define OUTPTLISTEXT  "pstep"
#define OUTPTORDEREXT "ptorder"
102
/*
 * Console output macros (fallback definitions).
 *
 * The same macros are also defined earlier in this header, so each one is
 * guarded here and only defined when it is still missing.
 * Call shape:  FPRINTF(STDOUTFILE "format", args...);
 */
#ifndef PARALLEL             /* because printf() runs significantly faster */
                             /* than fprintf(stdout) on an Apple Macintosh */
                             /* (HS) */
#       ifndef FPRINTF
#               define FPRINTF    printf
#       endif
#       ifndef STDOUTFILE
#               define STDOUTFILE
#       endif
#else
#       ifndef FPRINTF
#               define FPRINTF    fprintf
#       endif
#       ifndef STDOUT
#               define STDOUT     stdout
#       endif
#       ifndef STDOUTFILE
#               define STDOUTFILE STDOUT,
#       endif
#endif
113
114
/*
 * Integer codes for the option/mode variables.  They stay preprocessor
 * macros so they remain usable in #if expressions.
 */

/* auto_aamodel/auto_datatype values  (xxx) */
#define AUTO_OFF      0
#define AUTO_GUESS    1
#define AUTO_DEFAULT  2


/* qptlist values  (xxx) */
#define PSTOUT_NONE      0
#define PSTOUT_ORDER     1
#define PSTOUT_LISTORDER 2
#define PSTOUT_LIST      3

/* dtat_optn values  (xxx) -- sic; presumably data_optn, confirm against ml.h */
#define NUCLEOTIDE 0
#define AMINOACID  1
#define BINARY     2

/* typ_optn values  (xxx) */
#define LIKMAPING_OPTN 1
#define TREERECON_OPTN 0

/* puzzlemodes (xxx) */
#define QUARTPUZ 0
#define USERTREE 1
#define PAIRDIST 2

/* rhetmodes (xxx) Modes of rate heterogeneity */
#define UNIFORMRATE 0
#define GAMMARATE   1
#define TWORATE     2
#define MIXEDRATE   3

/* defines for types of quartet likelihood computation (xxx) */
#define EXACT  0
#define APPROX 1
150
151/* tree structure */
152typedef struct oneedge {
153        /* pointer to other three edges */
154        struct oneedge *up;
155        struct oneedge *downleft;
156        struct oneedge *downright;
157        int numedge;    /* number of edge */
158        uli edgeinfo;   /* value of this edge */
159        int *edgemap;   /* pointer to the local edgemap */
160} ONEEDGE;
161
162
163/* variables */
164EXTERN cmatrix biparts;      /* bipartitions of tree of current puzzling step */
165EXTERN cmatrix consbiparts;  /* bipartitions of majority rule consensus tree */
166EXTERN cmatrix seqchars;     /* characters contained in data set */
167EXTERN cmatrix treepict;     /* picture of consensus tree */
168EXTERN double minscore;      /* value of edgescore on minedge */
169EXTERN double tstvf84;       /* F84 transition/transversion ratio */
170EXTERN double tstvratio;     /* expected transition/transversion ratio */
171EXTERN double yrtsratio;     /* expected pyrimidine/purine transition ratio */
172EXTERN dvector ulkl;         /* log L of user trees */
173EXTERN dmatrix allsites;     /* log L per sites of user trees */
174EXTERN dvector ulklc;        /* log L of user trees (clock) */
175EXTERN dmatrix allsitesc;    /* log L per sites of user trees (clock) */
176EXTERN FILE *utfp;           /* pointer to user tree file */
177EXTERN FILE *ofp;            /* pointer to output file */
178EXTERN FILE *seqfp;          /* pointer to sequence input file */
179EXTERN FILE *tfp;            /* pointer to tree file */
180EXTERN FILE *dfp;            /* pointer to distance file */
181EXTERN FILE *trifp;          /* pointer to triangle file */
182EXTERN FILE *unresfp;        /* pointer to file with unresolved quartets */
183EXTERN FILE *tmpfp;          /* pointer to temporary file */
184EXTERN FILE *qptlist;        /* pointer to file with puzzling step trees */
185EXTERN FILE *qptorder;       /* pointer to file with unique puzzling step trees */
186EXTERN int SHcodon;          /* whether SH should be applied to 1st, 2nd codon positions */
187EXTERN int utree_optn;       /* use first user tree for estimation */
188EXTERN int listqptrees;      /* list puzzling step trees */
189EXTERN int approxqp;         /* approximate QP quartets */
190EXTERN int *edgeofleaf;      /* vector with edge number of all leaves */
191EXTERN int codon_optn;       /* declares what positions in a codon should be used */
192EXTERN int compclock;        /* computation of clocklike branch lengths */
193EXTERN int chooseA;          /* leaf variable */
194EXTERN int chooseB;          /* leaf variable */
195EXTERN int clustA, clustB, clustC, clustD; /* number of members of LM clusters */
196EXTERN int column;           /* used for breaking lines (writing tree to treefile) */
197EXTERN int Frequ_optn;       /* use empirical base frequencies */
198EXTERN int Maxbrnch;         /* 2*Maxspc - 3 */
199EXTERN int Maxseqc;          /* number of sequence characters per taxum */
200EXTERN int mflag;            /* flag used for correct printing of runtime messages */
201EXTERN int minedge;          /* edge with minimum edgeinfo */
202EXTERN int nextedge;         /* number of edges in the current tree */
203EXTERN int nextleaf;         /* next leaf to add to tree */
204EXTERN int numclust;         /* number of clusters in LM analysis */
205EXTERN int outgroup;         /* outgroup */
206EXTERN int puzzlemode;       /* computation of QP tree and/or ML distances */
207EXTERN int rootsearch;       /* how location of root is found */
208EXTERN int rhetmode;         /* model of rate heterogeneity */
209EXTERN int splitlength;      /* length of one entry in splitpatterns */
210EXTERN int *splitsizes;      /* size of all different splits of all trees */
211EXTERN int usebestq_optn;    /* use only best quartet topology, no bayesian weights */
212EXTERN int show_optn;        /* show unresolved quartets                        */
213EXTERN int savequart_optn;   /* save memory block which quartets to file */
214EXTERN int savequartlh_optn; /* save quartet likelihoods to file */
215EXTERN int saveqlhbin_optn;  /* save quartet likelihoods binary */
216EXTERN int readquart_optn;   /* read memory block which quartets from file */
217EXTERN int sym_optn;         /* symmetrize doublet frequencies */
218EXTERN int xsize;            /* depth of consensus tree picture */
219EXTERN int ytaxcounter;      /* counter for establishing y-coordinates of all taxa */
220EXTERN int numutrees;        /* number of users trees in input tree file */
221EXTERN ivector clusterA, clusterB, clusterC, clusterD;  /* clusters for LM analysis */
222EXTERN ivector consconfid;   /* confidence values of majority rule consensus tree */
223EXTERN ivector conssizes;    /* partition sizes of majority rule consensus tree */
224EXTERN ivector trueID;       /* leaf -> taxon on this leaf */
225EXTERN ivector xcor;         /* x-coordinates of consensus tree nodes */
226EXTERN ivector ycor;         /* y-coordinates of consensus tree nodes */
227EXTERN ivector ycormax;      /* maximal y-coordinates of consensus tree nodes */
228EXTERN ivector ycormin;      /* minimal y-coordinates of consensus tree nodes */
229EXTERN ivector ycortax;      /* y-coordinates of all taxa */
230EXTERN ONEEDGE *edge;        /* vector with all the edges of the tree */
231EXTERN uli *splitcomp;       /* bipartition storage */
232EXTERN uli *splitfreqs;      /* frequencies of all different splits of all trees */
233EXTERN uli *splitpatterns;   /* all different splits of all trees */
234EXTERN uli badqs;            /* number of bad quartets */
235EXTERN uli consincluded;     /* number of included biparts in the consensus tree */
236EXTERN uli Currtrial;        /* counter for puzzling steps */
237EXTERN uli maxbiparts;       /* space is reserved for that many bipartitions */
238EXTERN uli mininfo;          /* value of edgeinfo on minedge */
239EXTERN uli numbiparts;       /* number of different bipartitions */
240EXTERN uli Numquartets;      /* number of quartets */
241EXTERN uli Numtrial;         /* number of puzzling steps */
242EXTERN uli lmqts;            /* quartets investigated in LM analysis (0 = ALL) */
243
244EXTERN int auto_datatype;       /* guess datatype ? */
245EXTERN int guessdata_optn;      /* guessed datatype */
246
247EXTERN int auto_aamodel;        /* guess amino acid modell ? */
248EXTERN int guessauto_aamodel;   /* guessed amino acid modell ? */
249EXTERN int guessDayhf_optn;     /* guessed Dayhoff model option */
250EXTERN int guessJtt_optn;       /* guessed JTT model option */
251EXTERN int guessblosum62_optn;  /* guessed BLOSUM 62 model option */
252EXTERN int guessmtrev_optn;     /* guessed mtREV model option */
253EXTERN int guesscprev_optn;     /* guessed cpREV model option */
254EXTERN int guessvtmv_optn;      /* guessed VT model option */
255EXTERN int guesswag_optn;       /* guessed WAG model option */
256
257/* counter variables needed in likelihood mapping analysis */
258EXTERN uli ar1, ar2, ar3;
259EXTERN uli reg1, reg2, reg3, reg4, reg5, reg6, reg7;
260EXTERN uli reg1l, reg1r, reg2u, reg2d, reg3u, reg3d,
261 reg4u, reg4d, reg5l, reg5r, reg6u, reg6d;
262EXTERN unsigned char *quartetinfo; /* place where quartets are stored */
263EXTERN dvector qweight; /* for use in QP and LM analysis */
264EXTERN dvector sqdiff;
265EXTERN ivector qworder;
266EXTERN ivector sqorder;
267
268EXTERN int randseed;
269EXTERN int psteptreestrlen;
270
/*
 * One item of a tree list.  Each item carries two independent pairs of
 * links (pred/succ and sortnext/sortlast), a tree pointer and three
 * integer fields.
 * NOTE(review): field meanings are not documented in this header; the
 * per-field notes below are inferred from the names -- confirm.
 */
typedef struct treelistitemtypedummy {
        struct treelistitemtypedummy *pred;      /* previous item */
        struct treelistitemtypedummy *succ;      /* next item */
        struct treelistitemtypedummy *sortnext;  /* next item in the sorted chain */
        struct treelistitemtypedummy *sortlast;  /* previous item in the sorted chain */
        char  *tree;   /* presumably the tree as a string */
        int    count;  /* presumably how often this tree occurred */
        int    id;
        int    idx;
} treelistitemtype;
281
282EXTERN treelistitemtype *psteptreelist;
283EXTERN treelistitemtype *psteptreesortlist;
284EXTERN int               psteptreenum;
285EXTERN int               psteptreesum;
286
287
/* prototypes (the functions are defined outside this header) */
void makeF84model(void);
void compnumqts(void);
void setoptions(void);
void openfiletoread(FILE **, char[], char[]);
void openfiletowrite(FILE **, char[], char[]);
void openfiletoappend(FILE **, char[], char[]);
void closefile(FILE *);
void symdoublets(void);
void computeexpectations(void);
void putdistance(FILE *);
void findidenticals(FILE *);
double averagedist(void);
void initps(FILE *);
void plotlmpoint(FILE *, double, double);
void finishps(FILE *);
void makelmpoint(FILE *, double, double, double);
void printtreestats(FILE *);
void timestamp(FILE *);
/* NOTE(review): the int argument presumably takes one of the WRITE* codes
 * defined right after this prototype -- confirm */
void writeoutputfile(FILE *, int);
308
/* definitions for writing output */
#define WRITEALL    0
#define WRITEPARAMS 1
#define WRITEREST   2
313
/*
 * Prototypes: output, timing, parameter estimation, sequence input and
 * tree construction (the functions are defined outside this header).
 */
void writetimesstat(FILE *ofp);
void writecutree(FILE *, int);
void starttimer(void);
void checktimer(uli);
void estimateparametersnotree(void);
void estimateparameterstree(void);
int main(int, char *[]);
/* comparison callbacks with the qsort()/bsearch() comparator signature */
int ulicmp(const void *, const void *);
int intcmp(const void *, const void *);

void readid(FILE *, int);
char readnextcharacter(FILE *, int, int);
void skiprestofline(FILE *, int, int);
void skipcntrl(FILE *, int, int);
void getseqs(FILE *);
void initid(int);
void fputid10(FILE *, int);
int fputid(FILE *, int);
void getsizesites(FILE *);
void getdataset(FILE *);
int guessdatatype(void);
void translatedataset(void);
void estimatebasefreqs(void);
void guessmodel(void);
void inittree(void);
void addnextleaf(int);
void freetree(void);
void writeOTU(FILE *, int);
void writetree(FILE *);
/* (void) makes this a real prototype: an empty list () would leave the
 * arguments unchecked */
int *initctree(void);
void copytree(int *ctree);
void freectree(int **snodes);
void printctree(int *ctree);
char *sprintfctree(int *ctree, int strlen);
void fprintffullpstree(FILE *outf, char *treestr);
int printfsortctree(int *ctree);
int sortctree(int *ctree);
int ct_1stedge(int node);
int ct_2ndedge(int node);
int ct_3rdedge(int node);
354
355void printfpstrees(treelistitemtype *list);
356void printfsortedpstrees(treelistitemtype *list);
357void fprintfsortedpstrees(FILE *output, treelistitemtype *list, int itemnum, int itemsum, int comment, float cutoff);
358
359void sortbynum(treelistitemtype *list, treelistitemtype **sortlist);
360treelistitemtype *addtree2list(char             **tree,
361                               int                numtrees,
362                               treelistitemtype **list,
363                               int               *numitems,
364                               int               *numsum);
365void freetreelist(treelistitemtype **list,
366                  int               *numitems,
367                  int               *numsum);
368void resetedgeinfo(void);
369void incrementedgeinfo(int, int);
370void minimumedgeinfo(void);
371void initconsensus(void);
372void makepart(int, int);
373void computebiparts(void);
374void printsplit(FILE *, uli);
375void makenewsplitentries(void);
376void copysplit(uli, int);
377void makeconsensus(void);
378void writenode(FILE *, int);
379void writeconsensustree(FILE *);
380void nodecoordinates(int);
381void drawnode(int, int);
382void plotconsensustree(FILE *);
383unsigned char *mallocquartets(int);
384void freequartets(void);
385unsigned char readquartet(int, int, int, int);
386void writequartet(int, int, int, int, unsigned char);
387void sort3doubles(dvector, ivector);
388void computeallquartets(void);
389void checkquartet(int, int, int, int);
390void num2quart(uli qnum, int *a, int *b, int *c, int *d);
391uli numquarts(int maxspc);
392uli quart2num (int a, int b, int c, int d);
393
void writetpqfheader(int nspec, FILE *ofp, int flag);


/* extracted from main (xxx) */
/* NOTE(review): approx presumably takes EXACT or APPROX, and d1..d3
 * presumably receive the three computed values -- confirm */
void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx);
399
400
/* definitions for timing */

/* job ids; NOTE(review): presumably the values stored in
 * timearray_t.currentjob and passed as jobtype to addtimes() -- confirm */
#define OVERALL   0
#define GENERAL   1
#define OPTIONS   2
#define PARAMEST  3
#define QUARTETS  4
#define PUZZLING  5
#define TREEEVAL  6

/*
 * Timing bookkeeping record.  Raw clock_t/time_t readings come first,
 * then accumulated values as doubles: one group of CPU time fields and a
 * parallel group of wall clock time fields.
 */
typedef struct {
        int      currentjob;
        clock_t  tempcpu;
        clock_t  tempfullcpu;
        clock_t  tempcpustart;
        time_t   temptime;
        time_t   tempfulltime;
        time_t   temptimestart;

        clock_t  maxcpu;
        clock_t  mincpu;
        time_t   maxtime;
        time_t   mintime;

        double   maxcpublock;
        double   mincpublock;
        double   mincputick;
        double   mincputicktime;
        double   maxtimeblock;
        double   mintimeblock;

        /* CPU time fields */
        double   generalcpu;
        double   optionscpu;
        double   paramestcpu;
        double   quartcpu;
        double   quartblockcpu;
        double   quartmaxcpu;
        double   quartmincpu;
        double   puzzcpu;
        double   puzzblockcpu;
        double   puzzmaxcpu;
        double   puzzmincpu;
        double   treecpu;
        double   treeblockcpu;
        double   treemaxcpu;
        double   treemincpu;
        double   cpu;
        double   fullcpu;

        /* wall clock time fields, mirroring the CPU fields above */
        double   generaltime;
        double   optionstime;
        double   paramesttime;
        double   quarttime;
        double   quartblocktime;
        double   quartmaxtime;
        double   quartmintime;
        double   puzztime;
        double   puzzblocktime;
        double   puzzmaxtime;
        double   puzzmintime;
        double   treetime;
        double   treeblocktime;
        double   treemaxtime;
        double   treemintime;
        double   time;
        double   fulltime;
} timearray_t;
468
469EXTERN double cputime, walltime;
470EXTERN double fullcpu, fulltime;
471EXTERN double fullcputime, fullwalltime;
472EXTERN double altcputime, altwalltime;
473EXTERN clock_t cputimestart,  cputimestop, cputimedummy;
474EXTERN time_t  walltimestart, walltimestop, walltimedummy;
475EXTERN clock_t Startcpu;     /* start cpu time */
476EXTERN clock_t Stopcpu;      /* stop cpu time */
477EXTERN time_t Starttime;     /* start time */
478EXTERN time_t Stoptime;      /* stop time */
479EXTERN time_t time0;         /* timer variable */
480EXTERN time_t time1;         /* yet another timer */
481EXTERN time_t time2;         /* yet another timer */
482EXTERN timearray_t tarr;
483
484void resetqblocktime(timearray_t *ta);
485void resetpblocktime(timearray_t *ta);
486void inittimearr(timearray_t *ta);
487void addtimes(int jobtype, timearray_t *ta);
488#ifdef TIMEDEBUG
489  void printtimearr(timearray_t *ta);
490#endif /* TIMEDEBUG */
491
492#endif /* _PUZZLE_ */
493
/* Note: See TracBrowser for help on using the repository browser. */