source: tags/initial/GDE/PHYLIP/dnainvar.c

Last change on this file was 2, checked in by oldcode, 24 years ago

Initial revision

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 29.8 KB
Line 
1#include "phylip.h"
2
3/* version 3.52c. (c) Copyright 1993 by Joseph Felsenstein.
4   Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe.
5   Permission is granted to copy and use this program provided no fee is
6   charged for it and provided that this copyright notice is not removed. */
7
8#define maxsp           4   /* maximum number of species -- must be 4 */
9#define nmlngth         10   /* max. number characters in species name */
10
11#define ibmpc0          false
12#define ansi0           true
13#define vt520           false
14
15
16typedef enum {
17  xx, yy, zz, ww
18} simbol;
19typedef Char **sequence;
20typedef Char naym[nmlngth];
21
22
23Static FILE *infile, *outfile;
24Static long numsp, sites, endsite, datasets, ith;
25Static boolean weights, anerror, printdata, progress, prntpat, printinv,
26               mulsets, interleaved, ibmpc, vt52, ansi, firstset;
27Static naym nayme[maxsp];
28Static sequence y;
29Static long *weight,*alias,*aliasweight;
30
31long f[(long)ww - (long)xx + 1][(long)ww - (long)xx + 1]
32       [(long)ww - (long)xx + 1]; /* made global from being local to makeinv */
33
34openfile(fp,filename,mode,application,perm)
35FILE **fp;
36char *filename;
37char *mode;
38char *application;
39char *perm;
40{
41  FILE *of;
42  char file[100];
43  strcpy(file,filename);
44  while (1){
45    of = fopen(file,mode);
46    if (of)
47      break;
48    else {
49      switch (*mode){
50      case 'r':
51        printf("%s:  can't read %s\n",application,file);
52        file[0] = '\0';
53        while (file[0] =='\0'){
54          printf("Please enter a new filename>");
55          gets(file);}
56        break;
57      case 'w':
58      case 'a':
59        printf("%s: can't write %s\n",application,file);
60        file[0] = '\0';
61        while (file[0] =='\0'){
62          printf("Please enter a new filename>");
63          gets(file);}
64        break;
65      }
66    }
67  }
68  *fp=of;
69  if (perm != NULL)
70    strcpy(perm,file);
71}
72
73
74void uppercase(ch)
75Char *ch;
76{
77   *ch = (islower(*ch) ?  toupper(*ch) : (*ch));
78}  /* uppercase */
79
80
81void getnums()
82{
83  /* input number of species, number of sites */
84  fscanf(infile, "%ld%ld", &numsp, &sites);
85  if (numsp > maxsp){
86    printf("TOO MANY SPECIES: only 4 allowed\n");
87    exit(-1);}
88  if (printdata)
89    fprintf(outfile, "%4ld Species, %4ld Sites\n", numsp, sites);
90}  /* getnums */
91
92void getoptions()
93{
94  /* interactively set options */
95  Char ch;
96  boolean done, done1;
97
98  fprintf(outfile, "\nNucleic acid sequence Invariants ");
99  fprintf(outfile, "method, version %s\n\n",VERSION);
100  putchar('\n');
101  printdata = false;
102  progress = true;
103  prntpat = true;
104  printinv = true;
105  interleaved = true;
106  do {
107    if (ansi)
108      printf("\033[2J\033[H");
109    else if (vt52)
110      printf("\033E\033H");
111    else
112      putchar('\n');
113    printf("\nNucleic acid sequence Invariants ");
114    printf("method, version %s\n\n",VERSION);
115    printf("Settings for this run:\n");
116    printf("  M           Analyze multiple data sets?");
117    if (mulsets)
118      printf("  Yes, %2ld sets\n", datasets);
119    else
120      printf("  No\n");
121    printf("  I          Input sequences interleaved?");
122    if (interleaved)
123      printf("  Yes\n");
124    else
125      printf("  No, sequential\n");
126    printf("  0   Terminal type (IBM PC, VT52, ANSI)?");
127    if (ibmpc)
128      printf("  IBM PC\n");
129    if (ansi)
130      printf("  ANSI\n");
131    if (vt52)
132      printf("  VT52\n");
133    if (!(ibmpc || vt52 || ansi))
134      printf("  (none)\n");
135    printf("  1    Print out the data at start of run");
136    if (printdata)
137      printf("  Yes\n");
138    else
139      printf("  No\n");
140    printf("  2  Print indications of progress of run  %s\n",
141           (progress ? "Yes" : "No"));
142    printf("  3      Print out the counts of patterns");
143    if (prntpat)
144      printf("  Yes\n");
145    else
146      printf("  No\n");
147    printf("  4              Print out the invariants");
148    if (printinv)
149      printf("  Yes\n");
150    else
151      printf("  No\n");
152    printf("\nAre these settings correct? (type Y or the letter for one to change)\n");
153    scanf("%c%*[^\n]", &ch);
154    getchar();
155    uppercase(&ch);
156    done = (ch == 'Y');
157    if (!done) {
158      if (ch == 'M' || ch == 'I' || ch == '1' || ch == '2' || ch == '3' ||
159          ch == '4' || ch == '0') {
160        switch (ch) {
161
162        case 'M':
163          mulsets = !mulsets;
164          if (mulsets) {
165            done1 = false;
166            do {
167              printf("How many data sets?\n");
168              scanf("%ld%*[^\n]", &datasets);
169              getchar();
170              done1 = (datasets >= 1);
171              if (!done1)
172                printf("BAD DATA SETS NUMBER:  it must be greater than 1\n");
173            } while (done1 != true);
174          }
175          break;
176
177        case 'I':
178          interleaved = !interleaved;
179          break;
180
181        case '0':
182          if (ibmpc) {
183            ibmpc = false;
184            vt52 = true;
185          } else {
186            if (vt52) {
187              vt52 = false;
188              ansi = true;
189            } else if (ansi)
190              ansi = false;
191            else
192              ibmpc = true;
193          }
194          break;
195
196        case '1':
197          printdata = !printdata;
198          break;
199
200        case '2':
201          progress = !progress;
202          break;
203       
204        case '3':
205          prntpat = !prntpat;
206          break;
207
208        case '4':
209          printinv = !printinv;
210          break;
211        }
212      } else
213        printf("Not a possible option!\n");
214    }
215  } while (!done);
216}  /* getoptions */
217
218
219void doinit()
220{
221  /* initializes variables */
222  long i;
223
224  getnums();
225  if (!anerror)
226    getoptions();
227  y       = (Char **)Malloc(numsp*sizeof(Char *));
228  for (i = 0; i < numsp; i++)
229    y[i] = (Char *)Malloc(sites*sizeof(Char));
230
231  weight       = (long *)Malloc(sites * sizeof(long));
232  alias        = (long *)Malloc(sites * sizeof(long));
233  aliasweight  = (long *)Malloc(sites * sizeof(long));
234}  /* doinit*/
235
236
237void inputweights()
238{
239  /* input the character weights, which must be 0 or 1 */
240  Char ch;
241  long i;
242
243  for (i = 1; i < nmlngth; i++)
244    ch = getc(infile);
245  for (i = 0; i < sites; i++) {
246    do {
247      if (eoln(infile)) {
248        fscanf(infile, "%*[^\n]");
249        getc(infile);
250      }
251      ch = getc(infile);
252    } while (ch == ' ');
253    weight[i] = 1;
254    if (ch == '0')
255      weight[i] = 0;
256    else if (ch != '1') {
257      printf("BAD WEIGHT CHARACTER: %c\n", ch);
258      anerror = true;
259    }
260  }
261  fscanf(infile, "%*[^\n]");
262  getc(infile);
263  weights = true;
264}  /* inputweights */
265
266void printweights()
267{
268  /* print out the weights of sites */
269  long i, j, k;
270
271  fprintf(outfile, "\n\n   Sites are weighted as follows:\n");
272  fprintf(outfile, "        ");
273  for (i = 0; i <= 9; i++)
274    fprintf(outfile, "%3ld", i);
275  fprintf(outfile, "\n     *---------------------------------\n");
276  for (j = 0; j <= (sites/10); j++) {
277    fprintf(outfile, "%5ld!  ", j * 10);
278    for (i = 0; i <= 9; i++) {
279      k = j * 10 + i;
280      if (k > 0 && k <= sites)
281        fprintf(outfile, "%3ld", weight[k - 1]);
282      else
283        fprintf(outfile, "   ");
284    }
285    putc('\n', outfile);
286  }
287  putc('\n', outfile);
288}  /* printweights */
289
290void inputoptions()
291{
292  /* input the information on the options */
293  Char ch;
294  long extranum, i, cursp, curst;
295
296  if (!firstset) {
297    if (eoln(infile)) {
298      fscanf(infile, "%*[^\n]");
299      getc(infile);
300    }
301    fscanf(infile, "%ld%ld", &cursp, &curst);
302    if (cursp != numsp) {
303      printf("\nERROR: INCONSISTENT NUMBER OF SPECIES IN DATA SET %4ld\n", ith);
304      anerror = true;
305    }
306    sites = curst;
307  }
308  weights = false;
309  extranum = 0;
310  while (!(eoln(infile) || anerror)) {
311    ch = getc(infile);
312    uppercase(&ch);
313    if (ch == 'W')
314      extranum++;
315    else if (ch != ' ') {
316      printf("BAD OPTION CHARACTER: %c\n", ch);
317      anerror = true;
318    }
319  }
320  fscanf(infile, "%*[^\n]");
321  getc(infile);
322  for (i = 0; i < sites; i++)
323    weight[i] = 1;
324  for (i = 1; i <= extranum; i++) {
325    if (!anerror) {
326      ch = getc(infile);
327      uppercase(&ch);
328      if (ch == 'W')
329        inputweights();
330      anerror = (ch != 'W');
331      if (anerror)
332        printf("ERROR: INCORRECT AUXILIARY OPTIONS LINE WHICH STARTS WITH %c\n",
333               ch);
334    }
335  }
336  if (weights)
337    printweights();
338}  /* inputoptions */
339
340void inputdata()
341{
342  /* Input the names and sequences for each species */
343  long i, j, k, l, basesread, basesnew;
344  Char charstate;
345  boolean allread, done;
346
347  putc('\n', outfile);
348  j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5;
349  if (j < nmlngth - 1)
350    j = nmlngth - 1;
351  if (j > 37)
352    j = 37;
353  if (printdata) {
354    fprintf(outfile, "Name");
355    for (i = 1; i <= j; i++)
356      putc(' ', outfile);
357    fprintf(outfile, "Sequences\n");
358    fprintf(outfile, "----");
359    for (i = 1; i <= j; i++)
360      putc(' ', outfile);
361    fprintf(outfile, "---------\n\n");
362  }
363  basesread = 0;
364  allread = false;
365  while (!(allread || anerror)) {
366    allread = true;
367    if (eoln(infile)) {
368      fscanf(infile, "%*[^\n]");
369      getc(infile);
370    }
371    i = 1;
372    while (i <= numsp && !anerror) {
373      if ((interleaved && basesread == 0) || !interleaved) {
374        for (j = 0; j < nmlngth; j++) {
375          if (!anerror) {
376            nayme[i - 1][j] = getc(infile);
377            anerror = (anerror || eof(infile) || eoln(infile));
378            if (anerror)
379              printf(
380                "ERROR: END-OF-LINE OR END-OF-FILE IN THE MIDDLE OF A SPECIES NAME\n");
381          }
382        }
383      }
384      if (interleaved)
385        j = basesread;
386      else
387        j = 0;
388      done = false;
389      while (!done && !eof(infile) && !anerror) {
390        if (interleaved)
391          done = true;
392        while (j < sites && !(eoln(infile) || eof(infile)) && !anerror) {
393          charstate = getc(infile);
394          if (charstate == ' ' || (charstate >= '0' && charstate <= '9'))
395            continue;
396          uppercase(&charstate);
397          if (charstate != 'A' && charstate != 'B' && charstate != 'C' &&
398              charstate != 'D' && charstate != 'G' && charstate != 'H' &&
399              charstate != 'K' && charstate != 'M' && charstate != 'N' &&
400              charstate != 'R' && charstate != 'S' && charstate != 'T' &&
401              charstate != 'U' && charstate != 'V' && charstate != 'W' &&
402              charstate != 'X' && charstate != 'Y' && charstate != '?' &&
403              charstate != 'O' && charstate != '-' && charstate != '.') {
404
405            printf("ERROR: BAD BASE:%c AT POSITION%5ld OF SPECIES %3ld\n",
406                   charstate, j, i);
407            anerror = true;
408          }
409          j++;
410          if (charstate == '.')
411            charstate = y[0][j - 1];
412          y[i - 1][j - 1] = charstate;
413        }
414        if (interleaved)
415          continue;
416        if (j < sites) {
417          fscanf(infile, "%*[^\n]");
418          getc(infile);
419        } else if (j == sites)
420          done = true;
421      }
422      if (interleaved && i == 1)
423        basesnew = j;
424      fscanf(infile, "%*[^\n]");
425      getc(infile);
426      if (interleaved)
427        anerror = (anerror || j != basesnew);
428      else
429        anerror = (anerror || j != sites);
430      if (anerror)
431        printf("ERROR: SEQUENCES OUT OF ALIGNMENT\n");
432      i++;
433    }
434    if (interleaved) {
435      basesread = basesnew;
436      allread = (basesread == sites);
437    } else
438      allread = (i > numsp);
439  }
440  if (!printdata || anerror)
441    return;
442  for (i = 1; i <= ((sites - 1) / 60 + 1); i++) {
443    for (j = 1; j <= numsp; j++) {
444      for (k = 0; k < nmlngth; k++)
445        putc(nayme[j - 1][k], outfile);
446      fprintf(outfile, "   ");
447      l = i * 60;
448      if (l > sites)
449        l = sites;
450      for (k = (i - 1) * 60 + 1; k <= l; k++) {
451        if (!anerror) {
452          if (j > 1 && y[j - 1][k - 1] == y[0][k - 1])
453            charstate = '.';
454          else
455            charstate = y[j - 1][k - 1];
456          putc(charstate, outfile);
457          if (k % 10 == 0 && k % 60 != 0)
458            putc(' ', outfile);
459        }
460      }
461      putc('\n', outfile);
462    }
463    putc('\n', outfile);
464  }
465  putc('\n', outfile);
466}  /* inputdata */
467
468void sitesort()
469{
470  /* Shell sort keeping sites, weights in same order */
471  long gap, i, j, jj, jg, k, itemp;
472  boolean flip, tied;
473
474  gap = sites / 2;
475  while (gap > 0) {
476    for (i = gap + 1; i <= sites; i++) {
477      j = i - gap;
478      flip = true;
479      while (j > 0 && flip) {
480        jj = alias[j - 1];
481        jg = alias[j + gap - 1];
482        flip = false;
483        k = 1;
484        tied = true;
485        while (k <= numsp && tied) {
486          flip = (y[k - 1][jj - 1] > y[k - 1][jg - 1]);
487          tied = (tied && y[k - 1][jj - 1] == y[k - 1][jg - 1]);
488          k++;
489        }
490        if (!flip)
491          break;
492        itemp = alias[j - 1];
493        alias[j - 1] = alias[j + gap - 1];
494        alias[j + gap - 1] = itemp;
495        itemp = aliasweight[j - 1];
496        aliasweight[j - 1] = aliasweight[j + gap - 1];
497        aliasweight[j + gap - 1] = itemp;
498        j -= gap;
499      }
500    }
501    gap /= 2;
502  }
503}  /* sitesort */
504
505void sitecombine()
506{
507  /* combine sites that have identical patterns */
508  long i, j, k;
509  boolean tied;
510
511  i = 1;
512  while (i < sites) {
513    j = i + 1;
514    tied = true;
515    while (j <= sites && tied) {
516      k = 1;
517      while (k <= numsp && tied) {
518        tied = (tied &&
519            y[k - 1][alias[i - 1] - 1] == y[k - 1][alias[j - 1] - 1]);
520        k++;
521      }
522      if (tied && aliasweight[j - 1] > 0) {
523        aliasweight[i - 1] += aliasweight[j - 1];
524        aliasweight[j - 1] = 0;
525      }
526      j++;
527    }
528    i = j - 1;
529  }
530}  /* sitecombine */
531
532void sitescrunch()
533{
534  /* Bubble sort so positively weighted sites come first */
535  long i, j, itemp;
536  boolean done, found;
537
538  done = false;
539  i = 1;
540  j = 2;
541  while (!done) {
542    found = false;
543    if (aliasweight[i - 1] > 0)
544      i++;
545    else {
546      if (j <= i)
547        j = i + 1;
548      if (j <= sites) {
549        found = false;
550        do {
551          found = (aliasweight[j - 1] > 0);
552          j++;
553        } while (!(found || j > sites));
554        if (found) {
555          j--;
556          itemp = alias[i - 1];
557          alias[i - 1] = alias[j - 1];
558          alias[j - 1] = itemp;
559          itemp = aliasweight[i - 1];
560          aliasweight[i - 1] = aliasweight[j - 1];
561          aliasweight[j - 1] = itemp;
562        } else
563          done = true;
564      }
565    }
566    done = (done || i >= sites);
567  }
568}  /* sitescrunch */
569
570void makeweights()
571{
572  /* make up weights vector to avoid duplicate computations */
573  long i;
574
575  for (i = 1; i <= sites; i++) {
576    alias[i - 1] = i;
577    aliasweight[i - 1] = weight[i - 1];
578  }
579  sitesort();
580  sitecombine();
581  sitescrunch();
582  for (i = 1; i <= sites; i++) {
583    weight[i - 1] = aliasweight[i - 1];
584    if (weight[i - 1] > 0)
585      endsite = i;
586  }
587}  /* makeweights */
588
589
590void doinput()
591{  /* getinput */
592  /* reads the input data */
593  if (!anerror)
594    inputoptions();
595  if (!anerror)
596    inputdata();
597  if (!anerror)
598    makeweights();
599}  /* getinput */
600
601
602
603void prntpatterns()
604{
605  /* print out patterns */
606  long i, j;
607
608  fprintf(outfile, "\n   Pattern");
609  if (prntpat)
610    fprintf(outfile, "   Number of times");
611  fprintf(outfile, "\n\n");
612  for (i = 0; i < endsite; i++) {
613    fprintf(outfile, "     ");
614    for (j = 0; j < numsp; j++)
615      putc(y[j][alias[i] - 1], outfile);
616    if (prntpat)
617      fprintf(outfile, "  %8ld", weight[i]);
618    putc('\n', outfile);
619  }
620  putc('\n', outfile);
621}  /* prntpatterns */
622
623void makesymmetries()
624{
625  /* get frequencies of symmetrized patterns */
626  long i, j;
627  boolean drop, usedz;
628  Char ch, ch1, zchar;
629  simbol s1, s2, s3;
630  simbol t[maxsp - 1];
631
632  for (s1 = xx; (long)s1 <= (long)ww; s1 = (simbol)((long)s1 + 1)) {
633    for (s2 = xx; (long)s2 <= (long)ww; s2 = (simbol)((long)s2 + 1)) {
634      for (s3 = xx; (long)s3 <= (long)ww; s3 = (simbol)((long)s3 + 1))
635        f[(long)s1 - (long)xx][(long)s2 - (long)xx]
636          [(long)s3 - (long)xx] = 0;
637    }
638  }
639  for (i = 0; i < endsite; i++) {
640    drop = false;
641    for (j = 0; j < numsp; j++) {
642      ch = y[j][alias[i] - 1];
643      drop = (drop ||
644              (ch != 'A' && ch != 'C' && ch != 'G' && ch != 'T' && ch != 'U'));
645    }
646    ch1 = y[0][alias[i] - 1];
647    if (!drop) {
648      usedz = false;
649      zchar = ' ';
650      for (j = 2; j <= numsp; j++) {
651        ch = y[j - 1][alias[i] - 1];
652        if (ch == ch1)
653          t[j - 2] = xx;
654        else if ((ch1 == 'A' && ch == 'G') || (ch1 == 'G' && ch == 'A') ||
655                 (ch1 == 'C' && (ch == 'T' || ch == 'U')) ||
656                 ((ch1 == 'T' || ch1 == 'U') && ch == 'C'))
657          t[j - 2] = yy;
658        else if (!usedz) {
659          t[j - 2] = zz;
660          usedz = true;
661          zchar = ch;
662        } else if (usedz && ch == zchar)
663          t[j - 2] = zz;
664        else if (usedz && ch != zchar)
665          t[j - 2] = ww;
666      }
667      f[(long)t[0] - (long)xx][(long)t[1] - (long)xx]
668        [(long)t[2] - (long)xx] += weight[i];
669    }
670  }
671}  /* makesymmetries */
672
673void prntsymbol(s)
674simbol s;
675{
676  /* print 1, 2, 3, 4 as appropriate */
677  switch (s) {
678
679  case xx:
680    putc('1', outfile);
681    break;
682
683  case yy:
684    putc('2', outfile);
685    break;
686
687  case zz:
688    putc('3', outfile);
689    break;
690
691  case ww:
692    putc('4', outfile);
693    break;
694  }
695}  /* prntsymbol */
696
697void prntsymmetries()
698{
699  /* print out symmetrized pattern numbers */
700  simbol s1, s2, s3;
701
702  fprintf(outfile, "\nSymmetrized patterns (1, 2 = the two purines  ");
703  fprintf(outfile, "and  3, 4 = the two pyrimidines\n");
704  fprintf(outfile, "                  or  1, 2 = the two pyrimidines  ");
705  fprintf(outfile, "and  3, 4 = the two purines)\n\n");
706  for (s1 = xx; (long)s1 <= (long)ww; s1 = (simbol)((long)s1 + 1)) {
707    for (s2 = xx; (long)s2 <= (long)ww; s2 = (simbol)((long)s2 + 1)) {
708      for (s3 = xx; (long)s3 <= (long)ww; s3 = (simbol)((long)s3 + 1)) {
709        if (f[(long)s1 - (long)xx][(long)s2 - (long)xx]
710            [(long)s3 - (long)xx] > 0) {
711          fprintf(outfile, "     1");
712          prntsymbol(s1);
713          prntsymbol(s2);
714          prntsymbol(s3);
715          if (prntpat)
716            fprintf(outfile, "   %7ld",
717                    f[(long)s1 - (long)xx][(long)s2 - (long)xx]
718                    [(long)s3 - (long)xx]);
719          putc('\n', outfile);
720        }
721      }
722    }
723  }
724}  /* prntsymmetries */
725
726
727void tabulate(mm, nn, pp, qq, mr,nr,pr,qr)
728long mm, nn, pp, qq;
729double *mr,*nr,*pr,*qr;
730{
731  /* make quadratic invariant, table, chi-square */
732  long total;
733  double k, TEMP;
734
735  fprintf(outfile, "\n   Contingency Table\n\n");
736  fprintf(outfile, "%7ld%6ld\n", mm, nn);
737  fprintf(outfile, "%7ld%6ld\n\n", pp, qq);
738  *mr = (long)(mm);
739  *nr = (long)(nn);
740  *pr = (long)pp;
741  *qr = (long)qq;
742  total = mm + nn + pp + qq;
743  if (printinv)
744    fprintf(outfile, "   Quadratic invariant = %15.1f\n\n",
745            (*nr) * (*pr) - (*mr) * (*qr));
746  fprintf(outfile, "   Chi-square = ");
747  TEMP = (*mr) * (*qr) - (*nr) * (*pr);
748  k = total * (TEMP * TEMP) / (((*mr) + (*nr)) * ((*mr) + (*pr)) *
749                               ((*nr) + (*qr)) * ((*pr) + (*qr)));
750  fprintf(outfile, "%10.5f", k);
751  if ((*mr) * (*qr) > (*nr) * (*pr) && k > 2.71)
752    fprintf(outfile, " (P < 0.05)\n");
753  else
754    fprintf(outfile, " (not significant)\n");
755  fprintf(outfile, "\n\n");
756}  /* tabulate */
757
758
759void writename(m)
760long m;
761{
762  /* write out a species name */
763  long i, n;
764
765  n = nmlngth;
766  while (nayme[m - 1][n - 1] == ' ')
767    n--;
768  if (n == 0)
769    n = 1;
770  for (i = 0; i < n; i++)
771    putc(nayme[m - 1][i], outfile);
772}  /* writename */
773
774void writetree(i, j, k, l)
775long i, j, k, l;
776{
777  /* write out tree topology ((i,j),(k,l)) using names */
778  fprintf(outfile, "((");
779  writename(i);
780  putc(',', outfile);
781  writename(j);
782  fprintf(outfile, "),(");
783  writename(k);
784  putc(',', outfile);
785  writename(l);
786  fprintf(outfile, "))\n");
787}  /* writetree */
788
789void exacttest(m, n)
790long m, n;
791{
792  /* exact binomial test that m <= n */
793  long i;
794  double p, sum;
795
796  p = 1.0;
797  for (i = 1; i <= m + n; i++)
798    p /= 2.0;
799  sum = p;
800  for (i = 1; i <= n; i++) {
801    p = p * (m + n - i + 1) / i;
802    sum += p;
803  }
804  fprintf(outfile, "      %7.4f", sum);
805  if (sum <= 0.05)
806    fprintf(outfile, "              yes\n");
807  else
808    fprintf(outfile, "               no\n");
809}  /* exacttest */
810
811void invariants()
812{
813  /* compute invariants */
814  long  m, n, p, q;
815  double L1, L2, L3;
816  double mr,nr,pr,qr;
817
818  fprintf(outfile, "\nTree topologies (unrooted): \n\n");
819  fprintf(outfile, "    I:  ");
820  writetree(1, 2, 3, 4);
821  fprintf(outfile, "   II:  ");
822  writetree(1, 3, 2, 4);
823  fprintf(outfile, "  III:  ");
824  writetree(1, 4, 2, 3);
825  fprintf(outfile, "\n\nLake's linear invariants\n");
826  fprintf(outfile,
827    " (these are expected to be zero for the two incorrect tree topologies.\n");
828  fprintf(outfile,
829          "  This is tested by testing the equality of the two parts\n");
830  fprintf(outfile,
831          "  of each expression using a one-sided exact binomial test.\n");
832  fprintf(outfile,
833    "  The null hypothesis is that the first part is no larger than the second.)\n\n");
834  fprintf(outfile, " Tree                           ");
835  fprintf(outfile, "  Exact test P value    Significant?\n\n");
836  m = f[(long)yy - (long)xx][(long)zz - (long)xx]
837      [(long)ww - (long)xx] + f[0][(long)zz - (long)xx]
838      [(long)zz - (long)xx];
839  n = f[(long)yy - (long)xx][(long)zz - (long)xx]
840      [(long)zz - (long)xx] + f[0][(long)zz - (long)xx]
841      [(long)ww - (long)xx];
842  fprintf(outfile, "   I  %5ld    - %5ld   = %5ld", m, n, m - n);
843  exacttest(m, n);
844  m = f[(long)zz - (long)xx][(long)yy - (long)xx]
845      [(long)ww - (long)xx] + f[(long)zz - (long)xx][0]
846      [(long)zz - (long)xx];
847  n = f[(long)zz - (long)xx][(long)yy - (long)xx]
848      [(long)zz - (long)xx] + f[(long)zz - (long)xx][0]
849      [(long)ww - (long)xx];
850  fprintf(outfile, "   II %5ld    - %5ld   = %5ld", m, n, m - n);
851  exacttest(m, n);
852  m = f[(long)zz - (long)xx][(long)ww - (long)xx]
853      [(long)yy - (long)xx] + f[(long)zz - (long)xx]
854      [(long)zz - (long)xx][0];
855  n = f[(long)zz - (long)xx][(long)zz - (long)xx]
856      [(long)yy - (long)xx] + f[(long)zz - (long)xx]
857      [(long)ww - (long)xx][0];
858  fprintf(outfile, "   III%5ld    - %5ld   = %5ld", m, n, m - n);
859  exacttest(m, n);
860  fprintf(outfile, "\n\nCavender's quadratic invariants (type L)");
861  fprintf(outfile, " using purines vs. pyrimidines\n");
862  fprintf(outfile,
863          " (these are expected to be zero, and thus have a nonsignificant\n");
864  fprintf(outfile, "  chi-square, for the correct tree topology)\n");
865  fprintf(outfile, "They will be misled if there are substantially\n");
866  fprintf(outfile, "different evolutionary rate between sites, or\n");
867  fprintf(outfile, "different purine:pyrimidine ratios from 1:1.\n\n");
868  fprintf(outfile, "  Tree I:\n");
869  m = f[0][0][0] + f[0][(long)yy - (long)xx]
870      [(long)yy - (long)xx] + f[0][(long)zz - (long)xx]
871      [(long)zz - (long)xx];
872  n = f[0][0][(long)yy - (long)xx] + f[0][0]
873      [(long)zz - (long)xx] + f[0][(long)yy - (long)xx][0] + f[0]
874      [(long)yy - (long)xx][(long)zz - (long)xx] + f[0]
875      [(long)zz - (long)xx][0] + f[0][(long)zz - (long)xx]
876      [(long)yy - (long)xx] + f[0][(long)zz - (long)xx]
877      [(long)ww - (long)xx];
878  p = f[(long)yy - (long)xx][0][0] + f[(long)yy - (long)xx]
879      [(long)yy - (long)xx]
880      [(long)yy - (long)xx] + f[(long)yy - (long)xx]
881      [(long)zz - (long)xx]
882      [(long)zz - (long)xx] + f[(long)zz - (long)xx][0]
883      [0] + f[(long)zz - (long)xx][(long)yy - (long)xx]
884      [(long)yy - (long)xx] + f[(long)zz - (long)xx]
885      [(long)zz - (long)xx]
886      [(long)zz - (long)xx] + f[(long)zz - (long)xx]
887      [(long)ww - (long)xx][(long)ww - (long)xx];
888  q = f[(long)yy - (long)xx][0][(long)yy - (long)xx] +
889      f[(long)yy - (long)xx][0][(long)zz - (long)xx] +
890      f[(long)yy - (long)xx][(long)yy - (long)xx][0] +
891      f[(long)yy - (long)xx][(long)yy - (long)xx][(long)zz - (long)xx] +
892      f[(long)yy - (long)xx][(long)zz - (long)xx][0] +
893      f[(long)yy - (long)xx][(long)zz - (long)xx][(long)yy - (long)xx] +
894      f[(long)yy - (long)xx][(long)zz - (long)xx][(long)ww - (long)xx] +
895      f[(long)zz - (long)xx][0][(long)yy - (long)xx] +
896      f[(long)zz - (long)xx][0][(long)zz - (long)xx] +
897      f[(long)zz - (long)xx][0][(long)ww - (long)xx] +
898      f[(long)zz - (long)xx][(long)yy - (long)xx][0] +
899      f[(long)zz - (long)xx][(long)yy - (long)xx][(long)zz - (long)xx] +
900      f[(long)zz - (long)xx][(long)yy - (long)xx][(long)ww - (long)xx] +
901      f[(long)zz - (long)xx][(long)zz - (long)xx][0] +
902      f[(long)zz - (long)xx][(long)zz - (long)xx][(long)yy - (long)xx] +
903      f[(long)zz - (long)xx][(long)zz - (long)xx][(long)ww - (long)xx] +
904      f[(long)zz - (long)xx][(long)ww - (long)xx][0] +
905      f[(long)zz - (long)xx][(long)ww - (long)xx][(long)yy - (long)xx] +
906      f[(long)zz - (long)xx][(long)ww - (long)xx][(long)zz - (long)xx];
907
908  nr = n;
909  pr = p;
910  mr = m;
911  qr = q;
912  L1 = nr * pr - mr * qr;
913  tabulate(m, n, p, q, &mr,&nr,&pr,&qr);
914  fprintf(outfile, "  Tree II:\n");
915  m = f[0][0][0] + f[(long)yy - (long)xx][0]
916      [(long)yy - (long)xx] + f[(long)zz - (long)xx][0]
917      [(long)zz - (long)xx];
918  n = f[0][0][(long)yy - (long)xx] + f[0][0]
919      [(long)zz - (long)xx] + f[(long)yy - (long)xx][0]
920      [0] + f[(long)yy - (long)xx][0]
921      [(long)zz - (long)xx] + f[(long)zz - (long)xx][0]
922      [0] + f[(long)zz - (long)xx][0]
923      [(long)yy - (long)xx] + f[(long)zz - (long)xx][0]
924      [(long)ww - (long)xx];
925  p = f[0][(long)yy - (long)xx][0] + f[(long)yy - (long)xx]
926      [(long)yy - (long)xx]
927      [(long)yy - (long)xx] + f[(long)zz - (long)xx]
928      [(long)yy - (long)xx][(long)zz - (long)xx] + f[0]
929      [(long)zz - (long)xx][0] + f[(long)yy - (long)xx]
930      [(long)zz - (long)xx]
931      [(long)yy - (long)xx] + f[(long)zz - (long)xx]
932      [(long)zz - (long)xx]
933      [(long)zz - (long)xx] + f[(long)zz - (long)xx]
934      [(long)ww - (long)xx][(long)zz - (long)xx];
935  q = f[0][(long)yy - (long)xx][(long)yy - (long)xx] + f[0]
936      [(long)yy - (long)xx][(long)zz - (long)xx] +
937      f[(long)yy - (long)xx][(long)yy - (long)xx][0] +
938      f[(long)yy - (long)xx][(long)yy - (long)xx][(long)zz - (long)xx] +
939      f[(long)zz - (long)xx][(long)yy - (long)xx][0] +
940      f[(long)zz - (long)xx][(long)yy - (long)xx][(long)yy - (long)xx] +
941      f[(long)zz - (long)xx][(long)yy - (long)xx][(long)ww - (long)xx] +
942      f[0][(long)zz - (long)xx][(long)yy - (long)xx] + f[0]
943      [(long)zz - (long)xx][(long)zz - (long)xx] + f[0]
944      [(long)zz - (long)xx][(long)ww - (long)xx] +
945      f[(long)yy - (long)xx][(long)zz - (long)xx][0] +
946      f[(long)yy - (long)xx][(long)zz - (long)xx][(long)zz - (long)xx] +
947      f[(long)yy - (long)xx][(long)zz - (long)xx][(long)ww - (long)xx] +
948      f[(long)zz - (long)xx][(long)zz - (long)xx][0] +
949      f[(long)zz - (long)xx][(long)zz - (long)xx][(long)yy - (long)xx] +
950      f[(long)zz - (long)xx][(long)zz - (long)xx][(long)ww - (long)xx] +
951      f[(long)zz - (long)xx][(long)ww - (long)xx][0] +
952      f[(long)zz - (long)xx][(long)ww - (long)xx][(long)yy - (long)xx] +
953      f[(long)zz - (long)xx][(long)ww - (long)xx][(long)ww - (long)xx];
954  nr = n;
955  pr = p;
956  mr = m;
957  qr = q;
958  L2 = nr * pr - mr * qr;
959  tabulate(m, n, p, q, &mr,&nr,&pr,&qr);
960  fprintf(outfile, "  Tree III:\n");
961  m = f[0][0][0] + f[(long)yy - (long)xx][(long)yy - (long)xx]
962      [0] + f[(long)zz - (long)xx][(long)zz - (long)xx][0];
963  n = f[(long)yy - (long)xx][0][0] + f[(long)zz - (long)xx][0]
964      [0] + f[0][(long)yy - (long)xx][0] + f[(long)zz - (long)xx]
965      [(long)yy - (long)xx][0] + f[0][(long)zz - (long)xx]
966      [0] + f[(long)yy - (long)xx][(long)zz - (long)xx]
967      [0] + f[(long)zz - (long)xx][(long)ww - (long)xx][0];
968  p = f[0][0][(long)yy - (long)xx] + f[(long)yy - (long)xx]
969      [(long)yy - (long)xx]
970      [(long)yy - (long)xx] + f[(long)zz - (long)xx]
971      [(long)zz - (long)xx][(long)yy - (long)xx] + f[0][0]
972      [(long)zz - (long)xx] + f[(long)yy - (long)xx]
973      [(long)yy - (long)xx]
974      [(long)zz - (long)xx] + f[(long)zz - (long)xx]
975      [(long)zz - (long)xx]
976      [(long)zz - (long)xx] + f[(long)zz - (long)xx]
977      [(long)zz - (long)xx][(long)ww - (long)xx];
978  q = f[(long)yy - (long)xx][0][(long)yy - (long)xx] + f[(long)zz - (long)xx]
979      [0][(long)yy - (long)xx] + f[0][(long)yy - (long)xx][(long)yy - (long)xx] +
980      f[(long)zz - (long)xx][(long)yy - (long)xx][(long)yy - (long)xx] +
981      f[0][(long)zz - (long)xx]
982      [(long)yy - (long)xx] + f[(long)yy - (long)xx][(long)zz - (long)xx]
983      [(long)yy - (long)xx] + f[(long)zz - (long)xx][(long)ww - (long)xx]
984      [(long)yy - (long)xx] + f[(long)yy - (long)xx][0]
985      [(long)zz - (long)xx] + f[(long)zz - (long)xx][0]
986      [(long)zz - (long)xx] + f[0][(long)zz - (long)xx]
987      [(long)ww - (long)xx] + f[0][(long)yy - (long)xx]
988      [(long)zz - (long)xx] + f[(long)zz - (long)xx]
989      [(long)yy - (long)xx]
990      [(long)zz - (long)xx] + f[(long)zz - (long)xx]
991      [(long)yy - (long)xx][(long)ww - (long)xx] + f[0]
992      [(long)zz - (long)xx]
993      [(long)zz - (long)xx] + f[(long)yy - (long)xx]
994      [(long)zz - (long)xx]
995      [(long)zz - (long)xx] + f[(long)zz - (long)xx]
996      [(long)ww - (long)xx]
997      [(long)ww - (long)xx] + f[(long)zz - (long)xx][0]
998      [(long)ww - (long)xx] + f[(long)yy - (long)xx]
999      [(long)zz - (long)xx][(long)ww - (long)xx] +
1000      f[(long)zz - (long)xx][(long)ww - (long)xx][(long)zz - (long)xx];
1001  nr = n;
1002  pr = p;
1003  mr = m;
1004  qr = q;
1005  L3 = nr * pr - mr * qr;
1006  tabulate(m, n, p, q, &mr,&nr,&pr,&qr);
1007  fprintf(outfile, "\n\nCavender's quadratic invariants (type K)");
1008  fprintf(outfile, " using purines vs. pyrimidines\n");
1009  fprintf(outfile,
1010          " (these are expected to be zero for the correct tree topology)\n");
1011  fprintf(outfile, "They will be misled if there are substantially\n");
1012  fprintf(outfile, "different evolutionary rate between sites, or\n");
1013  fprintf(outfile, "different purine:pyrimidine ratios from 1:1.\n");
1014  fprintf(outfile, "No statistical test is done on them here.\n\n");
1015  fprintf(outfile, "  Tree I:   %15.1f\n", L2 - L3);
1016  fprintf(outfile, "  Tree II:  %15.1f\n", L3 - L1);
1017  fprintf(outfile, "  Tree III: %15.1f\n\n", L1 - L2);
1018}  /* invariants */
1019
1020void makeinv()
1021{
1022  /* print out patterns and compute invariants */
1023
1024  prntpatterns();
1025  makesymmetries();
1026  prntsymmetries();
1027  if (printinv)
1028    invariants();
1029}  /* makeinv */
1030
1031
1032main(argc, argv)
1033int argc;
1034Char *argv[];
1035{  /* DNA Invariants */
1036char infilename[100],outfilename[100];
1037#ifdef MAC
1038  macsetup("Dnainvar","");
1039#endif
1040openfile(&infile,INFILE,"r",argv[0],infilename);
1041openfile(&outfile,OUTFILE,"w",argv[0],outfilename);
1042
1043  ibmpc = ibmpc0;
1044  ansi = ansi0;
1045  vt52 = vt520;
1046  mulsets = false;
1047  firstset = true;
1048  datasets = 1;
1049  anerror = false;
1050  doinit();
1051  if (!anerror) {
1052    for (ith = 1; ith <= datasets; ith++) {
1053      if (!anerror)
1054        doinput();
1055      if (ith == 1)
1056        firstset = false;
1057      if (!anerror) {
1058        if (datasets > 1) {
1059          if (progress)
1060            printf("\nData set # %ld:\n",ith);
1061          fprintf(outfile, "Data set # %ld:\n\n",ith);
1062        }
1063        makeinv();
1064      }
1065      if (progress) {
1066        putchar('\n');
1067        if (!anerror)
1068          printf("Output written to output file\n");
1069        putchar('\n');
1070      }
1071    }
1072  }
1073  FClose(outfile);
1074  FClose(infile);
1075#ifdef MAC
1076  fixmacfile(outfilename);
1077#endif
1078  exit(0);
1079}  /* DNA Invariants */
1080
1081int eof(f)
1082FILE *f;
1083{
1084    register int ch;
1085
1086    if (feof(f))
1087        return 1;
1088    if (f == stdin)
1089        return 0;
1090    ch = getc(f);
1091    if (ch == EOF)
1092        return 1;
1093    ungetc(ch, f);
1094    return 0;
1095}
1096
1097
1098int eoln(f)
1099FILE *f;
1100{
1101    register int ch;
1102
1103    ch = getc(f);
1104    if (ch == EOF)
1105        return 1;
1106    ungetc(ch, f);
1107    return (ch == '\n');
1108}
1109
1110void memerror()
1111{
1112printf("Error allocating memory\n");
1113exit(-1);
1114}
1115
1116MALLOCRETURN *mymalloc(x)
1117long x;
1118{
1119MALLOCRETURN *mem;
1120mem = (MALLOCRETURN *)malloc(x);
1121if (!mem)
1122  memerror();
1123else
1124  return (MALLOCRETURN *)mem;
1125}
1126
Note: See TracBrowser for help on using the repository browser.