| 1 | #include "phylip.h" |
|---|
| 2 | |
|---|
| 3 | /* version 3.56c. (c) Copyright 1993 by Joseph Felsenstein. |
|---|
| 4 | Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. |
|---|
| 5 | Permission is granted to copy and use this program provided no fee is |
|---|
| 6 | charged for it and provided that this copyright notice is not removed. */ |
|---|
| 7 | |
|---|
| 8 | #define namelength 10 /* number of characters max. in species name */ |
|---|
| 9 | #define epsilon 0.02 /* a small number */ |
|---|
| 10 | |
|---|
| 11 | #define ibmpc0 false |
|---|
| 12 | #define ansi0 true |
|---|
| 13 | #define vt520 false |
|---|
| 14 | |
|---|
| 15 | |
|---|
| 16 | typedef double *phenotype; |
|---|
| 17 | typedef Char naym[namelength]; |
|---|
| 18 | |
|---|
| 19 | Static FILE *infile, *outfile; |
|---|
| 20 | Static short numsp, loci, totalleles, df, datasets, ith; |
|---|
| 21 | Static short *alleles; |
|---|
| 22 | Static phenotype *x; |
|---|
| 23 | Static double **d; |
|---|
| 24 | Static naym *nayms; |
|---|
| 25 | Static boolean all, cavalli, lower, nei, reynolds, mulsets, ibmpc, |
|---|
| 26 | vt52, ansi, firstset, progress; |
|---|
| 27 | |
|---|
| 28 | |
|---|
| 29 | openfile(fp,filename,mode,application,perm) |
|---|
| 30 | FILE **fp; |
|---|
| 31 | char *filename; |
|---|
| 32 | char *mode; |
|---|
| 33 | char *application; |
|---|
| 34 | char *perm; |
|---|
| 35 | { |
|---|
| 36 | FILE *of; |
|---|
| 37 | char file[100]; |
|---|
| 38 | strcpy(file,filename); |
|---|
| 39 | while (1){ |
|---|
| 40 | of = fopen(file,mode); |
|---|
| 41 | if (of) |
|---|
| 42 | break; |
|---|
| 43 | else { |
|---|
| 44 | switch (*mode){ |
|---|
| 45 | case 'r': |
|---|
| 46 | printf("%s: can't read %s\n",application,file); |
|---|
| 47 | file[0] = '\0'; |
|---|
| 48 | while (file[0] =='\0'){ |
|---|
| 49 | printf("Please enter a new filename>"); |
|---|
| 50 | gets(file);} |
|---|
| 51 | break; |
|---|
| 52 | case 'w': |
|---|
| 53 | printf("%s: can't write %s\n",application,file); |
|---|
| 54 | file[0] = '\0'; |
|---|
| 55 | while (file[0] =='\0'){ |
|---|
| 56 | printf("Please enter a new filename>"); |
|---|
| 57 | gets(file);} |
|---|
| 58 | break; |
|---|
| 59 | } |
|---|
| 60 | } |
|---|
| 61 | } |
|---|
| 62 | *fp=of; |
|---|
| 63 | if (perm != NULL) |
|---|
| 64 | strcpy(perm,file); |
|---|
| 65 | } |
|---|
| 66 | |
|---|
| 67 | |
|---|
| 68 | Static Void uppercase(ch) |
|---|
| 69 | Char *ch; |
|---|
| 70 | { /* convert a character to upper case -- either ASCII or EBCDIC */ |
|---|
| 71 | *ch = (islower(*ch) ? toupper(*ch) : (*ch)); |
|---|
| 72 | } /* uppercase */ |
|---|
| 73 | |
|---|
| 74 | |
|---|
| 75 | void getnums() |
|---|
| 76 | { |
|---|
| 77 | /* read number of species and loci for first data set */ |
|---|
| 78 | short i; |
|---|
| 79 | fscanf(infile, "%hd%hd", &numsp, &loci); |
|---|
| 80 | } /* getnums */ |
|---|
| 81 | |
|---|
| 82 | void getoptions() |
|---|
| 83 | { |
|---|
| 84 | /* interactively set options */ |
|---|
| 85 | Char ch; |
|---|
| 86 | boolean done1; |
|---|
| 87 | |
|---|
| 88 | all = false; |
|---|
| 89 | cavalli = false; |
|---|
| 90 | lower = false; |
|---|
| 91 | nei = true; |
|---|
| 92 | reynolds = false; |
|---|
| 93 | lower = false; |
|---|
| 94 | progress = true; |
|---|
| 95 | for (;;) { |
|---|
| 96 | printf(ansi ? "\033[2J\033[H" : |
|---|
| 97 | vt52 ? "\033E\033H" : "\n"); |
|---|
| 98 | printf("\nGenetic Distance Matrix program, version %s\n\n",VERSION); |
|---|
| 99 | printf("Settings for this run:\n"); |
|---|
| 100 | printf(" A Input file contains all alleles at each locus? %s\n", |
|---|
| 101 | all ? "Yes" : "One omitted at each locus"); |
|---|
| 102 | printf(" N Use Nei genetic distance? %s\n", |
|---|
| 103 | nei ? "Yes" : "No"); |
|---|
| 104 | printf(" C Use Cavalli-Sforza chord measure? %s\n", |
|---|
| 105 | cavalli ? "Yes" : "No"); |
|---|
| 106 | printf(" R Use Reynolds genetic distance? %s\n", |
|---|
| 107 | reynolds ? "Yes" : "No"); |
|---|
| 108 | printf(" L Form of distance matrix? %s\n", |
|---|
| 109 | lower ? "Lower-triangular" : "Square"); |
|---|
| 110 | printf(" M Analyze multiple data sets?"); |
|---|
| 111 | if (mulsets) |
|---|
| 112 | printf(" Yes, %2hd sets\n", datasets); |
|---|
| 113 | else |
|---|
| 114 | printf(" No\n"); |
|---|
| 115 | printf(" 0 Terminal type (IBM PC, VT52, ANSI)? %s\n", |
|---|
| 116 | ibmpc ? "IBM PC" : |
|---|
| 117 | ansi ? "ANSI" : |
|---|
| 118 | vt52 ? "VT52" : "(none)"); |
|---|
| 119 | printf(" 1 Print indications of progress of run? %s\n", |
|---|
| 120 | progress ? "Yes" : "No"); |
|---|
| 121 | printf("\nAre these settings correct? (type Y or the letter for one to change)\n"); |
|---|
| 122 | scanf("%c%*[^\n]", &ch); |
|---|
| 123 | getchar(); |
|---|
| 124 | uppercase(&ch); |
|---|
| 125 | if (ch == 'Y') |
|---|
| 126 | break; |
|---|
| 127 | if (ch == 'A' || ch == 'C' || ch == 'N' || ch == 'M' || ch == 'R' || |
|---|
| 128 | ch == 'L' || ch == '0' || ch == '1') { |
|---|
| 129 | switch (ch) { |
|---|
| 130 | |
|---|
| 131 | case 'A': |
|---|
| 132 | all = !all; |
|---|
| 133 | break; |
|---|
| 134 | |
|---|
| 135 | case 'C': |
|---|
| 136 | cavalli = true; |
|---|
| 137 | nei = false; |
|---|
| 138 | reynolds = false; |
|---|
| 139 | break; |
|---|
| 140 | |
|---|
| 141 | case 'N': |
|---|
| 142 | cavalli = false; |
|---|
| 143 | nei = true; |
|---|
| 144 | reynolds = false; |
|---|
| 145 | break; |
|---|
| 146 | |
|---|
| 147 | case 'R': |
|---|
| 148 | reynolds = true; |
|---|
| 149 | cavalli = false; |
|---|
| 150 | nei = false; |
|---|
| 151 | break; |
|---|
| 152 | |
|---|
| 153 | case 'L': |
|---|
| 154 | lower = !lower; |
|---|
| 155 | break; |
|---|
| 156 | |
|---|
| 157 | case 'M': |
|---|
| 158 | mulsets = !mulsets; |
|---|
| 159 | if (mulsets) { |
|---|
| 160 | done1 = false; |
|---|
| 161 | do { |
|---|
| 162 | printf("How many data sets?\n"); |
|---|
| 163 | scanf("%hd%*[^\n]", &datasets); |
|---|
| 164 | getchar(); |
|---|
| 165 | done1 = (datasets >= 1); |
|---|
| 166 | if (!done1) |
|---|
| 167 | printf("BAD DATA SETS NUMBER: it must be greater than 1\n"); |
|---|
| 168 | } while (done1 != true); |
|---|
| 169 | } |
|---|
| 170 | break; |
|---|
| 171 | |
|---|
| 172 | case '0': |
|---|
| 173 | if (ibmpc) { |
|---|
| 174 | ibmpc = false; |
|---|
| 175 | vt52 = true; |
|---|
| 176 | } else { |
|---|
| 177 | if (vt52) { |
|---|
| 178 | vt52 = false; |
|---|
| 179 | ansi = true; |
|---|
| 180 | } else if (ansi) |
|---|
| 181 | ansi = false; |
|---|
| 182 | else |
|---|
| 183 | ibmpc = true; |
|---|
| 184 | } |
|---|
| 185 | break; |
|---|
| 186 | |
|---|
| 187 | case '1': |
|---|
| 188 | progress = !progress; |
|---|
| 189 | break; |
|---|
| 190 | } |
|---|
| 191 | } else |
|---|
| 192 | printf("Not a possible option!\n"); |
|---|
| 193 | } |
|---|
| 194 | putchar('\n'); |
|---|
| 195 | } /* getoptions */ |
|---|
| 196 | |
|---|
| 197 | |
|---|
| 198 | void doinit() |
|---|
| 199 | { |
|---|
| 200 | /* initializes variables */ |
|---|
| 201 | short i; |
|---|
| 202 | |
|---|
| 203 | getnums(); |
|---|
| 204 | x = (phenotype *)Malloc(numsp*sizeof(phenotype)); |
|---|
| 205 | d = (double **)Malloc(numsp*sizeof(double *)); |
|---|
| 206 | for (i = 0; i < (numsp); i++) |
|---|
| 207 | d[i] = (double *)Malloc(numsp*sizeof(double)); |
|---|
| 208 | alleles = (short *)Malloc(loci*sizeof(short)); |
|---|
| 209 | getoptions(); |
|---|
| 210 | } /* doinit */ |
|---|
| 211 | |
|---|
| 212 | |
|---|
| 213 | void getalleles() |
|---|
| 214 | { |
|---|
| 215 | short i, cursp, curloc; |
|---|
| 216 | |
|---|
| 217 | if (!firstset) { |
|---|
| 218 | if (eoln(infile)) { |
|---|
| 219 | fscanf(infile, "%*[^\n]"); |
|---|
| 220 | getc(infile); |
|---|
| 221 | } |
|---|
| 222 | fscanf(infile, "%hd%hd", &cursp, &curloc); |
|---|
| 223 | if (cursp != numsp) { |
|---|
| 224 | printf("\nERROR: INCONSISTENT NUMBER OF SPECIES IN DATA SET %4hd\n", ith); |
|---|
| 225 | exit(-1); |
|---|
| 226 | } |
|---|
| 227 | loci = curloc; |
|---|
| 228 | } |
|---|
| 229 | totalleles = 0; |
|---|
| 230 | fscanf(infile, "%*[^\n]"); |
|---|
| 231 | getc(infile); |
|---|
| 232 | for (i = 0; i < (loci); i++) { |
|---|
| 233 | if (eoln(infile)) { |
|---|
| 234 | fscanf(infile, "%*[^\n]"); |
|---|
| 235 | getc(infile); |
|---|
| 236 | } |
|---|
| 237 | fscanf(infile, "%hd", &alleles[i]); |
|---|
| 238 | totalleles += alleles[i]; |
|---|
| 239 | } |
|---|
| 240 | df = totalleles - loci; |
|---|
| 241 | } /* getalleles */ |
|---|
| 242 | |
|---|
| 243 | void getdata() |
|---|
| 244 | { |
|---|
| 245 | /* read allele frequencies */ |
|---|
| 246 | short i, j, k, m, n, p; |
|---|
| 247 | double sum; |
|---|
| 248 | |
|---|
| 249 | for (i = 0; i < numsp; i++) |
|---|
| 250 | x[i] = (phenotype)Malloc(totalleles*sizeof(double)); |
|---|
| 251 | for (i = 1; i <= (numsp); i++) { |
|---|
| 252 | fscanf(infile, "%*[^\n]"); |
|---|
| 253 | getc(infile); |
|---|
| 254 | for (j = 0; j < namelength; j++) |
|---|
| 255 | nayms[i - 1][j] = getc(infile); |
|---|
| 256 | m = 1; |
|---|
| 257 | p = 1; |
|---|
| 258 | for (j = 1; j <= (loci); j++) { |
|---|
| 259 | sum = 0.0; |
|---|
| 260 | if (all) |
|---|
| 261 | n = alleles[j - 1]; |
|---|
| 262 | else |
|---|
| 263 | n = alleles[j - 1] - 1; |
|---|
| 264 | for (k = 1; k <= n; k++) { |
|---|
| 265 | if (eoln(infile)) { |
|---|
| 266 | fscanf(infile, "%*[^\n]"); |
|---|
| 267 | getc(infile); |
|---|
| 268 | } |
|---|
| 269 | fscanf(infile, "%lf", &x[i - 1][m - 1]); |
|---|
| 270 | sum += x[i - 1][m - 1]; |
|---|
| 271 | if (x[i - 1][m - 1] < 0.0) { |
|---|
| 272 | printf("\nLOCUS%3hd IN SPECIES%3hd: AN ALLELE", j, i); |
|---|
| 273 | printf(" FREQUENCY IS NEGATIVE\n"); |
|---|
| 274 | exit(-1); |
|---|
| 275 | } |
|---|
| 276 | p++; |
|---|
| 277 | m++; |
|---|
| 278 | } |
|---|
| 279 | if (all && fabs(sum - 1.0) > epsilon) { |
|---|
| 280 | printf("\nLOCUS%3hd IN SPECIES%3hd: FREQUENCIES DO NOT ADD UP TO 1\n", |
|---|
| 281 | j, i); |
|---|
| 282 | exit(-1); |
|---|
| 283 | } |
|---|
| 284 | if (!all) { |
|---|
| 285 | x[i - 1][m - 1] = 1.0 - sum; |
|---|
| 286 | if (x[i - 1][m - 1] < -epsilon) { |
|---|
| 287 | printf("\nLOCUS%3hd IN SPECIES%3hd: ",j,i); |
|---|
| 288 | printf("FREQUENCIES ADD UP TO MORE THAN 1\n"); |
|---|
| 289 | exit(-1); |
|---|
| 290 | } |
|---|
| 291 | m++; |
|---|
| 292 | } |
|---|
| 293 | } |
|---|
| 294 | } |
|---|
| 295 | } /* getdata */ |
|---|
| 296 | |
|---|
| 297 | |
|---|
| 298 | void getinput() |
|---|
| 299 | { |
|---|
| 300 | /* read the input data */ |
|---|
| 301 | getalleles(); |
|---|
| 302 | getdata(); |
|---|
| 303 | } /* getinput */ |
|---|
| 304 | |
|---|
| 305 | |
|---|
| 306 | void makedists() |
|---|
| 307 | { |
|---|
| 308 | short i, j, k; |
|---|
| 309 | double s, s1, s2, s3, f; |
|---|
| 310 | double TEMP; |
|---|
| 311 | |
|---|
| 312 | for (i = 0; i < (numsp); i++) |
|---|
| 313 | d[i][i] = 0.0; |
|---|
| 314 | for (i = 1; i <= (numsp); i++) { |
|---|
| 315 | for (j = 0; j <= i - 2; j++) { |
|---|
| 316 | if (cavalli) { |
|---|
| 317 | s = 0.0; |
|---|
| 318 | for (k = 0; k < (totalleles); k++) { |
|---|
| 319 | f = x[i - 1][k] * x[j][k]; |
|---|
| 320 | if (f > 0.0) |
|---|
| 321 | s += sqrt(f); |
|---|
| 322 | } |
|---|
| 323 | d[i - 1][j] = 4 * (loci - s) / df; |
|---|
| 324 | } |
|---|
| 325 | if (nei) { |
|---|
| 326 | s1 = 0.0; |
|---|
| 327 | s2 = 0.0; |
|---|
| 328 | s3 = 0.0; |
|---|
| 329 | for (k = 0; k < (totalleles); k++) { |
|---|
| 330 | s1 += x[i - 1][k] * x[j][k]; |
|---|
| 331 | TEMP = x[i - 1][k]; |
|---|
| 332 | s2 += TEMP * TEMP; |
|---|
| 333 | TEMP = x[j][k]; |
|---|
| 334 | s3 += TEMP * TEMP; |
|---|
| 335 | } |
|---|
| 336 | if (s1 <= 1.0e-20) |
|---|
| 337 | d[i - 1][j] = -1.0; |
|---|
| 338 | else |
|---|
| 339 | d[i - 1][j] = -log(s1 / sqrt(s2 * s3)); |
|---|
| 340 | } |
|---|
| 341 | if (reynolds) { |
|---|
| 342 | s1 = 0.0; |
|---|
| 343 | s2 = 0.0; |
|---|
| 344 | for (k = 0; k < (totalleles); k++) { |
|---|
| 345 | TEMP = x[i - 1][k] - x[j][k]; |
|---|
| 346 | s1 += TEMP * TEMP; |
|---|
| 347 | s2 += x[i - 1][k] * x[j][k]; |
|---|
| 348 | } |
|---|
| 349 | d[i - 1][j] = s1 / (loci * 2 - 2 * s2); |
|---|
| 350 | } |
|---|
| 351 | d[j][i - 1] = d[i - 1][j]; |
|---|
| 352 | } |
|---|
| 353 | } |
|---|
| 354 | } /* makedists */ |
|---|
| 355 | |
|---|
| 356 | void writedists() |
|---|
| 357 | { |
|---|
| 358 | short i, j, k; |
|---|
| 359 | |
|---|
| 360 | fprintf(outfile, "%5hd\n", numsp); |
|---|
| 361 | for (i = 0; i < (numsp); i++) { |
|---|
| 362 | for (j = 0; j < namelength; j++) |
|---|
| 363 | putc(nayms[i][j], outfile); |
|---|
| 364 | if (lower) |
|---|
| 365 | k = i; |
|---|
| 366 | else |
|---|
| 367 | k = numsp; |
|---|
| 368 | for (j = 1; j <= k; j++) { |
|---|
| 369 | fprintf(outfile, "%8.4f", d[i][j - 1]); |
|---|
| 370 | if ((j + 1) % 9 == 0 && j < k) |
|---|
| 371 | putc('\n', outfile); |
|---|
| 372 | } |
|---|
| 373 | putc('\n', outfile); |
|---|
| 374 | } |
|---|
| 375 | if (progress) |
|---|
| 376 | printf("Distances written to file\n\n"); |
|---|
| 377 | } /* writedists */ |
|---|
| 378 | |
|---|
| 379 | |
|---|
| 380 | main(argc, argv) |
|---|
| 381 | int argc; |
|---|
| 382 | Char *argv[]; |
|---|
| 383 | { /* main program */ |
|---|
| 384 | char infilename[100],outfilename[100]; |
|---|
| 385 | #ifdef MAC |
|---|
| 386 | macsetup("Gendist",""); |
|---|
| 387 | #endif |
|---|
| 388 | openfile(&infile,INFILE,"r",argv[0],infilename); |
|---|
| 389 | openfile(&outfile,OUTFILE,"w",argv[0],outfilename); |
|---|
| 390 | |
|---|
| 391 | ibmpc = ibmpc0; |
|---|
| 392 | ansi = ansi0; |
|---|
| 393 | vt52 = vt520; |
|---|
| 394 | mulsets = false; |
|---|
| 395 | firstset = true; |
|---|
| 396 | datasets = 1; |
|---|
| 397 | doinit(); |
|---|
| 398 | nayms = (naym *)Malloc(numsp*sizeof(naym)); |
|---|
| 399 | for (ith = 1; ith <= (datasets); ith++) { |
|---|
| 400 | getinput(); |
|---|
| 401 | firstset = false; |
|---|
| 402 | if ((datasets > 1) && progress) |
|---|
| 403 | printf("\nData set # %hd:\n\n",ith); |
|---|
| 404 | makedists(); |
|---|
| 405 | writedists(); |
|---|
| 406 | } |
|---|
| 407 | FClose(infile); |
|---|
| 408 | FClose(outfile); |
|---|
| 409 | #ifdef MAC |
|---|
| 410 | fixmacfile(outfilename); |
|---|
| 411 | #endif |
|---|
| 412 | exit(0); |
|---|
| 413 | } |
|---|
| 414 | |
|---|
| 415 | |
|---|
| 416 | int eof(f) |
|---|
| 417 | FILE *f; |
|---|
| 418 | { |
|---|
| 419 | register int ch; |
|---|
| 420 | |
|---|
| 421 | if (feof(f)) |
|---|
| 422 | return 1; |
|---|
| 423 | if (f == stdin) |
|---|
| 424 | return 0; |
|---|
| 425 | ch = getc(f); |
|---|
| 426 | if (ch == EOF) |
|---|
| 427 | return 1; |
|---|
| 428 | ungetc(ch, f); |
|---|
| 429 | return 0; |
|---|
| 430 | } |
|---|
| 431 | |
|---|
| 432 | |
|---|
| 433 | int eoln(f) |
|---|
| 434 | FILE *f; |
|---|
| 435 | { |
|---|
| 436 | register int ch; |
|---|
| 437 | |
|---|
| 438 | ch = getc(f); |
|---|
| 439 | if (ch == EOF) |
|---|
| 440 | return 1; |
|---|
| 441 | ungetc(ch, f); |
|---|
| 442 | return (ch == '\n'); |
|---|
| 443 | } |
|---|
| 444 | |
|---|
| 445 | void memerror() |
|---|
| 446 | { |
|---|
| 447 | printf("Error allocating memory\n"); |
|---|
| 448 | exit(-1); |
|---|
| 449 | } |
|---|
| 450 | |
|---|
| 451 | MALLOCRETURN *mymalloc(x) |
|---|
| 452 | long x; |
|---|
| 453 | { |
|---|
| 454 | MALLOCRETURN *mem; |
|---|
| 455 | mem = (MALLOCRETURN *)malloc(x); |
|---|
| 456 | if (!mem) |
|---|
| 457 | memerror(); |
|---|
| 458 | else |
|---|
| 459 | return (MALLOCRETURN *)mem; |
|---|
| 460 | } |
|---|
| 461 | |
|---|