| 1 | #include "phylip.h" |
|---|
| 2 | |
|---|
| 3 | /* version 3.6. (c) Copyright 1993-1997 by the University of Washington. |
|---|
| 4 | Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. |
|---|
| 5 | Permission is granted to copy and use this program provided no fee is |
|---|
| 6 | charged for it and provided that this copyright notice is not removed. */ |
|---|
| 7 | |
|---|
| 8 | #define epsilong 0.02 /* a small number */ |
|---|
| 9 | |
|---|
| 10 | #ifndef OLDC |
|---|
| 11 | /* function prototypes */ |
|---|
| 12 | void getoptions(void); |
|---|
| 13 | void allocrest(void); |
|---|
| 14 | void doinit(void); |
|---|
| 15 | void getalleles(void); |
|---|
| 16 | void inputdata(void); |
|---|
| 17 | void getinput(void); |
|---|
| 18 | void makedists(void); |
|---|
| 19 | void writedists(void); |
|---|
| 20 | /* function prototypes */ |
|---|
| 21 | #endif |
|---|
| 22 | |
|---|
| 23 | |
|---|
| 24 | |
|---|
| 25 | Char infilename[FNMLNGTH], outfilename[FNMLNGTH]; |
|---|
| 26 | long loci, totalleles, df, datasets, ith; |
|---|
| 27 | long nonodes; |
|---|
| 28 | long *alleles; |
|---|
| 29 | phenotype3 *x; |
|---|
| 30 | double **d; |
|---|
| 31 | boolean all, cavalli, lower, nei, reynolds, mulsets, |
|---|
| 32 | firstset, progress; |
|---|
| 33 | |
|---|
| 34 | void getoptions() |
|---|
| 35 | { |
|---|
| 36 | /* interactively set options */ |
|---|
| 37 | long loopcount; |
|---|
| 38 | Char ch; |
|---|
| 39 | |
|---|
| 40 | all = false; |
|---|
| 41 | cavalli = false; |
|---|
| 42 | lower = false; |
|---|
| 43 | nei = true; |
|---|
| 44 | reynolds = false; |
|---|
| 45 | lower = false; |
|---|
| 46 | progress = true; |
|---|
| 47 | loopcount = 0; |
|---|
| 48 | for (;;) { |
|---|
| 49 | cleerhome(); |
|---|
| 50 | printf("\nGenetic Distance Matrix program, version %s\n\n",VERSION); |
|---|
| 51 | printf("Settings for this run:\n"); |
|---|
| 52 | printf(" A Input file contains all alleles at each locus? %s\n", |
|---|
| 53 | all ? "Yes" : "One omitted at each locus"); |
|---|
| 54 | printf(" N Use Nei genetic distance? %s\n", |
|---|
| 55 | nei ? "Yes" : "No"); |
|---|
| 56 | printf(" C Use Cavalli-Sforza chord measure? %s\n", |
|---|
| 57 | cavalli ? "Yes" : "No"); |
|---|
| 58 | printf(" R Use Reynolds genetic distance? %s\n", |
|---|
| 59 | reynolds ? "Yes" : "No"); |
|---|
| 60 | printf(" L Form of distance matrix? %s\n", |
|---|
| 61 | lower ? "Lower-triangular" : "Square"); |
|---|
| 62 | printf(" M Analyze multiple data sets?"); |
|---|
| 63 | if (mulsets) |
|---|
| 64 | printf(" Yes, %2ld sets\n", datasets); |
|---|
| 65 | else |
|---|
| 66 | printf(" No\n"); |
|---|
| 67 | printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n", |
|---|
| 68 | ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)"); |
|---|
| 69 | printf(" 1 Print indications of progress of run? %s\n", |
|---|
| 70 | progress ? "Yes" : "No"); |
|---|
| 71 | printf("\n Y to accept these or type the letter for one to change\n"); |
|---|
| 72 | #ifdef WIN32 |
|---|
| 73 | phyFillScreenColor(); |
|---|
| 74 | #endif |
|---|
| 75 | scanf("%c%*[^\n]", &ch); |
|---|
| 76 | getchar(); |
|---|
| 77 | uppercase(&ch); |
|---|
| 78 | if (ch == 'Y') |
|---|
| 79 | break; |
|---|
| 80 | if (strchr("ACNMRL01", ch) != NULL) { |
|---|
| 81 | switch (ch) { |
|---|
| 82 | |
|---|
| 83 | case 'A': |
|---|
| 84 | all = !all; |
|---|
| 85 | break; |
|---|
| 86 | |
|---|
| 87 | case 'C': |
|---|
| 88 | cavalli = true; |
|---|
| 89 | nei = false; |
|---|
| 90 | reynolds = false; |
|---|
| 91 | break; |
|---|
| 92 | |
|---|
| 93 | case 'N': |
|---|
| 94 | cavalli = false; |
|---|
| 95 | nei = true; |
|---|
| 96 | reynolds = false; |
|---|
| 97 | break; |
|---|
| 98 | |
|---|
| 99 | case 'R': |
|---|
| 100 | reynolds = true; |
|---|
| 101 | cavalli = false; |
|---|
| 102 | nei = false; |
|---|
| 103 | break; |
|---|
| 104 | |
|---|
| 105 | case 'L': |
|---|
| 106 | lower = !lower; |
|---|
| 107 | break; |
|---|
| 108 | |
|---|
| 109 | case 'M': |
|---|
| 110 | mulsets = !mulsets; |
|---|
| 111 | if (mulsets) |
|---|
| 112 | initdatasets(&datasets); |
|---|
| 113 | break; |
|---|
| 114 | |
|---|
| 115 | case '0': |
|---|
| 116 | initterminal(&ibmpc, &ansi); |
|---|
| 117 | break; |
|---|
| 118 | |
|---|
| 119 | case '1': |
|---|
| 120 | progress = !progress; |
|---|
| 121 | break; |
|---|
| 122 | } |
|---|
| 123 | } else |
|---|
| 124 | printf("Not a possible option!\n"); |
|---|
| 125 | countup(&loopcount, 100); |
|---|
| 126 | } |
|---|
| 127 | putchar('\n'); |
|---|
| 128 | } /* getoptions */ |
|---|
| 129 | |
|---|
| 130 | |
|---|
| 131 | void allocrest() |
|---|
| 132 | { |
|---|
| 133 | long i; |
|---|
| 134 | |
|---|
| 135 | x = (phenotype3 *)Malloc(spp*sizeof(phenotype3)); |
|---|
| 136 | d = (double **)Malloc(spp*sizeof(double *)); |
|---|
| 137 | for (i = 0; i < (spp); i++) |
|---|
| 138 | d[i] = (double *)Malloc(spp*sizeof(double)); |
|---|
| 139 | alleles = (long *)Malloc(loci*sizeof(long)); |
|---|
| 140 | nayme = (naym *)Malloc(spp*sizeof(naym)); |
|---|
| 141 | } /* allocrest */ |
|---|
| 142 | |
|---|
| 143 | |
|---|
| 144 | void doinit() |
|---|
| 145 | { |
|---|
| 146 | /* initializes variables */ |
|---|
| 147 | |
|---|
| 148 | inputnumbers(&spp, &loci, &nonodes, 1); |
|---|
| 149 | getoptions(); |
|---|
| 150 | allocrest(); |
|---|
| 151 | } /* doinit */ |
|---|
| 152 | |
|---|
| 153 | |
|---|
| 154 | void getalleles() |
|---|
| 155 | { |
|---|
| 156 | long i; |
|---|
| 157 | |
|---|
| 158 | if (!firstset) |
|---|
| 159 | samenumsp(&loci, ith); |
|---|
| 160 | totalleles = 0; |
|---|
| 161 | scan_eoln(infile); |
|---|
| 162 | for (i = 0; i < (loci); i++) { |
|---|
| 163 | if (eoln(infile)) |
|---|
| 164 | scan_eoln(infile); |
|---|
| 165 | fscanf(infile, "%ld", &alleles[i]); |
|---|
| 166 | totalleles += alleles[i]; |
|---|
| 167 | } |
|---|
| 168 | df = totalleles - loci; |
|---|
| 169 | } /* getalleles */ |
|---|
| 170 | |
|---|
| 171 | |
|---|
| 172 | void inputdata() |
|---|
| 173 | { |
|---|
| 174 | /* read allele frequencies */ |
|---|
| 175 | long i, j, k, m, n, p; |
|---|
| 176 | double sum; |
|---|
| 177 | |
|---|
| 178 | for (i = 0; i < spp; i++) |
|---|
| 179 | x[i] = (phenotype3)Malloc(totalleles*sizeof(double)); |
|---|
| 180 | for (i = 1; i <= (spp); i++) { |
|---|
| 181 | scan_eoln(infile); |
|---|
| 182 | initname(i-1); |
|---|
| 183 | m = 1; |
|---|
| 184 | p = 1; |
|---|
| 185 | for (j = 1; j <= (loci); j++) { |
|---|
| 186 | sum = 0.0; |
|---|
| 187 | if (all) |
|---|
| 188 | n = alleles[j - 1]; |
|---|
| 189 | else |
|---|
| 190 | n = alleles[j - 1] - 1; |
|---|
| 191 | for (k = 1; k <= n; k++) { |
|---|
| 192 | if (eoln(infile)) |
|---|
| 193 | scan_eoln(infile); |
|---|
| 194 | fscanf(infile, "%lf", &x[i - 1][m - 1]); |
|---|
| 195 | sum += x[i - 1][m - 1]; |
|---|
| 196 | if (x[i - 1][m - 1] < 0.0) { |
|---|
| 197 | printf("\n\nERROR: Locus %ld in species %ld: an allele", j, i); |
|---|
| 198 | printf(" frequency is negative\n\n"); |
|---|
| 199 | exxit(-1); |
|---|
| 200 | } |
|---|
| 201 | p++; |
|---|
| 202 | m++; |
|---|
| 203 | } |
|---|
| 204 | if (all && fabs(sum - 1.0) > epsilong) { |
|---|
| 205 | printf( |
|---|
| 206 | "\n\nERROR: Locus %ld in species %ld: frequencies do not add up to 1\n\n", |
|---|
| 207 | j, i); |
|---|
| 208 | exxit(-1); |
|---|
| 209 | } |
|---|
| 210 | if (!all) { |
|---|
| 211 | x[i - 1][m - 1] = 1.0 - sum; |
|---|
| 212 | if (x[i-1][m-1] < -epsilong) { |
|---|
| 213 | printf("\n\nERROR: Locus %ld in species %ld: ",j,i); |
|---|
| 214 | printf("frequencies add up to more than 1\n\n"); |
|---|
| 215 | exxit(-1); |
|---|
| 216 | } |
|---|
| 217 | m++; |
|---|
| 218 | } |
|---|
| 219 | } |
|---|
| 220 | } |
|---|
| 221 | } /* inputdata */ |
|---|
| 222 | |
|---|
| 223 | |
|---|
| 224 | void getinput() |
|---|
| 225 | { |
|---|
| 226 | /* read the input data */ |
|---|
| 227 | getalleles(); |
|---|
| 228 | inputdata(); |
|---|
| 229 | } /* getinput */ |
|---|
| 230 | |
|---|
| 231 | |
|---|
| 232 | void makedists() |
|---|
| 233 | { |
|---|
| 234 | long i, j, k; |
|---|
| 235 | double s, s1, s2, s3, f; |
|---|
| 236 | double TEMP; |
|---|
| 237 | |
|---|
| 238 | if (progress) |
|---|
| 239 | printf("Distances calculated for species\n"); |
|---|
| 240 | for (i = 0; i < spp; i++) |
|---|
| 241 | d[i][i] = 0.0; |
|---|
| 242 | for (i = 1; i <= spp; i++) { |
|---|
| 243 | if (progress) { |
|---|
| 244 | #ifdef WIN32 |
|---|
| 245 | phyFillScreenColor(); |
|---|
| 246 | #endif |
|---|
| 247 | printf(" "); |
|---|
| 248 | for (j = 0; j < nmlngth; j++) |
|---|
| 249 | putchar(nayme[i - 1][j]); |
|---|
| 250 | printf(" "); |
|---|
| 251 | } |
|---|
| 252 | for (j = 0; j <= i - 1; j++) { |
|---|
| 253 | if (cavalli) { |
|---|
| 254 | s = 0.0; |
|---|
| 255 | for (k = 0; k < (totalleles); k++) { |
|---|
| 256 | f = x[i - 1][k] * x[j][k]; |
|---|
| 257 | if (f > 0.0) |
|---|
| 258 | s += sqrt(f); |
|---|
| 259 | else f = 0.0; |
|---|
| 260 | } |
|---|
| 261 | d[i - 1][j] = 4 * (loci - s) / df; |
|---|
| 262 | } |
|---|
| 263 | if (nei) { |
|---|
| 264 | s1 = 0.0; |
|---|
| 265 | s2 = 0.0; |
|---|
| 266 | s3 = 0.0; |
|---|
| 267 | for (k = 0; k < (totalleles); k++) { |
|---|
| 268 | s1 += x[i - 1][k] * x[j][k]; |
|---|
| 269 | TEMP = x[i - 1][k]; |
|---|
| 270 | s2 += TEMP * TEMP; |
|---|
| 271 | TEMP = x[j][k]; |
|---|
| 272 | s3 += TEMP * TEMP; |
|---|
| 273 | } |
|---|
| 274 | if (s1 <= 1.0e-20) { |
|---|
| 275 | d[i - 1][j] = -1.0; |
|---|
| 276 | printf("\nWARNING: INFINITE DISTANCE BETWEEN SPECIES "); |
|---|
| 277 | printf("%ld AND %ld; -1.0 WAS WRITTEN\n", i, j); |
|---|
| 278 | } else |
|---|
| 279 | d[i - 1][j] = fabs(-log(s1 / sqrt(s2 * s3))); |
|---|
| 280 | } |
|---|
| 281 | if (reynolds) { |
|---|
| 282 | s1 = 0.0; |
|---|
| 283 | s2 = 0.0; |
|---|
| 284 | for (k = 0; k < (totalleles); k++) { |
|---|
| 285 | TEMP = x[i - 1][k] - x[j][k]; |
|---|
| 286 | s1 += TEMP * TEMP; |
|---|
| 287 | s2 += x[i - 1][k] * x[j][k]; |
|---|
| 288 | } |
|---|
| 289 | d[i - 1][j] = s1 / (loci * 2 - 2 * s2); |
|---|
| 290 | } |
|---|
| 291 | if (progress) { |
|---|
| 292 | putchar('.'); |
|---|
| 293 | fflush(stdout); |
|---|
| 294 | } |
|---|
| 295 | d[j][i - 1] = d[i - 1][j]; |
|---|
| 296 | } |
|---|
| 297 | if (progress) { |
|---|
| 298 | putchar('\n'); |
|---|
| 299 | fflush(stdout); |
|---|
| 300 | } |
|---|
| 301 | } |
|---|
| 302 | if (progress) { |
|---|
| 303 | putchar('\n'); |
|---|
| 304 | fflush(stdout); |
|---|
| 305 | } |
|---|
| 306 | } /* makedists */ |
|---|
| 307 | |
|---|
| 308 | |
|---|
| 309 | void writedists() |
|---|
| 310 | { |
|---|
| 311 | long i, j, k; |
|---|
| 312 | |
|---|
| 313 | fprintf(outfile, "%5ld\n", spp); |
|---|
| 314 | for (i = 0; i < (spp); i++) { |
|---|
| 315 | for (j = 0; j < nmlngth; j++) |
|---|
| 316 | putc(nayme[i][j], outfile); |
|---|
| 317 | if (lower) |
|---|
| 318 | k = i; |
|---|
| 319 | else |
|---|
| 320 | k = spp; |
|---|
| 321 | for (j = 1; j <= k; j++) { |
|---|
| 322 | fprintf(outfile, "%8.4f", d[i][j - 1]); |
|---|
| 323 | if ((j + 1) % 9 == 0 && j < k) |
|---|
| 324 | putc('\n', outfile); |
|---|
| 325 | } |
|---|
| 326 | putc('\n', outfile); |
|---|
| 327 | } |
|---|
| 328 | if (progress) |
|---|
| 329 | printf("Distances written to file \"%s\"\n\n", outfilename); |
|---|
| 330 | } /* writedists */ |
|---|
| 331 | |
|---|
| 332 | |
|---|
| 333 | int main(int argc, Char *argv[]) |
|---|
| 334 | { /* main program */ |
|---|
| 335 | #ifdef MAC |
|---|
| 336 | argc = 1; /* macsetup("Gendist",""); */ |
|---|
| 337 | argv[0] = "Gendist"; |
|---|
| 338 | #endif |
|---|
| 339 | init(argc, argv); |
|---|
| 340 | openfile(&infile,INFILE,"input file", "r",argv[0],infilename); |
|---|
| 341 | openfile(&outfile,OUTFILE,"output file", "w",argv[0],outfilename); |
|---|
| 342 | |
|---|
| 343 | ibmpc = IBMCRT; |
|---|
| 344 | ansi = ANSICRT; |
|---|
| 345 | mulsets = false; |
|---|
| 346 | firstset = true; |
|---|
| 347 | datasets = 1; |
|---|
| 348 | doinit(); |
|---|
| 349 | for (ith = 1; ith <= (datasets); ith++) { |
|---|
| 350 | getinput(); |
|---|
| 351 | firstset = false; |
|---|
| 352 | if ((datasets > 1) && progress) |
|---|
| 353 | printf("\nData set # %ld:\n\n",ith); |
|---|
| 354 | makedists(); |
|---|
| 355 | writedists(); |
|---|
| 356 | } |
|---|
| 357 | FClose(infile); |
|---|
| 358 | FClose(outfile); |
|---|
| 359 | #ifdef MAC |
|---|
| 360 | fixmacfile(outfilename); |
|---|
| 361 | #endif |
|---|
| 362 | printf("Done.\n\n"); |
|---|
| 363 | #ifdef WIN32 |
|---|
| 364 | phyRestoreConsoleAttributes(); |
|---|
| 365 | #endif |
|---|
| 366 | return 0; |
|---|
| 367 | } |
|---|