1 | #include "phylip.h" |
---|
2 | |
---|
3 | /* version 3.56c. (c) Copyright 1993 by Joseph Felsenstein. |
---|
4 | Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. |
---|
5 | Permission is granted to copy and use this program provided no fee is |
---|
6 | charged for it and provided that this copyright notice is not removed. */ |
---|
7 | |
---|
8 | #define namelength 10 /* number of characters max. in species name */ |
---|
9 | #define epsilon 0.02 /* a small number */ |
---|
10 | |
---|
11 | #define ibmpc0 false |
---|
12 | #define ansi0 true |
---|
13 | #define vt520 false |
---|
14 | |
---|
15 | |
---|
16 | typedef double *phenotype; |
---|
17 | typedef Char naym[namelength]; |
---|
18 | |
---|
19 | Static FILE *infile, *outfile; |
---|
20 | Static short numsp, loci, totalleles, df, datasets, ith; |
---|
21 | Static short *alleles; |
---|
22 | Static phenotype *x; |
---|
23 | Static double **d; |
---|
24 | Static naym *nayms; |
---|
25 | Static boolean all, cavalli, lower, nei, reynolds, mulsets, ibmpc, |
---|
26 | vt52, ansi, firstset, progress; |
---|
27 | |
---|
28 | |
---|
29 | openfile(fp,filename,mode,application,perm) |
---|
30 | FILE **fp; |
---|
31 | char *filename; |
---|
32 | char *mode; |
---|
33 | char *application; |
---|
34 | char *perm; |
---|
35 | { |
---|
36 | FILE *of; |
---|
37 | char file[100]; |
---|
38 | strcpy(file,filename); |
---|
39 | while (1){ |
---|
40 | of = fopen(file,mode); |
---|
41 | if (of) |
---|
42 | break; |
---|
43 | else { |
---|
44 | switch (*mode){ |
---|
45 | case 'r': |
---|
46 | printf("%s: can't read %s\n",application,file); |
---|
47 | file[0] = '\0'; |
---|
48 | while (file[0] =='\0'){ |
---|
49 | printf("Please enter a new filename>"); |
---|
50 | gets(file);} |
---|
51 | break; |
---|
52 | case 'w': |
---|
53 | printf("%s: can't write %s\n",application,file); |
---|
54 | file[0] = '\0'; |
---|
55 | while (file[0] =='\0'){ |
---|
56 | printf("Please enter a new filename>"); |
---|
57 | gets(file);} |
---|
58 | break; |
---|
59 | } |
---|
60 | } |
---|
61 | } |
---|
62 | *fp=of; |
---|
63 | if (perm != NULL) |
---|
64 | strcpy(perm,file); |
---|
65 | } |
---|
66 | |
---|
67 | |
---|
68 | Static Void uppercase(ch) |
---|
69 | Char *ch; |
---|
70 | { /* convert a character to upper case -- either ASCII or EBCDIC */ |
---|
71 | *ch = (islower(*ch) ? toupper(*ch) : (*ch)); |
---|
72 | } /* uppercase */ |
---|
73 | |
---|
74 | |
---|
75 | void getnums() |
---|
76 | { |
---|
77 | /* read number of species and loci for first data set */ |
---|
78 | short i; |
---|
79 | fscanf(infile, "%hd%hd", &numsp, &loci); |
---|
80 | } /* getnums */ |
---|
81 | |
---|
82 | void getoptions() |
---|
83 | { |
---|
84 | /* interactively set options */ |
---|
85 | Char ch; |
---|
86 | boolean done1; |
---|
87 | |
---|
88 | all = false; |
---|
89 | cavalli = false; |
---|
90 | lower = false; |
---|
91 | nei = true; |
---|
92 | reynolds = false; |
---|
93 | lower = false; |
---|
94 | progress = true; |
---|
95 | for (;;) { |
---|
96 | printf(ansi ? "\033[2J\033[H" : |
---|
97 | vt52 ? "\033E\033H" : "\n"); |
---|
98 | printf("\nGenetic Distance Matrix program, version %s\n\n",VERSION); |
---|
99 | printf("Settings for this run:\n"); |
---|
100 | printf(" A Input file contains all alleles at each locus? %s\n", |
---|
101 | all ? "Yes" : "One omitted at each locus"); |
---|
102 | printf(" N Use Nei genetic distance? %s\n", |
---|
103 | nei ? "Yes" : "No"); |
---|
104 | printf(" C Use Cavalli-Sforza chord measure? %s\n", |
---|
105 | cavalli ? "Yes" : "No"); |
---|
106 | printf(" R Use Reynolds genetic distance? %s\n", |
---|
107 | reynolds ? "Yes" : "No"); |
---|
108 | printf(" L Form of distance matrix? %s\n", |
---|
109 | lower ? "Lower-triangular" : "Square"); |
---|
110 | printf(" M Analyze multiple data sets?"); |
---|
111 | if (mulsets) |
---|
112 | printf(" Yes, %2hd sets\n", datasets); |
---|
113 | else |
---|
114 | printf(" No\n"); |
---|
115 | printf(" 0 Terminal type (IBM PC, VT52, ANSI)? %s\n", |
---|
116 | ibmpc ? "IBM PC" : |
---|
117 | ansi ? "ANSI" : |
---|
118 | vt52 ? "VT52" : "(none)"); |
---|
119 | printf(" 1 Print indications of progress of run? %s\n", |
---|
120 | progress ? "Yes" : "No"); |
---|
121 | printf("\nAre these settings correct? (type Y or the letter for one to change)\n"); |
---|
122 | scanf("%c%*[^\n]", &ch); |
---|
123 | getchar(); |
---|
124 | uppercase(&ch); |
---|
125 | if (ch == 'Y') |
---|
126 | break; |
---|
127 | if (ch == 'A' || ch == 'C' || ch == 'N' || ch == 'M' || ch == 'R' || |
---|
128 | ch == 'L' || ch == '0' || ch == '1') { |
---|
129 | switch (ch) { |
---|
130 | |
---|
131 | case 'A': |
---|
132 | all = !all; |
---|
133 | break; |
---|
134 | |
---|
135 | case 'C': |
---|
136 | cavalli = true; |
---|
137 | nei = false; |
---|
138 | reynolds = false; |
---|
139 | break; |
---|
140 | |
---|
141 | case 'N': |
---|
142 | cavalli = false; |
---|
143 | nei = true; |
---|
144 | reynolds = false; |
---|
145 | break; |
---|
146 | |
---|
147 | case 'R': |
---|
148 | reynolds = true; |
---|
149 | cavalli = false; |
---|
150 | nei = false; |
---|
151 | break; |
---|
152 | |
---|
153 | case 'L': |
---|
154 | lower = !lower; |
---|
155 | break; |
---|
156 | |
---|
157 | case 'M': |
---|
158 | mulsets = !mulsets; |
---|
159 | if (mulsets) { |
---|
160 | done1 = false; |
---|
161 | do { |
---|
162 | printf("How many data sets?\n"); |
---|
163 | scanf("%hd%*[^\n]", &datasets); |
---|
164 | getchar(); |
---|
165 | done1 = (datasets >= 1); |
---|
166 | if (!done1) |
---|
167 | printf("BAD DATA SETS NUMBER: it must be greater than 1\n"); |
---|
168 | } while (done1 != true); |
---|
169 | } |
---|
170 | break; |
---|
171 | |
---|
172 | case '0': |
---|
173 | if (ibmpc) { |
---|
174 | ibmpc = false; |
---|
175 | vt52 = true; |
---|
176 | } else { |
---|
177 | if (vt52) { |
---|
178 | vt52 = false; |
---|
179 | ansi = true; |
---|
180 | } else if (ansi) |
---|
181 | ansi = false; |
---|
182 | else |
---|
183 | ibmpc = true; |
---|
184 | } |
---|
185 | break; |
---|
186 | |
---|
187 | case '1': |
---|
188 | progress = !progress; |
---|
189 | break; |
---|
190 | } |
---|
191 | } else |
---|
192 | printf("Not a possible option!\n"); |
---|
193 | } |
---|
194 | putchar('\n'); |
---|
195 | } /* getoptions */ |
---|
196 | |
---|
197 | |
---|
198 | void doinit() |
---|
199 | { |
---|
200 | /* initializes variables */ |
---|
201 | short i; |
---|
202 | |
---|
203 | getnums(); |
---|
204 | x = (phenotype *)Malloc(numsp*sizeof(phenotype)); |
---|
205 | d = (double **)Malloc(numsp*sizeof(double *)); |
---|
206 | for (i = 0; i < (numsp); i++) |
---|
207 | d[i] = (double *)Malloc(numsp*sizeof(double)); |
---|
208 | alleles = (short *)Malloc(loci*sizeof(short)); |
---|
209 | getoptions(); |
---|
210 | } /* doinit */ |
---|
211 | |
---|
212 | |
---|
213 | void getalleles() |
---|
214 | { |
---|
215 | short i, cursp, curloc; |
---|
216 | |
---|
217 | if (!firstset) { |
---|
218 | if (eoln(infile)) { |
---|
219 | fscanf(infile, "%*[^\n]"); |
---|
220 | getc(infile); |
---|
221 | } |
---|
222 | fscanf(infile, "%hd%hd", &cursp, &curloc); |
---|
223 | if (cursp != numsp) { |
---|
224 | printf("\nERROR: INCONSISTENT NUMBER OF SPECIES IN DATA SET %4hd\n", ith); |
---|
225 | exit(-1); |
---|
226 | } |
---|
227 | loci = curloc; |
---|
228 | } |
---|
229 | totalleles = 0; |
---|
230 | fscanf(infile, "%*[^\n]"); |
---|
231 | getc(infile); |
---|
232 | for (i = 0; i < (loci); i++) { |
---|
233 | if (eoln(infile)) { |
---|
234 | fscanf(infile, "%*[^\n]"); |
---|
235 | getc(infile); |
---|
236 | } |
---|
237 | fscanf(infile, "%hd", &alleles[i]); |
---|
238 | totalleles += alleles[i]; |
---|
239 | } |
---|
240 | df = totalleles - loci; |
---|
241 | } /* getalleles */ |
---|
242 | |
---|
243 | void getdata() |
---|
244 | { |
---|
245 | /* read allele frequencies */ |
---|
246 | short i, j, k, m, n, p; |
---|
247 | double sum; |
---|
248 | |
---|
249 | for (i = 0; i < numsp; i++) |
---|
250 | x[i] = (phenotype)Malloc(totalleles*sizeof(double)); |
---|
251 | for (i = 1; i <= (numsp); i++) { |
---|
252 | fscanf(infile, "%*[^\n]"); |
---|
253 | getc(infile); |
---|
254 | for (j = 0; j < namelength; j++) |
---|
255 | nayms[i - 1][j] = getc(infile); |
---|
256 | m = 1; |
---|
257 | p = 1; |
---|
258 | for (j = 1; j <= (loci); j++) { |
---|
259 | sum = 0.0; |
---|
260 | if (all) |
---|
261 | n = alleles[j - 1]; |
---|
262 | else |
---|
263 | n = alleles[j - 1] - 1; |
---|
264 | for (k = 1; k <= n; k++) { |
---|
265 | if (eoln(infile)) { |
---|
266 | fscanf(infile, "%*[^\n]"); |
---|
267 | getc(infile); |
---|
268 | } |
---|
269 | fscanf(infile, "%lf", &x[i - 1][m - 1]); |
---|
270 | sum += x[i - 1][m - 1]; |
---|
271 | if (x[i - 1][m - 1] < 0.0) { |
---|
272 | printf("\nLOCUS%3hd IN SPECIES%3hd: AN ALLELE", j, i); |
---|
273 | printf(" FREQUENCY IS NEGATIVE\n"); |
---|
274 | exit(-1); |
---|
275 | } |
---|
276 | p++; |
---|
277 | m++; |
---|
278 | } |
---|
279 | if (all && fabs(sum - 1.0) > epsilon) { |
---|
280 | printf("\nLOCUS%3hd IN SPECIES%3hd: FREQUENCIES DO NOT ADD UP TO 1\n", |
---|
281 | j, i); |
---|
282 | exit(-1); |
---|
283 | } |
---|
284 | if (!all) { |
---|
285 | x[i - 1][m - 1] = 1.0 - sum; |
---|
286 | if (x[i - 1][m - 1] < -epsilon) { |
---|
287 | printf("\nLOCUS%3hd IN SPECIES%3hd: ",j,i); |
---|
288 | printf("FREQUENCIES ADD UP TO MORE THAN 1\n"); |
---|
289 | exit(-1); |
---|
290 | } |
---|
291 | m++; |
---|
292 | } |
---|
293 | } |
---|
294 | } |
---|
295 | } /* getdata */ |
---|
296 | |
---|
297 | |
---|
298 | void getinput() |
---|
299 | { |
---|
300 | /* read the input data */ |
---|
301 | getalleles(); |
---|
302 | getdata(); |
---|
303 | } /* getinput */ |
---|
304 | |
---|
305 | |
---|
306 | void makedists() |
---|
307 | { |
---|
308 | short i, j, k; |
---|
309 | double s, s1, s2, s3, f; |
---|
310 | double TEMP; |
---|
311 | |
---|
312 | for (i = 0; i < (numsp); i++) |
---|
313 | d[i][i] = 0.0; |
---|
314 | for (i = 1; i <= (numsp); i++) { |
---|
315 | for (j = 0; j <= i - 2; j++) { |
---|
316 | if (cavalli) { |
---|
317 | s = 0.0; |
---|
318 | for (k = 0; k < (totalleles); k++) { |
---|
319 | f = x[i - 1][k] * x[j][k]; |
---|
320 | if (f > 0.0) |
---|
321 | s += sqrt(f); |
---|
322 | } |
---|
323 | d[i - 1][j] = 4 * (loci - s) / df; |
---|
324 | } |
---|
325 | if (nei) { |
---|
326 | s1 = 0.0; |
---|
327 | s2 = 0.0; |
---|
328 | s3 = 0.0; |
---|
329 | for (k = 0; k < (totalleles); k++) { |
---|
330 | s1 += x[i - 1][k] * x[j][k]; |
---|
331 | TEMP = x[i - 1][k]; |
---|
332 | s2 += TEMP * TEMP; |
---|
333 | TEMP = x[j][k]; |
---|
334 | s3 += TEMP * TEMP; |
---|
335 | } |
---|
336 | if (s1 <= 1.0e-20) |
---|
337 | d[i - 1][j] = -1.0; |
---|
338 | else |
---|
339 | d[i - 1][j] = -log(s1 / sqrt(s2 * s3)); |
---|
340 | } |
---|
341 | if (reynolds) { |
---|
342 | s1 = 0.0; |
---|
343 | s2 = 0.0; |
---|
344 | for (k = 0; k < (totalleles); k++) { |
---|
345 | TEMP = x[i - 1][k] - x[j][k]; |
---|
346 | s1 += TEMP * TEMP; |
---|
347 | s2 += x[i - 1][k] * x[j][k]; |
---|
348 | } |
---|
349 | d[i - 1][j] = s1 / (loci * 2 - 2 * s2); |
---|
350 | } |
---|
351 | d[j][i - 1] = d[i - 1][j]; |
---|
352 | } |
---|
353 | } |
---|
354 | } /* makedists */ |
---|
355 | |
---|
356 | void writedists() |
---|
357 | { |
---|
358 | short i, j, k; |
---|
359 | |
---|
360 | fprintf(outfile, "%5hd\n", numsp); |
---|
361 | for (i = 0; i < (numsp); i++) { |
---|
362 | for (j = 0; j < namelength; j++) |
---|
363 | putc(nayms[i][j], outfile); |
---|
364 | if (lower) |
---|
365 | k = i; |
---|
366 | else |
---|
367 | k = numsp; |
---|
368 | for (j = 1; j <= k; j++) { |
---|
369 | fprintf(outfile, "%8.4f", d[i][j - 1]); |
---|
370 | if ((j + 1) % 9 == 0 && j < k) |
---|
371 | putc('\n', outfile); |
---|
372 | } |
---|
373 | putc('\n', outfile); |
---|
374 | } |
---|
375 | if (progress) |
---|
376 | printf("Distances written to file\n\n"); |
---|
377 | } /* writedists */ |
---|
378 | |
---|
379 | |
---|
380 | main(argc, argv) |
---|
381 | int argc; |
---|
382 | Char *argv[]; |
---|
383 | { /* main program */ |
---|
384 | char infilename[100],outfilename[100]; |
---|
385 | #ifdef MAC |
---|
386 | macsetup("Gendist",""); |
---|
387 | #endif |
---|
388 | openfile(&infile,INFILE,"r",argv[0],infilename); |
---|
389 | openfile(&outfile,OUTFILE,"w",argv[0],outfilename); |
---|
390 | |
---|
391 | ibmpc = ibmpc0; |
---|
392 | ansi = ansi0; |
---|
393 | vt52 = vt520; |
---|
394 | mulsets = false; |
---|
395 | firstset = true; |
---|
396 | datasets = 1; |
---|
397 | doinit(); |
---|
398 | nayms = (naym *)Malloc(numsp*sizeof(naym)); |
---|
399 | for (ith = 1; ith <= (datasets); ith++) { |
---|
400 | getinput(); |
---|
401 | firstset = false; |
---|
402 | if ((datasets > 1) && progress) |
---|
403 | printf("\nData set # %hd:\n\n",ith); |
---|
404 | makedists(); |
---|
405 | writedists(); |
---|
406 | } |
---|
407 | FClose(infile); |
---|
408 | FClose(outfile); |
---|
409 | #ifdef MAC |
---|
410 | fixmacfile(outfilename); |
---|
411 | #endif |
---|
412 | exit(0); |
---|
413 | } |
---|
414 | |
---|
415 | |
---|
416 | int eof(f) |
---|
417 | FILE *f; |
---|
418 | { |
---|
419 | register int ch; |
---|
420 | |
---|
421 | if (feof(f)) |
---|
422 | return 1; |
---|
423 | if (f == stdin) |
---|
424 | return 0; |
---|
425 | ch = getc(f); |
---|
426 | if (ch == EOF) |
---|
427 | return 1; |
---|
428 | ungetc(ch, f); |
---|
429 | return 0; |
---|
430 | } |
---|
431 | |
---|
432 | |
---|
433 | int eoln(f) |
---|
434 | FILE *f; |
---|
435 | { |
---|
436 | register int ch; |
---|
437 | |
---|
438 | ch = getc(f); |
---|
439 | if (ch == EOF) |
---|
440 | return 1; |
---|
441 | ungetc(ch, f); |
---|
442 | return (ch == '\n'); |
---|
443 | } |
---|
444 | |
---|
445 | void memerror() |
---|
446 | { |
---|
447 | printf("Error allocating memory\n"); |
---|
448 | exit(-1); |
---|
449 | } |
---|
450 | |
---|
451 | MALLOCRETURN *mymalloc(x) |
---|
452 | long x; |
---|
453 | { |
---|
454 | MALLOCRETURN *mem; |
---|
455 | mem = (MALLOCRETURN *)malloc(x); |
---|
456 | if (!mem) |
---|
457 | memerror(); |
---|
458 | else |
---|
459 | return (MALLOCRETURN *)mem; |
---|
460 | } |
---|
461 | |
---|