1 | #include "phylip.h" |
---|
2 | |
---|
3 | /* version 3.6. (c) Copyright 1993-1997 by the University of Washington. |
---|
4 | Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. |
---|
5 | Permission is granted to copy and use this program provided no fee is |
---|
6 | charged for it and provided that this copyright notice is not removed. */ |
---|
7 | |
---|
8 | #define epsilong 0.02 /* a small number */ |
---|
9 | |
---|
10 | #ifndef OLDC |
---|
11 | /* function prototypes */ |
---|
12 | void getoptions(void); |
---|
13 | void allocrest(void); |
---|
14 | void doinit(void); |
---|
15 | void getalleles(void); |
---|
16 | void inputdata(void); |
---|
17 | void getinput(void); |
---|
18 | void makedists(void); |
---|
19 | void writedists(void); |
---|
20 | /* function prototypes */ |
---|
21 | #endif |
---|
22 | |
---|
23 | |
---|
24 | |
---|
25 | Char infilename[FNMLNGTH], outfilename[FNMLNGTH]; |
---|
26 | long loci, totalleles, df, datasets, ith; |
---|
27 | long nonodes; |
---|
28 | long *alleles; |
---|
29 | phenotype3 *x; |
---|
30 | double **d; |
---|
31 | boolean all, cavalli, lower, nei, reynolds, mulsets, |
---|
32 | firstset, progress; |
---|
33 | |
---|
34 | void getoptions() |
---|
35 | { |
---|
36 | /* interactively set options */ |
---|
37 | long loopcount; |
---|
38 | Char ch; |
---|
39 | |
---|
40 | all = false; |
---|
41 | cavalli = false; |
---|
42 | lower = false; |
---|
43 | nei = true; |
---|
44 | reynolds = false; |
---|
45 | lower = false; |
---|
46 | progress = true; |
---|
47 | loopcount = 0; |
---|
48 | for (;;) { |
---|
49 | cleerhome(); |
---|
50 | printf("\nGenetic Distance Matrix program, version %s\n\n",VERSION); |
---|
51 | printf("Settings for this run:\n"); |
---|
52 | printf(" A Input file contains all alleles at each locus? %s\n", |
---|
53 | all ? "Yes" : "One omitted at each locus"); |
---|
54 | printf(" N Use Nei genetic distance? %s\n", |
---|
55 | nei ? "Yes" : "No"); |
---|
56 | printf(" C Use Cavalli-Sforza chord measure? %s\n", |
---|
57 | cavalli ? "Yes" : "No"); |
---|
58 | printf(" R Use Reynolds genetic distance? %s\n", |
---|
59 | reynolds ? "Yes" : "No"); |
---|
60 | printf(" L Form of distance matrix? %s\n", |
---|
61 | lower ? "Lower-triangular" : "Square"); |
---|
62 | printf(" M Analyze multiple data sets?"); |
---|
63 | if (mulsets) |
---|
64 | printf(" Yes, %2ld sets\n", datasets); |
---|
65 | else |
---|
66 | printf(" No\n"); |
---|
67 | printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n", |
---|
68 | ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)"); |
---|
69 | printf(" 1 Print indications of progress of run? %s\n", |
---|
70 | progress ? "Yes" : "No"); |
---|
71 | printf("\n Y to accept these or type the letter for one to change\n"); |
---|
72 | #ifdef WIN32 |
---|
73 | phyFillScreenColor(); |
---|
74 | #endif |
---|
75 | scanf("%c%*[^\n]", &ch); |
---|
76 | getchar(); |
---|
77 | uppercase(&ch); |
---|
78 | if (ch == 'Y') |
---|
79 | break; |
---|
80 | if (strchr("ACNMRL01", ch) != NULL) { |
---|
81 | switch (ch) { |
---|
82 | |
---|
83 | case 'A': |
---|
84 | all = !all; |
---|
85 | break; |
---|
86 | |
---|
87 | case 'C': |
---|
88 | cavalli = true; |
---|
89 | nei = false; |
---|
90 | reynolds = false; |
---|
91 | break; |
---|
92 | |
---|
93 | case 'N': |
---|
94 | cavalli = false; |
---|
95 | nei = true; |
---|
96 | reynolds = false; |
---|
97 | break; |
---|
98 | |
---|
99 | case 'R': |
---|
100 | reynolds = true; |
---|
101 | cavalli = false; |
---|
102 | nei = false; |
---|
103 | break; |
---|
104 | |
---|
105 | case 'L': |
---|
106 | lower = !lower; |
---|
107 | break; |
---|
108 | |
---|
109 | case 'M': |
---|
110 | mulsets = !mulsets; |
---|
111 | if (mulsets) |
---|
112 | initdatasets(&datasets); |
---|
113 | break; |
---|
114 | |
---|
115 | case '0': |
---|
116 | initterminal(&ibmpc, &ansi); |
---|
117 | break; |
---|
118 | |
---|
119 | case '1': |
---|
120 | progress = !progress; |
---|
121 | break; |
---|
122 | } |
---|
123 | } else |
---|
124 | printf("Not a possible option!\n"); |
---|
125 | countup(&loopcount, 100); |
---|
126 | } |
---|
127 | putchar('\n'); |
---|
128 | } /* getoptions */ |
---|
129 | |
---|
130 | |
---|
131 | void allocrest() |
---|
132 | { |
---|
133 | long i; |
---|
134 | |
---|
135 | x = (phenotype3 *)Malloc(spp*sizeof(phenotype3)); |
---|
136 | d = (double **)Malloc(spp*sizeof(double *)); |
---|
137 | for (i = 0; i < (spp); i++) |
---|
138 | d[i] = (double *)Malloc(spp*sizeof(double)); |
---|
139 | alleles = (long *)Malloc(loci*sizeof(long)); |
---|
140 | nayme = (naym *)Malloc(spp*sizeof(naym)); |
---|
141 | } /* allocrest */ |
---|
142 | |
---|
143 | |
---|
144 | void doinit() |
---|
145 | { |
---|
146 | /* initializes variables */ |
---|
147 | |
---|
148 | inputnumbers(&spp, &loci, &nonodes, 1); |
---|
149 | getoptions(); |
---|
150 | allocrest(); |
---|
151 | } /* doinit */ |
---|
152 | |
---|
153 | |
---|
154 | void getalleles() |
---|
155 | { |
---|
156 | long i; |
---|
157 | |
---|
158 | if (!firstset) |
---|
159 | samenumsp(&loci, ith); |
---|
160 | totalleles = 0; |
---|
161 | scan_eoln(infile); |
---|
162 | for (i = 0; i < (loci); i++) { |
---|
163 | if (eoln(infile)) |
---|
164 | scan_eoln(infile); |
---|
165 | fscanf(infile, "%ld", &alleles[i]); |
---|
166 | totalleles += alleles[i]; |
---|
167 | } |
---|
168 | df = totalleles - loci; |
---|
169 | } /* getalleles */ |
---|
170 | |
---|
171 | |
---|
172 | void inputdata() |
---|
173 | { |
---|
174 | /* read allele frequencies */ |
---|
175 | long i, j, k, m, n, p; |
---|
176 | double sum; |
---|
177 | |
---|
178 | for (i = 0; i < spp; i++) |
---|
179 | x[i] = (phenotype3)Malloc(totalleles*sizeof(double)); |
---|
180 | for (i = 1; i <= (spp); i++) { |
---|
181 | scan_eoln(infile); |
---|
182 | initname(i-1); |
---|
183 | m = 1; |
---|
184 | p = 1; |
---|
185 | for (j = 1; j <= (loci); j++) { |
---|
186 | sum = 0.0; |
---|
187 | if (all) |
---|
188 | n = alleles[j - 1]; |
---|
189 | else |
---|
190 | n = alleles[j - 1] - 1; |
---|
191 | for (k = 1; k <= n; k++) { |
---|
192 | if (eoln(infile)) |
---|
193 | scan_eoln(infile); |
---|
194 | fscanf(infile, "%lf", &x[i - 1][m - 1]); |
---|
195 | sum += x[i - 1][m - 1]; |
---|
196 | if (x[i - 1][m - 1] < 0.0) { |
---|
197 | printf("\n\nERROR: Locus %ld in species %ld: an allele", j, i); |
---|
198 | printf(" frequency is negative\n\n"); |
---|
199 | exxit(-1); |
---|
200 | } |
---|
201 | p++; |
---|
202 | m++; |
---|
203 | } |
---|
204 | if (all && fabs(sum - 1.0) > epsilong) { |
---|
205 | printf( |
---|
206 | "\n\nERROR: Locus %ld in species %ld: frequencies do not add up to 1\n\n", |
---|
207 | j, i); |
---|
208 | exxit(-1); |
---|
209 | } |
---|
210 | if (!all) { |
---|
211 | x[i - 1][m - 1] = 1.0 - sum; |
---|
212 | if (x[i-1][m-1] < -epsilong) { |
---|
213 | printf("\n\nERROR: Locus %ld in species %ld: ",j,i); |
---|
214 | printf("frequencies add up to more than 1\n\n"); |
---|
215 | exxit(-1); |
---|
216 | } |
---|
217 | m++; |
---|
218 | } |
---|
219 | } |
---|
220 | } |
---|
221 | } /* inputdata */ |
---|
222 | |
---|
223 | |
---|
224 | void getinput() |
---|
225 | { |
---|
226 | /* read the input data */ |
---|
227 | getalleles(); |
---|
228 | inputdata(); |
---|
229 | } /* getinput */ |
---|
230 | |
---|
231 | |
---|
232 | void makedists() |
---|
233 | { |
---|
234 | long i, j, k; |
---|
235 | double s, s1, s2, s3, f; |
---|
236 | double TEMP; |
---|
237 | |
---|
238 | if (progress) |
---|
239 | printf("Distances calculated for species\n"); |
---|
240 | for (i = 0; i < spp; i++) |
---|
241 | d[i][i] = 0.0; |
---|
242 | for (i = 1; i <= spp; i++) { |
---|
243 | if (progress) { |
---|
244 | #ifdef WIN32 |
---|
245 | phyFillScreenColor(); |
---|
246 | #endif |
---|
247 | printf(" "); |
---|
248 | for (j = 0; j < nmlngth; j++) |
---|
249 | putchar(nayme[i - 1][j]); |
---|
250 | printf(" "); |
---|
251 | } |
---|
252 | for (j = 0; j <= i - 1; j++) { |
---|
253 | if (cavalli) { |
---|
254 | s = 0.0; |
---|
255 | for (k = 0; k < (totalleles); k++) { |
---|
256 | f = x[i - 1][k] * x[j][k]; |
---|
257 | if (f > 0.0) |
---|
258 | s += sqrt(f); |
---|
259 | else f = 0.0; |
---|
260 | } |
---|
261 | d[i - 1][j] = 4 * (loci - s) / df; |
---|
262 | } |
---|
263 | if (nei) { |
---|
264 | s1 = 0.0; |
---|
265 | s2 = 0.0; |
---|
266 | s3 = 0.0; |
---|
267 | for (k = 0; k < (totalleles); k++) { |
---|
268 | s1 += x[i - 1][k] * x[j][k]; |
---|
269 | TEMP = x[i - 1][k]; |
---|
270 | s2 += TEMP * TEMP; |
---|
271 | TEMP = x[j][k]; |
---|
272 | s3 += TEMP * TEMP; |
---|
273 | } |
---|
274 | if (s1 <= 1.0e-20) { |
---|
275 | d[i - 1][j] = -1.0; |
---|
276 | printf("\nWARNING: INFINITE DISTANCE BETWEEN SPECIES "); |
---|
277 | printf("%ld AND %ld; -1.0 WAS WRITTEN\n", i, j); |
---|
278 | } else |
---|
279 | d[i - 1][j] = fabs(-log(s1 / sqrt(s2 * s3))); |
---|
280 | } |
---|
281 | if (reynolds) { |
---|
282 | s1 = 0.0; |
---|
283 | s2 = 0.0; |
---|
284 | for (k = 0; k < (totalleles); k++) { |
---|
285 | TEMP = x[i - 1][k] - x[j][k]; |
---|
286 | s1 += TEMP * TEMP; |
---|
287 | s2 += x[i - 1][k] * x[j][k]; |
---|
288 | } |
---|
289 | d[i - 1][j] = s1 / (loci * 2 - 2 * s2); |
---|
290 | } |
---|
291 | if (progress) { |
---|
292 | putchar('.'); |
---|
293 | fflush(stdout); |
---|
294 | } |
---|
295 | d[j][i - 1] = d[i - 1][j]; |
---|
296 | } |
---|
297 | if (progress) { |
---|
298 | putchar('\n'); |
---|
299 | fflush(stdout); |
---|
300 | } |
---|
301 | } |
---|
302 | if (progress) { |
---|
303 | putchar('\n'); |
---|
304 | fflush(stdout); |
---|
305 | } |
---|
306 | } /* makedists */ |
---|
307 | |
---|
308 | |
---|
309 | void writedists() |
---|
310 | { |
---|
311 | long i, j, k; |
---|
312 | |
---|
313 | fprintf(outfile, "%5ld\n", spp); |
---|
314 | for (i = 0; i < (spp); i++) { |
---|
315 | for (j = 0; j < nmlngth; j++) |
---|
316 | putc(nayme[i][j], outfile); |
---|
317 | if (lower) |
---|
318 | k = i; |
---|
319 | else |
---|
320 | k = spp; |
---|
321 | for (j = 1; j <= k; j++) { |
---|
322 | fprintf(outfile, "%8.4f", d[i][j - 1]); |
---|
323 | if ((j + 1) % 9 == 0 && j < k) |
---|
324 | putc('\n', outfile); |
---|
325 | } |
---|
326 | putc('\n', outfile); |
---|
327 | } |
---|
328 | if (progress) |
---|
329 | printf("Distances written to file \"%s\"\n\n", outfilename); |
---|
330 | } /* writedists */ |
---|
331 | |
---|
332 | |
---|
333 | int main(int argc, Char *argv[]) |
---|
334 | { /* main program */ |
---|
335 | #ifdef MAC |
---|
336 | argc = 1; /* macsetup("Gendist",""); */ |
---|
337 | argv[0] = "Gendist"; |
---|
338 | #endif |
---|
339 | init(argc, argv); |
---|
340 | openfile(&infile,INFILE,"input file", "r",argv[0],infilename); |
---|
341 | openfile(&outfile,OUTFILE,"output file", "w",argv[0],outfilename); |
---|
342 | |
---|
343 | ibmpc = IBMCRT; |
---|
344 | ansi = ANSICRT; |
---|
345 | mulsets = false; |
---|
346 | firstset = true; |
---|
347 | datasets = 1; |
---|
348 | doinit(); |
---|
349 | for (ith = 1; ith <= (datasets); ith++) { |
---|
350 | getinput(); |
---|
351 | firstset = false; |
---|
352 | if ((datasets > 1) && progress) |
---|
353 | printf("\nData set # %ld:\n\n",ith); |
---|
354 | makedists(); |
---|
355 | writedists(); |
---|
356 | } |
---|
357 | FClose(infile); |
---|
358 | FClose(outfile); |
---|
359 | #ifdef MAC |
---|
360 | fixmacfile(outfilename); |
---|
361 | #endif |
---|
362 | printf("Done.\n\n"); |
---|
363 | #ifdef WIN32 |
---|
364 | phyRestoreConsoleAttributes(); |
---|
365 | #endif |
---|
366 | return 0; |
---|
367 | } |
---|