1 | #include "phylip.h" |
---|
2 | |
---|
3 | /* version 3.54c. (c) Copyright 1993 by Joseph Felsenstein. |
---|
4 | Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe. |
---|
5 | Permission is granted to copy and use this program provided no fee is |
---|
6 | charged for it and provided that this copyright notice is not removed. */ |
---|
7 | |
---|
8 | #define nmlngth 10 /* number of characters in species name */ |
---|
9 | |
---|
10 | #define ibmpc0 false |
---|
11 | #define ansi0 true |
---|
12 | #define vt520 false |
---|
13 | |
---|
14 | typedef Char naym[nmlngth]; |
---|
15 | typedef long longer[6]; |
---|
16 | typedef long *stepp; |
---|
17 | typedef struct steptr { |
---|
18 | stepp steps; |
---|
19 | double *dummy; |
---|
20 | } steptr; |
---|
21 | typedef enum { |
---|
22 | seqs, morphology, restsites, genefreqs |
---|
23 | } datatype; |
---|
24 | |
---|
25 | |
---|
26 | Static FILE *infile, *outfile; |
---|
27 | Static long spp, sites, loci, maxalleles, groups, newsites, newersites, |
---|
28 | newgroups, newergroups, nenzymes, reps, ws; |
---|
29 | Static boolean permute, jackknife, weights, factors, enzymes, all, |
---|
30 | printdata, progress, interleaved, ibmpc, vt52, ansi; |
---|
31 | Static longer seed; |
---|
32 | Static datatype data; |
---|
33 | Static steptr oldweight, weight, where, how_many, newwhere, newhowmany, |
---|
34 | newerwhere, newerhowmany, factor, newerfactor; |
---|
35 | Static naym *nayme; /* names of species */ |
---|
36 | Static long *alleles; |
---|
37 | Static Char **nodep; |
---|
38 | Static double **nodef; |
---|
39 | Static long **sppord; |
---|
40 | |
---|
41 | |
---|
42 | |
---|
43 | openfile(fp,filename,mode,application,perm) |
---|
44 | FILE **fp; |
---|
45 | char *filename; |
---|
46 | char *mode; |
---|
47 | char *application; |
---|
48 | char *perm; |
---|
49 | { |
---|
50 | FILE *of; |
---|
51 | char file[100]; |
---|
52 | strcpy(file,filename); |
---|
53 | while (1){ |
---|
54 | of = fopen(file,mode); |
---|
55 | if (of) |
---|
56 | break; |
---|
57 | else { |
---|
58 | switch (*mode){ |
---|
59 | case 'r': |
---|
60 | printf("%s: can't read %s\n",application,file); |
---|
61 | file[0] = '\0'; |
---|
62 | while (file[0] =='\0'){ |
---|
63 | printf("Please enter a new filename>"); |
---|
64 | gets(file);} |
---|
65 | break; |
---|
66 | case 'w': |
---|
67 | printf("%s: can't write %s\n",application,file); |
---|
68 | file[0] = '\0'; |
---|
69 | while (file[0] =='\0'){ |
---|
70 | printf("Please enter a new filename>"); |
---|
71 | gets(file);} |
---|
72 | break; |
---|
73 | } |
---|
74 | } |
---|
75 | } |
---|
76 | *fp=of; |
---|
77 | if (perm != NULL) |
---|
78 | strcpy(perm,file); |
---|
79 | } |
---|
80 | |
---|
81 | void uppercase(ch) |
---|
82 | Char *ch; |
---|
83 | { /* convert ch to upper case -- either ASCII or EBCDIC */ |
---|
84 | *ch = (islower(*ch) ? toupper(*ch) : (*ch)); |
---|
85 | } /* uppercase */ |
---|
86 | |
---|
87 | double randum(seed) |
---|
88 | long *seed; |
---|
89 | { |
---|
90 | /* random number generator -- slow but machine independent */ |
---|
91 | long i, j, k, sum; |
---|
92 | longer mult, newseed; |
---|
93 | double x; |
---|
94 | |
---|
95 | mult[0] = 13; |
---|
96 | mult[1] = 24; |
---|
97 | mult[2] = 22; |
---|
98 | mult[3] = 6; |
---|
99 | for (i = 0; i <= 5; i++) |
---|
100 | newseed[i] = 0; |
---|
101 | for (i = 0; i <= 5; i++) { |
---|
102 | sum = newseed[i]; |
---|
103 | k = i; |
---|
104 | if (i > 3) |
---|
105 | k = 3; |
---|
106 | for (j = 0; j <= k; j++) |
---|
107 | sum += mult[j] * seed[i - j]; |
---|
108 | newseed[i] = sum; |
---|
109 | for (j = i; j <= 4; j++) { |
---|
110 | newseed[j + 1] += newseed[j] / 64; |
---|
111 | newseed[j] &= 63; |
---|
112 | } |
---|
113 | } |
---|
114 | memcpy(seed, newseed, sizeof(longer)); |
---|
115 | seed[5] &= 3; |
---|
116 | x = 0.0; |
---|
117 | for (i = 0; i <= 5; i++) |
---|
118 | x = x / 64.0 + seed[i]; |
---|
119 | x /= 4.0; |
---|
120 | return x; |
---|
121 | } /* randum */ |
---|
122 | |
---|
123 | |
---|
124 | |
---|
125 | void getoptions() |
---|
126 | { |
---|
127 | /* interactively set options */ |
---|
128 | long i, inseed, reps0; |
---|
129 | Char ch; |
---|
130 | boolean done1; |
---|
131 | |
---|
132 | data = seqs; |
---|
133 | jackknife = false; |
---|
134 | all = false; |
---|
135 | reps = 100; |
---|
136 | printdata = false; |
---|
137 | progress = true; |
---|
138 | interleaved = true; |
---|
139 | do { |
---|
140 | printf("\nRandom number seed (must be odd)?\n"); |
---|
141 | scanf("%ld%*[^\n]", &inseed); |
---|
142 | getchar(); |
---|
143 | } while (!(inseed > 0 && (inseed & 1))); |
---|
144 | for (i = 0; i <= 5; i++) |
---|
145 | seed[i] = 0; |
---|
146 | i = 0; |
---|
147 | do { |
---|
148 | seed[i] = inseed & 63; |
---|
149 | inseed /= 64; |
---|
150 | i++; |
---|
151 | } while (inseed != 0); |
---|
152 | for (;;) { |
---|
153 | printf(ansi ? "\033[2J\033[H" : |
---|
154 | vt52 ? "\033E\033H" : "\n"); |
---|
155 | printf("\nBootstrapped sequences algorithm, version %s\n\n",VERSION); |
---|
156 | printf("Settings for this run:\n"); |
---|
157 | printf(" D Sequence, Morph, Rest., Gene Freqs? %s\n", |
---|
158 | (data == seqs ) ? "Molecular sequences" : |
---|
159 | (data == morphology ) ? "Discrete Morphology" : |
---|
160 | (data == restsites) ? "Restriction Sites" : |
---|
161 | (data == genefreqs) ? "Gene Frequencies" : ""); |
---|
162 | if (data == restsites) |
---|
163 | printf(" E Number of enzymes? %s\n", |
---|
164 | enzymes ? "Present in input file" : |
---|
165 | "Not present in input file"); |
---|
166 | if (data == genefreqs) |
---|
167 | printf(" A All alleles present at each locus? %s\n", |
---|
168 | all ? "Yes" : "No, one absent at each locus"); |
---|
169 | |
---|
170 | printf(" J Bootstrap, Jackknife, or Permute? %s\n", |
---|
171 | jackknife ? "Delete-half jackknife" : |
---|
172 | permute ? "Permute species for each character" : |
---|
173 | "Bootstrap"); |
---|
174 | printf(" R How many replicates?%5ld\n", reps); |
---|
175 | if (data == seqs || data == restsites) { |
---|
176 | printf(" I Input sequences interleaved? %s\n", |
---|
177 | interleaved ? "Yes" : "No, sequential"); |
---|
178 | } |
---|
179 | printf(" 0 Terminal type (IBM PC, VT52, ANSI)? %s\n", |
---|
180 | ibmpc ? "IBM PC" : |
---|
181 | ansi ? "ANSI" : |
---|
182 | vt52 ? "VT52" : "(none)"); |
---|
183 | printf(" 1 Print out the data at start of run %s\n", |
---|
184 | printdata ? "Yes" : "No"); |
---|
185 | printf(" 2 Print indications of progress of run %s\n", |
---|
186 | progress ? "Yes" : "No"); |
---|
187 | printf("\nAre these settings correct? (type Y or the letter for one to change)\n"); |
---|
188 | scanf("%c%*[^\n]", &ch); |
---|
189 | getchar(); |
---|
190 | uppercase(&ch); |
---|
191 | if (ch == 'Y') |
---|
192 | break; |
---|
193 | if (strchr("ADEJRI120",ch)){ |
---|
194 | switch (ch) { |
---|
195 | |
---|
196 | case 'D': |
---|
197 | if (data == genefreqs) |
---|
198 | data = seqs; |
---|
199 | else |
---|
200 | data = (datatype)((long)data + 1); |
---|
201 | break; |
---|
202 | |
---|
203 | case 'A': |
---|
204 | all = !all; |
---|
205 | break; |
---|
206 | |
---|
207 | case 'E': |
---|
208 | enzymes = !enzymes; |
---|
209 | break; |
---|
210 | |
---|
211 | case 'J': |
---|
212 | if (permute) |
---|
213 | permute = false; |
---|
214 | else if (jackknife) { |
---|
215 | jackknife = false; |
---|
216 | permute = true; |
---|
217 | } else |
---|
218 | jackknife = true; |
---|
219 | break; |
---|
220 | |
---|
221 | case 'R': |
---|
222 | done1 = true; |
---|
223 | reps0 = reps; |
---|
224 | do { |
---|
225 | printf("Number of replicates?\n"); |
---|
226 | scanf("%ld%*[^\n]", &reps); |
---|
227 | getchar(); |
---|
228 | done1 = (reps > 0); |
---|
229 | if (!done1) { |
---|
230 | printf("BAD NUMBER: must be positive\n"); |
---|
231 | reps = reps0; |
---|
232 | } |
---|
233 | } while (done1 != true); |
---|
234 | break; |
---|
235 | |
---|
236 | case 'I': |
---|
237 | interleaved = !interleaved; |
---|
238 | break; |
---|
239 | |
---|
240 | case '0': |
---|
241 | if (ibmpc) { |
---|
242 | ibmpc = false; |
---|
243 | vt52 = true; |
---|
244 | } else { |
---|
245 | if (vt52) { |
---|
246 | vt52 = false; |
---|
247 | ansi = true; |
---|
248 | } else if (ansi) |
---|
249 | ansi = false; |
---|
250 | else |
---|
251 | ibmpc = true; |
---|
252 | } |
---|
253 | break; |
---|
254 | |
---|
255 | case '1': |
---|
256 | printdata = !printdata; |
---|
257 | break; |
---|
258 | |
---|
259 | case '2': |
---|
260 | progress = !progress; |
---|
261 | break; |
---|
262 | } |
---|
263 | } else |
---|
264 | printf("Not a possible option!\n"); |
---|
265 | } |
---|
266 | if (printdata) |
---|
267 | fprintf(outfile, "\nBootstrapped sequences algorithm, version %s\n\n\n", |
---|
268 | VERSION); |
---|
269 | } /* getoptions */ |
---|
270 | |
---|
271 | Local Void getnums() |
---|
272 | { |
---|
273 | /* read numbers of species and of sites */ |
---|
274 | long i; |
---|
275 | |
---|
276 | fscanf(infile, "%ld%ld", &spp, &sites); |
---|
277 | loci = sites; |
---|
278 | maxalleles = 1; |
---|
279 | if (data == restsites && enzymes) |
---|
280 | fscanf(infile, "%ld", &nenzymes); |
---|
281 | if (data == genefreqs) { |
---|
282 | alleles = (long *)Malloc(sites*sizeof(long)); |
---|
283 | fscanf(infile, "%*[^\n]"); |
---|
284 | getc(infile); |
---|
285 | sites = 0; |
---|
286 | for (i = 0; i < (loci); i++) { |
---|
287 | if (eoln(infile)) { |
---|
288 | fscanf(infile, "%*[^\n]"); |
---|
289 | getc(infile); |
---|
290 | } |
---|
291 | fscanf(infile, "%ld", &alleles[i]); |
---|
292 | if (alleles[i] > maxalleles) |
---|
293 | maxalleles = alleles[i]; |
---|
294 | if (all) |
---|
295 | sites += alleles[i]; |
---|
296 | else |
---|
297 | sites += alleles[i] - 1; |
---|
298 | } |
---|
299 | if (!all) |
---|
300 | maxalleles--; |
---|
301 | } |
---|
302 | if (printdata) { |
---|
303 | if (data == genefreqs) |
---|
304 | fprintf(outfile, "%3ld species, %3ld loci\n", spp, loci); |
---|
305 | else |
---|
306 | fprintf(outfile, "%3ld species, %3ld sites\n", spp, sites); |
---|
307 | } |
---|
308 | } /* getnums */ |
---|
309 | |
---|
310 | Local Void inputfactors() |
---|
311 | { |
---|
312 | long i, j; |
---|
313 | Char ch, prevch; |
---|
314 | |
---|
315 | for (i = 2; i <= nmlngth; i++) |
---|
316 | ch = getc(infile); |
---|
317 | prevch = ' '; |
---|
318 | j = 0; |
---|
319 | for (i = 0; i < (sites); i++) { |
---|
320 | do { |
---|
321 | if (eoln(infile)) { |
---|
322 | fscanf(infile, "%*[^\n]"); |
---|
323 | getc(infile); |
---|
324 | } |
---|
325 | ch = getc(infile); |
---|
326 | } while (ch == ' '); |
---|
327 | if (ch != prevch) |
---|
328 | j++; |
---|
329 | prevch = ch; |
---|
330 | factor.steps[i] = j; |
---|
331 | } |
---|
332 | fscanf(infile, "%*[^\n]"); |
---|
333 | getc(infile); |
---|
334 | } /* inputfactors */ |
---|
335 | |
---|
336 | Local Void printfactors() |
---|
337 | { |
---|
338 | long i, j; |
---|
339 | Char ch; |
---|
340 | |
---|
341 | fprintf(outfile, "Factors (least significant digit)\n\n"); |
---|
342 | for (i = 1; i <= nmlngth + 3; i++) |
---|
343 | putc(' ', outfile); |
---|
344 | for (i = 1; i <= (sites); i++) { |
---|
345 | if (i % 55 == 1 && i != 1) { |
---|
346 | fprintf(outfile, " \n"); |
---|
347 | for (j = 1; j <= nmlngth + 3; j++) |
---|
348 | putc(' ', outfile); |
---|
349 | } |
---|
350 | putc(factor.steps[i - 1], outfile); |
---|
351 | } |
---|
352 | fprintf(outfile, "\n\n"); |
---|
353 | } /* printfactors */ |
---|
354 | |
---|
355 | Local Void inputweights() |
---|
356 | { |
---|
357 | /* input the character weights, 0 or 1 */ |
---|
358 | Char ch; |
---|
359 | long i; |
---|
360 | |
---|
361 | for (i = 1; i < nmlngth; i++) |
---|
362 | ch = getc(infile); |
---|
363 | for (i = 0; i < (sites); i++) { |
---|
364 | do { |
---|
365 | if (eoln(infile)) { |
---|
366 | fscanf(infile, "%*[^\n]"); |
---|
367 | getc(infile); |
---|
368 | } |
---|
369 | ch = getc(infile); |
---|
370 | } while (ch == ' '); |
---|
371 | oldweight.steps[i] = 1; |
---|
372 | if (ch == '0' || ch == '1') |
---|
373 | oldweight.steps[i] = ch - '0'; |
---|
374 | else { |
---|
375 | printf("BAD WEIGHT CHARACTER: %c -- WEIGHTS IN DNABOOT MUST BE 0 OR 1\n", |
---|
376 | ch); |
---|
377 | exit(-1); |
---|
378 | } |
---|
379 | } |
---|
380 | fscanf(infile, "%*[^\n]"); |
---|
381 | getc(infile); |
---|
382 | } /* inputweights */ |
---|
383 | |
---|
384 | Local Void printweights() |
---|
385 | { |
---|
386 | /* print out the weights of sites */ |
---|
387 | long i, j; |
---|
388 | |
---|
389 | fprintf(outfile, "\n Sites are weighted as follows:\n"); |
---|
390 | for (i = 1; i <= (sites); i++) { |
---|
391 | if ((i - 1) % 60 == 0) { |
---|
392 | putc('\n', outfile); |
---|
393 | for (j = 1; j <= nmlngth + 3; j++) |
---|
394 | putc(' ', outfile); |
---|
395 | } |
---|
396 | fprintf(outfile, "%ld", oldweight.steps[i - 1]); |
---|
397 | if (i % 10 == 0 && i % 60 != 0) |
---|
398 | putc(' ', outfile); |
---|
399 | |
---|
400 | } |
---|
401 | fprintf(outfile, "\n\n"); |
---|
402 | } /* printweights */ |
---|
403 | |
---|
404 | Local Void inputoptions() |
---|
405 | { |
---|
406 | /* input the information on the options */ |
---|
407 | Char ch; |
---|
408 | long extranum, i, j, k, l, m; |
---|
409 | |
---|
410 | factors = false; |
---|
411 | weights = false; |
---|
412 | if (data == genefreqs) { |
---|
413 | k = 0; |
---|
414 | l = 0; |
---|
415 | for (i = 0; i < (loci); i++) { |
---|
416 | if (all) |
---|
417 | m = alleles[i]; |
---|
418 | else |
---|
419 | m = alleles[i] - 1; |
---|
420 | k++; |
---|
421 | for (j = 1; j <= m; j++) { |
---|
422 | l++; |
---|
423 | factor.steps[l - 1] = k; |
---|
424 | } |
---|
425 | } |
---|
426 | } else { |
---|
427 | for (i = 1; i <= (sites); i++) |
---|
428 | factor.steps[i - 1] = i; |
---|
429 | } |
---|
430 | for (i = 0; i < (sites); i++) |
---|
431 | oldweight.steps[i] = 1; |
---|
432 | extranum = 0; |
---|
433 | while (!eoln(infile)) { |
---|
434 | ch = getc(infile); |
---|
435 | uppercase(&ch); |
---|
436 | if (ch != 'W' && ch != 'F') { |
---|
437 | if (ch != ' ') { |
---|
438 | printf("BAD OPTION CHARACTER: %c\n", ch); |
---|
439 | exit(-1); |
---|
440 | } |
---|
441 | continue; |
---|
442 | } |
---|
443 | switch (ch) { |
---|
444 | |
---|
445 | case 'F': |
---|
446 | factors = true; |
---|
447 | extranum++; |
---|
448 | break; |
---|
449 | |
---|
450 | case 'W': |
---|
451 | weights = true; |
---|
452 | extranum++; |
---|
453 | break; |
---|
454 | } |
---|
455 | } |
---|
456 | fscanf(infile, "%*[^\n]"); |
---|
457 | getc(infile); |
---|
458 | for (i = 1; i <= extranum; i++) { |
---|
459 | ch = getc(infile); |
---|
460 | uppercase(&ch); |
---|
461 | if (ch == 'F') |
---|
462 | inputfactors(); |
---|
463 | if (ch == 'W') |
---|
464 | inputweights(); |
---|
465 | if (ch != 'W' && ch != 'F'){ |
---|
466 | printf("ERROR: INCORRECT AUXILIARY OPTIONS LINE WHICH STARTS WITH %c\n", |
---|
467 | ch); |
---|
468 | exit(-1);} |
---|
469 | } |
---|
470 | if (factors && printdata) |
---|
471 | printfactors(); |
---|
472 | if (weights && printdata) |
---|
473 | printweights(); |
---|
474 | for (i = 0; i < (loci); i++) |
---|
475 | how_many.steps[i] = 0; |
---|
476 | for (i = 0; i < (loci); i++) |
---|
477 | where.steps[i] = 0; |
---|
478 | for (i = 1; i <= (sites); i++) { |
---|
479 | how_many.steps[factor.steps[i - 1] - 1]++; |
---|
480 | if (where.steps[factor.steps[i - 1] - 1] == 0) |
---|
481 | where.steps[factor.steps[i - 1] - 1] = i; |
---|
482 | } |
---|
483 | groups = factor.steps[sites - 1]; |
---|
484 | newgroups = 0; |
---|
485 | newsites = 0; |
---|
486 | for (i = 0; i < (groups); i++) { |
---|
487 | if (oldweight.steps[where.steps[i] - 1] > 0) { |
---|
488 | newgroups++; |
---|
489 | newsites += how_many.steps[i]; |
---|
490 | newwhere.steps[newgroups - 1] = where.steps[i]; |
---|
491 | newhowmany.steps[newgroups - 1] = how_many.steps[i]; |
---|
492 | } |
---|
493 | } |
---|
494 | } /* inputoptions */ |
---|
495 | |
---|
496 | Local Void inputdata() |
---|
497 | { |
---|
498 | /* input the names and sequences for each species */ |
---|
499 | long i, j, k, l, m, n, basesread, basesnew; |
---|
500 | double x; |
---|
501 | Char charstate; |
---|
502 | boolean allread, done; |
---|
503 | |
---|
504 | if (data == genefreqs) { |
---|
505 | nodef = (double **)Malloc(spp*sizeof(double *)); |
---|
506 | for (i = 0; i < (spp); i++) |
---|
507 | nodef[i] = (double *)Malloc(sites*sizeof(double)); |
---|
508 | } else { |
---|
509 | nodep = (Char **)Malloc(spp*sizeof(Char *)); |
---|
510 | for (i = 0; i < (spp); i++) |
---|
511 | nodep[i] = (Char *)Malloc(sites*sizeof(Char)); |
---|
512 | } |
---|
513 | j = nmlngth + (sites + (sites - 1) / 10) / 2 - 5; |
---|
514 | if (j < nmlngth - 1) |
---|
515 | j = nmlngth - 1; |
---|
516 | if (j > 37) |
---|
517 | j = 37; |
---|
518 | if (printdata) { |
---|
519 | fprintf(outfile, "Name"); |
---|
520 | for (i = 1; i <= j; i++) |
---|
521 | putc(' ', outfile); |
---|
522 | fprintf(outfile, "Data\n"); |
---|
523 | fprintf(outfile, "----"); |
---|
524 | for (i = 1; i <= j; i++) |
---|
525 | putc(' ', outfile); |
---|
526 | fprintf(outfile, "----\n\n"); |
---|
527 | } |
---|
528 | interleaved = (interleaved && ((data == seqs) || (data == restsites))); |
---|
529 | if (data == genefreqs) { |
---|
530 | for (i = 1; i <= (spp); i++) { |
---|
531 | for (j = 0; j < nmlngth; j++) { |
---|
532 | if (eof(infile) || eoln(infile)){ |
---|
533 | printf("ERROR: END-OF-LINE OR END-OF-FILE"); |
---|
534 | printf(" IN THE MIDDLE OF A SPECIES NAME\n"); |
---|
535 | exit(-1); |
---|
536 | } |
---|
537 | nayme[i - 1][j] = getc(infile); |
---|
538 | } |
---|
539 | j = 1; |
---|
540 | while (j <= sites && !eof(infile)) { |
---|
541 | if (eoln(infile)) { |
---|
542 | fscanf(infile, "%*[^\n]"); |
---|
543 | getc(infile); |
---|
544 | } |
---|
545 | fscanf(infile, "%lf", &x); |
---|
546 | if ((unsigned)x > 1.0) { |
---|
547 | printf("GENE FREQ OUTSIDE [0,1], species%3ld\n", i); |
---|
548 | exit(-1); |
---|
549 | } else { |
---|
550 | nodef[i - 1][j - 1] = x; |
---|
551 | j++; |
---|
552 | } |
---|
553 | } |
---|
554 | fscanf(infile, "%*[^\n]"); |
---|
555 | getc(infile); |
---|
556 | } |
---|
557 | return; |
---|
558 | } |
---|
559 | basesread = 0; |
---|
560 | allread = false; |
---|
561 | while (!allread) { |
---|
562 | allread = true; |
---|
563 | if (eoln(infile)) { |
---|
564 | fscanf(infile, "%*[^\n]"); |
---|
565 | getc(infile); |
---|
566 | } |
---|
567 | i = 1; |
---|
568 | while (i <= spp) { |
---|
569 | if ((interleaved && basesread == 0) || !interleaved) { |
---|
570 | for (j = 0; j < nmlngth; j++) { |
---|
571 | if (eof(infile) || eoln(infile)){ |
---|
572 | printf("ERROR: END-OF-LINE OR END-OF-FILE"); |
---|
573 | printf(" IN THE MIDDLE OF A SPECIES NAME\n"); |
---|
574 | exit(-1); |
---|
575 | } |
---|
576 | nayme[i - 1][j] = getc(infile); |
---|
577 | } |
---|
578 | } |
---|
579 | j = interleaved ? basesread : 0; |
---|
580 | done = false; |
---|
581 | while (!done && !eof(infile)) { |
---|
582 | if (interleaved) |
---|
583 | done = true; |
---|
584 | while (j < sites && !(eoln(infile) ||eof(infile))) { |
---|
585 | charstate = getc(infile); |
---|
586 | if (charstate == ' ' || |
---|
587 | (data == seqs && charstate >= '0' && charstate <= '9')) |
---|
588 | continue; |
---|
589 | uppercase(&charstate); |
---|
590 | j++; |
---|
591 | if (charstate == '.') |
---|
592 | charstate = nodep[0][j - 1]; |
---|
593 | nodep[i - 1][j - 1] = charstate; |
---|
594 | } |
---|
595 | if (interleaved) |
---|
596 | continue; |
---|
597 | if (j < sites) { |
---|
598 | fscanf(infile, "%*[^\n]"); |
---|
599 | getc(infile); |
---|
600 | } else if (j == sites) |
---|
601 | done = true; |
---|
602 | } |
---|
603 | if (interleaved && i == 1) |
---|
604 | basesnew = j; |
---|
605 | fscanf(infile, "%*[^\n]"); |
---|
606 | getc(infile); |
---|
607 | if ((interleaved && j != basesnew) || ((!interleaved) && j != sites)){ |
---|
608 | printf("ERROR: SEQUENCES OUT OF ALIGNMENT\n"); |
---|
609 | exit(-1);} |
---|
610 | i++; |
---|
611 | } |
---|
612 | if (interleaved) { |
---|
613 | basesread = basesnew; |
---|
614 | allread = (basesread == sites); |
---|
615 | } else |
---|
616 | allread = (i > spp); |
---|
617 | } |
---|
618 | if (!printdata) |
---|
619 | return; |
---|
620 | if (data == genefreqs) |
---|
621 | m = (sites - 1) / 8 + 1; |
---|
622 | else |
---|
623 | m = (sites - 1) / 60 + 1; |
---|
624 | for (i = 1; i <= m; i++) { |
---|
625 | for (j = 0; j < (spp); j++) { |
---|
626 | for (k = 0; k < nmlngth; k++) |
---|
627 | putc(nayme[j][k], outfile); |
---|
628 | fprintf(outfile, " "); |
---|
629 | if (data == genefreqs) |
---|
630 | l = i * 8; |
---|
631 | else |
---|
632 | l = i * 60; |
---|
633 | if (l > sites) |
---|
634 | l = sites; |
---|
635 | if (data == genefreqs) |
---|
636 | n = (i - 1) * 8; |
---|
637 | else |
---|
638 | n = (i - 1) * 60; |
---|
639 | for (k = n; k < l; k++) { |
---|
640 | if (data == genefreqs) |
---|
641 | fprintf(outfile, "%8.5f", nodef[j][k]); |
---|
642 | else { |
---|
643 | if (j + 1 > 1 && nodep[j][k] == nodep[0][k]) |
---|
644 | charstate = '.'; |
---|
645 | else |
---|
646 | charstate = nodep[j][k]; |
---|
647 | putc(charstate, outfile); |
---|
648 | if ((k + 1) % 10 == 0 && (k + 1) % 60 != 0) |
---|
649 | putc(' ', outfile); |
---|
650 | |
---|
651 | } |
---|
652 | } |
---|
653 | putc('\n', outfile); |
---|
654 | } |
---|
655 | putc('\n', outfile); |
---|
656 | } |
---|
657 | putc('\n', outfile); |
---|
658 | } /* inputdata */ |
---|
659 | |
---|
660 | |
---|
661 | Static Void doinput() |
---|
662 | { |
---|
663 | /* reads the input data */ |
---|
664 | getoptions(); |
---|
665 | getnums(); |
---|
666 | oldweight.steps = (stepp)Malloc(sites*sizeof(long)); |
---|
667 | oldweight.dummy = (double *)Malloc(sizeof(double)); |
---|
668 | weight.steps = (stepp)Malloc(sites*sizeof(long)); |
---|
669 | weight.dummy = (double *)Malloc(sizeof(double)); |
---|
670 | where.steps = (stepp)Malloc(loci*sizeof(long)); |
---|
671 | where.dummy = (double *)Malloc(sizeof(double)); |
---|
672 | how_many.steps = (stepp)Malloc(loci*sizeof(long)); |
---|
673 | how_many.dummy = (double *)Malloc(sizeof(double)); |
---|
674 | factor.steps = (stepp)Malloc(sites*sizeof(long)); |
---|
675 | factor.dummy = (double *)Malloc(sizeof(double)); |
---|
676 | newwhere.steps = (stepp)Malloc(loci*sizeof(long)); |
---|
677 | newwhere.dummy = (double *)Malloc(sizeof(double)); |
---|
678 | newhowmany.steps = (stepp)Malloc(loci*sizeof(long)); |
---|
679 | newhowmany.dummy = (double *)Malloc(sizeof(double)); |
---|
680 | newerwhere.steps = (stepp)Malloc(loci*sizeof(long)); |
---|
681 | newerwhere.dummy = (double *)Malloc(sizeof(double)); |
---|
682 | newerhowmany.steps = (stepp)Malloc(loci*sizeof(long)); |
---|
683 | newerhowmany.dummy = (double *)Malloc(sizeof(double)); |
---|
684 | newerfactor.steps = (stepp)Malloc(loci*maxalleles*sizeof(long)); |
---|
685 | newerfactor.dummy = (double *)Malloc(sizeof(double)); |
---|
686 | inputoptions(); |
---|
687 | nayme = (naym *)Malloc(spp*sizeof(naym)); |
---|
688 | inputdata(); |
---|
689 | } /* doinput */ |
---|
690 | |
---|
691 | Local Void bootweights() |
---|
692 | { |
---|
693 | /* sets up weights by resampling data */ |
---|
694 | long i, j, k; |
---|
695 | double p, q, r; |
---|
696 | |
---|
697 | ws = newgroups; |
---|
698 | for (i = 0; i < (ws); i++) |
---|
699 | weight.steps[i] = 0; |
---|
700 | if (jackknife) { |
---|
701 | if (newgroups & 1) { |
---|
702 | if (randum(seed) < 0.5) |
---|
703 | q = (newgroups - 1.0) / 2; |
---|
704 | else |
---|
705 | q = (newgroups + 1.0) / 2; |
---|
706 | } else |
---|
707 | q = newgroups / 2.0; |
---|
708 | r = newgroups; |
---|
709 | p = q / r; |
---|
710 | ws = 0; |
---|
711 | for (i = 0; i < (newgroups); i++) { |
---|
712 | if (randum(seed) < p) { |
---|
713 | weight.steps[i]++; |
---|
714 | ws++; |
---|
715 | q--; |
---|
716 | } |
---|
717 | r--; |
---|
718 | if (i + 1 < newgroups) |
---|
719 | p = q / r; |
---|
720 | } |
---|
721 | } else if (permute) { |
---|
722 | for (i = 0; i < (newgroups); i++) |
---|
723 | weight.steps[i] = 1; |
---|
724 | } else { |
---|
725 | for (i = 1; i <= (newgroups); i++) { |
---|
726 | j = (long)(newgroups * randum(seed)) + 1; |
---|
727 | weight.steps[j - 1]++; |
---|
728 | } |
---|
729 | } |
---|
730 | for (i = 0; i < (newgroups); i++) |
---|
731 | newerwhere.steps[i] = 0; |
---|
732 | for (i = 0; i < (newgroups); i++) |
---|
733 | newerhowmany.steps[i] = 0; |
---|
734 | newergroups = 0; |
---|
735 | newersites = 0; |
---|
736 | for (i = 0; i < (newgroups); i++) { |
---|
737 | for (j = 1; j <= (weight.steps[i]); j++) { |
---|
738 | newergroups++; |
---|
739 | for (k = 1; k <= (newhowmany.steps[i]); k++) { |
---|
740 | newersites++; |
---|
741 | newerfactor.steps[newersites - 1] = newergroups; |
---|
742 | } |
---|
743 | newerwhere.steps[newergroups - 1] = newwhere.steps[i]; |
---|
744 | newerhowmany.steps[newergroups - 1] = newhowmany.steps[i]; |
---|
745 | } |
---|
746 | } |
---|
747 | } /* bootweights */ |
---|
748 | |
---|
749 | void sppermute(n) |
---|
750 | long n; |
---|
751 | { long i, j, k; |
---|
752 | for (i = 1; i <= (spp - 1); i++) { |
---|
753 | k = (long)((i+1)* randum(seed)); |
---|
754 | j = sppord[n - 1][i]; |
---|
755 | sppord[n - 1][i] = sppord[n - 1][k]; |
---|
756 | sppord[n - 1][k] = j; |
---|
757 | } |
---|
758 | } /* sppermute */ |
---|
759 | |
---|
760 | void writedata() |
---|
761 | { |
---|
762 | /* write out one set of bootstrapped sequences */ |
---|
763 | long i, j, k, l, m, n, n2; |
---|
764 | double x; |
---|
765 | Char charstate; |
---|
766 | |
---|
767 | sppord = (long **)Malloc(newergroups*sizeof(long *)); |
---|
768 | for (i = 0; i < (newergroups); i++) |
---|
769 | sppord[i] = (long *)Malloc(spp*sizeof(long)); |
---|
770 | for (i = 0; i < (newergroups); i++) { |
---|
771 | for (j = 1; j <= (spp); j++) |
---|
772 | sppord[i][j - 1] = j; |
---|
773 | } |
---|
774 | if (data == restsites && enzymes) |
---|
775 | fprintf(outfile, "%5ld%5ld%5ld\n", spp, newergroups, nenzymes); |
---|
776 | else if (data == genefreqs) |
---|
777 | fprintf(outfile, "%5ld%5ld\n", spp, newergroups); |
---|
778 | else |
---|
779 | fprintf(outfile, "%5ld%5ld\n", spp, newersites); |
---|
780 | if (data == genefreqs) { |
---|
781 | for (i = 0; i < (newergroups); i++) |
---|
782 | fprintf(outfile, "%3ld", alleles[factor.steps[newerwhere.steps[i] - 1] - 1]); |
---|
783 | putc('\n', outfile); |
---|
784 | } |
---|
785 | l = 1; |
---|
786 | if (interleaved) |
---|
787 | m = 60; |
---|
788 | else |
---|
789 | m = newergroups; |
---|
790 | do { |
---|
791 | if (m > newergroups) |
---|
792 | m = newergroups; |
---|
793 | for (j = 0; j < (spp); j++) { |
---|
794 | n = 0; |
---|
795 | if (l == 1) { |
---|
796 | for (k = 0; k < nmlngth; k++) |
---|
797 | putc(nayme[j][k], outfile); |
---|
798 | } else { |
---|
799 | for (k = 1; k <= nmlngth; k++) |
---|
800 | putc(' ', outfile); |
---|
801 | } |
---|
802 | fprintf(outfile, " "); |
---|
803 | for (k = l - 1; k < m; k++) { |
---|
804 | if (permute && j + 1 == 1) |
---|
805 | sppermute(newerfactor.steps[n]); |
---|
806 | for (n2 = -1; n2 <= (newerhowmany.steps[k] - 2); n2++) { |
---|
807 | n++; |
---|
808 | if (data == genefreqs) { |
---|
809 | if (n > 1 && (n & 7) == 1) |
---|
810 | fprintf(outfile, "\n "); |
---|
811 | x = nodef[sppord[newerfactor.steps[n - 1] - 1][j] - 1][newerwhere.steps[k] + n2]; |
---|
812 | fprintf(outfile, "%8.5f", x); |
---|
813 | } else { |
---|
814 | if (!interleaved && n > 1 && n % 60 == 1) |
---|
815 | fprintf(outfile, "\n "); |
---|
816 | charstate = |
---|
817 | nodep[sppord[newerfactor.steps[n - 1] - 1][j] - 1][newerwhere.steps[k] + n2]; |
---|
818 | putc(charstate, outfile); |
---|
819 | if (n % 10 == 0 && n % 60 != 0) |
---|
820 | putc(' ', outfile); |
---|
821 | } |
---|
822 | } |
---|
823 | } |
---|
824 | putc('\n', outfile); |
---|
825 | } |
---|
826 | if (interleaved && i <= (newsites - 1) / 60) |
---|
827 | putc('\n', outfile); |
---|
828 | if (interleaved) { |
---|
829 | if (m < newersites) |
---|
830 | putc('\n', outfile); |
---|
831 | l += 60; |
---|
832 | m += 60; |
---|
833 | } |
---|
834 | } while (interleaved && l <= newersites); |
---|
835 | for (i = 0; i < (newergroups); i++) |
---|
836 | free(sppord[i]); |
---|
837 | free(sppord); |
---|
838 | } /* writedata */ |
---|
839 | |
---|
840 | |
---|
841 | Static Void bootwrite() |
---|
842 | { |
---|
843 | /* does bootstrapping and writes out data sets */ |
---|
844 | long rr, repdiv10; |
---|
845 | |
---|
846 | repdiv10 = reps / 10; |
---|
847 | if (repdiv10 < 1) |
---|
848 | repdiv10 = 1; |
---|
849 | if (progress) |
---|
850 | putchar('\n'); |
---|
851 | for (rr = 1; rr <= (reps); rr++) { |
---|
852 | bootweights(); |
---|
853 | writedata(); |
---|
854 | if (progress && rr % repdiv10 == 0) |
---|
855 | printf("completed replicate number %4ld\n", rr); |
---|
856 | } |
---|
857 | if (progress) |
---|
858 | printf("\nOutput written to output file\n\n"); |
---|
859 | } /* bootwrite */ |
---|
860 | |
---|
861 | |
---|
862 | main(argc, argv) |
---|
863 | int argc; |
---|
864 | Char *argv[]; |
---|
865 | { /* Read in sequences or frequencies and bootstrap or jackknife them */ |
---|
866 | char infilename[100],outfilename[100]; |
---|
867 | #ifdef MAC |
---|
868 | macsetup("Seqboot",""); |
---|
869 | #endif |
---|
870 | openfile(&infile,INFILE,"r",argv[0],infilename); |
---|
871 | openfile(&outfile,OUTFILE,"w",argv[0],outfilename); |
---|
872 | |
---|
873 | ibmpc = ibmpc0; |
---|
874 | ansi = ansi0; |
---|
875 | vt52 = vt520; |
---|
876 | doinput(); |
---|
877 | bootwrite(); |
---|
878 | FClose(infile); |
---|
879 | FClose(outfile); |
---|
880 | #ifdef MAC |
---|
881 | fixmacfile(outfilename); |
---|
882 | #endif |
---|
883 | exit(0); |
---|
884 | } |
---|
885 | |
---|
886 | |
---|
887 | int eof(f) |
---|
888 | FILE *f; |
---|
889 | { |
---|
890 | register int ch; |
---|
891 | |
---|
892 | if (feof(f)) |
---|
893 | return 1; |
---|
894 | if (f == stdin) |
---|
895 | return 0; |
---|
896 | ch = getc(f); |
---|
897 | if (ch == EOF) |
---|
898 | return 1; |
---|
899 | ungetc(ch, f); |
---|
900 | return 0; |
---|
901 | } |
---|
902 | |
---|
903 | |
---|
904 | int eoln(f) |
---|
905 | FILE *f; |
---|
906 | { |
---|
907 | register int ch; |
---|
908 | |
---|
909 | ch = getc(f); |
---|
910 | if (ch == EOF) |
---|
911 | return 1; |
---|
912 | ungetc(ch, f); |
---|
913 | return (ch == '\n'); |
---|
914 | } |
---|
915 | |
---|
916 | void memerror() |
---|
917 | { |
---|
918 | printf("Error allocating memory\n"); |
---|
919 | exit(-1); |
---|
920 | } |
---|
921 | |
---|
922 | MALLOCRETURN *mymalloc(x) |
---|
923 | long x; |
---|
924 | { |
---|
925 | MALLOCRETURN *mem; |
---|
926 | mem = (MALLOCRETURN *)malloc(x); |
---|
927 | if (!mem) |
---|
928 | memerror(); |
---|
929 | else |
---|
930 | return (MALLOCRETURN *)mem; |
---|
931 | } |
---|
932 | |
---|