1 | /* |
---|
2 | * puzzle2.c |
---|
3 | * |
---|
4 | * |
---|
5 | * Part of TREE-PUZZLE 5.0 (June 2000) |
---|
6 | * |
---|
7 | * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer, |
---|
8 | * M. Vingron, and Arndt von Haeseler |
---|
9 | * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler |
---|
10 | * |
---|
11 | * All parts of the source except where indicated are distributed under |
---|
12 | * the GNU public licence. See http://www.opensource.org for details. |
---|
13 | */ |
---|
14 | |
---|
15 | |
---|
16 | #define EXTERN extern |
---|
17 | |
---|
18 | #include "puzzle.h" |
---|
19 | #include <string.h> |
---|
20 | |
---|
21 | #if PARALLEL |
---|
22 | # include "sched.h" |
---|
23 | #endif /* PARALLEL */ |
---|
24 | |
---|
25 | |
---|
26 | /******************************************************************************/ |
---|
27 | /* sequences */ |
---|
28 | /******************************************************************************/ |
---|
29 | |
---|
30 | /* read ten characters of current line as identifier */ |
---|
31 | void readid(FILE *infp, int t) |
---|
32 | { |
---|
33 | int i, j, flag, ci; |
---|
34 | |
---|
35 | for (i = 0; i < 10; i++) { |
---|
36 | ci = fgetc(infp); |
---|
37 | if (ci == EOF || !isprint(ci)) { |
---|
38 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no name for sequence %d)\n\n\n", t+1); |
---|
39 | exit(1); |
---|
40 | } |
---|
41 | Identif[t][i] = (char) ci; |
---|
42 | } |
---|
43 | /* convert leading blanks in taxon name to underscores */ |
---|
44 | flag = FALSE; |
---|
45 | for (i = 9; i > -1; i--) { |
---|
46 | if (flag == FALSE) { |
---|
47 | if (Identif[t][i] != ' ') flag = TRUE; |
---|
48 | } else { |
---|
49 | if (Identif[t][i] == ' ') Identif[t][i] = '_'; |
---|
50 | } |
---|
51 | } |
---|
52 | /* check whether this name is already used */ |
---|
53 | for (i = 0; i < t; i++) { /* compare with all other taxa */ |
---|
54 | flag = TRUE; /* assume identity */ |
---|
55 | for (j = 0; (j < 10) && (flag == TRUE); j++) |
---|
56 | if (Identif[t][j] != Identif[i][j]) |
---|
57 | flag = FALSE; |
---|
58 | if (flag) { |
---|
59 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurrences of sequence name '"); |
---|
60 | fputid(STDOUT, t); |
---|
61 | FPRINTF(STDOUTFILE "')\n\n\n"); |
---|
62 | exit(1); |
---|
63 | } |
---|
64 | } |
---|
65 | } |
---|
66 | |
---|
67 | /* read next allowed character */ |
---|
68 | char readnextcharacter(FILE *ifp, int notu, int nsite) |
---|
69 | { |
---|
70 | char c; |
---|
71 | |
---|
72 | /* ignore blanks and control characters except newline */ |
---|
73 | do { |
---|
74 | if (fscanf(ifp, "%c", &c) != 1) { |
---|
75 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1); |
---|
76 | fputid(STDOUT, notu); |
---|
77 | FPRINTF(STDOUTFILE "')\n\n\n"); |
---|
78 | exit(1); |
---|
79 | } |
---|
80 | } while (c == ' ' || (iscntrl((int) c) && c != '\n')); |
---|
81 | return c; |
---|
82 | } |
---|
83 | |
---|
84 | /* skip rest of the line */ |
---|
85 | void skiprestofline(FILE* ifp, int notu, int nsite) |
---|
86 | { |
---|
87 | int ci; |
---|
88 | |
---|
89 | /* read chars until the first newline */ |
---|
90 | do{ |
---|
91 | ci = fgetc(ifp); |
---|
92 | if (ci == EOF) { |
---|
93 | FPRINTF(STDOUTFILE "Unable to proceed (missing newline at position %d in sequence '", nsite + 1); |
---|
94 | fputid(STDOUT, notu); |
---|
95 | FPRINTF(STDOUTFILE "')\n\n\n"); |
---|
96 | exit(1); |
---|
97 | } |
---|
98 | } while ((char) ci != '\n'); |
---|
99 | } |
---|
100 | |
---|
101 | /* skip control characters and blanks */ |
---|
102 | void skipcntrl(FILE *ifp, int notu, int nsite) |
---|
103 | { |
---|
104 | int ci; |
---|
105 | |
---|
106 | /* read over all control characters and blanks */ |
---|
107 | do { |
---|
108 | ci = fgetc(ifp); |
---|
109 | if (ci == EOF) { |
---|
110 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1); |
---|
111 | fputid(STDOUT, notu); |
---|
112 | FPRINTF(STDOUTFILE "')\n\n\n"); |
---|
113 | exit(1); |
---|
114 | } |
---|
115 | } while (iscntrl(ci) || (char) ci == ' '); |
---|
116 | /* go one character back */ |
---|
117 | if (ungetc(ci, ifp) == EOF) { |
---|
118 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (positioning error at position %d in sequence '", nsite + 1); |
---|
119 | fputid(STDOUT, notu); |
---|
120 | FPRINTF(STDOUTFILE "')\n\n\n"); |
---|
121 | exit(1); |
---|
122 | } |
---|
123 | } |
---|
124 | |
---|
125 | /* read sequences of one data set */ |
---|
126 | void getseqs(FILE *ifp) |
---|
127 | { |
---|
128 | int notu, nsite, endofline, linelength, i; |
---|
129 | char c; |
---|
130 | |
---|
131 | seqchars = new_cmatrix(Maxspc, Maxseqc); |
---|
132 | /* read all characters */ |
---|
133 | nsite = 0; /* next site to be read */ |
---|
134 | while (nsite < Maxseqc) { |
---|
135 | /* read first taxon */ |
---|
136 | notu = 0; |
---|
137 | /* go to next true line */ |
---|
138 | skiprestofline(ifp, notu, nsite); |
---|
139 | skipcntrl(ifp, notu, nsite); |
---|
140 | if (nsite == 0) readid(ifp, notu); |
---|
141 | endofline = FALSE; |
---|
142 | linelength = 0; |
---|
143 | do { |
---|
144 | c = readnextcharacter(ifp, notu, nsite + linelength); |
---|
145 | if (c == '\n') endofline = TRUE; |
---|
146 | else if (c == '.') { |
---|
147 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (invalid character '.' at position "); |
---|
148 | FPRINTF(STDOUTFILE "%d in first sequence)\n\n\n", nsite + linelength + 1); |
---|
149 | exit(1); |
---|
150 | } else if (nsite + linelength < Maxseqc) { |
---|
151 | /* change to upper case */ |
---|
152 | seqchars[notu][nsite + linelength] = (char) toupper((int) c); |
---|
153 | linelength++; |
---|
154 | } else { |
---|
155 | endofline = TRUE; |
---|
156 | skiprestofline(ifp, notu, nsite + linelength); |
---|
157 | } |
---|
158 | } while (!endofline); |
---|
159 | if (linelength == 0) { |
---|
160 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line with length 0 at position %d in sequence '", nsite + 1); |
---|
161 | fputid(STDOUT, notu); |
---|
162 | FPRINTF(STDOUTFILE "')\n\n\n"); |
---|
163 | exit(1); |
---|
164 | } |
---|
165 | /* read other taxa */ |
---|
166 | for (notu = 1; notu < Maxspc; notu++) { |
---|
167 | /* go to next true line */ |
---|
168 | if (notu != 1) skiprestofline(ifp, notu, nsite); |
---|
169 | skipcntrl(ifp, notu, nsite); |
---|
170 | if (nsite == 0) readid(ifp, notu); |
---|
171 | for (i = nsite; i < nsite + linelength; i++) { |
---|
172 | c = readnextcharacter(ifp, notu, i); |
---|
173 | if (c == '\n') { /* too short */ |
---|
174 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line to short at position %d in sequence '", i + 1); |
---|
175 | fputid(STDOUT, notu); |
---|
176 | FPRINTF(STDOUTFILE "')\n\n\n"); |
---|
177 | exit(1); |
---|
178 | } else if (c == '.') { |
---|
179 | seqchars[notu][i] = seqchars[0][i]; |
---|
180 | } else { |
---|
181 | /* change to upper case */ |
---|
182 | seqchars[notu][i] = (char) toupper((int) c); |
---|
183 | } |
---|
184 | } |
---|
185 | } |
---|
186 | nsite = nsite + linelength; |
---|
187 | } |
---|
188 | } |
---|
189 | |
---|
190 | /* initialize identifer array */ |
---|
191 | void initid(int t) |
---|
192 | { |
---|
193 | int i, j; |
---|
194 | |
---|
195 | Identif = new_cmatrix(t, 10); |
---|
196 | for (i = 0; i < t; i++) |
---|
197 | for (j = 0; j < 10; j++) |
---|
198 | Identif[i][j] = ' '; |
---|
199 | } |
---|
200 | |
---|
201 | /* print identifier of specified taxon in full 10 char length */ |
---|
202 | void fputid10(FILE *ofp, int t) |
---|
203 | { |
---|
204 | int i; |
---|
205 | |
---|
206 | for (i = 0; i < 10; i++) fputc(Identif[t][i], ofp); |
---|
207 | } |
---|
208 | |
---|
209 | /* print identifier of specified taxon up to first space */ |
---|
210 | int fputid(FILE *ofp, int t) |
---|
211 | { |
---|
212 | int i; |
---|
213 | |
---|
214 | i = 0; |
---|
215 | while (Identif[t][i] != ' ' && i < 10) { |
---|
216 | fputc(Identif[t][i], ofp); |
---|
217 | i++; |
---|
218 | } |
---|
219 | return i; |
---|
220 | } |
---|
221 | |
---|
222 | /* read first line of sequence data set */ |
---|
223 | void getsizesites(FILE *ifp) |
---|
224 | { |
---|
225 | if (fscanf(ifp, "%d", &Maxspc) != 1) { |
---|
226 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sequences)\n\n\n"); |
---|
227 | exit(1); |
---|
228 | } |
---|
229 | if (fscanf(ifp, "%d", &Maxseqc) != 1) { |
---|
230 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sites)\n\n\n"); |
---|
231 | exit(1); |
---|
232 | } |
---|
233 | |
---|
234 | if (Maxspc < 4) { |
---|
235 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (less than 4 sequences)\n\n\n"); |
---|
236 | exit(1); |
---|
237 | } |
---|
238 | if (Maxspc > 257) { |
---|
239 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (more than 257 sequences)\n\n\n"); |
---|
240 | exit(1); |
---|
241 | } |
---|
242 | if (Maxseqc < 1) { |
---|
243 | FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no sequence sites)\n\n\n"); |
---|
244 | exit(1); |
---|
245 | } |
---|
246 | Maxbrnch = 2*Maxspc - 3; |
---|
247 | } |
---|
248 | |
---|
249 | /* read one data set - PHYLIP interleaved */ |
---|
250 | void getdataset(FILE *ifp) |
---|
251 | { |
---|
252 | initid(Maxspc); |
---|
253 | getseqs(ifp); |
---|
254 | } |
---|
255 | |
---|
256 | /* guess data type */ |
---|
257 | int guessdatatype() |
---|
258 | { |
---|
259 | uli numnucs, numchars, numbins; |
---|
260 | int notu, nsite; |
---|
261 | char c; |
---|
262 | |
---|
263 | /* count A, C, G, T, U, N */ |
---|
264 | numnucs = 0; |
---|
265 | numchars = 0; |
---|
266 | numbins = 0; |
---|
267 | for (notu = 0; notu < Maxspc; notu++) |
---|
268 | for (nsite = 0; nsite < Maxseqc; nsite++) { |
---|
269 | c = seqchars[notu][nsite]; |
---|
270 | if (c == 'A' || c == 'C' || c == 'G' || |
---|
271 | c == 'T' || c == 'U' || c == 'N') numnucs++; |
---|
272 | if (c != '-' && c != '?') numchars++; |
---|
273 | if (c == '0' || c == '1') numbins++; |
---|
274 | } |
---|
275 | if (numchars == 0) numchars = 1; |
---|
276 | /* more than 85 % frequency means nucleotide data */ |
---|
277 | if ((double) numnucs / (double) numchars > 0.85) return 0; |
---|
278 | else if ((double) numbins / (double) numchars > 0.2) return 2; |
---|
279 | else return 1; |
---|
280 | } |
---|
281 | |
---|
282 | /* translate characters into format used by ML engine */ |
---|
283 | void translatedataset() |
---|
284 | { |
---|
285 | int notu, sn, co; |
---|
286 | char c; |
---|
287 | cvector code; |
---|
288 | |
---|
289 | |
---|
290 | /* determine Maxsite - number of ML sites per taxon */ |
---|
291 | if (data_optn == 0 && SH_optn) { |
---|
292 | if (SHcodon) |
---|
293 | Maxsite = Maxseqc / 3; |
---|
294 | else |
---|
295 | Maxsite = Maxseqc / 2; /* assume doublets */ |
---|
296 | |
---|
297 | } else |
---|
298 | Maxsite = Maxseqc; |
---|
299 | if (data_optn == 0 && (Maxsite % 3) == 0 && !SH_optn) { |
---|
300 | if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3) |
---|
301 | Maxsite = Maxsite / 3; /* only one of the three codon positions */ |
---|
302 | if (codon_optn == 4) |
---|
303 | Maxsite = 2*(Maxsite / 3); /* 1st + 2nd codon positions */ |
---|
304 | } |
---|
305 | |
---|
306 | /* reserve memory */ |
---|
307 | if (Seqchar != NULL) free_cmatrix(Seqchar); |
---|
308 | Seqchar = new_cmatrix(Maxspc, Maxsite); |
---|
309 | |
---|
310 | /* code length */ |
---|
311 | if (data_optn == 0 && SH_optn) |
---|
312 | code = new_cvector(2); |
---|
313 | else |
---|
314 | code = new_cvector(1); |
---|
315 | |
---|
316 | /* decode characters */ |
---|
317 | if (data_optn == 0 && SH_optn) { /* SH doublets */ |
---|
318 | |
---|
319 | for (notu = 0; notu < Maxspc; notu++) { |
---|
320 | for (sn = 0; sn < Maxsite; sn++) { |
---|
321 | for (co = 0; co < 2; co++) { |
---|
322 | if (SHcodon) |
---|
323 | c = seqchars[notu][sn*3 + co]; |
---|
324 | else |
---|
325 | c = seqchars[notu][sn*2 + co]; |
---|
326 | code[co] = c; |
---|
327 | } |
---|
328 | Seqchar[notu][sn] = code2int(code); |
---|
329 | } |
---|
330 | } |
---|
331 | |
---|
332 | } else if (!(data_optn == 0 && (Maxseqc % 3) == 0)) { /* use all */ |
---|
333 | |
---|
334 | for (notu = 0; notu < Maxspc; notu++) { |
---|
335 | for (sn = 0; sn < Maxsite; sn++) { |
---|
336 | code[0] = seqchars[notu][sn]; |
---|
337 | Seqchar[notu][sn] = code2int(code); |
---|
338 | } |
---|
339 | } |
---|
340 | |
---|
341 | } else { /* codons */ |
---|
342 | |
---|
343 | for (notu = 0; notu < Maxspc; notu++) { |
---|
344 | for (sn = 0; sn < Maxsite; sn++) { |
---|
345 | if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3) |
---|
346 | code[0] = seqchars[notu][sn*3+codon_optn-1]; |
---|
347 | else if (codon_optn == 4) { |
---|
348 | if ((sn % 2) == 0) |
---|
349 | code[0] = seqchars[notu][(sn/2)*3]; |
---|
350 | else |
---|
351 | code[0] = seqchars[notu][((sn-1)/2)*3+1]; |
---|
352 | } else |
---|
353 | code[0] = seqchars[notu][sn]; |
---|
354 | Seqchar[notu][sn] = code2int(code); |
---|
355 | } |
---|
356 | } |
---|
357 | |
---|
358 | } |
---|
359 | free_cvector(code); |
---|
360 | } |
---|
361 | |
---|
362 | /* estimate mean base frequencies from translated data set */ |
---|
363 | void estimatebasefreqs() |
---|
364 | { |
---|
365 | int local_tpmradix, i, j; |
---|
366 | uli all, *gene; |
---|
367 | |
---|
368 | local_tpmradix = gettpmradix(); |
---|
369 | |
---|
370 | if (Freqtpm != NULL) free_dvector(Freqtpm); |
---|
371 | Freqtpm = new_dvector(local_tpmradix); |
---|
372 | |
---|
373 | if (Basecomp != NULL) free_imatrix(Basecomp); |
---|
374 | Basecomp = new_imatrix(Maxspc, local_tpmradix); |
---|
375 | |
---|
376 | gene = (uli *) malloc((unsigned) ((local_tpmradix + 1) * sizeof(uli))); |
---|
377 | if (gene == NULL) maerror("gene in estimatebasefreqs"); |
---|
378 | |
---|
379 | for (i = 0; i < local_tpmradix + 1; i++) gene[i] = 0; |
---|
380 | for (i = 0; i < Maxspc; i++) |
---|
381 | for (j = 0; j < local_tpmradix; j++) Basecomp[i][j] = 0; |
---|
382 | for (i = 0; i < Maxspc; i++) |
---|
383 | for (j = 0; j < Maxsite; j++) { |
---|
384 | gene[(int) Seqchar[i][j]]++; |
---|
385 | if (Seqchar[i][j] != local_tpmradix) Basecomp[i][(int) Seqchar[i][j]]++; |
---|
386 | } |
---|
387 | |
---|
388 | all = Maxspc * Maxsite - gene[local_tpmradix]; |
---|
389 | if (all != 0) { /* normal case */ |
---|
390 | for (i = 0; i < local_tpmradix; i++) |
---|
391 | Freqtpm[i] = (double) gene[i] / (double) all; |
---|
392 | } else { /* pathological case with no unique character in data set */ |
---|
393 | for (i = 0; i < local_tpmradix; i++) |
---|
394 | Freqtpm[i] = 1.0 / (double) local_tpmradix; |
---|
395 | } |
---|
396 | |
---|
397 | free(gene); |
---|
398 | |
---|
399 | Frequ_optn = TRUE; |
---|
400 | } |
---|
401 | |
---|
402 | /* guess model of substitution */ |
---|
403 | void guessmodel() |
---|
404 | { |
---|
405 | double c1, c2, c3, c4, c5, c6; |
---|
406 | dvector f; |
---|
407 | dmatrix a; |
---|
408 | int i; |
---|
409 | |
---|
410 | Dayhf_optn = FALSE; |
---|
411 | Jtt_optn = TRUE; |
---|
412 | mtrev_optn = FALSE; |
---|
413 | cprev_optn = FALSE; |
---|
414 | blosum62_optn = FALSE; |
---|
415 | vtmv_optn = FALSE; |
---|
416 | wag_optn = FALSE; |
---|
417 | TSparam = 2.0; |
---|
418 | YRparam = 1.0; |
---|
419 | optim_optn = TRUE; |
---|
420 | HKY_optn = TRUE; |
---|
421 | TN_optn = FALSE; |
---|
422 | |
---|
423 | if (data_optn == 1) { /* amino acids */ |
---|
424 | |
---|
425 | /* chi2 fit to amino acid frequencies */ |
---|
426 | |
---|
427 | f = new_dvector(20); |
---|
428 | a = new_dmatrix(20,20); |
---|
429 | /* chi2 distance Dayhoff */ |
---|
430 | dyhfdata(a, f); |
---|
431 | c1 = 0; |
---|
432 | for (i = 0; i < 20; i++) |
---|
433 | c1 = c1 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); |
---|
434 | /* chi2 distance JTT */ |
---|
435 | jttdata(a, f); |
---|
436 | c2 = 0; |
---|
437 | for (i = 0; i < 20; i++) |
---|
438 | c2 = c2 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); |
---|
439 | /* chi2 distance mtREV */ |
---|
440 | mtrevdata(a, f); |
---|
441 | c3 = 0; |
---|
442 | for (i = 0; i < 20; i++) |
---|
443 | c3 = c3 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); |
---|
444 | /* chi2 distance VT */ |
---|
445 | vtmvdata(a, f); |
---|
446 | c4 = 0; |
---|
447 | for (i = 0; i < 20; i++) |
---|
448 | c4 = c4 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); |
---|
449 | /* chi2 distance WAG */ |
---|
450 | wagdata(a, f); |
---|
451 | c5 = 0; |
---|
452 | for (i = 0; i < 20; i++) |
---|
453 | c5 = c5 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); |
---|
454 | /* chi2 distance cpREV */ |
---|
455 | cprev45data(a, f); |
---|
456 | c6 = 0; |
---|
457 | for (i = 0; i < 20; i++) |
---|
458 | c6 = c6 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]); |
---|
459 | |
---|
460 | free_dvector(f); |
---|
461 | free_dmatrix(a); |
---|
462 | |
---|
463 | #ifndef CPREV |
---|
464 | if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5)) { |
---|
465 | /* c1 -> Dayhoff */ |
---|
466 | Dayhf_optn = TRUE; |
---|
467 | Jtt_optn = FALSE; |
---|
468 | mtrev_optn = FALSE; |
---|
469 | cprev_optn = FALSE; |
---|
470 | vtmv_optn = FALSE; |
---|
471 | wag_optn = FALSE; |
---|
472 | FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); |
---|
473 | } else { |
---|
474 | if ((c2 < c3) && (c2 < c4) && (c2 < c5)) { |
---|
475 | /* c2 -> JTT */ |
---|
476 | Dayhf_optn = FALSE; |
---|
477 | Jtt_optn = TRUE; |
---|
478 | mtrev_optn = FALSE; |
---|
479 | cprev_optn = FALSE; |
---|
480 | vtmv_optn = FALSE; |
---|
481 | wag_optn = FALSE; |
---|
482 | FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); |
---|
483 | } else { |
---|
484 | if ((c3 < c4) && (c3 < c5)) { |
---|
485 | /* c3 -> mtREV */ |
---|
486 | Dayhf_optn = FALSE; |
---|
487 | Jtt_optn = FALSE; |
---|
488 | mtrev_optn = TRUE; |
---|
489 | cprev_optn = FALSE; |
---|
490 | vtmv_optn = FALSE; |
---|
491 | wag_optn = FALSE; |
---|
492 | FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n"); |
---|
493 | } else { |
---|
494 | if ((c4 < c5)) { |
---|
495 | /* c4 -> VT */ |
---|
496 | Dayhf_optn = FALSE; |
---|
497 | Jtt_optn = FALSE; |
---|
498 | mtrev_optn = FALSE; |
---|
499 | cprev_optn = FALSE; |
---|
500 | vtmv_optn = TRUE; |
---|
501 | wag_optn = FALSE; |
---|
502 | FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); |
---|
503 | } else { |
---|
504 | /* c5 -> WAG */ |
---|
505 | Dayhf_optn = FALSE; |
---|
506 | Jtt_optn = FALSE; |
---|
507 | mtrev_optn = FALSE; |
---|
508 | cprev_optn = FALSE; |
---|
509 | vtmv_optn = FALSE; |
---|
510 | wag_optn = TRUE; |
---|
511 | FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); |
---|
512 | } /* if c4 else c5 */ |
---|
513 | } /* if c3 else c4 */ |
---|
514 | } /* if c2 */ |
---|
515 | } /* if c1 */ |
---|
516 | |
---|
517 | #else /* CPREV */ |
---|
518 | |
---|
519 | if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5) && (c1 < c6)) { |
---|
520 | /* c1 -> Dayhoff */ |
---|
521 | Dayhf_optn = TRUE; |
---|
522 | Jtt_optn = FALSE; |
---|
523 | mtrev_optn = FALSE; |
---|
524 | cprev_optn = FALSE; |
---|
525 | vtmv_optn = FALSE; |
---|
526 | wag_optn = FALSE; |
---|
527 | FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); |
---|
528 | } else { |
---|
529 | if ((c2 < c3) && (c2 < c4) && (c2 < c5) && (c2 < c6)) { |
---|
530 | /* c2 -> JTT */ |
---|
531 | Dayhf_optn = FALSE; |
---|
532 | Jtt_optn = TRUE; |
---|
533 | mtrev_optn = FALSE; |
---|
534 | cprev_optn = FALSE; |
---|
535 | vtmv_optn = FALSE; |
---|
536 | wag_optn = FALSE; |
---|
537 | FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); |
---|
538 | } else { |
---|
539 | if ((c3 < c4) && (c3 < c5) && (c3 < c6)) { |
---|
540 | /* c3 -> mtREV */ |
---|
541 | Dayhf_optn = FALSE; |
---|
542 | Jtt_optn = FALSE; |
---|
543 | mtrev_optn = TRUE; |
---|
544 | cprev_optn = FALSE; |
---|
545 | vtmv_optn = FALSE; |
---|
546 | wag_optn = FALSE; |
---|
547 | FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n"); |
---|
548 | } else { |
---|
549 | if ((c4 < c5) && (c4 < c6)) { |
---|
550 | /* c4 -> VT */ |
---|
551 | Dayhf_optn = FALSE; |
---|
552 | Jtt_optn = FALSE; |
---|
553 | mtrev_optn = FALSE; |
---|
554 | cprev_optn = FALSE; |
---|
555 | vtmv_optn = TRUE; |
---|
556 | wag_optn = FALSE; |
---|
557 | FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); |
---|
558 | } else { |
---|
559 | if (c5 < c6) { |
---|
560 | /* c5 -> WAG */ |
---|
561 | Dayhf_optn = FALSE; |
---|
562 | Jtt_optn = FALSE; |
---|
563 | mtrev_optn = FALSE; |
---|
564 | cprev_optn = FALSE; |
---|
565 | vtmv_optn = FALSE; |
---|
566 | wag_optn = TRUE; |
---|
567 | FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n"); |
---|
568 | } else { |
---|
569 | /* if (c6) */ |
---|
570 | /* c6 -> cpREV */ |
---|
571 | Dayhf_optn = FALSE; |
---|
572 | Jtt_optn = FALSE; |
---|
573 | mtrev_optn = FALSE; |
---|
574 | cprev_optn = TRUE; |
---|
575 | vtmv_optn = FALSE; |
---|
576 | wag_optn = FALSE; |
---|
577 | FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on cpDNA)\n"); |
---|
578 | } /* if c5 else c6 */ |
---|
579 | } /* if c4 else c5 */ |
---|
580 | } /* if c3 else c4 */ |
---|
581 | } /* if c2 */ |
---|
582 | } /* if c1 */ |
---|
583 | #endif /* CPREV */ |
---|
584 | |
---|
585 | } else if (data_optn == 0) { |
---|
586 | FPRINTF(STDOUTFILE "(consists very likely of nucleotides)\n"); |
---|
587 | } else { |
---|
588 | FPRINTF(STDOUTFILE "(consists very likely of binary state data)\n"); |
---|
589 | } |
---|
590 | } /* guessmodel */ |
---|
591 | |
---|
592 | |
---|
593 | /******************************************************************************/ |
---|
594 | /* functions for representing and building puzzling step trees */ |
---|
595 | /******************************************************************************/ |
---|
596 | |
---|
597 | /* initialize tree with the following starting configuration |
---|
598 | |
---|
599 | 2 |
---|
600 | 0 +------- C(=2) |
---|
601 | A(=0) -----+ |
---|
602 | +------- B(=1) |
---|
603 | 1 |
---|
604 | */ |
---|
605 | void inittree() |
---|
606 | { |
---|
607 | int i; |
---|
608 | |
---|
609 | /* allocate the memory for the whole tree */ |
---|
610 | |
---|
611 | /* allocate memory for vector with all the edges of the tree */ |
---|
612 | edge = (ONEEDGE *) calloc(Maxbrnch, sizeof(ONEEDGE) ); |
---|
613 | if (edge == NULL) maerror("edge in inittree"); |
---|
614 | |
---|
615 | /* allocate memory for vector with edge numbers of leaves */ |
---|
616 | edgeofleaf = (int *) calloc(Maxspc, sizeof(int) ); |
---|
617 | if (edgeofleaf == NULL) maerror("edgeofleaf in inittree"); |
---|
618 | |
---|
619 | /* allocate memory for all the edges the edge map */ |
---|
620 | for (i = 0; i < Maxbrnch; i++) { |
---|
621 | edge[i].edgemap = (int *) calloc(Maxbrnch, sizeof(int) ); |
---|
622 | if (edge[i].edgemap == NULL) maerror("edgemap in inittree"); |
---|
623 | } |
---|
624 | |
---|
625 | /* number all edges */ |
---|
626 | for (i = 0; i < Maxbrnch; i++) edge[i].numedge = i; |
---|
627 | |
---|
628 | /* initialize tree */ |
---|
629 | |
---|
630 | nextedge = 3; |
---|
631 | nextleaf = 3; |
---|
632 | |
---|
633 | /* edge maps */ |
---|
634 | (edge[0].edgemap)[0] = 0; /* you are on the right edge */ |
---|
635 | (edge[0].edgemap)[1] = 4; /* go down left for leaf 1 */ |
---|
636 | (edge[0].edgemap)[2] = 5; /* go down right for leaf 2 */ |
---|
637 | (edge[1].edgemap)[0] = 1; /* go up for leaf 0 */ |
---|
638 | (edge[1].edgemap)[1] = 0; /* you are on the right edge */ |
---|
639 | (edge[1].edgemap)[2] = 3; /* go up/down right for leaf 2 */ |
---|
640 | (edge[2].edgemap)[0] = 1; /* go up for leaf 0 */ |
---|
641 | (edge[2].edgemap)[1] = 2; /* go up/down left for leaf 1 */ |
---|
642 | (edge[2].edgemap)[2] = 0; /* you are on the right edge */ |
---|
643 | |
---|
644 | /* interconnection */ |
---|
645 | edge[0].up = NULL; |
---|
646 | edge[0].downleft = &edge[1]; |
---|
647 | edge[0].downright = &edge[2]; |
---|
648 | edge[1].up = &edge[0]; |
---|
649 | edge[1].downleft = NULL; |
---|
650 | edge[1].downright = NULL; |
---|
651 | edge[2].up = &edge[0]; |
---|
652 | edge[2].downleft = NULL; |
---|
653 | edge[2].downright = NULL; |
---|
654 | |
---|
655 | /* edges of leaves */ |
---|
656 | edgeofleaf[0] = 0; |
---|
657 | edgeofleaf[1] = 1; |
---|
658 | edgeofleaf[2] = 2; |
---|
659 | } /* inittree */ |
---|
660 | |
---|
661 | /* add next leaf on the specified edge */ |
---|
662 | void addnextleaf(int dockedge) |
---|
663 | { |
---|
664 | int i; |
---|
665 | |
---|
666 | if (dockedge >= nextedge) { |
---|
667 | /* Trying to add leaf nextleaf to nonexisting edge dockedge */ |
---|
668 | FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR F TO DEVELOPERS\n\n\n"); |
---|
669 | exit(1); |
---|
670 | } |
---|
671 | |
---|
672 | if (nextleaf >= Maxspc) { |
---|
673 | /* Trying to add leaf nextleaf to a tree with Maxspc leaves */ |
---|
674 | FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR G TO DEVELOPERS\n\n\n"); |
---|
675 | exit(1); |
---|
676 | } |
---|
677 | |
---|
678 | /* necessary change in edgeofleaf if dockedge == edgeofleaf[0] */ |
---|
679 | if (edgeofleaf[0] == dockedge) edgeofleaf[0] = nextedge; |
---|
680 | |
---|
681 | /* adding nextedge to the tree */ |
---|
682 | edge[nextedge].up = edge[dockedge].up; |
---|
683 | edge[nextedge].downleft = &edge[dockedge]; |
---|
684 | edge[nextedge].downright = &edge[nextedge+1]; |
---|
685 | edge[dockedge].up = &edge[nextedge]; |
---|
686 | |
---|
687 | if (edge[nextedge].up != NULL) { |
---|
688 | if ( ((edge[nextedge].up)->downleft) == &edge[dockedge] ) |
---|
689 | (edge[nextedge].up)->downleft = &edge[nextedge]; |
---|
690 | else |
---|
691 | (edge[nextedge].up)->downright = &edge[nextedge]; |
---|
692 | } |
---|
693 | |
---|
694 | /* adding nextedge + 1 to the tree */ |
---|
695 | edge[nextedge+1].up = &edge[nextedge]; |
---|
696 | edge[nextedge+1].downleft = NULL; |
---|
697 | edge[nextedge+1].downright = NULL; |
---|
698 | edgeofleaf[nextleaf] = nextedge+1; |
---|
699 | |
---|
700 | /* the two new edges get info about the old edges */ |
---|
701 | /* nextedge */ |
---|
702 | for (i = 0; i < nextedge; i++) { |
---|
703 | switch ( (edge[dockedge].edgemap)[i] ) { |
---|
704 | |
---|
705 | /* down right changes to down left */ |
---|
706 | case 5: (edge[nextedge].edgemap)[i] = 4; |
---|
707 | break; |
---|
708 | |
---|
709 | /* null changes to down left */ |
---|
710 | case 0: (edge[nextedge].edgemap)[i] = 4; |
---|
711 | break; |
---|
712 | |
---|
713 | default: (edge[nextedge].edgemap)[i] = |
---|
714 | (edge[dockedge].edgemap)[i]; |
---|
715 | break; |
---|
716 | } |
---|
717 | } |
---|
718 | |
---|
719 | /* nextedge + 1 */ |
---|
720 | for (i = 0; i < nextedge; i++) { |
---|
721 | switch ( (edge[dockedge].edgemap)[i] ) { |
---|
722 | |
---|
723 | /* up/down left changes to up */ |
---|
724 | case 2: (edge[nextedge+1].edgemap)[i] = 1; |
---|
725 | break; |
---|
726 | |
---|
727 | /* up/down right changes to up */ |
---|
728 | case 3: (edge[nextedge+1].edgemap)[i] = 1; |
---|
729 | break; |
---|
730 | |
---|
731 | /* down left changes to up/down left */ |
---|
732 | case 4: (edge[nextedge+1].edgemap)[i] = 2; |
---|
733 | break; |
---|
734 | |
---|
735 | /* down right changes to up/down left */ |
---|
736 | case 5: (edge[nextedge+1].edgemap)[i] = 2; |
---|
737 | break; |
---|
738 | |
---|
739 | /* null changes to up/down left */ |
---|
740 | case 0: (edge[nextedge+1].edgemap)[i] = 2; |
---|
741 | break; |
---|
742 | |
---|
743 | /* up stays up */ |
---|
744 | default: (edge[nextedge+1].edgemap)[i] = |
---|
745 | (edge[dockedge].edgemap)[i]; |
---|
746 | break; |
---|
747 | } |
---|
748 | } |
---|
749 | |
---|
750 | /* dockedge */ |
---|
751 | for (i = 0; i < nextedge; i++) { |
---|
752 | switch ( (edge[dockedge].edgemap)[i] ) { |
---|
753 | |
---|
754 | /* up/down right changes to up */ |
---|
755 | case 3: (edge[dockedge].edgemap)[i] = 1; |
---|
756 | break; |
---|
757 | |
---|
758 | /* up/down left changes to up */ |
---|
759 | case 2: (edge[dockedge].edgemap)[i] = 1; |
---|
760 | break; |
---|
761 | |
---|
762 | default: break; |
---|
763 | } |
---|
764 | } |
---|
765 | |
---|
766 | /* all edgemaps are updated for the two new edges */ |
---|
767 | /* nextedge */ |
---|
768 | (edge[nextedge].edgemap)[nextedge] = 0; |
---|
769 | (edge[nextedge].edgemap)[nextedge+1] = 5; /* down right */ |
---|
770 | |
---|
771 | /* nextedge + 1 */ |
---|
772 | (edge[nextedge+1].edgemap)[nextedge] = 1; /* up */ |
---|
773 | (edge[nextedge+1].edgemap)[nextedge+1] = 0; |
---|
774 | |
---|
775 | /* all other edges */ |
---|
776 | for (i = 0; i < nextedge; i++) { |
---|
777 | (edge[i].edgemap)[nextedge] = (edge[i].edgemap)[dockedge]; |
---|
778 | (edge[i].edgemap)[nextedge+1] = (edge[i].edgemap)[dockedge]; |
---|
779 | } |
---|
780 | |
---|
781 | /* an extra for dockedge */ |
---|
782 | (edge[dockedge].edgemap)[nextedge] = 1; /* up */ |
---|
783 | (edge[dockedge].edgemap)[nextedge+1] = 3; /* up/down right */ |
---|
784 | |
---|
785 | nextleaf++; |
---|
786 | nextedge = nextedge + 2; |
---|
787 | } /* addnextleaf */ |
---|
788 | |
---|
789 | |
---|
790 | /* free memory (to be called after inittree) */ |
---|
791 | void freetree() |
---|
792 | { |
---|
793 | int i; |
---|
794 | |
---|
795 | for (i = 0; i < 2 * Maxspc - 3; i++) free(edge[i].edgemap); |
---|
796 | free(edge); |
---|
797 | free(edgeofleaf); |
---|
798 | } /* freetree */ |
---|
799 | |
---|
800 | /* writes OTU sitting on edge ed */ |
---|
801 | void writeOTU(FILE *outfp, int ed) |
---|
802 | { |
---|
803 | int i; |
---|
804 | |
---|
805 | /* test whether we are on a leaf */ |
---|
806 | if (edge[ed].downright == NULL && edge[ed].downleft == NULL) { |
---|
807 | for (i = 1; i < nextleaf; i++) { |
---|
808 | if (edgeofleaf[i] == ed) { /* i is the leaf of ed */ |
---|
809 | column += fputid(outfp, trueID[i]); |
---|
810 | return; |
---|
811 | } |
---|
812 | } |
---|
813 | } |
---|
814 | |
---|
815 | /* we are NOT on a leaf */ |
---|
816 | fprintf(outfp, "("); |
---|
817 | column++; |
---|
818 | writeOTU(outfp, edge[ed].downleft->numedge); |
---|
819 | fprintf(outfp, ","); |
---|
820 | column++; |
---|
821 | column++; |
---|
822 | if (column > 55) { |
---|
823 | column = 2; |
---|
824 | fprintf(outfp, "\n "); |
---|
825 | } |
---|
826 | writeOTU(outfp, edge[ed].downright->numedge); |
---|
827 | fprintf(outfp, ")"); |
---|
828 | column++; |
---|
829 | } /* writeOTU */ |
---|
830 | |
---|
831 | /* write tree */ |
---|
832 | void writetree(FILE *outfp) |
---|
833 | { |
---|
834 | column = 1; |
---|
835 | fprintf(outfp, "("); |
---|
836 | column += fputid(outfp, trueID[0]) + 3; |
---|
837 | fprintf(outfp, ","); |
---|
838 | writeOTU(outfp, edge[edgeofleaf[0]].downleft->numedge); |
---|
839 | column++; |
---|
840 | column++; |
---|
841 | fprintf(outfp, ","); |
---|
842 | writeOTU(outfp, edge[edgeofleaf[0]].downright->numedge); |
---|
843 | fprintf(outfp, ");\n"); |
---|
844 | } /* writetree */ |
---|
845 | |
---|
846 | |
---|
847 | /* clear all edgeinfos */ |
---|
848 | void resetedgeinfo() |
---|
849 | { |
---|
850 | int i; |
---|
851 | |
---|
852 | for (i = 0; i < nextedge; i++) |
---|
853 | edge[i].edgeinfo = 0; |
---|
854 | } /* resetedgeinfo */ |
---|
855 | |
---|
856 | /* increment all edgeinfo between leaf A and B */ |
---|
857 | void incrementedgeinfo(int A, int B) |
---|
858 | { |
---|
859 | int curredge, finaledge, nextstep; |
---|
860 | |
---|
861 | if (A == B) return; |
---|
862 | |
---|
863 | finaledge = edgeofleaf[B]; |
---|
864 | |
---|
865 | curredge = edgeofleaf[A]; |
---|
866 | edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1; |
---|
867 | |
---|
868 | while (curredge != finaledge) { |
---|
869 | nextstep = (edge[curredge].edgemap)[finaledge]; |
---|
870 | switch (nextstep) { |
---|
871 | |
---|
872 | /* up */ |
---|
873 | case 1: curredge = (edge[curredge].up)->numedge; |
---|
874 | break; |
---|
875 | |
---|
876 | /* up/down left */ |
---|
877 | case 2: curredge = ((edge[curredge].up)->downleft)->numedge; |
---|
878 | break; |
---|
879 | |
---|
880 | /* up/down right */ |
---|
881 | case 3: curredge = ((edge[curredge].up)->downright)->numedge; |
---|
882 | break; |
---|
883 | |
---|
884 | /* down left */ |
---|
885 | case 4: curredge = (edge[curredge].downleft)->numedge; |
---|
886 | break; |
---|
887 | |
---|
888 | /* down right */ |
---|
889 | case 5: curredge = (edge[curredge].downright)->numedge; |
---|
890 | break; |
---|
891 | |
---|
892 | } |
---|
893 | edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1; |
---|
894 | } |
---|
895 | } /* incrementedgeinfo */ |
---|
896 | |
---|
897 | /* checks which edge has the lowest edgeinfo |
---|
898 | if there are several edges with the same lowest edgeinfo, |
---|
899 | one of them will be selected randomly */ |
---|
900 | void minimumedgeinfo() |
---|
901 | { |
---|
902 | int i, k, howmany, randomnum; |
---|
903 | |
---|
904 | howmany = 1; |
---|
905 | minedge = 0; |
---|
906 | mininfo = edge[0].edgeinfo; |
---|
907 | for (i = 1; i < nextedge; i++) |
---|
908 | if (edge[i].edgeinfo <= mininfo) { |
---|
909 | if (edge[i].edgeinfo == mininfo) { |
---|
910 | howmany++; |
---|
911 | } else { |
---|
912 | minedge = i; |
---|
913 | mininfo = edge[i].edgeinfo; |
---|
914 | howmany = 1; |
---|
915 | } |
---|
916 | } |
---|
917 | |
---|
918 | if (howmany > 1) { /* draw random edge */ |
---|
919 | randomnum = randominteger(howmany) + 1; /* 1 to howmany */ |
---|
920 | i = -1; |
---|
921 | for (k = 0; k < randomnum; k++) { |
---|
922 | do { |
---|
923 | i++; |
---|
924 | } while (edge[i].edgeinfo != mininfo); |
---|
925 | minedge = i; |
---|
926 | } |
---|
927 | } |
---|
928 | } /* minimumedgeinfo */ |
---|
929 | |
---|
930 | |
---|
931 | |
---|
932 | |
---|
933 | /*******************************************/ |
---|
934 | /* tree sorting */ |
---|
935 | /*******************************************/ |
---|
936 | |
---|
937 | /* compute address of the 4 int (sort key) in the 4 int node */ |
---|
938 | int ct_sortkeyaddr(int addr) |
---|
939 | { |
---|
940 | int a, res; |
---|
941 | a = addr % 4; |
---|
942 | res = addr - a + 3; |
---|
943 | return res; |
---|
944 | } |
---|
945 | |
---|
946 | |
---|
947 | /**********/ |
---|
948 | |
---|
949 | /* compute address of the next edge pointer in a 4 int node (0->1->2->0) */ |
---|
950 | int ct_nextedgeaddr(int addr) |
---|
951 | { |
---|
952 | int a, res; |
---|
953 | a = addr % 4; |
---|
954 | if ( a == 2 ) { res = addr - 2; } |
---|
955 | else { res = addr + 1; } |
---|
956 | return res; |
---|
957 | } |
---|
958 | |
---|
959 | |
---|
960 | /**********/ |
---|
961 | |
---|
962 | /* compute address of 1st edge of a 4 int node from node number */ |
---|
963 | int ct_1stedge(int node) |
---|
964 | { |
---|
965 | int res; |
---|
966 | res = 4 * node; |
---|
967 | return res; |
---|
968 | } |
---|
969 | |
---|
970 | |
---|
971 | /**********/ |
---|
972 | |
---|
973 | /* compute address of 2nd edge of a 4 int node from node number */ |
---|
974 | int ct_2ndedge(int node) |
---|
975 | { |
---|
976 | int res; |
---|
977 | res = 4 * node +1; |
---|
978 | return res; |
---|
979 | } |
---|
980 | |
---|
981 | |
---|
982 | /**********/ |
---|
983 | |
---|
984 | /* compute address of 3rd edge of a 4 int node from node number */ |
---|
985 | int ct_3rdedge(int node) |
---|
986 | { |
---|
987 | int res; |
---|
988 | res = 4 * node +2; |
---|
989 | return res; |
---|
990 | } |
---|
991 | |
---|
992 | |
---|
993 | /**********/ |
---|
994 | |
---|
995 | /* check whether node 'node' is a leaf (2nd/3rd edge pointer = -1) */ |
---|
996 | int ct_isleaf(int node, int *ctree) |
---|
997 | { |
---|
998 | return (ctree[ct_3rdedge(node)] < 0); |
---|
999 | } |
---|
1000 | |
---|
1001 | |
---|
1002 | /**********/ |
---|
1003 | |
---|
1004 | /* compute node number of 4 int node from an edge addr. */ |
---|
1005 | int ct_addr2node(int addr) |
---|
1006 | { |
---|
1007 | int a, res; |
---|
1008 | a = addr % 4; |
---|
1009 | res = (int) ((addr - a) / 4); |
---|
1010 | return res; |
---|
1011 | } |
---|
1012 | |
---|
1013 | |
---|
1014 | /**********/ |
---|
1015 | |
---|
1016 | /* print graph pointers for checking */ |
---|
1017 | void printctree(int *ctree) |
---|
1018 | { |
---|
1019 | int n; |
---|
1020 | for (n=0; n < 2*Maxspc; n++) { |
---|
1021 | printf("n[%3d] = (%3d.%2d, %3d.%2d, %3d.%2d | %3d)\n", n, |
---|
1022 | (int) ctree[ct_1stedge(n)]/4, |
---|
1023 | (int) ctree[ct_1stedge(n)]%4, |
---|
1024 | (int) ctree[ct_2ndedge(n)]/4, |
---|
1025 | (int) ctree[ct_2ndedge(n)]%4, |
---|
1026 | (int) ctree[ct_3rdedge(n)]/4, |
---|
1027 | (int) ctree[ct_3rdedge(n)]%4, |
---|
1028 | ctree[ct_3rdedge(n)+1]); |
---|
1029 | } |
---|
1030 | printf("\n"); |
---|
1031 | } /* printctree */ |
---|
1032 | |
---|
1033 | |
---|
1034 | /**********/ |
---|
1035 | |
---|
1036 | /* allocate memory for ctree 3 ints pointer plus 1 check byte */ |
---|
1037 | int *initctree() |
---|
1038 | { |
---|
1039 | int *snodes; |
---|
1040 | int n; |
---|
1041 | |
---|
1042 | snodes = (int *) malloc(4 * 2 * Maxspc * sizeof(int)); |
---|
1043 | if (snodes == NULL) maerror("snodes in copytree"); |
---|
1044 | |
---|
1045 | for (n=0; n<(4 * 2 * Maxspc); n++) { |
---|
1046 | snodes[n]=-1; |
---|
1047 | } |
---|
1048 | return snodes; |
---|
1049 | } |
---|
1050 | |
---|
1051 | |
---|
1052 | /**********/ |
---|
1053 | |
---|
1054 | /* free memory of a tree for sorting */ |
---|
1055 | void freectree(int **snodes) |
---|
1056 | { |
---|
1057 | free(*snodes); |
---|
1058 | *snodes = NULL; |
---|
1059 | } |
---|
1060 | |
---|
1061 | |
---|
1062 | /**********/ |
---|
1063 | |
---|
1064 | /* copy subtree recursively */ |
---|
1065 | void copyOTU(int *ctree, /* tree array struct */ |
---|
1066 | int *ct_nextnode, /* next free node */ |
---|
1067 | int ct_curredge, /* currende edge to add subtree */ |
---|
1068 | int *ct_nextleaf, /* next free leaf (0-maxspc) */ |
---|
1069 | int ed) /* edge in puzzling step tree */ |
---|
1070 | { |
---|
1071 | int i, nextcurredge; |
---|
1072 | |
---|
1073 | /* test whether we are on a leaf */ |
---|
1074 | if (edge[ed].downright == NULL && edge[ed].downleft == NULL) { |
---|
1075 | for (i = 1; i < nextleaf; i++) { |
---|
1076 | if (edgeofleaf[i] == ed) { /* i is the leaf of ed */ |
---|
1077 | nextcurredge = ct_1stedge(*ct_nextleaf); |
---|
1078 | ctree[ct_curredge] = nextcurredge; |
---|
1079 | ctree[nextcurredge] = ct_curredge; |
---|
1080 | ctree[ct_sortkeyaddr(nextcurredge)] = trueID[i]; |
---|
1081 | (*ct_nextleaf)++; |
---|
1082 | return; |
---|
1083 | } |
---|
1084 | } |
---|
1085 | } |
---|
1086 | |
---|
1087 | /* we are NOT on a leaf */ |
---|
1088 | nextcurredge = ct_1stedge(*ct_nextnode); |
---|
1089 | ctree[ct_curredge] = nextcurredge; |
---|
1090 | ctree[nextcurredge] = ct_curredge; |
---|
1091 | (*ct_nextnode)++; |
---|
1092 | nextcurredge = ct_nextedgeaddr(nextcurredge); |
---|
1093 | copyOTU(ctree, ct_nextnode, nextcurredge, |
---|
1094 | ct_nextleaf, edge[ed].downleft->numedge); |
---|
1095 | |
---|
1096 | nextcurredge = ct_nextedgeaddr(nextcurredge); |
---|
1097 | copyOTU(ctree, ct_nextnode, nextcurredge, |
---|
1098 | ct_nextleaf, edge[ed].downright->numedge); |
---|
1099 | } |
---|
1100 | |
---|
1101 | |
---|
1102 | /**********/ |
---|
1103 | |
---|
1104 | /* copy treestructure to sorting structure */ |
---|
1105 | void copytree(int *ctree) |
---|
1106 | { |
---|
1107 | int ct_curredge; |
---|
1108 | int ct_nextleaf; |
---|
1109 | int ct_nextnode; |
---|
1110 | |
---|
1111 | ct_nextnode = Maxspc; |
---|
1112 | ct_curredge = ct_1stedge(ct_nextnode); |
---|
1113 | ct_nextleaf = 1; |
---|
1114 | |
---|
1115 | ctree[ct_1stedge(0)] = ct_curredge; |
---|
1116 | ctree[ct_curredge] = ct_1stedge(0); |
---|
1117 | ctree[ct_sortkeyaddr(0)] = trueID[0]; |
---|
1118 | |
---|
1119 | ct_nextnode++; |
---|
1120 | |
---|
1121 | ct_curredge = ct_nextedgeaddr(ct_curredge); |
---|
1122 | copyOTU(ctree, &ct_nextnode, ct_curredge, |
---|
1123 | &ct_nextleaf, edge[edgeofleaf[0]].downleft->numedge); |
---|
1124 | |
---|
1125 | ct_curredge = ct_nextedgeaddr(ct_curredge); |
---|
1126 | copyOTU(ctree, &ct_nextnode, ct_curredge, |
---|
1127 | &ct_nextleaf, edge[edgeofleaf[0]].downright->numedge); |
---|
1128 | } |
---|
1129 | |
---|
1130 | |
---|
1131 | /**********/ |
---|
1132 | |
---|
1133 | /* sort subtree from edge recursively by indices */ |
---|
1134 | int sortOTU(int local_edge, int *ctree) |
---|
1135 | { |
---|
1136 | int key1, key2; |
---|
1137 | int edge1, edge2; |
---|
1138 | int tempedge; |
---|
1139 | |
---|
1140 | if (ctree[ct_2ndedge((int) (local_edge / 4))] < 0) |
---|
1141 | return ctree[ct_sortkeyaddr(local_edge)]; |
---|
1142 | |
---|
1143 | edge1 = ctree[ct_nextedgeaddr(local_edge)]; |
---|
1144 | edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(local_edge))]; |
---|
1145 | |
---|
1146 | /* printf ("visiting [%5d] -> [%5d], [%5d]\n", local_edge, edge1, edge2); */ |
---|
1147 | /* printf ("visiting [%2d.%2d] -> [%2d.%2d], [%2d.%2d]\n", |
---|
1148 | (int)(local_edge/4), local_edge%4, (int)(edge1/4), edge1%4, |
---|
1149 | (int)(edge2/4), edge2%4); */ |
---|
1150 | |
---|
1151 | key1 = sortOTU(edge1, ctree); |
---|
1152 | key2 = sortOTU(edge2, ctree); |
---|
1153 | |
---|
1154 | if (key2 < key1) { |
---|
1155 | tempedge = ctree[ctree[edge1]]; |
---|
1156 | ctree[ctree[edge1]] = ctree[ctree[edge2]]; |
---|
1157 | ctree[ctree[edge2]] = tempedge; |
---|
1158 | tempedge = ctree[edge1]; |
---|
1159 | ctree[edge1] = ctree[edge2]; |
---|
1160 | ctree[edge2] = tempedge; |
---|
1161 | ctree[ct_sortkeyaddr(local_edge)] = key2; |
---|
1162 | |
---|
1163 | } else { |
---|
1164 | ctree[ct_sortkeyaddr(local_edge)] = key1; |
---|
1165 | } |
---|
1166 | return ctree[ct_sortkeyaddr(local_edge)]; |
---|
1167 | } |
---|
1168 | |
---|
1169 | |
---|
1170 | /**********/ |
---|
1171 | |
---|
1172 | /* sort ctree recursively by indices */ |
---|
1173 | int sortctree(int *ctree) |
---|
1174 | { |
---|
1175 | int n, startnode=-1; |
---|
1176 | for(n=0; n<Maxspc; n++) { |
---|
1177 | if (ctree[ct_sortkeyaddr(n*4)] == 0) |
---|
1178 | startnode = n; |
---|
1179 | } |
---|
1180 | sortOTU(ctree[startnode * 4], ctree); |
---|
1181 | return startnode; |
---|
1182 | } |
---|
1183 | |
---|
1184 | |
---|
1185 | /**********/ |
---|
1186 | |
---|
1187 | /* print recursively subtree of edge of sorted tree ctree */ |
---|
1188 | void printfsortOTU(int local_edge, int *ctree) |
---|
1189 | { |
---|
1190 | int edge1, edge2; |
---|
1191 | |
---|
1192 | if (ctree[ct_2ndedge((int) (local_edge / 4))] < 0) { |
---|
1193 | printf("%d", ctree[ct_sortkeyaddr(local_edge)]); |
---|
1194 | return; |
---|
1195 | } |
---|
1196 | |
---|
1197 | edge1 = ctree[ct_nextedgeaddr(local_edge)]; |
---|
1198 | edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(local_edge))]; |
---|
1199 | |
---|
1200 | printf("("); |
---|
1201 | printfsortOTU(edge1, ctree); |
---|
1202 | printf(","); |
---|
1203 | printfsortOTU(edge2, ctree); |
---|
1204 | printf(")"); |
---|
1205 | |
---|
1206 | } |
---|
1207 | |
---|
1208 | |
---|
1209 | /**********/ |
---|
1210 | |
---|
1211 | /* print recursively sorted tree ctree */ |
---|
1212 | int printfsortctree(int *ctree) |
---|
1213 | { |
---|
1214 | int n, startnode=-1; |
---|
1215 | for(n=0; n<Maxspc; n++) { |
---|
1216 | if (ctree[ct_sortkeyaddr(n*4)] == 0) |
---|
1217 | startnode = n; |
---|
1218 | } |
---|
1219 | printf ("(%d,", ctree[ct_sortkeyaddr(startnode*4)]); |
---|
1220 | printfsortOTU(ctree[startnode * 4], ctree); |
---|
1221 | printf (");\n"); |
---|
1222 | return startnode; |
---|
1223 | } |
---|
1224 | |
---|
1225 | |
---|
1226 | /**********/ |
---|
1227 | |
---|
1228 | /* print recursively subtree of edge of sorted tree ctree to string */ |
---|
1229 | void sprintfOTU(char *str, int *len, int local_edge, int *ctree) |
---|
1230 | { |
---|
1231 | int edge1, edge2; |
---|
1232 | |
---|
1233 | if (ctree[ct_2ndedge((int) (local_edge / 4))] < 0) { |
---|
1234 | *len+=sprintf(&(str[*len]), "%d", ctree[ct_sortkeyaddr(local_edge)]); |
---|
1235 | return; |
---|
1236 | } |
---|
1237 | |
---|
1238 | edge1 = ctree[ct_nextedgeaddr(local_edge)]; |
---|
1239 | edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(local_edge))]; |
---|
1240 | |
---|
1241 | sprintf(&(str[*len]), "("); |
---|
1242 | (*len)++; |
---|
1243 | sprintfOTU(str, len, edge1, ctree); |
---|
1244 | sprintf(&(str[*len]), ","); |
---|
1245 | (*len)++; |
---|
1246 | sprintfOTU(str, len, edge2, ctree); |
---|
1247 | sprintf(&(str[*len]), ")"); |
---|
1248 | (*len)++; |
---|
1249 | } |
---|
1250 | |
---|
1251 | /**********/ |
---|
1252 | |
---|
1253 | /* print recursively sorted tree ctree to string */ |
---|
1254 | char *sprintfctree(int *ctree, int strglen) |
---|
1255 | { |
---|
1256 | char *treestr, |
---|
1257 | *tmpptr; |
---|
1258 | int n, |
---|
1259 | len=0, |
---|
1260 | startnode=-1; |
---|
1261 | treestr = (char *) malloc(strglen * sizeof(char)); |
---|
1262 | tmpptr = treestr; |
---|
1263 | for(n=0; n<Maxspc; n++) { |
---|
1264 | if (ctree[ct_sortkeyaddr(n*4)] == 0) |
---|
1265 | startnode = n; |
---|
1266 | } |
---|
1267 | len+=sprintf (&(tmpptr[len]), "(%d,", ctree[ct_sortkeyaddr(startnode*4)]); |
---|
1268 | sprintfOTU(tmpptr, &len, ctree[startnode * 4], ctree); |
---|
1269 | len+=sprintf (&(tmpptr[len]), ");"); |
---|
1270 | return treestr; |
---|
1271 | } |
---|
1272 | |
---|
1273 | |
---|
1274 | /**********/ |
---|
1275 | |
---|
1276 | |
---|
1277 | /***********************************************/ |
---|
1278 | /* establish and handle a list of sorted trees */ |
---|
1279 | /***********************************************/ |
---|
1280 | |
---|
1281 | int itemcount; |
---|
1282 | |
---|
1283 | /* initialize structure */ |
---|
1284 | treelistitemtype *inittreelist(int *treenum) |
---|
1285 | { |
---|
1286 | *treenum = 0; |
---|
1287 | return NULL; |
---|
1288 | } |
---|
1289 | |
---|
1290 | |
---|
1291 | /**********/ |
---|
1292 | |
---|
1293 | /* malloc new tree list item */ |
---|
1294 | treelistitemtype *gettreelistitem() |
---|
1295 | { |
---|
1296 | treelistitemtype *tmpptr; |
---|
1297 | tmpptr = (treelistitemtype *)malloc(sizeof(treelistitemtype)); |
---|
1298 | if (tmpptr == NULL) maerror("item of intermediate tree structures"); |
---|
1299 | (*tmpptr).pred = NULL; |
---|
1300 | (*tmpptr).succ = NULL; |
---|
1301 | (*tmpptr).tree = NULL; |
---|
1302 | (*tmpptr).count = 0; |
---|
1303 | (*tmpptr).idx = itemcount++; |
---|
1304 | return tmpptr; |
---|
1305 | } |
---|
1306 | |
---|
1307 | /**********/ |
---|
1308 | |
---|
1309 | /* free whole tree list */ |
---|
1310 | void freetreelist(treelistitemtype **list, |
---|
1311 | int *numitems, |
---|
1312 | int *numsum) |
---|
1313 | { |
---|
1314 | treelistitemtype *current; |
---|
1315 | treelistitemtype *next; |
---|
1316 | current = *list; |
---|
1317 | while (current != NULL) { |
---|
1318 | next = (*current).succ; |
---|
1319 | if ((*current).tree != NULL) { |
---|
1320 | free ((*current).tree); |
---|
1321 | (*current).tree = NULL; |
---|
1322 | } |
---|
1323 | free(current); |
---|
1324 | current = next; |
---|
1325 | } |
---|
1326 | *list = NULL; |
---|
1327 | *numitems = 0; |
---|
1328 | *numsum = 0; |
---|
1329 | } /* freetreelist */ |
---|
1330 | |
---|
1331 | |
---|
1332 | /**********/ |
---|
1333 | |
---|
1334 | /* add tree to the tree list */ |
---|
1335 | treelistitemtype *addtree2list(char **tree, /* sorted tree string */ |
---|
1336 | int numtrees, /* how many occurred, e.g. in parallel */ |
---|
1337 | treelistitemtype **list, /* addr. of tree list */ |
---|
1338 | int *numitems, |
---|
1339 | int *numsum) |
---|
1340 | { |
---|
1341 | treelistitemtype *tmpptr = NULL; |
---|
1342 | treelistitemtype *newptr = NULL; |
---|
1343 | int result; |
---|
1344 | int done = 0; |
---|
1345 | |
---|
1346 | if ((*list == NULL) || (numitems == 0)) { |
---|
1347 | newptr = gettreelistitem(); |
---|
1348 | (*newptr).tree = *tree; |
---|
1349 | *tree = NULL; |
---|
1350 | (*newptr).id = *numitems; |
---|
1351 | (*newptr).count = numtrees; |
---|
1352 | *numitems = 1; |
---|
1353 | *numsum = numtrees; |
---|
1354 | *list = newptr; |
---|
1355 | } else { |
---|
1356 | tmpptr = *list; |
---|
1357 | while(done == 0) { |
---|
1358 | result = strcmp( (*tmpptr).tree, *tree); |
---|
1359 | if (result==0) { |
---|
1360 | free(*tree); *tree = NULL; |
---|
1361 | (*tmpptr).count += numtrees; |
---|
1362 | *numsum += numtrees; |
---|
1363 | done = 1; |
---|
1364 | newptr = tmpptr; |
---|
1365 | } else { if (result < 0) { |
---|
1366 | if ((*tmpptr).succ != NULL) |
---|
1367 | tmpptr = (*tmpptr).succ; |
---|
1368 | else { |
---|
1369 | newptr = gettreelistitem(); |
---|
1370 | (*newptr).tree = *tree; |
---|
1371 | *tree = NULL; |
---|
1372 | (*newptr).id = *numitems; |
---|
1373 | (*newptr).count = numtrees; |
---|
1374 | (*newptr).pred = tmpptr; |
---|
1375 | (*tmpptr).succ = newptr; |
---|
1376 | (*numitems)++; |
---|
1377 | *numsum += numtrees; |
---|
1378 | done = 1; |
---|
1379 | } |
---|
1380 | } else { /* result < 0 */ |
---|
1381 | newptr = gettreelistitem(); |
---|
1382 | (*newptr).tree = *tree; |
---|
1383 | *tree = NULL; |
---|
1384 | (*newptr).id = *numitems; |
---|
1385 | (*newptr).count = numtrees; |
---|
1386 | (*newptr).succ = tmpptr; |
---|
1387 | (*newptr).pred = (*tmpptr).pred; |
---|
1388 | (*tmpptr).pred = newptr; |
---|
1389 | *numsum += numtrees; |
---|
1390 | |
---|
1391 | if ((*newptr).pred != NULL) { |
---|
1392 | (*(*newptr).pred).succ = newptr; |
---|
1393 | } else { |
---|
1394 | *list = newptr; |
---|
1395 | } |
---|
1396 | (*numitems)++; |
---|
1397 | done = 1; |
---|
1398 | } /* end if result < 0 */ |
---|
1399 | } /* end if result != 0 */ |
---|
1400 | } /* while searching in list */ |
---|
1401 | } /* if list empty, else */ |
---|
1402 | return (newptr); |
---|
1403 | } /* addtree2list */ |
---|
1404 | |
---|
1405 | |
---|
1406 | /**********/ |
---|
1407 | |
---|
1408 | /* resort list of trees by number of occurrences for output */ |
---|
1409 | void sortbynum(treelistitemtype *list, treelistitemtype **sortlist) |
---|
1410 | { |
---|
1411 | treelistitemtype *tmpptr = NULL; |
---|
1412 | treelistitemtype *curr = NULL; |
---|
1413 | treelistitemtype *next = NULL; |
---|
1414 | int xchange = 1; |
---|
1415 | |
---|
1416 | if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n"); |
---|
1417 | tmpptr = list; |
---|
1418 | *sortlist = list; |
---|
1419 | while (tmpptr != NULL) { |
---|
1420 | (*tmpptr).sortnext = (*tmpptr).succ; |
---|
1421 | (*tmpptr).sortlast = (*tmpptr).pred; |
---|
1422 | tmpptr = (*tmpptr).succ; |
---|
1423 | } |
---|
1424 | |
---|
1425 | while (xchange > 0) { |
---|
1426 | curr = *sortlist; |
---|
1427 | xchange = 0; |
---|
1428 | if (curr == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n"); |
---|
1429 | while((*curr).sortnext != NULL) { |
---|
1430 | next = (*curr).sortnext; |
---|
1431 | if ((*curr).count >= (*next).count) |
---|
1432 | curr = (*curr).sortnext; |
---|
1433 | else { |
---|
1434 | if ((*curr).sortlast != NULL) |
---|
1435 | (*((*curr).sortlast)).sortnext = next; |
---|
1436 | if (*sortlist == curr) |
---|
1437 | *sortlist = next; |
---|
1438 | (*next).sortlast = (*curr).sortlast; |
---|
1439 | |
---|
1440 | if ((*next).sortnext != NULL) |
---|
1441 | (*((*next).sortnext)).sortlast = curr; |
---|
1442 | (*curr).sortnext = (*next).sortnext; |
---|
1443 | |
---|
1444 | (*curr).sortlast = next; |
---|
1445 | (*next).sortnext = curr; |
---|
1446 | |
---|
1447 | xchange++; |
---|
1448 | } |
---|
1449 | } |
---|
1450 | } |
---|
1451 | } /* sortbynum */ |
---|
1452 | |
---|
1453 | |
---|
1454 | /**********/ |
---|
1455 | |
---|
1456 | /* print puzzling step tree structures for checking */ |
---|
1457 | void printfpstrees(treelistitemtype *list) |
---|
1458 | { |
---|
1459 | char ch; |
---|
1460 | treelistitemtype *tmpptr = NULL; |
---|
1461 | tmpptr = list; |
---|
1462 | ch = '-'; |
---|
1463 | while (tmpptr != NULL) { |
---|
1464 | printf ("%c[%2d] %5d %s\n", ch, (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree); |
---|
1465 | tmpptr = (*tmpptr).succ; |
---|
1466 | ch = ' '; |
---|
1467 | } |
---|
1468 | } |
---|
1469 | |
---|
1470 | /**********/ |
---|
1471 | |
---|
1472 | /* print sorted puzzling step tree structure with names */ |
---|
1473 | void fprintffullpstree(FILE *outf, char *treestr) |
---|
1474 | { |
---|
1475 | int count = 0; |
---|
1476 | int idnum = 0; |
---|
1477 | int n; |
---|
1478 | for(n=0; treestr[n] != '\0'; n++){ |
---|
1479 | while(isdigit((int)treestr[n])){ |
---|
1480 | idnum = (10 * idnum) + ((int)treestr[n]-48); |
---|
1481 | n++; |
---|
1482 | count++; |
---|
1483 | } |
---|
1484 | if (count > 0){ |
---|
1485 | # ifdef USEQUOTES |
---|
1486 | fprintf(outf, "'"); |
---|
1487 | # endif |
---|
1488 | (void)fputid(outf, idnum); |
---|
1489 | # ifdef USEQUOTES |
---|
1490 | fprintf(outf, "'"); |
---|
1491 | # endif |
---|
1492 | count = 0; |
---|
1493 | idnum = 0; |
---|
1494 | } |
---|
1495 | fprintf(outf, "%c", treestr[n]); |
---|
1496 | } |
---|
1497 | } |
---|
1498 | |
---|
1499 | |
---|
1500 | /**********/ |
---|
1501 | |
---|
1502 | /* print sorted puzzling step tree structures with names */ |
---|
1503 | void fprintfsortedpstrees(FILE *output, |
---|
1504 | treelistitemtype *list, /* tree list */ |
---|
1505 | int itemnum, /* order number */ |
---|
1506 | int itemsum, /* number of trees */ |
---|
1507 | int comment, /* with statistics, or puzzle report ? */ |
---|
1508 | float cutoff) /* cutoff percentage */ |
---|
1509 | { |
---|
1510 | treelistitemtype *tmpptr = NULL; |
---|
1511 | treelistitemtype *slist = NULL; |
---|
1512 | int num = 1; |
---|
1513 | float percent; |
---|
1514 | |
---|
1515 | if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n"); |
---|
1516 | sortbynum(list, &slist); |
---|
1517 | |
---|
1518 | tmpptr = slist; |
---|
1519 | while (tmpptr != NULL) { |
---|
1520 | percent = (float)(100.0 * (*tmpptr).count / itemsum); |
---|
1521 | if ((cutoff == 0.0) || (cutoff <= percent)) { |
---|
1522 | if (comment) |
---|
1523 | fprintf (output, "[ %d. %d %.2f %d %d %d ]", num++, (*tmpptr).count, percent, (*tmpptr).id, itemnum, itemsum); |
---|
1524 | else { |
---|
1525 | if (num == 1){ |
---|
1526 | fprintf (output, "\n"); |
---|
1527 | fprintf (output, "The following tree(s) occured in more than %.2f%% of the %d puzzling steps.\n", cutoff, itemsum); |
---|
1528 | fprintf (output, "The trees are orderd descending by the number of occurrences.\n"); |
---|
1529 | fprintf (output, "\n"); |
---|
1530 | fprintf (output, "\n occurrences ID Phylip tree\n"); |
---|
1531 | } |
---|
1532 | fprintf (output, "%2d. %5d %6.2f%% %5d ", num++, (*tmpptr).count, percent, (*tmpptr).id); |
---|
1533 | } |
---|
1534 | fprintffullpstree(output, (*tmpptr).tree); |
---|
1535 | fprintf (output, "\n"); |
---|
1536 | } |
---|
1537 | tmpptr = (*tmpptr).sortnext; |
---|
1538 | } |
---|
1539 | |
---|
1540 | if (!comment) { |
---|
1541 | fprintf (output, "\n"); |
---|
1542 | switch(num) { |
---|
1543 | case 1: fprintf (output, "There were no tree topologies (out of %d) occuring with a percentage >= %.2f%% of the %d puzzling steps.\n", itemnum, cutoff, itemsum); break; |
---|
1544 | case 2: fprintf (output, "There was one tree topology (out of %d) occuring with a percentage >= %.2f%%.\n", itemnum, cutoff); break; |
---|
1545 | default: fprintf (output, "There were %d tree topologies (out of %d) occuring with a percentage >= %.2f%%.\n", num-1, itemnum, cutoff); break; |
---|
1546 | } |
---|
1547 | fprintf (output, "\n"); |
---|
1548 | fprintf (output, "\n"); |
---|
1549 | } |
---|
1550 | |
---|
1551 | } /* fprintfsortedpstrees */ |
---|
1552 | |
---|
1553 | /**********/ |
---|
1554 | |
---|
1555 | /* print sorted tree topologies for checking */ |
---|
1556 | void printfsortedpstrees(treelistitemtype *list) |
---|
1557 | { |
---|
1558 | treelistitemtype *tmpptr = NULL; |
---|
1559 | treelistitemtype *slist = NULL; |
---|
1560 | |
---|
1561 | sortbynum(list, &slist); |
---|
1562 | |
---|
1563 | tmpptr = slist; |
---|
1564 | while (tmpptr != NULL) { |
---|
1565 | printf ("[%2d] %5d %s\n", (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree); |
---|
1566 | tmpptr = (*tmpptr).sortnext; |
---|
1567 | } |
---|
1568 | } /* printfsortedpstrees */ |
---|
1569 | |
---|
1570 | |
---|
1571 | /*******************************************/ |
---|
1572 | /* end of tree sorting */ |
---|
1573 | /*******************************************/ |
---|
1574 | |
---|
1575 | |
---|
1576 | |
---|
1577 | /******************************************************************************/ |
---|
1578 | /* functions for computing the consensus tree */ |
---|
1579 | /******************************************************************************/ |
---|
1580 | |
---|
1581 | /* prepare for consensus tree analysis */ |
---|
1582 | void initconsensus() |
---|
1583 | { |
---|
1584 | # if ! PARALLEL |
---|
1585 | biparts = new_cmatrix(Maxspc-3, Maxspc); |
---|
1586 | # endif /* PARALLEL */ |
---|
1587 | |
---|
1588 | if (Maxspc % 32 == 0) |
---|
1589 | splitlength = Maxspc/32; |
---|
1590 | else splitlength = (Maxspc + 32 - (Maxspc % 32))/32; |
---|
1591 | numbiparts = 0; /* no pattern stored so far */ |
---|
1592 | maxbiparts = 0; /* no memory reserved so far */ |
---|
1593 | splitfreqs = NULL; |
---|
1594 | splitpatterns = NULL; |
---|
1595 | splitsizes = NULL; |
---|
1596 | splitcomp = (uli *) malloc(splitlength * sizeof(uli) ); |
---|
1597 | if (splitcomp == NULL) maerror("splitcomp in initconsensus"); |
---|
1598 | } |
---|
1599 | |
---|
1600 | /* prototype needed for recursive function */ |
---|
1601 | void makepart(int i, int curribrnch); |
---|
1602 | |
---|
1603 | /* recursive function to get bipartitions */ |
---|
1604 | void makepart(int i, int curribrnch) |
---|
1605 | { |
---|
1606 | int j; |
---|
1607 | |
---|
1608 | if ( edge[i].downright == NULL || |
---|
1609 | edge[i].downleft == NULL) { /* if i is leaf */ |
---|
1610 | |
---|
1611 | /* check out what leaf j sits on this edge i */ |
---|
1612 | for (j = 1; j < Maxspc; j++) { |
---|
1613 | if (edgeofleaf[j] == i) { |
---|
1614 | biparts[curribrnch][trueID[j]] = '*'; |
---|
1615 | return; |
---|
1616 | } |
---|
1617 | } |
---|
1618 | } else { /* still on inner branch */ |
---|
1619 | makepart(edge[i].downleft->numedge, curribrnch); |
---|
1620 | makepart(edge[i].downright->numedge, curribrnch); |
---|
1621 | } |
---|
1622 | } |
---|
1623 | |
---|
1624 | /* compute bipartitions of tree of current puzzling step */ |
---|
1625 | void computebiparts() |
---|
1626 | { |
---|
1627 | int i, j, curribrnch; |
---|
1628 | |
---|
1629 | curribrnch = -1; |
---|
1630 | |
---|
1631 | for (i = 0; i < Maxspc - 3; i++) |
---|
1632 | for (j = 0; j < Maxspc; j++) |
---|
1633 | biparts[i][j] = '.'; |
---|
1634 | |
---|
1635 | for (i = 0; i < Maxbrnch; i++) { |
---|
1636 | if (!( edgeofleaf[0] == i || |
---|
1637 | edge[i].downright == NULL || |
---|
1638 | edge[i].downleft == NULL) ) { /* check all inner branches */ |
---|
1639 | curribrnch++; |
---|
1640 | makepart(i, curribrnch); |
---|
1641 | |
---|
1642 | /* make sure that the root is always a '*' */ |
---|
1643 | if (biparts[curribrnch][outgroup] == '.') { |
---|
1644 | for (j = 0; j < Maxspc; j++) { |
---|
1645 | if (biparts[curribrnch][j] == '.') |
---|
1646 | biparts[curribrnch][j] = '*'; |
---|
1647 | else |
---|
1648 | biparts[curribrnch][j] = '.'; |
---|
1649 | } |
---|
1650 | } |
---|
1651 | } |
---|
1652 | } |
---|
1653 | } |
---|
1654 | |
---|
1655 | /* print out the bipartition n of all different splitpatterns */ |
---|
1656 | void printsplit(FILE *fp, uli n) |
---|
1657 | { |
---|
1658 | int i, j, col; |
---|
1659 | uli z; |
---|
1660 | |
---|
1661 | col = 0; |
---|
1662 | for (i = 0; i < splitlength; i++) { |
---|
1663 | z = splitpatterns[n*splitlength + i]; |
---|
1664 | for (j = 0; j < 32 && col < Maxspc; j++) { |
---|
1665 | if (col % 10 == 0 && col != 0) fprintf(fp, " "); |
---|
1666 | if (z & 1) fprintf(fp, "."); |
---|
1667 | else fprintf(fp, "*"); |
---|
1668 | z = (z >> 1); |
---|
1669 | col++; |
---|
1670 | } |
---|
1671 | } |
---|
1672 | } |
---|
1673 | |
---|
1674 | /* make new entries for new different bipartitions and count frequencies */ |
---|
1675 | void makenewsplitentries() |
---|
1676 | { |
---|
1677 | int i, j, bpc, identical, idflag, bpsize; |
---|
1678 | uli nextentry, obpc; |
---|
1679 | |
---|
1680 | /* where the next entry would be in splitpatterns */ |
---|
1681 | nextentry = numbiparts; |
---|
1682 | |
---|
1683 | for (bpc = 0; bpc < Maxspc - 3; bpc++) { /* for every new bipartition */ |
---|
1684 | /* convert bipartition into a more compact format */ |
---|
1685 | bpsize = 0; |
---|
1686 | for (i = 0; i < splitlength; i++) { |
---|
1687 | splitcomp[i] = 0; |
---|
1688 | for (j = 0; j < 32; j++) { |
---|
1689 | splitcomp[i] = splitcomp[i] >> 1; |
---|
1690 | if (i*32 + j < Maxspc) |
---|
1691 | if (biparts[bpc][i*32 + j] == '.') { |
---|
1692 | /* set highest bit */ |
---|
1693 | splitcomp[i] = (splitcomp[i] | 2147483648UL); |
---|
1694 | bpsize++; /* count the '.' */ |
---|
1695 | } |
---|
1696 | } |
---|
1697 | } |
---|
1698 | /* compare to the *old* patterns */ |
---|
1699 | identical = FALSE; |
---|
1700 | for (obpc = 0; (obpc < numbiparts) && (!identical); obpc++) { |
---|
1701 | /* compare first partition size */ |
---|
1702 | if (splitsizes[obpc] == bpsize) idflag = TRUE; |
---|
1703 | else idflag = FALSE; |
---|
1704 | /* if size is identical compare whole partition */ |
---|
1705 | for (i = 0; (i < splitlength) && idflag; i++) |
---|
1706 | if (splitcomp[i] != splitpatterns[obpc*splitlength + i]) |
---|
1707 | idflag = FALSE; |
---|
1708 | if (idflag) identical = TRUE; |
---|
1709 | } |
---|
1710 | if (identical) { /* if identical increase frequency */ |
---|
1711 | splitfreqs[2*(obpc-1)]++; |
---|
1712 | } else { /* create new entry */ |
---|
1713 | if (nextentry == maxbiparts) { /* reserve more memory */ |
---|
1714 | maxbiparts = maxbiparts + 2*Maxspc; |
---|
1715 | splitfreqs = (uli *) myrealloc(splitfreqs, |
---|
1716 | 2*maxbiparts * sizeof(uli) ); |
---|
1717 | /* 2x: splitfreqs contains also an index (sorting!) */ |
---|
1718 | if (splitfreqs == NULL) maerror("splitfreqs in makenewsplitentries"); |
---|
1719 | splitpatterns = (uli *) myrealloc(splitpatterns, |
---|
1720 | splitlength*maxbiparts * sizeof(uli) ); |
---|
1721 | if (splitpatterns == NULL) maerror("splitpatterns in makenewsplitentries"); |
---|
1722 | splitsizes = (int *) myrealloc(splitsizes, |
---|
1723 | maxbiparts * sizeof(int) ); |
---|
1724 | if (splitsizes == NULL) maerror("splitsizes in makenewsplitentries"); |
---|
1725 | } |
---|
1726 | splitfreqs[2*nextentry] = 1; /* frequency */ |
---|
1727 | splitfreqs[2*nextentry+1] = nextentry; /* index for sorting */ |
---|
1728 | for (i = 0; i < splitlength; i++) |
---|
1729 | splitpatterns[nextentry*splitlength + i] = splitcomp[i]; |
---|
1730 | splitsizes[nextentry] = bpsize; |
---|
1731 | nextentry++; |
---|
1732 | } |
---|
1733 | } |
---|
1734 | numbiparts = nextentry; |
---|
1735 | } |
---|
1736 | |
---|
1737 | /* general remarks: |
---|
1738 | |
---|
1739 | - every entry in consbiparts is one node of the consensus tree |
---|
1740 | - for each node one has to know which taxa and which other nodes |
---|
1741 | are *directly* descending from it |
---|
1742 | - for every taxon/node number there is a flag that shows |
---|
1743 | whether it descends from the node or not |
---|
1744 | - '0' means that neither a taxon nor another node with the |
---|
1745 | corresponding number decends from the node |
---|
1746 | '1' means that the corresponding taxon descends from the node |
---|
1747 | '2' means that the corresponding node descends from the node |
---|
1748 | '3' means that the corresponding taxon and node descends from the node |
---|
1749 | */ |
---|
1750 | |
---|
1751 | /* copy bipartition n of all different splitpatterns to consbiparts[k] */ |
---|
1752 | void copysplit(uli n, int k) |
---|
1753 | { |
---|
1754 | int i, j, col; |
---|
1755 | uli z; |
---|
1756 | |
---|
1757 | col = 0; |
---|
1758 | for (i = 0; i < splitlength; i++) { |
---|
1759 | z = splitpatterns[n*splitlength + i]; |
---|
1760 | for (j = 0; j < 32 && col < Maxspc; j++) { |
---|
1761 | if (z & 1) consbiparts[k][col] = '1'; |
---|
1762 | else consbiparts[k][col] = '0'; |
---|
1763 | z = (z >> 1); |
---|
1764 | col++; |
---|
1765 | } |
---|
1766 | } |
---|
1767 | } |
---|
1768 | |
---|
1769 | /* compute majority rule consensus tree */ |
---|
1770 | void makeconsensus() |
---|
1771 | { |
---|
1772 | int i, j, k, size, subnode; |
---|
1773 | char chari, charj; |
---|
1774 | |
---|
1775 | /* sort bipartition frequencies */ |
---|
1776 | qsort(splitfreqs, numbiparts, 2*sizeof(uli), ulicmp); |
---|
1777 | /* how many bipartitions are included in the consensus tree */ |
---|
1778 | consincluded = 0; |
---|
1779 | for (i = 0; (uli)i < numbiparts && (uli)i == consincluded; i++) { |
---|
1780 | if (2*splitfreqs[2*i] > Numtrial) consincluded = i + 1; |
---|
1781 | } |
---|
1782 | |
---|
1783 | /* collect all info about majority rule consensus tree */ |
---|
1784 | /* the +1 is due to the edge with the root */ |
---|
1785 | consconfid = new_ivector(consincluded + 1); |
---|
1786 | conssizes = new_ivector(2*consincluded + 2); |
---|
1787 | consbiparts = new_cmatrix(consincluded + 1, Maxspc); |
---|
1788 | |
---|
1789 | for (i = 0; (uli)i < consincluded; i++) { |
---|
1790 | /* copy partition to consbiparts */ |
---|
1791 | copysplit(splitfreqs[2*i+1], i); |
---|
1792 | /* frequency in percent (rounded to integer) */ |
---|
1793 | consconfid[i] = (int) floor(100.0*splitfreqs[2*i]/Numtrial + 0.5); |
---|
1794 | /* size of partition */ |
---|
1795 | conssizes[2*i] = splitsizes[splitfreqs[2*i+1]]; |
---|
1796 | conssizes[2*i+1] = i; |
---|
1797 | } |
---|
1798 | for (i = 0; i < Maxspc; i++) consbiparts[consincluded][i] = '1'; |
---|
1799 | consbiparts[consincluded][outgroup] = '0'; |
---|
1800 | consconfid[consincluded] = 100; |
---|
1801 | conssizes[2*consincluded] = Maxspc - 1; |
---|
1802 | conssizes[2*consincluded + 1] = consincluded; |
---|
1803 | |
---|
1804 | /* sort bipartitions according to cluster size */ |
---|
1805 | qsort(conssizes, consincluded + 1, 2*sizeof(int), intcmp); |
---|
1806 | |
---|
1807 | /* reconstruct consensus tree */ |
---|
1808 | for (i = 0; (uli)i < consincluded; i++) { /* try every node */ |
---|
1809 | size = conssizes[2*i]; /* size of current node */ |
---|
1810 | for (j = i + 1; (uli)j < consincluded + 1; j++) { |
---|
1811 | |
---|
1812 | /* compare only with nodes with more descendants */ |
---|
1813 | if (size == conssizes[2*j]) continue; |
---|
1814 | |
---|
1815 | /* check whether node i is a subnode of j */ |
---|
1816 | subnode = FALSE; |
---|
1817 | for (k = 0; k < Maxspc && !subnode; k++) { |
---|
1818 | chari = consbiparts[ conssizes[2*i+1] ][k]; |
---|
1819 | if (chari != '0') { |
---|
1820 | charj = consbiparts[ conssizes[2*j+1] ][k]; |
---|
1821 | if (chari == charj || charj == '3') subnode = TRUE; |
---|
1822 | } |
---|
1823 | } |
---|
1824 | |
---|
1825 | /* if i is a subnode of j change j accordingly */ |
---|
1826 | if (subnode) { |
---|
1827 | /* remove subnode i from j */ |
---|
1828 | for (k = 0; k < Maxspc; k++) { |
---|
1829 | chari = consbiparts[ conssizes[2*i+1] ][k]; |
---|
1830 | if (chari != '0') { |
---|
1831 | charj = consbiparts[ conssizes[2*j+1] ][k]; |
---|
1832 | if (chari == charj) |
---|
1833 | consbiparts[ conssizes[2*j+1] ][k] = '0'; |
---|
1834 | else if (charj == '3') { |
---|
1835 | if (chari == '1') |
---|
1836 | consbiparts[ conssizes[2*j+1] ][k] = '2'; |
---|
1837 | else if (chari == '2') |
---|
1838 | consbiparts[ conssizes[2*j+1] ][k] = '1'; |
---|
1839 | else { |
---|
1840 | /* Consensus tree [1] */ |
---|
1841 | FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR H TO DEVELOPERS\n\n\n"); |
---|
1842 | exit(1); |
---|
1843 | } |
---|
1844 | } else { |
---|
1845 | /* Consensus tree [2] */ |
---|
1846 | FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR I TO DEVELOPERS\n\n\n"); |
---|
1847 | exit(1); |
---|
1848 | } |
---|
1849 | } |
---|
1850 | } |
---|
1851 | /* add link to subnode i in node j */ |
---|
1852 | charj = consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ]; |
---|
1853 | if (charj == '0') |
---|
1854 | consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '2'; |
---|
1855 | else if (charj == '1') |
---|
1856 | consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '3'; |
---|
1857 | else { |
---|
1858 | /* Consensus tree [3] */ |
---|
1859 | FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR J TO DEVELOPERS\n\n\n"); |
---|
1860 | exit(1); |
---|
1861 | } |
---|
1862 | } |
---|
1863 | } |
---|
1864 | } |
---|
1865 | } |
---|
1866 | |
---|
1867 | /* prototype for recursion */ |
---|
1868 | void writenode(FILE *treefile, int node); |
---|
1869 | |
---|
1870 | /* write node (writeconsensustree) */ |
---|
1871 | void writenode(FILE *treefile, int node) |
---|
1872 | { |
---|
1873 | int i, first; |
---|
1874 | |
---|
1875 | fprintf(treefile, "("); |
---|
1876 | column++; |
---|
1877 | /* write descending nodes */ |
---|
1878 | first = TRUE; |
---|
1879 | for (i = 0; i < Maxspc; i++) { |
---|
1880 | if (consbiparts[node][i] == '2' || |
---|
1881 | consbiparts[node][i] == '3') { |
---|
1882 | if (first) first = FALSE; |
---|
1883 | else { |
---|
1884 | fprintf(treefile, ","); |
---|
1885 | column++; |
---|
1886 | } |
---|
1887 | if (column > 60) { |
---|
1888 | column = 2; |
---|
1889 | fprintf(treefile, "\n"); |
---|
1890 | } |
---|
1891 | /* write node i */ |
---|
1892 | writenode(treefile, i); |
---|
1893 | |
---|
1894 | /* reliability value as internal label */ |
---|
1895 | fprintf(treefile, "%d", consconfid[i]); |
---|
1896 | |
---|
1897 | column = column + 3; |
---|
1898 | } |
---|
1899 | } |
---|
1900 | /* write descending taxa */ |
---|
1901 | for (i = 0; i < Maxspc; i++) { |
---|
1902 | if (consbiparts[node][i] == '1' || |
---|
1903 | consbiparts[node][i] == '3') { |
---|
1904 | if (first) first = FALSE; |
---|
1905 | else { |
---|
1906 | fprintf(treefile, ","); |
---|
1907 | column++; |
---|
1908 | } |
---|
1909 | if (column > 60) { |
---|
1910 | column = 2; |
---|
1911 | fprintf(treefile, "\n"); |
---|
1912 | } |
---|
1913 | column += fputid(treefile, i); |
---|
1914 | } |
---|
1915 | } |
---|
1916 | fprintf(treefile, ")"); |
---|
1917 | column++; |
---|
1918 | } |
---|
1919 | |
---|
1920 | /* write consensus tree */ |
---|
1921 | void writeconsensustree(FILE *treefile) |
---|
1922 | { |
---|
1923 | int i, first; |
---|
1924 | |
---|
1925 | column = 1; |
---|
1926 | fprintf(treefile, "("); |
---|
1927 | column += fputid(treefile, outgroup) + 2; |
---|
1928 | fprintf(treefile, ","); |
---|
1929 | /* write descending nodes */ |
---|
1930 | first = TRUE; |
---|
1931 | for (i = 0; i < Maxspc; i++) { |
---|
1932 | if (consbiparts[consincluded][i] == '2' || |
---|
1933 | consbiparts[consincluded][i] == '3') { |
---|
1934 | if (first) first = FALSE; |
---|
1935 | else { |
---|
1936 | fprintf(treefile, ","); |
---|
1937 | column++; |
---|
1938 | } |
---|
1939 | if (column > 60) { |
---|
1940 | column = 2; |
---|
1941 | fprintf(treefile, "\n"); |
---|
1942 | } |
---|
1943 | /* write node i */ |
---|
1944 | writenode(treefile, i); |
---|
1945 | |
---|
1946 | /* reliability value as internal label */ |
---|
1947 | fprintf(treefile, "%d", consconfid[i]); |
---|
1948 | |
---|
1949 | column = column + 3; |
---|
1950 | } |
---|
1951 | } |
---|
1952 | /* write descending taxa */ |
---|
1953 | for (i = 0; i < Maxspc; i++) { |
---|
1954 | if (consbiparts[consincluded][i] == '1' || |
---|
1955 | consbiparts[consincluded][i] == '3') { |
---|
1956 | if (first) first = FALSE; |
---|
1957 | else { |
---|
1958 | fprintf(treefile, ","); |
---|
1959 | column++; |
---|
1960 | } |
---|
1961 | if (column > 60) { |
---|
1962 | column = 2; |
---|
1963 | fprintf(treefile, "\n"); |
---|
1964 | } |
---|
1965 | column += fputid(treefile, i); |
---|
1966 | } |
---|
1967 | } |
---|
1968 | fprintf(treefile, ");\n"); |
---|
1969 | } |
---|
1970 | |
---|
1971 | /* prototype for recursion */ |
---|
1972 | void nodecoordinates(int node); |
---|
1973 | |
---|
1974 | /* establish node coordinates (plotconsensustree) */ |
---|
1975 | void nodecoordinates(int node) |
---|
1976 | { |
---|
1977 | int i, ymin, ymax, xcoordinate; |
---|
1978 | |
---|
1979 | /* first establish coordinates of descending nodes */ |
---|
1980 | for (i = 0; i < Maxspc; i++) { |
---|
1981 | if (consbiparts[node][i] == '2' || |
---|
1982 | consbiparts[node][i] == '3') |
---|
1983 | nodecoordinates(i); |
---|
1984 | } |
---|
1985 | |
---|
1986 | /* then establish coordinates of descending taxa */ |
---|
1987 | for (i = 0; i < Maxspc; i++) { |
---|
1988 | if (consbiparts[node][i] == '1' || |
---|
1989 | consbiparts[node][i] == '3') { |
---|
1990 | /* y-coordinate of taxon i */ |
---|
1991 | ycortax[i] = ytaxcounter; |
---|
1992 | ytaxcounter = ytaxcounter - 2; |
---|
1993 | } |
---|
1994 | } |
---|
1995 | |
---|
1996 | /* then establish coordinates of this node */ |
---|
1997 | ymin = 2*Maxspc - 2; |
---|
1998 | ymax = 0; |
---|
1999 | xcoordinate = 0; |
---|
2000 | for (i = 0; i < Maxspc; i++) { |
---|
2001 | if (consbiparts[node][i] == '2' || |
---|
2002 | consbiparts[node][i] == '3') { |
---|
2003 | if (ycor[i] > ymax) ymax = ycor[i]; |
---|
2004 | if (ycor[i] < ymin) ymin = ycor[i]; |
---|
2005 | if (xcor[i] > xcoordinate) xcoordinate = xcor[i]; |
---|
2006 | } |
---|
2007 | } |
---|
2008 | for (i = 0; i < Maxspc; i++) { |
---|
2009 | if (consbiparts[node][i] == '1' || |
---|
2010 | consbiparts[node][i] == '3') { |
---|
2011 | if (ycortax[i] > ymax) ymax = ycortax[i]; |
---|
2012 | if (ycortax[i] < ymin) ymin = ycortax[i]; |
---|
2013 | } |
---|
2014 | } |
---|
2015 | ycormax[node] = ymax; |
---|
2016 | ycormin[node] = ymin; |
---|
2017 | ycor[node] = (int) floor(0.5*(ymax + ymin) + 0.5); |
---|
2018 | if (xcoordinate == 0) xcoordinate = 9; |
---|
2019 | xcor[node] = xcoordinate + 4; |
---|
2020 | } |
---|
2021 | |
---|
2022 | /* prototype for recursion */ |
---|
2023 | void drawnode(int node, int xold); |
---|
2024 | |
---|
2025 | /* drawnode (plotconsensustree) */ |
---|
2026 | void drawnode(int node, int xold) |
---|
2027 | { |
---|
2028 | int i, j; |
---|
2029 | char buf[4]; |
---|
2030 | |
---|
2031 | /* first draw vertical line */ |
---|
2032 | for (i = ycormin[node] + 1; i < ycormax[node]; i++) |
---|
2033 | treepict[xcor[node]][i] = ':'; |
---|
2034 | |
---|
2035 | /* then draw descending nodes */ |
---|
2036 | for (i = 0; i < Maxspc; i++) { |
---|
2037 | if (consbiparts[node][i] == '2' || |
---|
2038 | consbiparts[node][i] == '3') |
---|
2039 | drawnode(i, xcor[node]); |
---|
2040 | } |
---|
2041 | |
---|
2042 | /* then draw descending taxa */ |
---|
2043 | for (i = 0; i < Maxspc; i++) { |
---|
2044 | if (consbiparts[node][i] == '1' || |
---|
2045 | consbiparts[node][i] == '3') { |
---|
2046 | treepict[xcor[node]][ycortax[i]] = ':'; |
---|
2047 | for (j = xcor[node] + 1; j < xsize-10; j++) |
---|
2048 | treepict[j][ycortax[i]] = '-'; |
---|
2049 | for (j = 0; j < 10; j++) |
---|
2050 | treepict[xsize-10+j][ycortax[i]] = Identif[i][j]; |
---|
2051 | } |
---|
2052 | } |
---|
2053 | |
---|
2054 | /* then draw internal edge with consensus value */ |
---|
2055 | treepict[xold][ycor[node]] = ':'; |
---|
2056 | treepict[xcor[node]][ycor[node]] = ':'; |
---|
2057 | for (i = xold + 1; i < xcor[node]-3; i++) |
---|
2058 | treepict[i][ycor[node]] = '-'; |
---|
2059 | sprintf(buf, "%d", consconfid[node]); |
---|
2060 | if (consconfid[node] == 100) { |
---|
2061 | treepict[xcor[node]-3][ycor[node]] = buf[0]; |
---|
2062 | treepict[xcor[node]-2][ycor[node]] = buf[1]; |
---|
2063 | treepict[xcor[node]-1][ycor[node]] = buf[2]; |
---|
2064 | } else { |
---|
2065 | treepict[xcor[node]-3][ycor[node]] = '-'; |
---|
2066 | treepict[xcor[node]-2][ycor[node]] = buf[0]; |
---|
2067 | treepict[xcor[node]-1][ycor[node]] = buf[1]; |
---|
2068 | } |
---|
2069 | } |
---|
2070 | |
---|
2071 | /* plot consensus tree */ |
---|
2072 | void plotconsensustree(FILE *plotfp) |
---|
2073 | { |
---|
2074 | int i, j, yroot, startree; |
---|
2075 | |
---|
2076 | /* star tree or no star tree */ |
---|
2077 | if (consincluded == 0) { |
---|
2078 | startree = TRUE; |
---|
2079 | consincluded = 1; /* avoids problems with malloc */ |
---|
2080 | } else |
---|
2081 | startree = FALSE; |
---|
2082 | |
---|
2083 | /* memory for x-y-coordinates of each bipartition */ |
---|
2084 | xcor = new_ivector(consincluded); |
---|
2085 | ycor = new_ivector(consincluded); |
---|
2086 | ycormax = new_ivector(consincluded); |
---|
2087 | ycormin = new_ivector(consincluded); |
---|
2088 | if (startree) consincluded = 0; /* avoids problems with malloc */ |
---|
2089 | |
---|
2090 | /* y-coordinates of each taxon */ |
---|
2091 | ycortax = new_ivector(Maxspc); |
---|
2092 | ycortax[outgroup] = 0; |
---|
2093 | |
---|
2094 | /* establish coordinates */ |
---|
2095 | ytaxcounter = 2*Maxspc - 2; |
---|
2096 | |
---|
2097 | /* first establish coordinates of descending nodes */ |
---|
2098 | for (i = 0; i < Maxspc; i++) { |
---|
2099 | if (consbiparts[consincluded][i] == '2' || |
---|
2100 | consbiparts[consincluded][i] == '3') |
---|
2101 | nodecoordinates(i); |
---|
2102 | } |
---|
2103 | |
---|
2104 | /* then establish coordinates of descending taxa */ |
---|
2105 | for (i = 0; i < Maxspc; i++) { |
---|
2106 | if (consbiparts[consincluded][i] == '1' || |
---|
2107 | consbiparts[consincluded][i] == '3') { |
---|
2108 | /* y-coordinate of taxon i */ |
---|
2109 | ycortax[i] = ytaxcounter; |
---|
2110 | ytaxcounter = ytaxcounter - 2; |
---|
2111 | } |
---|
2112 | } |
---|
2113 | |
---|
2114 | /* then establish length of root edge and size of whole tree */ |
---|
2115 | yroot = 0; |
---|
2116 | xsize = 0; |
---|
2117 | for (i = 0; i < Maxspc; i++) { |
---|
2118 | if (consbiparts[consincluded][i] == '2' || |
---|
2119 | consbiparts[consincluded][i] == '3') { |
---|
2120 | if (ycor[i] > yroot) yroot = ycor[i]; |
---|
2121 | if (xcor[i] > xsize) xsize = xcor[i]; |
---|
2122 | } |
---|
2123 | } |
---|
2124 | for (i = 0; i < Maxspc; i++) { |
---|
2125 | if (consbiparts[consincluded][i] == '1' || |
---|
2126 | consbiparts[consincluded][i] == '3') { |
---|
2127 | if (ycortax[i] > yroot) yroot = ycortax[i]; |
---|
2128 | } |
---|
2129 | } |
---|
2130 | if (xsize == 0) xsize = 9; |
---|
2131 | /* size in x direction inclusive one blank on the left */ |
---|
2132 | xsize = xsize + 6; |
---|
2133 | |
---|
2134 | /* change all x-labels so that (0,0) is down-left */ |
---|
2135 | for (i = 0; (uli)i < consincluded; i++) |
---|
2136 | xcor[i] = xsize-1-xcor[i]; |
---|
2137 | |
---|
2138 | /* draw tree */ |
---|
2139 | treepict = new_cmatrix(xsize, 2*Maxspc-1); |
---|
2140 | for (i = 0; i < xsize; i++) |
---|
2141 | for (j = 0; j < 2*Maxspc-1; j++) |
---|
2142 | treepict[i][j] = ' '; |
---|
2143 | |
---|
2144 | /* draw root */ |
---|
2145 | for (i = 1; i < yroot; i++) |
---|
2146 | treepict[1][i] = ':'; |
---|
2147 | treepict[1][0] = ':'; |
---|
2148 | for (i = 2; i < xsize - 10; i++) |
---|
2149 | treepict[i][0] = '-'; |
---|
2150 | for (i = 0; i < 10; i++) |
---|
2151 | treepict[xsize-10+i][0] = Identif[outgroup][i]; |
---|
2152 | |
---|
2153 | /* then draw descending nodes */ |
---|
2154 | for (i = 0; i < Maxspc; i++) { |
---|
2155 | if (consbiparts[consincluded][i] == '2' || |
---|
2156 | consbiparts[consincluded][i] == '3') |
---|
2157 | drawnode(i, 1); |
---|
2158 | } |
---|
2159 | |
---|
2160 | /* then draw descending taxa */ |
---|
2161 | for (i = 0; i < Maxspc; i++) { |
---|
2162 | if (consbiparts[consincluded][i] == '1' || |
---|
2163 | consbiparts[consincluded][i] == '3') { |
---|
2164 | treepict[1][ycortax[i]] = ':'; |
---|
2165 | for (j = 2; j < xsize-10; j++) |
---|
2166 | treepict[j][ycortax[i]] = '-'; |
---|
2167 | for (j = 0; j < 10; j++) |
---|
2168 | treepict[xsize-10+j][ycortax[i]] = Identif[i][j]; |
---|
2169 | } |
---|
2170 | } |
---|
2171 | |
---|
2172 | /* plot tree */ |
---|
2173 | for (i = 2*Maxspc-2; i > -1; i--) { |
---|
2174 | for (j = 0; j < xsize; j++) |
---|
2175 | fputc(treepict[j][i], plotfp); |
---|
2176 | fputc('\n', plotfp); |
---|
2177 | } |
---|
2178 | |
---|
2179 | free_ivector(xcor); |
---|
2180 | free_ivector(ycor); |
---|
2181 | free_ivector(ycormax); |
---|
2182 | free_ivector(ycormin); |
---|
2183 | free_ivector(ycortax); |
---|
2184 | free_cmatrix(treepict); |
---|
2185 | } |
---|
2186 | |
---|
2187 | |
---|
2188 | |
---|
2189 | /******************************************************************************/ |
---|
2190 | /* storing and evaluating quartet branching information */ |
---|
2191 | /******************************************************************************/ |
---|
2192 | |
---|
2193 | /* general remarks: |
---|
2194 | |
---|
2195 | for a quartet with the taxa a, b, c, d there are |
---|
2196 | three possible binary trees: |
---|
2197 | |
---|
2198 | 1) (a,b)-(c,d) |
---|
2199 | 2) (a,c)-(b,d) |
---|
2200 | 3) (a,d)-(b,c) |
---|
2201 | |
---|
2202 | For every quartet information about its branching structure is |
---|
2203 | stored. With the functions readquartet and writequartet |
---|
2204 | this information can be accessed. For every quartet (a,b,c,d) |
---|
2205 | with a < b < c < d (taxa) the branching information is encoded |
---|
2206 | using 4 bits: |
---|
2207 | |
---|
2208 | value 8 4 2 1 |
---|
2209 | +-------------+-------------+-------------+-------------+ |
---|
2210 | | not used | tree 3 | tree 2 | tree 1 | |
---|
2211 | +-------------+-------------+-------------+-------------+ |
---|
2212 | |
---|
2213 | If the branching structure of the taxa corresponds to one of the |
---|
2214 | three trees the corresponding bit is set. If the branching structure |
---|
2215 | is unclear because two of the three trees have the same maximum |
---|
2216 | likelihood value the corresponding two bits are set. If the branching |
---|
2217 | structure is completely unknown all the bits are set (the highest |
---|
2218 | bit is always cleared because it is not used). |
---|
2219 | |
---|
2220 | */ |
---|
2221 | |
---|
2222 | /* allocate memory for quartets */ |
---|
2223 | unsigned char *mallocquartets(int taxa) |
---|
2224 | { |
---|
2225 | uli nc, numch; |
---|
2226 | unsigned char *qinfo; |
---|
2227 | |
---|
2228 | /* compute number of quartets */ |
---|
2229 | Numquartets = (uli) taxa*(taxa-1)*(taxa-2)*(taxa-3)/24; |
---|
2230 | if (Numquartets % 2 == 0) { /* even number */ |
---|
2231 | numch = Numquartets/2; |
---|
2232 | } else { /* odd number */ |
---|
2233 | numch = (Numquartets + 1)/2; |
---|
2234 | } |
---|
2235 | /* allocate memory */ |
---|
2236 | qinfo = (unsigned char *) malloc(numch * sizeof(unsigned char) ); |
---|
2237 | if (qinfo == NULL) maerror("quartetinfo in mallocquartets"); |
---|
2238 | for (nc = 0; nc < numch; nc++) qinfo[nc] = 0; |
---|
2239 | return(qinfo); |
---|
2240 | } |
---|
2241 | |
---|
2242 | /* free quartet memory */ |
---|
2243 | void freequartets() |
---|
2244 | { |
---|
2245 | free(quartetinfo); |
---|
2246 | } |
---|
2247 | |
---|
2248 | /* read quartet info - a < b < c < d */ |
---|
2249 | unsigned char readquartet(int a, int b, int c, int d) |
---|
2250 | { |
---|
2251 | uli qnum; |
---|
2252 | |
---|
2253 | qnum = (uli) a |
---|
2254 | + (uli) b*(b-1)/2 |
---|
2255 | + (uli) c*(c-1)*(c-2)/6 |
---|
2256 | + (uli) d*(d-1)*(d-2)*(d-3)/24; |
---|
2257 | if (qnum % 2 == 0) { /* even number */ |
---|
2258 | /* bits 0 to 3 */ |
---|
2259 | return (quartetinfo[qnum/2] & (unsigned char) 15); |
---|
2260 | } else { /* odd number */ |
---|
2261 | /* bits 4 to 7 */ |
---|
2262 | return ((quartetinfo[(qnum-1)/2] & (unsigned char) 240)>>4); |
---|
2263 | } |
---|
2264 | } |
---|
2265 | |
---|
2266 | /* write quartet info - a < b < c < d, 0 <= info <= 15 */ |
---|
2267 | void writequartet(int a, int b, int c, int d, unsigned char info) |
---|
2268 | { |
---|
2269 | uli qnum; |
---|
2270 | |
---|
2271 | qnum = (uli) a |
---|
2272 | + (uli) b*(b-1)/2 |
---|
2273 | + (uli) c*(c-1)*(c-2)/6 |
---|
2274 | + (uli) d*(d-1)*(d-2)*(d-3)/24; |
---|
2275 | if (qnum % 2 == 0) { /* even number */ |
---|
2276 | /* bits 0 to 3 */ |
---|
2277 | quartetinfo[qnum/2] = |
---|
2278 | ((quartetinfo[qnum/2] & (unsigned char) 240) | |
---|
2279 | (info & (unsigned char) 15)); |
---|
2280 | } else { /* odd number */ |
---|
2281 | /* bits 4 to 7 */ |
---|
2282 | quartetinfo[(qnum-1)/2] = |
---|
2283 | ((quartetinfo[(qnum-1)/2] & (unsigned char) 15) | |
---|
2284 | ((info & (unsigned char) 15)<<4)); |
---|
2285 | } |
---|
2286 | } |
---|
2287 | |
---|
2288 | /* sorts three doubles in descending order */ |
---|
2289 | void sort3doubles(dvector num, ivector order) |
---|
2290 | { |
---|
2291 | if (num[0] > num[1]) { |
---|
2292 | if(num[2] > num[0]) { |
---|
2293 | order[0] = 2; |
---|
2294 | order[1] = 0; |
---|
2295 | order[2] = 1; |
---|
2296 | } else if (num[2] < num[1]) { |
---|
2297 | order[0] = 0; |
---|
2298 | order[1] = 1; |
---|
2299 | order[2] = 2; |
---|
2300 | } else { |
---|
2301 | order[0] = 0; |
---|
2302 | order[1] = 2; |
---|
2303 | order[2] = 1; |
---|
2304 | } |
---|
2305 | } else { |
---|
2306 | if(num[2] > num[1]) { |
---|
2307 | order[0] = 2; |
---|
2308 | order[1] = 1; |
---|
2309 | order[2] = 0; |
---|
2310 | } else if (num[2] < num[0]) { |
---|
2311 | order[0] = 1; |
---|
2312 | order[1] = 0; |
---|
2313 | order[2] = 2; |
---|
2314 | } else { |
---|
2315 | order[0] = 1; |
---|
2316 | order[1] = 2; |
---|
2317 | order[2] = 0; |
---|
2318 | } |
---|
2319 | } |
---|
2320 | } |
---|
2321 | |
---|
2322 | /* checks out all possible quartets */ |
---|
2323 | void computeallquartets() |
---|
2324 | { |
---|
2325 | double onethird; |
---|
2326 | uli nq; |
---|
2327 | unsigned char treebits[3]; |
---|
2328 | FILE *lhfp; |
---|
2329 | # if ! PARALLEL |
---|
2330 | int a, b, c, i; |
---|
2331 | double qc2, mintogo, minutes, hours, temp; |
---|
2332 | double temp1, temp2, temp3; |
---|
2333 | unsigned char discreteweight[3]; |
---|
2334 | # endif |
---|
2335 | |
---|
2336 | onethird = 1.0/3.0; |
---|
2337 | treebits[0] = (unsigned char) 1; |
---|
2338 | treebits[1] = (unsigned char) 2; |
---|
2339 | treebits[2] = (unsigned char) 4; |
---|
2340 | |
---|
2341 | if (show_optn) { /* list all unresolved quartets */ |
---|
2342 | openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees"); |
---|
2343 | fprintf(unresfp, "List of all completely unresolved quartets:\n\n"); |
---|
2344 | } |
---|
2345 | |
---|
2346 | nq = 0; |
---|
2347 | badqs = 0; |
---|
2348 | |
---|
2349 | /* start timer - percentage of completed quartets */ |
---|
2350 | time(&time0); |
---|
2351 | time1 = time0; |
---|
2352 | mflag = 0; |
---|
2353 | |
---|
2354 | # if PARALLEL |
---|
2355 | { |
---|
2356 | schedtype sched; |
---|
2357 | int flag; |
---|
2358 | MPI_Status stat; |
---|
2359 | int dest = 1; |
---|
2360 | uli qaddr =0; |
---|
2361 | uli qamount=0; |
---|
2362 | int qblocksent = 0; |
---|
2363 | int apr; |
---|
2364 | uli sq, noq; |
---|
2365 | initsched(&sched, numquarts(Maxspc), PP_NumProcs-1, 4); |
---|
2366 | qamount=sgss(&sched); |
---|
2367 | while (qamount > 0) { |
---|
2368 | if (PP_emptyslave()) { |
---|
2369 | PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr); |
---|
2370 | qblocksent -= noq; |
---|
2371 | } |
---|
2372 | dest = PP_getslave(); |
---|
2373 | PP_SendDoQuartBlock(dest, qaddr, qamount, (approxqp ? APPROX : EXACT)); |
---|
2374 | qblocksent += qamount; |
---|
2375 | qaddr += qamount; |
---|
2376 | qamount=sgss(&sched); |
---|
2377 | |
---|
2378 | MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat); |
---|
2379 | while (flag) { |
---|
2380 | PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr); |
---|
2381 | qblocksent -= noq; |
---|
2382 | MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat); |
---|
2383 | } |
---|
2384 | } |
---|
2385 | while (qblocksent > 0) { |
---|
2386 | PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr); |
---|
2387 | qblocksent -= noq; |
---|
2388 | } |
---|
2389 | } |
---|
2390 | # else /* PARALLEL */ |
---|
2391 | |
---|
2392 | addtimes(GENERAL, &tarr); |
---|
2393 | if (savequartlh_optn) { |
---|
2394 | openfiletowrite(&lhfp, ALLQUARTLH, "all quartet likelihoods"); |
---|
2395 | if (saveqlhbin_optn) writetpqfheader(Maxspc, lhfp, 3); |
---|
2396 | else writetpqfheader(Maxspc, lhfp, 4); |
---|
2397 | } |
---|
2398 | |
---|
2399 | for (i = 3; i < Maxspc; i++) |
---|
2400 | for (c = 2; c < i; c++) |
---|
2401 | for (b = 1; b < c; b++) |
---|
2402 | for (a = 0; a < b; a++) { |
---|
2403 | nq++; |
---|
2404 | |
---|
2405 | /* generate message every 15 minutes */ |
---|
2406 | /* check timer */ |
---|
2407 | time(&time2); |
---|
2408 | if ( (time2 - time1) > 900) { |
---|
2409 | /* every 900 seconds */ |
---|
2410 | /* percentage of completed quartets */ |
---|
2411 | if (mflag == 0) { |
---|
2412 | FPRINTF(STDOUTFILE "\n"); |
---|
2413 | mflag = 1; |
---|
2414 | } |
---|
2415 | qc2 = 100.*nq/Numquartets; |
---|
2416 | mintogo = (100.0-qc2) * |
---|
2417 | (double) (time2-time0)/60.0/qc2; |
---|
2418 | hours = floor(mintogo/60.0); |
---|
2419 | minutes = mintogo - 60.0*hours; |
---|
2420 | FPRINTF(STDOUTFILE "%.2f%%", qc2); |
---|
2421 | FPRINTF(STDOUTFILE " completed (remaining"); |
---|
2422 | FPRINTF(STDOUTFILE " time: %.0f", hours); |
---|
2423 | FPRINTF(STDOUTFILE " hours %.0f", minutes); |
---|
2424 | FPRINTF(STDOUTFILE " minutes)\n"); |
---|
2425 | fflush(STDOUT); |
---|
2426 | time1 = time2; |
---|
2427 | } |
---|
2428 | |
---|
2429 | /* maximum likelihood values */ |
---|
2430 | |
---|
2431 | /* exact or approximate maximum likelihood values */ |
---|
2432 | compute_quartlklhds(a,b,c,i,&qweight[0],&qweight[1],&qweight[2], (approxqp ? APPROX : EXACT)); |
---|
2433 | |
---|
2434 | if (savequartlh_optn) { |
---|
2435 | if (saveqlhbin_optn) |
---|
2436 | fwrite(qweight, sizeof(double), 3, lhfp); |
---|
2437 | else |
---|
2438 | fprintf(lhfp, "(%d,%d,%d,%d)\t%f\t%f\t%f\n", a, b, c, i, |
---|
2439 | qweight[0], qweight[1], qweight[2]); |
---|
2440 | } |
---|
2441 | |
---|
2442 | /* sort in descending order */ |
---|
2443 | sort3doubles(qweight, qworder); |
---|
2444 | |
---|
2445 | if (usebestq_optn) { |
---|
2446 | sqorder[2] = 2; |
---|
2447 | discreteweight[sqorder[2]] = treebits[qworder[0]]; |
---|
2448 | if (qweight[qworder[0]] == qweight[qworder[1]]) { |
---|
2449 | discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[1]]; |
---|
2450 | if (qweight[qworder[1]] == qweight[qworder[2]]) { |
---|
2451 | discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[2]]; |
---|
2452 | discreteweight[sqorder[2]] = 7; |
---|
2453 | } |
---|
2454 | } |
---|
2455 | } else { |
---|
2456 | |
---|
2457 | /* compute Bayesian weights */ |
---|
2458 | qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]); |
---|
2459 | qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]); |
---|
2460 | qweight[qworder[0]] = 1.0; |
---|
2461 | temp = qweight[0] + qweight[1] + qweight[2]; |
---|
2462 | qweight[0] = qweight[0]/temp; |
---|
2463 | qweight[1] = qweight[1]/temp; |
---|
2464 | qweight[2] = qweight[2]/temp; |
---|
2465 | |
---|
2466 | /* square deviations */ |
---|
2467 | temp1 = 1.0 - qweight[qworder[0]]; |
---|
2468 | sqdiff[0] = temp1 * temp1 + |
---|
2469 | qweight[qworder[1]] * qweight[qworder[1]] + |
---|
2470 | qweight[qworder[2]] * qweight[qworder[2]]; |
---|
2471 | discreteweight[0] = treebits[qworder[0]]; |
---|
2472 | |
---|
2473 | temp1 = 0.5 - qweight[qworder[0]]; |
---|
2474 | temp2 = 0.5 - qweight[qworder[1]]; |
---|
2475 | sqdiff[1] = temp1 * temp1 + temp2 * temp2 + |
---|
2476 | qweight[qworder[2]] * qweight[qworder[2]]; |
---|
2477 | discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]]; |
---|
2478 | |
---|
2479 | temp1 = onethird - qweight[qworder[0]]; |
---|
2480 | temp2 = onethird - qweight[qworder[1]]; |
---|
2481 | temp3 = onethird - qweight[qworder[2]]; |
---|
2482 | sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3; |
---|
2483 | discreteweight[2] = (unsigned char) 7; |
---|
2484 | |
---|
2485 | /* sort in descending order */ |
---|
2486 | sort3doubles(sqdiff, sqorder); |
---|
2487 | } |
---|
2488 | |
---|
2489 | /* determine best discrete weight */ |
---|
2490 | writequartet(a, b, c, i, discreteweight[sqorder[2]]); |
---|
2491 | |
---|
2492 | /* counting completely unresolved quartets */ |
---|
2493 | if (discreteweight[sqorder[2]] == 7) { |
---|
2494 | badqs++; |
---|
2495 | badtaxon[a]++; |
---|
2496 | badtaxon[b]++; |
---|
2497 | badtaxon[c]++; |
---|
2498 | badtaxon[i]++; |
---|
2499 | if (show_optn) { |
---|
2500 | fputid10(unresfp, a); |
---|
2501 | fprintf(unresfp, " "); |
---|
2502 | fputid10(unresfp, b); |
---|
2503 | fprintf(unresfp, " "); |
---|
2504 | fputid10(unresfp, c); |
---|
2505 | fprintf(unresfp, " "); |
---|
2506 | fputid(unresfp, i); |
---|
2507 | fprintf(unresfp, "\n"); |
---|
2508 | } |
---|
2509 | } |
---|
2510 | addtimes(QUARTETS, &tarr); |
---|
2511 | } |
---|
2512 | if (savequartlh_optn) { |
---|
2513 | closefile(lhfp); |
---|
2514 | } |
---|
2515 | if (show_optn) |
---|
2516 | closefile(unresfp); |
---|
2517 | if (mflag == 1) |
---|
2518 | FPRINTF(STDOUTFILE "\n"); |
---|
2519 | # endif /* PARALLEL */ |
---|
2520 | |
---|
2521 | } |
---|
2522 | |
---|
2523 | /* check the branching structure between the leaves (not the taxa!) |
---|
2524 | A, B, C, and I (A, B, C, I don't need to be ordered). As a result, |
---|
2525 | the two leaves that are closer related to each other than to leaf I |
---|
2526 | are found in chooseA and chooseB. If the branching structure is |
---|
2527 | not uniquely defined, ChooseA and ChooseB are chosen randomly |
---|
2528 | from the possible taxa */ |
---|
2529 | void checkquartet(int A, int B, int C, int I) |
---|
2530 | { |
---|
2531 | int i, j, a, b, taxon[5], leaf[5], ipos; |
---|
2532 | unsigned char qresult; |
---|
2533 | int notunique = FALSE; |
---|
2534 | |
---|
2535 | /* The relationship between leaves and taxa is defined by trueID */ |
---|
2536 | taxon[1] = trueID[A]; /* taxon number */ |
---|
2537 | leaf[1] = A; /* leaf number */ |
---|
2538 | taxon[2] = trueID[B]; |
---|
2539 | leaf[2] = B; |
---|
2540 | taxon[3] = trueID[C]; |
---|
2541 | leaf[3] = C; |
---|
2542 | taxon[4] = trueID[I]; |
---|
2543 | leaf[4] = I; |
---|
2544 | |
---|
2545 | /* sort for taxa */ |
---|
2546 | /* Source: Numerical Recipes (PIKSR2.C) */ |
---|
2547 | for (j = 2; j <= 4; j++) { |
---|
2548 | a = taxon[j]; |
---|
2549 | b = leaf[j]; |
---|
2550 | i = j-1; |
---|
2551 | while (i > 0 && taxon[i] > a) { |
---|
2552 | taxon[i+1] = taxon[i]; |
---|
2553 | leaf[i+1] = leaf[i]; |
---|
2554 | i--; |
---|
2555 | } |
---|
2556 | taxon[i+1] = a; |
---|
2557 | leaf[i+1] = b; |
---|
2558 | } |
---|
2559 | |
---|
2560 | /* where is leaf I ? */ |
---|
2561 | ipos = 1; |
---|
2562 | while (leaf[ipos] != I) ipos++; |
---|
2563 | |
---|
2564 | /* look at sequence quartet */ |
---|
2565 | qresult = readquartet(taxon[1], taxon[2], taxon[3], taxon[4]); |
---|
2566 | |
---|
2567 | /* chooseA and chooseB */ |
---|
2568 | do { |
---|
2569 | switch (qresult) { |
---|
2570 | |
---|
2571 | /* one single branching structure */ |
---|
2572 | |
---|
2573 | /* 001 */ |
---|
2574 | case 1: if (ipos == 1 || ipos == 2) { |
---|
2575 | chooseA = leaf[3]; |
---|
2576 | chooseB = leaf[4]; |
---|
2577 | } else { |
---|
2578 | chooseA = leaf[1]; |
---|
2579 | chooseB = leaf[2]; |
---|
2580 | } |
---|
2581 | notunique = FALSE; |
---|
2582 | break; |
---|
2583 | |
---|
2584 | /* 010 */ |
---|
2585 | case 2: if (ipos == 1 || ipos == 3) { |
---|
2586 | chooseA = leaf[2]; |
---|
2587 | chooseB = leaf[4]; |
---|
2588 | } else { |
---|
2589 | chooseA = leaf[1]; |
---|
2590 | chooseB = leaf[3]; |
---|
2591 | } |
---|
2592 | notunique = FALSE; |
---|
2593 | break; |
---|
2594 | |
---|
2595 | /* 100 */ |
---|
2596 | case 4: if (ipos == 1 || ipos == 4) { |
---|
2597 | chooseA = leaf[2]; |
---|
2598 | chooseB = leaf[3]; |
---|
2599 | } else { |
---|
2600 | chooseA = leaf[1]; |
---|
2601 | chooseB = leaf[4]; |
---|
2602 | } |
---|
2603 | notunique = FALSE; |
---|
2604 | break; |
---|
2605 | |
---|
2606 | /* two possible branching structures */ |
---|
2607 | |
---|
2608 | /* 011 */ |
---|
2609 | case 3: if (randominteger(2)) qresult = 1; |
---|
2610 | else qresult = 2; |
---|
2611 | notunique = TRUE; |
---|
2612 | break; |
---|
2613 | |
---|
2614 | /* 101 */ |
---|
2615 | case 5: if (randominteger(2)) qresult = 1; |
---|
2616 | else qresult = 4; |
---|
2617 | notunique = TRUE; |
---|
2618 | break; |
---|
2619 | |
---|
2620 | /* 110 */ |
---|
2621 | case 6: if (randominteger(2)) qresult = 2; |
---|
2622 | else qresult = 4; |
---|
2623 | notunique = TRUE; |
---|
2624 | break; |
---|
2625 | |
---|
2626 | /* three possible branching structures */ |
---|
2627 | |
---|
2628 | /* 111 */ |
---|
2629 | case 7: qresult = (1 << randominteger(3)); /* 1, 2, or 4 */ |
---|
2630 | notunique = TRUE; |
---|
2631 | break; |
---|
2632 | |
---|
2633 | default: /* Program error [checkquartet] */ |
---|
2634 | #if PARALLEL |
---|
2635 | FPRINTF(STDOUTFILE "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K-PARALLEL TO DEVELOPERS (%d,%d,%d,%d) = %ld\n\n\n", |
---|
2636 | PP_Myid, taxon[1], taxon[2], taxon[3], taxon[4], |
---|
2637 | quart2num(taxon[1], taxon[2], taxon[3], taxon[4])); |
---|
2638 | #else |
---|
2639 | FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR K TO DEVELOPERS\n\n\n"); |
---|
2640 | #endif |
---|
2641 | |
---|
2642 | } |
---|
2643 | } while (notunique); |
---|
2644 | |
---|
2645 | return; |
---|
2646 | } |
---|
2647 | |
---|