1 | /* |
---|
2 | * protml.c Adachi, J. 1995.12.01 |
---|
3 | * Copyright (C) 1992-1995 J. Adachi & M. Hasegawa, All rights reserved. |
---|
4 | */ |
---|
5 | |
---|
6 | #define NJMLD 0 |
---|
7 | #define BRANCHULIMIT 0 |
---|
8 | #define PTNLKL 0 |
---|
9 | #define TPGRAPH 0 |
---|
10 | #define DISLKL 0 |
---|
11 | #define TREECHECK 0 |
---|
12 | #define FMLEN 0 |
---|
13 | #define COMPCRITERION 0 |
---|
14 | #define TPMZEROCHECK 0 |
---|
15 | |
---|
16 | #define MAIN_MODULE 1 |
---|
17 | #include "protml.h" |
---|
18 | |
---|
19 | void |
---|
20 | copyright() |
---|
21 | { |
---|
22 | #ifndef NUC |
---|
23 | fprintf(stderr, "ProtML %s(%s) ", VERSION, DATE ); |
---|
24 | fprintf(stderr, "Maximum Likelihood Inference of Protein Phylogeny\n"); |
---|
25 | #else /* NUC */ |
---|
26 | fprintf(stderr, "NucML %s(%s) ", VERSION, DATE ); |
---|
27 | fprintf(stderr, "Maximum Likelihood Inference of Nucleic Acid Phylogeny\n"); |
---|
28 | #endif /* NUC */ |
---|
29 | fprintf(stderr, "Copyright (C) 1992-1996 J. Adachi & M. Hasegawa. "); |
---|
30 | fprintf(stderr, "All rights reserved.\n"); |
---|
31 | /* fprintf(stderr, " ProtML comes with NO WARRANTY\n"); */ |
---|
32 | } |
---|
33 | |
---|
34 | |
---|
35 | void |
---|
36 | usage() |
---|
37 | { |
---|
38 | copyright(); |
---|
39 | fprintf(stderr, "Usage: %s [switches] sequence_file [topology_file]\n", |
---|
40 | Prog_name); |
---|
41 | fprintf(stderr, " Help: %s -h\n", Prog_name); |
---|
42 | } |
---|
43 | |
---|
44 | |
---|
45 | void |
---|
46 | helpinfo() |
---|
47 | { |
---|
48 | copyright(); |
---|
49 | fprintf(stderr, |
---|
50 | "Usage: %s [switches] sequence_file [topology_file]\n",Prog_name); |
---|
51 | fprintf(stderr, |
---|
52 | "sequence_file = MOLPHY_format | Sequential(-S) | Interleaved(-I)\n"); |
---|
53 | fprintf(stderr, |
---|
54 | "topology_file = users_trees(-u) | constrained_tree(-e)\n"); |
---|
55 | fprintf(stderr, "Model:\n"); |
---|
56 | #ifndef NUC |
---|
57 | fprintf(stderr, "-j JTT (default) -jf JTT-F Jones, Taylor & Thornton(1992)\n"); |
---|
58 | fprintf(stderr, "-d Dayhoff -df Dayhoff-F Dayhoff et al.(1978)\n"); |
---|
59 | fprintf(stderr, "-m %-7s -mf %-7s-F Adachi & Hasegawa(1995)\n", MTREVNAME, MTREVNAME); |
---|
60 | #else /* NUC */ |
---|
61 | fprintf(stderr, |
---|
62 | "-t n1 n1: Alpha/Beta ratio (default:%.1f) Hasegawa, Kishino & Yano(1985)\n", ALPHABETA); |
---|
63 | fprintf(stderr, |
---|
64 | "-t n1,n2 n2: AlphaY/AlphaR ratio (default:%.1f) Tamura & Nei(1993)\n", ALPHAYR); |
---|
65 | #endif /* NUC */ |
---|
66 | #ifndef NUC |
---|
67 | fprintf(stderr, "-p Poisson -pf Proportional (-f: with data Frequencies)\n"); |
---|
68 | fprintf(stderr, "-r users RSR -rf users RSR-F (Relative Substitution Rate)\n"); |
---|
69 | #else /* NUC */ |
---|
70 | fprintf(stderr, "-p Proportional -pf Poisson (-f withOUT data Frequencies)\n"); |
---|
71 | fprintf(stderr, "-r users RSR-F -rf users RSR (Relative Substitution Rate)\n"); |
---|
72 | #endif /* NUC */ |
---|
73 | fprintf(stderr, "Search strategy or Mode:\n"); |
---|
74 | fprintf(stderr, "-u Users trees (need users_trees file)\n"); |
---|
75 | fprintf(stderr, "-R local Rearrangement search -RX LBP only\n"); |
---|
76 | fprintf(stderr, "-e Exhaustive search (with/without constrained_tree file)\n"); |
---|
77 | fprintf(stderr, "-s Star decomposition search (may not be the ML tree)\n"); |
---|
78 | fprintf(stderr, "-q Quick add OTUs search (may not be the ML tree)\n"); |
---|
79 | fprintf(stderr, "-D maximum likelihood Distance matrix --> NJDIST\n"); |
---|
80 | fprintf(stderr, "Others:\n"); |
---|
81 | fprintf(stderr, "-n num retained top ranking trees (default -e:%d,-q:%d)\n", MAXALTREES, NUMQLTREES); |
---|
82 | fprintf(stderr, "-P num Per cent (default -e:%.0f, if possible trees<=945 : %.0f)\n", TBLRATE*100, TBLRATE7*100); |
---|
83 | fprintf(stderr, "-b no RELL-BP -M Minimum evolution (with -e)\n"); |
---|
84 | fprintf(stderr, "-S Sequential format -I Interleaved format\n"); |
---|
85 | fprintf(stderr, "-v verbose to stderr -i, -w output some information\n"); |
---|
86 | } |
---|
87 | |
---|
88 | static void prologue(); |
---|
89 | void pml(); |
---|
90 | |
---|
91 | |
---|
92 | main(argc, argv) |
---|
93 | int argc; |
---|
94 | char **argv; |
---|
95 | { |
---|
96 | FILE *seqfp, *tplfp; |
---|
97 | int i, ch, num; |
---|
98 | double x; |
---|
99 | char **cpp, *cp; |
---|
100 | char buf[64]; |
---|
101 | boolean num_flag; |
---|
102 | extern int Optindex; /* index of next argument */ |
---|
103 | extern char *Optargp; /* pointer to argument of current option */ |
---|
104 | |
---|
105 | Ct0 = time(NULL); |
---|
106 | if((Prog_name = strrchr(argv[0], DIR_CHAR)) != NULL ) |
---|
107 | Prog_name++; |
---|
108 | else |
---|
109 | Prog_name = argv[0]; |
---|
110 | |
---|
111 | Rrsr_optn = FALSE; |
---|
112 | Boots_optn = TRUE; /* default with User_optn*/ |
---|
113 | Exhau_optn = FALSE; |
---|
114 | Const_optn = FALSE; |
---|
115 | User_optn = FALSE; |
---|
116 | Aneal_optn = FALSE; |
---|
117 | Njoin_optn = FALSE; |
---|
118 | num_flag = FALSE; |
---|
119 | Percnt_optn = FALSE; |
---|
120 | Maxaltree = MAXALTREES; |
---|
121 | Numqltree = NUMQLTREES; |
---|
122 | #ifndef NUC |
---|
123 | Jtt_optn = TRUE; |
---|
124 | Frequ_optn = FALSE; |
---|
125 | #else /* NUC */ |
---|
126 | Jtt_optn = FALSE; |
---|
127 | Frequ_optn = TRUE; |
---|
128 | Tstv_optn = FALSE; |
---|
129 | AlphaBeta = ALPHABETA; |
---|
130 | AlphaYR = ALPHAYR; |
---|
131 | Beta12 = BETA12; |
---|
132 | Topting = FALSE; |
---|
133 | #endif /* NUC */ |
---|
134 | Dayhf_optn = FALSE; |
---|
135 | Mtrev_optn = FALSE; |
---|
136 | Linesites = LINESITES; |
---|
137 | while((ch = mygetopt(argc, argv, SWITCHES)) != -1 ) { |
---|
138 | switch(ch) { |
---|
139 | #ifndef NUC |
---|
140 | case 'j': |
---|
141 | Jtt_optn = TRUE; |
---|
142 | Dayhf_optn = FALSE; |
---|
143 | Mtrev_optn = FALSE; |
---|
144 | Poisn_optn = FALSE; |
---|
145 | Rrsr_optn = FALSE; |
---|
146 | break; |
---|
147 | case 'd': |
---|
148 | Dayhf_optn = TRUE; |
---|
149 | Jtt_optn = FALSE; |
---|
150 | Mtrev_optn = FALSE; |
---|
151 | Poisn_optn = FALSE; |
---|
152 | Rrsr_optn = FALSE; |
---|
153 | break; |
---|
154 | case 'm': |
---|
155 | Mtrev_optn = TRUE; |
---|
156 | Dayhf_optn = FALSE; |
---|
157 | Jtt_optn = FALSE; |
---|
158 | Poisn_optn = FALSE; |
---|
159 | Rrsr_optn = FALSE; |
---|
160 | break; |
---|
161 | case 'f': |
---|
162 | Frequ_optn = TRUE; |
---|
163 | break; |
---|
164 | case 'p': |
---|
165 | Poisn_optn = TRUE; |
---|
166 | Jtt_optn = FALSE; |
---|
167 | Dayhf_optn = FALSE; |
---|
168 | Mtrev_optn = FALSE; |
---|
169 | Rrsr_optn = FALSE; |
---|
170 | break; |
---|
171 | case 'r': |
---|
172 | Rrsr_optn = TRUE; |
---|
173 | Jtt_optn = FALSE; |
---|
174 | Dayhf_optn = FALSE; |
---|
175 | Mtrev_optn = FALSE; |
---|
176 | Poisn_optn = FALSE; |
---|
177 | break; |
---|
178 | #else /* NUC */ |
---|
179 | case 't': |
---|
180 | cpp = &Optargp; |
---|
181 | Tstv_optn = TRUE; |
---|
182 | if (x = strtod(Optargp, cpp)) { |
---|
183 | AlphaBeta = x; |
---|
184 | if (**cpp == ',') { |
---|
185 | Optargp = ++(*cpp); |
---|
186 | if (x = strtod(Optargp, cpp)) { |
---|
187 | AlphaYR = x; |
---|
188 | if (**cpp == ',') { |
---|
189 | Optargp = ++(*cpp); |
---|
190 | if (x = strtod(Optargp, cpp)) { |
---|
191 | Beta12 = x; |
---|
192 | } |
---|
193 | } |
---|
194 | } |
---|
195 | } |
---|
196 | } else { |
---|
197 | Toptim_optn = TRUE; |
---|
198 | if (cp = strchr(Optargp, ',')) { |
---|
199 | Optargp = ++cp; |
---|
200 | if (x = strtod(Optargp, cpp)) { |
---|
201 | AlphaYR = x; |
---|
202 | } else { |
---|
203 | Toptim_optn += 2; |
---|
204 | AlphaYR = 1.0; |
---|
205 | } |
---|
206 | if (strchr(cp, ',')) Toptim_optn += 4; |
---|
207 | Beta12 = 1.0; |
---|
208 | } |
---|
209 | } |
---|
210 | break; |
---|
211 | case 'f': |
---|
212 | Frequ_optn = FALSE; |
---|
213 | break; |
---|
214 | case 'p': |
---|
215 | Poisn_optn = TRUE; |
---|
216 | break; |
---|
217 | case 'r': |
---|
218 | Rrsr_optn = TRUE; |
---|
219 | Poisn_optn = FALSE; |
---|
220 | break; |
---|
221 | #endif /* NUC */ |
---|
222 | case 'F': |
---|
223 | Logdet_optn = TRUE; |
---|
224 | break; |
---|
225 | |
---|
226 | |
---|
227 | case 'a': Aneal_optn = TRUE; |
---|
228 | Njoin_optn = FALSE; |
---|
229 | Exhau_optn = FALSE; |
---|
230 | User_optn = FALSE; |
---|
231 | Stard_optn = FALSE; |
---|
232 | Quick_optn = FALSE; |
---|
233 | Boots_optn = FALSE; break; |
---|
234 | case 'e': Exhau_optn = TRUE; |
---|
235 | Njoin_optn = FALSE; |
---|
236 | Aneal_optn = FALSE; |
---|
237 | User_optn = FALSE; |
---|
238 | Stard_optn = FALSE; |
---|
239 | Quick_optn = FALSE; |
---|
240 | Boots_optn = FALSE; break; |
---|
241 | case 'u': User_optn = TRUE; |
---|
242 | Njoin_optn = FALSE; |
---|
243 | Aneal_optn = FALSE; |
---|
244 | Exhau_optn = FALSE; |
---|
245 | Stard_optn = FALSE; |
---|
246 | Quick_optn = FALSE; break; |
---|
247 | case 'q': Quick_optn = TRUE; |
---|
248 | Njoin_optn = FALSE; |
---|
249 | Aneal_optn = FALSE; |
---|
250 | User_optn = FALSE; |
---|
251 | Stard_optn = FALSE; |
---|
252 | Exhau_optn = FALSE; break; |
---|
253 | case 'Q': Quick1_optn = TRUE; |
---|
254 | Njoin_optn = FALSE; |
---|
255 | Aneal_optn = FALSE; |
---|
256 | User_optn = FALSE; |
---|
257 | Stard_optn = FALSE; |
---|
258 | Exhau_optn = FALSE; break; |
---|
259 | case 's': Stard_optn = TRUE; |
---|
260 | Njoin_optn = FALSE; |
---|
261 | Aneal_optn = FALSE; |
---|
262 | Quick_optn = FALSE; |
---|
263 | Quick1_optn = FALSE; |
---|
264 | User_optn = FALSE; |
---|
265 | Exhau_optn = FALSE; break; |
---|
266 | case 'N': Njoin_optn = TRUE; |
---|
267 | Stard_optn = FALSE; |
---|
268 | Aneal_optn = FALSE; |
---|
269 | Quick_optn = FALSE; |
---|
270 | Quick1_optn = FALSE; |
---|
271 | User_optn = FALSE; |
---|
272 | Exhau_optn = FALSE; break; |
---|
273 | |
---|
274 | case 'n': cpp = &Optargp; |
---|
275 | num_flag = TRUE; |
---|
276 | if (i = strtol(Optargp, cpp, 10)) num = i; break; |
---|
277 | case 'o': |
---|
278 | Outgr_optn = TRUE; |
---|
279 | cpp = &Optargp; |
---|
280 | if (i = strtol(Optargp, cpp, 10)) { |
---|
281 | Outgroup1 = i - 1; |
---|
282 | printf("Outgroup1 %3d\n", Outgroup1+1); |
---|
283 | if (**cpp == ',') { |
---|
284 | Optargp = ++(*cpp); |
---|
285 | if (i = strtol(Optargp, cpp, 10)) { |
---|
286 | Outgr_optn = 2; |
---|
287 | Outgroup2 = i - 1; |
---|
288 | printf("Outgroup2 %3d\n", Outgroup2+1); |
---|
289 | } else { |
---|
290 | fprintf(stderr,"%s: bad out group option \"%s\"\n", |
---|
291 | Prog_name, Optargp); |
---|
292 | exit(1); |
---|
293 | } |
---|
294 | } |
---|
295 | } else { |
---|
296 | fprintf(stderr,"%s: bad out group option \"%s\"\n", |
---|
297 | Prog_name, Optargp); |
---|
298 | exit(1); |
---|
299 | } |
---|
300 | break; |
---|
301 | case 'P': |
---|
302 | cpp = &Optargp; |
---|
303 | Percnt_optn = TRUE; |
---|
304 | if (x = strtod(Optargp, cpp)) { |
---|
305 | Percent = x; |
---|
306 | /* printf("%8.3f\n", x); */ |
---|
307 | if (x < 0.0) { |
---|
308 | fprintf(stderr,"%s: bad par cent(-P) open \"%.1f\"\n", |
---|
309 | Prog_name, x); |
---|
310 | exit(1); |
---|
311 | } |
---|
312 | } else { |
---|
313 | fprintf(stderr,"%s: bad par cent(-P) open \"%s\"\n", |
---|
314 | Prog_name, Optargp); |
---|
315 | exit(1); |
---|
316 | } |
---|
317 | break; |
---|
318 | case 'b': Boots_optn = FALSE; break; |
---|
319 | case 'R': Relia_optn = TRUE; break; |
---|
320 | case 'T': Triad_optn = TRUE; break; |
---|
321 | case 'D': Distn_optn = TRUE; break; |
---|
322 | case 'A': Aprox_optn = TRUE; break; |
---|
323 | case 'l': |
---|
324 | Lklhd_optn = TRUE; |
---|
325 | Llsfile = new_cvector(strlen(Optargp) + strlen(LLSFILEEXT) + 1); |
---|
326 | *Llsfile = '\0'; |
---|
327 | if (strcat(Llsfile, Optargp) && strcat(Llsfile, LLSFILEEXT)) { |
---|
328 | /* printf("Llsfile is \"%s\"\n", Llsfile); */ |
---|
329 | break; |
---|
330 | } |
---|
331 | case 'L': Logfl_optn = TRUE; break; |
---|
332 | case 'i': Info_optn = TRUE; break; |
---|
333 | case 'M': Mevol_optn = TRUE; break; |
---|
334 | case 'v': Verbs_optn = TRUE; break; |
---|
335 | case 'V': Varia_optn = TRUE; break; |
---|
336 | case 'w': Write_optn = TRUE; break; |
---|
337 | case 'X': Xreli_optn = TRUE; break; |
---|
338 | case 'S': Seque_optn = TRUE; |
---|
339 | Inlvd_optn = FALSE; break; |
---|
340 | case 'I': Inlvd_optn = TRUE; |
---|
341 | Seque_optn = FALSE; break; |
---|
342 | #if 1 |
---|
343 | case 'z': Debug_optn = TRUE; break; |
---|
344 | case 'Z': Debug = TRUE; break; |
---|
345 | #endif |
---|
346 | case 'C': Ctacit_optn = TRUE; break; |
---|
347 | case 'h': |
---|
348 | case 'H': helpinfo(); exit(1); |
---|
349 | default : usage(); exit(1); |
---|
350 | } |
---|
351 | } |
---|
352 | if (Outgr_optn == 1) { |
---|
353 | printf("Outgroup1 %3d\n", Outgroup1+1); |
---|
354 | } else if (Outgr_optn == 2) { |
---|
355 | printf("Outgroup1 %3d Outgroup2 %3d\n", Outgroup1+1, Outgroup2+1); |
---|
356 | } else { |
---|
357 | } |
---|
358 | if (Exhau_optn) { |
---|
359 | if (Optindex + 2 == argc) Const_optn = TRUE; |
---|
360 | } |
---|
361 | #if TPGRAPH |
---|
362 | #else /* TPGRAPH */ |
---|
363 | if (!Aneal_optn && !Exhau_optn && !User_optn && !Quick_optn && |
---|
364 | !Quick1_optn && !Stard_optn && !Distn_optn && !Njoin_optn ) { |
---|
365 | if (Optindex == argc) { |
---|
366 | helpinfo(); exit(1); /* default !? */ |
---|
367 | } else if (Optindex + 1 == argc) { |
---|
368 | helpinfo(); exit(1); /* default !? */ |
---|
369 | } else if (Optindex + 2 == argc) { |
---|
370 | User_optn = TRUE; /* default !? */ |
---|
371 | } |
---|
372 | } |
---|
373 | #endif /* TPGRAPH */ |
---|
374 | if (num_flag) { |
---|
375 | if (Quick_optn || Quick1_optn) Numqltree = num; |
---|
376 | if (Exhau_optn) Maxaltree = num; |
---|
377 | } |
---|
378 | *Modelname = '\0'; |
---|
379 | #ifndef NUC |
---|
380 | if (Poisn_optn) { |
---|
381 | if (!Frequ_optn) strcat(Modelname, "Poisson"); |
---|
382 | else strcat(Modelname, "Proportional"); |
---|
383 | } else { |
---|
384 | if (Rrsr_optn) { |
---|
385 | strcat(Modelname, "Read RSR"); |
---|
386 | } else if (Jtt_optn) { |
---|
387 | strcat(Modelname, "JTT"); |
---|
388 | } else if (Dayhf_optn) { |
---|
389 | strcat(Modelname, "Dayhoff"); |
---|
390 | } else if (Mtrev_optn) { |
---|
391 | strcat(Modelname, MTREVNAME); |
---|
392 | } else { |
---|
393 | strcat(Modelname, "unknown"); |
---|
394 | } |
---|
395 | if (Frequ_optn) strcat(Modelname, "-F"); |
---|
396 | } |
---|
397 | #else /* NUC */ |
---|
398 | if (Poisn_optn) { |
---|
399 | if (!Frequ_optn) strcat(Modelname, "Poisson"); |
---|
400 | else strcat(Modelname, "Proportional"); |
---|
401 | AlphaBeta = 1.0; |
---|
402 | AlphaYR = 1.0; |
---|
403 | } else if (Rrsr_optn) { |
---|
404 | strcat(Modelname, "Read RSR"); |
---|
405 | if (Frequ_optn) strcat(Modelname, "-F"); |
---|
406 | } else { |
---|
407 | if (Toptim_optn) { |
---|
408 | strcat(Modelname, "A/B:opt"); |
---|
409 | if (Toptim_optn == 3 || Toptim_optn == 7) |
---|
410 | strcat(Modelname, " Ay/Ar:opt"); |
---|
411 | if (Toptim_optn >= 4) |
---|
412 | strcat(Modelname, " B1/B2:opt"); |
---|
413 | } else { |
---|
414 | sprintf(buf, "A/B:%.2f\0", AlphaBeta); |
---|
415 | strcat(Modelname, buf); |
---|
416 | if (AlphaYR != 1.0) { |
---|
417 | sprintf(buf, " Ay/Ar:%.2f", AlphaYR); |
---|
418 | strcat(Modelname, buf); |
---|
419 | } |
---|
420 | if (Beta12 != 1.0) { |
---|
421 | sprintf(buf, " B1/B2:%.2f", Beta12); |
---|
422 | strcat(Modelname, buf); |
---|
423 | } |
---|
424 | } |
---|
425 | if (Frequ_optn) strcat(Modelname, " F"); |
---|
426 | } |
---|
427 | #endif /* NUC */ |
---|
428 | /* printf("\"%s\"\n", Modelname); */ |
---|
429 | |
---|
430 | #ifdef DEBUG |
---|
431 | if (Debug) { |
---|
432 | printf("argc = %d\n",argc); |
---|
433 | for(i = 0; i < argc; i++) printf("argv[%d] = %s\n",i,argv[i]); |
---|
434 | putchar('\n'); |
---|
435 | printf("\nOptindex = %d\n",Optindex); |
---|
436 | printf("Optargp = %s\n",Optargp); |
---|
437 | } |
---|
438 | #endif |
---|
439 | |
---|
440 | Aneal_mode = 0; |
---|
441 | if (Optindex == argc) { |
---|
442 | seqfp = stdin; |
---|
443 | tplfp = stdin; |
---|
444 | Aneal_mode = 0; |
---|
445 | } else if (Optindex + 1 == argc) { |
---|
446 | if ((seqfp = fopen(argv[Optindex++], "r")) == NULL) { |
---|
447 | fprintf(stderr,"%s: can't open %s\n",Prog_name,argv[--Optindex]); |
---|
448 | exit(1); |
---|
449 | } else { |
---|
450 | tplfp = seqfp; |
---|
451 | Aneal_mode = 0; |
---|
452 | } |
---|
453 | } else if (Optindex + 2 == argc) { |
---|
454 | if ((seqfp = fopen(argv[Optindex++], "r")) == NULL) { |
---|
455 | fprintf(stderr,"%s: can't open %s\n",Prog_name,argv[--Optindex]); |
---|
456 | exit(1); |
---|
457 | } else if ((tplfp = fopen(argv[Optindex], "r")) == NULL) { |
---|
458 | if (*argv[Optindex] == '-') { |
---|
459 | tplfp = stdin; |
---|
460 | Aneal_mode = 1; |
---|
461 | } else { |
---|
462 | fprintf(stderr,"%s: can't open %s\n",Prog_name,argv[Optindex]); |
---|
463 | exit(1); |
---|
464 | } |
---|
465 | } |
---|
466 | } else { |
---|
467 | fprintf(stderr, "%s: Inconsistent number of operand in command line!\n", |
---|
468 | Prog_name); |
---|
469 | usage(); |
---|
470 | exit(1); |
---|
471 | } |
---|
472 | |
---|
473 | if (Rrsr_optn) { |
---|
474 | if ((Rtfifp = fopen(RSRINFILE, "r")) == NULL) { |
---|
475 | fprintf(stderr, |
---|
476 | "%s: can't open RSR file: %s\n",Prog_name,RSRINFILE); |
---|
477 | exit(1); |
---|
478 | } |
---|
479 | } |
---|
480 | if (Logfl_optn) { |
---|
481 | if ((Logfp = fopen(LOGFILE, "w")) == NULL) { |
---|
482 | fprintf(stderr, |
---|
483 | "%s: can't open log file: %s\n",Prog_name,LOGFILE); |
---|
484 | exit(1); |
---|
485 | } |
---|
486 | } |
---|
487 | if (Lklhd_optn) { |
---|
488 | if ((Lklfp = fopen(Llsfile, "w")) == NULL) { |
---|
489 | fprintf(stderr, |
---|
490 | "%s: can't open log likelihood file: %s\n",Prog_name,Llsfile); |
---|
491 | exit(1); |
---|
492 | } |
---|
493 | } |
---|
494 | |
---|
495 | Epsfile = new_cvector(strlen(EPSFILE) + 1); |
---|
496 | *Epsfile = '\0'; |
---|
497 | strcat(Epsfile, EPSFILE); |
---|
498 | if ((Epsfp = fopen(Epsfile, "w")) == NULL) { |
---|
499 | fprintf(stderr,"%s: can't open eps tree file: %s\n",Prog_name,Epsfile); |
---|
500 | exit(1); |
---|
501 | } |
---|
502 | if (Relia_optn || Njoin_optn) { |
---|
503 | if ((Tplfp = fopen(TPLFILE, "w")) == NULL) { |
---|
504 | fprintf(stderr,"%s: can't open tree topology file: %s\n", |
---|
505 | Prog_name,TPLFILE); |
---|
506 | exit(1); |
---|
507 | } |
---|
508 | } |
---|
509 | if (User_optn || Stard_optn || Njoin_optn) { |
---|
510 | if ((Trefp = fopen(TREFILE, "w")) == NULL) { |
---|
511 | fprintf(stderr,"%s: can't open tree file: %s\n",Prog_name,TREFILE); |
---|
512 | exit(1); |
---|
513 | } |
---|
514 | } |
---|
515 | |
---|
516 | if (Verbs_optn) { fputc('\n', stderr); copyright(); } |
---|
517 | Numexe = 1; |
---|
518 | for (Cnoexe = 0; Cnoexe < Numexe; Cnoexe++) { |
---|
519 | prologue(seqfp, stdout); |
---|
520 | if (!Distn_optn) pml(tplfp, stdout); |
---|
521 | } |
---|
522 | #if __STDC__ && DIFFTIME |
---|
523 | if (Verbs_optn) { |
---|
524 | Ct1 = time(NULL); |
---|
525 | x = difftime(Ct1, Ct0); |
---|
526 | fprintf(stderr, "elapse time %02d:%02d:%02d\n", |
---|
527 | (int)(x/3600), (int)(x/60), (int)x % 60); |
---|
528 | } |
---|
529 | #endif |
---|
530 | if (tplfp != stdin && tplfp != seqfp) fclose(tplfp); |
---|
531 | if (seqfp != stdin) fclose(seqfp); |
---|
532 | fclose(Epsfp); |
---|
533 | if (Relia_optn || Njoin_optn) fclose(Tplfp); |
---|
534 | if (User_optn || Stard_optn || Njoin_optn) fclose(Trefp); |
---|
535 | if (Lklhd_optn) fclose(Lklfp); |
---|
536 | if (Logfl_optn) fclose(Logfp); |
---|
537 | if (Rrsr_optn) fclose(Rtfifp); |
---|
538 | |
---|
539 | return 0; |
---|
540 | } |
---|
541 | |
---|
542 | |
---|
543 | void |
---|
544 | header(ofp, maxspc, numsite, commentp) |
---|
545 | FILE *ofp; |
---|
546 | int *maxspc; |
---|
547 | int *numsite; |
---|
548 | char **commentp; |
---|
549 | { |
---|
550 | char date[32]; |
---|
551 | |
---|
552 | strftime(date, 32, "%x", localtime(&Ct0)); |
---|
553 | fprintf(ofp, "%s %s (%s) %s", Prog_name, VERSION, date, Modelname); |
---|
554 | fprintf(ofp, " %d OTUs %d sites. %s\n", *maxspc, *numsite, *commentp); |
---|
555 | } /*_ header */ |
---|
556 | |
---|
557 | |
---|
558 | void |
---|
559 | headerd(ofp, maxspc, numsite, commentp) |
---|
560 | FILE *ofp; |
---|
561 | int *maxspc; |
---|
562 | int *numsite; |
---|
563 | char **commentp; |
---|
564 | { |
---|
565 | |
---|
566 | fprintf(ofp, "%d %d sites %s %s\n", |
---|
567 | *maxspc, *numsite, Modelname, *commentp); |
---|
568 | } /*_ headerd */ |
---|
569 | |
---|
570 | |
---|
571 | #ifdef NUC |
---|
572 | #include "optimtpm.c" |
---|
573 | #include "abratio.c" |
---|
574 | #endif /* NUC */ |
---|
575 | |
---|
576 | #if COMPCRITERION |
---|
577 | #include "compcrit.c" |
---|
578 | #endif /* COMPCRITERION */ |
---|
579 | |
---|
580 | |
---|
581 | static void |
---|
582 | prologue(ifp, ofp) |
---|
583 | FILE *ifp; |
---|
584 | FILE *ofp; |
---|
585 | { |
---|
586 | int i, j, k; |
---|
587 | ivector alias; |
---|
588 | double maxdis; |
---|
589 | #if TPMZEROCHECK |
---|
590 | int mini, minj; |
---|
591 | double dis, minprob; |
---|
592 | dmattpmty tpm; |
---|
593 | #endif |
---|
594 | #if 0 |
---|
595 | dmattpmty tpm; |
---|
596 | dvectpmty x; |
---|
597 | double y; |
---|
598 | #endif |
---|
599 | |
---|
600 | getsize(ifp, &Maxspc, &Maxsite, &Comment); |
---|
601 | Numsites = new_ivector(Maxspc); |
---|
602 | if (Distn_optn) { |
---|
603 | if (Maxspc < 2) exit(1); |
---|
604 | } else { |
---|
605 | if (Maxspc < 3) exit(1); |
---|
606 | } |
---|
607 | if ((User_optn || Aneal_optn || Stard_optn || Njoin_optn) && !Ctacit_optn) |
---|
608 | header(ofp, &Maxspc, &Maxsite, &Comment); |
---|
609 | if (Distn_optn && Maxspc > 2) headerd(ofp, &Maxspc, &Maxsite, &Comment); |
---|
610 | Identif = (char **)malloc((unsigned)Maxspc * sizeof(char *)); |
---|
611 | if (Identif == NULL) maerror("in prologue, Identif"); |
---|
612 | Sciname = (char **)malloc((unsigned)Maxspc * sizeof(char *)); |
---|
613 | if (Sciname == NULL) maerror("in prologue, Sciname"); |
---|
614 | Engname = (char **)malloc((unsigned)Maxspc * sizeof(char *)); |
---|
615 | if (Engname == NULL) maerror("in prologue, Engname"); |
---|
616 | Seqchar = new_cmatrix(Maxspc, Maxsite); |
---|
617 | if (Seque_optn) |
---|
618 | getseqs(ifp, Identif, Seqchar, Maxspc, Maxsite); |
---|
619 | else if (Inlvd_optn) |
---|
620 | getseqi(ifp, Identif, Seqchar, Maxspc, Maxsite); |
---|
621 | else |
---|
622 | getseq(ifp, Identif, Sciname, Engname, Seqchar, Maxspc, Maxsite); |
---|
623 | if (Debug_optn) prsequence(ofp, Identif, Seqchar, Maxspc, Maxsite); |
---|
624 | getfreqepm(Seqchar, Freqemp, Maxspc, Maxsite); |
---|
625 | alias = new_ivector(Maxsite); |
---|
626 | radixsort(Seqchar, alias, Maxspc, Maxsite, &Numptrn); |
---|
627 | Seqconint = new_imatrix(Maxspc, Numptrn); |
---|
628 | Weight = new_ivector(Numptrn); |
---|
629 | condenceseq(Seqchar, alias, Seqconint, Weight, Maxspc, Maxsite, Numptrn); |
---|
630 | convseq(Seqconint, Maxspc, Numptrn); |
---|
631 | getnumsites(Seqconint, Numsites, Weight, Maxspc, Numptrn); |
---|
632 | if (Debug_optn) |
---|
633 | prcondenceseq(Identif, Seqconint, Weight, Maxspc, Maxsite, Numptrn); |
---|
634 | free_ivector(alias); |
---|
635 | checkseq(Seqconint, Maxspc, Numptrn); |
---|
636 | if (Frequ_optn) convfreq(Freqemp); |
---|
637 | if (Toptim_optn) Topting = TRUE; |
---|
638 | #ifdef NUCxxxxxxx |
---|
639 | if (!Toptim_optn && (AlphaBeta == ALPHABETA)) abratio(); |
---|
640 | #endif /* NUC */ |
---|
641 | tranprobmat(); |
---|
642 | #if TPMZEROCHECK |
---|
643 | for (k = 1, dis = 1.0; k < 100; k++) { |
---|
644 | dis = dis / (double)k; |
---|
645 | tprobmtrx(dis, tpm); |
---|
646 | for (i = 1, minprob = 2.0; i < Tpmradix; i++) { |
---|
647 | for (j = 0; j < i; j++) { |
---|
648 | if (tpm[i][j] < minprob) { |
---|
649 | minprob = tpm[i][j]; |
---|
650 | mini = i; |
---|
651 | minj = j; |
---|
652 | } |
---|
653 | if (tpm[i][j] < 0.0) |
---|
654 | printf("%3d %20.15f %2d %2d %25.20f\n", |
---|
655 | k, dis, i, j, tpm[i][j]); |
---|
656 | } |
---|
657 | } |
---|
658 | printf("%3d %20.15f %2d %2d %25.20f\n", k, dis, mini, minj, minprob); |
---|
659 | } |
---|
660 | exit(1); |
---|
661 | #endif |
---|
662 | if (Toptim_optn) Topting = FALSE; |
---|
663 | if (Write_optn) prfreq(); |
---|
664 | |
---|
665 | Distanmat = new_dmatrix(Maxspc, Maxspc); |
---|
666 | if (Logdet_optn) |
---|
667 | lddistance(Distanmat, Seqchar, Maxspc, Maxsite); |
---|
668 | else |
---|
669 | distance(Distanmat, Seqchar, Maxspc, Maxsite); |
---|
670 | #if 1 |
---|
671 | if (Triad_optn) tridistance(Distanmat, Seqconint, Weight, Maxspc, Numptrn); |
---|
672 | #else |
---|
673 | if (Triad_optn) tridistance2(Distanmat, Seqchar, Maxspc, Maxsite); |
---|
674 | #endif |
---|
675 | if (Distn_optn) putdistance(Identif, Sciname, Engname, Distanmat, Maxspc); |
---|
676 | Maxbrnch = 2 * Maxspc - 3; |
---|
677 | Maxibrnch = Maxspc - 3; |
---|
678 | Maxpair = (Maxspc * (Maxspc - 1)) / 2; |
---|
679 | #if BRANCHULIMIT |
---|
680 | Llimit = 100.0 / Maxsite; |
---|
681 | for (i = 1, maxdis = Llimit; i < Maxspc; i++) { |
---|
682 | for (j = 0; j < i; j++) { |
---|
683 | if (Distanmat[i][j] > maxdis) maxdis = Distanmat[i][j]; |
---|
684 | } |
---|
685 | } |
---|
686 | Ulimit = maxdis; |
---|
687 | Mlimit = (Ulimit - Llimit) * 0.5; |
---|
688 | #else /* BRANCHULIMIT */ |
---|
689 | Llimit = LOWERLIMIT; |
---|
690 | Ulimit = UPPERLIMIT; |
---|
691 | Mlimit = MIDLIMIT; |
---|
692 | #endif /* BRANCHULIMIT */ |
---|
693 | #if 0 |
---|
694 | tprobmtrx(1.0, tpm); /* 1 - 10000 */ |
---|
695 | for (i = 0, y = 0.0; i < Tpmradix; i++) { |
---|
696 | for (j = 0, x[i] = 0.0; j < Tpmradix; j++) { |
---|
697 | x[i] += tpm[i][j]; |
---|
698 | if (i != j) y += Freqtpm[i] * tpm[i][j]; |
---|
699 | } |
---|
700 | } |
---|
701 | putchar('\n'); |
---|
702 | if (Tpmradix == NUMAMI) { |
---|
703 | for (i = 0; i < Tpmradix; i++) { |
---|
704 | for (j = 0; j < 10; j++) { |
---|
705 | printf("%7d", (int)(tpm[i][j]*1.0e6)); |
---|
706 | } putchar('\n'); |
---|
707 | } |
---|
708 | for (j = 0; j < 10; j++) printf("%7.4f", x[j]); putchar('\n'); |
---|
709 | putchar('\n'); |
---|
710 | for (i = 0; i < Tpmradix; i++) { |
---|
711 | for (j = 10; j < Tpmradix; j++) { |
---|
712 | printf("%7d", (int)(tpm[i][j]*1.0e6)); |
---|
713 | } putchar('\n'); |
---|
714 | } |
---|
715 | for (j = 10; j < Tpmradix; j++) printf("%7.4f", x[j]); putchar('\n'); |
---|
716 | } else { |
---|
717 | for (i = 0; i < Tpmradix; i++) { |
---|
718 | for (j = 0; j < Tpmradix; j++) { |
---|
719 | printf("%7d", (int)(tpm[i][j]*1.0e6)); |
---|
720 | } putchar('\n'); |
---|
721 | } |
---|
722 | for (j = 0; j < Tpmradix; j++) printf("%7.4f", x[j]); putchar('\n'); |
---|
723 | } |
---|
724 | printf("%20.10f\n\n", y); |
---|
725 | exit(1); |
---|
726 | #endif |
---|
727 | } |
---|
728 | |
---|
729 | |
---|
730 | #ifdef BDATE |
---|
731 | #include "bdate.c" |
---|
732 | #endif /* BDATE */ |
---|
733 | |
---|
734 | #if PTNLKL |
---|
735 | #include "ptnlkl.c" |
---|
736 | #endif /* PTNLKL */ |
---|
737 | |
---|
738 | #if TPGRAPH |
---|
739 | #include "tpgraph.c" |
---|
740 | #endif /* TPGRAPH */ |
---|
741 | |
---|
742 | |
---|
743 | #ifndef TPM |
---|
744 | void |
---|
745 | pml(ifp, ofp) |
---|
746 | FILE *ifp; |
---|
747 | FILE *ofp; |
---|
748 | { |
---|
749 | int buftree, cnospc, i; |
---|
750 | Node *rp, **poolnode, **addposition; |
---|
751 | Infoaddtree *topaddtree, *curp, *insp; |
---|
752 | char *cltree; |
---|
753 | double lkla, nt, mt; |
---|
754 | #if DISLKL |
---|
755 | int k, l; |
---|
756 | double dlkl, dislkl; |
---|
757 | dmatrix dislklhd, iprb, jprb; |
---|
758 | #endif /* DISLKL */ |
---|
759 | |
---|
760 | buftree = getbuftree(Maxspc, Identif); |
---|
761 | if (Debug) fprintf(ofp, "buftree: %d\n", buftree); |
---|
762 | Distanvec = new_dvector(Maxpair); |
---|
763 | changedistan(Distanmat, Distanvec, Maxspc); |
---|
764 | Brnlength = new_dvector(Maxbrnch); |
---|
765 | if (!Cnoexe) getproportion(&Proportion, Distanvec, Maxpair); |
---|
766 | Epsilon = EPSILON; |
---|
767 | Numspc = Maxspc; |
---|
768 | Numbrnch = Maxbrnch; |
---|
769 | Numpair = Maxpair; |
---|
770 | Numsite = Maxsite; |
---|
771 | Numibrnch = Numspc - 3; |
---|
772 | Converg = TRUE; |
---|
773 | Numit = 0; |
---|
774 | |
---|
775 | #if TPGRAPH |
---|
776 | tpgraph(); |
---|
777 | exit(1); |
---|
778 | #endif /* TPGRAPH */ |
---|
779 | |
---|
780 | if (User_optn) { /* Users trees MODE */ |
---|
781 | |
---|
782 | if (!Cnoexe) |
---|
783 | Ctree = (Tree *) new_tree(Maxspc, Maxibrnch, Numptrn, Seqconint); |
---|
784 | getnumtree(ifp, &Numtree); |
---|
785 | Infotrees = (Infotree *) newinfotrees(Numtree, buftree); |
---|
786 | Lklptrn = NEW_LPMATRIX(Numtree, Numptrn); |
---|
787 | Strtree = new_cvector(buftree); |
---|
788 | if (Relia_optn) { |
---|
789 | Relistat = new_ivector(Numibrnch); |
---|
790 | Reliprob = new_dmatrix(Numibrnch, 2); |
---|
791 | Relinum = new_imatrix(Numibrnch, 2); |
---|
792 | fprintf(Tplfp, "%d\n", Numtree); |
---|
793 | } |
---|
794 | if (Numtree != 1) fprintf(Trefp, "%d\n", Numtree); |
---|
795 | for (Cnotree = 0; Cnotree < Numtree; Cnotree++) { |
---|
796 | if (!Ctacit_optn && !Aprox_optn) printf("#%d\n", Cnotree + 1); |
---|
797 | getusertree(ifp, Strtree, buftree); |
---|
798 | if (Debug) puts(Strtree); |
---|
799 | constructtree(Ctree, Strtree); |
---|
800 | #if TREECHECK |
---|
801 | prtopology(Ctree); |
---|
802 | exit(1); |
---|
803 | #endif |
---|
804 | if (Debug) prcurtree(Ctree); /* */ |
---|
805 | Alklptrn = Lklptrn[Cnotree]; |
---|
806 | #ifdef NUC |
---|
807 | if (Toptim_optn) { |
---|
808 | Epsilon = REPSILON; |
---|
809 | optimtpm(); |
---|
810 | tranprobmat(); |
---|
811 | Epsilon = EPSILON; |
---|
812 | } |
---|
813 | #endif /* NUC */ |
---|
814 | |
---|
815 | #if FMLEN |
---|
816 | fmlength(Ctree, Distanmat, Maxspc); |
---|
817 | #else |
---|
818 | slslength(Ctree, Distanmat, Maxspc); |
---|
819 | /* lslength(Ctree, Distanvec, Maxspc); */ |
---|
820 | #endif |
---|
821 | if (Debug) prcurtree(Ctree); |
---|
822 | if (Debug_optn) prcurtree(Ctree); |
---|
823 | if (Aprox_optn) { |
---|
824 | initpartlkl(Ctree); |
---|
825 | aproxlkl(Ctree); |
---|
826 | praproxlkl(Ctree); |
---|
827 | putctopology(Ctree); |
---|
828 | } else { |
---|
829 | initpartlkl(Ctree); |
---|
830 | if (Ctacit_optn) aproxlkl(Ctree); |
---|
831 | rp = (Node *)mlikelihood(Ctree); |
---|
832 | /* rp = (Node *)relibranch(rp); */ |
---|
833 | mlvalue(Ctree, Infotrees); |
---|
834 | if (Debug_optn) putctopology(Ctree); |
---|
835 | if (Relia_optn) { |
---|
836 | Epsilon = REPSILON; |
---|
837 | reliabranch(Ctree); |
---|
838 | fputctopology(Tplfp, Ctree); |
---|
839 | Epsilon = EPSILON; |
---|
840 | } |
---|
841 | if (!Ctacit_optn) prtopology(Ctree); |
---|
842 | strctree(Ctree, Infotrees[Cnotree].ltplgy); |
---|
843 | if (Debug) puts(Infotrees[Cnotree].ltplgy); |
---|
844 | if (!Ctacit_optn) resulttree(Ctree); |
---|
845 | if (Cnotree == 0) pstree(Epsfp, Ctree); |
---|
846 | fputcphylogeny(Trefp, Ctree); |
---|
847 | #if VARITPM |
---|
848 | varitpm(Ctree); |
---|
849 | #endif /* VARITPM */ |
---|
850 | #if COMPCRITERION |
---|
851 | if (Ctacit_optn) compcrit(Ctree); |
---|
852 | #endif /* COMPCRITERION */ |
---|
853 | #ifdef BDATE |
---|
854 | branchdate(Ctree); |
---|
855 | #endif /* BDATE */ |
---|
856 | #if PTNLKL |
---|
857 | xpatternlklhd(Ctree); |
---|
858 | /* patternlklhd(Ctree); */ |
---|
859 | #endif /* PTNLKL */ |
---|
860 | } |
---|
861 | if (Verbs_optn) { |
---|
862 | fprintf(stderr," %d", Cnotree + 1); |
---|
863 | #if __STDC__ && DIFFTIME |
---|
864 | Ct1 = time(NULL); |
---|
865 | fprintf(stderr, "(%.0fs)", difftime(Ct1, Ct0)); |
---|
866 | #endif |
---|
867 | } |
---|
868 | } |
---|
869 | if (Ctacit_optn && Numtree == 1) printf("%.10f\n", Ctree->lklhd); |
---|
870 | if (Relia_optn) { |
---|
871 | free_ivector(Relistat); |
---|
872 | free_dmatrix(Reliprob); |
---|
873 | free_imatrix(Relinum); |
---|
874 | } |
---|
875 | if (Lklhd_optn) outlklhd(Lklptrn); |
---|
876 | if (!Aprox_optn && Numtree > 1 && (!COMPCRITERION || !Ctacit_optn)) { |
---|
877 | if (Boots_optn && Verbs_optn) fputs("\nbootstraping\n", stderr); |
---|
878 | if (Boots_optn) bootstrap(Infotrees, Lklptrn); |
---|
879 | tabletree(Infotrees, Lklptrn); |
---|
880 | if (Info_optn) tableinfo(Infotrees); |
---|
881 | } |
---|
882 | if (Verbs_optn) fputs("\n", stderr); |
---|
883 | FREE_LPMATRIX(Lklptrn); |
---|
884 | |
---|
885 | } else if (Aneal_optn) { /* Annealing MODE */ |
---|
886 | |
---|
887 | Relia_optn = TRUE; |
---|
888 | Ctree = (Tree *) new_tree(Maxspc, Maxibrnch, Numptrn, Seqconint); |
---|
889 | if (!Aneal_mode) getnumtree(ifp, &Numtree); |
---|
890 | if (!Numtree) Numtree = 1; /* !? */ |
---|
891 | Infotrees = (Infotree *) newinfotrees(Numtree, buftree); |
---|
892 | Lklptrn = NEW_LPMATRIX(Numtree, Numptrn); |
---|
893 | Strtree = new_cvector(buftree); |
---|
894 | if (Relia_optn) { |
---|
895 | Reliprob = new_dmatrix(Numibrnch, 2); |
---|
896 | Relinum = new_imatrix(Numibrnch, 2); |
---|
897 | } |
---|
898 | Cnotree = 0; |
---|
899 | |
---|
900 | if (!Ctacit_optn && !Aprox_optn) printf("\n"); |
---|
901 | getusertree(ifp, Strtree, buftree); |
---|
902 | if (Debug) puts(Strtree); |
---|
903 | constructtree(Ctree, Strtree); |
---|
904 | if (Debug) prcurtree(Ctree); /* */ |
---|
905 | Alklptrn = Lklptrn[Cnotree]; |
---|
906 | #ifdef NUC |
---|
907 | if (Toptim_optn) { |
---|
908 | optimtpm(); |
---|
909 | tranprobmat(); |
---|
910 | } |
---|
911 | #endif /* NUC */ |
---|
912 | #if FMLEN |
---|
913 | fmlength(Ctree, Distanmat, Maxspc); |
---|
914 | #else |
---|
915 | lslength(Ctree, Distanvec, Maxspc); |
---|
916 | #endif |
---|
917 | if (Debug) prcurtree(Ctree); |
---|
918 | if (Debug_optn) prcurtree(Ctree); |
---|
919 | initpartlkl(Ctree); |
---|
920 | rp = (Node *)mlikelihood(Ctree); |
---|
921 | mlvalue(Ctree, Infotrees); |
---|
922 | if (Debug_optn) putctopology(Ctree); |
---|
923 | |
---|
924 | if (Relia_optn) annealing(Ctree); |
---|
925 | |
---|
926 | if (!Ctacit_optn) prtopology(Ctree); |
---|
927 | strctree(Ctree, Infotrees[Cnotree].ltplgy); |
---|
928 | if (Debug) puts(Infotrees[Cnotree].ltplgy); |
---|
929 | if (!Ctacit_optn) resulttree(Ctree); |
---|
930 | |
---|
931 | if (Ctacit_optn && Numtree == 1) printf("%.10f\n", Ctree->lklhd); |
---|
932 | if (Verbs_optn) fputs("\n", stderr); |
---|
933 | if (Lklhd_optn) outlklhd(Lklptrn); |
---|
934 | if (Relia_optn) { |
---|
935 | free_dmatrix(Reliprob); |
---|
936 | free_imatrix(Relinum); |
---|
937 | } |
---|
938 | FREE_LPMATRIX(Lklptrn); |
---|
939 | |
---|
940 | } else if (Exhau_optn) { /* Exhaustive search */ |
---|
941 | |
---|
942 | Numtree = 1; |
---|
943 | Cnotree = 0; |
---|
944 | Numspc = Maxspc; |
---|
945 | Numbrnch = Maxspc; |
---|
946 | Maxibrnch = Maxspc - 3; |
---|
947 | /* Infotrees = (Infotree *) newinfotrees(1, buftree); */ |
---|
948 | /* Alklptrn = NEW_LPVECTOR(Numptrn); */ |
---|
949 | if (Maxspc < 4) { |
---|
950 | fprintf(stderr,"%s: number of OTUs is \"%d\".\n",Prog_name,Maxspc); |
---|
951 | exit(1); |
---|
952 | } |
---|
953 | if (!Const_optn) { |
---|
954 | for (i = Maxspc * 2 - 5, nt = 1.0; i > 0; i--) nt *= i; |
---|
955 | for (i = Maxspc - 3, mt = 1.0; i > 0; i--) mt *= i; |
---|
956 | for (i = Maxspc - 3; i > 0; i--) mt *= 2; |
---|
957 | nt /= mt; |
---|
958 | Numtplgy = nt; |
---|
959 | } else { |
---|
960 | Numtplgy = 1.0; |
---|
961 | } |
---|
962 | |
---|
963 | Ctree = (Tree *) new_njtree(Maxspc, Maxibrnch, Numptrn, Seqconint); |
---|
964 | enjtree(Ctree, Distanmat, Maxspc, TRUE); |
---|
965 | pathing(Ctree); |
---|
966 | Numibrnch = Maxibrnch; |
---|
967 | slslength(Ctree, Distanmat, Maxspc); |
---|
968 | Mintbldm = Ctree->tbldis; |
---|
969 | /* printf("TBL: %7.3f ", Mintbldm); putctopology(Ctree); */ |
---|
970 | free_njtree(Ctree, Maxspc, Maxibrnch); |
---|
971 | |
---|
972 | Ctree = (Tree *) new_njtree(Maxspc, Maxibrnch, Numptrn, Seqconint); |
---|
973 | enjtree(Ctree, Distanmat, Maxspc, FALSE); |
---|
974 | pathing(Ctree); |
---|
975 | Numibrnch = Maxibrnch; |
---|
976 | slslength(Ctree, Distanmat, Maxspc); |
---|
977 | Maxtbldm = Ctree->tbldis; |
---|
978 | /* printf("TBL: %7.3f ", Maxtbldm); putctopology(Ctree); */ |
---|
979 | free_njtree(Ctree, Maxspc, Maxibrnch); |
---|
980 | |
---|
981 | Poolorder = new_ivector(Maxspc); |
---|
982 | Ctree = (Tree *) new_atree(Maxspc, Maxibrnch, Numptrn, Seqconint); |
---|
983 | poolnode = (Node **)malloc((unsigned)Maxspc * sizeof(Node *)); |
---|
984 | if (poolnode == NULL) maerror("poolnode."); |
---|
985 | addposition = (Node **)malloc((unsigned)Maxspc * sizeof(Node *)); |
---|
986 | if (addposition == NULL) maerror("addposition."); |
---|
987 | if (!Const_optn) { /* without constrained tree */ |
---|
988 | atreeinit(Ctree, poolnode, addposition, Poolorder); |
---|
989 | } else { /* with constrained tree */ |
---|
990 | Strtree = new_cvector(buftree); |
---|
991 | getusertree(ifp, Strtree, buftree); |
---|
992 | if (Debug) puts(Strtree); |
---|
993 | streeinit(Ctree, Strtree, poolnode, addposition, Poolorder); |
---|
994 | } |
---|
995 | Infoaltrees = (Infoaltree *) newinfoaltrees(Maxaltree, buftree); |
---|
996 | |
---|
997 | if (Percnt_optn) { |
---|
998 | if (Percent > 100.0) |
---|
999 | Tblrate = 2.0; |
---|
1000 | else |
---|
1001 | Tblrate = Percent / 100.0; |
---|
1002 | } else { |
---|
1003 | if (Numtplgy <= 945) |
---|
1004 | Tblrate = TBLRATE7; |
---|
1005 | else |
---|
1006 | Tblrate = TBLRATE; |
---|
1007 | } |
---|
1008 | Basetbldm = (Maxtbldm - Mintbldm) * Tblrate + Mintbldm; |
---|
1009 | /* printf("TBL: Min%7.3f max%7.3f bese%7.3f\n", |
---|
1010 | Mintbldm, Maxtbldm, Basetbldm); */ |
---|
1011 | if (Verbs_optn) { |
---|
1012 | fprintf(stderr," %.0f possible trees, TBL limit %.1f%%\n", |
---|
1013 | Numtplgy, Tblrate*100); |
---|
1014 | if (Maxspc > 9) Numverbs = 10000; else Numverbs = 1000; |
---|
1015 | } |
---|
1016 | if (Numtplgy > 655e6) { |
---|
1017 | fprintf(stderr,"%s: too many possible trees \"%.0f\".\n", |
---|
1018 | Prog_name, Numtplgy); |
---|
1019 | exit(1); |
---|
1020 | } |
---|
1021 | |
---|
1022 | Ahead.lklaprox = 0.0; |
---|
1023 | Atail.up = &Ahead; |
---|
1024 | Tblbin = new_ivector(NUMTBLBIN); |
---|
1025 | for (i = 0; i < NUMTBLBIN; i++) Tblbin[i] = 0; |
---|
1026 | Tblunder = Tblover = 0; |
---|
1027 | Tblcoef = NUMTBLBIN / (Maxtbldm - Mintbldm + 0.00001); |
---|
1028 | if (addposition[3] == NULL) |
---|
1029 | autoconstruction(Ctree, 3, poolnode, addposition, Poolorder); |
---|
1030 | else |
---|
1031 | wedge(Ctree, 3, poolnode, addposition, Poolorder, addposition[3]); |
---|
1032 | /* if (Cnotree < Maxaltree) Numaltree = Cnotree; */ |
---|
1033 | if (Verbs_optn) fputc('\n', stderr); |
---|
1034 | if (!Aprox_optn) tablealtree(Numaltree); |
---|
1035 | free_ivector(Poolorder); |
---|
1036 | free_ivector(Tblbin); |
---|
1037 | |
---|
1038 | } else if (Stard_optn) { /* Star Decomposition MODE */ |
---|
1039 | |
---|
1040 | Numspc = Maxspc; |
---|
1041 | Numibrnch = 0; |
---|
1042 | Numbrnch = Maxspc; |
---|
1043 | Cnotree = 0; |
---|
1044 | Numtree = MAXSLBUF; |
---|
1045 | Infotrees = (Infotree *) newinfotrees(Numtree, buftree); |
---|
1046 | Lklptrn = NEW_LPMATRIX(Numtree, Numptrn); |
---|
1047 | Ctree = (Tree *) new_stree(Maxspc, Maxibrnch, Numptrn, Seqconint); |
---|
1048 | if (Debug) prcurtree(Ctree); /* */ |
---|
1049 | Alklptrn = Lklptrn[Cnotree]; |
---|
1050 | #if 0 |
---|
1051 | fmlength(Ctree, Distanmat, Maxspc); |
---|
1052 | if (Aprox_optn) { |
---|
1053 | initpartlkl(Ctree); |
---|
1054 | aproxlkl(Ctree); |
---|
1055 | praproxlkl(Ctree); |
---|
1056 | putctopology(Ctree); |
---|
1057 | } else { |
---|
1058 | initpartlkl(Ctree); |
---|
1059 | rp = (Node *)mlikelihood(Ctree); |
---|
1060 | mlvalue(Ctree, Infotrees); |
---|
1061 | if (Debug_optn) putctopology(Ctree); |
---|
1062 | prtopology(Ctree); |
---|
1063 | resulttree(Ctree); |
---|
1064 | } |
---|
1065 | #endif |
---|
1066 | |
---|
1067 | #if DISLKL |
---|
1068 | initpartlkl(Ctree); |
---|
1069 | dislklhd = new_dmatrix(Numspc, Numspc); |
---|
1070 | for (i = 0; i < Numspc - 1; i++) { |
---|
1071 | iprb = Ctree->ebrnchp[i]->kinp->iprob; |
---|
1072 | for (j = i + 1; j < Numspc; j++) { |
---|
1073 | jprb = Ctree->ebrnchp[j]->kinp->iprob; |
---|
1074 | for (k = 0, dislkl = 0.0; k < Numptrn; k++) { |
---|
1075 | for (l = 0, dlkl = 0.0; l < Tpmradix; l++) { |
---|
1076 | /* dlkl += fabs(iprb[k][l] - jprb[k][l]); */ |
---|
1077 | dlkl += Freqtpm[l] * iprb[k][l] * jprb[k][l]; |
---|
1078 | } |
---|
1079 | /* dislkl += dlkl * Weight[k]; */ |
---|
1080 | dislkl += log(dlkl) * Weight[k]; |
---|
1081 | } |
---|
1082 | dislkl = -dislkl; |
---|
1083 | dislkl /= Numsite; |
---|
1084 | dislklhd[i][j] = dislkl; |
---|
1085 | dislklhd[j][i] = dislkl; |
---|
1086 | } |
---|
1087 | dislklhd[i][i] = 0.0; |
---|
1088 | } |
---|
1089 | dislklhd[Numspc-1][Numspc-1] = 0.0; |
---|
1090 | /* |
---|
1091 | for (i = 0; i < Numspc; i++) { |
---|
1092 | for (j = 0; j < Numspc; j++) { |
---|
1093 | if (i != j) printf("%5.0f", dislklhd[i][j] * 1000.0); |
---|
1094 | else printf("%5.3s", Identif[i]); |
---|
1095 | } putchar('\n'); |
---|
1096 | } putchar('\n'); |
---|
1097 | */ |
---|
1098 | printf("\n%d %d\n", Numspc, Numsite); |
---|
1099 | for (i = 0; i < Numspc; i++) { |
---|
1100 | printf("%s\n", Identif[i]); |
---|
1101 | for (j = 0; j < Numspc; j++) { |
---|
1102 | printf(" %15.12f", dislklhd[i][j]); |
---|
1103 | if ((j + 1) % 5 == 0) putchar('\n'); |
---|
1104 | } if (j % 5 != 0) putchar('\n'); |
---|
1105 | } putchar('\n'); |
---|
1106 | |
---|
1107 | free_dmatrix(dislklhd); |
---|
1108 | exit(1); |
---|
1109 | #endif /* DISLKL */ |
---|
1110 | |
---|
1111 | #if 1 |
---|
1112 | xstardecomp(Ctree); |
---|
1113 | #else |
---|
1114 | stardecomp(Ctree, Maxibrnch); |
---|
1115 | #endif |
---|
1116 | pathing(Ctree); |
---|
1117 | fmlength(Ctree, Distanmat, Maxspc); |
---|
1118 | initpartlkl(Ctree); |
---|
1119 | Alklptrn = Lklptrn[Cnotree]; |
---|
1120 | rp = (Node *)mlikelihood(Ctree); |
---|
1121 | mlvalue(Ctree, Infotrees); |
---|
1122 | if (Debug_optn) putctopology(Ctree); |
---|
1123 | putchar('\n'); |
---|
1124 | prtopology(Ctree); |
---|
1125 | resulttree(Ctree); |
---|
1126 | pstree(Epsfp, Ctree); |
---|
1127 | fputcphylogeny(Trefp, Ctree); |
---|
1128 | FREE_LPMATRIX(Lklptrn); |
---|
1129 | |
---|
1130 | } else if (Njoin_optn) { /* NJ MODE */ |
---|
1131 | |
---|
1132 | Numspc = Maxspc; |
---|
1133 | Numbrnch = Maxspc; |
---|
1134 | Maxibrnch = Maxspc - 3; |
---|
1135 | Numibrnch = 0; |
---|
1136 | Cnotree = 0; |
---|
1137 | Numtree = 1; |
---|
1138 | Infotrees = (Infotree *) newinfotrees(Numtree, buftree); |
---|
1139 | Lklptrn = NEW_LPMATRIX(Numtree, Numptrn); |
---|
1140 | Alklptrn = Lklptrn[0]; |
---|
1141 | puts("NJ"); |
---|
1142 | |
---|
1143 | Ctree = (Tree *) new_njtree(Maxspc, Maxibrnch, Numptrn, Seqconint); |
---|
1144 | #if NJMLD |
---|
1145 | njmtree(Ctree, Distanmat, Numspc, TRUE); |
---|
1146 | #else |
---|
1147 | enjtree(Ctree, Distanmat, Numspc, TRUE); |
---|
1148 | #endif |
---|
1149 | Alklptrn = Lklptrn[Cnotree]; |
---|
1150 | initpartlkl(Ctree); |
---|
1151 | rp = (Node *)mlikelihood(Ctree); |
---|
1152 | mlvalue(Ctree, Infotrees); |
---|
1153 | if (Relia_optn) { |
---|
1154 | Relitrif = new_dvector(Maxibrnch); |
---|
1155 | Relistat = new_ivector(Numibrnch); |
---|
1156 | Reliprob = new_dmatrix(Numibrnch, 2); |
---|
1157 | Relinum = new_imatrix(Numibrnch, 2); |
---|
1158 | qlrsearch(Ctree); |
---|
1159 | putchar('\n'); |
---|
1160 | } |
---|
1161 | prtopology(Ctree); |
---|
1162 | resulttree(Ctree); |
---|
1163 | /* |
---|
1164 | initpartlkl(Ctree); |
---|
1165 | rp = (Node *)mlikelihood(Ctree); |
---|
1166 | mlvalue(Ctree, Infotrees); |
---|
1167 | if (Debug_optn) putctopology(Ctree); |
---|
1168 | putchar('\n'); |
---|
1169 | prtopology(Ctree); |
---|
1170 | resulttree(Ctree); |
---|
1171 | putchar('\n'); |
---|
1172 | putctopology(Ctree); |
---|
1173 | */ |
---|
1174 | pstree(Epsfp, Ctree); |
---|
1175 | fprintf(Tplfp, "%d\n", Numtree); |
---|
1176 | fputctopology(Tplfp, Ctree); |
---|
1177 | fprintf(Trefp, "%d\n", Numtree); |
---|
1178 | fputcphylogeny(Trefp, Ctree); |
---|
1179 | FREE_LPMATRIX(Lklptrn); |
---|
1180 | if (Relia_optn) { |
---|
1181 | free_dvector(Relitrif); |
---|
1182 | free_ivector(Relistat); |
---|
1183 | free_dmatrix(Reliprob); |
---|
1184 | free_imatrix(Relinum); |
---|
1185 | } |
---|
1186 | /* |
---|
1187 | sorttree(Ctree, Ctree->rootp); |
---|
1188 | prtopology(Ctree); |
---|
1189 | putsortseq(Ctree); |
---|
1190 | */ |
---|
1191 | |
---|
1192 | } else { /* quick MODE */ |
---|
1193 | |
---|
1194 | Numtree = 2; |
---|
1195 | Lklptrn = NEW_LPMATRIX(Numqltree, Numptrn); |
---|
1196 | Ctree = (Tree *) new_tree(Maxspc, Maxibrnch, Numptrn, Seqconint); |
---|
1197 | Strtree = new_cvector(buftree); |
---|
1198 | Infoqltrees = (Infoqltree *) newinfoqltrees(MAXQLBUF, Maxbrnch); |
---|
1199 | Qhead = (Infoqltree *) malloc(sizeof(Infoqltree)); |
---|
1200 | if (Qhead == NULL) maerror("Qhead in pml()."); |
---|
1201 | Qhead->lklaprox = 0.0; |
---|
1202 | Qhead->residual = 0.0; |
---|
1203 | for (Cnotree = 0; Cnotree < Numqltree; Cnotree++) { |
---|
1204 | Alklptrn = Lklptrn[Cnotree]; |
---|
1205 | if (Cnotree == 0) |
---|
1206 | initturn(Ctree); |
---|
1207 | else |
---|
1208 | randturn(Ctree); |
---|
1209 | qtreeinit(Ctree); |
---|
1210 | for (cnospc = 3; cnospc < Maxspc; cnospc++) { |
---|
1211 | Numspc = cnospc + 1; |
---|
1212 | Numibrnch = Numspc - 3; |
---|
1213 | Numbrnch = Numspc + Numibrnch; |
---|
1214 | Numpair = (Numspc * (Numspc - 1)) / 2; |
---|
1215 | convertdistan(Ctree, cnospc+1, Distanmat, Distanvec); |
---|
1216 | Qtail = Qhead; |
---|
1217 | roundtree(Ctree, cnospc, Infoqltrees, Qhead, Qtail); |
---|
1218 | } |
---|
1219 | #if 0 |
---|
1220 | pathing(Ctree); |
---|
1221 | fmlength(Ctree, Distanmat, Maxspc); |
---|
1222 | initpartlkl(Ctree); |
---|
1223 | rp = (Node *)mlikelihood(Ctree); |
---|
1224 | /* mlvalue(Ctree, Infotrees); */ |
---|
1225 | prtopology(Ctree); |
---|
1226 | resulttree(Ctree); |
---|
1227 | #endif |
---|
1228 | /* rerootq(Ctree, Numspc); */ |
---|
1229 | |
---|
1230 | for (i = 0; Ctree->bturn[i] != Numspc-1 && i < Numspc; i++) |
---|
1231 | ; |
---|
1232 | reroot(Ctree, Ctree->ebrnchp[i]->kinp); |
---|
1233 | |
---|
1234 | if (Aprox_optn) initpartlkl(Ctree); |
---|
1235 | if (Aprox_optn) praproxlkl(Ctree); |
---|
1236 | if (Aprox_optn) putctopology(Ctree); |
---|
1237 | if (Cnotree == 0) { |
---|
1238 | topaddtree = (Infoaddtree *)newinfoaddtree(buftree); |
---|
1239 | strctree(Ctree, topaddtree->ltplgy); |
---|
1240 | topaddtree->lklaprox = Ctree->lklhd; |
---|
1241 | topaddtree->frequency = 1; |
---|
1242 | cltree = (char *)malloc((unsigned)buftree * sizeof(char)); |
---|
1243 | if (cltree == NULL) maerror("cltree in protml.c"); |
---|
1244 | if (Debug) puts(topaddtree->ltplgy); |
---|
1245 | Numaddtree = 1; |
---|
1246 | } else { |
---|
1247 | strctree(Ctree, cltree); |
---|
1248 | lkla = Ctree->lklhd; |
---|
1249 | insp = topaddtree; |
---|
1250 | for (curp = topaddtree; curp != NULL; curp = curp->dp) { |
---|
1251 | if (strcmp(cltree, curp->ltplgy) == 0) { /* same */ |
---|
1252 | curp->frequency++; |
---|
1253 | break; |
---|
1254 | } |
---|
1255 | if (lkla < curp->lklaprox) insp = curp; |
---|
1256 | } |
---|
1257 | if (curp == NULL) { |
---|
1258 | curp = (Infoaddtree *)newinfoaddtree(buftree); |
---|
1259 | curp->dp = insp->dp; |
---|
1260 | insp->dp = curp; |
---|
1261 | (void)strcpy(curp->ltplgy, cltree); |
---|
1262 | curp->lklaprox = lkla; |
---|
1263 | curp->frequency = 1; |
---|
1264 | Numaddtree++; |
---|
1265 | } |
---|
1266 | } |
---|
1267 | } |
---|
1268 | if (!Aprox_optn) tableaddtree(topaddtree, Numaddtree); |
---|
1269 | FREE_LPMATRIX(Lklptrn); |
---|
1270 | } |
---|
1271 | |
---|
1272 | free_cvector(Comment); |
---|
1273 | free_cmatrix(Identif); |
---|
1274 | free_cmatrix(Sciname); |
---|
1275 | free_cmatrix(Engname); |
---|
1276 | free_imatrix(Seqconint); |
---|
1277 | free_ivector(Weight); |
---|
1278 | free_cmatrix(Seqchar); |
---|
1279 | free_dvector(Distanvec); |
---|
1280 | free_dmatrix(Distanmat); |
---|
1281 | free_dvector(Brnlength); |
---|
1282 | } |
---|
1283 | #endif /* TPM */ |
---|