1 | /* RAxML-VI-HPC (version 2.2) a program for sequential and parallel estimation of phylogenetic trees |
---|
2 | * Copyright August 2006 by Alexandros Stamatakis |
---|
3 | * |
---|
4 | * Partially derived from |
---|
5 | * fastDNAml, a program for estimation of phylogenetic trees from sequences by Gary J. Olsen |
---|
6 | * |
---|
7 | * and |
---|
8 | * |
---|
9 | * Programs of the PHYLIP package by Joe Felsenstein. |
---|
10 | * |
---|
11 | * This program is free software; you may redistribute it and/or modify its |
---|
12 | * under the terms of the GNU General Public License as published by the Free |
---|
13 | * Software Foundation; either version 2 of the License, or (at your option) |
---|
14 | * any later version. |
---|
15 | * |
---|
16 | * This program is distributed in the hope that it will be useful, but |
---|
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
18 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
---|
19 | * for more details. |
---|
20 | * |
---|
21 | * |
---|
22 | * For any other enquiries send an Email to Alexandros Stamatakis |
---|
23 | * Alexandros.Stamatakis@epfl.ch |
---|
24 | * |
---|
25 | * When publishing work that is based on the results from RAxML-VI-HPC please cite: |
---|
26 | * |
---|
27 | * Alexandros Stamatakis:"RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands of taxa and mixed models". |
---|
28 | * Bioinformatics 2006; doi: 10.1093/bioinformatics/btl446 |
---|
29 | */ |
---|
30 | |
---|
31 | #ifdef WIN32 |
---|
32 | #include <direct.h> |
---|
33 | #endif |
---|
34 | |
---|
35 | #ifndef WIN32 |
---|
36 | #include <sys/times.h> |
---|
37 | #include <sys/types.h> |
---|
38 | #include <sys/time.h> |
---|
39 | #include <unistd.h> |
---|
40 | #endif |
---|
41 | |
---|
42 | #include <math.h> |
---|
43 | #include <time.h> |
---|
44 | #include <stdlib.h> |
---|
45 | #include <stdio.h> |
---|
46 | #include <ctype.h> |
---|
47 | #include <string.h> |
---|
48 | #include <stdarg.h> |
---|
49 | #include <limits.h> |
---|
50 | |
---|
51 | #if (defined(_WAYNE_MPI) || defined (_QUARTET_MPI)) |
---|
52 | #include <mpi.h> |
---|
53 | #endif |
---|
54 | |
---|
55 | |
---|
56 | |
---|
57 | #ifdef _USE_PTHREADS |
---|
58 | #include <pthread.h> |
---|
59 | |
---|
60 | #endif |
---|
61 | |
---|
62 | #if ! (defined(__ppc) || defined(__powerpc__) || defined(PPC)) |
---|
63 | #include <xmmintrin.h> |
---|
64 | /* |
---|
65 | special bug fix, enforces denormalized numbers to be flushed to zero, |
---|
66 | without this program is a tiny bit faster though. |
---|
67 | #include <emmintrin.h> |
---|
68 | #define MM_DAZ_MASK 0x0040 |
---|
69 | #define MM_DAZ_ON 0x0040 |
---|
70 | #define MM_DAZ_OFF 0x0000 |
---|
71 | */ |
---|
72 | #endif |
---|
73 | |
---|
74 | #include "axml.h" |
---|
75 | #include "globalVariables.h" |
---|
76 | |
---|
77 | |
---|
78 | #define _PORTABLE_PTHREADS |
---|
79 | |
---|
80 | |
---|
81 | /***************** UTILITY FUNCTIONS **************************/ |
---|
82 | |
---|
83 | |
---|
/* Returns the absolute value of x; a thin wrapper around the C library's fabs(). */
double FABS(double x)
{
  const double magnitude = fabs(x);

  return magnitude;
}
---|
94 | |
---|
95 | |
---|
96 | |
---|
97 | |
---|
98 | |
---|
99 | FILE *getNumberOfTrees(tree *tr, char *fileName, analdef *adef) |
---|
100 | { |
---|
101 | FILE |
---|
102 | *f = myfopen(fileName, "r"); |
---|
103 | |
---|
104 | int |
---|
105 | trees = 0, |
---|
106 | ch; |
---|
107 | |
---|
108 | while((ch = fgetc(f)) != EOF) |
---|
109 | if(ch == ';') |
---|
110 | trees++; |
---|
111 | |
---|
112 | assert(trees > 0); |
---|
113 | |
---|
114 | tr->numberOfTrees = trees; |
---|
115 | |
---|
116 | if(!adef->allInOne) |
---|
117 | printBothOpen("\n\nFound %d trees in File %s\n\n", trees, fileName); |
---|
118 | |
---|
119 | |
---|
120 | rewind(f); |
---|
121 | |
---|
122 | return f; |
---|
123 | } |
---|
124 | |
---|
/*
 * printf-style helper that emits the formatted message twice: first to the
 * stream f, then to stdout.
 */
static void printBoth(FILE *f, const char* format, ... )
{
  va_list
    forFile,
    forStdout;

  va_start(forFile, format);
  va_copy(forStdout, forFile);   /* each v*printf call consumes its own list */

  vfprintf(f, format, forFile);
  va_end(forFile);

  vprintf(format, forStdout);
  va_end(forStdout);
}
---|
136 | |
---|
137 | void printBothOpen(const char* format, ... ) |
---|
138 | { |
---|
139 | #ifdef _QUARTET_MPI |
---|
140 | if(processID == 0) |
---|
141 | #endif |
---|
142 | { |
---|
143 | FILE *f = myfopen(infoFileName, "ab"); |
---|
144 | |
---|
145 | va_list args; |
---|
146 | va_start(args, format); |
---|
147 | vfprintf(f, format, args ); |
---|
148 | va_end(args); |
---|
149 | |
---|
150 | va_start(args, format); |
---|
151 | vprintf(format, args ); |
---|
152 | va_end(args); |
---|
153 | |
---|
154 | fclose(f); |
---|
155 | } |
---|
156 | } |
---|
157 | |
---|
158 | void printBothOpenMPI(const char* format, ... ) |
---|
159 | { |
---|
160 | #ifdef _WAYNE_MPI |
---|
161 | if(processID == 0) |
---|
162 | #endif |
---|
163 | { |
---|
164 | FILE *f = myfopen(infoFileName, "ab"); |
---|
165 | |
---|
166 | va_list args; |
---|
167 | va_start(args, format); |
---|
168 | vfprintf(f, format, args ); |
---|
169 | va_end(args); |
---|
170 | |
---|
171 | va_start(args, format); |
---|
172 | vprintf(format, args ); |
---|
173 | va_end(args); |
---|
174 | |
---|
175 | fclose(f); |
---|
176 | } |
---|
177 | } |
---|
178 | |
---|
179 | |
---|
180 | boolean getSmoothFreqs(int dataType) |
---|
181 | { |
---|
182 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
183 | |
---|
184 | return pLengths[dataType].smoothFrequencies; |
---|
185 | } |
---|
186 | |
---|
187 | const unsigned int *getBitVector(int dataType) |
---|
188 | { |
---|
189 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
190 | |
---|
191 | return pLengths[dataType].bitVector; |
---|
192 | } |
---|
193 | |
---|
194 | |
---|
195 | int getStates(int dataType) |
---|
196 | { |
---|
197 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
198 | |
---|
199 | return pLengths[dataType].states; |
---|
200 | } |
---|
201 | |
---|
202 | unsigned char getUndetermined(int dataType) |
---|
203 | { |
---|
204 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
205 | |
---|
206 | return pLengths[dataType].undetermined; |
---|
207 | } |
---|
208 | |
---|
209 | |
---|
210 | |
---|
211 | char getInverseMeaning(int dataType, unsigned char state) |
---|
212 | { |
---|
213 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
214 | |
---|
215 | return pLengths[dataType].inverseMeaning[state]; |
---|
216 | } |
---|
217 | |
---|
218 | partitionLengths *getPartitionLengths(pInfo *p) |
---|
219 | { |
---|
220 | int |
---|
221 | dataType = p->dataType, |
---|
222 | states = p->states, |
---|
223 | tipLength = p->maxTipStates; |
---|
224 | |
---|
225 | assert(states != -1 && tipLength != -1); |
---|
226 | |
---|
227 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
228 | |
---|
229 | pLength.leftLength = pLength.rightLength = states * states; |
---|
230 | pLength.eignLength = states -1; |
---|
231 | pLength.evLength = states * states; |
---|
232 | pLength.eiLength = states * states - states; |
---|
233 | pLength.substRatesLength = (states * states - states) / 2; |
---|
234 | pLength.frequenciesLength = states; |
---|
235 | pLength.tipVectorLength = tipLength * states; |
---|
236 | pLength.symmetryVectorLength = (states * states - states) / 2; |
---|
237 | pLength.frequencyGroupingLength = states; |
---|
238 | pLength.nonGTR = FALSE; |
---|
239 | |
---|
240 | return (&pLengths[dataType]); |
---|
241 | } |
---|
242 | |
---|
243 | |
---|
244 | |
---|
245 | static boolean isCat(analdef *adef) |
---|
246 | { |
---|
247 | if(adef->model == M_PROTCAT || adef->model == M_GTRCAT || adef->model == M_BINCAT || adef->model == M_32CAT || adef->model == M_64CAT) |
---|
248 | return TRUE; |
---|
249 | else |
---|
250 | return FALSE; |
---|
251 | } |
---|
252 | |
---|
253 | static boolean isGamma(analdef *adef) |
---|
254 | { |
---|
255 | if(adef->model == M_PROTGAMMA || adef->model == M_GTRGAMMA || adef->model == M_BINGAMMA || |
---|
256 | adef->model == M_32GAMMA || adef->model == M_64GAMMA) |
---|
257 | return TRUE; |
---|
258 | else |
---|
259 | return FALSE; |
---|
260 | } |
---|
261 | |
---|
262 | |
---|
263 | static int stateAnalyzer(tree *tr, int model, int maxStates) |
---|
264 | { |
---|
265 | boolean |
---|
266 | counter[256], |
---|
267 | previous, |
---|
268 | inputError = FALSE; |
---|
269 | |
---|
270 | int |
---|
271 | lower = tr->partitionData[model].lower, |
---|
272 | upper = tr->partitionData[model].upper, |
---|
273 | i, |
---|
274 | j, |
---|
275 | states = 0; |
---|
276 | |
---|
277 | for(i = 0; i < 256; i++) |
---|
278 | counter[i] = FALSE; |
---|
279 | |
---|
280 | for(i = 0; i < tr->rdta->numsp; i++) |
---|
281 | { |
---|
282 | unsigned char *yptr = &(tr->rdta->y0[((size_t)i) * ((size_t)tr->originalCrunchedLength)]); |
---|
283 | |
---|
284 | for(j = lower; j < upper; j++) |
---|
285 | if(yptr[j] != getUndetermined(GENERIC_32)) |
---|
286 | counter[yptr[j]] = TRUE; |
---|
287 | |
---|
288 | } |
---|
289 | |
---|
290 | for(i = 0; i < maxStates; i++) |
---|
291 | { |
---|
292 | if(counter[i]) |
---|
293 | states++; |
---|
294 | } |
---|
295 | |
---|
296 | |
---|
297 | previous = counter[0]; |
---|
298 | |
---|
299 | for(i = 1; i < 256; i++) |
---|
300 | { |
---|
301 | if(previous == FALSE && counter[i] == TRUE) |
---|
302 | { |
---|
303 | inputError = TRUE; |
---|
304 | break; |
---|
305 | } |
---|
306 | else |
---|
307 | { |
---|
308 | if(previous == TRUE && counter[i] == FALSE) |
---|
309 | previous = FALSE; |
---|
310 | } |
---|
311 | } |
---|
312 | |
---|
313 | if(inputError) |
---|
314 | { |
---|
315 | printf("Multi State Error, characters must be used in the order they are available, i.e.\n"); |
---|
316 | printf("0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V\n"); |
---|
317 | printf("You are using the following characters: \n"); |
---|
318 | for(i = 0; i < 256; i++) |
---|
319 | if(counter[i]) |
---|
320 | printf("%c ", inverseMeaningGeneric32[i]); |
---|
321 | printf("\n"); |
---|
322 | exit(-1); |
---|
323 | } |
---|
324 | |
---|
325 | return states; |
---|
326 | } |
---|
327 | |
---|
328 | |
---|
329 | |
---|
330 | |
---|
331 | static void setRateHetAndDataIncrement(tree *tr, analdef *adef) |
---|
332 | { |
---|
333 | int model; |
---|
334 | |
---|
335 | if(isCat(adef)) |
---|
336 | tr->rateHetModel = CAT; |
---|
337 | else |
---|
338 | { |
---|
339 | if(adef->useInvariant) |
---|
340 | tr->rateHetModel = GAMMA_I; |
---|
341 | else |
---|
342 | tr->rateHetModel = GAMMA; |
---|
343 | } |
---|
344 | |
---|
345 | switch(tr->rateHetModel) |
---|
346 | { |
---|
347 | case GAMMA: |
---|
348 | case GAMMA_I: |
---|
349 | tr->discreteRateCategories = 4; |
---|
350 | break; |
---|
351 | case CAT: |
---|
352 | if((adef->boot && !adef->bootstrapBranchLengths) || (adef->mode == CLASSIFY_ML) || (tr->catOnly)) |
---|
353 | tr->discreteRateCategories = 1; |
---|
354 | else |
---|
355 | tr->discreteRateCategories = 4; |
---|
356 | break; |
---|
357 | default: |
---|
358 | assert(0); |
---|
359 | } |
---|
360 | |
---|
361 | if(adef->bootstrapBranchLengths) |
---|
362 | assert(tr->discreteRateCategories == 4); |
---|
363 | |
---|
364 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
365 | { |
---|
366 | int |
---|
367 | states = -1, |
---|
368 | maxTipStates = getUndetermined(tr->partitionData[model].dataType) + 1; |
---|
369 | |
---|
370 | switch(tr->partitionData[model].dataType) |
---|
371 | { |
---|
372 | case BINARY_DATA: |
---|
373 | case DNA_DATA: |
---|
374 | case AA_DATA: |
---|
375 | case SECONDARY_DATA: |
---|
376 | case SECONDARY_DATA_6: |
---|
377 | case SECONDARY_DATA_7: |
---|
378 | states = getStates(tr->partitionData[model].dataType); |
---|
379 | break; |
---|
380 | case GENERIC_32: |
---|
381 | case GENERIC_64: |
---|
382 | states = stateAnalyzer(tr, model, getStates(tr->partitionData[model].dataType)); |
---|
383 | break; |
---|
384 | default: |
---|
385 | assert(0); |
---|
386 | } |
---|
387 | |
---|
388 | tr->partitionData[model].states = states; |
---|
389 | tr->partitionData[model].maxTipStates = maxTipStates; |
---|
390 | } |
---|
391 | } |
---|
392 | |
---|
393 | |
---|
/*
 * Returns the current wall-clock time in seconds.
 *
 * On WIN32 the value is time(NULL) converted to double (one-second
 * resolution). The previous WIN32 code returned only the minutes and seconds
 * of the local time, so the value wrapped around every hour and differences
 * of two readings could become negative; it also went through the
 * non-reentrant localtime(). Elsewhere gettimeofday() is used and the result
 * includes the microsecond fraction.
 */
double gettime(void)
{
#ifdef WIN32
  return (double)time(NULL);
#else
  struct timeval now;

  gettimeofday(&now, NULL);

  return now.tv_sec + now.tv_usec * 0.000001;
#endif
}
---|
408 | |
---|
/*
 * Returns an integer derived from the current time (presumably meant as a
 * random-number seed, going by the name - confirm at the call sites).
 *
 * On WIN32 this is the number of seconds elapsed in the current local year
 * (day of year, hour, minute, second). Elsewhere it is the sum of the
 * seconds and microseconds fields reported by gettimeofday(), converted to
 * int.
 */
int gettimeSrand(void)
{
#ifdef WIN32
  time_t
    now = time(NULL);

  struct tm
    local = *localtime(&now);

  int
    seconds = local.tm_sec;

  seconds += 60 * local.tm_min;
  seconds += 60 * 60 * local.tm_hour;
  seconds += 24 * 60 * 60 * local.tm_yday;

  return seconds;
#else
  struct timeval now;

  gettimeofday(&now, NULL);

  return now.tv_sec + now.tv_usec;
#endif
}
---|
423 | |
---|
/*
 * Pseudo-random number generator working on a 32-bit state held in *seed.
 *
 * The state is split into limbs of 12, 12 and 8 bits, multiplied limb-wise
 * by the constant formed from mult0 (1549) and mult1 (406) with carries
 * propagated, and reduced to 32 bits again. The new state is written back to
 * *seed and also returned scaled into the interval [0, 1).
 *
 * All integer work is done in unsigned long: with a signed long of 32 bits
 * (e.g. on WIN32) the final "newseed2 << 24" overflowed for newseed2 >= 128,
 * which is undefined behaviour. Where long has 64 bits the results are
 * unchanged.
 */
double randum (long *seed)
{
  const unsigned long
    mult0 = 1549,
    mult1 = 406,
    state = (unsigned long)*seed,
    seed0 = state & 4095,
    seed1 = (state >> 12) & 4095,
    seed2 = (state >> 24) & 255;

  unsigned long
    sum,
    newseed0,
    newseed1,
    newseed2;

  sum      = mult0 * seed0;
  newseed0 = sum & 4095;
  sum    >>= 12;                               /* carry into the middle limb */

  sum     += mult0 * seed1 + mult1 * seed0;
  newseed1 = sum & 4095;
  sum    >>= 12;                               /* carry into the top limb */

  sum     += mult0 * seed2 + mult1 * seed1;
  newseed2 = sum & 255;

  *seed = (long)((newseed2 << 24) | (newseed1 << 12) | newseed0);

  /* 0.00390625 = 1/256 and 0.000244140625 = 1/4096 */
  return 0.00390625 * (newseed2 + 0.000244140625 * (newseed1 + 0.000244140625 * newseed0));
}
---|
448 | |
---|
/*
 * Returns 1 when filename can be opened for reading ("rb"), 0 otherwise.
 * The probe stream is closed again before returning.
 */
int filexists(char *filename)
{
  FILE
    *probe = fopen(filename, "rb");

  if(probe == NULL)
    return 0;

  fclose(probe);

  return 1;
}
---|
465 | |
---|
466 | |
---|
467 | FILE *myfopen(const char *path, const char *mode) |
---|
468 | { |
---|
469 | FILE *fp = fopen(path, mode); |
---|
470 | |
---|
471 | if(strcmp(mode,"r") == 0 || strcmp(mode,"rb") == 0) |
---|
472 | { |
---|
473 | if(fp) |
---|
474 | return fp; |
---|
475 | else |
---|
476 | { |
---|
477 | if(processID == 0) |
---|
478 | printf("The file %s you want to open for reading does not exist, exiting ...\n", path); |
---|
479 | errorExit(-1); |
---|
480 | return (FILE *)NULL; |
---|
481 | } |
---|
482 | } |
---|
483 | else |
---|
484 | { |
---|
485 | if(fp) |
---|
486 | return fp; |
---|
487 | else |
---|
488 | { |
---|
489 | if(processID == 0) |
---|
490 | printf("The file %s RAxML wants to open for writing or appending can not be opened [mode: %s], exiting ...\n", |
---|
491 | path, mode); |
---|
492 | errorExit(-1); |
---|
493 | return (FILE *)NULL; |
---|
494 | } |
---|
495 | } |
---|
496 | |
---|
497 | |
---|
498 | } |
---|
499 | |
---|
500 | |
---|
501 | |
---|
502 | |
---|
503 | |
---|
504 | /********************* END UTILITY FUNCTIONS ********************/ |
---|
505 | |
---|
506 | |
---|
507 | /******************************some functions for the likelihood computation ****************************/ |
---|
508 | |
---|
509 | |
---|
510 | boolean isTip(int number, int maxTips) |
---|
511 | { |
---|
512 | assert(number > 0); |
---|
513 | |
---|
514 | if(number <= maxTips) |
---|
515 | return TRUE; |
---|
516 | else |
---|
517 | return FALSE; |
---|
518 | } |
---|
519 | |
---|
520 | |
---|
521 | |
---|
522 | |
---|
523 | |
---|
524 | |
---|
525 | |
---|
526 | |
---|
527 | void getxnode (nodeptr p) |
---|
528 | { |
---|
529 | nodeptr s; |
---|
530 | |
---|
531 | if ((s = p->next)->x || (s = s->next)->x) |
---|
532 | { |
---|
533 | p->x = s->x; |
---|
534 | s->x = 0; |
---|
535 | } |
---|
536 | |
---|
537 | assert(p->x); |
---|
538 | } |
---|
539 | |
---|
540 | |
---|
541 | |
---|
542 | |
---|
543 | |
---|
544 | void hookup (nodeptr p, nodeptr q, double *z, int numBranches) |
---|
545 | { |
---|
546 | int i; |
---|
547 | |
---|
548 | p->back = q; |
---|
549 | q->back = p; |
---|
550 | |
---|
551 | for(i = 0; i < numBranches; i++) |
---|
552 | p->z[i] = q->z[i] = z[i]; |
---|
553 | } |
---|
554 | |
---|
555 | void hookupDefault (nodeptr p, nodeptr q, int numBranches) |
---|
556 | { |
---|
557 | int i; |
---|
558 | |
---|
559 | p->back = q; |
---|
560 | q->back = p; |
---|
561 | |
---|
562 | for(i = 0; i < numBranches; i++) |
---|
563 | p->z[i] = q->z[i] = defaultz; |
---|
564 | } |
---|
565 | |
---|
566 | |
---|
567 | /***********************reading and initializing input ******************/ |
---|
568 | |
---|
569 | static void rax_getline_insptr_valid(char **lineptr, size_t *n, size_t ins_ptr ) |
---|
570 | { |
---|
571 | const size_t |
---|
572 | n_inc = 1024; |
---|
573 | |
---|
574 | if(ins_ptr >= *n) |
---|
575 | { |
---|
576 | assert( *n <= (SSIZE_MAX - n_inc)); |
---|
577 | |
---|
578 | *n += n_inc; |
---|
579 | |
---|
580 | *lineptr = (char*)rax_realloc((void*)(*lineptr), *n * sizeof(char), FALSE); |
---|
581 | |
---|
582 | assert(*lineptr != 0); |
---|
583 | } |
---|
584 | } |
---|
585 | |
---|
/*
 * getline()-like reader: reads one line from h into the growable buffer
 * *lineptr (capacity *n), including the terminating '\n', and
 * null-terminates it.
 *
 * Returns the number of characters stored (terminator excluded), or -1 when
 * EOF is hit before any character of the line was read. EOF after at least
 * one character ends the line without a '\n'.
 *
 * Differences from the POSIX function: errors are handled by assert(), and a
 * '\r' is stored as '\n' and ends the line on its own, so a "\r\n" pair
 * produces an additional empty line on the next call.
 */
static ssize_t rax_getline(char **lineptr, size_t *n, FILE *h)
{
  size_t
    ins_ptr = 0;

  int
    c;

  assert(h != (FILE*)NULL);

  /* no buffer yet: start from capacity zero */
  if(*lineptr == (char *)NULL)
    *n = 0;

  do
    {
      c = fgetc(h);

      if(c == EOF)
        {
          if(ins_ptr == 0)
            return -1;

          break;
        }

      /* windows / old mac line ends are turned into '\n' */
      if(c == '\r')
        c = '\n';

      /* store the character, '\n' included */
      rax_getline_insptr_valid(lineptr, n, ins_ptr);
      (*lineptr)[ins_ptr++] = c;
    }
  while(c != '\n');

  /* null-terminate */
  rax_getline_insptr_valid( lineptr, n, ins_ptr );
  (*lineptr)[ins_ptr] = 0;

  return ((ssize_t)ins_ptr);
}
---|
641 | |
---|
642 | |
---|
643 | static void getnums (rawdata *rdta, analdef *adef) |
---|
644 | { |
---|
645 | if(fscanf(INFILE, "%d %d", & rdta->numsp, & rdta->sites) != 2) |
---|
646 | { |
---|
647 | char |
---|
648 | *line = NULL; |
---|
649 | |
---|
650 | size_t |
---|
651 | len = 0; |
---|
652 | |
---|
653 | ssize_t |
---|
654 | read; |
---|
655 | |
---|
656 | int |
---|
657 | sequenceLength = 0, |
---|
658 | sequences = 0, |
---|
659 | taxa = 0, |
---|
660 | sites =0; |
---|
661 | |
---|
662 | if(processID == 0) |
---|
663 | { |
---|
664 | printf("\nRAxML can't, parse the alignment file as phylip file \n"); |
---|
665 | printf("it will now try to parse it as FASTA file\n\n"); |
---|
666 | } |
---|
667 | |
---|
668 | while((read = rax_getline(&line, &len, INFILE)) != -1) |
---|
669 | { |
---|
670 | ssize_t |
---|
671 | i = 0; |
---|
672 | |
---|
673 | while((i < read - 1) && (line[i] == ' ' || line[i] == '\t')) |
---|
674 | i++; |
---|
675 | |
---|
676 | if(line[i] == '>') |
---|
677 | { |
---|
678 | if(taxa == 1) |
---|
679 | sequenceLength = sites; |
---|
680 | |
---|
681 | if(taxa > 0) |
---|
682 | { |
---|
683 | if(sites == 0 && processID == 0) |
---|
684 | { |
---|
685 | printf("Fasta parsing error, RAxML was expecting sequence data before: %s\n", line); |
---|
686 | errorExit(-1); |
---|
687 | } |
---|
688 | assert(sites > 0); |
---|
689 | sequences++; |
---|
690 | } |
---|
691 | |
---|
692 | if(taxa > 0) |
---|
693 | { |
---|
694 | if(sequenceLength != sites && processID == 0) |
---|
695 | { |
---|
696 | printf("Fasta parsing error, RAxML expects an alignment.\n"); |
---|
697 | printf("the sequence before taxon %s: seems to have a different length\n", line); |
---|
698 | errorExit(-1); |
---|
699 | } |
---|
700 | assert(sequenceLength == sites); |
---|
701 | } |
---|
702 | |
---|
703 | taxa++; |
---|
704 | |
---|
705 | sites = 0; |
---|
706 | } |
---|
707 | else |
---|
708 | { |
---|
709 | while(i < read - 1) |
---|
710 | { |
---|
711 | if(!(line[i] == ' ' || line[i] == '\t')) |
---|
712 | { |
---|
713 | sites++; |
---|
714 | } |
---|
715 | i++; |
---|
716 | } |
---|
717 | } |
---|
718 | } |
---|
719 | |
---|
720 | if(sites > 0) |
---|
721 | sequences++; |
---|
722 | if(taxa != sequences && processID == 0) |
---|
723 | { |
---|
724 | printf("Fasta parsing error, the number of taxa %d and sequences %d are not equal!\n", taxa, sequences); |
---|
725 | errorExit(-1); |
---|
726 | } |
---|
727 | assert(taxa == sequences); |
---|
728 | |
---|
729 | if(sequenceLength != sites && processID == 0) |
---|
730 | { |
---|
731 | printf("Fasta parsing error, RAxML expects an alignment.\n"); |
---|
732 | printf("the last sequence in the alignment seems to have a different length\n"); |
---|
733 | errorExit(-1); |
---|
734 | } |
---|
735 | |
---|
736 | assert(sites == sequenceLength); |
---|
737 | |
---|
738 | if(line) |
---|
739 | rax_free(line); |
---|
740 | |
---|
741 | rewind(INFILE); |
---|
742 | |
---|
743 | adef->alignmentFileType = FASTA; |
---|
744 | |
---|
745 | rdta->numsp = taxa; |
---|
746 | rdta->sites = sites; |
---|
747 | } |
---|
748 | |
---|
749 | |
---|
750 | |
---|
751 | if (rdta->numsp < 4) |
---|
752 | { |
---|
753 | if(processID == 0) |
---|
754 | printf("TOO FEW SPECIES\n"); |
---|
755 | errorExit(-1); |
---|
756 | } |
---|
757 | |
---|
758 | if (rdta->sites < 1) |
---|
759 | { |
---|
760 | if(processID == 0) |
---|
761 | printf("TOO FEW SITES\n"); |
---|
762 | errorExit(-1); |
---|
763 | } |
---|
764 | |
---|
765 | return; |
---|
766 | } |
---|
767 | |
---|
768 | |
---|
769 | |
---|
770 | |
---|
771 | |
---|
772 | boolean whitechar (int ch) |
---|
773 | { |
---|
774 | return (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r'); |
---|
775 | } |
---|
776 | |
---|
777 | |
---|
/*
 * Converts the character stored in *chptr to upper case in place when it is
 * a lower-case letter in one of the ranges a-i, j-r or s-z; every other
 * value is left untouched.
 */
static void uppercase (int *chptr)
{
  const int
    ch    = *chptr,
    inAI  = (ch >= 'a' && ch <= 'i'),
    inJR  = (ch >= 'j' && ch <= 'r'),
    inSZ  = (ch >= 's' && ch <= 'z');

  if(inAI || inJR || inSZ)
    *chptr = ch + 'A' - 'a';
}
---|
787 | |
---|
788 | |
---|
789 | |
---|
790 | |
---|
791 | static void getyspace (rawdata *rdta) |
---|
792 | { |
---|
793 | size_t size = 4 * ((size_t)(rdta->sites / 4 + 1)); |
---|
794 | int i; |
---|
795 | unsigned char *y0; |
---|
796 | |
---|
797 | rdta->y = (unsigned char **) rax_malloc((rdta->numsp + 1) * sizeof(unsigned char *)); |
---|
798 | assert(rdta->y); |
---|
799 | |
---|
800 | y0 = (unsigned char *) rax_malloc(((size_t)(rdta->numsp + 1)) * size * sizeof(unsigned char)); |
---|
801 | assert(y0); |
---|
802 | |
---|
803 | rdta->y0 = y0; |
---|
804 | |
---|
805 | for (i = 0; i <= rdta->numsp; i++) |
---|
806 | { |
---|
807 | rdta->y[i] = y0; |
---|
808 | y0 += size; |
---|
809 | } |
---|
810 | |
---|
811 | return; |
---|
812 | } |
---|
813 | |
---|
814 | |
---|
/*
 * Pseudo-random generator with fixed initial state kept in function-local
 * statics. Each call advances three components - an additive counter, a
 * 13/17/5 xorshift register and an add-with-carry pair - and returns their
 * sum. The state is shared by all callers and is not protected against
 * concurrent use.
 */
static unsigned int KISS32(void)
{
  static unsigned int
    counter  = 123456789,
    shifter  = 362436069,
    lagOlder = 21288629,
    lagNewer = 14921776,
    carry    = 0;

  unsigned int
    total;

  /* additive counter */
  counter += 545925293;

  /* xorshift register */
  shifter ^= (shifter << 13);
  shifter ^= (shifter >> 17);
  shifter ^= (shifter << 5);

  /* add-with-carry: bit 31 of the sum is the next carry */
  total    = lagOlder + lagNewer + carry;
  lagOlder = lagNewer;
  carry    = (total >> 31);
  lagNewer = total & 2147483647;

  return (counter + shifter + lagNewer);
}
---|
837 | |
---|
838 | static boolean setupTree (tree *tr, analdef *adef) |
---|
839 | { |
---|
840 | nodeptr p0, p, q; |
---|
841 | int |
---|
842 | i, |
---|
843 | j, |
---|
844 | tips, |
---|
845 | inter; |
---|
846 | |
---|
847 | |
---|
848 | |
---|
849 | tr->storedBrLens = (double*)NULL; |
---|
850 | |
---|
851 | if(!adef->readTaxaOnly) |
---|
852 | { |
---|
853 | tr->bigCutoff = FALSE; |
---|
854 | |
---|
855 | tr->patternPosition = (int*)NULL; |
---|
856 | tr->columnPosition = (int*)NULL; |
---|
857 | |
---|
858 | tr->maxCategories = MAX(4, adef->categories); |
---|
859 | |
---|
860 | tr->partitionContributions = (double *)rax_malloc(sizeof(double) * tr->NumberOfModels); |
---|
861 | |
---|
862 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
863 | tr->partitionContributions[i] = -1.0; |
---|
864 | |
---|
865 | tr->perPartitionLH = (double *)rax_malloc(sizeof(double) * tr->NumberOfModels); |
---|
866 | tr->storedPerPartitionLH = (double *)rax_malloc(sizeof(double) * tr->NumberOfModels); |
---|
867 | |
---|
868 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
869 | { |
---|
870 | tr->perPartitionLH[i] = 0.0; |
---|
871 | tr->storedPerPartitionLH[i] = 0.0; |
---|
872 | } |
---|
873 | |
---|
874 | if(adef->grouping) |
---|
875 | tr->grouped = TRUE; |
---|
876 | else |
---|
877 | tr->grouped = FALSE; |
---|
878 | |
---|
879 | if(adef->constraint) |
---|
880 | tr->constrained = TRUE; |
---|
881 | else |
---|
882 | tr->constrained = FALSE; |
---|
883 | |
---|
884 | tr->treeID = 0; |
---|
885 | } |
---|
886 | |
---|
887 | tips = tr->mxtips; |
---|
888 | inter = tr->mxtips - 1; |
---|
889 | |
---|
890 | if(!adef->readTaxaOnly) |
---|
891 | { |
---|
892 | tr->yVector = (unsigned char **) rax_malloc((tr->mxtips + 1) * sizeof(unsigned char *)); |
---|
893 | |
---|
894 | tr->fracchanges = (double *)rax_malloc(tr->NumberOfModels * sizeof(double)); |
---|
895 | |
---|
896 | tr->rawFracchanges = (double *)rax_malloc(tr->NumberOfModels * sizeof(double)); |
---|
897 | |
---|
898 | tr->likelihoods = (double *)rax_malloc(adef->multipleRuns * sizeof(double)); |
---|
899 | } |
---|
900 | |
---|
901 | tr->numberOfTrees = -1; |
---|
902 | |
---|
903 | |
---|
904 | |
---|
905 | tr->treeStringLength = tr->mxtips * (nmlngth+128) + 256 + tr->mxtips * 2; |
---|
906 | |
---|
907 | tr->tree_string = (char*)rax_calloc(tr->treeStringLength, sizeof(char)); |
---|
908 | |
---|
909 | /*TODO, must that be so long ?*/ |
---|
910 | |
---|
911 | if(!adef->readTaxaOnly) |
---|
912 | { |
---|
913 | |
---|
914 | tr->td[0].count = 0; |
---|
915 | tr->td[0].ti = (traversalInfo *)rax_malloc(sizeof(traversalInfo) * tr->mxtips); |
---|
916 | |
---|
917 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
918 | { |
---|
919 | tr->fracchanges[i] = -1.0; |
---|
920 | tr->rawFracchanges[i] = -1.0; |
---|
921 | } |
---|
922 | |
---|
923 | tr->fracchange = -1.0; |
---|
924 | tr->rawFracchange = -1.0; |
---|
925 | |
---|
926 | tr->constraintVector = (int *)rax_malloc((2 * tr->mxtips) * sizeof(int)); |
---|
927 | |
---|
928 | tr->nameList = (char **)rax_malloc(sizeof(char *) * (tips + 1)); |
---|
929 | } |
---|
930 | |
---|
931 | if (!(p0 = (nodeptr) rax_malloc((tips + 3*inter) * sizeof(node)))) |
---|
932 | { |
---|
933 | printf("ERROR: Unable to obtain sufficient tree memory\n"); |
---|
934 | return FALSE; |
---|
935 | } |
---|
936 | |
---|
937 | if (!(tr->nodep = (nodeptr *) rax_malloc((2*tr->mxtips) * sizeof(nodeptr)))) |
---|
938 | { |
---|
939 | printf("ERROR: Unable to obtain sufficient tree memory, too\n"); |
---|
940 | return FALSE; |
---|
941 | } |
---|
942 | |
---|
943 | tr->nodep[0] = (node *) NULL; /* Use as 1-based array */ |
---|
944 | |
---|
945 | for (i = 1; i <= tips; i++) |
---|
946 | { |
---|
947 | p = p0++; |
---|
948 | |
---|
949 | p->hash = KISS32(); /* hast table stuff */ |
---|
950 | p->x = 0; |
---|
951 | p->number = i; |
---|
952 | p->next = p; |
---|
953 | p->back = (node *)NULL; |
---|
954 | p->bInf = (branchInfo *)NULL; |
---|
955 | |
---|
956 | |
---|
957 | |
---|
958 | |
---|
959 | |
---|
960 | |
---|
961 | tr->nodep[i] = p; |
---|
962 | } |
---|
963 | |
---|
964 | for (i = tips + 1; i <= tips + inter; i++) |
---|
965 | { |
---|
966 | q = (node *) NULL; |
---|
967 | for (j = 1; j <= 3; j++) |
---|
968 | { |
---|
969 | p = p0++; |
---|
970 | if(j == 1) |
---|
971 | p->x = 1; |
---|
972 | else |
---|
973 | p->x = 0; |
---|
974 | p->number = i; |
---|
975 | p->next = q; |
---|
976 | p->bInf = (branchInfo *)NULL; |
---|
977 | p->back = (node *) NULL; |
---|
978 | p->hash = 0; |
---|
979 | |
---|
980 | |
---|
981 | |
---|
982 | |
---|
983 | |
---|
984 | |
---|
985 | q = p; |
---|
986 | } |
---|
987 | p->next->next->next = p; |
---|
988 | tr->nodep[i] = p; |
---|
989 | } |
---|
990 | |
---|
991 | tr->likelihood = unlikely; |
---|
992 | tr->start = (node *) NULL; |
---|
993 | |
---|
994 | |
---|
995 | |
---|
996 | tr->ntips = 0; |
---|
997 | tr->nextnode = 0; |
---|
998 | |
---|
999 | if(!adef->readTaxaOnly) |
---|
1000 | { |
---|
1001 | for(i = 0; i < tr->numBranches; i++) |
---|
1002 | tr->partitionSmoothed[i] = FALSE; |
---|
1003 | } |
---|
1004 | |
---|
1005 | return TRUE; |
---|
1006 | } |
---|
1007 | |
---|
1008 | |
---|
/*
 * Validates a taxon name held in buffer, where len counts the characters
 * including the terminating '\0'.
 *
 * The first len - 1 characters must not be a NUL byte, tab, line feed,
 * carriage return, space or one of : , ( ) ; [ ] '. The first offending
 * character is reported and the program terminates with exit(-1).
 * Finally asserts that buffer[len - 1] is the string terminator.
 */
static void checkTaxonName(char *buffer, int len)
{
  static const char
    illegal[] = {'\0', '\t', '\n', '\r', ' ', ':', ',', '(', ')', ';', '[', ']', '\''};

  int
    i;

  for(i = 0; i < len - 1; i++)
    {
      if(memchr(illegal, buffer[i], sizeof(illegal)) == NULL)
        continue;

      printf("ERROR: Taxon Name \"%s\" is invalid at position %d, it contains illegal character %c\n", buffer, i, buffer[i]);
      printf("Illegal characters in taxon-names are: tabulators, carriage returns, spaces, \":\", \",\", \")\", \"(\", \";\", \"]\", \"[\", \"\'\" \n");
      printf("Exiting\n");
      exit(-1);
    }

  assert(buffer[len - 1] == '\0');
}
---|
1049 | |
---|
/*
 * Prints the surroundings of the current read position of f to stdout: up
 * to 20 characters before it and up to 20 characters after it, stopping
 * early at EOF. The stream position is changed by this call (it is left
 * after the last character read).
 */
static void printParsingErrorContext(FILE *f)
{
  const long
    contextWidth = 20;

  const long
    currentPos = ftell(f),
    contextPos = MAX(currentPos - contextWidth, 0),   /* never seek before the start */
    stopPos    = currentPos + contextWidth;

  long
    i;

  fseek(f, contextPos, SEEK_SET);

  printf("Printing error context:\n\n");

  for(i = contextPos; i < stopPos; i++)
    {
      const int
        ch = getc(f);

      if(ch == EOF)
        break;

      printf("%c", ch);
    }

  printf("\n\n");
}
---|
1076 | |
---|
1077 | static boolean getdata(analdef *adef, rawdata *rdta, tree *tr) |
---|
1078 | { |
---|
1079 | int |
---|
1080 | i, |
---|
1081 | j, |
---|
1082 | basesread, |
---|
1083 | basesnew, |
---|
1084 | ch, my_i, meaning, |
---|
1085 | len, |
---|
1086 | meaningAA[256], |
---|
1087 | meaningDNA[256], |
---|
1088 | meaningBINARY[256], |
---|
1089 | meaningGeneric32[256], |
---|
1090 | meaningGeneric64[256]; |
---|
1091 | |
---|
1092 | boolean |
---|
1093 | allread, |
---|
1094 | firstpass; |
---|
1095 | |
---|
1096 | char |
---|
1097 | buffer[nmlngth + 2]; |
---|
1098 | |
---|
1099 | unsigned char |
---|
1100 | genericChars32[32] = {'0', '1', '2', '3', '4', '5', '6', '7', |
---|
1101 | '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', |
---|
1102 | 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', |
---|
1103 | 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V'}; |
---|
1104 | unsigned long |
---|
1105 | total = 0, |
---|
1106 | gaps = 0; |
---|
1107 | |
---|
1108 | for (i = 0; i < 256; i++) |
---|
1109 | { |
---|
1110 | meaningAA[i] = -1; |
---|
1111 | meaningDNA[i] = -1; |
---|
1112 | meaningBINARY[i] = -1; |
---|
1113 | meaningGeneric32[i] = -1; |
---|
1114 | meaningGeneric64[i] = -1; |
---|
1115 | } |
---|
1116 | |
---|
1117 | /* generic 32 data */ |
---|
1118 | |
---|
1119 | for(i = 0; i < 32; i++) |
---|
1120 | meaningGeneric32[genericChars32[i]] = i; |
---|
1121 | meaningGeneric32['-'] = getUndetermined(GENERIC_32); |
---|
1122 | meaningGeneric32['?'] = getUndetermined(GENERIC_32); |
---|
1123 | |
---|
1124 | /* AA data */ |
---|
1125 | |
---|
1126 | meaningAA['A'] = 0; /* alanine */ |
---|
1127 | meaningAA['R'] = 1; /* arginine */ |
---|
1128 | meaningAA['N'] = 2; /* asparagine*/ |
---|
1129 | meaningAA['D'] = 3; /* aspartic */ |
---|
1130 | meaningAA['C'] = 4; /* cysteine */ |
---|
1131 | meaningAA['Q'] = 5; /* glutamine */ |
---|
1132 | meaningAA['E'] = 6; /* glutamic */ |
---|
1133 | meaningAA['G'] = 7; /* glycine */ |
---|
1134 | meaningAA['H'] = 8; /* histidine */ |
---|
1135 | meaningAA['I'] = 9; /* isoleucine */ |
---|
1136 | meaningAA['L'] = 10; /* leucine */ |
---|
1137 | meaningAA['K'] = 11; /* lysine */ |
---|
1138 | meaningAA['M'] = 12; /* methionine */ |
---|
1139 | meaningAA['F'] = 13; /* phenylalanine */ |
---|
1140 | meaningAA['P'] = 14; /* proline */ |
---|
1141 | meaningAA['S'] = 15; /* serine */ |
---|
1142 | meaningAA['T'] = 16; /* threonine */ |
---|
1143 | meaningAA['W'] = 17; /* tryptophan */ |
---|
1144 | meaningAA['Y'] = 18; /* tyrosine */ |
---|
1145 | meaningAA['V'] = 19; /* valine */ |
---|
1146 | meaningAA['B'] = 20; /* asparagine, aspartic 2 and 3*/ |
---|
1147 | meaningAA['Z'] = 21; /*21 glutamine glutamic 5 and 6*/ |
---|
1148 | |
---|
1149 | meaningAA['X'] = |
---|
1150 | meaningAA['?'] = |
---|
1151 | meaningAA['*'] = |
---|
1152 | meaningAA['-'] = |
---|
1153 | getUndetermined(AA_DATA); |
---|
1154 | |
---|
1155 | /* DNA data */ |
---|
1156 | |
---|
1157 | meaningDNA['A'] = 1; |
---|
1158 | meaningDNA['B'] = 14; |
---|
1159 | meaningDNA['C'] = 2; |
---|
1160 | meaningDNA['D'] = 13; |
---|
1161 | meaningDNA['G'] = 4; |
---|
1162 | meaningDNA['H'] = 11; |
---|
1163 | meaningDNA['K'] = 12; |
---|
1164 | meaningDNA['M'] = 3; |
---|
1165 | meaningDNA['R'] = 5; |
---|
1166 | meaningDNA['S'] = 6; |
---|
1167 | meaningDNA['T'] = 8; |
---|
1168 | meaningDNA['U'] = 8; |
---|
1169 | meaningDNA['V'] = 7; |
---|
1170 | meaningDNA['W'] = 9; |
---|
1171 | meaningDNA['Y'] = 10; |
---|
1172 | |
---|
1173 | meaningDNA['N'] = |
---|
1174 | meaningDNA['O'] = |
---|
1175 | meaningDNA['X'] = |
---|
1176 | meaningDNA['-'] = |
---|
1177 | meaningDNA['?'] = |
---|
1178 | getUndetermined(DNA_DATA); |
---|
1179 | |
---|
1180 | /* BINARY DATA */ |
---|
1181 | |
---|
1182 | meaningBINARY['0'] = 1; |
---|
1183 | meaningBINARY['1'] = 2; |
---|
1184 | |
---|
1185 | meaningBINARY['-'] = |
---|
1186 | meaningBINARY['?'] = |
---|
1187 | getUndetermined(BINARY_DATA); |
---|
1188 | |
---|
1189 | |
---|
1190 | /*******************************************************************/ |
---|
1191 | |
---|
1192 | basesread = basesnew = 0; |
---|
1193 | |
---|
1194 | allread = FALSE; |
---|
1195 | firstpass = TRUE; |
---|
1196 | ch = ' '; |
---|
1197 | |
---|
1198 | while (! allread) |
---|
1199 | { |
---|
1200 | for(i = 1; i <= tr->mxtips; i++) |
---|
1201 | { |
---|
1202 | if(firstpass) |
---|
1203 | { |
---|
1204 | ch = getc(INFILE); |
---|
1205 | |
---|
1206 | while(ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r') |
---|
1207 | ch = getc(INFILE); |
---|
1208 | |
---|
1209 | my_i = 0; |
---|
1210 | |
---|
1211 | do |
---|
1212 | { |
---|
1213 | buffer[my_i] = ch; |
---|
1214 | ch = getc(INFILE); |
---|
1215 | my_i++; |
---|
1216 | if(my_i >= nmlngth) |
---|
1217 | { |
---|
1218 | if(processID == 0) |
---|
1219 | { |
---|
1220 | printf("Taxon Name too long at taxon %d, adapt constant nmlngth in\n", i); |
---|
1221 | printf("axml.h, current setting %d\n", nmlngth); |
---|
1222 | } |
---|
1223 | errorExit(-1); |
---|
1224 | } |
---|
1225 | } |
---|
1226 | while(ch != ' ' && ch != '\n' && ch != '\t' && ch != '\r'); |
---|
1227 | |
---|
1228 | buffer[my_i] = '\0'; |
---|
1229 | len = strlen(buffer) + 1; |
---|
1230 | checkTaxonName(buffer, len); |
---|
1231 | tr->nameList[i] = (char *)rax_malloc(sizeof(char) * len); |
---|
1232 | strcpy(tr->nameList[i], buffer); |
---|
1233 | |
---|
1234 | while(ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r') |
---|
1235 | ch = getc(INFILE); |
---|
1236 | |
---|
1237 | ungetc(ch, INFILE); |
---|
1238 | } |
---|
1239 | |
---|
1240 | j = basesread; |
---|
1241 | |
---|
1242 | while((j < rdta->sites) && ((ch = getc(INFILE)) != EOF) && (ch != '\n') && (ch != '\r')) |
---|
1243 | { |
---|
1244 | uppercase(& ch); |
---|
1245 | |
---|
1246 | assert(tr->dataVector[j + 1] != -1); |
---|
1247 | |
---|
1248 | switch(tr->dataVector[j + 1]) |
---|
1249 | { |
---|
1250 | case BINARY_DATA: |
---|
1251 | meaning = meaningBINARY[ch]; |
---|
1252 | break; |
---|
1253 | case DNA_DATA: |
---|
1254 | case SECONDARY_DATA: |
---|
1255 | case SECONDARY_DATA_6: |
---|
1256 | case SECONDARY_DATA_7: |
---|
1257 | /* |
---|
1258 | still dealing with DNA/RNA here, hence just act if as they where DNA characters |
---|
1259 | corresponding column merging for sec struct models will take place later |
---|
1260 | */ |
---|
1261 | meaning = meaningDNA[ch]; |
---|
1262 | break; |
---|
1263 | case AA_DATA: |
---|
1264 | meaning = meaningAA[ch]; |
---|
1265 | break; |
---|
1266 | case GENERIC_32: |
---|
1267 | meaning = meaningGeneric32[ch]; |
---|
1268 | break; |
---|
1269 | case GENERIC_64: |
---|
1270 | meaning = meaningGeneric64[ch]; |
---|
1271 | break; |
---|
1272 | default: |
---|
1273 | assert(0); |
---|
1274 | } |
---|
1275 | |
---|
1276 | if (meaning != -1) |
---|
1277 | { |
---|
1278 | j++; |
---|
1279 | rdta->y[i][j] = ch; |
---|
1280 | } |
---|
1281 | else |
---|
1282 | { |
---|
1283 | if(!whitechar(ch)) |
---|
1284 | { |
---|
1285 | printf("ERROR: Bad base (%c) at site %d of sequence %d\n", |
---|
1286 | ch, j + 1, i); |
---|
1287 | |
---|
1288 | printParsingErrorContext(INFILE); |
---|
1289 | |
---|
1290 | return FALSE; |
---|
1291 | } |
---|
1292 | } |
---|
1293 | } |
---|
1294 | |
---|
1295 | if (ch == EOF) |
---|
1296 | { |
---|
1297 | printf("ERROR: End-of-file at site %d of sequence %d\n", j + 1, i); |
---|
1298 | |
---|
1299 | printParsingErrorContext(INFILE); |
---|
1300 | |
---|
1301 | return FALSE; |
---|
1302 | } |
---|
1303 | |
---|
1304 | if (! firstpass && (j == basesread)) |
---|
1305 | i--; |
---|
1306 | else |
---|
1307 | { |
---|
1308 | if (i == 1) |
---|
1309 | basesnew = j; |
---|
1310 | else |
---|
1311 | if (j != basesnew) |
---|
1312 | { |
---|
1313 | printf("ERROR: Sequences out of alignment\n"); |
---|
1314 | printf("%d (instead of %d) residues read in sequence %d %s\n", |
---|
1315 | j - basesread, basesnew - basesread, i, tr->nameList[i]); |
---|
1316 | |
---|
1317 | printParsingErrorContext(INFILE); |
---|
1318 | |
---|
1319 | return FALSE; |
---|
1320 | } |
---|
1321 | } |
---|
1322 | while (ch != '\n' && ch != EOF && ch != '\r') ch = getc(INFILE); /* flush line *//* PC-LINEBREAK*/ |
---|
1323 | } |
---|
1324 | |
---|
1325 | firstpass = FALSE; |
---|
1326 | basesread = basesnew; |
---|
1327 | allread = (basesread >= rdta->sites); |
---|
1328 | } |
---|
1329 | |
---|
1330 | for(j = 1; j <= tr->mxtips; j++) |
---|
1331 | for(i = 1; i <= rdta->sites; i++) |
---|
1332 | { |
---|
1333 | assert(tr->dataVector[i] != -1); |
---|
1334 | |
---|
1335 | switch(tr->dataVector[i]) |
---|
1336 | { |
---|
1337 | case BINARY_DATA: |
---|
1338 | meaning = meaningBINARY[rdta->y[j][i]]; |
---|
1339 | if(meaning == getUndetermined(BINARY_DATA)) |
---|
1340 | gaps++; |
---|
1341 | break; |
---|
1342 | |
---|
1343 | case SECONDARY_DATA: |
---|
1344 | case SECONDARY_DATA_6: |
---|
1345 | case SECONDARY_DATA_7: |
---|
1346 | assert(tr->secondaryStructurePairs[i - 1] != -1); |
---|
1347 | assert(i - 1 == tr->secondaryStructurePairs[tr->secondaryStructurePairs[i - 1]]); |
---|
1348 | /* |
---|
1349 | don't worry too much about undetermined column count here for sec-struct, just count |
---|
1350 | DNA/RNA gaps here and worry about the rest later-on, falling through to DNA again :-) |
---|
1351 | */ |
---|
1352 | case DNA_DATA: |
---|
1353 | meaning = meaningDNA[rdta->y[j][i]]; |
---|
1354 | if(meaning == getUndetermined(DNA_DATA)) |
---|
1355 | gaps++; |
---|
1356 | break; |
---|
1357 | |
---|
1358 | case AA_DATA: |
---|
1359 | meaning = meaningAA[rdta->y[j][i]]; |
---|
1360 | if(meaning == getUndetermined(AA_DATA)) |
---|
1361 | gaps++; |
---|
1362 | break; |
---|
1363 | |
---|
1364 | case GENERIC_32: |
---|
1365 | meaning = meaningGeneric32[rdta->y[j][i]]; |
---|
1366 | if(meaning == getUndetermined(GENERIC_32)) |
---|
1367 | gaps++; |
---|
1368 | break; |
---|
1369 | |
---|
1370 | case GENERIC_64: |
---|
1371 | meaning = meaningGeneric64[rdta->y[j][i]]; |
---|
1372 | if(meaning == getUndetermined(GENERIC_64)) |
---|
1373 | gaps++; |
---|
1374 | break; |
---|
1375 | default: |
---|
1376 | assert(0); |
---|
1377 | } |
---|
1378 | |
---|
1379 | total++; |
---|
1380 | rdta->y[j][i] = meaning; |
---|
1381 | } |
---|
1382 | |
---|
1383 | adef->gapyness = (double)gaps / (double)total; |
---|
1384 | |
---|
1385 | return TRUE; |
---|
1386 | } |
---|
1387 | |
---|
1388 | static void parseFasta(analdef *adef, rawdata *rdta, tree *tr) |
---|
1389 | { |
---|
1390 | int |
---|
1391 | index, |
---|
1392 | meaning, |
---|
1393 | meaningAA[256], |
---|
1394 | meaningDNA[256], |
---|
1395 | meaningBINARY[256], |
---|
1396 | meaningGeneric32[256], |
---|
1397 | meaningGeneric64[256]; |
---|
1398 | |
---|
1399 | char |
---|
1400 | buffer[nmlngth + 2]; |
---|
1401 | |
---|
1402 | unsigned char |
---|
1403 | genericChars32[32] = {'0', '1', '2', '3', '4', '5', '6', '7', |
---|
1404 | '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', |
---|
1405 | 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', |
---|
1406 | 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V'}; |
---|
1407 | unsigned long |
---|
1408 | total = 0, |
---|
1409 | gaps = 0; |
---|
1410 | |
---|
1411 | for(index = 0; index < 256; index++) |
---|
1412 | { |
---|
1413 | meaningAA[index] = -1; |
---|
1414 | meaningDNA[index] = -1; |
---|
1415 | meaningBINARY[index] = -1; |
---|
1416 | meaningGeneric32[index] = -1; |
---|
1417 | meaningGeneric64[index] = -1; |
---|
1418 | } |
---|
1419 | |
---|
1420 | /* generic 32 data */ |
---|
1421 | |
---|
1422 | for(index = 0; index < 32; index++) |
---|
1423 | meaningGeneric32[genericChars32[index]] = index; |
---|
1424 | |
---|
1425 | meaningGeneric32['-'] = getUndetermined(GENERIC_32); |
---|
1426 | meaningGeneric32['?'] = getUndetermined(GENERIC_32); |
---|
1427 | |
---|
1428 | /* AA data */ |
---|
1429 | |
---|
1430 | meaningAA['A'] = 0; /* alanine */ |
---|
1431 | meaningAA['R'] = 1; /* arginine */ |
---|
1432 | meaningAA['N'] = 2; /* asparagine*/ |
---|
1433 | meaningAA['D'] = 3; /* aspartic */ |
---|
1434 | meaningAA['C'] = 4; /* cysteine */ |
---|
1435 | meaningAA['Q'] = 5; /* glutamine */ |
---|
1436 | meaningAA['E'] = 6; /* glutamic */ |
---|
1437 | meaningAA['G'] = 7; /* glycine */ |
---|
1438 | meaningAA['H'] = 8; /* histidine */ |
---|
1439 | meaningAA['I'] = 9; /* isoleucine */ |
---|
1440 | meaningAA['L'] = 10; /* leucine */ |
---|
1441 | meaningAA['K'] = 11; /* lysine */ |
---|
1442 | meaningAA['M'] = 12; /* methionine */ |
---|
1443 | meaningAA['F'] = 13; /* phenylalanine */ |
---|
1444 | meaningAA['P'] = 14; /* proline */ |
---|
1445 | meaningAA['S'] = 15; /* serine */ |
---|
1446 | meaningAA['T'] = 16; /* threonine */ |
---|
1447 | meaningAA['W'] = 17; /* tryptophan */ |
---|
1448 | meaningAA['Y'] = 18; /* tyrosine */ |
---|
1449 | meaningAA['V'] = 19; /* valine */ |
---|
1450 | meaningAA['B'] = 20; /* asparagine, aspartic 2 and 3*/ |
---|
1451 | meaningAA['Z'] = 21; /*21 glutamine glutamic 5 and 6*/ |
---|
1452 | |
---|
1453 | meaningAA['X'] = |
---|
1454 | meaningAA['?'] = |
---|
1455 | meaningAA['*'] = |
---|
1456 | meaningAA['-'] = |
---|
1457 | getUndetermined(AA_DATA); |
---|
1458 | |
---|
1459 | /* DNA data */ |
---|
1460 | |
---|
1461 | meaningDNA['A'] = 1; |
---|
1462 | meaningDNA['B'] = 14; |
---|
1463 | meaningDNA['C'] = 2; |
---|
1464 | meaningDNA['D'] = 13; |
---|
1465 | meaningDNA['G'] = 4; |
---|
1466 | meaningDNA['H'] = 11; |
---|
1467 | meaningDNA['K'] = 12; |
---|
1468 | meaningDNA['M'] = 3; |
---|
1469 | meaningDNA['R'] = 5; |
---|
1470 | meaningDNA['S'] = 6; |
---|
1471 | meaningDNA['T'] = 8; |
---|
1472 | meaningDNA['U'] = 8; |
---|
1473 | meaningDNA['V'] = 7; |
---|
1474 | meaningDNA['W'] = 9; |
---|
1475 | meaningDNA['Y'] = 10; |
---|
1476 | |
---|
1477 | meaningDNA['N'] = |
---|
1478 | meaningDNA['O'] = |
---|
1479 | meaningDNA['X'] = |
---|
1480 | meaningDNA['-'] = |
---|
1481 | meaningDNA['?'] = |
---|
1482 | getUndetermined(DNA_DATA); |
---|
1483 | |
---|
1484 | /* BINARY DATA */ |
---|
1485 | |
---|
1486 | meaningBINARY['0'] = 1; |
---|
1487 | meaningBINARY['1'] = 2; |
---|
1488 | |
---|
1489 | meaningBINARY['-'] = |
---|
1490 | meaningBINARY['?'] = |
---|
1491 | getUndetermined(BINARY_DATA); |
---|
1492 | |
---|
1493 | |
---|
1494 | /*******************************************************************/ |
---|
1495 | |
---|
1496 | { |
---|
1497 | char |
---|
1498 | *line = NULL; |
---|
1499 | |
---|
1500 | size_t |
---|
1501 | len = 0; |
---|
1502 | |
---|
1503 | ssize_t |
---|
1504 | read; |
---|
1505 | |
---|
1506 | int |
---|
1507 | sequenceLength = 0, |
---|
1508 | sequences = 0, |
---|
1509 | taxa = 0, |
---|
1510 | sites = 0; |
---|
1511 | |
---|
1512 | |
---|
1513 | while((read = rax_getline(&line, &len, INFILE)) != -1) |
---|
1514 | { |
---|
1515 | ssize_t |
---|
1516 | i = 0; |
---|
1517 | |
---|
1518 | while((i < read - 1) && (line[i] == ' ' || line[i] == '\t')) |
---|
1519 | i++; |
---|
1520 | |
---|
1521 | if(line[i] == '>') |
---|
1522 | { |
---|
1523 | int |
---|
1524 | nameCount = 0, |
---|
1525 | nameLength; |
---|
1526 | |
---|
1527 | |
---|
1528 | |
---|
1529 | if(taxa == 1) |
---|
1530 | sequenceLength = sites; |
---|
1531 | |
---|
1532 | if(taxa > 0) |
---|
1533 | { |
---|
1534 | assert(sites > 0); |
---|
1535 | sequences++; |
---|
1536 | } |
---|
1537 | |
---|
1538 | if(taxa > 0) |
---|
1539 | assert(sequenceLength == sites); |
---|
1540 | |
---|
1541 | taxa++; |
---|
1542 | |
---|
1543 | i++; |
---|
1544 | |
---|
1545 | while((i < read - 1) && (line[i] == ' ' || line[i] == '\t')) |
---|
1546 | i++; |
---|
1547 | |
---|
1548 | while((i < read - 1) && !(line[i] == ' ' || line[i] == '\t')) |
---|
1549 | { |
---|
1550 | buffer[nameCount] = line[i]; |
---|
1551 | nameCount++; |
---|
1552 | i++; |
---|
1553 | } |
---|
1554 | |
---|
1555 | if(nameCount >= nmlngth) |
---|
1556 | { |
---|
1557 | if(processID == 0) |
---|
1558 | { |
---|
1559 | printf("Taxon Name too long at taxon %d, adapt constant nmlngth in\n", taxa); |
---|
1560 | printf("axml.h, current setting %d\n", nmlngth); |
---|
1561 | } |
---|
1562 | errorExit(-1); |
---|
1563 | } |
---|
1564 | |
---|
1565 | buffer[nameCount] = '\0'; |
---|
1566 | nameLength = strlen(buffer) + 1; |
---|
1567 | checkTaxonName(buffer, nameLength); |
---|
1568 | tr->nameList[taxa] = (char *)rax_malloc(sizeof(char) * nameLength); |
---|
1569 | strcpy(tr->nameList[taxa], buffer); |
---|
1570 | |
---|
1571 | sites = 0; |
---|
1572 | } |
---|
1573 | else |
---|
1574 | { |
---|
1575 | while(i < read - 1) |
---|
1576 | { |
---|
1577 | if(!(line[i] == ' ' || line[i] == '\t')) |
---|
1578 | { |
---|
1579 | int |
---|
1580 | ch = line[i]; |
---|
1581 | |
---|
1582 | uppercase(&ch); |
---|
1583 | |
---|
1584 | assert(tr->dataVector[sites + 1] != -1); |
---|
1585 | |
---|
1586 | switch(tr->dataVector[sites + 1]) |
---|
1587 | { |
---|
1588 | case BINARY_DATA: |
---|
1589 | meaning = meaningBINARY[ch]; |
---|
1590 | break; |
---|
1591 | case DNA_DATA: |
---|
1592 | case SECONDARY_DATA: |
---|
1593 | case SECONDARY_DATA_6: |
---|
1594 | case SECONDARY_DATA_7: |
---|
1595 | meaning = meaningDNA[ch]; |
---|
1596 | break; |
---|
1597 | case AA_DATA: |
---|
1598 | meaning = meaningAA[ch]; |
---|
1599 | break; |
---|
1600 | case GENERIC_32: |
---|
1601 | meaning = meaningGeneric32[ch]; |
---|
1602 | break; |
---|
1603 | case GENERIC_64: |
---|
1604 | meaning = meaningGeneric64[ch]; |
---|
1605 | break; |
---|
1606 | default: |
---|
1607 | assert(0); |
---|
1608 | } |
---|
1609 | |
---|
1610 | if (meaning != -1) |
---|
1611 | rdta->y[taxa][sites + 1] = ch; |
---|
1612 | else |
---|
1613 | { |
---|
1614 | if(processID == 0) |
---|
1615 | { |
---|
1616 | printf("ERROR: Bad base (%c) at site %d of sequence %d\n", |
---|
1617 | ch, sites + 1, taxa); |
---|
1618 | } |
---|
1619 | errorExit(-1); |
---|
1620 | } |
---|
1621 | |
---|
1622 | sites++; |
---|
1623 | } |
---|
1624 | i++; |
---|
1625 | } |
---|
1626 | } |
---|
1627 | } |
---|
1628 | |
---|
1629 | if(sites > 0) |
---|
1630 | sequences++; |
---|
1631 | |
---|
1632 | /* the assertions below should never fail, the have already been checked in getNums */ |
---|
1633 | |
---|
1634 | assert(taxa == sequences); |
---|
1635 | assert(sites == sequenceLength); |
---|
1636 | |
---|
1637 | if(line) |
---|
1638 | rax_free(line); |
---|
1639 | } |
---|
1640 | |
---|
1641 | |
---|
1642 | { |
---|
1643 | int |
---|
1644 | i, |
---|
1645 | j; |
---|
1646 | |
---|
1647 | for(j = 1; j <= tr->mxtips; j++) |
---|
1648 | for(i = 1; i <= rdta->sites; i++) |
---|
1649 | { |
---|
1650 | assert(tr->dataVector[i] != -1); |
---|
1651 | |
---|
1652 | switch(tr->dataVector[i]) |
---|
1653 | { |
---|
1654 | case BINARY_DATA: |
---|
1655 | meaning = meaningBINARY[rdta->y[j][i]]; |
---|
1656 | if(meaning == getUndetermined(BINARY_DATA)) |
---|
1657 | gaps++; |
---|
1658 | break; |
---|
1659 | |
---|
1660 | case SECONDARY_DATA: |
---|
1661 | case SECONDARY_DATA_6: |
---|
1662 | case SECONDARY_DATA_7: |
---|
1663 | assert(tr->secondaryStructurePairs[i - 1] != -1); |
---|
1664 | assert(i - 1 == tr->secondaryStructurePairs[tr->secondaryStructurePairs[i - 1]]); |
---|
1665 | /* |
---|
1666 | don't worry too much about undetermined column count here for sec-struct, just count |
---|
1667 | DNA/RNA gaps here and worry about the rest later-on, falling through to DNA again :-) |
---|
1668 | */ |
---|
1669 | case DNA_DATA: |
---|
1670 | meaning = meaningDNA[rdta->y[j][i]]; |
---|
1671 | if(meaning == getUndetermined(DNA_DATA)) |
---|
1672 | gaps++; |
---|
1673 | break; |
---|
1674 | |
---|
1675 | case AA_DATA: |
---|
1676 | meaning = meaningAA[rdta->y[j][i]]; |
---|
1677 | if(meaning == getUndetermined(AA_DATA)) |
---|
1678 | gaps++; |
---|
1679 | break; |
---|
1680 | |
---|
1681 | case GENERIC_32: |
---|
1682 | meaning = meaningGeneric32[rdta->y[j][i]]; |
---|
1683 | if(meaning == getUndetermined(GENERIC_32)) |
---|
1684 | gaps++; |
---|
1685 | break; |
---|
1686 | |
---|
1687 | case GENERIC_64: |
---|
1688 | meaning = meaningGeneric64[rdta->y[j][i]]; |
---|
1689 | if(meaning == getUndetermined(GENERIC_64)) |
---|
1690 | gaps++; |
---|
1691 | break; |
---|
1692 | default: |
---|
1693 | assert(0); |
---|
1694 | } |
---|
1695 | |
---|
1696 | total++; |
---|
1697 | rdta->y[j][i] = meaning; |
---|
1698 | } |
---|
1699 | } |
---|
1700 | |
---|
1701 | adef->gapyness = (double)gaps / (double)total; |
---|
1702 | |
---|
1703 | return; |
---|
1704 | } |
---|
1705 | |
---|
1706 | |
---|
1707 | |
---|
1708 | static void inputweights (rawdata *rdta) |
---|
1709 | { |
---|
1710 | int i, w, fres; |
---|
1711 | FILE *weightFile; |
---|
1712 | int *wv = (int *)rax_malloc(sizeof(int) * rdta->sites); |
---|
1713 | |
---|
1714 | weightFile = myfopen(weightFileName, "rb"); |
---|
1715 | |
---|
1716 | i = 0; |
---|
1717 | |
---|
1718 | while((fres = fscanf(weightFile,"%d", &w)) != EOF) |
---|
1719 | { |
---|
1720 | if(!fres) |
---|
1721 | { |
---|
1722 | if(processID == 0) |
---|
1723 | printf("error reading weight file probably encountered a non-integer weight value\n"); |
---|
1724 | errorExit(-1); |
---|
1725 | } |
---|
1726 | wv[i] = w; |
---|
1727 | i++; |
---|
1728 | } |
---|
1729 | |
---|
1730 | if(i != rdta->sites) |
---|
1731 | { |
---|
1732 | if(processID == 0) |
---|
1733 | printf("number %d of weights not equal to number %d of alignment columns\n", i, rdta->sites); |
---|
1734 | errorExit(-1); |
---|
1735 | } |
---|
1736 | |
---|
1737 | for(i = 1; i <= rdta->sites; i++) |
---|
1738 | rdta->wgt[i] = wv[i - 1]; |
---|
1739 | |
---|
1740 | fclose(weightFile); |
---|
1741 | rax_free(wv); |
---|
1742 | } |
---|
1743 | |
---|
1744 | |
---|
1745 | |
---|
1746 | static void getinput(analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr) |
---|
1747 | { |
---|
1748 | int i; |
---|
1749 | |
---|
1750 | if(!adef->readTaxaOnly) |
---|
1751 | { |
---|
1752 | INFILE = myfopen(seq_file, "rb"); |
---|
1753 | |
---|
1754 | getnums(rdta, adef); |
---|
1755 | } |
---|
1756 | |
---|
1757 | tr->mxtips = rdta->numsp; |
---|
1758 | |
---|
1759 | if(!adef->readTaxaOnly) |
---|
1760 | { |
---|
1761 | rdta->wgt = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1762 | cdta->alias = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1763 | cdta->aliaswgt = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1764 | cdta->rateCategory = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1765 | tr->model = (int *) rax_calloc((rdta->sites + 1), sizeof(int)); |
---|
1766 | tr->initialDataVector = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1767 | tr->extendedDataVector = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1768 | cdta->patrat = (double *) rax_malloc((rdta->sites + 1) * sizeof(double)); |
---|
1769 | cdta->patratStored = (double *) rax_malloc((rdta->sites + 1) * sizeof(double)); |
---|
1770 | |
---|
1771 | |
---|
1772 | |
---|
1773 | if(!adef->useWeightFile) |
---|
1774 | { |
---|
1775 | for (i = 1; i <= rdta->sites; i++) |
---|
1776 | rdta->wgt[i] = 1; |
---|
1777 | } |
---|
1778 | else |
---|
1779 | { |
---|
1780 | assert(!adef->useSecondaryStructure); |
---|
1781 | inputweights(rdta); |
---|
1782 | } |
---|
1783 | } |
---|
1784 | |
---|
1785 | tr->multiBranch = 0; |
---|
1786 | tr->numBranches = 1; |
---|
1787 | |
---|
1788 | if(!adef->readTaxaOnly) |
---|
1789 | { |
---|
1790 | if(adef->useMultipleModel) |
---|
1791 | { |
---|
1792 | int ref; |
---|
1793 | |
---|
1794 | parsePartitions(adef, rdta, tr); |
---|
1795 | |
---|
1796 | for(i = 1; i <= rdta->sites; i++) |
---|
1797 | { |
---|
1798 | ref = tr->model[i]; |
---|
1799 | tr->initialDataVector[i] = tr->initialPartitionData[ref].dataType; |
---|
1800 | } |
---|
1801 | } |
---|
1802 | else |
---|
1803 | { |
---|
1804 | int |
---|
1805 | dataType = -1; |
---|
1806 | |
---|
1807 | tr->initialPartitionData = (pInfo*)rax_malloc(sizeof(pInfo)); |
---|
1808 | tr->initialPartitionData[0].partitionName = (char*)rax_malloc(128 * sizeof(char)); |
---|
1809 | strcpy(tr->initialPartitionData[0].partitionName, "No Name Provided"); |
---|
1810 | |
---|
1811 | tr->initialPartitionData[0].protModels = adef->proteinMatrix; |
---|
1812 | if(adef->protEmpiricalFreqs) |
---|
1813 | tr->initialPartitionData[0].usePredefinedProtFreqs = FALSE; |
---|
1814 | else |
---|
1815 | tr->initialPartitionData[0].usePredefinedProtFreqs = TRUE; |
---|
1816 | |
---|
1817 | |
---|
1818 | |
---|
1819 | tr->NumberOfModels = 1; |
---|
1820 | |
---|
1821 | if(adef->model == M_PROTCAT || adef->model == M_PROTGAMMA) |
---|
1822 | dataType = AA_DATA; |
---|
1823 | if(adef->model == M_GTRCAT || adef->model == M_GTRGAMMA) |
---|
1824 | dataType = DNA_DATA; |
---|
1825 | if(adef->model == M_BINCAT || adef->model == M_BINGAMMA) |
---|
1826 | dataType = BINARY_DATA; |
---|
1827 | if(adef->model == M_32CAT || adef->model == M_32GAMMA) |
---|
1828 | dataType = GENERIC_32; |
---|
1829 | if(adef->model == M_64CAT || adef->model == M_64GAMMA) |
---|
1830 | dataType = GENERIC_64; |
---|
1831 | |
---|
1832 | |
---|
1833 | |
---|
1834 | assert(dataType == BINARY_DATA || dataType == DNA_DATA || dataType == AA_DATA || |
---|
1835 | dataType == GENERIC_32 || dataType == GENERIC_64); |
---|
1836 | |
---|
1837 | tr->initialPartitionData[0].dataType = dataType; |
---|
1838 | |
---|
1839 | if(dataType == AA_DATA && adef->userProteinModel) |
---|
1840 | { |
---|
1841 | tr->initialPartitionData[0].protModels = PROT_FILE; |
---|
1842 | tr->initialPartitionData[0].usePredefinedProtFreqs = TRUE; |
---|
1843 | strcpy(tr->initialPartitionData[0].proteinSubstitutionFileName, proteinModelFileName); |
---|
1844 | } |
---|
1845 | |
---|
1846 | for(i = 0; i <= rdta->sites; i++) |
---|
1847 | { |
---|
1848 | tr->initialDataVector[i] = dataType; |
---|
1849 | tr->model[i] = 0; |
---|
1850 | } |
---|
1851 | } |
---|
1852 | |
---|
1853 | if(adef->useSecondaryStructure) |
---|
1854 | { |
---|
1855 | memcpy(tr->extendedDataVector, tr->initialDataVector, (rdta->sites + 1) * sizeof(int)); |
---|
1856 | |
---|
1857 | tr->extendedPartitionData =(pInfo*)rax_malloc(sizeof(pInfo) * tr->NumberOfModels); |
---|
1858 | |
---|
1859 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
1860 | { |
---|
1861 | tr->extendedPartitionData[i].partitionName = (char*)rax_malloc((strlen(tr->initialPartitionData[i].partitionName) + 1) * sizeof(char)); |
---|
1862 | strcpy(tr->extendedPartitionData[i].partitionName, tr->initialPartitionData[i].partitionName); |
---|
1863 | strcpy(tr->extendedPartitionData[i].proteinSubstitutionFileName, tr->initialPartitionData[i].proteinSubstitutionFileName); |
---|
1864 | tr->extendedPartitionData[i].dataType = tr->initialPartitionData[i].dataType; |
---|
1865 | tr->extendedPartitionData[i].protModels = tr->initialPartitionData[i].protModels; |
---|
1866 | tr->extendedPartitionData[i].usePredefinedProtFreqs = tr->initialPartitionData[i].usePredefinedProtFreqs; |
---|
1867 | } |
---|
1868 | |
---|
1869 | parseSecondaryStructure(tr, adef, rdta->sites); |
---|
1870 | |
---|
1871 | tr->dataVector = tr->extendedDataVector; |
---|
1872 | tr->partitionData = tr->extendedPartitionData; |
---|
1873 | } |
---|
1874 | else |
---|
1875 | { |
---|
1876 | tr->dataVector = tr->initialDataVector; |
---|
1877 | tr->partitionData = tr->initialPartitionData; |
---|
1878 | } |
---|
1879 | |
---|
1880 | |
---|
1881 | |
---|
1882 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
1883 | if(tr->partitionData[i].dataType == AA_DATA && tr->partitionData[i].protModels == PROT_FILE) |
---|
1884 | parseProteinModel(tr->partitionData[i].externalAAMatrix, tr->partitionData[i].proteinSubstitutionFileName); |
---|
1885 | |
---|
1886 | |
---|
1887 | |
---|
1888 | tr->executeModel = (boolean *)rax_malloc(sizeof(boolean) * tr->NumberOfModels); |
---|
1889 | |
---|
1890 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
1891 | tr->executeModel[i] = TRUE; |
---|
1892 | |
---|
1893 | getyspace(rdta); |
---|
1894 | } |
---|
1895 | |
---|
1896 | setupTree(tr, adef); |
---|
1897 | |
---|
1898 | |
---|
1899 | if(!adef->readTaxaOnly) |
---|
1900 | { |
---|
1901 | switch(adef->alignmentFileType) |
---|
1902 | { |
---|
1903 | case PHYLIP: |
---|
1904 | if(!getdata(adef, rdta, tr)) |
---|
1905 | { |
---|
1906 | printf("Problem reading alignment file \n"); |
---|
1907 | errorExit(1); |
---|
1908 | } |
---|
1909 | break; |
---|
1910 | case FASTA: |
---|
1911 | parseFasta(adef, rdta, tr); |
---|
1912 | break; |
---|
1913 | default: |
---|
1914 | assert(0); |
---|
1915 | } |
---|
1916 | |
---|
1917 | tr->nameHash = initStringHashTable(10 * tr->mxtips); |
---|
1918 | for(i = 1; i <= tr->mxtips; i++) |
---|
1919 | addword(tr->nameList[i], tr->nameHash, i); |
---|
1920 | |
---|
1921 | fclose(INFILE); |
---|
1922 | } |
---|
1923 | } |
---|
1924 | |
---|
1925 | |
---|
1926 | |
---|
1927 | static unsigned char buildStates(int secModel, unsigned char v1, unsigned char v2) |
---|
1928 | { |
---|
1929 | unsigned char new = 0; |
---|
1930 | |
---|
1931 | switch(secModel) |
---|
1932 | { |
---|
1933 | case SECONDARY_DATA: |
---|
1934 | new = v1; |
---|
1935 | new = new << 4; |
---|
1936 | new = new | v2; |
---|
1937 | break; |
---|
1938 | case SECONDARY_DATA_6: |
---|
1939 | { |
---|
1940 | int |
---|
1941 | meaningDNA[256], |
---|
1942 | i; |
---|
1943 | |
---|
1944 | const unsigned char |
---|
1945 | allowedStates[6][2] = {{'A','T'}, {'C', 'G'}, {'G', 'C'}, {'G','T'}, {'T', 'A'}, {'T', 'G'}}; |
---|
1946 | |
---|
1947 | const unsigned char |
---|
1948 | finalBinaryStates[6] = {1, 2, 4, 8, 16, 32}; |
---|
1949 | |
---|
1950 | unsigned char |
---|
1951 | intermediateBinaryStates[6]; |
---|
1952 | |
---|
1953 | int length = 6; |
---|
1954 | |
---|
1955 | for(i = 0; i < 256; i++) |
---|
1956 | meaningDNA[i] = -1; |
---|
1957 | |
---|
1958 | meaningDNA['A'] = 1; |
---|
1959 | meaningDNA['B'] = 14; |
---|
1960 | meaningDNA['C'] = 2; |
---|
1961 | meaningDNA['D'] = 13; |
---|
1962 | meaningDNA['G'] = 4; |
---|
1963 | meaningDNA['H'] = 11; |
---|
1964 | meaningDNA['K'] = 12; |
---|
1965 | meaningDNA['M'] = 3; |
---|
1966 | meaningDNA['N'] = 15; |
---|
1967 | meaningDNA['O'] = 15; |
---|
1968 | meaningDNA['R'] = 5; |
---|
1969 | meaningDNA['S'] = 6; |
---|
1970 | meaningDNA['T'] = 8; |
---|
1971 | meaningDNA['U'] = 8; |
---|
1972 | meaningDNA['V'] = 7; |
---|
1973 | meaningDNA['W'] = 9; |
---|
1974 | meaningDNA['X'] = 15; |
---|
1975 | meaningDNA['Y'] = 10; |
---|
1976 | meaningDNA['-'] = 15; |
---|
1977 | meaningDNA['?'] = 15; |
---|
1978 | |
---|
1979 | for(i = 0; i < length; i++) |
---|
1980 | { |
---|
1981 | unsigned char n1 = meaningDNA[allowedStates[i][0]]; |
---|
1982 | unsigned char n2 = meaningDNA[allowedStates[i][1]]; |
---|
1983 | |
---|
1984 | new = n1; |
---|
1985 | new = new << 4; |
---|
1986 | new = new | n2; |
---|
1987 | |
---|
1988 | intermediateBinaryStates[i] = new; |
---|
1989 | } |
---|
1990 | |
---|
1991 | new = v1; |
---|
1992 | new = new << 4; |
---|
1993 | new = new | v2; |
---|
1994 | |
---|
1995 | for(i = 0; i < length; i++) |
---|
1996 | { |
---|
1997 | if(new == intermediateBinaryStates[i]) |
---|
1998 | break; |
---|
1999 | } |
---|
2000 | if(i < length) |
---|
2001 | new = finalBinaryStates[i]; |
---|
2002 | else |
---|
2003 | { |
---|
2004 | new = 0; |
---|
2005 | for(i = 0; i < length; i++) |
---|
2006 | { |
---|
2007 | if(v1 & meaningDNA[allowedStates[i][0]]) |
---|
2008 | { |
---|
2009 | /*printf("Adding %c%c\n", allowedStates[i][0], allowedStates[i][1]);*/ |
---|
2010 | new |= finalBinaryStates[i]; |
---|
2011 | } |
---|
2012 | if(v2 & meaningDNA[allowedStates[i][1]]) |
---|
2013 | { |
---|
2014 | /*printf("Adding %c%c\n", allowedStates[i][0], allowedStates[i][1]);*/ |
---|
2015 | new |= finalBinaryStates[i]; |
---|
2016 | } |
---|
2017 | } |
---|
2018 | } |
---|
2019 | } |
---|
2020 | break; |
---|
2021 | case SECONDARY_DATA_7: |
---|
2022 | { |
---|
2023 | int |
---|
2024 | meaningDNA[256], |
---|
2025 | i; |
---|
2026 | |
---|
2027 | const unsigned char |
---|
2028 | allowedStates[6][2] = {{'A','T'}, {'C', 'G'}, {'G', 'C'}, {'G','T'}, {'T', 'A'}, {'T', 'G'}}; |
---|
2029 | |
---|
2030 | const unsigned char |
---|
2031 | finalBinaryStates[7] = {1, 2, 4, 8, 16, 32, 64}; |
---|
2032 | |
---|
2033 | unsigned char |
---|
2034 | intermediateBinaryStates[7]; |
---|
2035 | |
---|
2036 | for(i = 0; i < 256; i++) |
---|
2037 | meaningDNA[i] = -1; |
---|
2038 | |
---|
2039 | meaningDNA['A'] = 1; |
---|
2040 | meaningDNA['B'] = 14; |
---|
2041 | meaningDNA['C'] = 2; |
---|
2042 | meaningDNA['D'] = 13; |
---|
2043 | meaningDNA['G'] = 4; |
---|
2044 | meaningDNA['H'] = 11; |
---|
2045 | meaningDNA['K'] = 12; |
---|
2046 | meaningDNA['M'] = 3; |
---|
2047 | meaningDNA['N'] = 15; |
---|
2048 | meaningDNA['O'] = 15; |
---|
2049 | meaningDNA['R'] = 5; |
---|
2050 | meaningDNA['S'] = 6; |
---|
2051 | meaningDNA['T'] = 8; |
---|
2052 | meaningDNA['U'] = 8; |
---|
2053 | meaningDNA['V'] = 7; |
---|
2054 | meaningDNA['W'] = 9; |
---|
2055 | meaningDNA['X'] = 15; |
---|
2056 | meaningDNA['Y'] = 10; |
---|
2057 | meaningDNA['-'] = 15; |
---|
2058 | meaningDNA['?'] = 15; |
---|
2059 | |
---|
2060 | |
---|
2061 | for(i = 0; i < 6; i++) |
---|
2062 | { |
---|
2063 | unsigned char n1 = meaningDNA[allowedStates[i][0]]; |
---|
2064 | unsigned char n2 = meaningDNA[allowedStates[i][1]]; |
---|
2065 | |
---|
2066 | new = n1; |
---|
2067 | new = new << 4; |
---|
2068 | new = new | n2; |
---|
2069 | |
---|
2070 | intermediateBinaryStates[i] = new; |
---|
2071 | } |
---|
2072 | |
---|
2073 | new = v1; |
---|
2074 | new = new << 4; |
---|
2075 | new = new | v2; |
---|
2076 | |
---|
2077 | for(i = 0; i < 6; i++) |
---|
2078 | { |
---|
2079 | /* exact match */ |
---|
2080 | if(new == intermediateBinaryStates[i]) |
---|
2081 | break; |
---|
2082 | } |
---|
2083 | if(i < 6) |
---|
2084 | new = finalBinaryStates[i]; |
---|
2085 | else |
---|
2086 | { |
---|
2087 | /* distinguish between exact mismatches and partial mismatches */ |
---|
2088 | |
---|
2089 | for(i = 0; i < 6; i++) |
---|
2090 | if((v1 & meaningDNA[allowedStates[i][0]]) && (v2 & meaningDNA[allowedStates[i][1]])) |
---|
2091 | break; |
---|
2092 | if(i < 6) |
---|
2093 | { |
---|
2094 | /* printf("partial mismatch\n"); */ |
---|
2095 | |
---|
2096 | new = 0; |
---|
2097 | for(i = 0; i < 6; i++) |
---|
2098 | { |
---|
2099 | if((v1 & meaningDNA[allowedStates[i][0]]) && (v2 & meaningDNA[allowedStates[i][1]])) |
---|
2100 | { |
---|
2101 | /*printf("Adding %c%c\n", allowedStates[i][0], allowedStates[i][1]);*/ |
---|
2102 | new |= finalBinaryStates[i]; |
---|
2103 | } |
---|
2104 | else |
---|
2105 | new |= finalBinaryStates[6]; |
---|
2106 | } |
---|
2107 | } |
---|
2108 | else |
---|
2109 | new = finalBinaryStates[6]; |
---|
2110 | } |
---|
2111 | } |
---|
2112 | break; |
---|
2113 | default: |
---|
2114 | assert(0); |
---|
2115 | } |
---|
2116 | |
---|
2117 | return new; |
---|
2118 | |
---|
2119 | } |
---|
2120 | |
---|
2121 | |
---|
2122 | |
---|
2123 | static void adaptRdataToSecondary(tree *tr, rawdata *rdta) |
---|
2124 | { |
---|
2125 | int *alias = (int*)rax_calloc(rdta->sites, sizeof(int)); |
---|
2126 | int i, j, realPosition; |
---|
2127 | |
---|
2128 | for(i = 0; i < rdta->sites; i++) |
---|
2129 | alias[i] = -1; |
---|
2130 | |
---|
2131 | for(i = 0, realPosition = 0; i < rdta->sites; i++) |
---|
2132 | { |
---|
2133 | int partner = tr->secondaryStructurePairs[i]; |
---|
2134 | if(partner != -1) |
---|
2135 | { |
---|
2136 | assert(tr->dataVector[i+1] == SECONDARY_DATA || tr->dataVector[i+1] == SECONDARY_DATA_6 || tr->dataVector[i+1] == SECONDARY_DATA_7); |
---|
2137 | |
---|
2138 | if(i < partner) |
---|
2139 | { |
---|
2140 | for(j = 1; j <= rdta->numsp; j++) |
---|
2141 | { |
---|
2142 | unsigned char v1 = rdta->y[j][i+1]; |
---|
2143 | unsigned char v2 = rdta->y[j][partner+1]; |
---|
2144 | |
---|
2145 | assert(i+1 < partner+1); |
---|
2146 | |
---|
2147 | rdta->y[j][i+1] = buildStates(tr->dataVector[i+1], v1, v2); |
---|
2148 | } |
---|
2149 | alias[realPosition] = i; |
---|
2150 | realPosition++; |
---|
2151 | } |
---|
2152 | } |
---|
2153 | else |
---|
2154 | { |
---|
2155 | alias[realPosition] = i; |
---|
2156 | realPosition++; |
---|
2157 | } |
---|
2158 | } |
---|
2159 | |
---|
2160 | assert(rdta->sites - realPosition == tr->numberOfSecondaryColumns / 2); |
---|
2161 | |
---|
2162 | rdta->sites = realPosition; |
---|
2163 | |
---|
2164 | for(i = 0; i < rdta->sites; i++) |
---|
2165 | { |
---|
2166 | assert(alias[i] != -1); |
---|
2167 | tr->model[i+1] = tr->model[alias[i]+1]; |
---|
2168 | tr->dataVector[i+1] = tr->dataVector[alias[i]+1]; |
---|
2169 | rdta->wgt[i+1] = rdta->wgt[alias[i]+1]; |
---|
2170 | |
---|
2171 | for(j = 1; j <= rdta->numsp; j++) |
---|
2172 | rdta->y[j][i+1] = rdta->y[j][alias[i]+1]; |
---|
2173 | } |
---|
2174 | |
---|
2175 | rax_free(alias); |
---|
2176 | } |
---|
2177 | |
---|
2178 | static void sitesort(rawdata *rdta, cruncheddata *cdta, tree *tr, analdef *adef) |
---|
2179 | { |
---|
2180 | int gap, i, j, jj, jg, k, n, nsp; |
---|
2181 | int |
---|
2182 | *index, |
---|
2183 | *category = (int*)NULL; |
---|
2184 | |
---|
2185 | boolean flip, tied; |
---|
2186 | unsigned char **data; |
---|
2187 | |
---|
2188 | if(adef->useSecondaryStructure) |
---|
2189 | { |
---|
2190 | assert(tr->NumberOfModels > 1 && adef->useMultipleModel); |
---|
2191 | |
---|
2192 | adaptRdataToSecondary(tr, rdta); |
---|
2193 | } |
---|
2194 | |
---|
2195 | if(adef->useMultipleModel) |
---|
2196 | category = tr->model; |
---|
2197 | |
---|
2198 | |
---|
2199 | index = cdta->alias; |
---|
2200 | data = rdta->y; |
---|
2201 | n = rdta->sites; |
---|
2202 | nsp = rdta->numsp; |
---|
2203 | index[0] = -1; |
---|
2204 | |
---|
2205 | |
---|
2206 | if(adef->compressPatterns) |
---|
2207 | { |
---|
2208 | for (gap = n / 2; gap > 0; gap /= 2) |
---|
2209 | { |
---|
2210 | for (i = gap + 1; i <= n; i++) |
---|
2211 | { |
---|
2212 | j = i - gap; |
---|
2213 | |
---|
2214 | do |
---|
2215 | { |
---|
2216 | jj = index[j]; |
---|
2217 | jg = index[j+gap]; |
---|
2218 | if(adef->useMultipleModel) |
---|
2219 | { |
---|
2220 | assert(category[jj] != -1 && |
---|
2221 | category[jg] != -1); |
---|
2222 | |
---|
2223 | flip = (category[jj] > category[jg]); |
---|
2224 | tied = (category[jj] == category[jg]); |
---|
2225 | } |
---|
2226 | else |
---|
2227 | { |
---|
2228 | flip = 0; |
---|
2229 | tied = 1; |
---|
2230 | } |
---|
2231 | |
---|
2232 | for (k = 1; (k <= nsp) && tied; k++) |
---|
2233 | { |
---|
2234 | flip = (data[k][jj] > data[k][jg]); |
---|
2235 | tied = (data[k][jj] == data[k][jg]); |
---|
2236 | } |
---|
2237 | |
---|
2238 | if (flip) |
---|
2239 | { |
---|
2240 | index[j] = jg; |
---|
2241 | index[j+gap] = jj; |
---|
2242 | j -= gap; |
---|
2243 | } |
---|
2244 | } |
---|
2245 | while (flip && (j > 0)); |
---|
2246 | } |
---|
2247 | } |
---|
2248 | } |
---|
2249 | } |
---|
2250 | |
---|
2251 | |
---|
2252 | static void sitecombcrunch (rawdata *rdta, cruncheddata *cdta, tree *tr, analdef *adef) |
---|
2253 | { |
---|
2254 | boolean |
---|
2255 | tied; |
---|
2256 | |
---|
2257 | int |
---|
2258 | i, |
---|
2259 | sitei, |
---|
2260 | j, |
---|
2261 | sitej, |
---|
2262 | k, |
---|
2263 | *aliasModel = (int*)NULL, |
---|
2264 | *aliasSuperModel = (int*)NULL; |
---|
2265 | |
---|
2266 | tr->origNumSitePerModel = (int*)rax_calloc(tr->NumberOfModels, sizeof(int)); |
---|
2267 | |
---|
2268 | for(i = 1; i <= rdta->sites; i++) |
---|
2269 | tr->origNumSitePerModel[tr->model[i]]++; |
---|
2270 | |
---|
2271 | if(adef->useMultipleModel) |
---|
2272 | { |
---|
2273 | aliasSuperModel = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1)); |
---|
2274 | aliasModel = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1)); |
---|
2275 | } |
---|
2276 | |
---|
2277 | i = 0; |
---|
2278 | cdta->alias[0] = cdta->alias[1]; |
---|
2279 | cdta->aliaswgt[0] = 0; |
---|
2280 | |
---|
2281 | if(adef->mode == PER_SITE_LL || adef->mode == ANCESTRAL_STATES) |
---|
2282 | { |
---|
2283 | int i; |
---|
2284 | |
---|
2285 | tr->patternPosition = (int*)rax_malloc(sizeof(int) * rdta->sites); |
---|
2286 | tr->columnPosition = (int*)rax_malloc(sizeof(int) * rdta->sites); |
---|
2287 | |
---|
2288 | for(i = 0; i < rdta->sites; i++) |
---|
2289 | { |
---|
2290 | tr->patternPosition[i] = -1; |
---|
2291 | tr->columnPosition[i] = -1; |
---|
2292 | } |
---|
2293 | } |
---|
2294 | |
---|
2295 | |
---|
2296 | |
---|
2297 | i = 0; |
---|
2298 | for (j = 1; j <= rdta->sites; j++) |
---|
2299 | { |
---|
2300 | sitei = cdta->alias[i]; |
---|
2301 | sitej = cdta->alias[j]; |
---|
2302 | if(!adef->compressPatterns) |
---|
2303 | tied = 0; |
---|
2304 | else |
---|
2305 | { |
---|
2306 | if(adef->useMultipleModel) |
---|
2307 | { |
---|
2308 | tied = (tr->model[sitei] == tr->model[sitej]); |
---|
2309 | if(tied) |
---|
2310 | assert(tr->dataVector[sitei] == tr->dataVector[sitej]); |
---|
2311 | } |
---|
2312 | else |
---|
2313 | tied = 1; |
---|
2314 | } |
---|
2315 | |
---|
2316 | for (k = 1; tied && (k <= rdta->numsp); k++) |
---|
2317 | tied = (rdta->y[k][sitei] == rdta->y[k][sitej]); |
---|
2318 | |
---|
2319 | if (tied) |
---|
2320 | { |
---|
2321 | if(adef->mode == PER_SITE_LL || adef->mode == ANCESTRAL_STATES) |
---|
2322 | { |
---|
2323 | tr->patternPosition[j - 1] = i; |
---|
2324 | tr->columnPosition[j - 1] = sitej; |
---|
2325 | /* printf("Pattern %d from column %d also at site %d\n", i, sitei, sitej); */ |
---|
2326 | } |
---|
2327 | |
---|
2328 | |
---|
2329 | cdta->aliaswgt[i] += rdta->wgt[sitej]; |
---|
2330 | |
---|
2331 | if(adef->useMultipleModel) |
---|
2332 | { |
---|
2333 | aliasModel[i] = tr->model[sitej]; |
---|
2334 | aliasSuperModel[i] = tr->dataVector[sitej]; |
---|
2335 | } |
---|
2336 | } |
---|
2337 | else |
---|
2338 | { |
---|
2339 | if (cdta->aliaswgt[i] > 0) i++; |
---|
2340 | |
---|
2341 | if(adef->mode == PER_SITE_LL || adef->mode == ANCESTRAL_STATES) |
---|
2342 | { |
---|
2343 | tr->patternPosition[j - 1] = i; |
---|
2344 | tr->columnPosition[j - 1] = sitej; |
---|
2345 | /*printf("Pattern %d is from cloumn %d\n", i, sitej);*/ |
---|
2346 | } |
---|
2347 | |
---|
2348 | cdta->aliaswgt[i] = rdta->wgt[sitej]; |
---|
2349 | cdta->alias[i] = sitej; |
---|
2350 | if(adef->useMultipleModel) |
---|
2351 | { |
---|
2352 | aliasModel[i] = tr->model[sitej]; |
---|
2353 | aliasSuperModel[i] = tr->dataVector[sitej]; |
---|
2354 | } |
---|
2355 | } |
---|
2356 | } |
---|
2357 | |
---|
2358 | cdta->endsite = i; |
---|
2359 | if (cdta->aliaswgt[i] > 0) cdta->endsite++; |
---|
2360 | |
---|
2361 | if(adef->mode == PER_SITE_LL || adef->mode == ANCESTRAL_STATES) |
---|
2362 | { |
---|
2363 | for(i = 0; i < rdta->sites; i++) |
---|
2364 | { |
---|
2365 | int p = tr->patternPosition[i]; |
---|
2366 | int c = tr->columnPosition[i]; |
---|
2367 | |
---|
2368 | assert(p >= 0 && p < cdta->endsite); |
---|
2369 | assert(c >= 1 && c <= rdta->sites); |
---|
2370 | } |
---|
2371 | } |
---|
2372 | |
---|
2373 | |
---|
2374 | if(adef->useMultipleModel) |
---|
2375 | { |
---|
2376 | for(i = 0; i <= rdta->sites; i++) |
---|
2377 | { |
---|
2378 | tr->model[i] = aliasModel[i]; |
---|
2379 | tr->dataVector[i] = aliasSuperModel[i]; |
---|
2380 | } |
---|
2381 | } |
---|
2382 | |
---|
2383 | if(adef->useMultipleModel) |
---|
2384 | { |
---|
2385 | rax_free(aliasModel); |
---|
2386 | rax_free(aliasSuperModel); |
---|
2387 | } |
---|
2388 | } |
---|
2389 | |
---|
2390 | |
---|
2391 | static boolean makeweights (analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr) |
---|
2392 | { |
---|
2393 | int i; |
---|
2394 | |
---|
2395 | for (i = 1; i <= rdta->sites; i++) |
---|
2396 | cdta->alias[i] = i; |
---|
2397 | |
---|
2398 | sitesort(rdta, cdta, tr, adef); |
---|
2399 | sitecombcrunch(rdta, cdta, tr, adef); |
---|
2400 | |
---|
2401 | return TRUE; |
---|
2402 | } |
---|
2403 | |
---|
2404 | |
---|
2405 | |
---|
2406 | |
---|
2407 | static boolean makevalues(rawdata *rdta, cruncheddata *cdta, tree *tr, analdef *adef) |
---|
2408 | { |
---|
2409 | int i, j, model, fullSites = 0, modelCounter; |
---|
2410 | |
---|
2411 | unsigned char |
---|
2412 | *y = (unsigned char *)rax_malloc(((size_t)rdta->numsp) * ((size_t)cdta->endsite) * sizeof(unsigned char)), |
---|
2413 | *yBUF = (unsigned char *)rax_malloc( ((size_t)rdta->numsp) * ((size_t)cdta->endsite) * sizeof(unsigned char)); |
---|
2414 | |
---|
2415 | for (i = 1; i <= rdta->numsp; i++) |
---|
2416 | for (j = 0; j < cdta->endsite; j++) |
---|
2417 | y[(((size_t)(i - 1)) * ((size_t)cdta->endsite)) + j] = rdta->y[i][cdta->alias[j]]; |
---|
2418 | |
---|
2419 | rax_free(rdta->y0); |
---|
2420 | rax_free(rdta->y); |
---|
2421 | |
---|
2422 | rdta->y0 = y; |
---|
2423 | memcpy(yBUF, y, ((size_t)rdta->numsp) * ((size_t)cdta->endsite) * sizeof(unsigned char)); |
---|
2424 | rdta->yBUF = yBUF; |
---|
2425 | |
---|
2426 | if(!adef->useMultipleModel) |
---|
2427 | tr->NumberOfModels = 1; |
---|
2428 | |
---|
2429 | if(adef->useMultipleModel) |
---|
2430 | { |
---|
2431 | tr->partitionData[0].lower = 0; |
---|
2432 | |
---|
2433 | model = tr->model[0]; |
---|
2434 | modelCounter = 0; |
---|
2435 | |
---|
2436 | i = 1; |
---|
2437 | |
---|
2438 | while(i < cdta->endsite) |
---|
2439 | { |
---|
2440 | if(tr->model[i] != model) |
---|
2441 | { |
---|
2442 | tr->partitionData[modelCounter].upper = i; |
---|
2443 | tr->partitionData[modelCounter + 1].lower = i; |
---|
2444 | |
---|
2445 | model = tr->model[i]; |
---|
2446 | modelCounter++; |
---|
2447 | } |
---|
2448 | i++; |
---|
2449 | } |
---|
2450 | |
---|
2451 | tr->partitionData[tr->NumberOfModels - 1].upper = cdta->endsite; |
---|
2452 | |
---|
2453 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
2454 | tr->partitionData[i].width = tr->partitionData[i].upper - tr->partitionData[i].lower; |
---|
2455 | |
---|
2456 | model = tr->model[0]; |
---|
2457 | modelCounter = 0; |
---|
2458 | tr->model[0] = modelCounter; |
---|
2459 | i = 1; |
---|
2460 | |
---|
2461 | while(i < cdta->endsite) |
---|
2462 | { |
---|
2463 | if(tr->model[i] != model) |
---|
2464 | { |
---|
2465 | model = tr->model[i]; |
---|
2466 | modelCounter++; |
---|
2467 | tr->model[i] = modelCounter; |
---|
2468 | } |
---|
2469 | else |
---|
2470 | tr->model[i] = modelCounter; |
---|
2471 | i++; |
---|
2472 | } |
---|
2473 | } |
---|
2474 | else |
---|
2475 | { |
---|
2476 | tr->partitionData[0].lower = 0; |
---|
2477 | tr->partitionData[0].upper = cdta->endsite; |
---|
2478 | tr->partitionData[0].width = tr->partitionData[0].upper - tr->partitionData[0].lower; |
---|
2479 | } |
---|
2480 | |
---|
2481 | tr->rdta = rdta; |
---|
2482 | tr->cdta = cdta; |
---|
2483 | |
---|
2484 | tr->invariant = (int *)rax_malloc(cdta->endsite * sizeof(int)); |
---|
2485 | tr->originalDataVector = (int *)rax_malloc(cdta->endsite * sizeof(int)); |
---|
2486 | tr->originalModel = (int *)rax_malloc(cdta->endsite * sizeof(int)); |
---|
2487 | tr->originalWeights = (int *)rax_malloc(cdta->endsite * sizeof(int)); |
---|
2488 | |
---|
2489 | memcpy(tr->originalModel, tr->model, cdta->endsite * sizeof(int)); |
---|
2490 | memcpy(tr->originalDataVector, tr->dataVector, cdta->endsite * sizeof(int)); |
---|
2491 | memcpy(tr->originalWeights, tr->cdta->aliaswgt, cdta->endsite * sizeof(int)); |
---|
2492 | |
---|
2493 | |
---|
2494 | tr->originalCrunchedLength = tr->cdta->endsite; |
---|
2495 | for(i = 0; i < tr->cdta->endsite; i++) |
---|
2496 | fullSites += tr->cdta->aliaswgt[i]; |
---|
2497 | |
---|
2498 | tr->fullSites = fullSites; |
---|
2499 | |
---|
2500 | for(i = 0; i < rdta->numsp; i++) |
---|
2501 | tr->yVector[i + 1] = &(rdta->y0[((size_t)tr->originalCrunchedLength) * ((size_t)i)]); |
---|
2502 | |
---|
2503 | return TRUE; |
---|
2504 | } |
---|
2505 | |
---|
2506 | |
---|
2507 | |
---|
2508 | |
---|
2509 | |
---|
2510 | |
---|
2511 | |
---|
2512 | |
---|
/*
 * Compare the first n characters of two sequences.
 * Returns 1 when they are all equal (also for n <= 0), 0 at the first mismatch.
 */
static int sequenceSimilarity(unsigned char *tipJ, unsigned char *tipK, int n)
{
  int
    pos = 0;

  while(pos < n)
    {
      if(tipJ[pos] != tipK[pos])
        return 0;

      pos++;
    }

  return 1;
}
---|
2523 | |
---|
2524 | static void checkSequences(tree *tr, rawdata *rdta, analdef *adef) |
---|
2525 | { |
---|
2526 | int n = tr->mxtips + 1; |
---|
2527 | int i, j; |
---|
2528 | int *omissionList = (int *)rax_calloc(n, sizeof(int)); |
---|
2529 | int *undeterminedList = (int *)rax_calloc((rdta->sites + 1), sizeof(int)); |
---|
2530 | int *modelList = (int *)rax_malloc((rdta->sites + 1)* sizeof(int)); |
---|
2531 | int count = 0; |
---|
2532 | int countNameDuplicates = 0; |
---|
2533 | int countUndeterminedColumns = 0; |
---|
2534 | int countOnlyGaps = 0; |
---|
2535 | int modelCounter = 1; |
---|
2536 | unsigned char *tipI, *tipJ; |
---|
2537 | |
---|
2538 | for(i = 1; i < n; i++) |
---|
2539 | { |
---|
2540 | for(j = i + 1; j < n; j++) |
---|
2541 | if(strcmp(tr->nameList[i], tr->nameList[j]) == 0) |
---|
2542 | { |
---|
2543 | countNameDuplicates++; |
---|
2544 | if(processID == 0) |
---|
2545 | printBothOpen("Sequence names of taxon %d and %d are identical, they are both called %s\n", i, j, tr->nameList[i]); |
---|
2546 | } |
---|
2547 | } |
---|
2548 | |
---|
2549 | if(countNameDuplicates > 0) |
---|
2550 | { |
---|
2551 | if(processID == 0) |
---|
2552 | printBothOpen("ERROR: Found %d taxa that had equal names in the alignment, exiting...\n", countNameDuplicates); |
---|
2553 | errorExit(-1); |
---|
2554 | } |
---|
2555 | |
---|
2556 | if(adef->checkForUndeterminedSequences) |
---|
2557 | { |
---|
2558 | for(i = 1; i < n; i++) |
---|
2559 | { |
---|
2560 | j = 1; |
---|
2561 | |
---|
2562 | while(j <= rdta->sites) |
---|
2563 | { |
---|
2564 | if(rdta->y[i][j] != getUndetermined(tr->dataVector[j])) |
---|
2565 | break; |
---|
2566 | |
---|
2567 | j++; |
---|
2568 | } |
---|
2569 | |
---|
2570 | if(j == (rdta->sites + 1)) |
---|
2571 | { |
---|
2572 | if(processID == 0) |
---|
2573 | printBothOpen("ERROR: Sequence %s consists entirely of undetermined values which will be treated as missing data\n", |
---|
2574 | tr->nameList[i]); |
---|
2575 | |
---|
2576 | countOnlyGaps++; |
---|
2577 | } |
---|
2578 | } |
---|
2579 | |
---|
2580 | if(countOnlyGaps > 0) |
---|
2581 | { |
---|
2582 | if(processID == 0) |
---|
2583 | printBothOpen("ERROR: Found %d sequences that consist entirely of undetermined values, exiting...\n", countOnlyGaps); |
---|
2584 | |
---|
2585 | errorExit(-1); |
---|
2586 | } |
---|
2587 | } |
---|
2588 | |
---|
2589 | for(i = 0; i <= rdta->sites; i++) |
---|
2590 | modelList[i] = -1; |
---|
2591 | |
---|
2592 | for(i = 1; i <= rdta->sites; i++) |
---|
2593 | { |
---|
2594 | j = 1; |
---|
2595 | |
---|
2596 | while(j < n) |
---|
2597 | { |
---|
2598 | if(rdta->y[j][i] != getUndetermined(tr->dataVector[i])) |
---|
2599 | break; |
---|
2600 | |
---|
2601 | |
---|
2602 | j++; |
---|
2603 | } |
---|
2604 | |
---|
2605 | if(j == n) |
---|
2606 | { |
---|
2607 | undeterminedList[i] = 1; |
---|
2608 | |
---|
2609 | if(processID == 0) |
---|
2610 | printBothOpen("IMPORTANT WARNING: Alignment column %d contains only undetermined values which will be treated as missing data\n", i); |
---|
2611 | |
---|
2612 | countUndeterminedColumns++; |
---|
2613 | } |
---|
2614 | else |
---|
2615 | { |
---|
2616 | if(adef->useMultipleModel) |
---|
2617 | { |
---|
2618 | modelList[modelCounter] = tr->model[i]; |
---|
2619 | modelCounter++; |
---|
2620 | } |
---|
2621 | } |
---|
2622 | } |
---|
2623 | |
---|
2624 | |
---|
2625 | for(i = 1; i < n; i++) |
---|
2626 | { |
---|
2627 | if(omissionList[i] == 0) |
---|
2628 | { |
---|
2629 | tipI = &(rdta->y[i][1]); |
---|
2630 | |
---|
2631 | for(j = i + 1; j < n; j++) |
---|
2632 | { |
---|
2633 | if(omissionList[j] == 0) |
---|
2634 | { |
---|
2635 | tipJ = &(rdta->y[j][1]); |
---|
2636 | if(sequenceSimilarity(tipI, tipJ, rdta->sites)) |
---|
2637 | { |
---|
2638 | if(processID == 0) |
---|
2639 | printBothOpen("\n\nIMPORTANT WARNING: Sequences %s and %s are exactly identical\n", tr->nameList[i], tr->nameList[j]); |
---|
2640 | |
---|
2641 | omissionList[j] = 1; |
---|
2642 | count++; |
---|
2643 | } |
---|
2644 | } |
---|
2645 | } |
---|
2646 | } |
---|
2647 | } |
---|
2648 | |
---|
2649 | if(count > 0 || countUndeterminedColumns > 0) |
---|
2650 | { |
---|
2651 | char noDupFile[2048]; |
---|
2652 | char noDupModels[2048]; |
---|
2653 | char noDupSecondary[2048]; |
---|
2654 | |
---|
2655 | if(count > 0 &&processID == 0) |
---|
2656 | { |
---|
2657 | printBothOpen("\nIMPORTANT WARNING\n"); |
---|
2658 | |
---|
2659 | printBothOpen("Found %d %s that %s exactly identical to other sequences in the alignment.\n", count, (count == 1)?"sequence":"sequences", (count == 1)?"is":"are"); |
---|
2660 | |
---|
2661 | printBothOpen("Normally they should be excluded from the analysis.\n\n"); |
---|
2662 | } |
---|
2663 | |
---|
2664 | if(countUndeterminedColumns > 0 && processID == 0) |
---|
2665 | { |
---|
2666 | printBothOpen("\nIMPORTANT WARNING\n"); |
---|
2667 | |
---|
2668 | printBothOpen("Found %d %s that %s only undetermined values which will be treated as missing data.\n", |
---|
2669 | countUndeterminedColumns, (countUndeterminedColumns == 1)?"column":"columns", (countUndeterminedColumns == 1)?"contains":"contain"); |
---|
2670 | |
---|
2671 | printBothOpen("Normally these columns should be excluded from the analysis.\n\n"); |
---|
2672 | } |
---|
2673 | |
---|
2674 | strcpy(noDupFile, seq_file); |
---|
2675 | strcat(noDupFile, ".reduced"); |
---|
2676 | |
---|
2677 | strcpy(noDupModels, modelFileName); |
---|
2678 | strcat(noDupModels, ".reduced"); |
---|
2679 | |
---|
2680 | strcpy(noDupSecondary, secondaryStructureFileName); |
---|
2681 | strcat(noDupSecondary, ".reduced"); |
---|
2682 | |
---|
2683 | if(processID == 0) |
---|
2684 | { |
---|
2685 | if(adef->useSecondaryStructure) |
---|
2686 | { |
---|
2687 | if(countUndeterminedColumns && !filexists(noDupSecondary)) |
---|
2688 | { |
---|
2689 | FILE *newFile = myfopen(noDupSecondary, "wb"); |
---|
2690 | int count; |
---|
2691 | |
---|
2692 | printBothOpen("\nJust in case you might need it, a secondary structure file with \n"); |
---|
2693 | printBothOpen("structure assignments for undetermined columns removed is printed to file %s\n",noDupSecondary); |
---|
2694 | |
---|
2695 | for(i = 1, count = 0; i <= rdta->sites; i++) |
---|
2696 | { |
---|
2697 | if(undeterminedList[i] == 0) |
---|
2698 | fprintf(newFile, "%c", tr->secondaryStructureInput[i - 1]); |
---|
2699 | else |
---|
2700 | count++; |
---|
2701 | } |
---|
2702 | |
---|
2703 | assert(count == countUndeterminedColumns); |
---|
2704 | |
---|
2705 | fprintf(newFile,"\n"); |
---|
2706 | |
---|
2707 | fclose(newFile); |
---|
2708 | } |
---|
2709 | else |
---|
2710 | { |
---|
2711 | if(countUndeterminedColumns) |
---|
2712 | { |
---|
2713 | printBothOpen("\nA secondary structure file with model assignments for undetermined\n"); |
---|
2714 | printBothOpen("columns removed has already been printed to file %s\n",noDupSecondary); |
---|
2715 | } |
---|
2716 | } |
---|
2717 | } |
---|
2718 | |
---|
2719 | |
---|
2720 | if(adef->useMultipleModel && !filexists(noDupModels) && countUndeterminedColumns) |
---|
2721 | { |
---|
2722 | FILE *newFile = myfopen(noDupModels, "wb"); |
---|
2723 | |
---|
2724 | printBothOpen("\nJust in case you might need it, a mixed model file with \n"); |
---|
2725 | printBothOpen("model assignments for undetermined columns removed is printed to file %s\n",noDupModels); |
---|
2726 | |
---|
2727 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
2728 | { |
---|
2729 | boolean modelStillExists = FALSE; |
---|
2730 | |
---|
2731 | for(j = 1; (j <= rdta->sites) && (!modelStillExists); j++) |
---|
2732 | { |
---|
2733 | if(modelList[j] == i) |
---|
2734 | modelStillExists = TRUE; |
---|
2735 | } |
---|
2736 | |
---|
2737 | if(modelStillExists) |
---|
2738 | { |
---|
2739 | int k = 1; |
---|
2740 | int lower, upper; |
---|
2741 | int parts = 0; |
---|
2742 | |
---|
2743 | |
---|
2744 | switch(tr->partitionData[i].dataType) |
---|
2745 | { |
---|
2746 | case AA_DATA: |
---|
2747 | { |
---|
2748 | char |
---|
2749 | AAmodel[1024]; |
---|
2750 | |
---|
2751 | if(tr->partitionData[i].protModels != PROT_FILE) |
---|
2752 | { |
---|
2753 | strcpy(AAmodel, protModels[tr->partitionData[i].protModels]); |
---|
2754 | if(tr->partitionData[i].usePredefinedProtFreqs == FALSE) |
---|
2755 | strcat(AAmodel, "F"); |
---|
2756 | |
---|
2757 | fprintf(newFile, "%s, ", AAmodel); |
---|
2758 | } |
---|
2759 | else |
---|
2760 | fprintf(newFile, "[%s], ", tr->partitionData[i].proteinSubstitutionFileName); |
---|
2761 | } |
---|
2762 | break; |
---|
2763 | case DNA_DATA: |
---|
2764 | fprintf(newFile, "DNA, "); |
---|
2765 | break; |
---|
2766 | case BINARY_DATA: |
---|
2767 | fprintf(newFile, "BIN, "); |
---|
2768 | break; |
---|
2769 | case GENERIC_32: |
---|
2770 | fprintf(newFile, "MULTI, "); |
---|
2771 | break; |
---|
2772 | case GENERIC_64: |
---|
2773 | fprintf(newFile, "CODON, "); |
---|
2774 | break; |
---|
2775 | default: |
---|
2776 | assert(0); |
---|
2777 | } |
---|
2778 | |
---|
2779 | fprintf(newFile, "%s = ", tr->partitionData[i].partitionName); |
---|
2780 | |
---|
2781 | while(k <= rdta->sites) |
---|
2782 | { |
---|
2783 | if(modelList[k] == i) |
---|
2784 | { |
---|
2785 | lower = k; |
---|
2786 | while((modelList[k + 1] == i) && (k <= rdta->sites)) |
---|
2787 | k++; |
---|
2788 | upper = k; |
---|
2789 | |
---|
2790 | if(lower == upper) |
---|
2791 | { |
---|
2792 | if(parts == 0) |
---|
2793 | fprintf(newFile, "%d", lower); |
---|
2794 | else |
---|
2795 | fprintf(newFile, ",%d", lower); |
---|
2796 | } |
---|
2797 | else |
---|
2798 | { |
---|
2799 | if(parts == 0) |
---|
2800 | fprintf(newFile, "%d-%d", lower, upper); |
---|
2801 | else |
---|
2802 | fprintf(newFile, ",%d-%d", lower, upper); |
---|
2803 | } |
---|
2804 | parts++; |
---|
2805 | } |
---|
2806 | k++; |
---|
2807 | } |
---|
2808 | fprintf(newFile, "\n"); |
---|
2809 | } |
---|
2810 | } |
---|
2811 | fclose(newFile); |
---|
2812 | } |
---|
2813 | else |
---|
2814 | { |
---|
2815 | if(adef->useMultipleModel) |
---|
2816 | { |
---|
2817 | printBothOpen("\nA mixed model file with model assignments for undetermined\n"); |
---|
2818 | printBothOpen("columns removed has already been printed to file %s\n",noDupModels); |
---|
2819 | } |
---|
2820 | } |
---|
2821 | |
---|
2822 | |
---|
2823 | if(!filexists(noDupFile)) |
---|
2824 | { |
---|
2825 | FILE *newFile; |
---|
2826 | |
---|
2827 | printBothOpen("Just in case you might need it, an alignment file with \n"); |
---|
2828 | if(count && !countUndeterminedColumns) |
---|
2829 | printBothOpen("sequence duplicates removed is printed to file %s\n", noDupFile); |
---|
2830 | if(!count && countUndeterminedColumns) |
---|
2831 | printBothOpen("undetermined columns removed is printed to file %s\n", noDupFile); |
---|
2832 | if(count && countUndeterminedColumns) |
---|
2833 | printBothOpen("sequence duplicates and undetermined columns removed is printed to file %s\n", noDupFile); |
---|
2834 | |
---|
2835 | newFile = myfopen(noDupFile, "wb"); |
---|
2836 | |
---|
2837 | fprintf(newFile, "%d %d\n", tr->mxtips - count, rdta->sites - countUndeterminedColumns); |
---|
2838 | |
---|
2839 | for(i = 1; i < n; i++) |
---|
2840 | { |
---|
2841 | if(!omissionList[i]) |
---|
2842 | { |
---|
2843 | fprintf(newFile, "%s ", tr->nameList[i]); |
---|
2844 | tipI = &(rdta->y[i][1]); |
---|
2845 | |
---|
2846 | for(j = 0; j < rdta->sites; j++) |
---|
2847 | { |
---|
2848 | if(undeterminedList[j + 1] == 0) |
---|
2849 | fprintf(newFile, "%c", getInverseMeaning(tr->dataVector[j + 1], tipI[j])); |
---|
2850 | } |
---|
2851 | |
---|
2852 | fprintf(newFile, "\n"); |
---|
2853 | } |
---|
2854 | } |
---|
2855 | |
---|
2856 | fclose(newFile); |
---|
2857 | } |
---|
2858 | else |
---|
2859 | { |
---|
2860 | if(count && !countUndeterminedColumns) |
---|
2861 | printBothOpen("An alignment file with sequence duplicates removed has already\n"); |
---|
2862 | if(!count && countUndeterminedColumns) |
---|
2863 | printBothOpen("An alignment file with undetermined columns removed has already\n"); |
---|
2864 | if(count && countUndeterminedColumns) |
---|
2865 | printBothOpen("An alignment file with undetermined columns and sequence duplicates removed has already\n"); |
---|
2866 | |
---|
2867 | printBothOpen("been printed to file %s\n", noDupFile); |
---|
2868 | } |
---|
2869 | } |
---|
2870 | } |
---|
2871 | |
---|
2872 | rax_free(undeterminedList); |
---|
2873 | rax_free(omissionList); |
---|
2874 | rax_free(modelList); |
---|
2875 | } |
---|
2876 | |
---|
2877 | |
---|
2878 | |
---|
2879 | |
---|
2880 | |
---|
2881 | |
---|
2882 | |
---|
2883 | static void generateBS(tree *tr, analdef *adef) |
---|
2884 | { |
---|
2885 | int |
---|
2886 | i, |
---|
2887 | j, |
---|
2888 | k, |
---|
2889 | w; |
---|
2890 | |
---|
2891 | char outName[1024], buf[16]; |
---|
2892 | FILE *of; |
---|
2893 | |
---|
2894 | assert(adef->boot != 0); |
---|
2895 | |
---|
2896 | for(i = 0; i < adef->multipleRuns; i++) |
---|
2897 | { |
---|
2898 | int |
---|
2899 | count = 0; |
---|
2900 | |
---|
2901 | computeNextReplicate(tr, &adef->boot, (int*)NULL, (int*)NULL, FALSE, FALSE); |
---|
2902 | |
---|
2903 | count = 0; |
---|
2904 | for(j = 0; j < tr->cdta->endsite; j++) |
---|
2905 | count += tr->cdta->aliaswgt[j]; |
---|
2906 | |
---|
2907 | assert(count == tr->rdta->sites); |
---|
2908 | |
---|
2909 | strcpy(outName, workdir); |
---|
2910 | strcat(outName, seq_file); |
---|
2911 | strcat(outName, ".BS"); |
---|
2912 | sprintf(buf, "%d", i); |
---|
2913 | strcat(outName, buf); |
---|
2914 | printf("Printing replicate %d to %s\n", i, outName); |
---|
2915 | |
---|
2916 | of = myfopen(outName, "wb"); |
---|
2917 | |
---|
2918 | fprintf(of, "%d %d\n", tr->mxtips, count); |
---|
2919 | |
---|
2920 | for(j = 1; j <= tr->mxtips; j++) |
---|
2921 | { |
---|
2922 | unsigned char *tip = tr->yVector[tr->nodep[j]->number]; |
---|
2923 | fprintf(of, "%s ", tr->nameList[j]); |
---|
2924 | |
---|
2925 | for(k = 0; k < tr->cdta->endsite; k++) |
---|
2926 | { |
---|
2927 | for(w = 0; w < tr->cdta->aliaswgt[k]; w++) |
---|
2928 | fprintf(of, "%c", getInverseMeaning(tr->dataVector[k], tip[k])); |
---|
2929 | } |
---|
2930 | |
---|
2931 | fprintf(of, "\n"); |
---|
2932 | } |
---|
2933 | fclose(of); |
---|
2934 | } |
---|
2935 | } |
---|
2936 | |
---|
2937 | |
---|
2938 | |
---|
2939 | |
---|
2940 | |
---|
2941 | static void splitMultiGene(tree *tr, rawdata *rdta) |
---|
2942 | { |
---|
2943 | int i, l; |
---|
2944 | int n = rdta->sites + 1; |
---|
2945 | int *modelFilter = (int *)rax_malloc(sizeof(int) * n); |
---|
2946 | int length, k; |
---|
2947 | unsigned char *tip; |
---|
2948 | FILE *outf; |
---|
2949 | char outFileName[2048]; |
---|
2950 | |
---|
2951 | /* char buf[16]; */ |
---|
2952 | |
---|
2953 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
2954 | { |
---|
2955 | strcpy(outFileName, seq_file); |
---|
2956 | |
---|
2957 | /*sprintf(buf, "%d", i);*/ |
---|
2958 | /*strcat(outFileName, ".GENE.");*/ |
---|
2959 | |
---|
2960 | strcat(outFileName, "."); |
---|
2961 | strcat(outFileName, tr->partitionData[i].partitionName); |
---|
2962 | strcat(outFileName, ".phy"); |
---|
2963 | |
---|
2964 | /*strcat(outFileName, buf);*/ |
---|
2965 | |
---|
2966 | outf = myfopen(outFileName, "wb"); |
---|
2967 | |
---|
2968 | length = 0; |
---|
2969 | |
---|
2970 | for(k = 1; k < n; k++) |
---|
2971 | { |
---|
2972 | if(tr->model[k] == i) |
---|
2973 | { |
---|
2974 | modelFilter[k] = 1; |
---|
2975 | length++; |
---|
2976 | } |
---|
2977 | else |
---|
2978 | modelFilter[k] = -1; |
---|
2979 | } |
---|
2980 | |
---|
2981 | fprintf(outf, "%d %d\n", rdta->numsp, length); |
---|
2982 | |
---|
2983 | for(l = 1; l <= rdta->numsp; l++) |
---|
2984 | { |
---|
2985 | fprintf(outf, "%s ", tr->nameList[l]); |
---|
2986 | |
---|
2987 | tip = &(rdta->y[l][0]); |
---|
2988 | |
---|
2989 | for(k = 1; k < n; k++) |
---|
2990 | { |
---|
2991 | if(modelFilter[k] == 1) |
---|
2992 | fprintf(outf, "%c", getInverseMeaning(tr->dataVector[k], tip[k])); |
---|
2993 | } |
---|
2994 | fprintf(outf, "\n"); |
---|
2995 | |
---|
2996 | } |
---|
2997 | |
---|
2998 | fclose(outf); |
---|
2999 | |
---|
3000 | printf("Wrote individual gene/partition alignment to file %s\n", outFileName); |
---|
3001 | } |
---|
3002 | |
---|
3003 | rax_free(modelFilter); |
---|
3004 | printf("Wrote all %d individual gene/partition alignments\n", tr->NumberOfModels); |
---|
3005 | printf("Exiting normally\n"); |
---|
3006 | } |
---|
3007 | |
---|
3008 | |
---|
3009 | static int countTaxaInTopology(void) |
---|
3010 | { |
---|
3011 | FILE |
---|
3012 | *f = myfopen(tree_file, "rb"); |
---|
3013 | |
---|
3014 | int |
---|
3015 | c, |
---|
3016 | taxaCount = 0; |
---|
3017 | |
---|
3018 | while((c = fgetc(f)) != EOF) |
---|
3019 | { |
---|
3020 | if(c == '(' || c == ',') |
---|
3021 | { |
---|
3022 | c = fgetc(f); |
---|
3023 | if(c == '(' || c == ',') |
---|
3024 | ungetc(c, f); |
---|
3025 | else |
---|
3026 | { |
---|
3027 | do |
---|
3028 | { |
---|
3029 | c = fgetc(f); |
---|
3030 | } |
---|
3031 | while(c != ':' && c != ')' && c != ','); |
---|
3032 | |
---|
3033 | taxaCount++; |
---|
3034 | |
---|
3035 | ungetc(c, f); |
---|
3036 | } |
---|
3037 | } |
---|
3038 | } |
---|
3039 | |
---|
3040 | printBothOpen("Found a total of %d taxa in tree file %s\n", taxaCount, tree_file); |
---|
3041 | |
---|
3042 | fclose(f); |
---|
3043 | |
---|
3044 | return taxaCount; |
---|
3045 | } |
---|
3046 | |
---|
3047 | |
---|
3048 | |
---|
3049 | |
---|
3050 | |
---|
3051 | |
---|
3052 | |
---|
3053 | static void allocPartitions(tree *tr) |
---|
3054 | { |
---|
3055 | int |
---|
3056 | i, |
---|
3057 | maxCategories = tr->maxCategories; |
---|
3058 | |
---|
3059 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
3060 | { |
---|
3061 | const partitionLengths |
---|
3062 | *pl = getPartitionLengths(&(tr->partitionData[i])); |
---|
3063 | |
---|
3064 | if(tr->useFastScaling) |
---|
3065 | tr->partitionData[i].globalScaler = (unsigned int *)rax_calloc(2 * tr->mxtips, sizeof(unsigned int)); |
---|
3066 | |
---|
3067 | |
---|
3068 | tr->partitionData[i].left = (double *)rax_malloc(pl->leftLength * (maxCategories + 1) * sizeof(double)); |
---|
3069 | tr->partitionData[i].right = (double *)rax_malloc(pl->rightLength * (maxCategories + 1) * sizeof(double)); |
---|
3070 | tr->partitionData[i].EIGN = (double*)rax_malloc(pl->eignLength * sizeof(double)); |
---|
3071 | tr->partitionData[i].EV = (double*)rax_malloc(pl->evLength * sizeof(double)); |
---|
3072 | tr->partitionData[i].EI = (double*)rax_malloc(pl->eiLength * sizeof(double)); |
---|
3073 | tr->partitionData[i].substRates = (double *)rax_malloc(pl->substRatesLength * sizeof(double)); |
---|
3074 | tr->partitionData[i].frequencies = (double*)rax_malloc(pl->frequenciesLength * sizeof(double)); |
---|
3075 | tr->partitionData[i].tipVector = (double *)rax_malloc(pl->tipVectorLength * sizeof(double)); |
---|
3076 | |
---|
3077 | |
---|
3078 | if(tr->partitionData[i].protModels == LG4 || tr->partitionData[i].protModels == LG4X) |
---|
3079 | { |
---|
3080 | int |
---|
3081 | k; |
---|
3082 | |
---|
3083 | for(k = 0; k < 4; k++) |
---|
3084 | { |
---|
3085 | tr->partitionData[i].EIGN_LG4[k] = (double*)rax_malloc(pl->eignLength * sizeof(double)); |
---|
3086 | tr->partitionData[i].EV_LG4[k] = (double*)rax_malloc(pl->evLength * sizeof(double)); |
---|
3087 | tr->partitionData[i].EI_LG4[k] = (double*)rax_malloc(pl->eiLength * sizeof(double)); |
---|
3088 | tr->partitionData[i].substRates_LG4[k] = (double *)rax_malloc(pl->substRatesLength * sizeof(double)); |
---|
3089 | tr->partitionData[i].frequencies_LG4[k] = (double*)rax_malloc(pl->frequenciesLength * sizeof(double)); |
---|
3090 | tr->partitionData[i].tipVector_LG4[k] = (double *)rax_malloc(pl->tipVectorLength * sizeof(double)); |
---|
3091 | } |
---|
3092 | } |
---|
3093 | |
---|
3094 | |
---|
3095 | tr->partitionData[i].symmetryVector = (int *)rax_malloc(pl->symmetryVectorLength * sizeof(int)); |
---|
3096 | tr->partitionData[i].frequencyGrouping = (int *)rax_malloc(pl->frequencyGroupingLength * sizeof(int)); |
---|
3097 | tr->partitionData[i].perSiteRates = (double *)rax_malloc(sizeof(double) * tr->maxCategories); |
---|
3098 | tr->partitionData[i].unscaled_perSiteRates = (double *)rax_malloc(sizeof(double) * tr->maxCategories); |
---|
3099 | |
---|
3100 | |
---|
3101 | tr->partitionData[i].nonGTR = FALSE; |
---|
3102 | |
---|
3103 | |
---|
3104 | |
---|
3105 | tr->partitionData[i].gammaRates = (double*)rax_malloc(sizeof(double) * 4); |
---|
3106 | tr->partitionData[i].yVector = (unsigned char **)rax_malloc(sizeof(unsigned char*) * (tr->mxtips + 1)); |
---|
3107 | |
---|
3108 | |
---|
3109 | tr->partitionData[i].xVector = (double **)rax_malloc(sizeof(double*) * tr->innerNodes); |
---|
3110 | tr->partitionData[i].xSpaceVector = (size_t *)rax_calloc(tr->innerNodes, sizeof(size_t)); |
---|
3111 | |
---|
3112 | tr->partitionData[i].expVector = (int **)rax_malloc(sizeof(int*) * tr->innerNodes); |
---|
3113 | tr->partitionData[i].expSpaceVector = (size_t *)rax_calloc(tr->innerNodes, sizeof(size_t)); |
---|
3114 | |
---|
3115 | tr->partitionData[i].mxtips = tr->mxtips; |
---|
3116 | |
---|
3117 | |
---|
3118 | |
---|
3119 | |
---|
3120 | #ifndef _USE_PTHREADS |
---|
3121 | { |
---|
3122 | int j; |
---|
3123 | |
---|
3124 | for(j = 1; j <= tr->mxtips; j++) |
---|
3125 | tr->partitionData[i].yVector[j] = &(tr->yVector[j][tr->partitionData[i].lower]); |
---|
3126 | } |
---|
3127 | #endif |
---|
3128 | |
---|
3129 | } |
---|
3130 | } |
---|
3131 | |
---|
3132 | #ifndef _USE_PTHREADS |
---|
3133 | |
---|
3134 | |
---|
3135 | |
---|
3136 | |
---|
3137 | |
---|
3138 | static void allocNodex (tree *tr) |
---|
3139 | { |
---|
3140 | size_t |
---|
3141 | i, |
---|
3142 | model, |
---|
3143 | offset, |
---|
3144 | memoryRequirements = 0; |
---|
3145 | |
---|
3146 | allocPartitions(tr); |
---|
3147 | |
---|
3148 | for(model = 0; model < (size_t)tr->NumberOfModels; model++) |
---|
3149 | { |
---|
3150 | size_t |
---|
3151 | width = tr->partitionData[model].upper - tr->partitionData[model].lower; |
---|
3152 | |
---|
3153 | int |
---|
3154 | undetermined, |
---|
3155 | j; |
---|
3156 | |
---|
3157 | memoryRequirements += (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states) * width; |
---|
3158 | |
---|
3159 | tr->partitionData[model].gapVectorLength = ((int)width / 32) + 1; |
---|
3160 | |
---|
3161 | tr->partitionData[model].gapVector = (unsigned int*)rax_calloc(tr->partitionData[model].gapVectorLength * 2 * tr->mxtips, sizeof(unsigned int)); |
---|
3162 | |
---|
3163 | |
---|
3164 | tr->partitionData[model].initialGapVectorSize = tr->partitionData[model].gapVectorLength * 2 * tr->mxtips * sizeof(int); |
---|
3165 | |
---|
3166 | /* always multiply by 4 due to frequent switching between CAT and GAMMA in standard RAxML */ |
---|
3167 | |
---|
3168 | tr->partitionData[model].gapColumn = (double *)rax_malloc(((size_t)tr->innerNodes) * |
---|
3169 | ((size_t)4) * |
---|
3170 | ((size_t)(tr->partitionData[model].states)) * |
---|
3171 | sizeof(double)); |
---|
3172 | |
---|
3173 | undetermined = getUndetermined(tr->partitionData[model].dataType); |
---|
3174 | |
---|
3175 | for(j = 1; j <= tr->mxtips; j++) |
---|
3176 | for(i = 0; i < width; i++) |
---|
3177 | if(tr->partitionData[model].yVector[j][i] == undetermined) |
---|
3178 | tr->partitionData[model].gapVector[tr->partitionData[model].gapVectorLength * j + i / 32] |= mask32[i % 32]; |
---|
3179 | } |
---|
3180 | |
---|
3181 | tr->perSiteLL = (double *)rax_malloc((size_t)tr->cdta->endsite * sizeof(double)); |
---|
3182 | assert(tr->perSiteLL != NULL); |
---|
3183 | |
---|
3184 | tr->sumBuffer = (double *)rax_malloc(memoryRequirements * sizeof(double)); |
---|
3185 | assert(tr->sumBuffer != NULL); |
---|
3186 | |
---|
3187 | offset = 0; |
---|
3188 | |
---|
3189 | /* C-OPT for initial testing tr->NumberOfModels will be 1 */ |
---|
3190 | |
---|
3191 | for(model = 0; model < (size_t)tr->NumberOfModels; model++) |
---|
3192 | { |
---|
3193 | size_t |
---|
3194 | lower = tr->partitionData[model].lower, |
---|
3195 | width = tr->partitionData[model].upper - lower; |
---|
3196 | |
---|
3197 | /* TODO all of this must be reset/adapted when fixModelIndices is called ! */ |
---|
3198 | |
---|
3199 | |
---|
3200 | tr->partitionData[model].sumBuffer = &tr->sumBuffer[offset]; |
---|
3201 | |
---|
3202 | |
---|
3203 | tr->partitionData[model].perSiteLL = &tr->perSiteLL[lower]; |
---|
3204 | |
---|
3205 | |
---|
3206 | tr->partitionData[model].wgt = &tr->cdta->aliaswgt[lower]; |
---|
3207 | tr->partitionData[model].invariant = &tr->invariant[lower]; |
---|
3208 | tr->partitionData[model].rateCategory = &tr->cdta->rateCategory[lower]; |
---|
3209 | |
---|
3210 | offset += (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states) * width; |
---|
3211 | } |
---|
3212 | |
---|
3213 | for(i = 0; i < tr->innerNodes; i++) |
---|
3214 | { |
---|
3215 | for(model = 0; model < (size_t)tr->NumberOfModels; model++) |
---|
3216 | { |
---|
3217 | tr->partitionData[model].expVector[i] = (int*)NULL; |
---|
3218 | tr->partitionData[model].xVector[i] = (double*)NULL; |
---|
3219 | } |
---|
3220 | } |
---|
3221 | } |
---|
3222 | |
---|
3223 | #endif |
---|
3224 | |
---|
3225 | |
---|
3226 | static void initAdef(analdef *adef) |
---|
3227 | { |
---|
3228 | adef->useSecondaryStructure = FALSE; |
---|
3229 | adef->bootstrapBranchLengths = FALSE; |
---|
3230 | adef->model = M_GTRCAT; |
---|
3231 | adef->max_rearrange = 21; |
---|
3232 | adef->stepwidth = 5; |
---|
3233 | adef->initial = adef->bestTrav = 10; |
---|
3234 | adef->initialSet = FALSE; |
---|
3235 | adef->restart = FALSE; |
---|
3236 | adef->mode = BIG_RAPID_MODE; |
---|
3237 | adef->categories = 25; |
---|
3238 | adef->boot = 0; |
---|
3239 | adef->rapidBoot = 0; |
---|
3240 | adef->useWeightFile = FALSE; |
---|
3241 | adef->checkpoints = 0; |
---|
3242 | adef->startingTreeOnly = 0; |
---|
3243 | adef->multipleRuns = 1; |
---|
3244 | adef->useMultipleModel = FALSE; |
---|
3245 | adef->likelihoodEpsilon = 0.1; |
---|
3246 | adef->constraint = FALSE; |
---|
3247 | adef->grouping = FALSE; |
---|
3248 | adef->randomStartingTree = FALSE; |
---|
3249 | adef->parsimonySeed = 0; |
---|
3250 | adef->proteinMatrix = JTT; |
---|
3251 | adef->protEmpiricalFreqs = 0; |
---|
3252 | adef->outgroup = FALSE; |
---|
3253 | adef->useInvariant = FALSE; |
---|
3254 | adef->permuteTreeoptimize = FALSE; |
---|
3255 | adef->useInvariant = FALSE; |
---|
3256 | adef->allInOne = FALSE; |
---|
3257 | adef->likelihoodTest = FALSE; |
---|
3258 | adef->perGeneBranchLengths = FALSE; |
---|
3259 | adef->generateBS = FALSE; |
---|
3260 | adef->bootStopping = FALSE; |
---|
3261 | adef->gapyness = 0.0; |
---|
3262 | adef->similarityFilterMode = 0; |
---|
3263 | adef->useExcludeFile = FALSE; |
---|
3264 | adef->userProteinModel = FALSE; |
---|
3265 | adef->computeELW = FALSE; |
---|
3266 | adef->computeDistance = FALSE; |
---|
3267 | adef->compressPatterns = TRUE; |
---|
3268 | adef->readTaxaOnly = FALSE; |
---|
3269 | adef->useBinaryModelFile = FALSE; |
---|
3270 | adef->leaveDropMode = FALSE; |
---|
3271 | adef->slidingWindowSize = 100; |
---|
3272 | adef->checkForUndeterminedSequences = TRUE; |
---|
3273 | adef->useQuartetGrouping = FALSE; |
---|
3274 | adef->alignmentFileType = PHYLIP; |
---|
3275 | adef->calculateIC = FALSE; |
---|
3276 | adef->verboseIC = FALSE; |
---|
3277 | adef->stepwiseAdditionOnly = FALSE; |
---|
3278 | } |
---|
3279 | |
---|
3280 | |
---|
3281 | |
---|
3282 | |
---|
3283 | static int modelExists(char *model, analdef *adef) |
---|
3284 | { |
---|
3285 | int i; |
---|
3286 | char thisModel[1024]; |
---|
3287 | |
---|
3288 | /********** BINARY ********************/ |
---|
3289 | |
---|
3290 | if(strcmp(model, "BINGAMMAI\0") == 0) |
---|
3291 | { |
---|
3292 | adef->model = M_BINGAMMA; |
---|
3293 | adef->useInvariant = TRUE; |
---|
3294 | return 1; |
---|
3295 | } |
---|
3296 | |
---|
3297 | if(strcmp(model, "BINGAMMA\0") == 0) |
---|
3298 | { |
---|
3299 | adef->model = M_BINGAMMA; |
---|
3300 | adef->useInvariant = FALSE; |
---|
3301 | return 1; |
---|
3302 | } |
---|
3303 | |
---|
3304 | if(strcmp(model, "BINCAT\0") == 0) |
---|
3305 | { |
---|
3306 | adef->model = M_BINCAT; |
---|
3307 | adef->useInvariant = FALSE; |
---|
3308 | return 1; |
---|
3309 | } |
---|
3310 | |
---|
3311 | if(strcmp(model, "BINCATI\0") == 0) |
---|
3312 | { |
---|
3313 | adef->model = M_BINCAT; |
---|
3314 | adef->useInvariant = TRUE; |
---|
3315 | return 1; |
---|
3316 | } |
---|
3317 | |
---|
3318 | /*********** 32 state ****************************/ |
---|
3319 | |
---|
3320 | if(strcmp(model, "MULTIGAMMAI\0") == 0) |
---|
3321 | { |
---|
3322 | adef->model = M_32GAMMA; |
---|
3323 | adef->useInvariant = TRUE; |
---|
3324 | return 1; |
---|
3325 | } |
---|
3326 | |
---|
3327 | if(strcmp(model, "MULTIGAMMA\0") == 0) |
---|
3328 | { |
---|
3329 | adef->model = M_32GAMMA; |
---|
3330 | adef->useInvariant = FALSE; |
---|
3331 | return 1; |
---|
3332 | } |
---|
3333 | |
---|
3334 | if(strcmp(model, "MULTICAT\0") == 0) |
---|
3335 | { |
---|
3336 | adef->model = M_32CAT; |
---|
3337 | adef->useInvariant = FALSE; |
---|
3338 | return 1; |
---|
3339 | } |
---|
3340 | |
---|
3341 | if(strcmp(model, "MULTICATI\0") == 0) |
---|
3342 | { |
---|
3343 | adef->model = M_32CAT; |
---|
3344 | adef->useInvariant = TRUE; |
---|
3345 | return 1; |
---|
3346 | } |
---|
3347 | |
---|
3348 | /*********** 64 state ****************************/ |
---|
3349 | |
---|
3350 | if(strcmp(model, "CODONGAMMAI\0") == 0) |
---|
3351 | { |
---|
3352 | adef->model = M_64GAMMA; |
---|
3353 | adef->useInvariant = TRUE; |
---|
3354 | return 1; |
---|
3355 | } |
---|
3356 | |
---|
3357 | if(strcmp(model, "CODONGAMMA\0") == 0) |
---|
3358 | { |
---|
3359 | adef->model = M_64GAMMA; |
---|
3360 | adef->useInvariant = FALSE; |
---|
3361 | return 1; |
---|
3362 | } |
---|
3363 | |
---|
3364 | if(strcmp(model, "CODONCAT\0") == 0) |
---|
3365 | { |
---|
3366 | adef->model = M_64CAT; |
---|
3367 | adef->useInvariant = FALSE; |
---|
3368 | return 1; |
---|
3369 | } |
---|
3370 | |
---|
3371 | if(strcmp(model, "CODONCATI\0") == 0) |
---|
3372 | { |
---|
3373 | adef->model = M_64CAT; |
---|
3374 | adef->useInvariant = TRUE; |
---|
3375 | return 1; |
---|
3376 | } |
---|
3377 | |
---|
3378 | |
---|
3379 | /*********** DNA **********************/ |
---|
3380 | |
---|
3381 | if(strcmp(model, "GTRGAMMAI\0") == 0) |
---|
3382 | { |
---|
3383 | adef->model = M_GTRGAMMA; |
---|
3384 | adef->useInvariant = TRUE; |
---|
3385 | return 1; |
---|
3386 | } |
---|
3387 | |
---|
3388 | if(strcmp(model, "GTRGAMMA\0") == 0) |
---|
3389 | { |
---|
3390 | adef->model = M_GTRGAMMA; |
---|
3391 | adef->useInvariant = FALSE; |
---|
3392 | return 1; |
---|
3393 | } |
---|
3394 | |
---|
3395 | |
---|
3396 | |
---|
3397 | if(strcmp(model, "GTRCAT\0") == 0) |
---|
3398 | { |
---|
3399 | adef->model = M_GTRCAT; |
---|
3400 | adef->useInvariant = FALSE; |
---|
3401 | return 1; |
---|
3402 | } |
---|
3403 | |
---|
3404 | |
---|
3405 | |
---|
3406 | if(strcmp(model, "GTRCATI\0") == 0) |
---|
3407 | { |
---|
3408 | adef->model = M_GTRCAT; |
---|
3409 | adef->useInvariant = TRUE; |
---|
3410 | return 1; |
---|
3411 | } |
---|
3412 | |
---|
3413 | |
---|
3414 | |
---|
3415 | |
---|
3416 | /*************** AA GTR ********************/ |
---|
3417 | |
---|
3418 | /* TODO empirical FREQS */ |
---|
3419 | |
---|
3420 | if(strcmp(model, "PROTCATGTR\0") == 0) |
---|
3421 | { |
---|
3422 | adef->model = M_PROTCAT; |
---|
3423 | adef->proteinMatrix = GTR; |
---|
3424 | adef->useInvariant = FALSE; |
---|
3425 | adef->protEmpiricalFreqs = 1; |
---|
3426 | return 1; |
---|
3427 | } |
---|
3428 | |
---|
3429 | if(strcmp(model, "PROTCATIGTR\0") == 0) |
---|
3430 | { |
---|
3431 | adef->model = M_PROTCAT; |
---|
3432 | adef->proteinMatrix = GTR; |
---|
3433 | adef->useInvariant = TRUE; |
---|
3434 | return 1; |
---|
3435 | } |
---|
3436 | |
---|
3437 | if(strcmp(model, "PROTGAMMAGTR\0") == 0) |
---|
3438 | { |
---|
3439 | adef->model = M_PROTGAMMA; |
---|
3440 | adef->proteinMatrix = GTR; |
---|
3441 | adef->useInvariant = FALSE; |
---|
3442 | adef->protEmpiricalFreqs = 1; |
---|
3443 | return 1; |
---|
3444 | } |
---|
3445 | |
---|
3446 | if(strcmp(model, "PROTGAMMAIGTR\0") == 0) |
---|
3447 | { |
---|
3448 | adef->model = M_PROTGAMMA; |
---|
3449 | adef->proteinMatrix = GTR; |
---|
3450 | adef->useInvariant = TRUE; |
---|
3451 | adef->protEmpiricalFreqs = 1; |
---|
3452 | return 1; |
---|
3453 | } |
---|
3454 | |
---|
3455 | /*************** AA GTR_UNLINKED ********************/ |
---|
3456 | |
---|
3457 | if(strcmp(model, "PROTCATGTR_UNLINKED\0") == 0) |
---|
3458 | { |
---|
3459 | printf("Advisory: GTR_UNLINKED only has an effect if specified in the partition file\n"); |
---|
3460 | |
---|
3461 | adef->model = M_PROTCAT; |
---|
3462 | adef->proteinMatrix = GTR_UNLINKED; |
---|
3463 | adef->useInvariant = FALSE; |
---|
3464 | adef->protEmpiricalFreqs = 1; |
---|
3465 | return 1; |
---|
3466 | } |
---|
3467 | |
---|
3468 | if(strcmp(model, "PROTCATIGTR_UNLINKED\0") == 0) |
---|
3469 | { |
---|
3470 | printf("Advisory: GTR_UNLINKED only has an effect if specified in the partition file\n"); |
---|
3471 | |
---|
3472 | adef->model = M_PROTCAT; |
---|
3473 | adef->proteinMatrix = GTR_UNLINKED; |
---|
3474 | adef->useInvariant = TRUE; |
---|
3475 | adef->protEmpiricalFreqs = 1; |
---|
3476 | return 1; |
---|
3477 | } |
---|
3478 | |
---|
3479 | if(strcmp(model, "PROTGAMMAGTR_UNLINKED\0") == 0) |
---|
3480 | { |
---|
3481 | printf("Advisory: GTR_UNLINKED only has an effect if specified in the partition file\n"); |
---|
3482 | |
---|
3483 | adef->model = M_PROTGAMMA; |
---|
3484 | adef->proteinMatrix = GTR_UNLINKED; |
---|
3485 | adef->useInvariant = FALSE; |
---|
3486 | adef->protEmpiricalFreqs = 1; |
---|
3487 | return 1; |
---|
3488 | } |
---|
3489 | |
---|
3490 | if(strcmp(model, "PROTGAMMAIGTR_UNLINKED\0") == 0) |
---|
3491 | { |
---|
3492 | printf("Advisory: GTR_UNLINKED only has an effect if specified in the partition file\n"); |
---|
3493 | |
---|
3494 | adef->model = M_PROTGAMMA; |
---|
3495 | adef->proteinMatrix = GTR_UNLINKED; |
---|
3496 | adef->useInvariant = TRUE; |
---|
3497 | return 1; |
---|
3498 | } |
---|
3499 | |
---|
3500 | /****************** AA ************************/ |
---|
3501 | |
---|
3502 | for(i = 0; i < NUM_PROT_MODELS - 2; i++) |
---|
3503 | { |
---|
3504 | /* check CAT */ |
---|
3505 | |
---|
3506 | strcpy(thisModel, "PROTCAT"); |
---|
3507 | strcat(thisModel, protModels[i]); |
---|
3508 | |
---|
3509 | if(strcmp(model, thisModel) == 0) |
---|
3510 | { |
---|
3511 | adef->model = M_PROTCAT; |
---|
3512 | adef->proteinMatrix = i; |
---|
3513 | return 1; |
---|
3514 | } |
---|
3515 | |
---|
3516 | /* check CATF */ |
---|
3517 | |
---|
3518 | strcpy(thisModel, "PROTCAT"); |
---|
3519 | strcat(thisModel, protModels[i]); |
---|
3520 | strcat(thisModel, "F"); |
---|
3521 | |
---|
3522 | if(strcmp(model, thisModel) == 0) |
---|
3523 | { |
---|
3524 | adef->model = M_PROTCAT; |
---|
3525 | adef->proteinMatrix = i; |
---|
3526 | adef->protEmpiricalFreqs = 1; |
---|
3527 | return 1; |
---|
3528 | } |
---|
3529 | |
---|
3530 | |
---|
3531 | /* check CATI */ |
---|
3532 | |
---|
3533 | strcpy(thisModel, "PROTCATI"); |
---|
3534 | strcat(thisModel, protModels[i]); |
---|
3535 | |
---|
3536 | if(strcmp(model, thisModel) == 0) |
---|
3537 | { |
---|
3538 | adef->model = M_PROTCAT; |
---|
3539 | adef->proteinMatrix = i; |
---|
3540 | adef->useInvariant = TRUE; |
---|
3541 | return 1; |
---|
3542 | } |
---|
3543 | |
---|
3544 | /* check CATIF */ |
---|
3545 | |
---|
3546 | strcpy(thisModel, "PROTCATI"); |
---|
3547 | strcat(thisModel, protModels[i]); |
---|
3548 | strcat(thisModel, "F"); |
---|
3549 | |
---|
3550 | if(strcmp(model, thisModel) == 0) |
---|
3551 | { |
---|
3552 | adef->model = M_PROTCAT; |
---|
3553 | adef->proteinMatrix = i; |
---|
3554 | adef->protEmpiricalFreqs = 1; |
---|
3555 | adef->useInvariant = TRUE; |
---|
3556 | return 1; |
---|
3557 | } |
---|
3558 | |
---|
3559 | |
---|
3560 | /****************check GAMMA ************************/ |
---|
3561 | |
---|
3562 | strcpy(thisModel, "PROTGAMMA"); |
---|
3563 | strcat(thisModel, protModels[i]); |
---|
3564 | |
---|
3565 | if(strcmp(model, thisModel) == 0) |
---|
3566 | { |
---|
3567 | adef->model = M_PROTGAMMA; |
---|
3568 | adef->proteinMatrix = i; |
---|
3569 | adef->useInvariant = FALSE; |
---|
3570 | return 1; |
---|
3571 | } |
---|
3572 | |
---|
3573 | |
---|
3574 | |
---|
3575 | |
---|
3576 | /*check GAMMAI*/ |
---|
3577 | |
---|
3578 | strcpy(thisModel, "PROTGAMMAI"); |
---|
3579 | strcat(thisModel, protModels[i]); |
---|
3580 | |
---|
3581 | if(strcmp(model, thisModel) == 0) |
---|
3582 | { |
---|
3583 | adef->model = M_PROTGAMMA; |
---|
3584 | adef->proteinMatrix = i; |
---|
3585 | adef->useInvariant = TRUE; |
---|
3586 | return 1; |
---|
3587 | } |
---|
3588 | |
---|
3589 | |
---|
3590 | /* check GAMMAmodelF */ |
---|
3591 | |
---|
3592 | strcpy(thisModel, "PROTGAMMA"); |
---|
3593 | strcat(thisModel, protModels[i]); |
---|
3594 | strcat(thisModel, "F"); |
---|
3595 | |
---|
3596 | if(strcmp(model, thisModel) == 0) |
---|
3597 | { |
---|
3598 | adef->model = M_PROTGAMMA; |
---|
3599 | adef->proteinMatrix = i; |
---|
3600 | adef->protEmpiricalFreqs = 1; |
---|
3601 | adef->useInvariant = FALSE; |
---|
3602 | return 1; |
---|
3603 | } |
---|
3604 | |
---|
3605 | |
---|
3606 | /* check GAMMAImodelF */ |
---|
3607 | |
---|
3608 | strcpy(thisModel, "PROTGAMMAI"); |
---|
3609 | strcat(thisModel, protModels[i]); |
---|
3610 | strcat(thisModel, "F"); |
---|
3611 | |
---|
3612 | if(strcmp(model, thisModel) == 0) |
---|
3613 | { |
---|
3614 | adef->model = M_PROTGAMMA; |
---|
3615 | adef->proteinMatrix = i; |
---|
3616 | adef->protEmpiricalFreqs = 1; |
---|
3617 | adef->useInvariant = TRUE; |
---|
3618 | return 1; |
---|
3619 | } |
---|
3620 | |
---|
3621 | } |
---|
3622 | |
---|
3623 | /*********************************************************************************/ |
---|
3624 | |
---|
3625 | |
---|
3626 | |
---|
3627 | return 0; |
---|
3628 | } |
---|
3629 | |
---|
3630 | |
---|
3631 | |
---|
/*
 * Minimal re-entrant-by-argument variant of getopt().
 *
 * argc, argv : the command line
 * opts       : option characters; a character followed by ':' takes an
 *              argument
 * optind     : in/out index of the argv element to examine next; the caller
 *              initializes it (normally to 1)
 * optarg     : out; set to the option's argument, or to NULL for an option
 *              that takes none
 *
 * Returns the option character, '?' after printing a diagnostic for an
 * unknown option or a missing argument, and -1 when option parsing is
 * finished: *optind is past the end, argv[*optind] does not start with '-',
 * is exactly "-", or is the terminator "--" (which is skipped).
 *
 * The position inside a group of clustered options ("-abc") is kept in a
 * function-local static, so only one command line can be parsed at a time.
 *
 * The "--" test is only meaningful at the start of an argv element
 * (sp == 1); it used to sit in the else branch of that condition, where it
 * could not trigger, so "--" was reported as an illegal option.
 */
static int mygetopt(int argc, char **argv, char *opts, int *optind, char **optarg)
{
  static int
    sp = 1;

  int
    c;

  char
    *cp;

  if(sp == 1)
    {
      if(*optind >= argc || argv[*optind][0] != '-' || argv[*optind][1] == '\0')
        return -1;

      if(strcmp(argv[*optind], "--") == 0)
        {
          *optind = *optind + 1;
          return -1;
        }
    }

  c = argv[*optind][sp];

  /* ':' is the argument marker inside opts and never a valid option itself */
  cp = (c == ':') ? (char *)NULL : strchr(opts, c);

  if(cp == (char *)NULL)
    {
      printf(": illegal option -- %c \n", c);

      if(argv[*optind][++sp] == '\0')
        {
          *optind = *optind + 1;
          sp = 1;
        }

      return('?');
    }

  if(*++cp == ':')
    {
      if(argv[*optind][sp+1] != '\0')
        {
          /* argument glued to the option: "-ofile" */
          *optarg = &argv[*optind][sp+1];
          *optind = *optind + 1;
        }
      else
        {
          /* argument is the next argv element: "-o file" */
          *optind = *optind + 1;

          if(*optind >= argc)
            {
              printf(": option requires an argument -- %c\n", c);
              sp = 1;
              return('?');
            }

          *optarg = argv[*optind];
          *optind = *optind + 1;
        }

      sp = 1;
    }
  else
    {
      /* flag without argument; step to the next flag of the cluster, if any */
      if(argv[*optind][++sp] == '\0')
        {
          sp = 1;
          *optind = *optind + 1;
        }

      *optarg = 0;
    }

  return(c);
}
---|
3704 | |
---|
3705 | static void checkOutgroups(tree *tr, analdef *adef) |
---|
3706 | { |
---|
3707 | if(adef->outgroup) |
---|
3708 | { |
---|
3709 | boolean found; |
---|
3710 | int i, j; |
---|
3711 | |
---|
3712 | for(j = 0; j < tr->numberOfOutgroups; j++) |
---|
3713 | { |
---|
3714 | found = FALSE; |
---|
3715 | for(i = 1; (i <= tr->mxtips) && !found; i++) |
---|
3716 | { |
---|
3717 | if(strcmp(tr->nameList[i], tr->outgroups[j]) == 0) |
---|
3718 | { |
---|
3719 | tr->outgroupNums[j] = i; |
---|
3720 | found = TRUE; |
---|
3721 | } |
---|
3722 | } |
---|
3723 | if(!found) |
---|
3724 | { |
---|
3725 | printf("Error, the outgroup name \"%s\" you specified can not be found in the alignment, exiting ....\n", tr->outgroups[j]); |
---|
3726 | errorExit(-1); |
---|
3727 | } |
---|
3728 | } |
---|
3729 | } |
---|
3730 | |
---|
3731 | } |
---|
3732 | |
---|
3733 | static void parseOutgroups(char *outgr, tree *tr) |
---|
3734 | { |
---|
3735 | int count = 1, i, k; |
---|
3736 | char name[nmlngth]; |
---|
3737 | |
---|
3738 | i = 0; |
---|
3739 | while(outgr[i] != '\0') |
---|
3740 | { |
---|
3741 | if(outgr[i] == ',') |
---|
3742 | count++; |
---|
3743 | i++; |
---|
3744 | } |
---|
3745 | |
---|
3746 | tr->numberOfOutgroups = count; |
---|
3747 | |
---|
3748 | tr->outgroups = (char **)rax_malloc(sizeof(char *) * count); |
---|
3749 | |
---|
3750 | for(i = 0; i < tr->numberOfOutgroups; i++) |
---|
3751 | tr->outgroups[i] = (char *)rax_malloc(sizeof(char) * nmlngth); |
---|
3752 | |
---|
3753 | tr->outgroupNums = (int *)rax_malloc(sizeof(int) * count); |
---|
3754 | |
---|
3755 | i = 0; |
---|
3756 | k = 0; |
---|
3757 | count = 0; |
---|
3758 | while(outgr[i] != '\0') |
---|
3759 | { |
---|
3760 | if(outgr[i] == ',') |
---|
3761 | { |
---|
3762 | name[k] = '\0'; |
---|
3763 | strcpy(tr->outgroups[count], name); |
---|
3764 | count++; |
---|
3765 | k = 0; |
---|
3766 | } |
---|
3767 | else |
---|
3768 | { |
---|
3769 | name[k] = outgr[i]; |
---|
3770 | k++; |
---|
3771 | } |
---|
3772 | i++; |
---|
3773 | } |
---|
3774 | |
---|
3775 | name[k] = '\0'; |
---|
3776 | strcpy(tr->outgroups[count], name); |
---|
3777 | |
---|
3778 | /*for(i = 0; i < tr->numberOfOutgroups; i++) |
---|
3779 | printf("%d %s \n", i, tr->outgroups[i]);*/ |
---|
3780 | |
---|
3781 | |
---|
3782 | /*printf("%s \n", name);*/ |
---|
3783 | } |
---|
3784 | |
---|
3785 | |
---|
3786 | /*********************************** OUTGROUP STUFF END *********************************************************/ |
---|
3787 | |
---|
3788 | |
---|
3789 | static void printVersionInfo(boolean terminal, FILE *infoFile) |
---|
3790 | { |
---|
3791 | char |
---|
3792 | text[7][1024]; |
---|
3793 | |
---|
3794 | int |
---|
3795 | i; |
---|
3796 | |
---|
3797 | sprintf(text[0], "\n\nThis is %s version %s released by Alexandros Stamatakis on %s.\n\n", programName, programVersion, programDate); |
---|
3798 | sprintf(text[1], "With greatly appreciated code contributions by:\n"); |
---|
3799 | sprintf(text[2], "Andre Aberer (HITS)\n"); |
---|
3800 | sprintf(text[3], "Simon Berger (HITS)\n"); |
---|
3801 | sprintf(text[4], "Nick Pattengale (Sandia)\n"); |
---|
3802 | sprintf(text[5], "Wayne Pfeiffer (SDSC)\n"); |
---|
3803 | sprintf(text[6], "Akifumi S. Tanabe (NRIFS)\n\n"); |
---|
3804 | |
---|
3805 | for(i = 0; i < 7; i++) |
---|
3806 | { |
---|
3807 | if(terminal) |
---|
3808 | printf("%s", text[i]); |
---|
3809 | else |
---|
3810 | printBoth(infoFile, text[i]); |
---|
3811 | } |
---|
3812 | |
---|
3813 | } |
---|
3814 | |
---|
3815 | static void printMinusFUsage(void) |
---|
3816 | { |
---|
3817 | printf("\n"); |
---|
3818 | printf(" \"-f a\": rapid Bootstrap analysis and search for best-scoring ML tree in one program run\n"); |
---|
3819 | |
---|
3820 | printf(" \"-f A\": compute marginal ancestral states on a ROOTED reference tree provided with \"t\"\n"); |
---|
3821 | |
---|
3822 | printf(" \"-f b\": draw bipartition information on a tree provided with \"-t\" based on multiple trees\n"); |
---|
3823 | printf(" (e.g., from a bootstrap) in a file specifed by \"-z\"\n"); |
---|
3824 | |
---|
3825 | printf(" \"-f B\": optimize br-len scaler and other model parameters (GTR, alpha, etc.) on a tree provided with \"-t\".\n"); |
---|
3826 | printf(" The tree needs to contain branch lengths. The branch lengths will not be optimized, just scaled by a single common value.\n"); |
---|
3827 | |
---|
3828 | |
---|
3829 | printf(" \"-f c\": check if the alignment can be properly read by RAxML\n"); |
---|
3830 | |
---|
3831 | printf(" \"-f C\": ancestral sequence test for Jiajie, users will also need to provide a list of taxon names via -Y separated by whitespaces\n"); |
---|
3832 | |
---|
3833 | printf(" \"-f d\": new rapid hill-climbing \n"); |
---|
3834 | printf(" DEFAULT: ON\n"); |
---|
3835 | |
---|
3836 | printf(" \"-f e\": optimize model+branch lengths for given input tree under GAMMA/GAMMAI only\n"); |
---|
3837 | |
---|
3838 | |
---|
3839 | |
---|
3840 | printf(" \"-f E\": execute very fast experimental tree search, at present only for testing\n"); |
---|
3841 | |
---|
3842 | printf(" \"-f F\": execute fast experimental tree search, at present only for testing\n"); |
---|
3843 | |
---|
3844 | printf(" \"-f g\": compute per site log Likelihoods for one ore more trees passed via\n"); |
---|
3845 | printf(" \"-z\" and write them to a file that can be read by CONSEL\n"); |
---|
3846 | printf(" The model parameters will be estimated on the first tree only!\n"); |
---|
3847 | |
---|
3848 | printf(" \"-f G\": compute per site log Likelihoods for one ore more trees passed via\n"); |
---|
3849 | printf(" \"-z\" and write them to a file that can be read by CONSEL.\n"); |
---|
3850 | printf(" The model parameters will be re-estimated for each tree\n"); |
---|
3851 | |
---|
3852 | printf(" \"-f h\": compute log likelihood test (SH-test) between best tree passed via \"-t\"\n"); |
---|
3853 | printf(" and a bunch of other trees passed via \"-z\" \n"); |
---|
3854 | printf(" The model parameters will be estimated on the first tree only!\n"); |
---|
3855 | |
---|
3856 | printf(" \"-f H\": compute log likelihood test (SH-test) between best tree passed via \"-t\"\n"); |
---|
3857 | printf(" and a bunch of other trees passed via \"-z\" \n"); |
---|
3858 | printf(" The model parameters will be re-estimated for each tree\n"); |
---|
3859 | |
---|
3860 | printf(" \"-f i\": calculate IC and TC scores (Salichos and Rokas 2013) on a tree provided with \"-t\" based on multiple trees\n"); |
---|
3861 | printf(" (e.g., from a bootstrap) in a file specifed by \"-z\"\n"); |
---|
3862 | |
---|
3863 | printf(" \"-f I\": a simple tree rooting algorithm for unrooted trees.\n"); |
---|
3864 | printf(" It roots the tree by rooting it at the branch that best balances the subtree lengths\n"); |
---|
3865 | printf(" (sum over branches in the subtrees) of the left and right subtree.\n"); |
---|
3866 | printf(" A branch with an optimal balance does not always exist!\n"); |
---|
3867 | printf(" You need to specify the tree you want to root via \"-t\".\n"); |
---|
3868 | |
---|
3869 | printf(" \"-f j\": generate a bunch of bootstrapped alignment files from an original alignemnt file.\n"); |
---|
3870 | printf(" You need to specify a seed with \"-b\" and the number of replicates with \"-#\" \n"); |
---|
3871 | |
---|
3872 | printf(" \"-f J\": Compute SH-like support values on a given tree passed via \"-t\".\n"); |
---|
3873 | |
---|
3874 | printf(" \"-f m\": compare bipartitions between two bunches of trees passed via \"-t\" and \"-z\" \n"); |
---|
3875 | printf(" respectively. This will return the Pearson correlation between all bipartitions found\n"); |
---|
3876 | printf(" in the two tree files. A file called RAxML_bipartitionFrequencies.outpuFileName\n"); |
---|
3877 | printf(" will be printed that contains the pair-wise bipartition frequencies of the two sets\n"); |
---|
3878 | |
---|
3879 | printf(" \"-f n\": compute the log likelihood score of all trees contained in a tree file provided by\n"); |
---|
3880 | printf(" \"-z\" under GAMMA or GAMMA+P-Invar\n"); |
---|
3881 | printf(" The model parameters will be estimated on the first tree only!\n"); |
---|
3882 | |
---|
3883 | printf(" \"-f N\": compute the log likelihood score of all trees contained in a tree file provided by\n"); |
---|
3884 | printf(" \"-z\" under GAMMA or GAMMA+P-Invar\n"); |
---|
3885 | printf(" The model parameters will be re-estimated for each tree\n"); |
---|
3886 | |
---|
3887 | |
---|
3888 | printf(" \"-f o\": old and slower rapid hill-climbing without heuristic cutoff\n"); |
---|
3889 | |
---|
3890 | printf(" \"-f p\": perform pure stepwise MP addition of new sequences to an incomplete starting tree and exit\n"); |
---|
3891 | |
---|
3892 | printf(" \"-f q\": fast quartet calculator\n"); |
---|
3893 | |
---|
3894 | printf(" \"-f r\": compute pairwise Robinson-Foulds (RF) distances between all pairs of trees in a tree file passed via \"-z\" \n"); |
---|
3895 | printf(" if the trees have node labales represented as integer support values the program will also compute two flavors of\n"); |
---|
3896 | printf(" the weighted Robinson-Foulds (WRF) distance\n"); |
---|
3897 | |
---|
3898 | printf(" \"-f R\": compute all pairwise Robinson-Foulds (RF) distances between a large reference tree passed via \"-t\" \n"); |
---|
3899 | printf(" and many smaller trees (that must have a subset of the taxa of the large tree) passed via \"-z\".\n"); |
---|
3900 | printf(" This option is intended for checking the plausibility of very large phylogenies that can not be inspected\n"); |
---|
3901 | printf(" visually any more.\n"); |
---|
3902 | |
---|
3903 | printf(" \"-f s\": split up a multi-gene partitioned alignment into the respective subalignments \n"); |
---|
3904 | |
---|
3905 | printf(" \"-f S\": compute site-specific placement bias using a leave one out test inspired by the evolutionary placement algorithm\n"); |
---|
3906 | |
---|
3907 | printf(" \"-f t\": do randomized tree searches on one fixed starting tree\n"); |
---|
3908 | |
---|
3909 | printf(" \"-f T\": do final thorough optimization of ML tree from rapid bootstrap search in stand-alone mode\n"); |
---|
3910 | |
---|
3911 | printf(" \"-f u\": execute morphological weight calibration using maximum likelihood, this will return a weight vector.\n"); |
---|
3912 | printf(" you need to provide a morphological alignment and a reference tree via \"-t\" \n"); |
---|
3913 | |
---|
3914 | printf(" \"-f v\": classify a bunch of environmental sequences into a reference tree using thorough read insertions\n"); |
---|
3915 | printf(" you will need to start RAxML with a non-comprehensive reference tree and an alignment containing all sequences (reference + query)\n"); |
---|
3916 | |
---|
3917 | printf(" \"-f V\": classify a bunch of environmental sequences into a reference tree using thorough read insertions\n"); |
---|
3918 | printf(" you will need to start RAxML with a non-comprehensive reference tree and an alignment containing all sequences (reference + query)\n"); |
---|
3919 | printf(" WARNING: this is a test implementation for more efficient handling of multi-gene/whole-genome datasets!\n"); |
---|
3920 | |
---|
3921 | printf(" \"-f w\": compute ELW test on a bunch of trees passed via \"-z\" \n"); |
---|
3922 | printf(" The model parameters will be estimated on the first tree only!\n"); |
---|
3923 | |
---|
3924 | printf(" \"-f W\": compute ELW test on a bunch of trees passed via \"-z\" \n"); |
---|
3925 | printf(" The model parameters will be re-estimated for each tree\n"); |
---|
3926 | |
---|
3927 | printf(" \"-f x\": compute pair-wise ML distances, ML model parameters will be estimated on an MP \n"); |
---|
3928 | printf(" starting tree or a user-defined tree passed via \"-t\", only allowed for GAMMA-based\n"); |
---|
3929 | printf(" models of rate heterogeneity\n"); |
---|
3930 | |
---|
3931 | printf(" \"-f y\": classify a bunch of environmental sequences into a reference tree using parsimony\n"); |
---|
3932 | printf(" you will need to start RAxML with a non-comprehensive reference tree and an alignment containing all sequences (reference + query)\n"); |
---|
3933 | |
---|
3934 | printf("\n"); |
---|
3935 | printf(" DEFAULT for \"-f\": new rapid hill climbing\n"); |
---|
3936 | |
---|
3937 | printf("\n"); |
---|
3938 | } |
---|
3939 | |
---|
3940 | |
---|
3941 | static void printREADME(void) |
---|
3942 | { |
---|
3943 | printVersionInfo(TRUE, (FILE*)NULL); |
---|
3944 | printf("\n"); |
---|
3945 | printf("Please also consult the RAxML-manual\n"); |
---|
3946 | printf("\nTo report bugs send an email to stamatak@cs.tum.edu\n"); |
---|
3947 | printf("Please send me all input files, the exact invocation, details of the HW and operating system,\n"); |
---|
3948 | printf("as well as all error messages printed to screen.\n\n\n"); |
---|
3949 | |
---|
3950 | printf("raxmlHPC[-SSE3|-PTHREADS|-PTHREADS-SSE3|-HYBRID|-HYBRID-SSE3]\n"); |
---|
3951 | printf(" -s sequenceFileName -n outputFileName -m substitutionModel\n"); |
---|
3952 | printf(" [-a weightFileName] [-A secondaryStructureSubstModel]\n"); |
---|
3953 | printf(" [-b bootstrapRandomNumberSeed] [-B wcCriterionThreshold]\n"); |
---|
3954 | printf(" [-c numberOfCategories] [-C] [-d] [-D]\n"); |
---|
3955 | printf(" [-e likelihoodEpsilon] [-E excludeFileName]\n"); |
---|
3956 | printf(" [-f a|A|b|B|c|C|d|e|E|F|g|G|h|H|i|I|j|J|m|n|N|o|p|q|r|R|s|S|t|T|u|v|V|w|W|x|y] [-F]\n"); |
---|
3957 | printf(" [-g groupingFileName] [-G placementThreshold] [-h]\n"); |
---|
3958 | printf(" [-i initialRearrangementSetting] [-I autoFC|autoMR|autoMRE|autoMRE_IGN]\n"); |
---|
3959 | printf(" [-j] [-J MR|MR_DROP|MRE|STRICT|STRICT_DROP|T_<PERCENT>] [-k] [-K] \n"); |
---|
3960 | printf(" [-L MR|MRE|T_<PERCENT>] [-M]\n"); |
---|
3961 | printf(" [-o outGroupName1[,outGroupName2[,...]]][-O]\n"); |
---|
3962 | printf(" [-p parsimonyRandomSeed] [-P proteinModel]\n"); |
---|
3963 | printf(" [-q multipleModelFileName] [-r binaryConstraintTree]\n"); |
---|
3964 | printf(" [-R binaryModelParamFile] [-S secondaryStructureFile] [-t userStartingTree]\n"); |
---|
3965 | printf(" [-T numberOfThreads] [-u] [-U] [-v] [-V] [-w outputDirectory] [-W slidingWindowSize]\n"); |
---|
3966 | printf(" [-x rapidBootstrapRandomNumberSeed] [-X] [-y] [-Y quartetGroupingFileName|ancestralSequenceCandidatesFileName]\n"); |
---|
3967 | printf(" [-z multipleTreesFile] [-#|-N numberOfRuns|autoFC|autoMR|autoMRE|autoMRE_IGN]\n"); |
---|
3968 | printf("\n"); |
---|
3969 | printf(" -a Specify a column weight file name to assign individual weights to each column of \n"); |
---|
3970 | printf(" the alignment. Those weights must be integers separated by any type and number \n"); |
---|
3971 | printf(" of whitespaces whithin a separate file, see file \"example_weights\" for an example.\n"); |
---|
3972 | printf("\n"); |
---|
3973 | printf(" -A Specify one of the secondary structure substitution models implemented in RAxML.\n"); |
---|
3974 | printf(" The same nomenclature as in the PHASE manual is used, available models: \n"); |
---|
3975 | printf(" S6A, S6B, S6C, S6D, S6E, S7A, S7B, S7C, S7D, S7E, S7F, S16, S16A, S16B\n"); |
---|
3976 | printf("\n"); |
---|
3977 | printf(" DEFAULT: 16-state GTR model (S16)\n"); |
---|
3978 | printf("\n"); |
---|
3979 | printf(" -b Specify an integer number (random seed) and turn on bootstrapping\n"); |
---|
3980 | printf("\n"); |
---|
3981 | printf(" DEFAULT: OFF\n"); |
---|
3982 | printf("\n"); |
---|
3983 | printf(" -B specify a floating point number between 0.0 and 1.0 that will be used as cutoff threshold \n"); |
---|
3984 | printf(" for the MR-based bootstopping criteria. The recommended setting is 0.03.\n"); |
---|
3985 | printf("\n"); |
---|
3986 | printf(" DEFAULT: 0.03 (recommended empirically determined setting)\n"); |
---|
3987 | printf("\n"); |
---|
3988 | printf(" -c Specify number of distinct rate catgories for RAxML when modelOfEvolution\n"); |
---|
3989 | printf(" is set to GTRCAT or GTRMIX\n"); |
---|
3990 | printf(" Individual per-site rates are categorized into numberOfCategories rate \n"); |
---|
3991 | printf(" categories to accelerate computations. \n"); |
---|
3992 | printf("\n"); |
---|
3993 | printf(" DEFAULT: 25\n"); |
---|
3994 | printf("\n"); |
---|
3995 | printf(" -C Enable verbose output for the \"-L\" and \"-f i\" options. This will produce more, as well as more verbose output files\n"); |
---|
3996 | printf("\n"); |
---|
3997 | printf(" DEFAULT: OFF\n"); |
---|
3998 | printf("\n"); |
---|
3999 | printf(" -d start ML optimization from random starting tree \n"); |
---|
4000 | printf("\n"); |
---|
4001 | printf(" DEFAULT: OFF\n"); |
---|
4002 | printf("\n"); |
---|
4003 | printf(" -D ML search convergence criterion. This will break off ML searches if the relative \n"); |
---|
4004 | printf(" Robinson-Foulds distance between the trees obtained from two consecutive lazy SPR cycles\n"); |
---|
4005 | printf(" is smaller or equal to 1%s. Usage recommended for very large datasets in terms of taxa.\n", "%"); |
---|
4006 | printf(" On trees with more than 500 taxa this will yield execution time improvements of approximately 50%s\n", "%"); |
---|
4007 | printf(" While yielding only slightly worse trees.\n"); |
---|
4008 | printf("\n"); |
---|
4009 | printf(" DEFAULT: OFF\n"); |
---|
4010 | printf("\n"); |
---|
4011 | printf(" -e set model optimization precision in log likelihood units for final\n"); |
---|
4012 | printf(" optimization of tree topology under MIX/MIXI or GAMMA/GAMMAI\n"); |
---|
4013 | printf("\n"); |
---|
4014 | printf(" DEFAULT: 0.1 for models not using proportion of invariant sites estimate\n"); |
---|
4015 | printf(" 0.001 for models using proportion of invariant sites estimate\n"); |
---|
4016 | printf("\n"); |
---|
4017 | printf(" -E specify an exclude file name, that contains a specification of alignment positions you wish to exclude.\n"); |
---|
4018 | printf(" Format is similar to Nexus, the file shall contain entries like \"100-200 300-400\", to exclude a\n"); |
---|
4019 | printf(" single column write, e.g., \"100-100\", if you use a mixed model, an appropriatly adapted model file\n"); |
---|
4020 | printf(" will be written.\n"); |
---|
4021 | printf("\n"); |
---|
4022 | printf(" -f select algorithm:\n"); |
---|
4023 | |
---|
4024 | printMinusFUsage(); |
---|
4025 | |
---|
4026 | printf("\n"); |
---|
4027 | printf(" -F enable ML tree searches under CAT model for very large trees without switching to \n"); |
---|
4028 | printf(" GAMMA in the end (saves memory).\n"); |
---|
4029 | printf(" This option can also be used with the GAMMA models in order to avoid the thorough optimization \n"); |
---|
4030 | printf(" of the best-scoring ML tree in the end.\n"); |
---|
4031 | printf("\n"); |
---|
4032 | printf(" DEFAULT: OFF\n"); |
---|
4033 | printf("\n"); |
---|
4034 | printf(" -g specify the file name of a multifurcating constraint tree\n"); |
---|
4035 | printf(" this tree does not need to be comprehensive, i.e. must not contain all taxa\n"); |
---|
4036 | printf("\n"); |
---|
4037 | printf(" -G enable the ML-based evolutionary placement algorithm heuristics\n"); |
---|
4038 | printf(" by specifiyng a threshold value (fraction of insertion branches to be evaluated\n"); |
---|
4039 | printf(" using slow insertions under ML).\n"); |
---|
4040 | printf("\n"); |
---|
4041 | printf(" -h Display this help message.\n"); |
---|
4042 | printf("\n"); |
---|
4043 | printf(" -i Initial rearrangement setting for the subsequent application of topological \n"); |
---|
4044 | printf(" changes phase\n"); |
---|
4045 | printf("\n"); |
---|
4046 | printf(" -I a posteriori bootstopping analysis. Use:\n"); |
---|
4047 | printf(" \"-I autoFC\" for the frequency-based criterion\n"); |
---|
4048 | printf(" \"-I autoMR\" for the majority-rule consensus tree criterion\n"); |
---|
4049 | printf(" \"-I autoMRE\" for the extended majority-rule consensus tree criterion\n"); |
---|
4050 | printf(" \"-I autoMRE_IGN\" for metrics similar to MRE, but include bipartitions under the threshold whether they are compatible\n"); |
---|
4051 | printf(" or not. This emulates MRE but is faster to compute.\n"); |
---|
4052 | printf(" You also need to pass a tree file containg several bootstrap replicates via \"-z\" \n"); |
---|
4053 | printf("\n"); |
---|
4054 | printf(" -j Specifies that intermediate tree files shall be written to file during the standard ML and BS tree searches.\n"); |
---|
4055 | printf("\n"); |
---|
4056 | printf(" DEFAULT: OFF\n"); |
---|
4057 | printf("\n"); |
---|
4058 | printf(" -J Compute majority rule consensus tree with \"-J MR\" or extended majority rule consensus tree with \"-J MRE\"\n"); |
---|
4059 | printf(" or strict consensus tree with \"-J STRICT\". For a custom consensus treshold >= 50%%, specify T_<NUM>, where 100 >= NUM >= 50.\n"); |
---|
4060 | printf(" Options \"-J STRICT_DROP\" and \"-J MR_DROP\" will execute an algorithm that identifies dropsets which contain\n"); |
---|
4061 | printf(" rogue taxa as proposed by Pattengale et al. in the paper \"Uncovering hidden phylogenetic consensus\".\n"); |
---|
4062 | printf(" You will also need to provide a tree file containing several UNROOTED trees via \"-z\"\n"); |
---|
4063 | printf("\n"); |
---|
4064 | printf(" -k Specifies that bootstrapped trees should be printed with branch lengths.\n"); |
---|
4065 | printf(" The bootstraps will run a bit longer, because model parameters will be optimized\n"); |
---|
4066 | printf(" at the end of each run under GAMMA or GAMMA+P-Invar respectively.\n"); |
---|
4067 | printf("\n"); |
---|
4068 | printf(" DEFAULT: OFF\n"); |
---|
4069 | printf("\n"); |
---|
4070 | printf(" -K Specify one of the multi-state substitution models (max 32 states) implemented in RAxML.\n"); |
---|
4071 | printf(" Available models are: ORDERED, MK, GTR\n"); |
---|
4072 | printf("\n"); |
---|
4073 | printf(" DEFAULT: GTR model \n"); |
---|
4074 | printf("\n"); |
---|
4075 | printf(" -L Compute consensus trees labelled by IC supports and the overall TC value as proposed in Salichos and Rokas 2013.\n"); |
---|
4076 | printf(" Compute a majority rule consensus tree with \"-L MR\" or an extended majority rule consensus tree with \"-L MRE\".\n"); |
---|
4077 | printf(" For a custom consensus treshold >= 50%%, specify \"-L T_<NUM>\", where 100 >= NUM >= 50.\n"); |
---|
4078 | printf(" You will of course also need to provide a tree file containing several UNROOTED trees via \"-z\"!\n"); |
---|
4079 | printf("\n"); |
---|
4080 | printf(" -m Model of Binary (Morphological), Nucleotide, Multi-State, or Amino Acid Substitution: \n"); |
---|
4081 | printf("\n"); |
---|
4082 | printf(" BINARY:\n\n"); |
---|
4083 | printf(" \"-m BINCAT\" : Optimization of site-specific\n"); |
---|
4084 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4085 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4086 | printf(" automatically under BINGAMMA, depending on the tree search option\n"); |
---|
4087 | printf(" \"-m BINCATI\" : Optimization of site-specific\n"); |
---|
4088 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4089 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4090 | printf(" automatically under BINGAMMAI, depending on the tree search option \n"); |
---|
4091 | printf(" \"-m BINGAMMA\" : GAMMA model of rate \n"); |
---|
4092 | printf(" heterogeneity (alpha parameter will be estimated)\n"); |
---|
4093 | printf(" \"-m BINGAMMAI\" : Same as BINGAMMA, but with estimate of proportion of invariable sites\n"); |
---|
4094 | printf("\n"); |
---|
4095 | printf(" NUCLEOTIDES:\n\n"); |
---|
4096 | printf(" \"-m GTRCAT\" : GTR + Optimization of substitution rates + Optimization of site-specific\n"); |
---|
4097 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4098 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4099 | printf(" under GTRGAMMA, depending on the tree search option\n"); |
---|
4100 | printf(" \"-m GTRCATI\" : GTR + Optimization of substitution rates + Optimization of site-specific\n"); |
---|
4101 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4102 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4103 | printf(" under GTRGAMMAI, depending on the tree search option\n"); |
---|
4104 | printf(" \"-m GTRGAMMA\" : GTR + Optimization of substitution rates + GAMMA model of rate \n"); |
---|
4105 | printf(" heterogeneity (alpha parameter will be estimated)\n"); |
---|
4106 | printf(" \"-m GTRGAMMAI\" : Same as GTRGAMMA, but with estimate of proportion of invariable sites \n"); |
---|
4107 | printf("\n"); |
---|
4108 | printf(" MULTI-STATE:\n\n"); |
---|
4109 | printf(" \"-m MULTICAT\" : Optimization of site-specific\n"); |
---|
4110 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4111 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4112 | printf(" automatically under MULTIGAMMA, depending on the tree search option\n"); |
---|
4113 | printf(" \"-m MULTICATI\" : Optimization of site-specific\n"); |
---|
4114 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4115 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4116 | printf(" automatically under MULTIGAMMAI, depending on the tree search option \n"); |
---|
4117 | printf(" \"-m MULTIGAMMA\" : GAMMA model of rate \n"); |
---|
4118 | printf(" heterogeneity (alpha parameter will be estimated)\n"); |
---|
4119 | printf(" \"-m MULTIGAMMAI\" : Same as MULTIGAMMA, but with estimate of proportion of invariable sites\n"); |
---|
4120 | printf("\n"); |
---|
4121 | printf(" You can use up to 32 distinct character states to encode multi-state regions, they must be used in the following order:\n"); |
---|
4122 | printf(" 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V\n"); |
---|
4123 | printf(" i.e., if you have 6 distinct character states you would use 0, 1, 2, 3, 4, 5 to encode these.\n"); |
---|
4124 | printf(" The substitution model for the multi-state regions can be selected via the \"-K\" option\n"); |
---|
4125 | printf("\n"); |
---|
4126 | printf(" AMINO ACIDS:\n\n"); |
---|
4127 | printf(" \"-m PROTCATmatrixName[F]\" : specified AA matrix + Optimization of substitution rates + Optimization of site-specific\n"); |
---|
4128 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4129 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4130 | printf(" automatically under PROTGAMMAmatrixName[f], depending on the tree search option\n"); |
---|
4131 | printf(" \"-m PROTCATImatrixName[F]\" : specified AA matrix + Optimization of substitution rates + Optimization of site-specific\n"); |
---|
4132 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4133 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4134 | printf(" automatically under PROTGAMMAImatrixName[f], depending on the tree search option\n"); |
---|
4135 | printf(" \"-m PROTGAMMAmatrixName[F]\" : specified AA matrix + Optimization of substitution rates + GAMMA model of rate \n"); |
---|
4136 | printf(" heterogeneity (alpha parameter will be estimated)\n"); |
---|
4137 | printf(" \"-m PROTGAMMAImatrixName[F]\" : Same as PROTGAMMAmatrixName[F], but with estimate of proportion of invariable sites \n"); |
---|
4138 | printf("\n"); |
---|
4139 | printf(" Available AA substitution models:\n"); |
---|
4140 | printf(" "); |
---|
4141 | |
---|
4142 | { |
---|
4143 | int |
---|
4144 | i; |
---|
4145 | |
---|
4146 | for(i = 0; i < NUM_PROT_MODELS - 1; i++) |
---|
4147 | { |
---|
4148 | if(i > 0 && (i % 8 == 0)) |
---|
4149 | { |
---|
4150 | printf("\n"); |
---|
4151 | printf(" "); |
---|
4152 | } |
---|
4153 | printf("%s, ", protModels[i]); |
---|
4154 | } |
---|
4155 | |
---|
4156 | printf("%s\n", protModels[i]); |
---|
4157 | } |
---|
4158 | |
---|
4159 | printf(" With the optional \"F\" appendix you can specify if you want to use empirical base frequencies\n"); |
---|
4160 | printf(" Please note that for mixed models you can in addition specify the per-gene AA model in\n"); |
---|
4161 | printf(" the mixed model file (see manual for details). Also note that if you estimate AA GTR parameters on a partitioned\n"); |
---|
4162 | printf(" dataset, they will be linked (estimated jointly) across all partitions to avoid over-parametrization\n"); |
---|
4163 | printf("\n"); |
---|
4164 | printf(" -M Switch on estimation of individual per-partition branch lengths. Only has effect when used in combination with \"-q\"\n"); |
---|
4165 | printf(" Branch lengths for individual partitions will be printed to separate files\n"); |
---|
4166 | printf(" A weighted average of the branch lengths is computed by using the respective partition lengths\n"); |
---|
4167 | printf("\n"), |
---|
4168 | printf(" DEFAULT: OFF\n"); |
---|
4169 | printf("\n"); |
---|
4170 | printf(" -n Specifies the name of the output file.\n"); |
---|
4171 | printf("\n"); |
---|
4172 | printf(" -o Specify the name of a single outgrpoup or a comma-separated list of outgroups, eg \"-o Rat\" \n"); |
---|
4173 | printf(" or \"-o Rat,Mouse\", in case that multiple outgroups are not monophyletic the first name \n"); |
---|
4174 | printf(" in the list will be selected as outgroup, don't leave spaces between taxon names!\n"); |
---|
4175 | printf("\n"); |
---|
4176 | printf(" -O Disable check for completely undetermined sequence in alignment.\n"); |
---|
4177 | printf(" The program will not exit with an error message when \"-O\" is specified.\n"); |
---|
4178 | printf("\n"); |
---|
4179 | printf(" DEFAULT: check enabled\n"); |
---|
4180 | printf("\n"); |
---|
4181 | printf(" -p Specify a random number seed for the parsimony inferences. This allows you to reproduce your results\n"); |
---|
4182 | printf(" and will help me debug the program.\n"); |
---|
4183 | printf("\n"); |
---|
4184 | printf(" -P Specify the file name of a user-defined AA (Protein) substitution model. This file must contain\n"); |
---|
4185 | printf(" 420 entries, the first 400 being the AA substitution rates (this must be a symmetric matrix) and the\n"); |
---|
4186 | printf(" last 20 are the empirical base frequencies\n"); |
---|
4187 | printf("\n"); |
---|
4188 | printf(" -q Specify the file name which contains the assignment of models to alignment\n"); |
---|
4189 | printf(" partitions for multiple models of substitution. For the syntax of this file\n"); |
---|
4190 | printf(" please consult the manual.\n"); |
---|
4191 | printf("\n"); |
---|
4192 | printf(" -r Specify the file name of a binary constraint tree.\n"); |
---|
4193 | printf(" this tree does not need to be comprehensive, i.e. must not contain all taxa\n"); |
---|
4194 | printf("\n"); |
---|
4195 | printf(" -R Specify the file name of a binary model parameter file that has previously been generated\n"); |
---|
4196 | printf(" with RAxML using the -f e tree evaluation option. The file name should be: \n"); |
---|
4197 | printf(" RAxML_binaryModelParameters.runID\n"); |
---|
4198 | printf("\n"); |
---|
4199 | printf(" -s Specify the name of the alignment data file in PHYLIP format\n"); |
---|
4200 | printf("\n"); |
---|
4201 | printf(" -S Specify the name of a secondary structure file. The file can contain \".\" for \n"); |
---|
4202 | printf(" alignment columns that do not form part of a stem and characters \"()<>[]{}\" to define \n"); |
---|
4203 | printf(" stem regions and pseudoknots\n"); |
---|
4204 | printf("\n"); |
---|
4205 | printf(" -t Specify a user starting tree file name in Newick format\n"); |
---|
4206 | printf("\n"); |
---|
4207 | printf(" -T PTHREADS VERSION ONLY! Specify the number of threads you want to run.\n"); |
---|
4208 | printf(" Make sure to set \"-T\" to at most the number of CPUs you have on your machine,\n"); |
---|
4209 | printf(" otherwise, there will be a huge performance decrease!\n"); |
---|
4210 | printf("\n"); |
---|
4211 | printf(" -u use the median for the discrete approximation of the GAMMA model of rate heterogeneity\n"); |
---|
4212 | printf("\n"); |
---|
4213 | printf(" DEFAULT: OFF\n"); |
---|
4214 | printf("\n"); |
---|
4215 | printf(" -U Try to save memory by using SEV-based implementation for gap columns on large gappy alignments\n"); |
---|
4216 | printf(" The technique is described here: http://www.biomedcentral.com/1471-2105/12/470\n"); |
---|
4217 | printf(" This will only work for DNA and/or PROTEIN data and only with the SSE3 or AVX-vextorized version of the code.\n"); |
---|
4218 | printf("\n"); |
---|
4219 | printf(" -v Display version information\n"); |
---|
4220 | printf("\n"); |
---|
4221 | printf(" -V Disable rate heterogeneity among sites model and use one without rate heterogeneity instead.\n"); |
---|
4222 | printf(" Only works if you specify the CAT model of rate heterogeneity.\n"); |
---|
4223 | printf("\n"); |
---|
4224 | printf(" DEFAULT: use rate heterogeneity\n"); |
---|
4225 | printf("\n"); |
---|
4226 | printf(" -w FULL (!) path to the directory into which RAxML shall write its output files\n"); |
---|
4227 | printf("\n"); |
---|
4228 | printf(" DEFAULT: current directory\n"); |
---|
4229 | printf("\n"); |
---|
4230 | printf(" -W Sliding window size for leave-one-out site-specific placement bias algorithm\n"); |
---|
4231 | printf(" only effective when used in combination with \"-f S\" \n"); |
---|
4232 | printf("\n"); |
---|
4233 | printf(" DEFAULT: 100 sites\n"); |
---|
4234 | printf("\n"); |
---|
4235 | printf(" -x Specify an integer number (random seed) and turn on rapid bootstrapping\n"); |
---|
4236 | printf(" CAUTION: unlike in version 7.0.4 RAxML will conduct rapid BS replicates under \n"); |
---|
4237 | printf(" the model of rate heterogeneity you specified via \"-m\" and not by default under CAT\n"); |
---|
4238 | printf("\n"); |
---|
4239 | printf(" -X Same as the \"-y\" option below, however the parsimony search is more superficial.\n"); |
---|
4240 | printf(" RAxML will only do a randomized stepwise addition order parsimony tree reconstruction\n"); |
---|
4241 | printf(" without performing any additional SPRs.\n"); |
---|
4242 | printf(" This may be helpful for very broad whole-genome datasets, since this can generate topologically\n"); |
---|
4243 | printf(" more different starting trees.\n"); |
---|
4244 | printf("\n"); |
---|
4245 | printf(" DEFAULT: OFF\n"); |
---|
4246 | printf("\n"); |
---|
4247 | printf(" -y If you want to only compute a parsimony starting tree with RAxML specify \"-y\",\n"); |
---|
4248 | printf(" the program will exit after computation of the starting tree\n"); |
---|
4249 | printf("\n"); |
---|
4250 | printf(" DEFAULT: OFF\n"); |
---|
4251 | printf("\n"); |
---|
4252 | printf(" -Y Pass a quartet grouping file name defining four groups from which to draw quartets\n"); |
---|
4253 | printf(" The file input format must contain 4 groups in the following form:\n"); |
---|
4254 | printf(" (Chicken, Human, Loach), (Cow, Carp), (Mouse, Rat, Seal), (Whale, Frog);\n"); |
---|
4255 | printf(" Only works in combination with -f q !\n"); |
---|
4256 | printf("\n"); |
---|
4257 | printf(" -z Specify the file name of a file containing multiple trees e.g. from a bootstrap\n"); |
---|
4258 | printf(" that shall be used to draw bipartition values onto a tree provided with \"-t\",\n"); |
---|
4259 | printf(" It can also be used to compute per site log likelihoods in combination with \"-f g\"\n"); |
---|
4260 | printf(" and to read a bunch of trees for a couple of other options (\"-f h\", \"-f m\", \"-f n\").\n"); |
---|
4261 | printf("\n"); |
---|
4262 | printf(" -#|-N Specify the number of alternative runs on distinct starting trees\n"); |
---|
4263 | printf(" In combination with the \"-b\" option, this will invoke a multiple boostrap analysis\n"); |
---|
4264 | printf(" Note that \"-N\" has been added as an alternative since \"-#\" sometimes caused problems\n"); |
---|
4265 | printf(" with certain MPI job submission systems, since \"-#\" is often used to start comments.\n"); |
---|
4266 | printf(" If you want to use the bootstopping criteria specify \"-# autoMR\" or \"-# autoMRE\" or \"-# autoMRE_IGN\"\n"); |
---|
4267 | printf(" for the majority-rule tree based criteria (see -I option) or \"-# autoFC\" for the frequency-based criterion.\n"); |
---|
4268 | printf(" Bootstopping will only work in combination with \"-x\" or \"-b\"\n"); |
---|
4269 | printf("\n"); |
---|
4270 | printf(" DEFAULT: 1 single analysis\n"); |
---|
4271 | printf("\n\n\n\n"); |
---|
4272 | |
---|
4273 | } |
---|
4274 | |
---|
4275 | |
---|
4276 | |
---|
4277 | |
---|
/*
 * Validate the run id that was passed after "-n".
 *
 * id: run id, expected to be a NUL-terminated string held in a buffer of
 *     at most 128 bytes (so at most 127 characters plus the terminator).
 *
 * The function returns normally when the id is acceptable. It prints an
 * error message and terminates the program when
 *   - no terminator is found within the first 128 bytes,
 *   - the id contains a '/' character, or
 *   - the id is empty.
 */
static void analyzeRunId(char id[128])
{
  int i = 0;

  for(;;)
    {
      /*
       * Test the bound before touching id[i]: the declared buffer ends
       * at id[127], so id[128] must never be read.
       */
      if(i >= 128)
	{
	  printf("Error: run id after \"-n\" is too long, it has %d characters please use a shorter one\n", i);
	  assert(0);
	  /* assert() is compiled out under NDEBUG; keep the error fatal there too */
	  exit(-1);
	}

      if(id[i] == '\0')
	break;

      if(id[i] == '/')
	{
	  printf("Error character %c not allowed in run ID\n", id[i]);
	  assert(0);
	  exit(-1);
	}

      i++;
    }

  /* i now holds the length of the id */
  if(i == 0)
    {
      printf("Error: please provide a string for the run id after \"-n\" \n");
      assert(0);
      exit(-1);
    }
}
---|
4307 | |
---|
4308 | static void get_args(int argc, char *argv[], analdef *adef, tree *tr) |
---|
4309 | { |
---|
4310 | boolean |
---|
4311 | bad_opt =FALSE, |
---|
4312 | resultDirSet = FALSE; |
---|
4313 | |
---|
4314 | char |
---|
4315 | resultDir[1024] = "", |
---|
4316 | aut[256], |
---|
4317 | *optarg, |
---|
4318 | model[2048] = "", |
---|
4319 | secondaryModel[2048] = "", |
---|
4320 | multiStateModel[2048] = "", |
---|
4321 | modelChar; |
---|
4322 | |
---|
4323 | double |
---|
4324 | likelihoodEpsilon, |
---|
4325 | wcThreshold, |
---|
4326 | fastEPAthreshold; |
---|
4327 | |
---|
4328 | int |
---|
4329 | optind = 1, |
---|
4330 | c, |
---|
4331 | nameSet = 0, |
---|
4332 | alignmentSet = 0, |
---|
4333 | multipleRuns = 0, |
---|
4334 | constraintSet = 0, |
---|
4335 | treeSet = 0, |
---|
4336 | groupSet = 0, |
---|
4337 | modelSet = 0, |
---|
4338 | treesSet = 0; |
---|
4339 | |
---|
4340 | boolean |
---|
4341 | bSeedSet = FALSE, |
---|
4342 | xSeedSet = FALSE, |
---|
4343 | multipleRunsSet = FALSE, |
---|
4344 | yFileSet = FALSE; |
---|
4345 | |
---|
4346 | run_id[0] = 0; |
---|
4347 | workdir[0] = 0; |
---|
4348 | seq_file[0] = 0; |
---|
4349 | tree_file[0] = 0; |
---|
4350 | model[0] = 0; |
---|
4351 | weightFileName[0] = 0; |
---|
4352 | modelFileName[0] = 0; |
---|
4353 | |
---|
4354 | /*********** tr inits **************/ |
---|
4355 | |
---|
4356 | #ifdef _USE_PTHREADS |
---|
4357 | NumberOfThreads = 0; |
---|
4358 | #endif |
---|
4359 | |
---|
4360 | |
---|
4361 | tr->useFastScaling = TRUE; |
---|
4362 | tr->bootStopCriterion = -1; |
---|
4363 | tr->wcThreshold = 0.03; |
---|
4364 | tr->doCutoff = TRUE; |
---|
4365 | tr->secondaryStructureModel = SEC_16; /* default setting */ |
---|
4366 | tr->searchConvergenceCriterion = FALSE; |
---|
4367 | tr->catOnly = FALSE; |
---|
4368 | tr->useEpaHeuristics = FALSE; |
---|
4369 | tr->fastEPAthreshold = -1.0; |
---|
4370 | tr->multiStateModel = GTR_MULTI_STATE; |
---|
4371 | tr->saveMemory = FALSE; |
---|
4372 | tr->useGammaMedian = FALSE; |
---|
4373 | tr->noRateHet = FALSE; |
---|
4374 | tr->perPartitionEPA = FALSE; |
---|
4375 | tr->useBrLenScaler = FALSE; |
---|
4376 | /********* tr inits end*************/ |
---|
4377 | |
---|
4378 | |
---|
4379 | while(!bad_opt && |
---|
4380 | ((c = mygetopt(argc,argv,"R:T:E:N:B:L:P:S:Y:A:G:H:I:J:K:W:l:x:z:g:r:e:a:b:c:f:i:m:t:w:s:n:o:q:#:p:vudyjhkMDFQUOVCX", &optind, &optarg))!=-1)) |
---|
4381 | { |
---|
4382 | switch(c) |
---|
4383 | { |
---|
4384 | case 'Y': |
---|
4385 | adef->useQuartetGrouping = TRUE; |
---|
4386 | yFileSet = TRUE; |
---|
4387 | strcpy(quartetGroupingFileName, optarg); |
---|
4388 | break; |
---|
4389 | case 'V': |
---|
4390 | tr->noRateHet = TRUE; |
---|
4391 | break; |
---|
4392 | case 'u': |
---|
4393 | tr->useGammaMedian = TRUE; |
---|
4394 | break; |
---|
4395 | case 'O': |
---|
4396 | adef->checkForUndeterminedSequences = FALSE; |
---|
4397 | break; |
---|
4398 | case 'W': |
---|
4399 | sscanf(optarg,"%d", &(adef->slidingWindowSize)); |
---|
4400 | if(adef->slidingWindowSize <= 0) |
---|
4401 | { |
---|
4402 | printf("You can't use a sliding window size smaller than 1, you specified %d\n", adef->slidingWindowSize); |
---|
4403 | exit(-1); |
---|
4404 | } |
---|
4405 | if(adef->slidingWindowSize <= 10) |
---|
4406 | { |
---|
4407 | printf("You specified a small sliding window size of %d sites\n", adef->slidingWindowSize); |
---|
4408 | printf("Are you sure you want to do this?\n"); |
---|
4409 | } |
---|
4410 | if(adef->slidingWindowSize >= 500) |
---|
4411 | { |
---|
4412 | printf("You specified a large sliding window size of %d sites\n", adef->slidingWindowSize); |
---|
4413 | printf("Are you sure you want to do this?\n"); |
---|
4414 | } |
---|
4415 | break; |
---|
4416 | case 'U': |
---|
4417 | tr->saveMemory = TRUE; |
---|
4418 | #if (!defined(__SIM_SSE3) && !defined(__AVX)) |
---|
4419 | printf("\nmemory saving option -U does only work with the AVX and SSE3 vectorized versions of the code\n"); |
---|
4420 | printf("please remove this option and execute the program again\n"); |
---|
4421 | printf("exiting ....\n\n"); |
---|
4422 | errorExit(0); |
---|
4423 | #endif |
---|
4424 | break; |
---|
4425 | case 'R': |
---|
4426 | adef->useBinaryModelFile = TRUE; |
---|
4427 | strcpy(binaryModelParamsInputFileName, optarg); |
---|
4428 | break; |
---|
4429 | case 'K': |
---|
4430 | { |
---|
4431 | const char *modelList[3] = { "ORDERED", "MK", "GTR"}; |
---|
4432 | const int states[3] = {ORDERED_MULTI_STATE, MK_MULTI_STATE, GTR_MULTI_STATE}; |
---|
4433 | int i; |
---|
4434 | |
---|
4435 | sscanf(optarg, "%s", multiStateModel); |
---|
4436 | |
---|
4437 | for(i = 0; i < 3; i++) |
---|
4438 | if(strcmp(multiStateModel, modelList[i]) == 0) |
---|
4439 | break; |
---|
4440 | |
---|
4441 | if(i < 3) |
---|
4442 | tr->multiStateModel = states[i]; |
---|
4443 | else |
---|
4444 | { |
---|
4445 | printf("The multi-state model %s you want to use does not exist, exiting .... \n", multiStateModel); |
---|
4446 | errorExit(0); |
---|
4447 | } |
---|
4448 | |
---|
4449 | |
---|
4450 | } |
---|
4451 | break; |
---|
4452 | case 'A': |
---|
4453 | { |
---|
4454 | const char *modelList[21] = { "S6A", "S6B", "S6C", "S6D", "S6E", "S7A", "S7B", "S7C", "S7D", "S7E", "S7F", "S16", "S16A", "S16B", "S16C", |
---|
4455 | "S16D", "S16E", "S16F", "S16I", "S16J", "S16K"}; |
---|
4456 | int i; |
---|
4457 | |
---|
4458 | sscanf(optarg, "%s", secondaryModel); |
---|
4459 | |
---|
4460 | for(i = 0; i < 21; i++) |
---|
4461 | if(strcmp(secondaryModel, modelList[i]) == 0) |
---|
4462 | break; |
---|
4463 | |
---|
4464 | if(i < 21) |
---|
4465 | tr->secondaryStructureModel = i; |
---|
4466 | else |
---|
4467 | { |
---|
4468 | printf("The secondary structure model %s you want to use does not exist, exiting .... \n", secondaryModel); |
---|
4469 | errorExit(0); |
---|
4470 | } |
---|
4471 | } |
---|
4472 | break; |
---|
4473 | case 'B': |
---|
4474 | sscanf(optarg,"%lf", &wcThreshold); |
---|
4475 | tr->wcThreshold = wcThreshold; |
---|
4476 | if(wcThreshold <= 0.0 || wcThreshold >= 1.0) |
---|
4477 | { |
---|
4478 | printf("\nBootstrap threshold must be set to values between 0.0 and 1.0, you just set it to %f\n", wcThreshold); |
---|
4479 | exit(-1); |
---|
4480 | } |
---|
4481 | if(wcThreshold < 0.01 || wcThreshold > 0.05) |
---|
4482 | { |
---|
4483 | printf("\n\nWARNING, reasonable settings for Bootstopping threshold with MR-based criteria range between 0.01 and 0.05.\n"); |
---|
4484 | printf("You are just setting it to %f, the most reasonable empirically determined setting is 0.03 \n\n", wcThreshold); |
---|
4485 | } |
---|
4486 | break; |
---|
4487 | case 'D': |
---|
4488 | tr->searchConvergenceCriterion = TRUE; |
---|
4489 | break; |
---|
4490 | case 'E': |
---|
4491 | strcpy(excludeFileName, optarg); |
---|
4492 | adef->useExcludeFile = TRUE; |
---|
4493 | break; |
---|
4494 | case 'F': |
---|
4495 | tr->catOnly = TRUE; |
---|
4496 | break; |
---|
4497 | case 'G': |
---|
4498 | tr->useEpaHeuristics = TRUE; |
---|
4499 | |
---|
4500 | sscanf(optarg,"%lf", &fastEPAthreshold); |
---|
4501 | tr->fastEPAthreshold = fastEPAthreshold; |
---|
4502 | |
---|
4503 | if(fastEPAthreshold <= 0.0 || fastEPAthreshold >= 1.0) |
---|
4504 | { |
---|
4505 | printf("\nHeuristic EPA threshold must be set to values between 0.0 and 1.0, you just set it to %f\n", fastEPAthreshold); |
---|
4506 | exit(-1); |
---|
4507 | } |
---|
4508 | if(fastEPAthreshold < 0.015625 || fastEPAthreshold > 0.5) |
---|
4509 | { |
---|
4510 | printf("\n\nWARNING, reasonable settings for heuristic EPA threshold range between 0.015625 (1/64) and 0.5 (1/2).\n"); |
---|
4511 | printf("You are just setting it to %f\n\n", fastEPAthreshold); |
---|
4512 | } |
---|
4513 | #ifdef _USE_PTHREADS |
---|
4514 | tr->useFastScaling = FALSE; |
---|
4515 | #endif |
---|
4516 | break; |
---|
4517 | |
---|
4518 | case 'I': |
---|
4519 | adef->readTaxaOnly = TRUE; |
---|
4520 | adef->mode = BOOTSTOP_ONLY; |
---|
4521 | if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "autoFC") == 0) || (strcmp(aut, "autoMR") == 0) || |
---|
4522 | (strcmp(aut, "autoMRE") == 0) || (strcmp(aut, "autoMRE_IGN") == 0))) |
---|
4523 | { |
---|
4524 | if((strcmp(aut, "autoFC") == 0)) |
---|
4525 | tr->bootStopCriterion = FREQUENCY_STOP; |
---|
4526 | if((strcmp(aut, "autoMR") == 0)) |
---|
4527 | tr->bootStopCriterion = MR_STOP; |
---|
4528 | if((strcmp(aut, "autoMRE") == 0)) |
---|
4529 | tr->bootStopCriterion = MRE_STOP; |
---|
4530 | if((strcmp(aut, "autoMRE_IGN") == 0)) |
---|
4531 | tr->bootStopCriterion = MRE_IGN_STOP; |
---|
4532 | } |
---|
4533 | else |
---|
4534 | { |
---|
4535 | if(processID == 0) |
---|
4536 | printf("Use -I a posteriori bootstop option either as \"-I autoFC\" or \"-I autoMR\" or \"-I autoMRE\" or \"-I autoMRE_IGN\"\n"); |
---|
4537 | errorExit(0); |
---|
4538 | } |
---|
4539 | break; |
---|
4540 | case 'J': |
---|
4541 | adef->readTaxaOnly = TRUE; |
---|
4542 | adef->mode = CONSENSUS_ONLY; |
---|
4543 | adef->calculateIC = FALSE; |
---|
4544 | |
---|
4545 | if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "MR") == 0) || (strcmp(aut, "MRE") == 0) || (strcmp(aut, "STRICT") == 0) || |
---|
4546 | (strcmp(aut, "STRICT_DROP") == 0) || (strcmp(aut, "MR_DROP") == 0))) |
---|
4547 | { |
---|
4548 | if((strcmp(aut, "MR") == 0)) |
---|
4549 | tr->consensusType = MR_CONSENSUS; |
---|
4550 | if((strcmp(aut, "MR_DROP") == 0)) |
---|
4551 | { |
---|
4552 | tr->consensusType = MR_CONSENSUS; |
---|
4553 | adef->leaveDropMode = TRUE; |
---|
4554 | } |
---|
4555 | |
---|
4556 | if((strcmp(aut, "MRE") == 0)) |
---|
4557 | tr->consensusType = MRE_CONSENSUS; |
---|
4558 | |
---|
4559 | |
---|
4560 | if((strcmp(aut, "STRICT") == 0)) |
---|
4561 | tr->consensusType = STRICT_CONSENSUS; |
---|
4562 | if((strcmp(aut, "STRICT_DROP") == 0)) |
---|
4563 | { |
---|
4564 | tr->consensusType = STRICT_CONSENSUS; |
---|
4565 | adef->leaveDropMode = TRUE; |
---|
4566 | } |
---|
4567 | } |
---|
4568 | else |
---|
4569 | { |
---|
4570 | if( (sscanf( optarg, "%s", aut) > 0) && optarg[0] == 'T' && optarg[1] == '_') |
---|
4571 | { |
---|
4572 | tr->consensusType = USER_DEFINED; |
---|
4573 | sscanf(optarg + 2,"%d", &tr->consensusUserThreshold); |
---|
4574 | |
---|
4575 | if(tr->consensusUserThreshold < 50 || tr->consensusUserThreshold > 100) |
---|
4576 | { |
---|
4577 | printf("Please specify a custom threshold c, with 50 <= c <= 100\n" ); |
---|
4578 | errorExit(0); |
---|
4579 | } |
---|
4580 | } |
---|
4581 | else |
---|
4582 | { |
---|
4583 | if(processID == 0) |
---|
4584 | printf("Use -J consensus tree option either as \"-J MR\" or \"-J MRE\" or \"-J STRICT\" or \"-J MR_DROP\" or \"-J STRICT_DROP\" or T_<NUM>, where NUM >= 50\n"); |
---|
4585 | errorExit(0); |
---|
4586 | } |
---|
4587 | } |
---|
4588 | break; |
---|
4589 | case 'C': |
---|
4590 | adef->verboseIC = TRUE; |
---|
4591 | break; |
---|
4592 | case 'L': |
---|
4593 | adef->readTaxaOnly = TRUE; |
---|
4594 | adef->mode = CONSENSUS_ONLY; |
---|
4595 | adef->leaveDropMode = FALSE; |
---|
4596 | adef->calculateIC = TRUE; |
---|
4597 | |
---|
4598 | if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "MR") == 0) || (strcmp(aut, "MRE") == 0))) |
---|
4599 | { |
---|
4600 | if((strcmp(aut, "MR") == 0)) |
---|
4601 | tr->consensusType = MR_CONSENSUS; |
---|
4602 | |
---|
4603 | if((strcmp(aut, "MRE") == 0)) |
---|
4604 | tr->consensusType = MRE_CONSENSUS; |
---|
4605 | } |
---|
4606 | else |
---|
4607 | { |
---|
4608 | if((sscanf( optarg, "%s", aut) > 0) && optarg[0] == 'T' && optarg[1] == '_') |
---|
4609 | { |
---|
4610 | tr->consensusType = USER_DEFINED; |
---|
4611 | sscanf(optarg + 2,"%d", &tr->consensusUserThreshold); |
---|
4612 | |
---|
4613 | if(tr->consensusUserThreshold < 50 || tr->consensusUserThreshold > 100) |
---|
4614 | { |
---|
4615 | printf("Please specify a custom threshold c, with 50 <= c <= 100\n" ); |
---|
4616 | errorExit(0); |
---|
4617 | } |
---|
4618 | } |
---|
4619 | else |
---|
4620 | { |
---|
4621 | if(processID == 0) |
---|
4622 | printf("Use -L consensus tree option including IC/TC score computation either as \"-L MR\" or \"-L MRE\" or \"-L T_<NUM>\", where NUM >= 50\n"); |
---|
4623 | errorExit(0); |
---|
4624 | } |
---|
4625 | } |
---|
4626 | break; |
---|
4627 | case 'M': |
---|
4628 | adef->perGeneBranchLengths = TRUE; |
---|
4629 | break; |
---|
4630 | case 'P': |
---|
4631 | strcpy(proteinModelFileName, optarg); |
---|
4632 | adef->userProteinModel = TRUE; |
---|
4633 | /*parseProteinModel(adef->externalAAMatrix, proteinModelFileName);*/ |
---|
4634 | break; |
---|
4635 | case 'S': |
---|
4636 | adef->useSecondaryStructure = TRUE; |
---|
4637 | strcpy(secondaryStructureFileName, optarg); |
---|
4638 | break; |
---|
4639 | case 'T': |
---|
4640 | #ifdef _USE_PTHREADS |
---|
4641 | sscanf(optarg,"%d", &NumberOfThreads); |
---|
4642 | #else |
---|
4643 | if(processID == 0) |
---|
4644 | { |
---|
4645 | printf("Option -T does not have any effect with the sequential or parallel MPI version.\n"); |
---|
4646 | printf("It is used to specify the number of threads for the Pthreads-based parallelization\n"); |
---|
4647 | } |
---|
4648 | #endif |
---|
4649 | break; |
---|
4650 | case 'o': |
---|
4651 | { |
---|
4652 | char *outgroups; |
---|
4653 | outgroups = (char*)rax_malloc(sizeof(char) * (strlen(optarg) + 1)); |
---|
4654 | strcpy(outgroups, optarg); |
---|
4655 | parseOutgroups(outgroups, tr); |
---|
4656 | rax_free(outgroups); |
---|
4657 | adef->outgroup = TRUE; |
---|
4658 | } |
---|
4659 | break; |
---|
4660 | case 'k': |
---|
4661 | adef->bootstrapBranchLengths = TRUE; |
---|
4662 | break; |
---|
4663 | case 'z': |
---|
4664 | strcpy(bootStrapFile, optarg); |
---|
4665 | treesSet = 1; |
---|
4666 | break; |
---|
4667 | case 'd': |
---|
4668 | adef->randomStartingTree = TRUE; |
---|
4669 | break; |
---|
4670 | case 'g': |
---|
4671 | strcpy(tree_file, optarg); |
---|
4672 | adef->grouping = TRUE; |
---|
4673 | adef->restart = TRUE; |
---|
4674 | groupSet = 1; |
---|
4675 | break; |
---|
4676 | case 'r': |
---|
4677 | strcpy(tree_file, optarg); |
---|
4678 | adef->restart = TRUE; |
---|
4679 | adef->constraint = TRUE; |
---|
4680 | constraintSet = 1; |
---|
4681 | break; |
---|
4682 | case 'e': |
---|
4683 | sscanf(optarg,"%lf", &likelihoodEpsilon); |
---|
4684 | adef->likelihoodEpsilon = likelihoodEpsilon; |
---|
4685 | break; |
---|
4686 | case 'q': |
---|
4687 | strcpy(modelFileName,optarg); |
---|
4688 | adef->useMultipleModel = TRUE; |
---|
4689 | break; |
---|
4690 | case 'p': |
---|
4691 | sscanf(optarg,"%ld", &(adef->parsimonySeed)); |
---|
4692 | if(adef->parsimonySeed <= 0) |
---|
4693 | { |
---|
4694 | printf("Parsimony seed specified via -p must be greater than zero\n"); |
---|
4695 | errorExit(-1); |
---|
4696 | } |
---|
4697 | break; |
---|
4698 | case 'N': |
---|
4699 | case '#': |
---|
4700 | if(sscanf(optarg,"%d", &multipleRuns) > 0) |
---|
4701 | { |
---|
4702 | adef->multipleRuns = multipleRuns; |
---|
4703 | } |
---|
4704 | else |
---|
4705 | { |
---|
4706 | if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "autoFC") == 0) || (strcmp(aut, "autoMR") == 0) || |
---|
4707 | (strcmp(aut, "autoMRE") == 0) || (strcmp(aut, "autoMRE_IGN") == 0))) |
---|
4708 | |
---|
4709 | { |
---|
4710 | adef->bootStopping = TRUE; |
---|
4711 | adef->multipleRuns = 1000; |
---|
4712 | |
---|
4713 | if((strcmp(aut, "autoFC") == 0)) |
---|
4714 | tr->bootStopCriterion = FREQUENCY_STOP; |
---|
4715 | if((strcmp(aut, "autoMR") == 0)) |
---|
4716 | tr->bootStopCriterion = MR_STOP; |
---|
4717 | if((strcmp(aut, "autoMRE") == 0)) |
---|
4718 | tr->bootStopCriterion = MRE_STOP; |
---|
4719 | if((strcmp(aut, "autoMRE_IGN") == 0)) |
---|
4720 | tr->bootStopCriterion = MRE_IGN_STOP; |
---|
4721 | } |
---|
4722 | else |
---|
4723 | { |
---|
4724 | if(processID == 0) |
---|
4725 | { |
---|
4726 | printf("Use -# or -N option either with an integer, e.g., -# 100 or with -# autoFC or -# autoMR or -# autoMRE or -# autoMRE_IGN\n"); |
---|
4727 | printf("or -N 100 or -N autoFC or -N autoMR or -N autoMRE or -N autoMRE_IGN respectively, note that auto will not work for the\n"); |
---|
4728 | printf("MPI-based parallel version\n"); |
---|
4729 | } |
---|
4730 | errorExit(0); |
---|
4731 | } |
---|
4732 | } |
---|
4733 | multipleRunsSet = TRUE; |
---|
4734 | break; |
---|
4735 | case 'v': |
---|
4736 | printVersionInfo(TRUE, (FILE*)NULL); |
---|
4737 | errorExit(0); |
---|
4738 | case 'y': |
---|
4739 | adef->stepwiseAdditionOnly = FALSE; |
---|
4740 | adef->startingTreeOnly = 1; |
---|
4741 | break; |
---|
4742 | case 'X': |
---|
4743 | adef->stepwiseAdditionOnly = TRUE; |
---|
4744 | adef->startingTreeOnly = 1; |
---|
4745 | break; |
---|
4746 | case 'h': |
---|
4747 | printREADME(); |
---|
4748 | errorExit(0); |
---|
4749 | case 'j': |
---|
4750 | adef->checkpoints = 1; |
---|
4751 | break; |
---|
4752 | case 'a': |
---|
4753 | strcpy(weightFileName,optarg); |
---|
4754 | adef->useWeightFile = TRUE; |
---|
4755 | break; |
---|
4756 | case 'b': |
---|
4757 | sscanf(optarg,"%ld", &adef->boot); |
---|
4758 | if(adef->boot <= 0) |
---|
4759 | { |
---|
4760 | printf("Bootstrap seed specified via -b must be greater than zero\n"); |
---|
4761 | errorExit(-1); |
---|
4762 | } |
---|
4763 | bSeedSet = TRUE; |
---|
4764 | break; |
---|
4765 | case 'x': |
---|
4766 | sscanf(optarg,"%ld", &adef->rapidBoot); |
---|
4767 | if(adef->rapidBoot <= 0) |
---|
4768 | { |
---|
4769 | printf("Bootstrap seed specified via -x must be greater than zero\n"); |
---|
4770 | errorExit(-1); |
---|
4771 | } |
---|
4772 | xSeedSet = TRUE; |
---|
4773 | break; |
---|
4774 | case 'c': |
---|
4775 | sscanf(optarg, "%d", &adef->categories); |
---|
4776 | break; |
---|
4777 | case 'f': |
---|
4778 | sscanf(optarg, "%c", &modelChar); |
---|
4779 | switch(modelChar) |
---|
4780 | { |
---|
4781 | case 'A': |
---|
4782 | adef->mode = ANCESTRAL_STATES; |
---|
4783 | /*adef->compressPatterns = FALSE;*/ |
---|
4784 | break; |
---|
4785 | case 'a': |
---|
4786 | adef->allInOne = TRUE; |
---|
4787 | adef->mode = BIG_RAPID_MODE; |
---|
4788 | tr->doCutoff = TRUE; |
---|
4789 | break; |
---|
4790 | case 'b': |
---|
4791 | adef->readTaxaOnly = TRUE; |
---|
4792 | adef->mode = CALC_BIPARTITIONS; |
---|
4793 | break; |
---|
4794 | case 'B': |
---|
4795 | adef->mode = OPTIMIZE_BR_LEN_SCALER; |
---|
4796 | adef->perGeneBranchLengths = TRUE; |
---|
4797 | tr->useBrLenScaler = TRUE; |
---|
4798 | break; |
---|
4799 | case 'c': |
---|
4800 | adef->mode = CHECK_ALIGNMENT; |
---|
4801 | break; |
---|
4802 | case 'C': |
---|
4803 | adef->mode = ANCESTRAL_SEQUENCE_TEST; |
---|
4804 | tr->useFastScaling = FALSE; |
---|
4805 | break; |
---|
4806 | case 'd': |
---|
4807 | adef->mode = BIG_RAPID_MODE; |
---|
4808 | tr->doCutoff = TRUE; |
---|
4809 | break; |
---|
4810 | case 'e': |
---|
4811 | adef->mode = TREE_EVALUATION; |
---|
4812 | break; |
---|
4813 | case 'E': |
---|
4814 | adef->mode = FAST_SEARCH; |
---|
4815 | adef->veryFast = TRUE; |
---|
4816 | break; |
---|
4817 | case 'F': |
---|
4818 | adef->mode = FAST_SEARCH; |
---|
4819 | adef->veryFast = FALSE; |
---|
4820 | break; |
---|
4821 | case 'g': |
---|
4822 | tr->useFastScaling = FALSE; |
---|
4823 | tr->optimizeAllTrees = FALSE; |
---|
4824 | adef->mode = PER_SITE_LL; |
---|
4825 | break; |
---|
4826 | case 'G': |
---|
4827 | tr->useFastScaling = FALSE; |
---|
4828 | tr->optimizeAllTrees = TRUE; |
---|
4829 | adef->mode = PER_SITE_LL; |
---|
4830 | break; |
---|
4831 | case 'h': |
---|
4832 | tr->optimizeAllTrees = FALSE; |
---|
4833 | adef->mode = TREE_EVALUATION; |
---|
4834 | adef->likelihoodTest = TRUE; |
---|
4835 | tr->useFastScaling = FALSE; |
---|
4836 | break; |
---|
4837 | case 'H': |
---|
4838 | tr->optimizeAllTrees = TRUE; |
---|
4839 | adef->mode = TREE_EVALUATION; |
---|
4840 | adef->likelihoodTest = TRUE; |
---|
4841 | tr->useFastScaling = FALSE; |
---|
4842 | break; |
---|
4843 | case 'i': |
---|
4844 | adef->readTaxaOnly = TRUE; |
---|
4845 | adef->mode = CALC_BIPARTITIONS_IC; |
---|
4846 | break; |
---|
4847 | case 'I': |
---|
4848 | adef->mode = ROOT_TREE; |
---|
4849 | adef->readTaxaOnly = TRUE; |
---|
4850 | break; |
---|
4851 | case 'j': |
---|
4852 | adef->mode = GENERATE_BS; |
---|
4853 | adef->generateBS = TRUE; |
---|
4854 | break; |
---|
4855 | case 'J': |
---|
4856 | adef->mode = SH_LIKE_SUPPORTS; |
---|
4857 | tr->useFastScaling = FALSE; |
---|
4858 | break; |
---|
4859 | case 'm': |
---|
4860 | adef->readTaxaOnly = TRUE; |
---|
4861 | adef->mode = COMPUTE_BIPARTITION_CORRELATION; |
---|
4862 | break; |
---|
4863 | case 'n': |
---|
4864 | tr->optimizeAllTrees = FALSE; |
---|
4865 | adef->mode = COMPUTE_LHS; |
---|
4866 | break; |
---|
4867 | case 'N': |
---|
4868 | tr->optimizeAllTrees = TRUE; |
---|
4869 | adef->mode = COMPUTE_LHS; |
---|
4870 | break; |
---|
4871 | case 'o': |
---|
4872 | adef->mode = BIG_RAPID_MODE; |
---|
4873 | tr->doCutoff = FALSE; |
---|
4874 | break; |
---|
4875 | case 'p': |
---|
4876 | adef->mode = PARSIMONY_ADDITION; |
---|
4877 | break; |
---|
4878 | case 'q': |
---|
4879 | adef->mode = QUARTET_CALCULATION; |
---|
4880 | break; |
---|
4881 | case 'r': |
---|
4882 | adef->readTaxaOnly = TRUE; |
---|
4883 | adef->mode = COMPUTE_RF_DISTANCE; |
---|
4884 | break; |
---|
4885 | case 'R': |
---|
4886 | adef->readTaxaOnly = TRUE; |
---|
4887 | adef->mode = PLAUSIBILITY_CHECKER; |
---|
4888 | break; |
---|
4889 | case 's': |
---|
4890 | adef->mode = SPLIT_MULTI_GENE; |
---|
4891 | break; |
---|
4892 | case 'S': |
---|
4893 | adef->mode = EPA_SITE_SPECIFIC_BIAS; |
---|
4894 | tr->useFastScaling = FALSE; |
---|
4895 | adef->compressPatterns = FALSE; |
---|
4896 | break; |
---|
4897 | case 't': |
---|
4898 | adef->mode = BIG_RAPID_MODE; |
---|
4899 | tr->doCutoff = TRUE; |
---|
4900 | adef->permuteTreeoptimize = TRUE; |
---|
4901 | break; |
---|
4902 | case 'T': |
---|
4903 | adef->mode = THOROUGH_OPTIMIZATION; |
---|
4904 | break; |
---|
4905 | case 'u': |
---|
4906 | adef->mode = MORPH_CALIBRATOR; |
---|
4907 | tr->useFastScaling = FALSE; |
---|
4908 | adef->compressPatterns = FALSE; |
---|
4909 | break; |
---|
4910 | case 'v': |
---|
4911 | adef->mode = CLASSIFY_ML; |
---|
4912 | |
---|
4913 | tr->perPartitionEPA = FALSE; |
---|
4914 | #ifdef _PAVLOS |
---|
4915 | adef->compressPatterns = FALSE; |
---|
4916 | #endif |
---|
4917 | #ifdef _USE_PTHREADS |
---|
4918 | tr->useFastScaling = FALSE; |
---|
4919 | #endif |
---|
4920 | break; |
---|
4921 | |
---|
4922 | case 'V': |
---|
4923 | adef->mode = CLASSIFY_ML; |
---|
4924 | |
---|
4925 | |
---|
4926 | tr->perPartitionEPA = TRUE; |
---|
4927 | #ifdef _PAVLOS |
---|
4928 | adef->compressPatterns = FALSE; |
---|
4929 | #endif |
---|
4930 | #ifdef _USE_PTHREADS |
---|
4931 | tr->useFastScaling = FALSE; |
---|
4932 | #endif |
---|
4933 | break; |
---|
4934 | case 'w': |
---|
4935 | adef->mode = COMPUTE_ELW; |
---|
4936 | adef->computeELW = TRUE; |
---|
4937 | tr->optimizeAllTrees = FALSE; |
---|
4938 | break; |
---|
4939 | case 'W': |
---|
4940 | adef->mode = COMPUTE_ELW; |
---|
4941 | adef->computeELW = TRUE; |
---|
4942 | tr->optimizeAllTrees = TRUE; |
---|
4943 | break; |
---|
4944 | case 'x': |
---|
4945 | adef->mode = DISTANCE_MODE; |
---|
4946 | adef->computeDistance = TRUE; |
---|
4947 | break; |
---|
4948 | case 'y': |
---|
4949 | adef->mode = CLASSIFY_MP; |
---|
4950 | break; |
---|
4951 | default: |
---|
4952 | { |
---|
4953 | if(processID == 0) |
---|
4954 | { |
---|
4955 | printf("Error select one of the following algorithms via -f :\n"); |
---|
4956 | printMinusFUsage(); |
---|
4957 | } |
---|
4958 | errorExit(-1); |
---|
4959 | } |
---|
4960 | } |
---|
4961 | break; |
---|
4962 | case 'i': |
---|
4963 | sscanf(optarg, "%d", &adef->initial); |
---|
4964 | adef->initialSet = TRUE; |
---|
4965 | break; |
---|
4966 | case 'n': |
---|
4967 | strcpy(run_id,optarg); |
---|
4968 | analyzeRunId(run_id); |
---|
4969 | nameSet = 1; |
---|
4970 | break; |
---|
4971 | case 'w': |
---|
4972 | strcpy(resultDir, optarg); |
---|
4973 | resultDirSet = TRUE; |
---|
4974 | break; |
---|
4975 | case 't': |
---|
4976 | strcpy(tree_file, optarg); |
---|
4977 | adef->restart = TRUE; |
---|
4978 | treeSet = 1; |
---|
4979 | break; |
---|
4980 | case 's': |
---|
4981 | strcpy(seq_file, optarg); |
---|
4982 | alignmentSet = 1; |
---|
4983 | break; |
---|
4984 | case 'm': |
---|
4985 | strcpy(model,optarg); |
---|
4986 | if(modelExists(model, adef) == 0) |
---|
4987 | { |
---|
4988 | if(processID == 0) |
---|
4989 | { |
---|
4990 | printf("Model %s does not exist\n\n", model); |
---|
4991 | printf("For BINARY data use: BINCAT or BINGAMMA or\n"); |
---|
4992 | printf(" BINCATI or BINGAMMAI \n"); |
---|
4993 | printf("For DNA data use: GTRCAT or GTRGAMMA or\n"); |
---|
4994 | printf(" GTRCATI or GTRGAMMAI \n"); |
---|
4995 | printf("For AA data use: PROTCATmatrixName[F] or PROTGAMMAmatrixName[F] or\n"); |
---|
4996 | printf(" PROTCATImatrixName[F] or PROTGAMMAImatrixName[F] \n"); |
---|
4997 | printf("The AA substitution matrix can be one of the following: \n"); |
---|
4998 | |
---|
4999 | { |
---|
5000 | int |
---|
5001 | i; |
---|
5002 | |
---|
5003 | for(i = 0; i < NUM_PROT_MODELS - 1; i++) |
---|
5004 | { |
---|
5005 | if(i % 8 == 0) |
---|
5006 | printf("\n"); |
---|
5007 | printf("%s, ", protModels[i]); |
---|
5008 | } |
---|
5009 | |
---|
5010 | printf("%s\n\n", protModels[i]); |
---|
5011 | } |
---|
5012 | |
---|
5013 | printf("With the optional \"F\" appendix you can specify if you want to use empirical base frequencies\n"); |
---|
5014 | printf("Please note that for mixed models you can in addition specify the per-gene model in\n"); |
---|
5015 | printf("the mixed model file (see manual for details)\n"); |
---|
5016 | } |
---|
5017 | errorExit(-1); |
---|
5018 | } |
---|
5019 | else |
---|
5020 | modelSet = 1; |
---|
5021 | break; |
---|
5022 | default: |
---|
5023 | errorExit(-1); |
---|
5024 | } |
---|
5025 | } |
---|
5026 | |
---|
5027 | |
---|
5028 | |
---|
5029 | #ifdef _USE_PTHREADS |
---|
5030 | if(NumberOfThreads < 2) |
---|
5031 | { |
---|
5032 | printf("\nThe number of threads is currently set to %d\n", NumberOfThreads); |
---|
5033 | printf("Specify the number of threads to run via -T numberOfThreads\n"); |
---|
5034 | printf("NumberOfThreads must be set to an integer value greater than 1\n\n"); |
---|
5035 | errorExit(-1); |
---|
5036 | } |
---|
5037 | #endif |
---|
5038 | |
---|
5039 | #ifdef _QUARTET_MPI |
---|
5040 | if(adef->mode != QUARTET_CALCULATION) |
---|
5041 | { |
---|
5042 | if(processID == 0) |
---|
5043 | { |
---|
5044 | printf("you are using the dedicated RAxML MPI version for parallel quartet computations\n"); |
---|
5045 | printf("However you are not using the quartet option \"-f q\", raxml will exit now ...\n"); |
---|
5046 | } |
---|
5047 | |
---|
5048 | errorExit(-1); |
---|
5049 | } |
---|
5050 | if(!adef->useBinaryModelFile) |
---|
5051 | { |
---|
5052 | if(processID == 0) |
---|
5053 | { |
---|
5054 | printf("you are using the dedicated RAxML MPI version for parallel quartet computations\n"); |
---|
5055 | printf("However you must provide a binary model file via \"-R\" when using the MPI version, raxml will exit now ...\n"); |
---|
5056 | } |
---|
5057 | |
---|
5058 | errorExit(-1); |
---|
5059 | } |
---|
5060 | |
---|
5061 | #endif |
---|
5062 | |
---|
5063 | if(adef->mode == ANCESTRAL_SEQUENCE_TEST && !yFileSet) |
---|
5064 | { |
---|
5065 | if(!yFileSet) |
---|
5066 | { |
---|
5067 | printf("Error, for using the ancestral sequence test you have to provide a ancestral taxon name\n"); |
---|
5068 | printf("candidate file via \"-Y\" \n"); |
---|
5069 | errorExit(-1); |
---|
5070 | } |
---|
5071 | |
---|
5072 | if(!treeSet) |
---|
5073 | { |
---|
5074 | printf("Error, for using the ancestral sequence test you have to provide a tree file\n"); |
---|
5075 | printf("via \"-t\" \n"); |
---|
5076 | errorExit(-1); |
---|
5077 | } |
---|
5078 | } |
---|
5079 | |
---|
5080 | if(tr->catOnly && adef->rapidBoot) |
---|
5081 | { |
---|
5082 | printf("Error, you can not use \"-F\" in conjunction with the rapid bootstrapping option!\n"); |
---|
5083 | printf("it will only work with standard ML tree searches\n"); |
---|
5084 | errorExit(-1); |
---|
5085 | } |
---|
5086 | |
---|
5087 | if(tr->catOnly && adef->boot) |
---|
5088 | { |
---|
5089 | printf("Error, you can not use \"-F\" in conjunction with the standard bootstrapping option!\n"); |
---|
5090 | printf("it will only work with standard ML tree searches\n"); |
---|
5091 | errorExit(-1); |
---|
5092 | } |
---|
5093 | |
---|
5094 | |
---|
5095 | if(bSeedSet && xSeedSet) |
---|
5096 | { |
---|
5097 | printf("Error, you can't seed random seeds by using -x and -b at the same time\n"); |
---|
5098 | printf("use either -x or -b, exiting ......\n"); |
---|
5099 | errorExit(-1); |
---|
5100 | } |
---|
5101 | |
---|
5102 | if(bSeedSet || xSeedSet) |
---|
5103 | { |
---|
5104 | if(!multipleRunsSet) |
---|
5105 | { |
---|
5106 | printf("Error, you have specified a random number seed via -x or -b for some sort of bootstrapping,\n"); |
---|
5107 | printf("but you have not specified a number of replicates via -N or -#, exiting ....\n"); |
---|
5108 | errorExit(-1); |
---|
5109 | } |
---|
5110 | |
---|
5111 | if(adef->multipleRuns == 1) |
---|
5112 | { |
---|
5113 | printf("WARNING, you have specified a random number seed via -x or -b for some sort of bootstrapping,\n"); |
---|
5114 | printf("but you have specified a number of replicates via -N or -# euqal to one\n"); |
---|
5115 | printf("Are you really sure that this is what you want to do?\n"); |
---|
5116 | } |
---|
5117 | |
---|
5118 | |
---|
5119 | } |
---|
5120 | |
---|
5121 | |
---|
5122 | |
---|
5123 | |
---|
5124 | if(adef->computeELW) |
---|
5125 | { |
---|
5126 | if(processID == 0) |
---|
5127 | { |
---|
5128 | if(adef->boot == 0) |
---|
5129 | { |
---|
5130 | printf("Error, you must specify a bootstrap seed via \"-b\" to compute ELW statistics\n"); |
---|
5131 | errorExit(-1); |
---|
5132 | } |
---|
5133 | |
---|
5134 | if(adef->multipleRuns < 2) |
---|
5135 | { |
---|
5136 | printf("Error, you must specify the number of BS replicates via \"-#\" or \"-N\" to compute ELW statistics\n"); |
---|
5137 | printf("it should be larger than 1, recommended setting is 100\n"); |
---|
5138 | errorExit(-1); |
---|
5139 | } |
---|
5140 | |
---|
5141 | if(!treesSet) |
---|
5142 | { |
---|
5143 | printf("Error, you must specify an input file containing several candidate trees\n"); |
---|
5144 | printf("via \"-z\" to compute ELW statistics.\n"); |
---|
5145 | errorExit(-1); |
---|
5146 | } |
---|
5147 | |
---|
5148 | if(!isGamma(adef)) |
---|
5149 | { |
---|
5150 | printf("Error ELW test can only be conducted undetr GAMMA or GAMMA+P-Invar models\n"); |
---|
5151 | errorExit(-1); |
---|
5152 | } |
---|
5153 | } |
---|
5154 | } |
---|
5155 | |
---|
5156 | |
---|
5157 | if(isGamma(adef) && tr->noRateHet) |
---|
5158 | { |
---|
5159 | printf("\n\nError: using a model without any rate heterogeneity (enabled via \"-V\") only works if you specify a CAT model\n"); |
---|
5160 | printf("via the \"-m\" switch, exiting ....\n\n"); |
---|
5161 | errorExit(-1); |
---|
5162 | } |
---|
5163 | |
---|
5164 | if(((!adef->boot) && (!adef->rapidBoot)) && adef->bootStopping) |
---|
5165 | { |
---|
5166 | if(processID == 0) |
---|
5167 | { |
---|
5168 | printf("Can't use automatic bootstopping without actually doing a Bootstrap\n"); |
---|
5169 | printf("Specify either -x randomNumberSeed (rapid) or -b randomNumberSeed (standard)\n"); |
---|
5170 | errorExit(-1); |
---|
5171 | } |
---|
5172 | } |
---|
5173 | |
---|
5174 | if(adef->boot && adef->rapidBoot) |
---|
5175 | { |
---|
5176 | if(processID == 0) |
---|
5177 | { |
---|
5178 | printf("Can't use standard and rapid BOOTSTRAP simultaneously\n"); |
---|
5179 | errorExit(-1); |
---|
5180 | } |
---|
5181 | } |
---|
5182 | |
---|
5183 | if(adef->rapidBoot) |
---|
5184 | { |
---|
5185 | if(processID == 0 && (adef->restart || treesSet) && !(groupSet || constraintSet)) |
---|
5186 | { |
---|
5187 | printf("Error, starting tree(s) will be ignored by rapid Bootstrapping\n"); |
---|
5188 | errorExit(-1); |
---|
5189 | } |
---|
5190 | } |
---|
5191 | |
---|
5192 | if(adef->allInOne && (adef->rapidBoot == 0)) |
---|
5193 | { |
---|
5194 | if(processID == 0) |
---|
5195 | { |
---|
5196 | printf("Error, to carry out an ML search after a rapid BS inference you must specify a random number seed with -x\n"); |
---|
5197 | errorExit(-1); |
---|
5198 | } |
---|
5199 | } |
---|
5200 | |
---|
5201 | |
---|
5202 | |
---|
5203 | |
---|
5204 | if(adef->mode == PER_SITE_LL) |
---|
5205 | { |
---|
5206 | if(!isGamma(adef)) |
---|
5207 | { |
---|
5208 | if(processID == 0) |
---|
5209 | printf("\n ERROR: Computation of per-site log LHs is only allowed under GAMMA model of rate heterogeneity!\n"); |
---|
5210 | errorExit(-1); |
---|
5211 | } |
---|
5212 | |
---|
5213 | if(!treesSet) |
---|
5214 | { |
---|
5215 | if(processID == 0) |
---|
5216 | printf("\n ERROR: For Computation of per-site log LHs you need to specify several input trees with \"-z\"\n"); |
---|
5217 | errorExit(-1); |
---|
5218 | } |
---|
5219 | } |
---|
5220 | |
---|
5221 | if(adef->mode == FAST_SEARCH && (adef->grouping || adef->constraint)) |
---|
5222 | { |
---|
5223 | if(processID == 0) |
---|
5224 | printf("\n ERROR: Fast ML search algorithms -f F and -f E can not take as input constraint trees specified via -g or -r, since they will be ignored\n"); |
---|
5225 | errorExit(-1); |
---|
5226 | } |
---|
5227 | |
---|
5228 | if(adef->mode == SPLIT_MULTI_GENE && (!adef->useMultipleModel)) |
---|
5229 | { |
---|
5230 | if(processID == 0) |
---|
5231 | { |
---|
5232 | printf("\n Error, you are trying to split a multi-gene alignment into individual genes with the \"-f s\" option\n"); |
---|
5233 | printf("Without specifying a multiple model file with \"-q modelFileName\" \n"); |
---|
5234 | } |
---|
5235 | errorExit(-1); |
---|
5236 | } |
---|
5237 | |
---|
5238 | if(adef->mode == ROOT_TREE && !treeSet) |
---|
5239 | { |
---|
5240 | if(processID == 0) |
---|
5241 | printf("\n Error, for the tree rooting algorithm you need to specify a file containing the tree you want to root via \"-t\"\n"); |
---|
5242 | errorExit(-1); |
---|
5243 | } |
---|
5244 | |
---|
5245 | if((adef->mode == CALC_BIPARTITIONS || adef->mode == CALC_BIPARTITIONS_IC) && !treesSet) |
---|
5246 | { |
---|
5247 | if(processID == 0) |
---|
5248 | printf("\n Error, in bipartition and IC computation mode you must specify a file containing multiple trees with the \"-z\" option\n"); |
---|
5249 | errorExit(-1); |
---|
5250 | } |
---|
5251 | |
---|
5252 | if((adef->mode == CALC_BIPARTITIONS || adef->mode == CALC_BIPARTITIONS_IC) && !adef->restart) |
---|
5253 | { |
---|
5254 | if(processID == 0) |
---|
5255 | printf("\n Error, in bipartition and IC computation mode you must specify a tree on which bipartition information will be drawn with the \"-t\" option\n"); |
---|
5256 | errorExit(-1); |
---|
5257 | } |
---|
5258 | |
---|
5259 | if(!modelSet) |
---|
5260 | { |
---|
5261 | if(processID == 0) |
---|
5262 | printf("\n Error, you must specify a model of substitution with the \"-m\" option\n"); |
---|
5263 | errorExit(-1); |
---|
5264 | } |
---|
5265 | |
---|
5266 | if(adef->computeDistance) |
---|
5267 | { |
---|
5268 | if(isCat(adef)) |
---|
5269 | { |
---|
5270 | if(processID == 0) |
---|
5271 | printf("\n Error pairwise distance computation only allowed for GAMMA-based models of rate heterogeneity\n"); |
---|
5272 | errorExit(-1); |
---|
5273 | } |
---|
5274 | |
---|
5275 | if(adef->restart) |
---|
5276 | { |
---|
5277 | if(adef->randomStartingTree) |
---|
5278 | { |
---|
5279 | if(processID == 0) |
---|
5280 | printf("\n Error pairwise distance computation not allowed for random starting trees\n"); |
---|
5281 | errorExit(-1); |
---|
5282 | } |
---|
5283 | |
---|
5284 | if(adef->constraint) |
---|
5285 | { |
---|
5286 | if(processID == 0) |
---|
5287 | printf("\n Error pairwise distance computation not allowed for binary backbone constraint tree\n"); |
---|
5288 | errorExit(-1); |
---|
5289 | } |
---|
5290 | |
---|
5291 | if(adef->grouping) |
---|
5292 | { |
---|
5293 | if(processID == 0) |
---|
5294 | printf("\n Error pairwise distance computation not allowed for constraint tree\n"); |
---|
5295 | errorExit(-1); |
---|
5296 | } |
---|
5297 | |
---|
5298 | } |
---|
5299 | |
---|
5300 | if(adef->boot || adef->rapidBoot) |
---|
5301 | { |
---|
5302 | if(processID == 0) |
---|
5303 | printf("\n Bootstrapping not implemented for pairwise distance computation\n"); |
---|
5304 | errorExit(-1); |
---|
5305 | } |
---|
5306 | } |
---|
5307 | |
---|
5308 | |
---|
5309 | |
---|
5310 | |
---|
5311 | |
---|
5312 | |
---|
5313 | |
---|
5314 | |
---|
5315 | if(!adef->restart && adef->mode == PARSIMONY_ADDITION) |
---|
5316 | { |
---|
5317 | if(processID == 0) |
---|
5318 | { |
---|
5319 | printf("\n You need to specify an incomplete binary input tree with \"-t\" to execute \n"); |
---|
5320 | printf(" RAxML MP stepwise addition with \"-f p\"\n"); |
---|
5321 | } |
---|
5322 | errorExit(-1); |
---|
5323 | } |
---|
5324 | |
---|
5325 | |
---|
5326 | |
---|
5327 | if(adef->restart && adef->randomStartingTree) |
---|
5328 | { |
---|
5329 | if(processID == 0) |
---|
5330 | { |
---|
5331 | if(adef->constraint) |
---|
5332 | { |
---|
5333 | printf("\n Error you specified a binary constraint tree with -r AND the computation\n"); |
---|
5334 | printf("of a random starting tree with -d for the same run\n"); |
---|
5335 | } |
---|
5336 | else |
---|
5337 | { |
---|
5338 | if(adef->grouping) |
---|
5339 | { |
---|
5340 | printf("\n Error you specified a multifurcating constraint tree with -g AND the computation\n"); |
---|
5341 | printf("of a random starting tree with -d for the same run\n"); |
---|
5342 | } |
---|
5343 | else |
---|
5344 | { |
---|
5345 | printf("\n Error you specified a starting tree with -t AND the computation\n"); |
---|
5346 | printf("of a random starting tree with -d for the same run\n"); |
---|
5347 | } |
---|
5348 | } |
---|
5349 | } |
---|
5350 | errorExit(-1); |
---|
5351 | } |
---|
5352 | |
---|
5353 | if(adef->outgroup && adef->mode == ANCESTRAL_STATES) |
---|
5354 | { |
---|
5355 | if(processID == 0) |
---|
5356 | { |
---|
5357 | printf("\n Specifying an outgroup for ancestral state reconstruction is not allowed\n"); |
---|
5358 | printf(" You already need to specify a rooted input tree for computing ancestral states anyway.\n\n"); |
---|
5359 | } |
---|
5360 | errorExit(-1); |
---|
5361 | } |
---|
5362 | |
---|
5363 | if(!treeSet && adef->mode == ANCESTRAL_STATES) |
---|
5364 | { |
---|
5365 | if(processID == 0) |
---|
5366 | printf("\n Error you need to specify a ROOTED binary reference tree for ancestral state computations\n"); |
---|
5367 | errorExit(-1); |
---|
5368 | } |
---|
5369 | |
---|
5370 | if(treeSet && constraintSet) |
---|
5371 | { |
---|
5372 | if(processID == 0) |
---|
5373 | printf("\n Error you specified a binary constraint tree AND a starting tree for the same run\n"); |
---|
5374 | errorExit(-1); |
---|
5375 | } |
---|
5376 | |
---|
5377 | |
---|
5378 | if(treeSet && groupSet) |
---|
5379 | { |
---|
5380 | if(processID == 0) |
---|
5381 | printf("\n Error you specified a multifurcating constraint tree AND a starting tree for the same run\n"); |
---|
5382 | errorExit(-1); |
---|
5383 | } |
---|
5384 | |
---|
5385 | |
---|
5386 | if(groupSet && constraintSet) |
---|
5387 | { |
---|
5388 | if(processID == 0) |
---|
5389 | printf("\n Error you specified a bifurcating constraint tree AND a multifurcating constraint tree for the same run\n"); |
---|
5390 | errorExit(-1); |
---|
5391 | } |
---|
5392 | |
---|
5393 | if(adef->restart && adef->startingTreeOnly) |
---|
5394 | { |
---|
5395 | if(processID == 0) |
---|
5396 | { |
---|
5397 | printf("\n Error conflicting options: you want to compute only a parsimony starting tree with -y\n"); |
---|
5398 | printf(" while you actually specified a starting tree with -t %s\n", tree_file); |
---|
5399 | } |
---|
5400 | errorExit(-1); |
---|
5401 | } |
---|
5402 | |
---|
5403 | if((adef->mode == TREE_EVALUATION || adef->mode == OPTIMIZE_BR_LEN_SCALER) && (!adef->restart)) |
---|
5404 | { |
---|
5405 | if(processID == 0) |
---|
5406 | printf("\n Error: please specify a treefile for the tree you want to evaluate with -t\n"); |
---|
5407 | errorExit(-1); |
---|
5408 | } |
---|
5409 | |
---|
5410 | #ifdef _WAYNE_MPI |
---|
5411 | |
---|
5412 | if(adef->mode == SPLIT_MULTI_GENE) |
---|
5413 | { |
---|
5414 | if(processID == 0) |
---|
5415 | printf("Multi gene alignment splitting (-f s) not implemented for the MPI-Version\n"); |
---|
5416 | errorExit(-1); |
---|
5417 | } |
---|
5418 | |
---|
5419 | if(adef->mode == TREE_EVALUATION) |
---|
5420 | { |
---|
5421 | if(processID == 0) |
---|
5422 | printf("Tree Evaluation mode (-f e) not implemented for the MPI-Version\n"); |
---|
5423 | errorExit(-1); |
---|
5424 | } |
---|
5425 | |
---|
5426 | if(adef->mode == OPTIMIZE_BR_LEN_SCALER) |
---|
5427 | { |
---|
5428 | if(processID == 0) |
---|
5429 | printf("Branch length scaler optimization mode (-f B) not implemented for the MPI-Version\n"); |
---|
5430 | errorExit(-1); |
---|
5431 | } |
---|
5432 | |
---|
5433 | if(adef->mode == CALC_BIPARTITIONS) |
---|
5434 | { |
---|
5435 | if(processID == 0) |
---|
5436 | printf("Computation of bipartitions (-f b) not implemented for the MPI-Version\n"); |
---|
5437 | errorExit(-1); |
---|
5438 | } |
---|
5439 | |
---|
5440 | if(adef->mode == CALC_BIPARTITIONS_IC) |
---|
5441 | { |
---|
5442 | if(processID == 0) |
---|
5443 | printf("Computation of IC and TC scores (-f i) not implemented for the MPI-Version\n"); |
---|
5444 | errorExit(-1); |
---|
5445 | } |
---|
5446 | |
---|
5447 | if(adef->multipleRuns == 1) |
---|
5448 | { |
---|
5449 | if(processID == 0) |
---|
5450 | { |
---|
5451 | printf("Error: you are running the parallel MPI program but only want to compute one tree\n"); |
---|
5452 | printf("For the MPI version you must specify a number of trees greater than 1 with the -# or -N option\n"); |
---|
5453 | } |
---|
5454 | errorExit(-1); |
---|
5455 | } |
---|
5456 | |
---|
5457 | #endif |
---|
5458 | |
---|
5459 | if((adef->mode == TREE_EVALUATION || adef->mode == OPTIMIZE_BR_LEN_SCALER) && (isCat(adef))) |
---|
5460 | { |
---|
5461 | if(processID == 0) |
---|
5462 | { |
---|
5463 | printf("\n Warning: tree evaluation with CAT model of rate heterogeneity\n"); |
---|
5464 | printf("Only compare likelihood values for identical rate category assignments\n"); |
---|
5465 | printf("CAT-based Branch lengths are on average shorter by factor 0.5 than GAMMA-based branch lengths\n"); |
---|
5466 | printf("... but highly correlated with GAMMA branch lengths\n"); |
---|
5467 | } |
---|
5468 | } |
---|
5469 | |
---|
5470 | if(!nameSet) |
---|
5471 | { |
---|
5472 | if(processID == 0) |
---|
5473 | printf("\n Error: please specify a name for this run with -n\n"); |
---|
5474 | errorExit(-1); |
---|
5475 | } |
---|
5476 | |
---|
5477 | if(! alignmentSet && !adef->readTaxaOnly) |
---|
5478 | { |
---|
5479 | if(processID == 0) |
---|
5480 | printf("\n Error: please specify an alignment for this run with -s\n"); |
---|
5481 | errorExit(-1); |
---|
5482 | } |
---|
5483 | |
---|
5484 | |
---|
5485 | { |
---|
5486 | #ifdef WIN32 |
---|
5487 | const |
---|
5488 | char *separator = "\\"; |
---|
5489 | #else |
---|
5490 | const |
---|
5491 | char *separator = "/"; |
---|
5492 | #endif |
---|
5493 | |
---|
5494 | if(resultDirSet) |
---|
5495 | { |
---|
5496 | char |
---|
5497 | dir[1024] = ""; |
---|
5498 | |
---|
5499 | #ifndef WIN32 |
---|
5500 | if(resultDir[0] != separator[0]) |
---|
5501 | strcat(dir, separator); |
---|
5502 | #endif |
---|
5503 | |
---|
5504 | strcat(dir, resultDir); |
---|
5505 | |
---|
5506 | if(dir[strlen(dir) - 1] != separator[0]) |
---|
5507 | strcat(dir, separator); |
---|
5508 | strcpy(workdir, dir); |
---|
5509 | } |
---|
5510 | else |
---|
5511 | { |
---|
5512 | char |
---|
5513 | dir[1024] = "", |
---|
5514 | *result = getcwd(dir, sizeof(dir)); |
---|
5515 | |
---|
5516 | assert(result != (char*)NULL); |
---|
5517 | |
---|
5518 | if(dir[strlen(dir) - 1] != separator[0]) |
---|
5519 | strcat(dir, separator); |
---|
5520 | |
---|
5521 | strcpy(workdir, dir); |
---|
5522 | } |
---|
5523 | } |
---|
5524 | |
---|
5525 | return; |
---|
5526 | } |
---|
5527 | |
---|
5528 | |
---|
5529 | |
---|
5530 | |
---|
/*
 * Terminate the program with exit status e.
 *
 * When the file is compiled with _WAYNE_MPI or _QUARTET_MPI defined,
 * MPI_Finalize() is invoked before exit(); otherwise exit() is called
 * directly. The function does not return to its caller.
 */
void errorExit(int e)
{
#if defined(_WAYNE_MPI) || defined(_QUARTET_MPI)
  MPI_Finalize();
#endif

  exit(e);
}
---|
5541 | |
---|
5542 | |
---|
5543 | |
---|
5544 | static void makeFileNames(void) |
---|
5545 | { |
---|
5546 | int infoFileExists = 0; |
---|
5547 | |
---|
5548 | strcpy(verboseSplitsFileName, workdir); |
---|
5549 | strcpy(permFileName, workdir); |
---|
5550 | strcpy(resultFileName, workdir); |
---|
5551 | strcpy(logFileName, workdir); |
---|
5552 | strcpy(checkpointFileName, workdir); |
---|
5553 | strcpy(infoFileName, workdir); |
---|
5554 | strcpy(randomFileName, workdir); |
---|
5555 | strcpy(bootstrapFileName, workdir); |
---|
5556 | strcpy(bipartitionsFileName, workdir); |
---|
5557 | strcpy(bipartitionsFileNameBranchLabels, workdir); |
---|
5558 | strcpy(icFileNameBranchLabels, workdir); |
---|
5559 | strcpy(ratesFileName, workdir); |
---|
5560 | strcpy(lengthFileName, workdir); |
---|
5561 | strcpy(lengthFileNameModel, workdir); |
---|
5562 | strcpy(perSiteLLsFileName, workdir); |
---|
5563 | strcpy(binaryModelParamsOutputFileName, workdir); |
---|
5564 | |
---|
5565 | strcat(verboseSplitsFileName, "RAxML_verboseSplits."); |
---|
5566 | strcat(permFileName, "RAxML_parsimonyTree."); |
---|
5567 | strcat(resultFileName, "RAxML_result."); |
---|
5568 | strcat(logFileName, "RAxML_log."); |
---|
5569 | strcat(checkpointFileName, "RAxML_checkpoint."); |
---|
5570 | strcat(infoFileName, "RAxML_info."); |
---|
5571 | strcat(randomFileName, "RAxML_randomTree."); |
---|
5572 | strcat(bootstrapFileName, "RAxML_bootstrap."); |
---|
5573 | strcat(bipartitionsFileName, "RAxML_bipartitions."); |
---|
5574 | strcat(bipartitionsFileNameBranchLabels, "RAxML_bipartitionsBranchLabels."); |
---|
5575 | strcat(icFileNameBranchLabels, "RAxML_IC_Score_BranchLabels."); |
---|
5576 | strcat(ratesFileName, "RAxML_perSiteRates."); |
---|
5577 | strcat(lengthFileName, "RAxML_treeLength."); |
---|
5578 | strcat(lengthFileNameModel, "RAxML_treeLengthModel."); |
---|
5579 | strcat(perSiteLLsFileName, "RAxML_perSiteLLs."); |
---|
5580 | strcat(binaryModelParamsOutputFileName, "RAxML_binaryModelParameters."); |
---|
5581 | |
---|
5582 | strcat(verboseSplitsFileName, run_id); |
---|
5583 | strcat(permFileName, run_id); |
---|
5584 | strcat(resultFileName, run_id); |
---|
5585 | strcat(logFileName, run_id); |
---|
5586 | strcat(checkpointFileName, run_id); |
---|
5587 | strcat(infoFileName, run_id); |
---|
5588 | strcat(randomFileName, run_id); |
---|
5589 | strcat(bootstrapFileName, run_id); |
---|
5590 | strcat(bipartitionsFileName, run_id); |
---|
5591 | strcat(bipartitionsFileNameBranchLabels, run_id); |
---|
5592 | strcat(icFileNameBranchLabels, run_id); |
---|
5593 | strcat(ratesFileName, run_id); |
---|
5594 | strcat(lengthFileName, run_id); |
---|
5595 | strcat(lengthFileNameModel, run_id); |
---|
5596 | strcat(perSiteLLsFileName, run_id); |
---|
5597 | strcat(binaryModelParamsOutputFileName, run_id); |
---|
5598 | |
---|
5599 | #ifdef _WAYNE_MPI |
---|
5600 | { |
---|
5601 | char buf[64]; |
---|
5602 | |
---|
5603 | strcpy(bootstrapFileNamePID, bootstrapFileName); |
---|
5604 | strcat(bootstrapFileNamePID, ".PID."); |
---|
5605 | sprintf(buf, "%d", processID); |
---|
5606 | strcat(bootstrapFileNamePID, buf); |
---|
5607 | } |
---|
5608 | #endif |
---|
5609 | |
---|
5610 | if(processID == 0) |
---|
5611 | { |
---|
5612 | infoFileExists = filexists(infoFileName); |
---|
5613 | |
---|
5614 | if(infoFileExists) |
---|
5615 | { |
---|
5616 | printf("RAxML output files with the run ID <%s> already exist \n", run_id); |
---|
5617 | printf("in directory %s ...... exiting\n", workdir); |
---|
5618 | |
---|
5619 | exit(-1); |
---|
5620 | } |
---|
5621 | } |
---|
5622 | } |
---|
5623 | |
---|
5624 | |
---|
5625 | |
---|
5626 | |
---|
5627 | |
---|
5628 | |
---|
5629 | |
---|
5630 | |
---|
5631 | |
---|
5632 | /***********************reading and initializing input ******************/ |
---|
5633 | |
---|
5634 | |
---|
5635 | /********************PRINTING various INFO **************************************/ |
---|
5636 | |
---|
5637 | |
---|
5638 | void printBaseFrequencies(tree *tr) |
---|
5639 | { |
---|
5640 | if(processID == 0) |
---|
5641 | { |
---|
5642 | int |
---|
5643 | model; |
---|
5644 | |
---|
5645 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
5646 | { |
---|
5647 | int i; |
---|
5648 | |
---|
5649 | printBothOpen("Partition: %d with name: %s\n", model, tr->partitionData[model].partitionName); |
---|
5650 | printBothOpen("Base frequencies: "); |
---|
5651 | |
---|
5652 | if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X) |
---|
5653 | { |
---|
5654 | int |
---|
5655 | k; |
---|
5656 | |
---|
5657 | printBothOpen("\n"); |
---|
5658 | |
---|
5659 | for(k = 0; k < 4; k++) |
---|
5660 | { |
---|
5661 | printBothOpen("LG4 %d: ", k); |
---|
5662 | for(i = 0; i < tr->partitionData[model].states; i++) |
---|
5663 | printBothOpen("%1.3f ", tr->partitionData[model].frequencies_LG4[k][i]); |
---|
5664 | printBothOpen("\n"); |
---|
5665 | } |
---|
5666 | } |
---|
5667 | else |
---|
5668 | { |
---|
5669 | for(i = 0; i < tr->partitionData[model].states; i++) |
---|
5670 | printBothOpen("%1.3f ", tr->partitionData[model].frequencies[i]); |
---|
5671 | } |
---|
5672 | |
---|
5673 | printBothOpen("\n\n"); |
---|
5674 | } |
---|
5675 | } |
---|
5676 | } |
---|
5677 | |
---|
5678 | static void printModelAndProgramInfo(tree *tr, analdef *adef, int argc, char *argv[]) |
---|
5679 | { |
---|
5680 | if(processID == 0) |
---|
5681 | { |
---|
5682 | int i, model; |
---|
5683 | FILE *infoFile = myfopen(infoFileName, "ab"); |
---|
5684 | char modelType[128]; |
---|
5685 | |
---|
5686 | if(!adef->readTaxaOnly) |
---|
5687 | { |
---|
5688 | if(adef->useInvariant) |
---|
5689 | strcpy(modelType, "GAMMA+P-Invar"); |
---|
5690 | else |
---|
5691 | strcpy(modelType, "GAMMA"); |
---|
5692 | } |
---|
5693 | |
---|
5694 | printVersionInfo(FALSE, infoFile); |
---|
5695 | |
---|
5696 | |
---|
5697 | |
---|
5698 | if(!adef->readTaxaOnly) |
---|
5699 | { |
---|
5700 | if(!adef->compressPatterns) |
---|
5701 | printBoth(infoFile, "\nAlignment has %d columns\n\n", tr->cdta->endsite); |
---|
5702 | else |
---|
5703 | printBoth(infoFile, "\nAlignment has %d distinct alignment patterns\n\n", tr->cdta->endsite); |
---|
5704 | |
---|
5705 | if(adef->useInvariant) |
---|
5706 | printBoth(infoFile, "Found %d invariant alignment patterns that correspond to %d columns \n", tr->numberOfInvariableColumns, tr->weightOfInvariableColumns); |
---|
5707 | |
---|
5708 | printBoth(infoFile, "Proportion of gaps and completely undetermined characters in this alignment: %3.2f%s\n", 100.0 * adef->gapyness, "%"); |
---|
5709 | } |
---|
5710 | |
---|
5711 | switch(adef->mode) |
---|
5712 | { |
---|
5713 | case DISTANCE_MODE: |
---|
5714 | printBoth(infoFile, "\nRAxML Computation of pairwise distances\n\n"); |
---|
5715 | break; |
---|
5716 | case TREE_EVALUATION : |
---|
5717 | printBoth(infoFile, "\nRAxML Model Optimization up to an accuracy of %f log likelihood units\n\n", adef->likelihoodEpsilon); |
---|
5718 | break; |
---|
5719 | case BIG_RAPID_MODE: |
---|
5720 | if(adef->rapidBoot) |
---|
5721 | { |
---|
5722 | if(adef->allInOne) |
---|
5723 | printBoth(infoFile, "\nRAxML rapid bootstrapping and subsequent ML search\n\n"); |
---|
5724 | else |
---|
5725 | printBoth(infoFile, "\nRAxML rapid bootstrapping algorithm\n\n"); |
---|
5726 | } |
---|
5727 | else |
---|
5728 | printBoth(infoFile, "\nRAxML rapid hill-climbing mode\n\n"); |
---|
5729 | break; |
---|
5730 | case CALC_BIPARTITIONS: |
---|
5731 | printBoth(infoFile, "\nRAxML Bipartition Computation: Drawing support values from trees in file %s onto tree in file %s\n\n", |
---|
5732 | bootStrapFile, tree_file); |
---|
5733 | break; |
---|
5734 | case CALC_BIPARTITIONS_IC: |
---|
5735 | printBoth(infoFile, "\nRAxML IC and TC score Computation: Computing IC and TC scores induced by trees in file %s w.r.t. tree in file %s\n\n", |
---|
5736 | bootStrapFile, tree_file); |
---|
5737 | break; |
---|
5738 | case PER_SITE_LL: |
---|
5739 | printBoth(infoFile, "\nRAxML computation of per-site log likelihoods\n"); |
---|
5740 | break; |
---|
5741 | case PARSIMONY_ADDITION: |
---|
5742 | printBoth(infoFile, "\nRAxML stepwise MP addition to incomplete starting tree\n\n"); |
---|
5743 | break; |
---|
5744 | case CLASSIFY_ML: |
---|
5745 | printBoth(infoFile, "\nRAxML likelihood-based placement algorithm\n\n"); |
---|
5746 | break; |
---|
5747 | case CLASSIFY_MP: |
---|
5748 | printBoth(infoFile, "\nRAxML parsimony-based placement algorithm\n\n"); |
---|
5749 | break; |
---|
5750 | case GENERATE_BS: |
---|
5751 | printBoth(infoFile, "\nRAxML BS replicate generation\n\n"); |
---|
5752 | break; |
---|
5753 | case COMPUTE_ELW: |
---|
5754 | printBoth(infoFile, "\nRAxML ELW test\n\n"); |
---|
5755 | break; |
---|
5756 | case BOOTSTOP_ONLY: |
---|
5757 | printBoth(infoFile, "\nRAxML a posteriori Bootstrap convergence assessment\n\n"); |
---|
5758 | break; |
---|
5759 | case CONSENSUS_ONLY: |
---|
5760 | if(adef->leaveDropMode) |
---|
5761 | printBoth(infoFile, "\nRAxML rogue taxa computation by Andre Aberer (HITS)\n\n"); |
---|
5762 | else |
---|
5763 | printBoth(infoFile, "\nRAxML consensus tree computation\n\n"); |
---|
5764 | break; |
---|
5765 | case COMPUTE_LHS: |
---|
5766 | printBoth(infoFile, "\nRAxML computation of likelihoods for a set of trees\n\n"); |
---|
5767 | break; |
---|
5768 | case COMPUTE_BIPARTITION_CORRELATION: |
---|
5769 | printBoth(infoFile, "\nRAxML computation of bipartition support correlation on two sets of trees\n\n"); |
---|
5770 | break; |
---|
5771 | case COMPUTE_RF_DISTANCE: |
---|
5772 | printBoth(infoFile, "\nRAxML computation of RF distances for all pairs of trees in a set of trees\n\n"); |
---|
5773 | break; |
---|
5774 | case MORPH_CALIBRATOR: |
---|
5775 | printBoth(infoFile, "\nRAxML morphological calibrator using Maximum Likelihood\n\n"); |
---|
5776 | break; |
---|
5777 | case FAST_SEARCH: |
---|
5778 | printBoth(infoFile, "\nRAxML experimental very fast tree search\n\n"); |
---|
5779 | break; |
---|
5780 | case SH_LIKE_SUPPORTS: |
---|
5781 | printBoth(infoFile, "\nRAxML computation of SH-like support values on a given tree\n\n"); |
---|
5782 | break; |
---|
5783 | case EPA_SITE_SPECIFIC_BIAS: |
---|
5784 | printBoth(infoFile, "\nRAxML exprimental site-specfific phylogenetic placement bias analysis algorithm\n\n"); |
---|
5785 | break; |
---|
5786 | case ANCESTRAL_STATES: |
---|
5787 | printBoth(infoFile, "\nRAxML marginal ancestral state computation\n\n"); |
---|
5788 | break; |
---|
5789 | case QUARTET_CALCULATION: |
---|
5790 | printBoth(infoFile, "\nRAxML quartet computation\n\n"); |
---|
5791 | break; |
---|
5792 | case THOROUGH_OPTIMIZATION: |
---|
5793 | printBoth(infoFile, "\nRAxML thorough tree optimization\n\n"); |
---|
5794 | break; |
---|
5795 | case OPTIMIZE_BR_LEN_SCALER : |
---|
5796 | printBoth(infoFile, "\nRAxML Branch length scaler and other model parameter optimization up to an accuracy of %f log likelihood units\n\n", adef->likelihoodEpsilon); |
---|
5797 | break; |
---|
5798 | case ANCESTRAL_SEQUENCE_TEST: |
---|
5799 | printBoth(infoFile, "\nRAxML ancestral sequence test for Jiajie\n\n"); |
---|
5800 | break; |
---|
5801 | case PLAUSIBILITY_CHECKER: |
---|
5802 | printBoth(infoFile, "\nRAxML large-tree plausibility-checker\n\n"); |
---|
5803 | break; |
---|
5804 | case ROOT_TREE: |
---|
5805 | printBoth(infoFile, "\nRAxML tree rooting algorithm\n\n"); |
---|
5806 | break; |
---|
5807 | default: |
---|
5808 | assert(0); |
---|
5809 | } |
---|
5810 | |
---|
5811 | |
---|
5812 | if(!adef->readTaxaOnly) |
---|
5813 | { |
---|
5814 | if(adef->perGeneBranchLengths) |
---|
5815 | printBoth(infoFile, "Using %d distinct models/data partitions with individual per partition branch length optimization\n\n\n", tr->NumberOfModels); |
---|
5816 | else |
---|
5817 | printBoth(infoFile, "Using %d distinct models/data partitions with joint branch length optimization\n\n\n", tr->NumberOfModels); |
---|
5818 | } |
---|
5819 | |
---|
5820 | if(adef->mode == BIG_RAPID_MODE) |
---|
5821 | { |
---|
5822 | if(adef->rapidBoot) |
---|
5823 | { |
---|
5824 | if(adef->allInOne) |
---|
5825 | printBoth(infoFile, "\nExecuting %d rapid bootstrap inferences and thereafter a thorough ML search \n\n", adef->multipleRuns); |
---|
5826 | else |
---|
5827 | printBoth(infoFile, "\nExecuting %d rapid bootstrap inferences\n\n", adef->multipleRuns); |
---|
5828 | } |
---|
5829 | else |
---|
5830 | { |
---|
5831 | if(adef->boot) |
---|
5832 | printBoth(infoFile, "Executing %d non-parametric bootstrap inferences\n\n", adef->multipleRuns); |
---|
5833 | else |
---|
5834 | { |
---|
5835 | char treeType[1024]; |
---|
5836 | |
---|
5837 | if(adef->restart) |
---|
5838 | strcpy(treeType, "user-specifed"); |
---|
5839 | else |
---|
5840 | { |
---|
5841 | if(adef->randomStartingTree) |
---|
5842 | strcpy(treeType, "distinct complete random"); |
---|
5843 | else |
---|
5844 | strcpy(treeType, "distinct randomized MP"); |
---|
5845 | } |
---|
5846 | |
---|
5847 | printBoth(infoFile, "Executing %d inferences on the original alignment using %d %s trees\n\n", |
---|
5848 | adef->multipleRuns, adef->multipleRuns, treeType); |
---|
5849 | } |
---|
5850 | } |
---|
5851 | } |
---|
5852 | |
---|
5853 | |
---|
5854 | if(!adef->readTaxaOnly) |
---|
5855 | { |
---|
5856 | printBoth(infoFile, "All free model parameters will be estimated by RAxML\n"); |
---|
5857 | |
---|
5858 | |
---|
5859 | if(tr->rateHetModel == GAMMA || tr->rateHetModel == GAMMA_I) |
---|
5860 | printBoth(infoFile, "%s model of rate heteorgeneity, ML estimate of alpha-parameter\n\n", modelType); |
---|
5861 | else |
---|
5862 | { |
---|
5863 | printBoth(infoFile, "ML estimate of %d per site rate categories\n\n", adef->categories); |
---|
5864 | if(adef->mode != CLASSIFY_ML && adef->mode != CLASSIFY_MP) |
---|
5865 | printBoth(infoFile, "Likelihood of final tree will be evaluated and optimized under %s\n\n", modelType); |
---|
5866 | } |
---|
5867 | |
---|
5868 | if(adef->mode != CLASSIFY_ML && adef->mode != CLASSIFY_MP) |
---|
5869 | printBoth(infoFile, "%s Model parameters will be estimated up to an accuracy of %2.10f Log Likelihood units\n\n", |
---|
5870 | modelType, adef->likelihoodEpsilon); |
---|
5871 | |
---|
5872 | |
---|
5873 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
5874 | { |
---|
5875 | printBoth(infoFile, "Partition: %d\n", model); |
---|
5876 | printBoth(infoFile, "Alignment Patterns: %d\n", tr->partitionData[model].upper - tr->partitionData[model].lower); |
---|
5877 | printBoth(infoFile, "Name: %s\n", tr->partitionData[model].partitionName); |
---|
5878 | |
---|
5879 | switch(tr->partitionData[model].dataType) |
---|
5880 | { |
---|
5881 | case DNA_DATA: |
---|
5882 | printBoth(infoFile, "DataType: DNA\n"); |
---|
5883 | printBoth(infoFile, "Substitution Matrix: GTR\n"); |
---|
5884 | break; |
---|
5885 | case AA_DATA: |
---|
5886 | assert(tr->partitionData[model].protModels >= 0 && tr->partitionData[model].protModels < NUM_PROT_MODELS); |
---|
5887 | printBoth(infoFile, "DataType: AA\n"); |
---|
5888 | if(tr->partitionData[model].protModels != PROT_FILE) |
---|
5889 | { |
---|
5890 | printBoth(infoFile, "Substitution Matrix: %s\n", protModels[tr->partitionData[model].protModels]); |
---|
5891 | printBoth(infoFile, "Using %s base frequencies\n", (tr->partitionData[model].usePredefinedProtFreqs == TRUE)?"fixed":"empirical"); |
---|
5892 | } |
---|
5893 | else |
---|
5894 | { |
---|
5895 | printBoth(infoFile, "Substitution Matrix File name: %s\n", tr->partitionData[model].proteinSubstitutionFileName); |
---|
5896 | printBoth(infoFile, "Using base frequencies as provided in the model file\n"); |
---|
5897 | } |
---|
5898 | break; |
---|
5899 | case BINARY_DATA: |
---|
5900 | printBoth(infoFile, "DataType: BINARY/MORPHOLOGICAL\n"); |
---|
5901 | printBoth(infoFile, "Substitution Matrix: Uncorrected\n"); |
---|
5902 | break; |
---|
5903 | case SECONDARY_DATA: |
---|
5904 | printBoth(infoFile, "DataType: SECONDARY STRUCTURE\n"); |
---|
5905 | printBoth(infoFile, "Substitution Matrix: %s\n", secondaryModelList[tr->secondaryStructureModel]); |
---|
5906 | break; |
---|
5907 | case SECONDARY_DATA_6: |
---|
5908 | printBoth(infoFile, "DataType: SECONDARY STRUCTURE 6 STATE\n"); |
---|
5909 | printBoth(infoFile, "Substitution Matrix: %s\n", secondaryModelList[tr->secondaryStructureModel]); |
---|
5910 | break; |
---|
5911 | case SECONDARY_DATA_7: |
---|
5912 | printBoth(infoFile, "DataType: SECONDARY STRUCTURE 7 STATE\n"); |
---|
5913 | printBoth(infoFile, "Substitution Matrix: %s\n", secondaryModelList[tr->secondaryStructureModel]); |
---|
5914 | break; |
---|
5915 | case GENERIC_32: |
---|
5916 | printBoth(infoFile, "DataType: Multi-State with %d distinct states in use (maximum 32)\n",tr->partitionData[model].states); |
---|
5917 | switch(tr->multiStateModel) |
---|
5918 | { |
---|
5919 | case ORDERED_MULTI_STATE: |
---|
5920 | printBoth(infoFile, "Substitution Matrix: Ordered Likelihood\n"); |
---|
5921 | break; |
---|
5922 | case MK_MULTI_STATE: |
---|
5923 | printBoth(infoFile, "Substitution Matrix: MK model\n"); |
---|
5924 | break; |
---|
5925 | case GTR_MULTI_STATE: |
---|
5926 | printBoth(infoFile, "Substitution Matrix: GTR\n"); |
---|
5927 | break; |
---|
5928 | default: |
---|
5929 | assert(0); |
---|
5930 | } |
---|
5931 | break; |
---|
5932 | case GENERIC_64: |
---|
5933 | printBoth(infoFile, "DataType: Codon\n"); |
---|
5934 | break; |
---|
5935 | default: |
---|
5936 | assert(0); |
---|
5937 | } |
---|
5938 | printBoth(infoFile, "\n\n\n"); |
---|
5939 | } |
---|
5940 | } |
---|
5941 | |
---|
5942 | printBoth(infoFile, "\n"); |
---|
5943 | |
---|
5944 | printBoth(infoFile, "RAxML was called as follows:\n\n"); |
---|
5945 | for(i = 0; i < argc; i++) |
---|
5946 | printBoth(infoFile,"%s ", argv[i]); |
---|
5947 | printBoth(infoFile,"\n\n\n"); |
---|
5948 | |
---|
5949 | fclose(infoFile); |
---|
5950 | } |
---|
5951 | } |
---|
5952 | |
---|
5953 | void printResult(tree *tr, analdef *adef, boolean finalPrint) |
---|
5954 | { |
---|
5955 | FILE *logFile; |
---|
5956 | char temporaryFileName[1024] = "", treeID[64] = ""; |
---|
5957 | |
---|
5958 | strcpy(temporaryFileName, resultFileName); |
---|
5959 | |
---|
5960 | switch(adef->mode) |
---|
5961 | { |
---|
5962 | case MORPH_CALIBRATOR: |
---|
5963 | break; |
---|
5964 | case TREE_EVALUATION: |
---|
5965 | |
---|
5966 | |
---|
5967 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, finalPrint, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
5968 | |
---|
5969 | logFile = myfopen(temporaryFileName, "wb"); |
---|
5970 | fprintf(logFile, "%s", tr->tree_string); |
---|
5971 | fclose(logFile); |
---|
5972 | |
---|
5973 | if(adef->perGeneBranchLengths) |
---|
5974 | printTreePerGene(tr, adef, temporaryFileName, "wb"); |
---|
5975 | |
---|
5976 | |
---|
5977 | break; |
---|
5978 | case BIG_RAPID_MODE: |
---|
5979 | if(!adef->boot) |
---|
5980 | { |
---|
5981 | if(adef->multipleRuns > 1) |
---|
5982 | { |
---|
5983 | sprintf(treeID, "%d", tr->treeID); |
---|
5984 | strcat(temporaryFileName, ".RUN."); |
---|
5985 | strcat(temporaryFileName, treeID); |
---|
5986 | } |
---|
5987 | |
---|
5988 | |
---|
5989 | if(finalPrint) |
---|
5990 | { |
---|
5991 | switch(tr->rateHetModel) |
---|
5992 | { |
---|
5993 | case GAMMA: |
---|
5994 | case GAMMA_I: |
---|
5995 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, finalPrint, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
5996 | |
---|
5997 | logFile = myfopen(temporaryFileName, "wb"); |
---|
5998 | fprintf(logFile, "%s", tr->tree_string); |
---|
5999 | fclose(logFile); |
---|
6000 | |
---|
6001 | if(adef->perGeneBranchLengths) |
---|
6002 | printTreePerGene(tr, adef, temporaryFileName, "wb"); |
---|
6003 | break; |
---|
6004 | case CAT: |
---|
6005 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE); |
---|
6006 | |
---|
6007 | logFile = myfopen(temporaryFileName, "wb"); |
---|
6008 | fprintf(logFile, "%s", tr->tree_string); |
---|
6009 | fclose(logFile); |
---|
6010 | |
---|
6011 | break; |
---|
6012 | default: |
---|
6013 | assert(0); |
---|
6014 | } |
---|
6015 | } |
---|
6016 | else |
---|
6017 | { |
---|
6018 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE); |
---|
6019 | logFile = myfopen(temporaryFileName, "wb"); |
---|
6020 | fprintf(logFile, "%s", tr->tree_string); |
---|
6021 | fclose(logFile); |
---|
6022 | } |
---|
6023 | } |
---|
6024 | break; |
---|
6025 | default: |
---|
6026 | printf("FATAL ERROR call to printResult from undefined STATE %d\n", adef->mode); |
---|
6027 | exit(-1); |
---|
6028 | break; |
---|
6029 | } |
---|
6030 | } |
---|
6031 | |
---|
6032 | void printBootstrapResult(tree *tr, analdef *adef, boolean finalPrint) |
---|
6033 | { |
---|
6034 | FILE |
---|
6035 | *logFile; |
---|
6036 | #ifdef _WAYNE_MPI |
---|
6037 | char |
---|
6038 | *fileName = bootstrapFileNamePID; |
---|
6039 | #else |
---|
6040 | char |
---|
6041 | *fileName = bootstrapFileName; |
---|
6042 | #endif |
---|
6043 | |
---|
6044 | if(adef->mode == BIG_RAPID_MODE && (adef->boot || adef->rapidBoot)) |
---|
6045 | { |
---|
6046 | if(adef->bootstrapBranchLengths) |
---|
6047 | { |
---|
6048 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, finalPrint, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
6049 | |
---|
6050 | logFile = myfopen(fileName, "ab"); |
---|
6051 | fprintf(logFile, "%s", tr->tree_string); |
---|
6052 | fclose(logFile); |
---|
6053 | |
---|
6054 | if(adef->perGeneBranchLengths) |
---|
6055 | printTreePerGene(tr, adef, fileName, "ab"); |
---|
6056 | } |
---|
6057 | else |
---|
6058 | { |
---|
6059 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE); |
---|
6060 | |
---|
6061 | logFile = myfopen(fileName, "ab"); |
---|
6062 | fprintf(logFile, "%s", tr->tree_string); |
---|
6063 | fclose(logFile); |
---|
6064 | } |
---|
6065 | } |
---|
6066 | else |
---|
6067 | { |
---|
6068 | printf("FATAL ERROR in printBootstrapResult\n"); |
---|
6069 | exit(-1); |
---|
6070 | } |
---|
6071 | } |
---|
6072 | |
---|
6073 | |
---|
6074 | |
---|
6075 | void printBipartitionResult(tree *tr, analdef *adef, boolean finalPrint, boolean printIC) |
---|
6076 | { |
---|
6077 | if(processID == 0 || adef->allInOne) |
---|
6078 | { |
---|
6079 | FILE |
---|
6080 | *logFile; |
---|
6081 | |
---|
6082 | if(!printIC) |
---|
6083 | { |
---|
6084 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, TRUE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, printIC, FALSE); |
---|
6085 | |
---|
6086 | logFile = myfopen(bipartitionsFileName, "ab"); |
---|
6087 | |
---|
6088 | fprintf(logFile, "%s", tr->tree_string); |
---|
6089 | fclose(logFile); |
---|
6090 | } |
---|
6091 | |
---|
6092 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, TRUE, FALSE, printIC, FALSE); |
---|
6093 | |
---|
6094 | if(printIC) |
---|
6095 | logFile = myfopen(icFileNameBranchLabels, "ab"); |
---|
6096 | else |
---|
6097 | logFile = myfopen(bipartitionsFileNameBranchLabels, "ab"); |
---|
6098 | |
---|
6099 | fprintf(logFile, "%s", tr->tree_string); |
---|
6100 | fclose(logFile); |
---|
6101 | } |
---|
6102 | } |
---|
6103 | |
---|
6104 | |
---|
6105 | |
---|
6106 | void printLog(tree *tr, analdef *adef, boolean finalPrint) |
---|
6107 | { |
---|
6108 | FILE *logFile; |
---|
6109 | char temporaryFileName[1024] = "", checkPoints[1024] = "", treeID[64] = ""; |
---|
6110 | double lh, t; |
---|
6111 | |
---|
6112 | lh = tr->likelihood; |
---|
6113 | t = gettime() - masterTime; |
---|
6114 | |
---|
6115 | strcpy(temporaryFileName, logFileName); |
---|
6116 | strcpy(checkPoints, checkpointFileName); |
---|
6117 | |
---|
6118 | switch(adef->mode) |
---|
6119 | { |
---|
6120 | case TREE_EVALUATION: |
---|
6121 | logFile = myfopen(temporaryFileName, "ab"); |
---|
6122 | |
---|
6123 | printf("%f %f\n", t, lh); |
---|
6124 | fprintf(logFile, "%f %f\n", t, lh); |
---|
6125 | |
---|
6126 | fclose(logFile); |
---|
6127 | break; |
---|
6128 | case BIG_RAPID_MODE: |
---|
6129 | if(adef->boot || adef->rapidBoot) |
---|
6130 | { |
---|
6131 | /* testing only printf("%f %f\n", t, lh);*/ |
---|
6132 | /* NOTHING PRINTED so far */ |
---|
6133 | } |
---|
6134 | else |
---|
6135 | { |
---|
6136 | if(adef->multipleRuns > 1) |
---|
6137 | { |
---|
6138 | sprintf(treeID, "%d", tr->treeID); |
---|
6139 | strcat(temporaryFileName, ".RUN."); |
---|
6140 | strcat(temporaryFileName, treeID); |
---|
6141 | |
---|
6142 | strcat(checkPoints, ".RUN."); |
---|
6143 | strcat(checkPoints, treeID); |
---|
6144 | } |
---|
6145 | |
---|
6146 | |
---|
6147 | if(!adef->checkpoints) |
---|
6148 | { |
---|
6149 | logFile = myfopen(temporaryFileName, "ab"); |
---|
6150 | |
---|
6151 | fprintf(logFile, "%f %f\n", t, lh); |
---|
6152 | |
---|
6153 | fclose(logFile); |
---|
6154 | } |
---|
6155 | else |
---|
6156 | { |
---|
6157 | logFile = myfopen(temporaryFileName, "ab"); |
---|
6158 | |
---|
6159 | fprintf(logFile, "%f %f %d\n", t, lh, tr->checkPointCounter); |
---|
6160 | |
---|
6161 | fclose(logFile); |
---|
6162 | |
---|
6163 | strcat(checkPoints, "."); |
---|
6164 | |
---|
6165 | sprintf(treeID, "%d", tr->checkPointCounter); |
---|
6166 | strcat(checkPoints, treeID); |
---|
6167 | |
---|
6168 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE); |
---|
6169 | |
---|
6170 | logFile = myfopen(checkPoints, "ab"); |
---|
6171 | fprintf(logFile, "%s", tr->tree_string); |
---|
6172 | fclose(logFile); |
---|
6173 | |
---|
6174 | tr->checkPointCounter++; |
---|
6175 | } |
---|
6176 | } |
---|
6177 | break; |
---|
6178 | case MORPH_CALIBRATOR: |
---|
6179 | break; |
---|
6180 | default: |
---|
6181 | assert(0); |
---|
6182 | } |
---|
6183 | } |
---|
6184 | |
---|
6185 | |
---|
6186 | |
---|
6187 | void printStartingTree(tree *tr, analdef *adef, boolean finalPrint) |
---|
6188 | { |
---|
6189 | if(adef->boot) |
---|
6190 | { |
---|
6191 | /* not printing starting trees for bootstrap */ |
---|
6192 | } |
---|
6193 | else |
---|
6194 | { |
---|
6195 | FILE *treeFile; |
---|
6196 | char temporaryFileName[1024] = "", treeID[64] = ""; |
---|
6197 | |
---|
6198 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE); |
---|
6199 | |
---|
6200 | if(adef->randomStartingTree) |
---|
6201 | strcpy(temporaryFileName, randomFileName); |
---|
6202 | else |
---|
6203 | strcpy(temporaryFileName, permFileName); |
---|
6204 | |
---|
6205 | if(adef->multipleRuns > 1) |
---|
6206 | { |
---|
6207 | sprintf(treeID, "%d", tr->treeID); |
---|
6208 | strcat(temporaryFileName, ".RUN."); |
---|
6209 | strcat(temporaryFileName, treeID); |
---|
6210 | } |
---|
6211 | |
---|
6212 | treeFile = myfopen(temporaryFileName, "ab"); |
---|
6213 | fprintf(treeFile, "%s", tr->tree_string); |
---|
6214 | fclose(treeFile); |
---|
6215 | } |
---|
6216 | } |
---|
6217 | |
---|
6218 | void writeInfoFile(analdef *adef, tree *tr, double t) |
---|
6219 | { |
---|
6220 | |
---|
6221 | { |
---|
6222 | switch(adef->mode) |
---|
6223 | { |
---|
6224 | case TREE_EVALUATION: |
---|
6225 | break; |
---|
6226 | case BIG_RAPID_MODE: |
---|
6227 | if(adef->boot || adef->rapidBoot) |
---|
6228 | { |
---|
6229 | if(!adef->initialSet) |
---|
6230 | printBothOpen("Bootstrap[%d]: Time %f seconds, bootstrap likelihood %f, best rearrangement setting %d\n", tr->treeID, t, tr->likelihood, adef->bestTrav); |
---|
6231 | else |
---|
6232 | printBothOpen("Bootstrap[%d]: Time %f seconds, bootstrap likelihood %f\n", tr->treeID, t, tr->likelihood); |
---|
6233 | } |
---|
6234 | else |
---|
6235 | { |
---|
6236 | int model; |
---|
6237 | char modelType[128]; |
---|
6238 | |
---|
6239 | switch(tr->rateHetModel) |
---|
6240 | { |
---|
6241 | case GAMMA_I: |
---|
6242 | strcpy(modelType, "GAMMA+P-Invar"); |
---|
6243 | break; |
---|
6244 | case GAMMA: |
---|
6245 | strcpy(modelType, "GAMMA"); |
---|
6246 | break; |
---|
6247 | case CAT: |
---|
6248 | strcpy(modelType, "CAT"); |
---|
6249 | break; |
---|
6250 | default: |
---|
6251 | assert(0); |
---|
6252 | } |
---|
6253 | |
---|
6254 | if(!adef->initialSet) |
---|
6255 | printBothOpen("Inference[%d]: Time %f %s-based likelihood %f, best rearrangement setting %d\n", |
---|
6256 | tr->treeID, t, modelType, tr->likelihood, adef->bestTrav); |
---|
6257 | else |
---|
6258 | printBothOpen("Inference[%d]: Time %f %s-based likelihood %f\n", |
---|
6259 | tr->treeID, t, modelType, tr->likelihood); |
---|
6260 | |
---|
6261 | { |
---|
6262 | FILE *infoFile = myfopen(infoFileName, "ab"); |
---|
6263 | |
---|
6264 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
6265 | { |
---|
6266 | fprintf(infoFile, "alpha[%d]: %f ", model, tr->partitionData[model].alpha); |
---|
6267 | if(adef->useInvariant) |
---|
6268 | fprintf(infoFile, "invar[%d]: %f ", model, tr->partitionData[model].propInvariant); |
---|
6269 | |
---|
6270 | if(tr->partitionData[model].dataType == DNA_DATA) |
---|
6271 | { |
---|
6272 | int |
---|
6273 | k, |
---|
6274 | states = tr->partitionData[model].states, |
---|
6275 | rates = ((states * states - states) / 2); |
---|
6276 | |
---|
6277 | fprintf(infoFile, "rates[%d] ac ag at cg ct gt: ", model); |
---|
6278 | for(k = 0; k < rates; k++) |
---|
6279 | fprintf(infoFile, "%f ", tr->partitionData[model].substRates[k]); |
---|
6280 | } |
---|
6281 | |
---|
6282 | } |
---|
6283 | |
---|
6284 | fprintf(infoFile, "\n"); |
---|
6285 | fclose(infoFile); |
---|
6286 | } |
---|
6287 | } |
---|
6288 | break; |
---|
6289 | default: |
---|
6290 | assert(0); |
---|
6291 | } |
---|
6292 | } |
---|
6293 | } |
---|
6294 | |
---|
/*
 * Print one "freq pi(<name>): <value>" line per state via printBothOpen().
 *
 * n      number of entries to print
 * f      array of at least n values
 * names  array of at least n state labels, parallel to f
 */
static void printFreqs(int n, double *f, char **names)
{
  int
    state = 0;

  while(state < n)
    {
      printBothOpen("freq pi(%s): %f\n", names[state], f[state]);
      state++;
    }
}
---|
6302 | |
---|
/*
 * Print one "rate <a> <-> <b>: <value>" line for every unordered pair of
 * states (a before b in names) via printBothOpen().
 *
 * n      number of states
 * r      rate array, indexed by the running pair counter
 * names  array of n state labels
 *
 * The rate of the last pair (states n-2 and n-1) is always printed as 1.0;
 * the corresponding entry of r is not read.
 */
static void printRatesDNA_BIN(int n, double *r, char **names)
{
  int
    from,
    to,
    pair = 0;

  for(from = 0; from < n; from++)
    for(to = from + 1; to < n; to++)
      {
        int
          isLastPair = (from == n - 2) && (to == n - 1);

        printBothOpen("rate %s <-> %s: %f\n", names[from], names[to], isLastPair ? 1.0 : r[pair]);
        pair++;
      }
}
---|
6319 | |
---|
/*
 * Print one "rate <a> <-> <b>: <value>" line for every unordered pair of
 * states (a before b in names) via printBothOpen().
 *
 * n      number of states
 * r      rate array holding one entry per pair, in the order the pairs are
 *        enumerated here
 * names  array of n state labels
 */
static void printRatesRest(int n, double *r, char **names)
{
  int
    from,
    to,
    pair = 0;

  for(from = 0; from < n; from++)
    for(to = from + 1; to < n; to++)
      {
        printBothOpen("rate %s <-> %s: %f\n", names[from], names[to], r[pair]);
        pair++;
      }
}
---|
6333 | |
---|
6334 | |
---|
6335 | void getDataTypeString(tree *tr, int model, char typeOfData[1024]) |
---|
6336 | { |
---|
6337 | switch(tr->partitionData[model].dataType) |
---|
6338 | { |
---|
6339 | case AA_DATA: |
---|
6340 | strcpy(typeOfData,"AA"); |
---|
6341 | break; |
---|
6342 | case DNA_DATA: |
---|
6343 | strcpy(typeOfData,"DNA"); |
---|
6344 | break; |
---|
6345 | case BINARY_DATA: |
---|
6346 | strcpy(typeOfData,"BINARY/MORPHOLOGICAL"); |
---|
6347 | break; |
---|
6348 | case SECONDARY_DATA: |
---|
6349 | strcpy(typeOfData,"SECONDARY 16 STATE MODEL USING "); |
---|
6350 | strcat(typeOfData, secondaryModelList[tr->secondaryStructureModel]); |
---|
6351 | break; |
---|
6352 | case SECONDARY_DATA_6: |
---|
6353 | strcpy(typeOfData,"SECONDARY 6 STATE MODEL USING "); |
---|
6354 | strcat(typeOfData, secondaryModelList[tr->secondaryStructureModel]); |
---|
6355 | break; |
---|
6356 | case SECONDARY_DATA_7: |
---|
6357 | strcpy(typeOfData,"SECONDARY 7 STATE MODEL USING "); |
---|
6358 | strcat(typeOfData, secondaryModelList[tr->secondaryStructureModel]); |
---|
6359 | break; |
---|
6360 | case GENERIC_32: |
---|
6361 | strcpy(typeOfData,"Multi-State"); |
---|
6362 | break; |
---|
6363 | case GENERIC_64: |
---|
6364 | strcpy(typeOfData,"Codon"); |
---|
6365 | break; |
---|
6366 | default: |
---|
6367 | assert(0); |
---|
6368 | } |
---|
6369 | } |
---|
6370 | |
---|
6371 | |
---|
6372 | |
---|
6373 | void printModelParams(tree *tr, analdef *adef) |
---|
6374 | { |
---|
6375 | int |
---|
6376 | model; |
---|
6377 | |
---|
6378 | double |
---|
6379 | *f = (double*)NULL, |
---|
6380 | *r = (double*)NULL; |
---|
6381 | |
---|
6382 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
6383 | { |
---|
6384 | double tl; |
---|
6385 | char typeOfData[1024]; |
---|
6386 | |
---|
6387 | getDataTypeString(tr, model, typeOfData); |
---|
6388 | |
---|
6389 | printBothOpen("Model Parameters of Partition %d, Name: %s, Type of Data: %s\n", |
---|
6390 | model, tr->partitionData[model].partitionName, typeOfData); |
---|
6391 | printBothOpen("alpha: %f\n", tr->partitionData[model].alpha); |
---|
6392 | |
---|
6393 | if(adef->useInvariant) |
---|
6394 | printBothOpen("invar: %f\n", tr->partitionData[model].propInvariant); |
---|
6395 | |
---|
6396 | if(tr->useBrLenScaler) |
---|
6397 | printBothOpen("Branch length scaler: %f\n", tr->partitionData[model].brLenScaler); |
---|
6398 | |
---|
6399 | if(adef->perGeneBranchLengths) |
---|
6400 | tl = treeLength(tr, model); |
---|
6401 | else |
---|
6402 | tl = treeLength(tr, 0); |
---|
6403 | |
---|
6404 | printBothOpen("Tree-Length: %f\n", tl); |
---|
6405 | |
---|
6406 | f = tr->partitionData[model].frequencies; |
---|
6407 | r = tr->partitionData[model].substRates; |
---|
6408 | |
---|
6409 | switch(tr->partitionData[model].dataType) |
---|
6410 | { |
---|
6411 | case AA_DATA: |
---|
6412 | { |
---|
6413 | char *freqNames[20] = {"A", "R", "N","D", "C", "Q", "E", "G", |
---|
6414 | "H", "I", "L", "K", "M", "F", "P", "S", |
---|
6415 | "T", "W", "Y", "V"}; |
---|
6416 | |
---|
6417 | if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X) |
---|
6418 | { |
---|
6419 | int |
---|
6420 | k; |
---|
6421 | |
---|
6422 | for(k = 0; k < 4; k++) |
---|
6423 | { |
---|
6424 | printBothOpen("LGM %d\n", k); |
---|
6425 | printRatesRest(20, tr->partitionData[model].substRates_LG4[k], freqNames); |
---|
6426 | printBothOpen("\n"); |
---|
6427 | printFreqs(20, tr->partitionData[model].frequencies_LG4[k], freqNames); |
---|
6428 | } |
---|
6429 | } |
---|
6430 | else |
---|
6431 | { |
---|
6432 | printRatesRest(20, r, freqNames); |
---|
6433 | printBothOpen("\n"); |
---|
6434 | printFreqs(20, f, freqNames); |
---|
6435 | } |
---|
6436 | } |
---|
6437 | break; |
---|
6438 | case GENERIC_32: |
---|
6439 | { |
---|
6440 | char *freqNames[32] = {"0", "1", "2", "3", "4", "5", "6", "7", |
---|
6441 | "8", "9", "A", "B", "C", "D", "E", "F", |
---|
6442 | "G", "H", "I", "J", "K", "L", "M", "N", |
---|
6443 | "O", "P", "Q", "R", "S", "T", "U", "V"}; |
---|
6444 | |
---|
6445 | printRatesRest(32, r, freqNames); |
---|
6446 | printBothOpen("\n"); |
---|
6447 | printFreqs(32, f, freqNames); |
---|
6448 | } |
---|
6449 | break; |
---|
6450 | case GENERIC_64: |
---|
6451 | assert(0); |
---|
6452 | break; |
---|
6453 | case DNA_DATA: |
---|
6454 | { |
---|
6455 | char *freqNames[4] = {"A", "C", "G", "T"}; |
---|
6456 | |
---|
6457 | printRatesDNA_BIN(4, r, freqNames); |
---|
6458 | printBothOpen("\n"); |
---|
6459 | printFreqs(4, f, freqNames); |
---|
6460 | } |
---|
6461 | break; |
---|
6462 | case SECONDARY_DATA_6: |
---|
6463 | { |
---|
6464 | char *freqNames[6] = {"AU", "CG", "GC", "GU", "UA", "UG"}; |
---|
6465 | |
---|
6466 | printRatesRest(6, r, freqNames); |
---|
6467 | printBothOpen("\n"); |
---|
6468 | printFreqs(6, f, freqNames); |
---|
6469 | } |
---|
6470 | break; |
---|
6471 | case SECONDARY_DATA_7: |
---|
6472 | { |
---|
6473 | char *freqNames[7] = {"AU", "CG", "GC", "GU", "UA", "UG", "REST"}; |
---|
6474 | |
---|
6475 | printRatesRest(7, r, freqNames); |
---|
6476 | printBothOpen("\n"); |
---|
6477 | printFreqs(7, f, freqNames); |
---|
6478 | } |
---|
6479 | break; |
---|
6480 | case SECONDARY_DATA: |
---|
6481 | { |
---|
6482 | char *freqNames[16] = {"AA", "AC", "AG", "AU", "CA", "CC", "CG", "CU", |
---|
6483 | "GA", "GC", "GG", "GU", "UA", "UC", "UG", "UU"}; |
---|
6484 | |
---|
6485 | printRatesRest(16, r, freqNames); |
---|
6486 | printBothOpen("\n"); |
---|
6487 | printFreqs(16, f, freqNames); |
---|
6488 | } |
---|
6489 | break; |
---|
6490 | case BINARY_DATA: |
---|
6491 | { |
---|
6492 | char *freqNames[2] = {"0", "1"}; |
---|
6493 | |
---|
6494 | printRatesDNA_BIN(2, r, freqNames); |
---|
6495 | printBothOpen("\n"); |
---|
6496 | printFreqs(2, f, freqNames); |
---|
6497 | } |
---|
6498 | break; |
---|
6499 | default: |
---|
6500 | assert(0); |
---|
6501 | } |
---|
6502 | |
---|
6503 | printBothOpen("\n"); |
---|
6504 | } |
---|
6505 | } |
---|
6506 | |
---|
6507 | static void finalizeInfoFile(tree *tr, analdef *adef) |
---|
6508 | { |
---|
6509 | if(processID == 0) |
---|
6510 | { |
---|
6511 | double t; |
---|
6512 | |
---|
6513 | t = gettime() - masterTime; |
---|
6514 | |
---|
6515 | switch(adef->mode) |
---|
6516 | { |
---|
6517 | case TREE_EVALUATION : |
---|
6518 | case OPTIMIZE_BR_LEN_SCALER: |
---|
6519 | |
---|
6520 | if(adef->mode == OPTIMIZE_BR_LEN_SCALER) |
---|
6521 | printBothOpen("\n\nOverall Time for Tree Evaluation with branch length scalers: %f\n", t); |
---|
6522 | else |
---|
6523 | printBothOpen("\n\nOverall Time for Tree Evaluation %f\n", t); |
---|
6524 | |
---|
6525 | printBothOpen("Final GAMMA likelihood: %f\n", tr->likelihood); |
---|
6526 | |
---|
6527 | { |
---|
6528 | boolean |
---|
6529 | linkedProteinGTR = FALSE; |
---|
6530 | |
---|
6531 | int |
---|
6532 | model, |
---|
6533 | params = 0, |
---|
6534 | paramsBrLen = 0; |
---|
6535 | |
---|
6536 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
6537 | { |
---|
6538 | switch(tr->partitionData[model].dataType) |
---|
6539 | { |
---|
6540 | case AA_DATA: |
---|
6541 | if(tr->partitionData[model].protModels == GTR_UNLINKED) |
---|
6542 | params += 189; |
---|
6543 | |
---|
6544 | if(tr->partitionData[model].protModels == GTR) |
---|
6545 | linkedProteinGTR = TRUE; |
---|
6546 | |
---|
6547 | if(!tr->partitionData[model].usePredefinedProtFreqs) |
---|
6548 | params += 19; |
---|
6549 | break; |
---|
6550 | case GENERIC_32: |
---|
6551 | { |
---|
6552 | int |
---|
6553 | states = tr->partitionData[model].states; |
---|
6554 | |
---|
6555 | /* frequencies */ |
---|
6556 | |
---|
6557 | params += (states - 1); |
---|
6558 | |
---|
6559 | switch(tr->multiStateModel) |
---|
6560 | { |
---|
6561 | case ORDERED_MULTI_STATE: |
---|
6562 | break; |
---|
6563 | case MK_MULTI_STATE: |
---|
6564 | params += (states - 1); |
---|
6565 | break; |
---|
6566 | case GTR_MULTI_STATE: |
---|
6567 | params += ((((states * states) - states) / 2) - 1); |
---|
6568 | break; |
---|
6569 | default: |
---|
6570 | assert(0); |
---|
6571 | } |
---|
6572 | break; |
---|
6573 | case GENERIC_64: |
---|
6574 | assert(0); |
---|
6575 | break; |
---|
6576 | case DNA_DATA: |
---|
6577 | params += 5 + 3; |
---|
6578 | break; |
---|
6579 | case SECONDARY_DATA_6: |
---|
6580 | case SECONDARY_DATA_7: |
---|
6581 | case SECONDARY_DATA: |
---|
6582 | { |
---|
6583 | int |
---|
6584 | states = tr->partitionData[model].states; |
---|
6585 | |
---|
6586 | switch(tr->secondaryStructureModel) |
---|
6587 | { |
---|
6588 | case SEC_6_A: |
---|
6589 | params += ((((states * states) - states) / 2) - 1); /*rates*/ |
---|
6590 | params += (states - 1); /* frequencies */ |
---|
6591 | break; |
---|
6592 | case SEC_6_B: |
---|
6593 | params += 1; /*rates */ |
---|
6594 | params += 5; /* frequencies */ |
---|
6595 | break; |
---|
6596 | case SEC_6_C: |
---|
6597 | params += 1; /*rates */ |
---|
6598 | params += 2; /* frequencies */ |
---|
6599 | break; |
---|
6600 | case SEC_6_D: |
---|
6601 | params += 1; /*rates */ |
---|
6602 | params += 1; /* frequencies */ |
---|
6603 | break; |
---|
6604 | case SEC_6_E: |
---|
6605 | params += 1; /*rates */ |
---|
6606 | params += 5; /* frequencies */ |
---|
6607 | break; |
---|
6608 | case SEC_7_A: |
---|
6609 | params += ((((states * states) - states) / 2) - 1); /*rates*/ |
---|
6610 | params += (states - 1); /* frequencies */ |
---|
6611 | break; |
---|
6612 | case SEC_7_B: |
---|
6613 | params += 20; /*rates */ |
---|
6614 | params += 3; /* frequencies */ |
---|
6615 | break; |
---|
6616 | case SEC_7_C: |
---|
6617 | params += 9; /*rates */ |
---|
6618 | params += 6; /* frequencies */ |
---|
6619 | break; |
---|
6620 | case SEC_7_D: |
---|
6621 | params += 3; /*rates */ |
---|
6622 | params += 6; /* frequencies */ |
---|
6623 | break; |
---|
6624 | case SEC_7_E: |
---|
6625 | params += 1; /*rates */ |
---|
6626 | params += 6; /* frequencies */ |
---|
6627 | break; |
---|
6628 | case SEC_7_F: |
---|
6629 | params += 3; /*rates */ |
---|
6630 | params += 3; /* frequencies */ |
---|
6631 | break; |
---|
6632 | case SEC_16: |
---|
6633 | params += ((((states * states) - states) / 2) - 1); /*rates*/ |
---|
6634 | params += (states - 1); /* frequencies */ |
---|
6635 | break; |
---|
6636 | case SEC_16_A: |
---|
6637 | params += 4; /*rates */ |
---|
6638 | params += 15; /* frequencies */ |
---|
6639 | break; |
---|
6640 | case SEC_16_B: |
---|
6641 | params += 0; /*rates */ |
---|
6642 | params += 15; /* frequencies */ |
---|
6643 | break; |
---|
6644 | case SEC_16_C: |
---|
6645 | case SEC_16_D: |
---|
6646 | case SEC_16_E: |
---|
6647 | case SEC_16_F: |
---|
6648 | case SEC_16_I: |
---|
6649 | case SEC_16_J: |
---|
6650 | case SEC_16_K: |
---|
6651 | assert(0); |
---|
6652 | default: |
---|
6653 | assert(0); |
---|
6654 | } |
---|
6655 | } |
---|
6656 | break; |
---|
6657 | case BINARY_DATA: |
---|
6658 | params += 1; |
---|
6659 | break; |
---|
6660 | default: |
---|
6661 | assert(0); |
---|
6662 | } |
---|
6663 | } |
---|
6664 | |
---|
6665 | if(adef->useInvariant) |
---|
6666 | params += 2; |
---|
6667 | else /* GAMMA */ |
---|
6668 | params += 1; |
---|
6669 | } |
---|
6670 | |
---|
6671 | if(linkedProteinGTR) |
---|
6672 | params += 189; |
---|
6673 | |
---|
6674 | if(adef->mode == TREE_EVALUATION) |
---|
6675 | { |
---|
6676 | if(tr->multiBranch) |
---|
6677 | paramsBrLen = params + tr->NumberOfModels * (2 * tr->mxtips - 3); |
---|
6678 | else |
---|
6679 | paramsBrLen = params + 2 * tr->mxtips - 3; |
---|
6680 | } |
---|
6681 | else |
---|
6682 | { |
---|
6683 | paramsBrLen = params + tr->NumberOfModels; |
---|
6684 | } |
---|
6685 | |
---|
6686 | printBothOpen("\n"); |
---|
6687 | |
---|
6688 | |
---|
6689 | printBothOpen("Number of free parameters for AIC-TEST(BR-LEN): %d\n", paramsBrLen); |
---|
6690 | printBothOpen("Number of free parameters for AIC-TEST(NO-BR-LEN): %d\n", params); |
---|
6691 | |
---|
6692 | |
---|
6693 | printBothOpen("\n\n"); |
---|
6694 | |
---|
6695 | printModelParams(tr, adef); |
---|
6696 | |
---|
6697 | if(adef->mode == TREE_EVALUATION) |
---|
6698 | { |
---|
6699 | printBothOpen("Final tree written to: %s\n", resultFileName); |
---|
6700 | printBothOpen("Execution Log File written to: %s\n", logFileName); |
---|
6701 | } |
---|
6702 | |
---|
6703 | } |
---|
6704 | break; |
---|
6705 | case BIG_RAPID_MODE: |
---|
6706 | if(adef->boot) |
---|
6707 | { |
---|
6708 | printBothOpen("\n\nOverall Time for %d Bootstraps %f\n", adef->multipleRuns, t); |
---|
6709 | printBothOpen("\n\nAverage Time per Bootstrap %f\n", (double)(t/((double)adef->multipleRuns))); |
---|
6710 | printBothOpen("All %d bootstrapped trees written to: %s\n", adef->multipleRuns, bootstrapFileName); |
---|
6711 | } |
---|
6712 | else |
---|
6713 | { |
---|
6714 | if(adef->multipleRuns > 1) |
---|
6715 | { |
---|
6716 | double avgLH = 0; |
---|
6717 | double bestLH = unlikely; |
---|
6718 | int i, bestI = 0; |
---|
6719 | |
---|
6720 | for(i = 0; i < adef->multipleRuns; i++) |
---|
6721 | { |
---|
6722 | avgLH += tr->likelihoods[i]; |
---|
6723 | if(tr->likelihoods[i] > bestLH) |
---|
6724 | { |
---|
6725 | bestLH = tr->likelihoods[i]; |
---|
6726 | bestI = i; |
---|
6727 | } |
---|
6728 | } |
---|
6729 | avgLH /= ((double)adef->multipleRuns); |
---|
6730 | |
---|
6731 | printBothOpen("\n\nOverall Time for %d Inferences %f\n", adef->multipleRuns, t); |
---|
6732 | printBothOpen("Average Time per Inference %f\n", (double)(t/((double)adef->multipleRuns))); |
---|
6733 | printBothOpen("Average Likelihood : %f\n", avgLH); |
---|
6734 | printBothOpen("\n"); |
---|
6735 | printBothOpen("Best Likelihood in run number %d: likelihood %f\n\n", bestI, bestLH); |
---|
6736 | |
---|
6737 | if(adef->checkpoints) |
---|
6738 | printBothOpen("Checkpoints written to: %s.RUN.%d.* to %d.*\n", checkpointFileName, 0, adef->multipleRuns - 1); |
---|
6739 | if(!adef->restart) |
---|
6740 | { |
---|
6741 | if(adef->randomStartingTree) |
---|
6742 | printBothOpen("Random starting trees written to: %s.RUN.%d to %d\n", randomFileName, 0, adef->multipleRuns - 1); |
---|
6743 | else |
---|
6744 | printBothOpen("Parsimony starting trees written to: %s.RUN.%d to %d\n", permFileName, 0, adef->multipleRuns - 1); |
---|
6745 | } |
---|
6746 | printBothOpen("Final trees written to: %s.RUN.%d to %d\n", resultFileName, 0, adef->multipleRuns - 1); |
---|
6747 | printBothOpen("Execution Log Files written to: %s.RUN.%d to %d\n", logFileName, 0, adef->multipleRuns - 1); |
---|
6748 | printBothOpen("Execution information file written to: %s\n", infoFileName); |
---|
6749 | } |
---|
6750 | else |
---|
6751 | { |
---|
6752 | printBothOpen("\n\nOverall Time for 1 Inference %f\n", t); |
---|
6753 | printBothOpen("Likelihood : %f\n", tr->likelihood); |
---|
6754 | printBothOpen("\n\n"); |
---|
6755 | |
---|
6756 | if(adef->checkpoints) |
---|
6757 | printBothOpen("Checkpoints written to: %s.*\n", checkpointFileName); |
---|
6758 | if(!adef->restart) |
---|
6759 | { |
---|
6760 | if(adef->randomStartingTree) |
---|
6761 | printBothOpen("Random starting tree written to: %s\n", randomFileName); |
---|
6762 | else |
---|
6763 | printBothOpen("Parsimony starting tree written to: %s\n", permFileName); |
---|
6764 | } |
---|
6765 | printBothOpen("Final tree written to: %s\n", resultFileName); |
---|
6766 | printBothOpen("Execution Log File written to: %s\n", logFileName); |
---|
6767 | printBothOpen("Execution information file written to: %s\n",infoFileName); |
---|
6768 | } |
---|
6769 | } |
---|
6770 | |
---|
6771 | break; |
---|
6772 | case CALC_BIPARTITIONS: |
---|
6773 | printBothOpen("\n\nTime for Computation of Bipartitions %f\n", t); |
---|
6774 | printBothOpen("Tree with bipartitions written to file: %s\n", bipartitionsFileName); |
---|
6775 | printBothOpen("Tree with bipartitions as branch labels written to file: %s\n", bipartitionsFileNameBranchLabels); |
---|
6776 | printBothOpen("Execution information file written to : %s\n",infoFileName); |
---|
6777 | break; |
---|
6778 | case CALC_BIPARTITIONS_IC: |
---|
6779 | printBothOpen("\n\nTime for Computation of TC and IC scores %f\n", t); |
---|
6780 | printBothOpen("Tree with IC scores as branch labels written to file: %s\n", icFileNameBranchLabels); |
---|
6781 | printBothOpen("Execution information file written to : %s\n",infoFileName); |
---|
6782 | break; |
---|
6783 | case PER_SITE_LL: |
---|
6784 | printBothOpen("\n\nTime for Optimization of per-site log likelihoods %f\n", t); |
---|
6785 | printBothOpen("Per-site Log Likelihoods written to File %s in Tree-Puzzle format\n", perSiteLLsFileName); |
---|
6786 | printBothOpen("Execution information file written to : %s\n",infoFileName); |
---|
6787 | |
---|
6788 | break; |
---|
6789 | case PARSIMONY_ADDITION: |
---|
6790 | printBothOpen("\n\nTime for MP stepwise addition %f\n", t); |
---|
6791 | printBothOpen("Execution information file written to : %s\n",infoFileName); |
---|
6792 | printBothOpen("Complete parsimony tree written to: %s\n", permFileName); |
---|
6793 | break; |
---|
6794 | case ANCESTRAL_STATES: |
---|
6795 | printBothOpen("\n\nTime for marginal ancestral state computation: %f\n\n", t); |
---|
6796 | break; |
---|
6797 | case QUARTET_CALCULATION: |
---|
6798 | printBothOpen("\n\nOverall Time for quartet computation: %f\n\n", t); |
---|
6799 | break; |
---|
6800 | case THOROUGH_OPTIMIZATION: |
---|
6801 | printBothOpen("\n\nTime for thorough tree optimization: %f\n\n", t); |
---|
6802 | break; |
---|
6803 | case ROOT_TREE: |
---|
6804 | printBothOpen("\n\nTime for tree rooting: %f\n\n", t); |
---|
6805 | break; |
---|
6806 | default: |
---|
6807 | assert(0); |
---|
6808 | } |
---|
6809 | } |
---|
6810 | |
---|
6811 | } |
---|
6812 | |
---|
6813 | |
---|
6814 | /************************************************************************************/ |
---|
6815 | |
---|
6816 | |
---|
6817 | #ifdef _USE_PTHREADS |
---|
6818 | |
---|
6819 | |
---|
6820 | |
---|
6821 | |
---|
6822 | |
---|
6823 | |
---|
6824 | static void computeFraction(tree *localTree, int tid, int n) |
---|
6825 | { |
---|
6826 | int |
---|
6827 | model; |
---|
6828 | |
---|
6829 | size_t |
---|
6830 | i; |
---|
6831 | |
---|
6832 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
6833 | { |
---|
6834 | int width = 0; |
---|
6835 | |
---|
6836 | for(i = localTree->partitionData[model].lower; i < localTree->partitionData[model].upper; i++) |
---|
6837 | if(i % (size_t)n == (size_t)tid) |
---|
6838 | width++; |
---|
6839 | |
---|
6840 | localTree->partitionData[model].width = width; |
---|
6841 | } |
---|
6842 | } |
---|
6843 | |
---|
6844 | |
---|
6845 | |
---|
6846 | static void threadFixModelIndices(tree *tr, tree *localTree, int tid, int n) |
---|
6847 | { |
---|
6848 | size_t |
---|
6849 | model, |
---|
6850 | j, |
---|
6851 | i, |
---|
6852 | globalCounter = 0, |
---|
6853 | localCounter = 0, |
---|
6854 | offset, |
---|
6855 | countOffset, |
---|
6856 | myLength = 0; |
---|
6857 | |
---|
6858 | for(model = 0; model < (size_t)localTree->NumberOfModels; model++) |
---|
6859 | { |
---|
6860 | localTree->partitionData[model].lower = tr->partitionData[model].lower; |
---|
6861 | localTree->partitionData[model].upper = tr->partitionData[model].upper; |
---|
6862 | } |
---|
6863 | |
---|
6864 | computeFraction(localTree, tid, n); |
---|
6865 | |
---|
6866 | for(model = 0, offset = 0, countOffset = 0; model < (size_t)localTree->NumberOfModels; model++) |
---|
6867 | { |
---|
6868 | localTree->partitionData[model].sumBuffer = &localTree->sumBuffer[offset]; |
---|
6869 | localTree->partitionData[model].perSiteLL = &localTree->perSiteLLPtr[countOffset]; |
---|
6870 | localTree->partitionData[model].wgt = &localTree->wgtPtr[countOffset]; |
---|
6871 | localTree->partitionData[model].invariant = &localTree->invariantPtr[countOffset]; |
---|
6872 | localTree->partitionData[model].rateCategory = &localTree->rateCategoryPtr[countOffset]; |
---|
6873 | |
---|
6874 | countOffset += localTree->partitionData[model].width; |
---|
6875 | |
---|
6876 | offset += (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states) * (size_t)(localTree->partitionData[model].width); |
---|
6877 | } |
---|
6878 | |
---|
6879 | myLength = countOffset; |
---|
6880 | |
---|
6881 | |
---|
6882 | /* figure in data */ |
---|
6883 | |
---|
6884 | for(i = 0; i < (size_t)localTree->mxtips; i++) |
---|
6885 | { |
---|
6886 | for(model = 0, offset = 0, countOffset = 0; model < (size_t)localTree->NumberOfModels; model++) |
---|
6887 | { |
---|
6888 | localTree->partitionData[model].yVector[i+1] = &localTree->y_ptr[i * myLength + countOffset]; |
---|
6889 | countOffset += localTree->partitionData[model].width; |
---|
6890 | } |
---|
6891 | assert(countOffset == myLength); |
---|
6892 | } |
---|
6893 | |
---|
6894 | |
---|
6895 | |
---|
6896 | for(model = 0, globalCounter = 0; model < (size_t)localTree->NumberOfModels; model++) |
---|
6897 | { |
---|
6898 | for(localCounter = 0, i = (size_t)localTree->partitionData[model].lower; i < (size_t)localTree->partitionData[model].upper; i++) |
---|
6899 | { |
---|
6900 | if(i % (size_t)n == (size_t)tid) |
---|
6901 | { |
---|
6902 | localTree->partitionData[model].wgt[localCounter] = tr->cdta->aliaswgt[globalCounter]; |
---|
6903 | localTree->partitionData[model].invariant[localCounter] = tr->invariant[globalCounter]; |
---|
6904 | localTree->partitionData[model].rateCategory[localCounter] = tr->cdta->rateCategory[globalCounter]; |
---|
6905 | |
---|
6906 | for(j = 1; j <= (size_t)localTree->mxtips; j++) |
---|
6907 | localTree->partitionData[model].yVector[j][localCounter] = tr->yVector[j][globalCounter]; |
---|
6908 | |
---|
6909 | localCounter++; |
---|
6910 | } |
---|
6911 | globalCounter++; |
---|
6912 | } |
---|
6913 | } |
---|
6914 | |
---|
6915 | for(model = 0; model < (size_t)localTree->NumberOfModels; model++) |
---|
6916 | { |
---|
6917 | int |
---|
6918 | undetermined = getUndetermined(localTree->partitionData[model].dataType); |
---|
6919 | |
---|
6920 | size_t |
---|
6921 | width = localTree->partitionData[model].width; |
---|
6922 | |
---|
6923 | localTree->partitionData[model].gapVectorLength = ((int)width / 32) + 1; |
---|
6924 | |
---|
6925 | memset(localTree->partitionData[model].gapVector, 0, localTree->partitionData[model].initialGapVectorSize); |
---|
6926 | |
---|
6927 | for(j = 1; j <= (size_t)(localTree->mxtips); j++) |
---|
6928 | for(i = 0; i < width; i++) |
---|
6929 | if(localTree->partitionData[model].yVector[j][i] == undetermined) |
---|
6930 | localTree->partitionData[model].gapVector[localTree->partitionData[model].gapVectorLength * j + i / 32] |= mask32[i % 32]; |
---|
6931 | } |
---|
6932 | } |
---|
6933 | |
---|
6934 | |
---|
6935 | static void initPartition(tree *tr, tree *localTree, int tid) |
---|
6936 | { |
---|
6937 | int model; |
---|
6938 | |
---|
6939 | localTree->threadID = tid; |
---|
6940 | |
---|
6941 | if(tid > 0) |
---|
6942 | { |
---|
6943 | int totalLength = 0; |
---|
6944 | |
---|
6945 | localTree->useGammaMedian = tr->useGammaMedian; |
---|
6946 | localTree->saveMemory = tr->saveMemory; |
---|
6947 | localTree->innerNodes = tr->innerNodes; |
---|
6948 | localTree->useFastScaling = tr->useFastScaling; |
---|
6949 | localTree->perPartitionEPA = tr->perPartitionEPA; |
---|
6950 | localTree->maxCategories = tr->maxCategories; |
---|
6951 | |
---|
6952 | localTree->originalCrunchedLength = tr->originalCrunchedLength; |
---|
6953 | localTree->NumberOfModels = tr->NumberOfModels; |
---|
6954 | localTree->mxtips = tr->mxtips; |
---|
6955 | localTree->multiBranch = tr->multiBranch; |
---|
6956 | |
---|
6957 | localTree->nameList = tr->nameList; |
---|
6958 | localTree->numBranches = tr->numBranches; |
---|
6959 | localTree->lhs = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength); |
---|
6960 | localTree->executeModel = (boolean*)rax_malloc(sizeof(boolean) * localTree->NumberOfModels); |
---|
6961 | localTree->perPartitionLH = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels); |
---|
6962 | localTree->storedPerPartitionLH = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels); |
---|
6963 | |
---|
6964 | localTree->fracchanges = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels); |
---|
6965 | localTree->rawFracchanges = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels); |
---|
6966 | |
---|
6967 | localTree->partitionContributions = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels); |
---|
6968 | |
---|
6969 | localTree->partitionData = (pInfo*)rax_malloc(sizeof(pInfo) * localTree->NumberOfModels); |
---|
6970 | |
---|
6971 | /* extend for multi-branch */ |
---|
6972 | localTree->td[0].count = 0; |
---|
6973 | localTree->td[0].ti = (traversalInfo *)rax_malloc(sizeof(traversalInfo) * localTree->mxtips); |
---|
6974 | |
---|
6975 | localTree->cdta = (cruncheddata*)rax_malloc(sizeof(cruncheddata)); |
---|
6976 | localTree->cdta->patrat = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength); |
---|
6977 | localTree->cdta->patratStored = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength); |
---|
6978 | |
---|
6979 | localTree->discreteRateCategories = tr->discreteRateCategories; |
---|
6980 | |
---|
6981 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
6982 | { |
---|
6983 | localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories; |
---|
6984 | localTree->partitionData[model].states = tr->partitionData[model].states; |
---|
6985 | localTree->partitionData[model].maxTipStates = tr->partitionData[model].maxTipStates; |
---|
6986 | localTree->partitionData[model].dataType = tr->partitionData[model].dataType; |
---|
6987 | localTree->partitionData[model].protModels = tr->partitionData[model].protModels; |
---|
6988 | localTree->partitionData[model].usePredefinedProtFreqs = tr->partitionData[model].usePredefinedProtFreqs; |
---|
6989 | localTree->partitionData[model].mxtips = tr->partitionData[model].mxtips; |
---|
6990 | localTree->partitionData[model].lower = tr->partitionData[model].lower; |
---|
6991 | localTree->partitionData[model].upper = tr->partitionData[model].upper; |
---|
6992 | localTree->executeModel[model] = TRUE; |
---|
6993 | localTree->perPartitionLH[model] = 0.0; |
---|
6994 | localTree->storedPerPartitionLH[model] = 0.0; |
---|
6995 | totalLength += (localTree->partitionData[model].upper - localTree->partitionData[model].lower); |
---|
6996 | } |
---|
6997 | |
---|
6998 | assert(totalLength == localTree->originalCrunchedLength); |
---|
6999 | } |
---|
7000 | |
---|
7001 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7002 | localTree->partitionData[model].width = 0; |
---|
7003 | } |
---|
7004 | |
---|
7005 | |
---|
7006 | static void allocNodex(tree *tr, int tid, int n) |
---|
7007 | { |
---|
7008 | size_t |
---|
7009 | model, |
---|
7010 | memoryRequirements = 0, |
---|
7011 | myLength = 0; |
---|
7012 | |
---|
7013 | computeFraction(tr, tid, n); |
---|
7014 | |
---|
7015 | allocPartitions(tr); |
---|
7016 | |
---|
7017 | |
---|
7018 | for(model = 0; model < (size_t)tr->NumberOfModels; model++) |
---|
7019 | { |
---|
7020 | size_t |
---|
7021 | width = tr->partitionData[model].width, |
---|
7022 | i; |
---|
7023 | |
---|
7024 | myLength += width; |
---|
7025 | |
---|
7026 | memoryRequirements += (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states) * width; |
---|
7027 | |
---|
7028 | tr->partitionData[model].gapVectorLength = ((int)width / 32) + 1; |
---|
7029 | |
---|
7030 | tr->partitionData[model].gapVector = (unsigned int*)rax_calloc(tr->partitionData[model].gapVectorLength * 2 * tr->mxtips, sizeof(unsigned int)); |
---|
7031 | |
---|
7032 | tr->partitionData[model].initialGapVectorSize = tr->partitionData[model].gapVectorLength * 2 * tr->mxtips * sizeof(int); |
---|
7033 | |
---|
7034 | /* always multiply by 4 due to frequent switching between CAT and GAMMA in standard RAxML */ |
---|
7035 | |
---|
7036 | tr->partitionData[model].gapColumn = (double *)rax_malloc( |
---|
7037 | ((size_t)(tr->innerNodes)) * |
---|
7038 | ((size_t)(4)) * |
---|
7039 | ((size_t)(tr->partitionData[model].states)) * |
---|
7040 | sizeof(double)); |
---|
7041 | for(i = 0; i < tr->innerNodes; i++) |
---|
7042 | { |
---|
7043 | tr->partitionData[model].xVector[i] = (double*)NULL; |
---|
7044 | tr->partitionData[model].expVector[i] = (int*)NULL; |
---|
7045 | } |
---|
7046 | } |
---|
7047 | |
---|
7048 | if(tid == 0) |
---|
7049 | { |
---|
7050 | tr->perSiteLL = (double *)rax_malloc((size_t)tr->cdta->endsite * sizeof(double)); |
---|
7051 | assert(tr->perSiteLL != NULL); |
---|
7052 | } |
---|
7053 | |
---|
7054 | tr->sumBuffer = (double *)rax_malloc(memoryRequirements * sizeof(double)); |
---|
7055 | assert(tr->sumBuffer != NULL); |
---|
7056 | |
---|
7057 | tr->y_ptr = (unsigned char *)rax_malloc(myLength * (size_t)(tr->mxtips) * sizeof(unsigned char)); |
---|
7058 | assert(tr->y_ptr != NULL); |
---|
7059 | |
---|
7060 | tr->perSiteLLPtr = (double*) rax_malloc(myLength * sizeof(double)); |
---|
7061 | assert(tr->perSiteLLPtr != NULL); |
---|
7062 | |
---|
7063 | tr->wgtPtr = (int*) rax_malloc(myLength * sizeof(int)); |
---|
7064 | assert(tr->wgtPtr != NULL); |
---|
7065 | |
---|
7066 | tr->invariantPtr = (int*) rax_malloc(myLength * sizeof(int)); |
---|
7067 | assert(tr->invariantPtr != NULL); |
---|
7068 | |
---|
7069 | tr->rateCategoryPtr = (int*) rax_malloc(myLength * sizeof(int)); |
---|
7070 | assert(tr->rateCategoryPtr != NULL); |
---|
7071 | } |
---|
7072 | |
---|
7073 | |
---|
7074 | |
---|
7075 | |
---|
7076 | |
---|
7077 | |
---|
7078 | inline static void sendTraversalInfo(tree *localTree, tree *tr) |
---|
7079 | { |
---|
7080 | localTree->td[0] = tr->td[0]; |
---|
7081 | } |
---|
7082 | |
---|
7083 | |
---|
7084 | static void collectDouble(double *dst, double *src, tree *tr, int n, int tid) |
---|
7085 | { |
---|
7086 | int |
---|
7087 | model; |
---|
7088 | |
---|
7089 | size_t |
---|
7090 | i; |
---|
7091 | |
---|
7092 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
7093 | { |
---|
7094 | for(i = tr->partitionData[model].lower; i < tr->partitionData[model].upper; i++) |
---|
7095 | { |
---|
7096 | if(i % (size_t)n == (size_t)tid) |
---|
7097 | dst[i] = src[i]; |
---|
7098 | } |
---|
7099 | } |
---|
7100 | } |
---|
7101 | |
---|
7102 | |
---|
7103 | static void broadcastPerSiteRates(tree *tr, tree *localTree) |
---|
7104 | { |
---|
7105 | int |
---|
7106 | i = 0, |
---|
7107 | model = 0; |
---|
7108 | |
---|
7109 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7110 | { |
---|
7111 | localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories; |
---|
7112 | |
---|
7113 | for(i = 0; i < localTree->partitionData[model].numberOfCategories; i++) |
---|
7114 | { |
---|
7115 | localTree->partitionData[model].perSiteRates[i] = tr->partitionData[model].perSiteRates[i]; |
---|
7116 | localTree->partitionData[model].unscaled_perSiteRates[i] = tr->partitionData[model].unscaled_perSiteRates[i]; |
---|
7117 | } |
---|
7118 | } |
---|
7119 | |
---|
7120 | } |
---|
7121 | |
---|
7122 | static void copyLG4(tree *localTree, tree *tr, int model, const partitionLengths *pl) |
---|
7123 | { |
---|
7124 | if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X) |
---|
7125 | { |
---|
7126 | int |
---|
7127 | k; |
---|
7128 | |
---|
7129 | for(k = 0; k < 4; k++) |
---|
7130 | { |
---|
7131 | memcpy(localTree->partitionData[model].EIGN_LG4[k], tr->partitionData[model].EIGN_LG4[k], pl->eignLength * sizeof(double)); |
---|
7132 | memcpy(localTree->partitionData[model].EV_LG4[k], tr->partitionData[model].EV_LG4[k], pl->evLength * sizeof(double)); |
---|
7133 | memcpy(localTree->partitionData[model].EI_LG4[k], tr->partitionData[model].EI_LG4[k], pl->eiLength * sizeof(double)); |
---|
7134 | memcpy(localTree->partitionData[model].substRates_LG4[k], tr->partitionData[model].substRates_LG4[k], pl->substRatesLength * sizeof(double)); |
---|
7135 | memcpy(localTree->partitionData[model].frequencies_LG4[k], tr->partitionData[model].frequencies_LG4[k], pl->frequenciesLength * sizeof(double)); |
---|
7136 | memcpy(localTree->partitionData[model].tipVector_LG4[k], tr->partitionData[model].tipVector_LG4[k], pl->tipVectorLength * sizeof(double)); |
---|
7137 | } |
---|
7138 | } |
---|
7139 | } |
---|
7140 | |
---|
7141 | static void execFunction(tree *tr, tree *localTree, int tid, int n) |
---|
7142 | { |
---|
7143 | double volatile result; |
---|
7144 | |
---|
7145 | size_t |
---|
7146 | i; |
---|
7147 | |
---|
7148 | int |
---|
7149 | currentJob, |
---|
7150 | model, |
---|
7151 | localCounter, |
---|
7152 | globalCounter; |
---|
7153 | |
---|
7154 | currentJob = threadJob >> 16; |
---|
7155 | |
---|
7156 | switch(currentJob) |
---|
7157 | { |
---|
7158 | case THREAD_INIT_PARTITION: |
---|
7159 | initPartition(tr, localTree, tid); |
---|
7160 | break; |
---|
7161 | case THREAD_ALLOC_LIKELIHOOD: |
---|
7162 | allocNodex(localTree, tid, n); |
---|
7163 | threadFixModelIndices(tr, localTree, tid, n); |
---|
7164 | break; |
---|
7165 | case THREAD_FIX_MODEL_INDICES: |
---|
7166 | threadFixModelIndices(tr, localTree, tid, n); |
---|
7167 | break; |
---|
7168 | case THREAD_EVALUATE: |
---|
7169 | sendTraversalInfo(localTree, tr); |
---|
7170 | result = evaluateIterative(localTree, FALSE); |
---|
7171 | |
---|
7172 | if(localTree->NumberOfModels > 1) |
---|
7173 | { |
---|
7174 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7175 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
7176 | } |
---|
7177 | else |
---|
7178 | reductionBuffer[tid] = result; |
---|
7179 | |
---|
7180 | if(tid > 0) |
---|
7181 | { |
---|
7182 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7183 | localTree->executeModel[model] = TRUE; |
---|
7184 | } |
---|
7185 | break; |
---|
7186 | case THREAD_NEWVIEW_MASKED: |
---|
7187 | sendTraversalInfo(localTree, tr); |
---|
7188 | memcpy(localTree->executeModel, tr->executeModel, sizeof(boolean) * localTree->NumberOfModels); |
---|
7189 | newviewIterative(localTree); |
---|
7190 | if(tid > 0) |
---|
7191 | { |
---|
7192 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7193 | localTree->executeModel[model] = TRUE; |
---|
7194 | } |
---|
7195 | break; |
---|
7196 | case THREAD_NEWVIEW: |
---|
7197 | sendTraversalInfo(localTree, tr); |
---|
7198 | newviewIterative(localTree); |
---|
7199 | break; |
---|
7200 | case THREAD_MAKENEWZ_FIRST: |
---|
7201 | { |
---|
7202 | volatile double |
---|
7203 | dlnLdlz[NUM_BRANCHES], |
---|
7204 | d2lnLdlz2[NUM_BRANCHES]; |
---|
7205 | |
---|
7206 | sendTraversalInfo(localTree, tr); |
---|
7207 | if(tid > 0) |
---|
7208 | { |
---|
7209 | memcpy(localTree->coreLZ, tr->coreLZ, sizeof(double) * localTree->numBranches); |
---|
7210 | memcpy(localTree->executeModel, tr->executeModel, sizeof(boolean) * localTree->NumberOfModels); |
---|
7211 | } |
---|
7212 | |
---|
7213 | makenewzIterative(localTree); |
---|
7214 | execCore(localTree, dlnLdlz, d2lnLdlz2); |
---|
7215 | |
---|
7216 | if(!tr->multiBranch) |
---|
7217 | { |
---|
7218 | reductionBuffer[tid] = dlnLdlz[0]; |
---|
7219 | reductionBufferTwo[tid] = d2lnLdlz2[0]; |
---|
7220 | } |
---|
7221 | else |
---|
7222 | { |
---|
7223 | for(i = 0; i < (size_t)localTree->NumberOfModels; i++) |
---|
7224 | { |
---|
7225 | reductionBuffer[tid * localTree->NumberOfModels + i] = dlnLdlz[i]; |
---|
7226 | reductionBufferTwo[tid * localTree->NumberOfModels + i] = d2lnLdlz2[i]; |
---|
7227 | } |
---|
7228 | } |
---|
7229 | |
---|
7230 | if(tid > 0) |
---|
7231 | { |
---|
7232 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7233 | localTree->executeModel[model] = TRUE; |
---|
7234 | } |
---|
7235 | } |
---|
7236 | break; |
---|
7237 | case THREAD_MAKENEWZ: |
---|
7238 | { |
---|
7239 | volatile double |
---|
7240 | dlnLdlz[NUM_BRANCHES], |
---|
7241 | d2lnLdlz2[NUM_BRANCHES]; |
---|
7242 | |
---|
7243 | memcpy(localTree->coreLZ, tr->coreLZ, sizeof(double) * localTree->numBranches); |
---|
7244 | memcpy(localTree->executeModel, tr->executeModel, sizeof(boolean) * localTree->NumberOfModels); |
---|
7245 | |
---|
7246 | execCore(localTree, dlnLdlz, d2lnLdlz2); |
---|
7247 | |
---|
7248 | if(!tr->multiBranch) |
---|
7249 | { |
---|
7250 | reductionBuffer[tid] = dlnLdlz[0]; |
---|
7251 | reductionBufferTwo[tid] = d2lnLdlz2[0]; |
---|
7252 | } |
---|
7253 | else |
---|
7254 | { |
---|
7255 | for(i = 0; i < (size_t)localTree->NumberOfModels; i++) |
---|
7256 | { |
---|
7257 | reductionBuffer[tid * localTree->NumberOfModels + i] = dlnLdlz[i]; |
---|
7258 | reductionBufferTwo[tid * localTree->NumberOfModels + i] = d2lnLdlz2[i]; |
---|
7259 | } |
---|
7260 | } |
---|
7261 | if(tid > 0) |
---|
7262 | { |
---|
7263 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7264 | localTree->executeModel[model] = TRUE; |
---|
7265 | } |
---|
7266 | } |
---|
7267 | break; |
---|
7268 | case THREAD_COPY_RATES: |
---|
7269 | if(tid > 0) |
---|
7270 | { |
---|
7271 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7272 | { |
---|
7273 | const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model])); |
---|
7274 | |
---|
7275 | memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double)); |
---|
7276 | memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double)); |
---|
7277 | memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double)); |
---|
7278 | memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double)); |
---|
7279 | |
---|
7280 | copyLG4(localTree, tr, model, pl); |
---|
7281 | } |
---|
7282 | } |
---|
7283 | break; |
---|
7284 | case THREAD_OPT_RATE: |
---|
7285 | if(tid > 0) |
---|
7286 | { |
---|
7287 | memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean)); |
---|
7288 | |
---|
7289 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7290 | { |
---|
7291 | const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model])); |
---|
7292 | |
---|
7293 | memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double)); |
---|
7294 | memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double)); |
---|
7295 | memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double)); |
---|
7296 | memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double)); |
---|
7297 | |
---|
7298 | copyLG4(localTree, tr, model, pl); |
---|
7299 | } |
---|
7300 | } |
---|
7301 | |
---|
7302 | result = evaluateIterative(localTree, FALSE); |
---|
7303 | |
---|
7304 | |
---|
7305 | if(localTree->NumberOfModels > 1) |
---|
7306 | { |
---|
7307 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7308 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
7309 | } |
---|
7310 | else |
---|
7311 | reductionBuffer[tid] = result; |
---|
7312 | |
---|
7313 | |
---|
7314 | if(tid > 0) |
---|
7315 | { |
---|
7316 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7317 | localTree->executeModel[model] = TRUE; |
---|
7318 | } |
---|
7319 | break; |
---|
7320 | case THREAD_COPY_INVAR: |
---|
7321 | if(tid > 0) |
---|
7322 | { |
---|
7323 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7324 | localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant; |
---|
7325 | } |
---|
7326 | break; |
---|
7327 | case THREAD_OPT_INVAR: |
---|
7328 | if(tid > 0) |
---|
7329 | { |
---|
7330 | memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean)); |
---|
7331 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7332 | localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant; |
---|
7333 | } |
---|
7334 | |
---|
7335 | result = evaluateIterative(localTree, FALSE); |
---|
7336 | |
---|
7337 | if(localTree->NumberOfModels > 1) |
---|
7338 | { |
---|
7339 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7340 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
7341 | } |
---|
7342 | else |
---|
7343 | reductionBuffer[tid] = result; |
---|
7344 | |
---|
7345 | if(tid > 0) |
---|
7346 | { |
---|
7347 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7348 | localTree->executeModel[model] = TRUE; |
---|
7349 | } |
---|
7350 | break; |
---|
7351 | case THREAD_COPY_ALPHA: |
---|
7352 | if(tid > 0) |
---|
7353 | { |
---|
7354 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7355 | { |
---|
7356 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
7357 | localTree->partitionData[model].alpha = tr->partitionData[model].alpha; |
---|
7358 | } |
---|
7359 | } |
---|
7360 | break; |
---|
7361 | case THREAD_OPT_ALPHA: |
---|
7362 | if(tid > 0) |
---|
7363 | { |
---|
7364 | memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean)); |
---|
7365 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7366 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
7367 | } |
---|
7368 | |
---|
7369 | result = evaluateIterative(localTree, FALSE); |
---|
7370 | |
---|
7371 | |
---|
7372 | if(localTree->NumberOfModels > 1) |
---|
7373 | { |
---|
7374 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7375 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
7376 | } |
---|
7377 | else |
---|
7378 | reductionBuffer[tid] = result; |
---|
7379 | |
---|
7380 | if(tid > 0) |
---|
7381 | { |
---|
7382 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7383 | localTree->executeModel[model] = TRUE; |
---|
7384 | } |
---|
7385 | break; |
---|
7386 | case THREAD_RESET_MODEL: |
---|
7387 | if(tid > 0) |
---|
7388 | { |
---|
7389 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7390 | { |
---|
7391 | const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model])); |
---|
7392 | |
---|
7393 | memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double)); |
---|
7394 | memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double)); |
---|
7395 | memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double)); |
---|
7396 | memcpy(localTree->partitionData[model].substRates, tr->partitionData[model].substRates, pl->substRatesLength * sizeof(double)); |
---|
7397 | memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double)); |
---|
7398 | memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double)); |
---|
7399 | |
---|
7400 | copyLG4(localTree, tr, model, pl); |
---|
7401 | |
---|
7402 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
7403 | localTree->partitionData[model].alpha = tr->partitionData[model].alpha; |
---|
7404 | localTree->partitionData[model].brLenScaler = tr->partitionData[model].brLenScaler; |
---|
7405 | localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant; |
---|
7406 | } |
---|
7407 | } |
---|
7408 | break; |
---|
7409 | case THREAD_COPY_INIT_MODEL: |
---|
7410 | if(tid > 0) |
---|
7411 | { |
---|
7412 | localTree->rateHetModel = tr->rateHetModel; |
---|
7413 | |
---|
7414 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7415 | { |
---|
7416 | const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model])); |
---|
7417 | |
---|
7418 | memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double)); |
---|
7419 | memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double)); |
---|
7420 | memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double)); |
---|
7421 | memcpy(localTree->partitionData[model].substRates, tr->partitionData[model].substRates, pl->substRatesLength * sizeof(double)); |
---|
7422 | memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double)); |
---|
7423 | memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double)); |
---|
7424 | |
---|
7425 | copyLG4(localTree, tr, model, pl); |
---|
7426 | |
---|
7427 | memcpy(localTree->partitionData[model].weights, tr->partitionData[model].weights, sizeof(double) * 4); |
---|
7428 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
7429 | localTree->partitionData[model].alpha = tr->partitionData[model].alpha; |
---|
7430 | localTree->partitionData[model].brLenScaler = tr->partitionData[model].brLenScaler; |
---|
7431 | localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant; |
---|
7432 | localTree->partitionData[model].lower = tr->partitionData[model].lower; |
---|
7433 | localTree->partitionData[model].upper = tr->partitionData[model].upper; |
---|
7434 | |
---|
7435 | localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories; |
---|
7436 | } |
---|
7437 | |
---|
7438 | memcpy(localTree->cdta->patrat, tr->cdta->patrat, localTree->originalCrunchedLength * sizeof(double)); |
---|
7439 | memcpy(localTree->cdta->patratStored, tr->cdta->patratStored, localTree->originalCrunchedLength * sizeof(double)); |
---|
7440 | } |
---|
7441 | |
---|
7442 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7443 | { |
---|
7444 | int |
---|
7445 | localIndex; |
---|
7446 | |
---|
7447 | for(i = localTree->partitionData[model].lower, localIndex = 0; i < localTree->partitionData[model].upper; i++) |
---|
7448 | if(i % (size_t)n == (size_t)tid) |
---|
7449 | { |
---|
7450 | localTree->partitionData[model].wgt[localIndex] = tr->cdta->aliaswgt[i]; |
---|
7451 | localTree->partitionData[model].invariant[localIndex] = tr->invariant[i]; |
---|
7452 | |
---|
7453 | localIndex++; |
---|
7454 | } |
---|
7455 | } |
---|
7456 | break; |
---|
7457 | case THREAD_RATE_CATS: |
---|
7458 | sendTraversalInfo(localTree, tr); |
---|
7459 | if(tid > 0) |
---|
7460 | { |
---|
7461 | localTree->lower_spacing = tr->lower_spacing; |
---|
7462 | localTree->upper_spacing = tr->upper_spacing; |
---|
7463 | } |
---|
7464 | |
---|
7465 | optRateCatPthreads(localTree, localTree->lower_spacing, localTree->upper_spacing, localTree->lhs, n, tid); |
---|
7466 | |
---|
7467 | if(tid > 0) |
---|
7468 | { |
---|
7469 | collectDouble(tr->cdta->patrat, localTree->cdta->patrat, localTree, n, tid); |
---|
7470 | collectDouble(tr->cdta->patratStored, localTree->cdta->patratStored, localTree, n, tid); |
---|
7471 | collectDouble(tr->lhs, localTree->lhs, localTree, n, tid); |
---|
7472 | } |
---|
7473 | break; |
---|
7474 | case THREAD_COPY_RATE_CATS: |
---|
7475 | if(tid > 0) |
---|
7476 | { |
---|
7477 | memcpy(localTree->cdta->patrat, tr->cdta->patrat, localTree->originalCrunchedLength * sizeof(double)); |
---|
7478 | memcpy(localTree->cdta->patratStored, tr->cdta->patratStored, localTree->originalCrunchedLength * sizeof(double)); |
---|
7479 | broadcastPerSiteRates(tr, localTree); |
---|
7480 | } |
---|
7481 | |
---|
7482 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7483 | { |
---|
7484 | localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories; |
---|
7485 | |
---|
7486 | for(localCounter = 0, i = localTree->partitionData[model].lower; i < localTree->partitionData[model].upper; i++) |
---|
7487 | { |
---|
7488 | if(i % (size_t)n == (size_t)tid) |
---|
7489 | { |
---|
7490 | localTree->partitionData[model].rateCategory[localCounter] = tr->cdta->rateCategory[i]; |
---|
7491 | localCounter++; |
---|
7492 | } |
---|
7493 | } |
---|
7494 | } |
---|
7495 | break; |
---|
7496 | case THREAD_CAT_TO_GAMMA: |
---|
7497 | if(tid > 0) |
---|
7498 | localTree->rateHetModel = tr->rateHetModel; |
---|
7499 | break; |
---|
7500 | case THREAD_GAMMA_TO_CAT: |
---|
7501 | if(tid > 0) |
---|
7502 | localTree->rateHetModel = tr->rateHetModel; |
---|
7503 | break; |
---|
7504 | case THREAD_EVALUATE_VECTOR: |
---|
7505 | sendTraversalInfo(localTree, tr); |
---|
7506 | result = evaluateIterative(localTree, TRUE); |
---|
7507 | |
---|
7508 | if(localTree->NumberOfModels > 1) |
---|
7509 | { |
---|
7510 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7511 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
7512 | } |
---|
7513 | else |
---|
7514 | reductionBuffer[tid] = result; |
---|
7515 | |
---|
7516 | if(tid > 0) |
---|
7517 | { |
---|
7518 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7519 | localTree->executeModel[model] = TRUE; |
---|
7520 | } |
---|
7521 | |
---|
7522 | for(model = 0, globalCounter = 0; model < localTree->NumberOfModels; model++) |
---|
7523 | { |
---|
7524 | for(localCounter = 0, i = localTree->partitionData[model].lower; i < localTree->partitionData[model].upper; i++) |
---|
7525 | { |
---|
7526 | if(i % (size_t)n == (size_t)tid) |
---|
7527 | { |
---|
7528 | tr->perSiteLL[globalCounter] = localTree->partitionData[model].perSiteLL[localCounter]; |
---|
7529 | localCounter++; |
---|
7530 | } |
---|
7531 | globalCounter++; |
---|
7532 | } |
---|
7533 | } |
---|
7534 | break; |
---|
7535 | case THREAD_COPY_PARAMS: |
---|
7536 | if(tid > 0) |
---|
7537 | { |
---|
7538 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7539 | { |
---|
7540 | const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model])); |
---|
7541 | |
---|
7542 | memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double)); |
---|
7543 | memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double)); |
---|
7544 | memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double)); |
---|
7545 | memcpy(localTree->partitionData[model].substRates, tr->partitionData[model].substRates, pl->substRatesLength * sizeof(double)); |
---|
7546 | memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double)); |
---|
7547 | memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double)); |
---|
7548 | |
---|
7549 | copyLG4(localTree, tr, model, pl); |
---|
7550 | |
---|
7551 | } |
---|
7552 | } |
---|
7553 | break; |
---|
7554 | case THREAD_INIT_EPA: |
---|
7555 | if(tid > 0) |
---|
7556 | { |
---|
7557 | localTree->leftRootNode = tr->leftRootNode; |
---|
7558 | localTree->rightRootNode = tr->rightRootNode; |
---|
7559 | localTree->wasRooted = tr->wasRooted; |
---|
7560 | localTree->bInf = tr->bInf; |
---|
7561 | localTree->numberOfBranches = tr->numberOfBranches; |
---|
7562 | localTree->contiguousVectorLength = tr->contiguousVectorLength; |
---|
7563 | localTree->contiguousScalingLength = tr->contiguousScalingLength; |
---|
7564 | localTree->inserts = tr->inserts; |
---|
7565 | localTree->numberOfTipsForInsertion = tr->numberOfTipsForInsertion; |
---|
7566 | localTree->fracchange = tr->fracchange; |
---|
7567 | localTree->rawFracchange = tr->rawFracchange; |
---|
7568 | |
---|
7569 | memcpy(localTree->partitionContributions, tr->partitionContributions, sizeof(double) * localTree->NumberOfModels); |
---|
7570 | |
---|
7571 | memcpy(localTree->fracchanges, tr->fracchanges, sizeof(double) * localTree->NumberOfModels); |
---|
7572 | |
---|
7573 | memcpy(localTree->rawFracchanges, tr->rawFracchanges, sizeof(double) * localTree->NumberOfModels); |
---|
7574 | |
---|
7575 | |
---|
7576 | if(localTree->perPartitionEPA) |
---|
7577 | { |
---|
7578 | localTree->readPartition = (int *)rax_malloc(sizeof(int) * (size_t)localTree->numberOfTipsForInsertion); |
---|
7579 | memcpy(localTree->readPartition, tr->readPartition, sizeof(int) * (size_t)localTree->numberOfTipsForInsertion); |
---|
7580 | } |
---|
7581 | |
---|
7582 | } |
---|
7583 | |
---|
7584 | localTree->temporarySumBuffer = (double *)rax_malloc(sizeof(double) * localTree->contiguousVectorLength); |
---|
7585 | localTree->temporaryVector = (double *)rax_malloc(sizeof(double) * localTree->contiguousVectorLength); |
---|
7586 | |
---|
7587 | localTree->temporaryScaling = (int *)rax_malloc(sizeof(int) * localTree->contiguousScalingLength); |
---|
7588 | |
---|
7589 | |
---|
7590 | localTree->contiguousWgt = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength); |
---|
7591 | localTree->contiguousInvariant = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength); |
---|
7592 | |
---|
7593 | |
---|
7594 | memcpy(localTree->contiguousWgt , tr->cdta->aliaswgt, sizeof(int) * localTree->contiguousScalingLength); |
---|
7595 | memcpy(localTree->contiguousInvariant , tr->invariant, sizeof(int) * localTree->contiguousScalingLength); |
---|
7596 | |
---|
7597 | if(tid > 0) |
---|
7598 | broadcastPerSiteRates(tr, localTree); |
---|
7599 | |
---|
7600 | |
---|
7601 | localTree->contiguousRateCategory = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength); |
---|
7602 | |
---|
7603 | |
---|
7604 | memcpy(localTree->contiguousRateCategory, tr->cdta->rateCategory, sizeof(int) * localTree->contiguousScalingLength); |
---|
7605 | |
---|
7606 | localTree->contiguousTips = tr->yVector; |
---|
7607 | |
---|
7608 | break; |
---|
7609 | case THREAD_GATHER_LIKELIHOOD: |
---|
7610 | { |
---|
7611 | int |
---|
7612 | branchCounter = tr->branchCounter; |
---|
7613 | |
---|
7614 | double |
---|
7615 | *leftContigousVector = localTree->bInf[branchCounter].epa->left, |
---|
7616 | *rightContigousVector = localTree->bInf[branchCounter].epa->right; |
---|
7617 | |
---|
7618 | int |
---|
7619 | *leftContigousScalingVector = localTree->bInf[branchCounter].epa->leftScaling, |
---|
7620 | *rightContigousScalingVector = localTree->bInf[branchCounter].epa->rightScaling, |
---|
7621 | rightNumber = localTree->bInf[branchCounter].epa->rightNodeNumber, |
---|
7622 | leftNumber = localTree->bInf[branchCounter].epa->leftNodeNumber; |
---|
7623 | |
---|
7624 | size_t |
---|
7625 | globalColumnCount = 0, |
---|
7626 | globalCount = 0; |
---|
7627 | |
---|
7628 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7629 | { |
---|
7630 | size_t |
---|
7631 | blockRequirements; |
---|
7632 | |
---|
7633 | double |
---|
7634 | *leftStridedVector = (double *)NULL, |
---|
7635 | *rightStridedVector = (double *)NULL; |
---|
7636 | |
---|
7637 | int |
---|
7638 | *leftStridedScalingVector = (int *)NULL, |
---|
7639 | *rightStridedScalingVector = (int *)NULL; |
---|
7640 | |
---|
7641 | size_t |
---|
7642 | localColumnCount = 0, |
---|
7643 | localCount = 0; |
---|
7644 | |
---|
7645 | if(!isTip(leftNumber, localTree->mxtips)) |
---|
7646 | { |
---|
7647 | leftStridedVector = localTree->partitionData[model].xVector[leftNumber - localTree->mxtips - 1]; |
---|
7648 | leftStridedScalingVector = localTree->partitionData[model].expVector[leftNumber - localTree->mxtips - 1]; |
---|
7649 | } |
---|
7650 | |
---|
7651 | if(!isTip(rightNumber, localTree->mxtips)) |
---|
7652 | { |
---|
7653 | rightStridedVector = localTree->partitionData[model].xVector[rightNumber - localTree->mxtips - 1]; |
---|
7654 | rightStridedScalingVector = localTree->partitionData[model].expVector[rightNumber - localTree->mxtips - 1]; |
---|
7655 | } |
---|
7656 | |
---|
7657 | assert(!(isTip(leftNumber, localTree->mxtips) && isTip(rightNumber, localTree->mxtips))); |
---|
7658 | |
---|
7659 | blockRequirements = (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states); |
---|
7660 | |
---|
7661 | for(globalColumnCount = localTree->partitionData[model].lower; globalColumnCount < localTree->partitionData[model].upper; globalColumnCount++) |
---|
7662 | { |
---|
7663 | if(globalColumnCount % (size_t)n == (size_t)tid) |
---|
7664 | { |
---|
7665 | if(leftStridedVector) |
---|
7666 | { |
---|
7667 | memcpy(&leftContigousVector[globalCount], &leftStridedVector[localCount], sizeof(double) * blockRequirements); |
---|
7668 | leftContigousScalingVector[globalColumnCount] = leftStridedScalingVector[localColumnCount]; |
---|
7669 | } |
---|
7670 | |
---|
7671 | if(rightStridedVector) |
---|
7672 | { |
---|
7673 | memcpy(&rightContigousVector[globalCount], &rightStridedVector[localCount], sizeof(double) * blockRequirements); |
---|
7674 | rightContigousScalingVector[globalColumnCount] = rightStridedScalingVector[localColumnCount]; |
---|
7675 | } |
---|
7676 | |
---|
7677 | localColumnCount++; |
---|
7678 | localCount += blockRequirements; |
---|
7679 | } |
---|
7680 | |
---|
7681 | |
---|
7682 | |
---|
7683 | globalCount += blockRequirements; |
---|
7684 | } |
---|
7685 | |
---|
7686 | assert(localColumnCount == localTree->partitionData[model].width); |
---|
7687 | assert(localCount == (localTree->partitionData[model].width * (int)blockRequirements)); |
---|
7688 | |
---|
7689 | } |
---|
7690 | } |
---|
7691 | break; |
---|
7692 | case THREAD_INSERT_CLASSIFY: |
---|
7693 | case THREAD_INSERT_CLASSIFY_THOROUGH: |
---|
7694 | { |
---|
7695 | int |
---|
7696 | branchNumber; |
---|
7697 | |
---|
7698 | boolean |
---|
7699 | done = FALSE; |
---|
7700 | |
---|
7701 | while(!done) |
---|
7702 | { |
---|
7703 | pthread_mutex_lock(&mutex); |
---|
7704 | |
---|
7705 | if(NumberOfJobs == 0) |
---|
7706 | done = TRUE; |
---|
7707 | else |
---|
7708 | { |
---|
7709 | branchNumber = localTree->numberOfBranches - NumberOfJobs; |
---|
7710 | NumberOfJobs--; |
---|
7711 | } |
---|
7712 | |
---|
7713 | pthread_mutex_unlock(&mutex); |
---|
7714 | |
---|
7715 | if(!done) |
---|
7716 | { |
---|
7717 | switch(currentJob) |
---|
7718 | { |
---|
7719 | case THREAD_INSERT_CLASSIFY: |
---|
7720 | addTraverseRobIterative(localTree, branchNumber); |
---|
7721 | break; |
---|
7722 | case THREAD_INSERT_CLASSIFY_THOROUGH: |
---|
7723 | testInsertThoroughIterative(localTree, branchNumber); |
---|
7724 | break; |
---|
7725 | default: |
---|
7726 | assert(0); |
---|
7727 | } |
---|
7728 | |
---|
7729 | } |
---|
7730 | } |
---|
7731 | } |
---|
7732 | break; |
---|
7733 | case THREAD_PREPARE_BIPS_FOR_PRINT: |
---|
7734 | { |
---|
7735 | int |
---|
7736 | i = 0, |
---|
7737 | j = 0; |
---|
7738 | |
---|
7739 | boolean |
---|
7740 | done = FALSE; |
---|
7741 | |
---|
7742 | while(!done) |
---|
7743 | { |
---|
7744 | pthread_mutex_lock(&mutex); |
---|
7745 | |
---|
7746 | if(NumberOfJobs == 0) |
---|
7747 | done = TRUE; |
---|
7748 | else |
---|
7749 | { |
---|
7750 | i = tr->consensusBipLen - NumberOfJobs; |
---|
7751 | NumberOfJobs--; |
---|
7752 | } |
---|
7753 | |
---|
7754 | pthread_mutex_unlock(&mutex); |
---|
7755 | |
---|
7756 | if( ! done) |
---|
7757 | { |
---|
7758 | entry |
---|
7759 | *bipA = tr->consensusBips[i] ; |
---|
7760 | |
---|
7761 | unsigned int |
---|
7762 | firstIndex = 0; |
---|
7763 | |
---|
7764 | while(firstIndex < tr->bitVectorLength && bipA->bitVector[firstIndex] == 0 ) |
---|
7765 | firstIndex++; |
---|
7766 | |
---|
7767 | |
---|
7768 | for(j = i + 1; j < tr->consensusBipLen; j++) |
---|
7769 | { |
---|
7770 | entry |
---|
7771 | *bipB = tr->consensusBips[j]; |
---|
7772 | |
---|
7773 | if(bipA->amountTips < bipB->amountTips && |
---|
7774 | issubset(bipA->bitVector, bipB->bitVector, tr->bitVectorLength, firstIndex)) |
---|
7775 | { |
---|
7776 | /* i is child of j */ |
---|
7777 | List |
---|
7778 | *elem = (List*) rax_malloc(sizeof(List)); |
---|
7779 | |
---|
7780 | elem->value = rax_calloc(1, sizeof(int)); |
---|
7781 | |
---|
7782 | *(int*)elem->value = i; |
---|
7783 | |
---|
7784 | pthread_mutex_lock(tr->mutexesForHashing[j]); /* LOCKED */ |
---|
7785 | |
---|
7786 | tr->hasAncestor[i] = TRUE; |
---|
7787 | |
---|
7788 | elem->next = tr->listOfDirectChildren[j]; |
---|
7789 | tr->listOfDirectChildren[j] = elem; |
---|
7790 | |
---|
7791 | pthread_mutex_unlock(tr->mutexesForHashing[j]); /* UNLOCKED */ |
---|
7792 | |
---|
7793 | break; /* each node has only 1 parent -> nothing more to do */ |
---|
7794 | } |
---|
7795 | } |
---|
7796 | } |
---|
7797 | } |
---|
7798 | } |
---|
7799 | break; |
---|
7800 | case THREAD_MRE_COMPUTE: |
---|
7801 | { |
---|
7802 | if(tid > 0) |
---|
7803 | { |
---|
7804 | /* worker threads */ |
---|
7805 | boolean done = FALSE; |
---|
7806 | int localEntryCount = (int) tr->h->entryCount; /* problem? */ |
---|
7807 | while(!done ) |
---|
7808 | { |
---|
7809 | int acquiredJobs = 0; |
---|
7810 | int jobId = -1; |
---|
7811 | |
---|
7812 | /* get new job */ |
---|
7813 | |
---|
7814 | pthread_mutex_lock(&mutex) ; /* START LOCK */ |
---|
7815 | |
---|
7816 | if( NumberOfJobs == 0 ) |
---|
7817 | { |
---|
7818 | /* finish */ |
---|
7819 | done = TRUE; |
---|
7820 | } |
---|
7821 | else |
---|
7822 | if( localEntryCount - NumberOfJobs + tr->recommendedAmountJobs < tr->sectionEnd) |
---|
7823 | { |
---|
7824 | /* try to acquire the recommended amount of jobs */ |
---|
7825 | jobId = localEntryCount - NumberOfJobs; |
---|
7826 | acquiredJobs = tr->recommendedAmountJobs; |
---|
7827 | NumberOfJobs -= acquiredJobs; |
---|
7828 | } |
---|
7829 | else |
---|
7830 | if( localEntryCount - NumberOfJobs < (signed int)tr->sectionEnd) |
---|
7831 | { |
---|
7832 | /* at least get one job */ |
---|
7833 | jobId = tr->h->entryCount - NumberOfJobs; |
---|
7834 | acquiredJobs = 1; |
---|
7835 | NumberOfJobs--; |
---|
7836 | } |
---|
7837 | |
---|
7838 | pthread_mutex_unlock(&mutex); /* END LOCK */ |
---|
7839 | |
---|
7840 | if(*(tr->len) >= tr->maxBips) |
---|
7841 | break; |
---|
7842 | |
---|
7843 | /* check all */ |
---|
7844 | while(acquiredJobs > 0) |
---|
7845 | { |
---|
7846 | boolean |
---|
7847 | compatflag = TRUE; |
---|
7848 | |
---|
7849 | entry |
---|
7850 | *currentEntry = tr->sbw[jobId]; |
---|
7851 | |
---|
7852 | int k; |
---|
7853 | |
---|
7854 | if(!((unsigned int)tr->mr_thresh < currentEntry->supportFromTreeset[0])) |
---|
7855 | { |
---|
7856 | for(k = *(tr->len); k > 0; k--) |
---|
7857 | { |
---|
7858 | if(! compatible(tr->sbi[k-1], currentEntry, tr->bitVectorLength)) |
---|
7859 | { |
---|
7860 | compatflag = FALSE; |
---|
7861 | break; |
---|
7862 | } |
---|
7863 | } |
---|
7864 | } |
---|
7865 | if(compatflag) |
---|
7866 | tr->bipStatus[jobId - tr->sectionEnd + tr->bipStatusLen] = MRE_POSSIBLE_CANDIDATE; /* ready to check */ |
---|
7867 | else |
---|
7868 | tr->bipStatus[jobId - tr->sectionEnd + tr->bipStatusLen] = MRE_EXCLUDED; /* can be omitted */ |
---|
7869 | |
---|
7870 | acquiredJobs--; |
---|
7871 | jobId++; |
---|
7872 | } |
---|
7873 | } |
---|
7874 | } |
---|
7875 | else |
---|
7876 | /* master thread */ |
---|
7877 | { |
---|
7878 | /* check in a looping manner, if bipartitions could be added */ |
---|
7879 | |
---|
7880 | int |
---|
7881 | highestToCheck, |
---|
7882 | tmpCounter = 0; |
---|
7883 | |
---|
7884 | double |
---|
7885 | density = 0.0; |
---|
7886 | |
---|
7887 | while(TRUE) |
---|
7888 | { |
---|
7889 | /* get highest bip to check */ |
---|
7890 | highestToCheck = 0; |
---|
7891 | while(highestToCheck < tr->bipStatusLen) |
---|
7892 | { |
---|
7893 | /* waits busily as long as there is nothing to do */ |
---|
7894 | /* printf("%d is highest to check\n", highestToCheck); */ |
---|
7895 | if( ! tr->bipStatus[highestToCheck] ) |
---|
7896 | highestToCheck = 0; |
---|
7897 | else |
---|
7898 | if(tr->bipStatus[highestToCheck] == MRE_POSSIBLE_CANDIDATE) |
---|
7899 | break; |
---|
7900 | else |
---|
7901 | highestToCheck++; |
---|
7902 | } |
---|
7903 | |
---|
7904 | /* try to finish */ |
---|
7905 | if( tmpCounter >= tr->maxBips || |
---|
7906 | (highestToCheck == tr->bipStatusLen /* end of buffer that is examined */ |
---|
7907 | && (unsigned int)tr->sectionEnd == tr->h->entryCount /* the end of the buffer is also the hashtable */ |
---|
7908 | && tr->bipStatus[highestToCheck-1] > MRE_POSSIBLE_CANDIDATE)) |
---|
7909 | { |
---|
7910 | /* the last entry in buffer was already processed */ |
---|
7911 | *(tr->len) = tmpCounter; /* for the workers to finish */ |
---|
7912 | break; /* master says goodbye */ |
---|
7913 | } |
---|
7914 | |
---|
7915 | /* reset section (resp. the buffer to be checked) */ |
---|
7916 | else |
---|
7917 | if( highestToCheck == tr->bipStatusLen) |
---|
7918 | { |
---|
7919 | int |
---|
7920 | newSectionEnd, |
---|
7921 | min, |
---|
7922 | max; |
---|
7923 | |
---|
7924 | *(tr->len) = tmpCounter; /* reset counter for workers */ |
---|
7925 | tr->entriesOfSection = &(tr->sbw[tr->sectionEnd ]); |
---|
7926 | |
---|
7927 | /* find new section end: tries to find a new window |
---|
7928 | size (and resp. sectionEnd) s.t. the expected |
---|
7929 | amount of work for master and workers is the same. |
---|
7930 | */ |
---|
7931 | density /= tr->bipStatusLen; |
---|
7932 | |
---|
7933 | /* I am not entirely sure, if this makes the code really incredible faster... */ |
---|
7934 | max = 5 * (NumberOfThreads-1); |
---|
7935 | min = 1; |
---|
7936 | tr->recommendedAmountJobs = (int)(max + (min - max) * density); /* recommend an amount of jobs to be calculate per thread between min and max */ |
---|
7937 | |
---|
7938 | if(density) |
---|
7939 | { |
---|
7940 | int |
---|
7941 | tmp = MAX((2 * tmpCounter * SECTION_CONSTANT / (NumberOfThreads * density)), /* the above discussed formula */ |
---|
7942 | NumberOfThreads * MRE_MIN_AMOUNT_JOBS_PER_THREAD ); /* we need at least a bit work */ |
---|
7943 | newSectionEnd = MIN(tr->sectionEnd + tmp, (int)(tr->h->entryCount)); |
---|
7944 | } |
---|
7945 | else |
---|
7946 | newSectionEnd = tr->h->entryCount; |
---|
7947 | |
---|
7948 | density = 0.0; |
---|
7949 | |
---|
7950 | tr->bipStatusLen = newSectionEnd - tr->sectionEnd; |
---|
7951 | rax_free(tr->bipStatus); |
---|
7952 | /* printf("%d\n" ,tr->bipStatusLen); */ |
---|
7953 | tr->bipStatus = (int*)rax_calloc(tr->bipStatusLen, sizeof(int)); |
---|
7954 | tr->sectionEnd = newSectionEnd; |
---|
7955 | continue; |
---|
7956 | } |
---|
7957 | |
---|
7958 | assert( tr->bipStatus[highestToCheck] == MRE_POSSIBLE_CANDIDATE); |
---|
7959 | |
---|
7960 | for(i = highestToCheck; i > 0; i--) /* checking new bip */ |
---|
7961 | { |
---|
7962 | assert(tr->bipStatus[i-1] == MRE_ADDED || tr->bipStatus[i-1] == MRE_EXCLUDED); |
---|
7963 | |
---|
7964 | if(tr->bipStatus[i-1] == MRE_ADDED |
---|
7965 | && ! compatible(tr->entriesOfSection[i-1], tr->entriesOfSection[highestToCheck], tr->bitVectorLength)) |
---|
7966 | { |
---|
7967 | tr->bipStatus[highestToCheck] = MRE_EXCLUDED; |
---|
7968 | break; |
---|
7969 | } |
---|
7970 | } |
---|
7971 | |
---|
7972 | if(i == 0) /* accepting */ |
---|
7973 | { |
---|
7974 | tr->bipStatus[highestToCheck] = MRE_ADDED; |
---|
7975 | tr->sbi[tmpCounter] = tr->entriesOfSection[highestToCheck]; |
---|
7976 | tmpCounter++; |
---|
7977 | density++; |
---|
7978 | } |
---|
7979 | } |
---|
7980 | } |
---|
7981 | } |
---|
7982 | break; |
---|
7983 | case THREAD_NEWVIEW_ANCESTRAL: |
---|
7984 | sendTraversalInfo(localTree, tr); |
---|
7985 | newviewIterativeAncestral(localTree); |
---|
7986 | break; |
---|
7987 | case THREAD_GATHER_ANCESTRAL: |
---|
7988 | { |
---|
7989 | double |
---|
7990 | *contigousVector = tr->ancestralStates; |
---|
7991 | |
---|
7992 | size_t |
---|
7993 | globalColumnCount = 0, |
---|
7994 | globalCount = 0; |
---|
7995 | |
---|
7996 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7997 | { |
---|
7998 | size_t |
---|
7999 | rateHet, |
---|
8000 | blockRequirements; |
---|
8001 | |
---|
8002 | |
---|
8003 | size_t |
---|
8004 | localColumnCount = 0, |
---|
8005 | localCount = 0; |
---|
8006 | |
---|
8007 | double |
---|
8008 | *stridedVector = localTree->partitionData[model].sumBuffer; |
---|
8009 | |
---|
8010 | if(tr->rateHetModel == CAT) |
---|
8011 | rateHet = 1; |
---|
8012 | else |
---|
8013 | rateHet = 4; |
---|
8014 | |
---|
8015 | blockRequirements = (size_t)(rateHet) * (size_t)(tr->partitionData[model].states); |
---|
8016 | |
---|
8017 | for(globalColumnCount = localTree->partitionData[model].lower; globalColumnCount < localTree->partitionData[model].upper; globalColumnCount++) |
---|
8018 | { |
---|
8019 | if(globalColumnCount % (size_t)n == (size_t)tid) |
---|
8020 | { |
---|
8021 | memcpy(&contigousVector[globalCount], &stridedVector[localCount], sizeof(double) * blockRequirements); |
---|
8022 | |
---|
8023 | localColumnCount++; |
---|
8024 | localCount += blockRequirements; |
---|
8025 | } |
---|
8026 | |
---|
8027 | globalCount += blockRequirements; |
---|
8028 | } |
---|
8029 | |
---|
8030 | assert(localColumnCount == localTree->partitionData[model].width); |
---|
8031 | assert(localCount == (localTree->partitionData[model].width * (int)blockRequirements)); |
---|
8032 | } |
---|
8033 | } |
---|
8034 | break; |
---|
8035 | case THREAD_OPT_SCALER: |
---|
8036 | if(tid > 0) |
---|
8037 | { |
---|
8038 | memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean)); |
---|
8039 | |
---|
8040 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8041 | localTree->partitionData[model].brLenScaler = tr->partitionData[model].brLenScaler; |
---|
8042 | } |
---|
8043 | |
---|
8044 | result = evaluateIterative(localTree, FALSE); |
---|
8045 | |
---|
8046 | if(localTree->NumberOfModels > 1) |
---|
8047 | { |
---|
8048 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8049 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
8050 | } |
---|
8051 | else |
---|
8052 | reductionBuffer[tid] = result; |
---|
8053 | |
---|
8054 | if(tid > 0) |
---|
8055 | { |
---|
8056 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8057 | localTree->executeModel[model] = TRUE; |
---|
8058 | } |
---|
8059 | break; |
---|
8060 | case THREAD_COPY_LG4X_RATES: |
---|
8061 | if(tid > 0) |
---|
8062 | { |
---|
8063 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8064 | { |
---|
8065 | memcpy(localTree->partitionData[model].weights, tr->partitionData[model].weights, sizeof(double) * 4); |
---|
8066 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
8067 | } |
---|
8068 | } |
---|
8069 | break; |
---|
8070 | case THREAD_OPT_LG4X_RATES: |
---|
8071 | if(tid > 0) |
---|
8072 | { |
---|
8073 | memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean)); |
---|
8074 | |
---|
8075 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8076 | { |
---|
8077 | memcpy(localTree->partitionData[model].weights, tr->partitionData[model].weights, sizeof(double) * 4); |
---|
8078 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
8079 | } |
---|
8080 | } |
---|
8081 | |
---|
8082 | |
---|
8083 | result = evaluateIterative(localTree, FALSE); |
---|
8084 | |
---|
8085 | if(localTree->NumberOfModels > 1) |
---|
8086 | { |
---|
8087 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8088 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
8089 | } |
---|
8090 | else |
---|
8091 | reductionBuffer[tid] = result; |
---|
8092 | |
---|
8093 | if(tid > 0) |
---|
8094 | { |
---|
8095 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8096 | localTree->executeModel[model] = TRUE; |
---|
8097 | } |
---|
8098 | break; |
---|
8099 | default: |
---|
8100 | printf("Job %d\n", currentJob); |
---|
8101 | assert(0); |
---|
8102 | } |
---|
8103 | } |
---|
8104 | |
---|
8105 | |
---|
8106 | |
---|
8107 | |
---|
8108 | void masterBarrier(int jobType, tree *tr) |
---|
8109 | { |
---|
8110 | const int |
---|
8111 | n = NumberOfThreads; |
---|
8112 | |
---|
8113 | int |
---|
8114 | i, |
---|
8115 | sum; |
---|
8116 | |
---|
8117 | jobCycle = !jobCycle; |
---|
8118 | threadJob = (jobType << 16) + jobCycle; |
---|
8119 | |
---|
8120 | execFunction(tr, tr, 0, n); |
---|
8121 | |
---|
8122 | |
---|
8123 | do |
---|
8124 | { |
---|
8125 | for(i = 1, sum = 1; i < n; i++) |
---|
8126 | sum += barrierBuffer[i]; |
---|
8127 | } |
---|
8128 | while(sum < n); |
---|
8129 | |
---|
8130 | for(i = 1; i < n; i++) |
---|
8131 | barrierBuffer[i] = 0; |
---|
8132 | } |
---|
8133 | |
---|
8134 | #ifndef _PORTABLE_PTHREADS |
---|
8135 | |
---|
/*
 * Restricts the calling thread to the single CPU whose index equals tid.
 *
 * When the affinity request is rejected, an explanation is printed through
 * printBothOpen() and execution is stopped with assert(0).
 */
static void pinToCore(int tid)
{
  cpu_set_t
    allowedCpus;

  int
    rc;

  /* build a CPU set that contains exactly one CPU: number tid */
  CPU_ZERO(&allowedCpus);
  CPU_SET(tid, &allowedCpus);

  rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &allowedCpus);

  if(rc == 0)
    return;

  printBothOpen("\n\nThere was a problem finding a physical core for thread number %d to run on.\n", tid);
  printBothOpen("Probably this happend because you are trying to run more threads than you have cores available,\n");
  printBothOpen("which is a thing you should never ever do again, good bye .... \n\n");
  assert(0);
}
---|
8151 | |
---|
8152 | #endif |
---|
8153 | |
---|
8154 | static void *likelihoodThread(void *tData) |
---|
8155 | { |
---|
8156 | threadData *td = (threadData*)tData; |
---|
8157 | tree |
---|
8158 | *tr = td->tr, |
---|
8159 | *localTree = (tree *)rax_malloc(sizeof(tree)); |
---|
8160 | int |
---|
8161 | myCycle = 0; |
---|
8162 | |
---|
8163 | const int |
---|
8164 | n = NumberOfThreads, |
---|
8165 | tid = td->threadNumber; |
---|
8166 | |
---|
8167 | #ifndef _PORTABLE_PTHREADS |
---|
8168 | pinToCore(tid); |
---|
8169 | #endif |
---|
8170 | |
---|
8171 | printf("\nThis is RAxML Worker Pthread Number: %d\n", tid); |
---|
8172 | |
---|
8173 | while(1) |
---|
8174 | { |
---|
8175 | while (myCycle == threadJob); |
---|
8176 | myCycle = threadJob; |
---|
8177 | |
---|
8178 | execFunction(tr, localTree, tid, n); |
---|
8179 | |
---|
8180 | |
---|
8181 | barrierBuffer[tid] = 1; |
---|
8182 | } |
---|
8183 | |
---|
8184 | return (void*)NULL; |
---|
8185 | } |
---|
8186 | |
---|
8187 | static void startPthreads(tree *tr) |
---|
8188 | { |
---|
8189 | pthread_t *threads; |
---|
8190 | pthread_attr_t attr; |
---|
8191 | int rc, t; |
---|
8192 | threadData *tData; |
---|
8193 | |
---|
8194 | jobCycle = 0; |
---|
8195 | threadJob = 0; |
---|
8196 | |
---|
8197 | printf("\nThis is the RAxML Master Pthread\n"); |
---|
8198 | |
---|
8199 | pthread_attr_init(&attr); |
---|
8200 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); |
---|
8201 | |
---|
8202 | pthread_mutex_init(&mutex , (pthread_mutexattr_t *)NULL); |
---|
8203 | |
---|
8204 | threads = (pthread_t *)rax_malloc(NumberOfThreads * sizeof(pthread_t)); |
---|
8205 | tData = (threadData *)rax_malloc(NumberOfThreads * sizeof(threadData)); |
---|
8206 | |
---|
8207 | |
---|
8208 | reductionBuffer = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels); |
---|
8209 | reductionBufferTwo = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels); |
---|
8210 | reductionBufferThree = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels); |
---|
8211 | reductionBufferParsimony = (volatile int *)rax_malloc(sizeof(volatile int) * NumberOfThreads); |
---|
8212 | |
---|
8213 | |
---|
8214 | barrierBuffer = (volatile char *)rax_malloc(sizeof(volatile char) * NumberOfThreads); |
---|
8215 | |
---|
8216 | for(t = 0; t < NumberOfThreads; t++) |
---|
8217 | barrierBuffer[t] = 0; |
---|
8218 | |
---|
8219 | |
---|
8220 | branchInfos = (volatile branchInfo **)rax_malloc(sizeof(volatile branchInfo *) * NumberOfThreads); |
---|
8221 | |
---|
8222 | for(t = 1; t < NumberOfThreads; t++) |
---|
8223 | { |
---|
8224 | tData[t].tr = tr; |
---|
8225 | tData[t].threadNumber = t; |
---|
8226 | rc = pthread_create(&threads[t], &attr, likelihoodThread, (void *)(&tData[t])); |
---|
8227 | if(rc) |
---|
8228 | { |
---|
8229 | printf("ERROR; return code from pthread_create() is %d\n", rc); |
---|
8230 | exit(-1); |
---|
8231 | } |
---|
8232 | } |
---|
8233 | } |
---|
8234 | |
---|
8235 | |
---|
8236 | |
---|
8237 | #endif |
---|
8238 | |
---|
8239 | |
---|
8240 | /*************************************************************************************************************************************************************/ |
---|
8241 | |
---|
8242 | static int elwCompare(const void *p1, const void *p2) |
---|
8243 | { |
---|
8244 | elw *rc1 = (elw *)p1; |
---|
8245 | elw *rc2 = (elw *)p2; |
---|
8246 | |
---|
8247 | double i = rc1->weight; |
---|
8248 | double j = rc2->weight; |
---|
8249 | |
---|
8250 | if (i > j) |
---|
8251 | return (-1); |
---|
8252 | if (i < j) |
---|
8253 | return (1); |
---|
8254 | return (0); |
---|
8255 | } |
---|
8256 | |
---|
8257 | static int elwCompareLikelihood(const void *p1, const void *p2) |
---|
8258 | { |
---|
8259 | elw *rc1 = (elw *)p1; |
---|
8260 | elw *rc2 = (elw *)p2; |
---|
8261 | |
---|
8262 | double i = rc1->lh; |
---|
8263 | double j = rc2->lh; |
---|
8264 | |
---|
8265 | if (i > j) |
---|
8266 | return (-1); |
---|
8267 | if (i < j) |
---|
8268 | return (1); |
---|
8269 | return (0); |
---|
8270 | } |
---|
8271 | |
---|
8272 | static void computeLHTest(tree *tr, analdef *adef, char *bootStrapFileName) |
---|
8273 | { |
---|
8274 | int |
---|
8275 | i; |
---|
8276 | |
---|
8277 | double |
---|
8278 | bestLH, |
---|
8279 | currentLH, |
---|
8280 | weightSum = 0.0; |
---|
8281 | |
---|
8282 | FILE |
---|
8283 | *treeFile = getNumberOfTrees(tr, bootStrapFileName, adef); |
---|
8284 | |
---|
8285 | double |
---|
8286 | *bestVector = (double*)rax_malloc(sizeof(double) * tr->cdta->endsite); |
---|
8287 | |
---|
8288 | for(i = 0; i < tr->cdta->endsite; i++) |
---|
8289 | weightSum += (double)(tr->cdta->aliaswgt[i]); |
---|
8290 | |
---|
8291 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8292 | printBothOpen("Model optimization, best Tree: %f\n", tr->likelihood); |
---|
8293 | bestLH = tr->likelihood; |
---|
8294 | |
---|
8295 | evaluateGenericVector(tr, tr->start); |
---|
8296 | memcpy(bestVector, tr->perSiteLL, tr->cdta->endsite * sizeof(double)); |
---|
8297 | |
---|
8298 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8299 | { |
---|
8300 | int |
---|
8301 | j; |
---|
8302 | |
---|
8303 | double |
---|
8304 | temp, |
---|
8305 | wtemp, |
---|
8306 | sum = 0.0, |
---|
8307 | sum2 = 0.0, |
---|
8308 | sd; |
---|
8309 | |
---|
8310 | treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE); |
---|
8311 | |
---|
8312 | |
---|
8313 | if(tr->optimizeAllTrees) |
---|
8314 | { |
---|
8315 | treeEvaluate(tr, 1); |
---|
8316 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
8317 | } |
---|
8318 | else |
---|
8319 | treeEvaluate(tr, 2); |
---|
8320 | |
---|
8321 | tr->start = tr->nodep[1]; |
---|
8322 | |
---|
8323 | currentLH = tr->likelihood; |
---|
8324 | |
---|
8325 | if(currentLH > bestLH) |
---|
8326 | printBothOpen("Better tree found %d at %f\n", i, currentLH); |
---|
8327 | |
---|
8328 | evaluateGenericVector(tr, tr->start); |
---|
8329 | |
---|
8330 | sum = 0.0; |
---|
8331 | sum2 = 0.0; |
---|
8332 | |
---|
8333 | for (j = 0; j < tr->cdta->endsite; j++) |
---|
8334 | { |
---|
8335 | temp = bestVector[j] - tr->perSiteLL[j]; |
---|
8336 | wtemp = tr->cdta->aliaswgt[j] * temp; |
---|
8337 | sum += wtemp; |
---|
8338 | sum2 += wtemp * temp; |
---|
8339 | } |
---|
8340 | |
---|
8341 | sd = sqrt( weightSum * (sum2 - sum*sum / weightSum) / (weightSum - 1) ); |
---|
8342 | /* this is for a 5% p level */ |
---|
8343 | |
---|
8344 | printBothOpen("Tree: %d Likelihood: %f D(LH): %f SD: %f Significantly Worse: %s (5%s), %s (2%s), %s (1%s)\n", |
---|
8345 | i, currentLH, currentLH - bestLH, sd, |
---|
8346 | (sum > 1.95996 * sd) ? "Yes" : " No", "%", |
---|
8347 | (sum > 2.326 * sd) ? "Yes" : " No", "%", |
---|
8348 | (sum > 2.57583 * sd) ? "Yes" : " No", "%"); |
---|
8349 | } |
---|
8350 | |
---|
8351 | |
---|
8352 | rax_free(bestVector); |
---|
8353 | fclose(treeFile); |
---|
8354 | exit(0); |
---|
8355 | } |
---|
8356 | |
---|
8357 | static void computePerSiteLLs(tree *tr, analdef *adef, char *bootStrapFileName) |
---|
8358 | { |
---|
8359 | int |
---|
8360 | i; |
---|
8361 | |
---|
8362 | FILE |
---|
8363 | *treeFile = getNumberOfTrees(tr, bootStrapFileName, adef), |
---|
8364 | *tlf = myfopen(perSiteLLsFileName, "wb"); |
---|
8365 | |
---|
8366 | double |
---|
8367 | *unsortedSites = (double*)rax_malloc(sizeof(double) * tr->rdta->sites); |
---|
8368 | |
---|
8369 | |
---|
8370 | |
---|
8371 | fprintf(tlf, " %d %d\n", tr->numberOfTrees, tr->rdta->sites); |
---|
8372 | |
---|
8373 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8374 | { |
---|
8375 | int |
---|
8376 | k, |
---|
8377 | j; |
---|
8378 | |
---|
8379 | treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE); |
---|
8380 | assert(tr->ntips == tr->mxtips); |
---|
8381 | |
---|
8382 | if(i == 0) |
---|
8383 | { |
---|
8384 | if(adef->useBinaryModelFile) |
---|
8385 | { |
---|
8386 | readBinaryModel(tr); |
---|
8387 | evaluateGenericInitrav(tr, tr->start); |
---|
8388 | treeEvaluate(tr, 2); |
---|
8389 | } |
---|
8390 | else |
---|
8391 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8392 | } |
---|
8393 | else |
---|
8394 | { |
---|
8395 | if(tr->optimizeAllTrees) |
---|
8396 | { |
---|
8397 | treeEvaluate(tr, 1); |
---|
8398 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
8399 | } |
---|
8400 | else |
---|
8401 | treeEvaluate(tr, 2); |
---|
8402 | } |
---|
8403 | |
---|
8404 | tr->start = tr->nodep[1]; |
---|
8405 | |
---|
8406 | evaluateGenericVector(tr, tr->start); |
---|
8407 | |
---|
8408 | printBothOpen("Tree %d: %f\n", i, tr->likelihood); |
---|
8409 | |
---|
8410 | fprintf(tlf, "tr%d\t", i + 1); |
---|
8411 | |
---|
8412 | for(j = 0; j < tr->cdta->endsite; j++) |
---|
8413 | { |
---|
8414 | for(k = 0; k < tr->rdta->sites; k++) |
---|
8415 | if(j == tr->patternPosition[k]) |
---|
8416 | unsortedSites[tr->columnPosition[k] - 1] = tr->perSiteLL[j]; |
---|
8417 | } |
---|
8418 | |
---|
8419 | for(j = 0; j < tr->rdta->sites; j++) |
---|
8420 | fprintf(tlf, "%f ", unsortedSites[j]); |
---|
8421 | |
---|
8422 | fprintf(tlf, "\n"); |
---|
8423 | } |
---|
8424 | |
---|
8425 | fclose(treeFile); |
---|
8426 | |
---|
8427 | rax_free(unsortedSites); |
---|
8428 | fclose(tlf); |
---|
8429 | } |
---|
8430 | |
---|
8431 | |
---|
8432 | static double cumulativeTreeLength(tree *tr, analdef *adef) |
---|
8433 | { |
---|
8434 | double tl = 0.0; |
---|
8435 | |
---|
8436 | if(adef->perGeneBranchLengths) |
---|
8437 | { |
---|
8438 | int |
---|
8439 | accWgt = 0, |
---|
8440 | model; |
---|
8441 | |
---|
8442 | double |
---|
8443 | accLength = 0.0; |
---|
8444 | |
---|
8445 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
8446 | { |
---|
8447 | int |
---|
8448 | wgt = 0, |
---|
8449 | i, |
---|
8450 | lower, |
---|
8451 | upper; |
---|
8452 | |
---|
8453 | double |
---|
8454 | tlm; |
---|
8455 | |
---|
8456 | tlm = treeLength(tr, model); |
---|
8457 | |
---|
8458 | lower = tr->partitionData[model].lower; |
---|
8459 | upper = tr->partitionData[model].upper; |
---|
8460 | |
---|
8461 | for(i = lower; i < upper; i++) |
---|
8462 | wgt += tr->cdta->aliaswgt[i]; |
---|
8463 | |
---|
8464 | accLength += ((double)wgt) * tlm; |
---|
8465 | accWgt += wgt; |
---|
8466 | } |
---|
8467 | |
---|
8468 | tl = accLength / ((double)accWgt); |
---|
8469 | |
---|
8470 | } |
---|
8471 | else |
---|
8472 | tl = treeLength(tr, 0); |
---|
8473 | |
---|
8474 | |
---|
8475 | return tl; |
---|
8476 | } |
---|
8477 | |
---|
8478 | static void computeAllLHs(tree *tr, analdef *adef, char *bootStrapFileName) |
---|
8479 | { |
---|
8480 | int |
---|
8481 | i; |
---|
8482 | |
---|
8483 | double |
---|
8484 | bestLH = unlikely; |
---|
8485 | |
---|
8486 | bestlist |
---|
8487 | *bestT; |
---|
8488 | |
---|
8489 | FILE |
---|
8490 | *treeFile = getNumberOfTrees(tr, bootStrapFileName, adef), |
---|
8491 | *result = myfopen(resultFileName, "wb"); |
---|
8492 | |
---|
8493 | elw |
---|
8494 | *list; |
---|
8495 | |
---|
8496 | INFILE = getNumberOfTrees(tr, bootStrapFileName, adef); |
---|
8497 | |
---|
8498 | bestT = (bestlist *) rax_malloc(sizeof(bestlist)); |
---|
8499 | bestT->ninit = 0; |
---|
8500 | initBestTree(bestT, 1, tr->mxtips); |
---|
8501 | |
---|
8502 | list = (elw *)rax_malloc(sizeof(elw) * tr->numberOfTrees); |
---|
8503 | |
---|
8504 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8505 | { |
---|
8506 | treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE); |
---|
8507 | resetBranches(tr); |
---|
8508 | |
---|
8509 | if(i == 0) |
---|
8510 | { |
---|
8511 | |
---|
8512 | if(adef->useBinaryModelFile) |
---|
8513 | { |
---|
8514 | readBinaryModel(tr); |
---|
8515 | evaluateGenericInitrav(tr, tr->start); |
---|
8516 | treeEvaluate(tr, 2); |
---|
8517 | } |
---|
8518 | else |
---|
8519 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8520 | |
---|
8521 | printBothOpen("Model optimization on first Tree: %f\n", tr->likelihood); |
---|
8522 | } |
---|
8523 | else |
---|
8524 | { |
---|
8525 | evaluateGenericInitrav(tr, tr->start); |
---|
8526 | |
---|
8527 | /* |
---|
8528 | treeEvaluateProgressive(tr); |
---|
8529 | treeEvaluateRandom(tr, 2); |
---|
8530 | */ |
---|
8531 | if(tr->optimizeAllTrees) |
---|
8532 | { |
---|
8533 | treeEvaluate(tr, 1); |
---|
8534 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
8535 | } |
---|
8536 | else |
---|
8537 | treeEvaluate(tr, 2); |
---|
8538 | } |
---|
8539 | |
---|
8540 | list[i].tree = i; |
---|
8541 | list[i].lh = tr->likelihood; |
---|
8542 | |
---|
8543 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
8544 | |
---|
8545 | fprintf(result, "%s", tr->tree_string); |
---|
8546 | |
---|
8547 | saveBestTree(bestT, tr); |
---|
8548 | |
---|
8549 | if(tr->likelihood > bestLH) |
---|
8550 | bestLH = tr->likelihood; |
---|
8551 | |
---|
8552 | printBothOpen("Tree %d Likelihood %f Tree-Length %f\n", i, tr->likelihood, cumulativeTreeLength(tr, adef)); |
---|
8553 | } |
---|
8554 | |
---|
8555 | qsort(list, tr->numberOfTrees, sizeof(elw), elwCompareLikelihood); |
---|
8556 | |
---|
8557 | printBothOpen("\n"); |
---|
8558 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8559 | printBothOpen("%d %f\n", list[i].tree, list[i].lh); |
---|
8560 | |
---|
8561 | printBothOpen("\n"); |
---|
8562 | |
---|
8563 | /* |
---|
8564 | recallBestTree(bestT, 1, tr); |
---|
8565 | evaluateGeneric(tr, tr->start); |
---|
8566 | printf("Model optimization, %f <-> %f\n", bestLH, tr->likelihood); |
---|
8567 | fprintf(infoFile, "Model optimization, %f <-> %f\n", bestLH, tr->likelihood); |
---|
8568 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8569 | treeEvaluate(tr, 2); |
---|
8570 | printf("Model optimization, %f <-> %f\n", bestLH, tr->likelihood); |
---|
8571 | fprintf(infoFile, "Model optimization, %f <-> %f\n", bestLH, tr->likelihood); |
---|
8572 | */ |
---|
8573 | |
---|
8574 | printBothOpen("\nAll evaluated trees with branch lengths written to File: %s\n", resultFileName); |
---|
8575 | printBothOpen("\nTotal execution time: %f\n", gettime() - masterTime); |
---|
8576 | |
---|
8577 | |
---|
8578 | fclose(result); |
---|
8579 | exit(0); |
---|
8580 | } |
---|
8581 | |
---|
8582 | |
---|
8583 | |
---|
8584 | |
---|
8585 | static void computeELW(tree *tr, analdef *adef, char *bootStrapFileName) |
---|
8586 | { |
---|
8587 | FILE |
---|
8588 | *treeFile = getNumberOfTrees(tr, bootStrapFileName, adef); |
---|
8589 | |
---|
8590 | int |
---|
8591 | bestIndex = -1, |
---|
8592 | i, |
---|
8593 | k, |
---|
8594 | *originalRateCategories = (int*)rax_malloc(tr->cdta->endsite * sizeof(int)), |
---|
8595 | *originalInvariant = (int*)rax_malloc(tr->cdta->endsite * sizeof(int)); |
---|
8596 | |
---|
8597 | long |
---|
8598 | startSeed; |
---|
8599 | |
---|
8600 | double |
---|
8601 | best = unlikely, |
---|
8602 | **lhs, |
---|
8603 | **lhweights, |
---|
8604 | sum = 0.0; |
---|
8605 | |
---|
8606 | elw |
---|
8607 | *bootweights, |
---|
8608 | **rankTest; |
---|
8609 | |
---|
8610 | initModel(tr, tr->rdta, tr->cdta, adef); |
---|
8611 | |
---|
8612 | if(tr->numberOfTrees < 2) |
---|
8613 | { |
---|
8614 | printBothOpen("Error, there is only one tree in file %s which you want to use to conduct an ELW test\n", bootStrapFileName); |
---|
8615 | |
---|
8616 | exit(-1); |
---|
8617 | } |
---|
8618 | |
---|
8619 | bootweights = (elw *)rax_malloc(sizeof(elw) * tr->numberOfTrees); |
---|
8620 | |
---|
8621 | rankTest = (elw **)rax_malloc(sizeof(elw *) * adef->multipleRuns); |
---|
8622 | |
---|
8623 | for(k = 0; k < adef->multipleRuns; k++) |
---|
8624 | rankTest[k] = (elw *)rax_malloc(sizeof(elw) * tr->numberOfTrees); |
---|
8625 | |
---|
8626 | lhs = (double **)rax_malloc(sizeof(double *) * tr->numberOfTrees); |
---|
8627 | |
---|
8628 | for(k = 0; k < tr->numberOfTrees; k++) |
---|
8629 | lhs[k] = (double *)rax_calloc(adef->multipleRuns, sizeof(double)); |
---|
8630 | |
---|
8631 | |
---|
8632 | lhweights = (double **)rax_malloc(sizeof(double *) * tr->numberOfTrees); |
---|
8633 | |
---|
8634 | for(k = 0; k < tr->numberOfTrees; k++) |
---|
8635 | lhweights[k] = (double *)rax_calloc(adef->multipleRuns, sizeof(double)); |
---|
8636 | |
---|
8637 | /* read in the first tree and optimize ML params on it */ |
---|
8638 | |
---|
8639 | treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE); |
---|
8640 | |
---|
8641 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8642 | rewind(treeFile); |
---|
8643 | |
---|
8644 | printBothOpen("Model optimization, first Tree: %f\n", tr->likelihood); |
---|
8645 | |
---|
8646 | memcpy(originalRateCategories, tr->cdta->rateCategory, sizeof(int) * tr->cdta->endsite); |
---|
8647 | memcpy(originalInvariant, tr->invariant, sizeof(int) * tr->cdta->endsite); |
---|
8648 | |
---|
8649 | assert(adef->boot > 0); |
---|
8650 | |
---|
8651 | /* TODO this is ugly, should be passed as param to computenextreplicate() */ |
---|
8652 | |
---|
8653 | startSeed = adef->boot; |
---|
8654 | |
---|
8655 | |
---|
8656 | /* |
---|
8657 | now read the trees one by one, do a couple of BS replicates and re-compute their likelihood |
---|
8658 | for every replicate |
---|
8659 | */ |
---|
8660 | |
---|
8661 | /* loop over all trees */ |
---|
8662 | |
---|
8663 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8664 | { |
---|
8665 | |
---|
8666 | /* read in new tree */ |
---|
8667 | |
---|
8668 | treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE); |
---|
8669 | |
---|
8670 | if(tr->optimizeAllTrees) |
---|
8671 | { |
---|
8672 | treeEvaluate(tr, 1); |
---|
8673 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
8674 | } |
---|
8675 | else |
---|
8676 | treeEvaluate(tr, 2.0); |
---|
8677 | |
---|
8678 | printBothOpen("Original tree %d likelihood %f\n", i, tr->likelihood); |
---|
8679 | |
---|
8680 | if(tr->likelihood > best) |
---|
8681 | { |
---|
8682 | best = tr->likelihood; |
---|
8683 | bestIndex = i; |
---|
8684 | } |
---|
8685 | /* reset branches to default values */ |
---|
8686 | |
---|
8687 | resetBranches(tr); |
---|
8688 | |
---|
8689 | /* reset BS random seed, we want to use the same replicates for every tree */ |
---|
8690 | |
---|
8691 | adef->rapidBoot = startSeed; |
---|
8692 | |
---|
8693 | for(k = 0; k < adef->multipleRuns; k++) |
---|
8694 | { |
---|
8695 | /* compute the next BS replicate, i.e., re-sample alignment columns */ |
---|
8696 | |
---|
8697 | computeNextReplicate(tr, &adef->rapidBoot, originalRateCategories, originalInvariant, TRUE, TRUE); |
---|
8698 | |
---|
8699 | evaluateGenericInitrav(tr, tr->start); |
---|
8700 | |
---|
8701 | /* if this is the first replicate for this tree do a slightly more thorough br-len opt */ |
---|
8702 | /* we don't re-estimate ML model params (except branches) for every replicate to make things a bit faster */ |
---|
8703 | |
---|
8704 | if(k == 0) |
---|
8705 | treeEvaluate(tr, 2.0); |
---|
8706 | else |
---|
8707 | treeEvaluate(tr, 0.5); |
---|
8708 | |
---|
8709 | /* store the likelihood of replicate k for tree i */ |
---|
8710 | lhs[i][k] = tr->likelihood; |
---|
8711 | |
---|
8712 | rankTest[k][i].lh = tr->likelihood; |
---|
8713 | rankTest[k][i].tree = i; |
---|
8714 | } |
---|
8715 | |
---|
8716 | /* restore the original alignment to start BS procedure for the next tree */ |
---|
8717 | |
---|
8718 | reductionCleanup(tr, originalRateCategories, originalInvariant); |
---|
8719 | } |
---|
8720 | |
---|
8721 | assert(bestIndex >= 0 && best != unlikely); |
---|
8722 | |
---|
8723 | printBothOpen("Best-Scoring tree is tree %d with score %f\n", bestIndex, best); |
---|
8724 | |
---|
8725 | |
---|
8726 | /* now loop over all replicates */ |
---|
8727 | |
---|
8728 | for(k = 0; k < adef->multipleRuns; k++) |
---|
8729 | { |
---|
8730 | /* find best score for this replicate */ |
---|
8731 | |
---|
8732 | for(i = 0, best = unlikely; i < tr->numberOfTrees; i++) |
---|
8733 | if(lhs[i][k] > best) |
---|
8734 | best = lhs[i][k]; |
---|
8735 | |
---|
8736 | /* compute exponential weights w.r.t. the best likelihood for replicate k */ |
---|
8737 | |
---|
8738 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8739 | lhweights[i][k] = exp(lhs[i][k] - best); |
---|
8740 | |
---|
8741 | /* sum over all exponential weights */ |
---|
8742 | |
---|
8743 | for(i = 0, sum = 0.0; i < tr->numberOfTrees; i++) |
---|
8744 | sum += lhweights[i][k]; |
---|
8745 | |
---|
8746 | /* and normalize by the sum */ |
---|
8747 | |
---|
8748 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8749 | lhweights[i][k] = lhweights[i][k] / sum; |
---|
8750 | |
---|
8751 | } |
---|
8752 | |
---|
8753 | /* now loop over all trees */ |
---|
8754 | |
---|
8755 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8756 | { |
---|
8757 | |
---|
8758 | /* loop to sum over all replicate weights for tree i */ |
---|
8759 | |
---|
8760 | for(k = 0, sum = 0.0; k < adef->multipleRuns; k++) |
---|
8761 | sum += lhweights[i][k]; |
---|
8762 | |
---|
8763 | /* set the weight and the index of the respective tree */ |
---|
8764 | |
---|
8765 | bootweights[i].weight = sum / ((double)adef->multipleRuns); |
---|
8766 | bootweights[i].tree = i; |
---|
8767 | } |
---|
8768 | |
---|
8769 | /* now just sort the tree collection by weights */ |
---|
8770 | |
---|
8771 | qsort(bootweights, tr->numberOfTrees, sizeof(elw), elwCompare); |
---|
8772 | |
---|
8773 | printBothOpen("Tree\t Posterior Probability \t Cumulative posterior probability\n"); |
---|
8774 | |
---|
8775 | /* loop over the sorted array of trees and print out statistics */ |
---|
8776 | |
---|
8777 | for(i = 0, sum = 0.0; i < tr->numberOfTrees; i++) |
---|
8778 | { |
---|
8779 | sum += bootweights[i].weight; |
---|
8780 | |
---|
8781 | printBothOpen("%d\t\t %f \t\t %f\n", bootweights[i].tree, bootweights[i].weight, sum); |
---|
8782 | } |
---|
8783 | |
---|
8784 | |
---|
8785 | /* |
---|
8786 | if(0) |
---|
8787 | { |
---|
8788 | // now compute the super-duper rank test |
---|
8789 | |
---|
8790 | printBothOpen("\n\nNow also computing the super-duper rank test, though I still don't\n"); |
---|
8791 | printBothOpen("understand what it actually means. What this thing does is to initially determine\n"); |
---|
8792 | printBothOpen("the best-scoring ML tree on the original alignment and then the scores of the input\n"); |
---|
8793 | printBothOpen("trees on the number of specified Bootstrap replicates. Then it sorts the scores of the trees\n"); |
---|
8794 | printBothOpen("for every bootstrap replicate and determines the rank of the best-scoring tree on every BS\n"); |
---|
8795 | printBothOpen("replicate. It then prints out how many positions in the sorted lists of thz BS replicates \n"); |
---|
8796 | printBothOpen("must be included in order for the best scoring tree to appear 95 and 99 times respectively.\n"); |
---|
8797 | printBothOpen("This gives some intuition about how variable the score order of the trees will be under\n"); |
---|
8798 | printBothOpen("slight alterations of the data.\n\n"); |
---|
8799 | |
---|
8800 | // sort all BS replicates accodring to likelihood scores |
---|
8801 | |
---|
8802 | for(i = 0; i < adef->multipleRuns; i++) |
---|
8803 | qsort(rankTest[i], tr->numberOfTrees, sizeof(elw), elwCompareLikelihood); |
---|
8804 | |
---|
8805 | |
---|
8806 | // search for our best-scoring tree in every sorted array of likelihood scores |
---|
8807 | |
---|
8808 | for(i = 0; i < adef->multipleRuns; i++) |
---|
8809 | { |
---|
8810 | for(k = 0; k < tr->numberOfTrees; k++) |
---|
8811 | { |
---|
8812 | if(rankTest[i][k].tree == bestIndex) |
---|
8813 | countBest[k]++; |
---|
8814 | } |
---|
8815 | } |
---|
8816 | |
---|
8817 | for(k = 0; k < tr->numberOfTrees; k++) |
---|
8818 | { |
---|
8819 | if(k > 0) |
---|
8820 | countBest[k] += countBest[k - 1]; |
---|
8821 | |
---|
8822 | printBothOpen("Number of Occurences of best-scoring tree for %d BS replicates up to position %d in sorted list: %d\n", |
---|
8823 | adef->multipleRuns, k, countBest[k]); |
---|
8824 | |
---|
8825 | if(cutOff95 == -1 && countBest[k] <= (int)((double)adef->multipleRuns * 0.95 + 0.5)) |
---|
8826 | cutOff95 = k; |
---|
8827 | |
---|
8828 | if(cutOff99 == -1 && countBest[k] <= (int)((double)adef->multipleRuns * 0.99 + 0.5)) |
---|
8829 | cutOff99 = k; |
---|
8830 | } |
---|
8831 | |
---|
8832 | assert(countBest[k-1] == adef->multipleRuns); |
---|
8833 | assert(cutOff95 >= 0 && cutOff99 >= 0); |
---|
8834 | |
---|
8835 | printBothOpen("\n95%s cutoff reached after including %d out of %d sorted likelihood columns\n", "%", countBest[cutOff95], adef->multipleRuns); |
---|
8836 | |
---|
8837 | printBothOpen("99%s cutoff reached after including %d out of %d sorted likelihood columns\n\n", "%", countBest[cutOff99], adef->multipleRuns); |
---|
8838 | } |
---|
8839 | */ |
---|
8840 | |
---|
8841 | printBothOpen("\nTotal execution time: %f\n\n", gettime() - masterTime); |
---|
8842 | |
---|
8843 | rax_free(originalRateCategories); |
---|
8844 | rax_free(originalInvariant); |
---|
8845 | fclose(treeFile); |
---|
8846 | |
---|
8847 | exit(0); |
---|
8848 | } |
---|
8849 | |
---|
8850 | |
---|
8851 | |
---|
8852 | static void computeDistances(tree *tr, analdef *adef) |
---|
8853 | { |
---|
8854 | int i, j, modelCounter; |
---|
8855 | double z0[NUM_BRANCHES]; |
---|
8856 | double result[NUM_BRANCHES]; |
---|
8857 | double t; |
---|
8858 | char distanceFileName[1024]; |
---|
8859 | |
---|
8860 | FILE |
---|
8861 | *out; |
---|
8862 | |
---|
8863 | strcpy(distanceFileName, workdir); |
---|
8864 | strcat(distanceFileName, "RAxML_distances."); |
---|
8865 | strcat(distanceFileName, run_id); |
---|
8866 | |
---|
8867 | out = myfopen(distanceFileName, "wb"); |
---|
8868 | |
---|
8869 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8870 | |
---|
8871 | printBothOpen("\nLog Likelihood Score after parameter optimization: %f\n\n", tr->likelihood); |
---|
8872 | printBothOpen("\nComputing pairwise ML-distances ...\n"); |
---|
8873 | |
---|
8874 | for(modelCounter = 0; modelCounter < tr->NumberOfModels; modelCounter++) |
---|
8875 | z0[modelCounter] = defaultz; |
---|
8876 | |
---|
8877 | t = gettime(); |
---|
8878 | |
---|
8879 | for(i = 1; i <= tr->mxtips; i++) |
---|
8880 | for(j = i + 1; j <= tr->mxtips; j++) |
---|
8881 | { |
---|
8882 | double z, x; |
---|
8883 | |
---|
8884 | makenewzGenericDistance(tr, 10, z0, result, i, j); |
---|
8885 | |
---|
8886 | if(tr->multiBranch) |
---|
8887 | { |
---|
8888 | int k; |
---|
8889 | |
---|
8890 | for(k = 0, x = 0.0; k < tr->numBranches; k++) |
---|
8891 | { |
---|
8892 | assert(tr->partitionContributions[k] != -1.0); |
---|
8893 | assert(tr->fracchanges[k] != -1.0); |
---|
8894 | z = result[k]; |
---|
8895 | if (z < zmin) |
---|
8896 | z = zmin; |
---|
8897 | x += (-log(z) * tr->fracchanges[k]) * tr->partitionContributions[k]; |
---|
8898 | } |
---|
8899 | } |
---|
8900 | else |
---|
8901 | { |
---|
8902 | z = result[0]; |
---|
8903 | if (z < zmin) |
---|
8904 | z = zmin; |
---|
8905 | x = -log(z) * tr->fracchange; |
---|
8906 | } |
---|
8907 | |
---|
8908 | /*printf("%s-%s \t %f\n", tr->nameList[i], tr->nameList[j], x);*/ |
---|
8909 | fprintf(out, "%s %s \t %f\n", tr->nameList[i], tr->nameList[j], x); |
---|
8910 | } |
---|
8911 | |
---|
8912 | fclose(out); |
---|
8913 | |
---|
8914 | t = gettime() - t; |
---|
8915 | |
---|
8916 | printBothOpen("\nTime for pair-wise ML distance computation of %d distances: %f seconds\n", |
---|
8917 | (tr->mxtips * tr->mxtips - tr->mxtips) / 2, t); |
---|
8918 | printBothOpen("\nDistances written to file: %s\n", distanceFileName); |
---|
8919 | |
---|
8920 | |
---|
8921 | |
---|
8922 | exit(0); |
---|
8923 | } |
---|
8924 | |
---|
8925 | |
---|
8926 | |
---|
8927 | static void morphologicalCalibration(tree *tr, analdef *adef) |
---|
8928 | { |
---|
8929 | int |
---|
8930 | replicates = adef->multipleRuns, |
---|
8931 | i, |
---|
8932 | *significanceCounter = (int*)rax_malloc(sizeof(int) * tr->cdta->endsite); |
---|
8933 | |
---|
8934 | double |
---|
8935 | *reference = (double*)rax_malloc(sizeof(double) * tr->cdta->endsite); |
---|
8936 | |
---|
8937 | char |
---|
8938 | integerFileName[1024] = ""; |
---|
8939 | |
---|
8940 | FILE |
---|
8941 | *integerFile; |
---|
8942 | |
---|
8943 | if(replicates == 1) |
---|
8944 | { |
---|
8945 | printBothOpen("You did not specify the number of random trees to be generated by \"-#\" !\n"); |
---|
8946 | printBothOpen("Automatically setting it to 100.\n"); |
---|
8947 | replicates = 100; |
---|
8948 | } |
---|
8949 | |
---|
8950 | printBothOpen("Likelihood on Reference tree: %f\n\n", tr->likelihood); |
---|
8951 | |
---|
8952 | evaluateGenericVector(tr, tr->start); |
---|
8953 | |
---|
8954 | for(i = 0; i < tr->cdta->endsite; i++) |
---|
8955 | significanceCounter[i] = 0; |
---|
8956 | |
---|
8957 | memcpy(reference, tr->perSiteLL, tr->cdta->endsite * sizeof(double)); |
---|
8958 | |
---|
8959 | for(i = 0; i < replicates; i++) |
---|
8960 | { |
---|
8961 | int k; |
---|
8962 | |
---|
8963 | printBothOpen("Testing Random Tree [%d]\n", i); |
---|
8964 | makeRandomTree(tr, adef); |
---|
8965 | evaluateGenericInitrav(tr, tr->start); |
---|
8966 | treeEvaluate(tr, 2); |
---|
8967 | |
---|
8968 | /* |
---|
8969 | don't really need modOpt here |
---|
8970 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8971 | */ |
---|
8972 | |
---|
8973 | evaluateGenericVector(tr, tr->start); |
---|
8974 | |
---|
8975 | |
---|
8976 | for(k = 0; k < tr->cdta->endsite; k++) |
---|
8977 | if(tr->perSiteLL[k] <= reference[k]) |
---|
8978 | significanceCounter[k] = significanceCounter[k] + 1; |
---|
8979 | } |
---|
8980 | |
---|
8981 | strcpy(integerFileName, workdir); |
---|
8982 | strcat(integerFileName, "RAxML_weights."); |
---|
8983 | strcat(integerFileName, run_id); |
---|
8984 | |
---|
8985 | integerFile = myfopen(integerFileName, "wb"); |
---|
8986 | |
---|
8987 | for(i = 0; i < tr->cdta->endsite; i++) |
---|
8988 | fprintf(integerFile, "%d ", significanceCounter[i]); |
---|
8989 | |
---|
8990 | fclose(integerFile); |
---|
8991 | |
---|
8992 | printBothOpen("RAxML calibrated integer weight file written to: %s\n", integerFileName); |
---|
8993 | |
---|
8994 | exit(0); |
---|
8995 | } |
---|
8996 | |
---|
8997 | |
---|
8998 | |
---|
8999 | |
---|
/*
 * qsort() comparator for an array of C strings (elements of type char *).
 *
 * Returns a negative, zero or positive value when the string behind a sorts
 * before, equal to or after the string behind b.
 *
 * The previous hand-written version only ever returned 0 or 1 (and 1 for
 * equal strings), which is not a consistent ordering and leaves the result
 * of qsort() unspecified.  strcmp() supplies the three-way result that
 * qsort() requires.
 */
static int sortLex(const void *a, const void *b)
{
  const char
    *aPtr = *(char * const *)a,
    *bPtr = *(char * const *)b;

  return strcmp(aPtr, bPtr);
}
---|
9017 | |
---|
9018 | |
---|
9019 | static void extractTaxaFromTopology(tree *tr, rawdata *rdta, cruncheddata *cdta, char fileName[1024]) |
---|
9020 | { |
---|
9021 | FILE |
---|
9022 | *f = myfopen(fileName, "rb"); |
---|
9023 | |
---|
9024 | char |
---|
9025 | **nameList, |
---|
9026 | buffer[nmlngth + 2]; |
---|
9027 | |
---|
9028 | int |
---|
9029 | i = 0, |
---|
9030 | c, |
---|
9031 | taxaSize = 1024, |
---|
9032 | taxaCount = 0; |
---|
9033 | |
---|
9034 | nameList = (char**)rax_malloc(sizeof(char*) * taxaSize); |
---|
9035 | |
---|
9036 | while((c = fgetc(f)) != ';') |
---|
9037 | { |
---|
9038 | if(c == '(' || c == ',') |
---|
9039 | { |
---|
9040 | c = fgetc(f); |
---|
9041 | if(c == '(' || c == ',') |
---|
9042 | ungetc(c, f); |
---|
9043 | else |
---|
9044 | { |
---|
9045 | i = 0; |
---|
9046 | |
---|
9047 | do |
---|
9048 | { |
---|
9049 | buffer[i++] = c; |
---|
9050 | c = fgetc(f); |
---|
9051 | } |
---|
9052 | while(c != ':' && c != ')' && c != ','); |
---|
9053 | buffer[i] = '\0'; |
---|
9054 | |
---|
9055 | if(taxaCount == taxaSize) |
---|
9056 | { |
---|
9057 | taxaSize *= 2; |
---|
9058 | nameList = (char **)rax_realloc(nameList, sizeof(char*) * taxaSize, FALSE); |
---|
9059 | } |
---|
9060 | |
---|
9061 | nameList[taxaCount] = (char*)rax_malloc(sizeof(char) * (strlen(buffer) + 1)); |
---|
9062 | strcpy(nameList[taxaCount], buffer); |
---|
9063 | |
---|
9064 | taxaCount++; |
---|
9065 | |
---|
9066 | ungetc(c, f); |
---|
9067 | } |
---|
9068 | } |
---|
9069 | } |
---|
9070 | |
---|
9071 | |
---|
9072 | /* BEGIN ensuring no taxon occurs twice */ |
---|
9073 | { |
---|
9074 | char |
---|
9075 | **taxList = (char **)rax_malloc(sizeof(char *) * (size_t)taxaCount); |
---|
9076 | |
---|
9077 | for(i = 0; i < taxaCount; ++i) |
---|
9078 | taxList[i] = nameList[i]; |
---|
9079 | |
---|
9080 | qsort(taxList, taxaCount, sizeof(char**), sortLex); |
---|
9081 | |
---|
9082 | for(i = 1; i < taxaCount; ++i) |
---|
9083 | if(strcmp(taxList[i], taxList[i-1]) == 0) |
---|
9084 | { |
---|
9085 | printf("A taxon labelled by %s appears twice in the first tree of tree collection %s, exiting ...\n", buffer, bootStrapFile); |
---|
9086 | exit(-1); |
---|
9087 | } |
---|
9088 | |
---|
9089 | rax_free(taxList); |
---|
9090 | } |
---|
9091 | /* END */ |
---|
9092 | |
---|
9093 | |
---|
9094 | printf("Found a total of %d taxa in first tree of tree collection %s\n", taxaCount, bootStrapFile); |
---|
9095 | printf("Expecting all remaining trees in collection to have the same taxon set\n"); |
---|
9096 | |
---|
9097 | rdta->numsp = taxaCount; |
---|
9098 | |
---|
9099 | tr->nameList = (char **)rax_malloc(sizeof(char *) * (taxaCount + 1)); |
---|
9100 | for(i = 1; i <= taxaCount; i++) |
---|
9101 | tr->nameList[i] = nameList[i - 1]; |
---|
9102 | |
---|
9103 | rax_free(nameList); |
---|
9104 | |
---|
9105 | tr->rdta = rdta; |
---|
9106 | tr->cdta = cdta; |
---|
9107 | |
---|
9108 | if (rdta->numsp < 4) |
---|
9109 | { |
---|
9110 | printf("TOO FEW SPECIES, tree contains only %d species\n", rdta->numsp); |
---|
9111 | assert(0); |
---|
9112 | } |
---|
9113 | |
---|
9114 | tr->nameHash = initStringHashTable(10 * taxaCount); |
---|
9115 | for(i = 1; i <= taxaCount; i++) |
---|
9116 | addword(tr->nameList[i], tr->nameHash, i); |
---|
9117 | |
---|
9118 | fclose(f); |
---|
9119 | } |
---|
9120 | |
---|
9121 | |
---|
/*
 * fwrite() wrapper that asserts all nmemb items were written.
 *
 * The check used to read `bytes_written = nmemb` (an assignment), so a short
 * write was never detected and a legal zero-item write tripped the assert.
 * It now compares, mirroring myfread().
 */
static void myfwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
{
  size_t
    bytes_written = fwrite(ptr, size, nmemb, stream);

  /* fwrite() returns the number of complete items written */
  assert(bytes_written == nmemb);
}
---|
9129 | |
---|
9130 | |
---|
9131 | static void writeLG4(tree *tr, int model, int dataType, FILE *f, partitionLengths pLengths[MAX_MODEL]) |
---|
9132 | { |
---|
9133 | if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X) |
---|
9134 | { |
---|
9135 | int |
---|
9136 | k; |
---|
9137 | |
---|
9138 | for(k = 0; k < 4; k++) |
---|
9139 | { |
---|
9140 | myfwrite(tr->partitionData[model].EIGN_LG4[k], sizeof(double), pLengths[dataType].eignLength, f); |
---|
9141 | myfwrite(tr->partitionData[model].EV_LG4[k], sizeof(double), pLengths[dataType].evLength, f); |
---|
9142 | myfwrite(tr->partitionData[model].EI_LG4[k], sizeof(double), pLengths[dataType].eiLength, f); |
---|
9143 | myfwrite(tr->partitionData[model].frequencies_LG4[k], sizeof(double), pLengths[dataType].frequenciesLength, f); |
---|
9144 | myfwrite(tr->partitionData[model].tipVector_LG4[k], sizeof(double), pLengths[dataType].tipVectorLength, f); |
---|
9145 | myfwrite(tr->partitionData[model].substRates_LG4[k], sizeof(double), pLengths[dataType].substRatesLength, f); |
---|
9146 | } |
---|
9147 | } |
---|
9148 | } |
---|
9149 | |
---|
9150 | |
---|
9151 | void writeBinaryModel(tree *tr) |
---|
9152 | { |
---|
9153 | int |
---|
9154 | model; |
---|
9155 | |
---|
9156 | FILE |
---|
9157 | *f = myfopen(binaryModelParamsOutputFileName, "w"); |
---|
9158 | |
---|
9159 | /* cdta */ |
---|
9160 | |
---|
9161 | myfwrite(tr->cdta->rateCategory, sizeof(int), tr->rdta->sites + 1, f); |
---|
9162 | myfwrite(tr->cdta->patrat, sizeof(double), tr->rdta->sites + 1, f); |
---|
9163 | myfwrite(tr->cdta->patratStored, sizeof(double), tr->rdta->sites + 1, f); |
---|
9164 | |
---|
9165 | /* partition contributions for fracchange */ |
---|
9166 | |
---|
9167 | myfwrite(tr->partitionContributions, sizeof(double), tr->NumberOfModels, f); |
---|
9168 | |
---|
9169 | /* fracchange */ |
---|
9170 | |
---|
9171 | myfwrite(&tr->fracchange, sizeof(double), 1, f); |
---|
9172 | myfwrite(tr->fracchanges, sizeof(double), (size_t)tr->NumberOfModels, f); |
---|
9173 | |
---|
9174 | myfwrite(&tr->rawFracchange, sizeof(double), 1, f); |
---|
9175 | myfwrite(tr->rawFracchanges, sizeof(double), (size_t)tr->NumberOfModels, f); |
---|
9176 | |
---|
9177 | /* pInfo */ |
---|
9178 | |
---|
9179 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
9180 | { |
---|
9181 | int |
---|
9182 | dataType = tr->partitionData[model].dataType; |
---|
9183 | |
---|
9184 | myfwrite(tr->partitionData[model].weightExponents, sizeof(double), 4, f); |
---|
9185 | myfwrite(tr->partitionData[model].weights, sizeof(double), 4, f); |
---|
9186 | |
---|
9187 | myfwrite(tr->partitionData[model].gammaRates, sizeof(double), 4, f); |
---|
9188 | |
---|
9189 | myfwrite(tr->partitionData[model].EIGN, sizeof(double), pLengths[dataType].eignLength, f); |
---|
9190 | myfwrite(tr->partitionData[model].EV, sizeof(double), pLengths[dataType].evLength, f); |
---|
9191 | myfwrite(tr->partitionData[model].EI, sizeof(double), pLengths[dataType].eiLength, f); |
---|
9192 | |
---|
9193 | myfwrite(tr->partitionData[model].frequencies, sizeof(double), pLengths[dataType].frequenciesLength, f); |
---|
9194 | myfwrite(tr->partitionData[model].tipVector, sizeof(double), pLengths[dataType].tipVectorLength, f); |
---|
9195 | myfwrite(tr->partitionData[model].substRates, sizeof(double), pLengths[dataType].substRatesLength, f); |
---|
9196 | myfwrite(&(tr->partitionData[model].alpha), sizeof(double), 1, f); |
---|
9197 | myfwrite(&(tr->partitionData[model].propInvariant), sizeof(double), 1, f); |
---|
9198 | |
---|
9199 | myfwrite(&(tr->partitionData[model].numberOfCategories), sizeof(int), 1, f); |
---|
9200 | |
---|
9201 | myfwrite(&(tr->partitionData[model].protModels), sizeof(int), 1, f); |
---|
9202 | myfwrite(&(tr->partitionData[model].autoProtModels), sizeof(int), 1, f); |
---|
9203 | |
---|
9204 | myfwrite(tr->partitionData[model].perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f); |
---|
9205 | myfwrite(tr->partitionData[model].unscaled_perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f); |
---|
9206 | |
---|
9207 | writeLG4(tr, model, dataType, f, pLengths); |
---|
9208 | } |
---|
9209 | |
---|
9210 | printBothOpen("\nModel parameters (binary file format) written to: %s\n", binaryModelParamsOutputFileName); |
---|
9211 | |
---|
9212 | fclose(f); |
---|
9213 | } |
---|
9214 | |
---|
/*
 * fread() wrapper that asserts all nmemb requested items were read.
 */
static void myfread(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
  const size_t
    items_read = fread(ptr, size, nmemb, stream);

  /* fread() returns the number of complete items read */
  assert(items_read == nmemb);
}
---|
9224 | |
---|
9225 | |
---|
9226 | static void readLG4(tree *tr, int model, int dataType, FILE *f, partitionLengths pLengths[MAX_MODEL]) |
---|
9227 | { |
---|
9228 | if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X) |
---|
9229 | { |
---|
9230 | int |
---|
9231 | k; |
---|
9232 | |
---|
9233 | for(k = 0; k < 4; k++) |
---|
9234 | { |
---|
9235 | myfread(tr->partitionData[model].EIGN_LG4[k], sizeof(double), pLengths[dataType].eignLength, f); |
---|
9236 | myfread(tr->partitionData[model].EV_LG4[k], sizeof(double), pLengths[dataType].evLength, f); |
---|
9237 | myfread(tr->partitionData[model].EI_LG4[k], sizeof(double), pLengths[dataType].eiLength, f); |
---|
9238 | myfread(tr->partitionData[model].frequencies_LG4[k], sizeof(double), pLengths[dataType].frequenciesLength, f); |
---|
9239 | myfread(tr->partitionData[model].tipVector_LG4[k], sizeof(double), pLengths[dataType].tipVectorLength, f); |
---|
9240 | myfread(tr->partitionData[model].substRates_LG4[k], sizeof(double), pLengths[dataType].substRatesLength, f); |
---|
9241 | } |
---|
9242 | } |
---|
9243 | } |
---|
9244 | |
---|
9245 | void readBinaryModel(tree *tr) |
---|
9246 | { |
---|
9247 | int |
---|
9248 | model; |
---|
9249 | |
---|
9250 | FILE |
---|
9251 | *f; |
---|
9252 | |
---|
9253 | |
---|
9254 | printBothOpen("\nRAxML is reading a binary model file and not optimizing model params\n"); |
---|
9255 | |
---|
9256 | f = fopen(binaryModelParamsInputFileName, "r"); |
---|
9257 | |
---|
9258 | /* cdta */ |
---|
9259 | |
---|
9260 | myfread(tr->cdta->rateCategory, sizeof(int), (size_t)(tr->rdta->sites + 1), f); |
---|
9261 | myfread(tr->cdta->patrat, sizeof(double), (size_t)(tr->rdta->sites + 1), f); |
---|
9262 | myfread(tr->cdta->patratStored, sizeof(double), (size_t)(tr->rdta->sites + 1), f); |
---|
9263 | |
---|
9264 | /* partition contributions for fracchange */ |
---|
9265 | |
---|
9266 | myfread(tr->partitionContributions, sizeof(double), tr->NumberOfModels, f); |
---|
9267 | |
---|
9268 | /* fracchange */ |
---|
9269 | |
---|
9270 | myfread(&tr->fracchange, sizeof(double), 1, f); |
---|
9271 | myfread(tr->fracchanges, sizeof(double), (size_t)tr->NumberOfModels, f); |
---|
9272 | |
---|
9273 | myfread(&tr->rawFracchange, sizeof(double), 1, f); |
---|
9274 | myfread(tr->rawFracchanges, sizeof(double), (size_t)tr->NumberOfModels, f); |
---|
9275 | |
---|
9276 | /* pInfo */ |
---|
9277 | |
---|
9278 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
9279 | { |
---|
9280 | int |
---|
9281 | dataType = tr->partitionData[model].dataType; |
---|
9282 | |
---|
9283 | myfread(tr->partitionData[model].weightExponents, sizeof(double), 4, f); |
---|
9284 | myfread(tr->partitionData[model].weights, sizeof(double), 4, f); |
---|
9285 | |
---|
9286 | myfread(tr->partitionData[model].gammaRates, sizeof(double), 4, f); |
---|
9287 | |
---|
9288 | myfread(tr->partitionData[model].EIGN, sizeof(double), (size_t)(pLengths[dataType].eignLength), f); |
---|
9289 | myfread(tr->partitionData[model].EV, sizeof(double), (size_t)(pLengths[dataType].evLength), f); |
---|
9290 | myfread(tr->partitionData[model].EI, sizeof(double), (size_t)(pLengths[dataType].eiLength), f); |
---|
9291 | |
---|
9292 | myfread(tr->partitionData[model].frequencies, sizeof(double), (size_t)(pLengths[dataType].frequenciesLength), f); |
---|
9293 | myfread(tr->partitionData[model].tipVector, sizeof(double), (size_t)(pLengths[dataType].tipVectorLength), f); |
---|
9294 | myfread(tr->partitionData[model].substRates, sizeof(double), (size_t)(pLengths[dataType].substRatesLength), f); |
---|
9295 | |
---|
9296 | myfread(&(tr->partitionData[model].alpha), sizeof(double), 1, f); |
---|
9297 | myfread(&(tr->partitionData[model].propInvariant), sizeof(double), 1, f); |
---|
9298 | |
---|
9299 | myfread(&(tr->partitionData[model].numberOfCategories), sizeof(int), 1, f); |
---|
9300 | |
---|
9301 | myfread(&(tr->partitionData[model].protModels), sizeof(int), 1, f); |
---|
9302 | myfread(&(tr->partitionData[model].autoProtModels), sizeof(int), 1, f); |
---|
9303 | |
---|
9304 | myfread(tr->partitionData[model].perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f); |
---|
9305 | myfread(tr->partitionData[model].unscaled_perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f); |
---|
9306 | |
---|
9307 | readLG4(tr, model, dataType, f, pLengths); |
---|
9308 | } |
---|
9309 | |
---|
9310 | #ifdef _USE_PTHREADS |
---|
9311 | masterBarrier(THREAD_COPY_INIT_MODEL, tr); |
---|
9312 | //masterBarrier(THREAD_RESET_MODEL, tr); |
---|
9313 | #endif |
---|
9314 | |
---|
9315 | if(tr->rateHetModel == CAT) |
---|
9316 | { |
---|
9317 | #ifdef _USE_PTHREADS |
---|
9318 | masterBarrier(THREAD_COPY_RATE_CATS, tr); |
---|
9319 | #else |
---|
9320 | { |
---|
9321 | size_t |
---|
9322 | i; |
---|
9323 | int |
---|
9324 | model; |
---|
9325 | |
---|
9326 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
9327 | { |
---|
9328 | int |
---|
9329 | localCounter = 0; |
---|
9330 | |
---|
9331 | for(i = tr->partitionData[model].lower; i < tr->partitionData[model].upper; i++, localCounter++) |
---|
9332 | tr->partitionData[model].rateCategory[localCounter] = tr->cdta->rateCategory[i]; |
---|
9333 | } |
---|
9334 | } |
---|
9335 | #endif |
---|
9336 | } |
---|
9337 | |
---|
9338 | fclose(f); |
---|
9339 | } |
---|
9340 | |
---|
9341 | |
---|
9342 | |
---|
9343 | |
---|
/* Counts the set bits of n by inspecting one bit at a time,
   starting at the least significant one. Returns the number of set bits. */
static int iterated_bitcount(unsigned int n)
{
  int
    setBits = 0;

  /* shift n to the right until no set bit is left */
  for(; n != 0; n >>= 1)
    {
      if(n & 0x1u)
        setBits++;
    }

  return setBits;
}
---|
9357 | |
---|
/* lookup table: entry v is set by compute_bits_in_16bits() to the number of set bits of the 16-bit value v */
static char bits_in_16bits [0x1u << 16];

/* Fills the lookup table bits_in_16bits by calling iterated_bitcount()
   for every value in the range 0 ... 2^16 - 1. */
static void compute_bits_in_16bits(void)
{
  unsigned int
    value = 0;

  assert(sizeof(unsigned int) == 4);

  while(value < (0x1u << 16))
    {
      bits_in_16bits[value] = iterated_bitcount(value);
      value++;
    }
}
---|
9371 | |
---|
9372 | unsigned int precomputed16_bitcount (unsigned int n) |
---|
9373 | { |
---|
9374 | /* works only for 32-bit int*/ |
---|
9375 | |
---|
9376 | return bits_in_16bits [n & 0xffffu] |
---|
9377 | + bits_in_16bits [(n >> 16) & 0xffffu] ; |
---|
9378 | } |
---|
9379 | |
---|
9380 | /* functions to compute likelihoods on quartets */ |
---|
9381 | |
---|
9382 | |
---|
9383 | /* a parser error function */ |
---|
9384 | |
---|
/* Reports that the quartet grouping parser did not find the symbol c it was
   expecting and then stops via a failing assertion (when assertions are enabled). */
static void parseError(int c)
{
  printf("Quartet grouping parser expecting symbol: %c\n", c);

  /* the abort() triggered by the failing assertion is not required to flush buffered
     streams, flush explicitly such that the message is not lost when stdout is
     redirected to a file or a pipe */
  fflush(stdout);

  assert(0);
}
---|
9390 | |
---|
9391 | /* parser for the taxon grouping format, one has to specify 4 groups in a newick-like |
---|
9392 | format from which quartets (a substantially smaller number compared to ungrouped quartets) |
---|
9393 | will be drawn */ |
---|
9394 | |
---|
9395 | static void groupingParser(char *quartetGroupFileName, int *groups[4], int groupSize[4], tree *tr) |
---|
9396 | { |
---|
9397 | FILE |
---|
9398 | *f = myfopen(quartetGroupFileName, "r"); |
---|
9399 | |
---|
9400 | int |
---|
9401 | taxonCounter = 0, |
---|
9402 | n, |
---|
9403 | state = 0, |
---|
9404 | groupCounter = 0, |
---|
9405 | ch, |
---|
9406 | i; |
---|
9407 | |
---|
9408 | printf("%s\n", quartetGroupFileName); |
---|
9409 | |
---|
9410 | for(i = 0; i < 4; i++) |
---|
9411 | { |
---|
9412 | groups[i] = (int*)rax_malloc(sizeof(int) * (tr->mxtips + 1)); |
---|
9413 | groupSize[i] = 0; |
---|
9414 | } |
---|
9415 | |
---|
9416 | while((ch = getc(f)) != EOF) |
---|
9417 | { |
---|
9418 | if(!whitechar(ch)) |
---|
9419 | { |
---|
9420 | switch(state) |
---|
9421 | { |
---|
9422 | case 0: |
---|
9423 | if(ch != '(') |
---|
9424 | parseError('('); |
---|
9425 | state = 1; |
---|
9426 | break; |
---|
9427 | case 1: |
---|
9428 | ungetc(ch, f); |
---|
9429 | n = treeFindTipName(f, tr, FALSE); |
---|
9430 | if(n <= 0 || n > tr->mxtips) |
---|
9431 | printf("parsing error, raxml is expecting to read a taxon name, found \"%c\" instead\n", ch); |
---|
9432 | assert(n > 0 && n <= tr->mxtips); |
---|
9433 | taxonCounter++; |
---|
9434 | groups[groupCounter][groupSize[groupCounter]] = n; |
---|
9435 | groupSize[groupCounter] = groupSize[groupCounter] + 1; |
---|
9436 | state = 2; |
---|
9437 | break; |
---|
9438 | case 2: |
---|
9439 | if(ch == ',') |
---|
9440 | state = 1; |
---|
9441 | else |
---|
9442 | { |
---|
9443 | if(ch == ')') |
---|
9444 | { |
---|
9445 | groupCounter++; |
---|
9446 | state = 3; |
---|
9447 | } |
---|
9448 | else |
---|
9449 | parseError('?'); |
---|
9450 | } |
---|
9451 | break; |
---|
9452 | case 3: |
---|
9453 | if(groupCounter == 4) |
---|
9454 | { |
---|
9455 | if(ch == ';') |
---|
9456 | state = 4; |
---|
9457 | else |
---|
9458 | parseError(';'); |
---|
9459 | } |
---|
9460 | else |
---|
9461 | { |
---|
9462 | if(ch != ',') |
---|
9463 | parseError(','); |
---|
9464 | state = 0; |
---|
9465 | } |
---|
9466 | break; |
---|
9467 | case 4: |
---|
9468 | printf("Error: extra char after ; %c\n", ch); |
---|
9469 | assert(0); |
---|
9470 | default: |
---|
9471 | assert(0); |
---|
9472 | } |
---|
9473 | } |
---|
9474 | } |
---|
9475 | |
---|
9476 | assert(state == 4); |
---|
9477 | assert(groupCounter == 4); |
---|
9478 | assert(taxonCounter == tr->mxtips); |
---|
9479 | |
---|
9480 | printBothOpen("Successfully parsed quartet groups\n\n"); |
---|
9481 | |
---|
9482 | /* print out the taxa that have been assigned to the 4 groups */ |
---|
9483 | |
---|
9484 | for(i = 0; i < 4; i++) |
---|
9485 | { |
---|
9486 | int |
---|
9487 | j; |
---|
9488 | |
---|
9489 | printBothOpen("group %d has %d members\n", i, groupSize[i]); |
---|
9490 | |
---|
9491 | for(j = 0; j < groupSize[i]; j++) |
---|
9492 | printBothOpen("%s\n", tr->nameList[groups[i][j]]); |
---|
9493 | |
---|
9494 | printBothOpen("\n"); |
---|
9495 | } |
---|
9496 | |
---|
9497 | fclose(f); |
---|
9498 | } |
---|
9499 | |
---|
9500 | |
---|
9501 | static double quartetLikelihood(tree *tr, nodeptr p1, nodeptr p2, nodeptr p3, nodeptr p4, nodeptr q1, nodeptr q2) |
---|
9502 | { |
---|
9503 | /* |
---|
9504 | build a quartet tree, where q1 and q2 are the inner nodes and p1, p2, p3, p4 |
---|
9505 | are the tips of the quartet where the sequence data is located. |
---|
9506 | |
---|
9507 | initially set all branch lengths to the default value. |
---|
9508 | */ |
---|
9509 | |
---|
9510 | /* |
---|
9511 | for the tree and node data structure used, please see one of the last chapter's of Joe |
---|
9512 | Felsensteins book. |
---|
9513 | */ |
---|
9514 | |
---|
9515 | hookupDefault(q1, q2, tr->numBranches); |
---|
9516 | |
---|
9517 | hookupDefault(q1->next, p1, tr->numBranches); |
---|
9518 | hookupDefault(q1->next->next, p2, tr->numBranches); |
---|
9519 | |
---|
9520 | hookupDefault(q2->next, p3, tr->numBranches); |
---|
9521 | hookupDefault(q2->next->next, p4, tr->numBranches); |
---|
9522 | |
---|
9523 | /* now compute the likelihood vectors at the two inner nodes of the tree, |
---|
9524 | here the virtual root is located between the two inner nodes q1 and q2. |
---|
9525 | */ |
---|
9526 | |
---|
9527 | newviewGeneric(tr, q1); |
---|
9528 | newviewGeneric(tr, q2); |
---|
9529 | |
---|
9530 | /* call a function that is also used for NNIs that iteratively optimizes all |
---|
9531 | 5 branch lengths in the tree. |
---|
9532 | |
---|
9533 | Note that 16 is an important tuning parameter, this integer value determines |
---|
9534 | how many times we visit all branches until we give up further optimizing the branch length |
---|
9535 | configuration. |
---|
9536 | */ |
---|
9537 | |
---|
9538 | nniSmooth(tr, q1, 16); |
---|
9539 | |
---|
9540 | /* now compute the log likelihood of the tree for the virtual root located between inner nodes q1 and q2 */ |
---|
9541 | |
---|
9542 | /* debugging code |
---|
9543 | { |
---|
9544 | double l; |
---|
9545 | */ |
---|
9546 | |
---|
9547 | evaluateGeneric(tr, q1->back->next->next); |
---|
9548 | |
---|
9549 | /* debugging code |
---|
9550 | |
---|
9551 | l = tr->likelihood; |
---|
9552 | |
---|
9553 | newviewGeneric(tr, q1); |
---|
9554 | newviewGeneric(tr, q2); |
---|
9555 | evaluateGeneric(tr, q1); |
---|
9556 | |
---|
9557 | |
---|
9558 | assert(ABS(l - tr->likelihood) < 0.00001); |
---|
9559 | } |
---|
9560 | */ |
---|
9561 | |
---|
9562 | return (tr->likelihood); |
---|
9563 | } |
---|
9564 | |
---|
9565 | #ifdef _QUARTET_MPI |
---|
9566 | |
---|
/* Result of one quartet set: for each of the three topologies the four taxon
   numbers (a, b | c, d) and the likelihood l that was computed for it.
   Instances are sent from the workers to the master as a raw byte message. */
typedef struct
{
  /* first topology */
  int a1, b1, c1, d1;

  /* second topology */
  int a2, b2, c2, d2;

  /* third topology */
  int a3, b3, c3, d3;

  /* likelihoods of the first, second and third topology */
  double l1, l2, l3;
} quartetResult;

/* number of bytes of a quartet result message */
#define QUARTET_MESSAGE_SIZE sizeof(quartetResult)

/* message tags: a quartet result follows / the sending worker has finished */
#define QUARTET_MESSAGE 0
#define I_AM_DONE       1
---|
9592 | |
---|
9593 | static void startQuartetMaster(tree *tr, FILE *f) |
---|
9594 | { |
---|
9595 | quartetResult |
---|
9596 | *qr = (quartetResult *)rax_malloc(sizeof(quartetResult)); |
---|
9597 | |
---|
9598 | MPI_Status |
---|
9599 | status, |
---|
9600 | recvStatus; |
---|
9601 | |
---|
9602 | int |
---|
9603 | dummy, |
---|
9604 | workersDone = 0; |
---|
9605 | |
---|
9606 | assert(processID == 0); |
---|
9607 | |
---|
9608 | while(1) |
---|
9609 | { |
---|
9610 | MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); |
---|
9611 | |
---|
9612 | switch(status.MPI_TAG) |
---|
9613 | { |
---|
9614 | case QUARTET_MESSAGE: |
---|
9615 | MPI_Recv((void *)(qr), QUARTET_MESSAGE_SIZE, MPI_BYTE, status.MPI_SOURCE, QUARTET_MESSAGE, MPI_COMM_WORLD, &recvStatus); |
---|
9616 | fprintf(f, "%d %d | %d %d: %f\n", qr->a1, qr->b1, qr->c1, qr->d1, qr->l1); |
---|
9617 | fprintf(f, "%d %d | %d %d: %f\n", qr->a2, qr->b2, qr->c2, qr->d2, qr->l2); |
---|
9618 | fprintf(f, "%d %d | %d %d: %f\n", qr->a3, qr->b3, qr->c3, qr->d3, qr->l3); |
---|
9619 | break; |
---|
9620 | case I_AM_DONE: |
---|
9621 | MPI_Recv(&dummy, 1, MPI_INT, status.MPI_SOURCE, I_AM_DONE, MPI_COMM_WORLD, &recvStatus); |
---|
9622 | workersDone++; |
---|
9623 | if(workersDone == processes -1) |
---|
9624 | goto END_IT; |
---|
9625 | break; |
---|
9626 | default: |
---|
9627 | assert(0); |
---|
9628 | } |
---|
9629 | } |
---|
9630 | |
---|
9631 | END_IT: |
---|
9632 | rax_free(qr); |
---|
9633 | return; |
---|
9634 | } |
---|
9635 | |
---|
9636 | #endif |
---|
9637 | |
---|
9638 | static void computeAllThreeQuartets(tree *tr, nodeptr q1, nodeptr q2, int t1, int t2, int t3, int t4, FILE *f) |
---|
9639 | { |
---|
9640 | /* set the tip nodes to different sequences |
---|
9641 | with the tip indices t1, t2, t3, t4 */ |
---|
9642 | |
---|
9643 | nodeptr |
---|
9644 | p1 = tr->nodep[t1], |
---|
9645 | p2 = tr->nodep[t2], |
---|
9646 | p3 = tr->nodep[t3], |
---|
9647 | p4 = tr->nodep[t4]; |
---|
9648 | |
---|
9649 | double |
---|
9650 | l; |
---|
9651 | |
---|
9652 | #ifdef _QUARTET_MPI |
---|
9653 | quartetResult |
---|
9654 | *qr = (quartetResult *)rax_malloc(sizeof(quartetResult)); |
---|
9655 | #endif |
---|
9656 | |
---|
9657 | /* first quartet */ |
---|
9658 | |
---|
9659 | /* compute the likelihood of tree ((p1, p2), (p3, p4)) */ |
---|
9660 | |
---|
9661 | l = quartetLikelihood(tr, p1, p2, p3, p4, q1, q2); |
---|
9662 | |
---|
9663 | #ifndef _QUARTET_MPI |
---|
9664 | fprintf(f, "%d %d | %d %d: %f\n", p1->number, p2->number, p3->number, p4->number, l); |
---|
9665 | #else |
---|
9666 | qr->a1 = p1->number; |
---|
9667 | qr->b1 = p2->number; |
---|
9668 | qr->c1 = p3->number; |
---|
9669 | qr->d1 = p4->number; |
---|
9670 | qr->l1 = l; |
---|
9671 | #endif |
---|
9672 | /* second quartet */ |
---|
9673 | |
---|
9674 | /* compute the likelihood of tree ((p1, p3), (p2, p4)) */ |
---|
9675 | |
---|
9676 | l = quartetLikelihood(tr, p1, p3, p2, p4, q1, q2); |
---|
9677 | |
---|
9678 | #ifndef _QUARTET_MPI |
---|
9679 | fprintf(f, "%d %d | %d %d: %f\n", p1->number, p3->number, p2->number, p4->number, l); |
---|
9680 | #else |
---|
9681 | qr->a2 = p1->number; |
---|
9682 | qr->b2 = p3->number; |
---|
9683 | qr->c2 = p2->number; |
---|
9684 | qr->d2 = p4->number; |
---|
9685 | qr->l2 = l; |
---|
9686 | #endif |
---|
9687 | /* third quartet */ |
---|
9688 | |
---|
9689 | /* compute the likelihood of tree ((p1, p4), (p2, p3)) */ |
---|
9690 | |
---|
9691 | l = quartetLikelihood(tr, p1, p4, p2, p3, q1, q2); |
---|
9692 | |
---|
9693 | #ifndef _QUARTET_MPI |
---|
9694 | fprintf(f, "%d %d | %d %d: %f\n", p1->number, p4->number, p2->number, p3->number, l); |
---|
9695 | #else |
---|
9696 | qr->a3 = p1->number; |
---|
9697 | qr->b3 = p4->number; |
---|
9698 | qr->c3 = p2->number; |
---|
9699 | qr->d3 = p3->number; |
---|
9700 | qr->l3 = l; |
---|
9701 | |
---|
9702 | MPI_Send((void *)qr, QUARTET_MESSAGE_SIZE, MPI_BYTE, 0, QUARTET_MESSAGE, MPI_COMM_WORLD); |
---|
9703 | |
---|
9704 | assert(processID > 0); |
---|
9705 | rax_free(qr); |
---|
9706 | #endif |
---|
9707 | } |
---|
9708 | |
---|
9709 | /* the three quartet options: all quartets, randomly sub-sample a certain number n of quartets, |
---|
9710 | subsample all quartets from 4 pre-defined groups of quartets */ |
---|
9711 | |
---|
9712 | #define ALL_QUARTETS 0 |
---|
9713 | #define RANDOM_QUARTETS 1 |
---|
9714 | #define GROUPED_QUARTETS 2 |
---|
9715 | |
---|
9716 | |
---|
9717 | |
---|
9718 | static void computeQuartets(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta) |
---|
9719 | { |
---|
9720 | /* some indices for generating quartets in an arbitrary way */ |
---|
9721 | |
---|
9722 | int |
---|
9723 | flavor = ALL_QUARTETS, |
---|
9724 | i, |
---|
9725 | t1, |
---|
9726 | t2, |
---|
9727 | t3, |
---|
9728 | t4, |
---|
9729 | *groups[4], |
---|
9730 | groupSize[4]; |
---|
9731 | |
---|
9732 | double |
---|
9733 | fraction = 0.0, |
---|
9734 | t; |
---|
9735 | |
---|
9736 | unsigned long int |
---|
9737 | randomQuartets = (unsigned long int)(adef->multipleRuns), |
---|
9738 | quartetCounter = 0, |
---|
9739 | numberOfQuartets = ((unsigned long int)tr->mxtips * ((unsigned long int)tr->mxtips - 1) * ((unsigned long int)tr->mxtips - 2) * ((unsigned long int)tr->mxtips - 3)) / 24; |
---|
9740 | |
---|
9741 | /* use two inner nodes for building quartet trees */ |
---|
9742 | |
---|
9743 | nodeptr |
---|
9744 | q1 = tr->nodep[tr->mxtips + 1], |
---|
9745 | q2 = tr->nodep[tr->mxtips + 2]; |
---|
9746 | |
---|
9747 | char |
---|
9748 | quartetFileName[1024]; |
---|
9749 | |
---|
9750 | FILE |
---|
9751 | *f; |
---|
9752 | |
---|
9753 | /* build output file name */ |
---|
9754 | |
---|
9755 | strcpy(quartetFileName, workdir); |
---|
9756 | strcat(quartetFileName, "RAxML_quartets."); |
---|
9757 | strcat(quartetFileName, run_id); |
---|
9758 | |
---|
9759 | /* open output file */ |
---|
9760 | |
---|
9761 | |
---|
9762 | |
---|
9763 | #ifdef _QUARTET_MPI |
---|
9764 | if(processID == 0) |
---|
9765 | #endif |
---|
9766 | f = myfopen(quartetFileName, "w"); |
---|
9767 | |
---|
9768 | /* initialize model parameters */ |
---|
9769 | |
---|
9770 | initModel(tr, rdta, cdta, adef); |
---|
9771 | |
---|
9772 | |
---|
9773 | |
---|
9774 | if(!adef->useBinaryModelFile) |
---|
9775 | { |
---|
9776 | #ifdef _QUARTET_MPI |
---|
9777 | assert(0); |
---|
9778 | #endif |
---|
9779 | |
---|
9780 | /* get a starting tree: either reads in a tree or computes a randomized stepwise addition parsimony tree */ |
---|
9781 | |
---|
9782 | getStartingTree(tr, adef); |
---|
9783 | |
---|
9784 | /* optimize model parameters on that comprehensive tree that can subsequently be used for qyartet building */ |
---|
9785 | |
---|
9786 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
9787 | |
---|
9788 | printBothOpen("Time for parsing input tree or building parsimony tree and optimizing model parameters: %f\n\n", gettime() - masterTime); |
---|
9789 | } |
---|
9790 | else |
---|
9791 | { |
---|
9792 | readBinaryModel(tr); |
---|
9793 | |
---|
9794 | printBothOpen("Time for reading model parameters: %f\n\n", gettime() - masterTime); |
---|
9795 | } |
---|
9796 | |
---|
9797 | |
---|
9798 | /* figure out which flavor of quartets we want to compute */ |
---|
9799 | |
---|
9800 | if(adef->useQuartetGrouping) |
---|
9801 | { |
---|
9802 | flavor = GROUPED_QUARTETS; |
---|
9803 | groupingParser(quartetGroupingFileName, groups, groupSize, tr); |
---|
9804 | } |
---|
9805 | else |
---|
9806 | { |
---|
9807 | if(randomQuartets > numberOfQuartets) |
---|
9808 | randomQuartets = 1; |
---|
9809 | |
---|
9810 | if(randomQuartets == 1) |
---|
9811 | flavor = ALL_QUARTETS; |
---|
9812 | else |
---|
9813 | { |
---|
9814 | fraction = (double)randomQuartets / (double)numberOfQuartets; |
---|
9815 | flavor = RANDOM_QUARTETS; |
---|
9816 | } |
---|
9817 | } |
---|
9818 | |
---|
9819 | /* print some output on what we are doing*/ |
---|
9820 | |
---|
9821 | switch(flavor) |
---|
9822 | { |
---|
9823 | case ALL_QUARTETS: |
---|
9824 | printBothOpen("There are %u quartet sets for which RAxML will evaluate all %u quartet trees\n", numberOfQuartets, numberOfQuartets * 3); |
---|
9825 | break; |
---|
9826 | case RANDOM_QUARTETS: |
---|
9827 | printBothOpen("There are %u quartet sets for which RAxML will randomly sub-sambple %u sets (%f per cent), i.e., compute %u quartet trees\n", numberOfQuartets, randomQuartets, 100 * fraction, randomQuartets * 3); |
---|
9828 | break; |
---|
9829 | case GROUPED_QUARTETS: |
---|
9830 | printBothOpen("There are 4 quartet groups from which RAxML will evaluate all %u quartet trees\n", (unsigned int)groupSize[0] * (unsigned int)groupSize[1] * (unsigned int)groupSize[2] * (unsigned int)groupSize[3] * 3); |
---|
9831 | break; |
---|
9832 | default: |
---|
9833 | assert(0); |
---|
9834 | } |
---|
9835 | |
---|
9836 | /* print taxon name to taxon number correspondance table to output file */ |
---|
9837 | #ifdef _QUARTET_MPI |
---|
9838 | if(processID == 0) |
---|
9839 | #endif |
---|
9840 | { |
---|
9841 | fprintf(f, "Taxon names and indices:\n\n"); |
---|
9842 | |
---|
9843 | for(i = 1; i <= tr->mxtips; i++) |
---|
9844 | { |
---|
9845 | fprintf(f, "%s %d\n", tr->nameList[i], i); |
---|
9846 | assert(tr->nodep[i]->number == i); |
---|
9847 | } |
---|
9848 | |
---|
9849 | fprintf(f, "\n\n"); |
---|
9850 | } |
---|
9851 | |
---|
9852 | |
---|
9853 | t = gettime(); |
---|
9854 | |
---|
9855 | /* do a loop to generate some quartets to test. |
---|
9856 | note that tip nodes/sequences in RAxML are indexed from 1,...,n |
---|
9857 | and not from 0,...,n-1 as one might expect |
---|
9858 | |
---|
9859 | tr->mxtips is the maximum number of tips in the alignment/tree |
---|
9860 | */ |
---|
9861 | |
---|
9862 | #ifdef _QUARTET_MPI |
---|
9863 | if(processID > 0) |
---|
9864 | #endif |
---|
9865 | { |
---|
9866 | switch(flavor) |
---|
9867 | { |
---|
9868 | case ALL_QUARTETS: |
---|
9869 | { |
---|
9870 | assert(randomQuartets == 1); |
---|
9871 | |
---|
9872 | /* compute all possible quartets */ |
---|
9873 | |
---|
9874 | for(t1 = 1; t1 <= tr->mxtips; t1++) |
---|
9875 | for(t2 = t1 + 1; t2 <= tr->mxtips; t2++) |
---|
9876 | for(t3 = t2 + 1; t3 <= tr->mxtips; t3++) |
---|
9877 | for(t4 = t3 + 1; t4 <= tr->mxtips; t4++) |
---|
9878 | { |
---|
9879 | #ifdef _QUARTET_MPI |
---|
9880 | if((quartetCounter % (unsigned long int)(processes - 1)) == (unsigned long int)(processID - 1)) |
---|
9881 | #endif |
---|
9882 | computeAllThreeQuartets(tr, q1, q2, t1, t2, t3, t4, f); |
---|
9883 | quartetCounter++; |
---|
9884 | } |
---|
9885 | |
---|
9886 | assert(quartetCounter == numberOfQuartets); |
---|
9887 | } |
---|
9888 | break; |
---|
9889 | case RANDOM_QUARTETS: |
---|
9890 | { |
---|
9891 | /* randomly sub-sample a fraction of all quartets */ |
---|
9892 | |
---|
9893 | for(t1 = 1; t1 <= tr->mxtips; t1++) |
---|
9894 | for(t2 = t1 + 1; t2 <= tr->mxtips; t2++) |
---|
9895 | for(t3 = t2 + 1; t3 <= tr->mxtips; t3++) |
---|
9896 | for(t4 = t3 + 1; t4 <= tr->mxtips; t4++) |
---|
9897 | { |
---|
9898 | double |
---|
9899 | r = randum(&adef->parsimonySeed); |
---|
9900 | |
---|
9901 | if(r < fraction) |
---|
9902 | { |
---|
9903 | #ifdef _QUARTET_MPI |
---|
9904 | if((quartetCounter % (unsigned long int)(processes - 1)) == (unsigned long int)(processID - 1)) |
---|
9905 | #endif |
---|
9906 | computeAllThreeQuartets(tr, q1, q2, t1, t2, t3, t4, f); |
---|
9907 | quartetCounter++; |
---|
9908 | } |
---|
9909 | |
---|
9910 | if(quartetCounter == randomQuartets) |
---|
9911 | goto DONE; |
---|
9912 | } |
---|
9913 | |
---|
9914 | DONE: |
---|
9915 | assert(quartetCounter == randomQuartets); |
---|
9916 | } |
---|
9917 | break; |
---|
9918 | case GROUPED_QUARTETS: |
---|
9919 | { |
---|
9920 | /* compute all quartets that can be built out of the four pre-defined groups */ |
---|
9921 | |
---|
9922 | for(t1 = 0; t1 < groupSize[0]; t1++) |
---|
9923 | for(t2 = 0; t2 < groupSize[1]; t2++) |
---|
9924 | for(t3 = 0; t3 < groupSize[2]; t3++) |
---|
9925 | for(t4 = 0; t4 < groupSize[3]; t4++) |
---|
9926 | { |
---|
9927 | int |
---|
9928 | i1 = groups[0][t1], |
---|
9929 | i2 = groups[1][t2], |
---|
9930 | i3 = groups[2][t3], |
---|
9931 | i4 = groups[3][t4]; |
---|
9932 | |
---|
9933 | #ifdef _QUARTET_MPI |
---|
9934 | if((quartetCounter % (unsigned long int)(processes - 1)) == (unsigned long int)(processID - 1)) |
---|
9935 | #endif |
---|
9936 | computeAllThreeQuartets(tr, q1, q2, i1, i2, i3, i4, f); |
---|
9937 | quartetCounter++; |
---|
9938 | } |
---|
9939 | |
---|
9940 | printBothOpen("\nComputed all %u possible grouped quartets\n", quartetCounter); |
---|
9941 | } |
---|
9942 | break; |
---|
9943 | default: |
---|
9944 | assert(0); |
---|
9945 | } |
---|
9946 | } |
---|
9947 | #ifdef _QUARTET_MPI |
---|
9948 | if(processID == 0) |
---|
9949 | startQuartetMaster(tr, f); |
---|
9950 | else |
---|
9951 | { |
---|
9952 | int |
---|
9953 | dummy; |
---|
9954 | |
---|
9955 | MPI_Send(&dummy, 1, MPI_INT, 0, I_AM_DONE, MPI_COMM_WORLD); |
---|
9956 | } |
---|
9957 | #endif |
---|
9958 | |
---|
9959 | t = gettime() - t; |
---|
9960 | |
---|
9961 | printBothOpen("\nPure quartet computation time: %f secs\n", t); |
---|
9962 | |
---|
9963 | printBothOpen("\nAll quartets and corresponding likelihoods written to file %s\n", quartetFileName); |
---|
9964 | |
---|
9965 | #ifdef _QUARTET_MPI |
---|
9966 | if(processID == 0) |
---|
9967 | #endif |
---|
9968 | fclose(f); |
---|
9969 | } |
---|
9970 | |
---|
9971 | static void thoroughTreeOptimization(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta) |
---|
9972 | { |
---|
9973 | char |
---|
9974 | bestTreeFileName[1024]; |
---|
9975 | |
---|
9976 | FILE |
---|
9977 | *f; |
---|
9978 | |
---|
9979 | initModel(tr, rdta, cdta, adef); |
---|
9980 | |
---|
9981 | getStartingTree(tr, adef); |
---|
9982 | |
---|
9983 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
9984 | |
---|
9985 | Thorough = 1; |
---|
9986 | tr->doCutoff = FALSE; |
---|
9987 | |
---|
9988 | printBothOpen("\nStart likelihood: %f\n\n", tr->likelihood); |
---|
9989 | |
---|
9990 | treeOptimizeThorough(tr, 1, 10); |
---|
9991 | evaluateGenericInitrav(tr, tr->start); |
---|
9992 | |
---|
9993 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
9994 | |
---|
9995 | printBothOpen("End likelihood: %f\n\n", tr->likelihood); |
---|
9996 | |
---|
9997 | printModelParams(tr, adef); |
---|
9998 | |
---|
9999 | strcpy(bestTreeFileName, workdir); |
---|
10000 | strcat(bestTreeFileName, "RAxML_bestTree."); |
---|
10001 | strcat(bestTreeFileName, run_id); |
---|
10002 | |
---|
10003 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
10004 | f = myfopen(bestTreeFileName, "wb"); |
---|
10005 | fprintf(f, "%s", tr->tree_string); |
---|
10006 | fclose(f); |
---|
10007 | |
---|
10008 | printBothOpen("Best-scoring ML tree written to: %s\n\n", bestTreeFileName); |
---|
10009 | } |
---|
10010 | |
---|
10011 | static void evaluateSD(tree *tr, double bestLH, double *bestVector, double weightSum, int configuration, int i, FILE *f) |
---|
10012 | { |
---|
10013 | double |
---|
10014 | sum = 0.0, |
---|
10015 | sum2 = 0.0, |
---|
10016 | sd, |
---|
10017 | currentLH; |
---|
10018 | |
---|
10019 | int |
---|
10020 | k; |
---|
10021 | |
---|
10022 | evaluateGenericInitrav(tr, tr->start); |
---|
10023 | evaluateGenericVector(tr, tr->start); |
---|
10024 | |
---|
10025 | currentLH = tr->likelihood; |
---|
10026 | |
---|
10027 | printBothOpen("Configuration %d Likelihood: %f\n", configuration, tr->likelihood); |
---|
10028 | |
---|
10029 | fprintf(f, "tr%d\t", configuration); |
---|
10030 | |
---|
10031 | if(currentLH > bestLH) |
---|
10032 | printBothOpen("WARNING tree with ancestral sequence taxon %s has a better likelihood %f > %f than the reference tree!\n", tr->nameList[i], currentLH, bestLH); |
---|
10033 | |
---|
10034 | for (k = 0; k < tr->cdta->endsite; k++) |
---|
10035 | { |
---|
10036 | int |
---|
10037 | w; |
---|
10038 | |
---|
10039 | double |
---|
10040 | temp = bestVector[k] - tr->perSiteLL[k], |
---|
10041 | wtemp = tr->cdta->aliaswgt[k] * temp; |
---|
10042 | |
---|
10043 | for(w = 0; w < tr->cdta->aliaswgt[k]; w++) |
---|
10044 | fprintf(f, "%f ", tr->perSiteLL[k]); |
---|
10045 | |
---|
10046 | sum += wtemp; |
---|
10047 | sum2 += wtemp * temp; |
---|
10048 | } |
---|
10049 | |
---|
10050 | fprintf(f, "\n"); |
---|
10051 | |
---|
10052 | sd = sqrt( weightSum * (sum2 - sum * sum / weightSum) / (weightSum - 1) ); |
---|
10053 | |
---|
10054 | printBothOpen("Ancestral Taxon: %s Likelihood: %f D(LH): %f SD: %f \nSignificantly Worse: %s (5%s), %s (2%s), %s (1%s)\n", |
---|
10055 | tr->nameList[i], currentLH, currentLH - bestLH, sd, |
---|
10056 | (sum > 1.95996 * sd) ? "Yes" : " No", "%", |
---|
10057 | (sum > 2.326 * sd) ? "Yes" : " No", "%", |
---|
10058 | (sum > 2.57583 * sd) ? "Yes" : " No", "%"); |
---|
10059 | |
---|
10060 | printBothOpen("\n"); |
---|
10061 | } |
---|
10062 | |
---|
10063 | static void ancestralSequenceTest(tree *tr) |
---|
10064 | { |
---|
10065 | FILE |
---|
10066 | *f = myfopen(quartetGroupingFileName, "r"); |
---|
10067 | |
---|
10068 | int |
---|
10069 | ch, |
---|
10070 | i, |
---|
10071 | *candidateAncestorList = (int *)rax_calloc((tr->mxtips + 1), sizeof(int)), |
---|
10072 | numberOfCandidateAncestors = 0; |
---|
10073 | |
---|
10074 | double |
---|
10075 | bestLH, |
---|
10076 | weightSum = 0.0, |
---|
10077 | *bestVector = (double*)rax_malloc(sizeof(double) * tr->cdta->endsite); |
---|
10078 | |
---|
10079 | assert(tr->useFastScaling == FALSE); |
---|
10080 | |
---|
10081 | for(i = 0; i < tr->cdta->endsite; i++) |
---|
10082 | weightSum += (double)(tr->cdta->aliaswgt[i]); |
---|
10083 | |
---|
10084 | evaluateGenericInitrav(tr, tr->start); |
---|
10085 | evaluateGenericVector(tr, tr->start); |
---|
10086 | |
---|
10087 | bestLH = tr->likelihood; |
---|
10088 | |
---|
10089 | memcpy(bestVector, tr->perSiteLL, tr->cdta->endsite * sizeof(double)); |
---|
10090 | |
---|
10091 | printBothOpen("Likelihood of reference tree: %f\n\n\n", tr->likelihood); |
---|
10092 | |
---|
10093 | while((ch = getc(f)) != EOF) |
---|
10094 | { |
---|
10095 | if(!whitechar(ch)) |
---|
10096 | { |
---|
10097 | int |
---|
10098 | n; |
---|
10099 | |
---|
10100 | ungetc(ch, f); |
---|
10101 | |
---|
10102 | n = treeFindTipName(f, tr, FALSE); |
---|
10103 | |
---|
10104 | if(n <= 0 || n > tr->mxtips) |
---|
10105 | printf("parsing error, raxml is expecting to read a taxon name that is contained in the reference tree you passed!\n"); |
---|
10106 | |
---|
10107 | assert(n > 0 && n <= tr->mxtips); |
---|
10108 | |
---|
10109 | candidateAncestorList[n] = 1; |
---|
10110 | numberOfCandidateAncestors++; |
---|
10111 | } |
---|
10112 | } |
---|
10113 | |
---|
10114 | fclose(f); |
---|
10115 | |
---|
10116 | for(i = 1; i <= tr->mxtips; i++) |
---|
10117 | { |
---|
10118 | if(candidateAncestorList[i]) |
---|
10119 | { |
---|
10120 | nodeptr |
---|
10121 | p = tr->nodep[i], |
---|
10122 | q = p->back, |
---|
10123 | l = q->next, |
---|
10124 | r = q->next->next; |
---|
10125 | |
---|
10126 | int |
---|
10127 | k; |
---|
10128 | |
---|
10129 | double |
---|
10130 | attachmentBranch[NUM_BRANCHES], |
---|
10131 | leftBranch[NUM_BRANCHES], |
---|
10132 | rightBranch[NUM_BRANCHES]; |
---|
10133 | |
---|
10134 | FILE |
---|
10135 | *f; |
---|
10136 | |
---|
10137 | char |
---|
10138 | fileName[1024]; |
---|
10139 | |
---|
10140 | strcpy(fileName, workdir); |
---|
10141 | strcat(fileName, "RAxML_ancestralTest."); |
---|
10142 | strcat(fileName, tr->nameList[i]); |
---|
10143 | strcat(fileName, "."); |
---|
10144 | strcat(fileName, run_id); |
---|
10145 | |
---|
10146 | f = myfopen(fileName, "w"); |
---|
10147 | |
---|
10148 | fprintf(f, " 3 %d\n", tr->rdta->sites); |
---|
10149 | |
---|
10150 | assert(strcmp(tr->nameList[i], tr->nameList[p->number]) == 0); |
---|
10151 | |
---|
10152 | printBothOpen("Checking if %s is a candidate ancestor\n\n", tr->nameList[i]); |
---|
10153 | printBothOpen("Per site log likelihoods for the three configurations will be written to file %s\n\n", fileName); |
---|
10154 | |
---|
10155 | memcpy(attachmentBranch, p->z, sizeof(double) * NUM_BRANCHES); |
---|
10156 | memcpy(leftBranch, l->z, sizeof(double) * NUM_BRANCHES); |
---|
10157 | memcpy(rightBranch, r->z, sizeof(double) * NUM_BRANCHES); |
---|
10158 | |
---|
10159 | |
---|
10160 | //configuration 1 |
---|
10161 | |
---|
10162 | for(k = 0; k < NUM_BRANCHES; k++) |
---|
10163 | p->z[k] = q->z[k] = zmax; |
---|
10164 | |
---|
10165 | evaluateSD(tr, bestLH, bestVector, weightSum, 1, i, f); |
---|
10166 | |
---|
10167 | memcpy(p->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10168 | memcpy(p->back->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10169 | |
---|
10170 | evaluateGenericInitrav(tr, tr->start); |
---|
10171 | assert(tr->likelihood == bestLH); |
---|
10172 | |
---|
10173 | //configuration 2 |
---|
10174 | |
---|
10175 | for(k = 0; k < NUM_BRANCHES; k++) |
---|
10176 | { |
---|
10177 | p->z[k] = q->z[k] = zmax; |
---|
10178 | l->z[k] = l->back->z[k] = zmax; |
---|
10179 | } |
---|
10180 | |
---|
10181 | evaluateSD(tr, bestLH, bestVector, weightSum, 2, i, f); |
---|
10182 | |
---|
10183 | memcpy(p->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10184 | memcpy(p->back->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10185 | memcpy(l->z, leftBranch, sizeof(double) * NUM_BRANCHES); |
---|
10186 | memcpy(l->back->z, leftBranch, sizeof(double) * NUM_BRANCHES); |
---|
10187 | |
---|
10188 | evaluateGenericInitrav(tr, tr->start); |
---|
10189 | assert(tr->likelihood == bestLH); |
---|
10190 | |
---|
10191 | //configuration 3 |
---|
10192 | |
---|
10193 | for(k = 0; k < NUM_BRANCHES; k++) |
---|
10194 | { |
---|
10195 | p->z[k] = q->z[k] = zmax; |
---|
10196 | r->z[k] = r->back->z[k] = zmax; |
---|
10197 | } |
---|
10198 | |
---|
10199 | evaluateSD(tr, bestLH, bestVector, weightSum, 3, i, f); |
---|
10200 | |
---|
10201 | memcpy(p->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10202 | memcpy(p->back->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10203 | memcpy(r->z, rightBranch, sizeof(double) * NUM_BRANCHES); |
---|
10204 | memcpy(r->back->z, rightBranch, sizeof(double) * NUM_BRANCHES); |
---|
10205 | |
---|
10206 | evaluateGenericInitrav(tr, tr->start); |
---|
10207 | assert(tr->likelihood == bestLH); |
---|
10208 | |
---|
10209 | printBothOpen("\n\n"); |
---|
10210 | fclose(f); |
---|
10211 | } |
---|
10212 | } |
---|
10213 | |
---|
10214 | printBothOpen("good-bye\n\n"); |
---|
10215 | |
---|
10216 | rax_free(candidateAncestorList); |
---|
10217 | rax_free(bestVector); |
---|
10218 | exit(0); |
---|
10219 | } |
---|
10220 | |
---|
10221 | static double distancesInitial(nodeptr p, double *distances, tree *tr, boolean fullTraversal) |
---|
10222 | { |
---|
10223 | if(isTip(p->number, tr->mxtips)) |
---|
10224 | return p->z[0]; |
---|
10225 | else |
---|
10226 | { |
---|
10227 | double |
---|
10228 | acc = 0.0; |
---|
10229 | |
---|
10230 | nodeptr |
---|
10231 | q; |
---|
10232 | |
---|
10233 | if(fullTraversal || !p->x) |
---|
10234 | { |
---|
10235 | q = p->next; |
---|
10236 | |
---|
10237 | while(q != p) |
---|
10238 | { |
---|
10239 | acc += distancesInitial(q->back, distances, tr, fullTraversal); |
---|
10240 | q = q->next; |
---|
10241 | } |
---|
10242 | |
---|
10243 | distances[p->number] = acc; |
---|
10244 | p->x = 1; |
---|
10245 | p->next->x = 0; |
---|
10246 | p->next->next->x = 0; |
---|
10247 | } |
---|
10248 | else |
---|
10249 | acc = distances[p->number]; |
---|
10250 | |
---|
10251 | return acc + p->z[0]; |
---|
10252 | } |
---|
10253 | } |
---|
10254 | |
---|
10255 | |
---|
10256 | |
---|
10257 | static void distancesNewview(nodeptr p, double *distances, tree *tr, nodeptr *rootBranch, double *minimum) |
---|
10258 | { |
---|
10259 | nodeptr |
---|
10260 | q; |
---|
10261 | |
---|
10262 | double |
---|
10263 | left = 0.0, |
---|
10264 | right = 0.0; |
---|
10265 | |
---|
10266 | if(isTip(p->number, tr->mxtips)) |
---|
10267 | { |
---|
10268 | q = p->back; |
---|
10269 | |
---|
10270 | if(!isTip(q->number, tr->mxtips)) |
---|
10271 | { |
---|
10272 | if(!q->x) |
---|
10273 | distancesInitial(q, distances, tr, FALSE); |
---|
10274 | left = distances[q->number]; |
---|
10275 | } |
---|
10276 | |
---|
10277 | if(left <= p->z[0]) |
---|
10278 | { |
---|
10279 | //the balanced root is in this branch |
---|
10280 | *rootBranch = p; |
---|
10281 | *minimum = 0.0; |
---|
10282 | } |
---|
10283 | else |
---|
10284 | { |
---|
10285 | double |
---|
10286 | diff = left - p->z[0]; |
---|
10287 | |
---|
10288 | if(diff < *minimum) |
---|
10289 | { |
---|
10290 | *minimum = diff; |
---|
10291 | *rootBranch = p; |
---|
10292 | } |
---|
10293 | } |
---|
10294 | } |
---|
10295 | else |
---|
10296 | { |
---|
10297 | q = p->back; |
---|
10298 | |
---|
10299 | if(!isTip(q->number, tr->mxtips)) |
---|
10300 | { |
---|
10301 | if(!q->x) |
---|
10302 | distancesInitial(q, distances, tr, FALSE); |
---|
10303 | |
---|
10304 | left = distances[q->number]; |
---|
10305 | } |
---|
10306 | else |
---|
10307 | left = 0.0; |
---|
10308 | |
---|
10309 | if(!isTip(p->number, tr->mxtips)) |
---|
10310 | { |
---|
10311 | if(!p->x) |
---|
10312 | distancesInitial(p, distances, tr, FALSE); |
---|
10313 | |
---|
10314 | right = distances[p->number]; |
---|
10315 | } |
---|
10316 | else |
---|
10317 | right = 0.0; |
---|
10318 | |
---|
10319 | if(ABS(left - right) <= p->z[0]) |
---|
10320 | { |
---|
10321 | *rootBranch = p; |
---|
10322 | *minimum = 0.0; |
---|
10323 | } |
---|
10324 | else |
---|
10325 | { |
---|
10326 | double |
---|
10327 | diff; |
---|
10328 | |
---|
10329 | if(left > right) |
---|
10330 | diff = left - (right + p->z[0]); |
---|
10331 | else |
---|
10332 | diff = right - (left + p->z[0]); |
---|
10333 | |
---|
10334 | if(*minimum > diff) |
---|
10335 | { |
---|
10336 | *minimum = diff; |
---|
10337 | *rootBranch = p; |
---|
10338 | } |
---|
10339 | } |
---|
10340 | |
---|
10341 | q = p->next; |
---|
10342 | |
---|
10343 | while(q != p) |
---|
10344 | { |
---|
10345 | distancesNewview(q->back, distances, tr, rootBranch, minimum); |
---|
10346 | q = q->next; |
---|
10347 | } |
---|
10348 | } |
---|
10349 | } |
---|
10350 | |
---|
10351 | static void printTreeRec(FILE *f, nodeptr p, tree *tr, boolean rootDescendant, boolean printBranchLabels) |
---|
10352 | { |
---|
10353 | if(isTip(p->number, tr->mxtips)) |
---|
10354 | { |
---|
10355 | if(rootDescendant) |
---|
10356 | fprintf(f, "%s", tr->nameList[p->number]); |
---|
10357 | else |
---|
10358 | fprintf(f, "%s:%f", tr->nameList[p->number], p->z[0]); |
---|
10359 | } |
---|
10360 | else |
---|
10361 | { |
---|
10362 | fprintf(f, "("); |
---|
10363 | printTreeRec(f, p->next->back, tr, FALSE, printBranchLabels); |
---|
10364 | fprintf(f, ","); |
---|
10365 | printTreeRec(f, p->next->next->back, tr, FALSE, printBranchLabels); |
---|
10366 | |
---|
10367 | if(rootDescendant) |
---|
10368 | fprintf(f, ")"); |
---|
10369 | else |
---|
10370 | { |
---|
10371 | if(printBranchLabels && !isTip(p->number, tr->mxtips) && !isTip(p->back->number, tr->mxtips)) |
---|
10372 | { |
---|
10373 | assert(p->support == p->back->support); |
---|
10374 | fprintf(f, "):%f[%d]", p->z[0], p->support); |
---|
10375 | } |
---|
10376 | else |
---|
10377 | fprintf(f, "):%f", p->z[0]); |
---|
10378 | } |
---|
10379 | } |
---|
10380 | } |
---|
10381 | |
---|
10382 | static void printTree(nodeptr p, tree *tr, double *distances, FILE *f, boolean printBranchLabels) |
---|
10383 | { |
---|
10384 | double |
---|
10385 | leftRoot, |
---|
10386 | rightRoot, |
---|
10387 | thisBranch = p->z[0], |
---|
10388 | left = 0.0, |
---|
10389 | right = 0.0; |
---|
10390 | |
---|
10391 | nodeptr |
---|
10392 | q = p->back; |
---|
10393 | |
---|
10394 | if(!isTip(p->number, tr->mxtips)) |
---|
10395 | { |
---|
10396 | if(!p->x) |
---|
10397 | distancesInitial(p, distances, tr, FALSE); |
---|
10398 | |
---|
10399 | left = distances[p->number]; |
---|
10400 | } |
---|
10401 | else |
---|
10402 | left = 0.0; |
---|
10403 | |
---|
10404 | if(!isTip(q->number, tr->mxtips)) |
---|
10405 | { |
---|
10406 | if(!q->x) |
---|
10407 | distancesInitial(q, distances, tr, FALSE); |
---|
10408 | |
---|
10409 | right = distances[q->number]; |
---|
10410 | } |
---|
10411 | else |
---|
10412 | left = 0.0; |
---|
10413 | |
---|
10414 | //printf("left %f right %f thisBranch %f\n", left, right, thisBranch); |
---|
10415 | |
---|
10416 | if(ABS(left - right) <= thisBranch) |
---|
10417 | { |
---|
10418 | if(left < right) |
---|
10419 | { |
---|
10420 | leftRoot = (right + thisBranch - left) / 2.0; |
---|
10421 | rightRoot = thisBranch - leftRoot; |
---|
10422 | } |
---|
10423 | else |
---|
10424 | { |
---|
10425 | rightRoot = (left + thisBranch - right) / 2.0; |
---|
10426 | leftRoot = thisBranch - rightRoot; |
---|
10427 | } |
---|
10428 | } |
---|
10429 | else |
---|
10430 | { |
---|
10431 | if(left < right) |
---|
10432 | { |
---|
10433 | leftRoot = thisBranch; |
---|
10434 | rightRoot = 0.0; |
---|
10435 | } |
---|
10436 | else |
---|
10437 | { |
---|
10438 | leftRoot = 0.0; |
---|
10439 | rightRoot = thisBranch; |
---|
10440 | } |
---|
10441 | } |
---|
10442 | |
---|
10443 | //descend into right subtree and print it |
---|
10444 | |
---|
10445 | fprintf(f, "("); |
---|
10446 | printTreeRec(f, p, tr, TRUE, printBranchLabels); |
---|
10447 | |
---|
10448 | //finished right subtree, print attachment branch of right subtree |
---|
10449 | //noew descent into left subtree |
---|
10450 | |
---|
10451 | if(printBranchLabels && !isTip(p->number, tr->mxtips) && !isTip(q->number, tr->mxtips)) |
---|
10452 | { |
---|
10453 | assert(p->support == q->support); |
---|
10454 | fprintf(f, ":%f[%d], ", leftRoot, p->support); |
---|
10455 | } |
---|
10456 | else |
---|
10457 | fprintf(f, ":%f, ", leftRoot); |
---|
10458 | printTreeRec(f, q, tr, TRUE, printBranchLabels); |
---|
10459 | |
---|
10460 | //finished left subtree, now print its branch to the root node |
---|
10461 | //and we are done |
---|
10462 | |
---|
10463 | if(printBranchLabels && !isTip(p->number, tr->mxtips) && !isTip(q->number, tr->mxtips)) |
---|
10464 | { |
---|
10465 | assert(p->support == q->support); |
---|
10466 | fprintf(f, ":%f[%d]);", rightRoot, q->support); |
---|
10467 | } |
---|
10468 | else |
---|
10469 | fprintf(f, ":%f);", rightRoot); |
---|
10470 | } |
---|
10471 | |
---|
10472 | static void rootTree(tree *tr, analdef *adef) |
---|
10473 | { |
---|
10474 | int |
---|
10475 | i; |
---|
10476 | |
---|
10477 | double |
---|
10478 | checkDistances, |
---|
10479 | minimum, |
---|
10480 | *distances = (double *)rax_malloc(sizeof(double) * 2 * tr->mxtips); |
---|
10481 | |
---|
10482 | char |
---|
10483 | rootedTreeFile[1024]; |
---|
10484 | |
---|
10485 | FILE |
---|
10486 | *f = myfopen(tree_file, "r"); |
---|
10487 | |
---|
10488 | nodeptr |
---|
10489 | rootBranch; |
---|
10490 | |
---|
10491 | boolean |
---|
10492 | printBranchLabels = FALSE; |
---|
10493 | |
---|
10494 | for(i = 0; i < 2 * tr->mxtips; i++) |
---|
10495 | distances[i] = 0.0; |
---|
10496 | |
---|
10497 | strcpy(rootedTreeFile, workdir); |
---|
10498 | strcat(rootedTreeFile, "RAxML_rootedTree."); |
---|
10499 | strcat(rootedTreeFile, run_id); |
---|
10500 | |
---|
10501 | treeReadLen(f, tr, TRUE, FALSE, TRUE, adef, TRUE, TRUE); |
---|
10502 | |
---|
10503 | if(tr->branchLabelCounter > 0) |
---|
10504 | { |
---|
10505 | assert(tr->branchLabelCounter == (tr->ntips - 3)); |
---|
10506 | printBranchLabels = TRUE; |
---|
10507 | printBothOpen("\nYour input tree contains branch labels, these will also be printed in the output tree ...\n\n"); |
---|
10508 | } |
---|
10509 | |
---|
10510 | fclose(f); |
---|
10511 | |
---|
10512 | minimum = checkDistances = distancesInitial(tr->start->back, distances, tr, TRUE); |
---|
10513 | |
---|
10514 | //printf("Tree Lenght: %f\n", checkDistances); |
---|
10515 | |
---|
10516 | f = myfopen(rootedTreeFile, "w"); |
---|
10517 | |
---|
10518 | distancesNewview(tr->start->back, distances, tr, &rootBranch, &minimum); |
---|
10519 | |
---|
10520 | printTree(rootBranch, tr, distances, f, printBranchLabels); |
---|
10521 | |
---|
10522 | fclose(f); |
---|
10523 | |
---|
10524 | printBothOpen("RAxML-rooted tree using subtree length-balance printed to file:\n%s\n", rootedTreeFile); |
---|
10525 | |
---|
10526 | rax_free(distances); |
---|
10527 | } |
---|
10528 | |
---|
10529 | int main (int argc, char *argv[]) |
---|
10530 | { |
---|
10531 | rawdata *rdta; |
---|
10532 | cruncheddata *cdta; |
---|
10533 | tree *tr; |
---|
10534 | analdef *adef; |
---|
10535 | int |
---|
10536 | i, |
---|
10537 | countGTR = 0, |
---|
10538 | countOtherModel = 0; |
---|
10539 | |
---|
10540 | #if (defined(_USE_PTHREADS) && !defined(_PORTABLE_PTHREADS)) |
---|
10541 | pinToCore(0); |
---|
10542 | #endif |
---|
10543 | |
---|
10544 | #if (defined(_WAYNE_MPI) || defined (_QUARTET_MPI)) |
---|
10545 | MPI_Init(&argc, &argv); |
---|
10546 | MPI_Comm_rank(MPI_COMM_WORLD, &processID); |
---|
10547 | MPI_Comm_size(MPI_COMM_WORLD, &processes); |
---|
10548 | printf("\nThis is RAxML MPI Process Number: %d\n", processID); |
---|
10549 | #else |
---|
10550 | processID = 0; |
---|
10551 | #endif |
---|
10552 | |
---|
10553 | masterTime = gettime(); |
---|
10554 | |
---|
10555 | globalArgc = argc; |
---|
10556 | globalArgv = (char **)rax_malloc(sizeof(char *) * argc); |
---|
10557 | for(i = 0; i < argc; i++) |
---|
10558 | globalArgv[i] = argv[i]; |
---|
10559 | |
---|
10560 | |
---|
10561 | |
---|
10562 | #if ! (defined(__ppc) || defined(__powerpc__) || defined(PPC)) |
---|
10563 | |
---|
10564 | /* |
---|
10565 | David Defour's command |
---|
10566 | _mm_setcsr( _mm_getcsr() | (_MM_FLUSH_ZERO_ON | MM_DAZ_ON)); |
---|
10567 | */ |
---|
10568 | |
---|
10569 | _mm_setcsr( _mm_getcsr() | _MM_FLUSH_ZERO_ON); |
---|
10570 | |
---|
10571 | #endif |
---|
10572 | |
---|
10573 | adef = (analdef *)rax_malloc(sizeof(analdef)); |
---|
10574 | rdta = (rawdata *)rax_malloc(sizeof(rawdata)); |
---|
10575 | cdta = (cruncheddata *)rax_malloc(sizeof(cruncheddata)); |
---|
10576 | tr = (tree *)rax_malloc(sizeof(tree)); |
---|
10577 | |
---|
10578 | /* initialize lookup table for fast bit counter */ |
---|
10579 | |
---|
10580 | compute_bits_in_16bits(); |
---|
10581 | |
---|
10582 | initAdef(adef); |
---|
10583 | get_args(argc,argv, adef, tr); |
---|
10584 | |
---|
10585 | |
---|
10586 | if(adef->readTaxaOnly) |
---|
10587 | { |
---|
10588 | if(adef->mode == PLAUSIBILITY_CHECKER || adef->mode == ROOT_TREE) |
---|
10589 | extractTaxaFromTopology(tr, rdta, cdta, tree_file); |
---|
10590 | else |
---|
10591 | extractTaxaFromTopology(tr, rdta, cdta, bootStrapFile); |
---|
10592 | } |
---|
10593 | |
---|
10594 | getinput(adef, rdta, cdta, tr); |
---|
10595 | |
---|
10596 | checkOutgroups(tr, adef); |
---|
10597 | makeFileNames(); |
---|
10598 | |
---|
10599 | #if (defined(_WAYNE_MPI) || defined (_QUARTET_MPI)) |
---|
10600 | MPI_Barrier(MPI_COMM_WORLD); |
---|
10601 | #endif |
---|
10602 | |
---|
10603 | if(adef->useInvariant && adef->likelihoodEpsilon > 0.001) |
---|
10604 | { |
---|
10605 | printBothOpen("\nYou are using a proportion of Invariable sites estimate, although I don't\n"); |
---|
10606 | printBothOpen("like it. The likelihood epsilon \"-f e\" will be automatically lowered to 0.001\n"); |
---|
10607 | printBothOpen("to avoid unfavorable effects caused by simultaneous optimization of alpha and P-Invar\n"); |
---|
10608 | |
---|
10609 | adef->likelihoodEpsilon = 0.001; |
---|
10610 | } |
---|
10611 | |
---|
10612 | |
---|
10613 | /* |
---|
10614 | switch back to model without secondary structure for all this |
---|
10615 | checking stuff |
---|
10616 | */ |
---|
10617 | |
---|
10618 | if(adef->useSecondaryStructure) |
---|
10619 | { |
---|
10620 | tr->dataVector = tr->initialDataVector; |
---|
10621 | tr->partitionData = tr->initialPartitionData; |
---|
10622 | tr->NumberOfModels--; |
---|
10623 | } |
---|
10624 | |
---|
10625 | if(adef->useExcludeFile) |
---|
10626 | { |
---|
10627 | handleExcludeFile(tr, adef, rdta); |
---|
10628 | exit(0); |
---|
10629 | } |
---|
10630 | |
---|
10631 | |
---|
10632 | if(!adef->readTaxaOnly && adef->mode != FAST_SEARCH && adef->mode != SH_LIKE_SUPPORTS) |
---|
10633 | checkSequences(tr, rdta, adef); |
---|
10634 | |
---|
10635 | |
---|
10636 | if(adef->mode == SPLIT_MULTI_GENE) |
---|
10637 | { |
---|
10638 | splitMultiGene(tr, rdta); |
---|
10639 | exit(0); |
---|
10640 | } |
---|
10641 | |
---|
10642 | if(adef->mode == CHECK_ALIGNMENT) |
---|
10643 | { |
---|
10644 | printf("Alignment format can be read by RAxML \n"); |
---|
10645 | exit(0); |
---|
10646 | } |
---|
10647 | |
---|
10648 | /* |
---|
10649 | switch back to model with secondary structure for all this |
---|
10650 | checking stuff |
---|
10651 | */ |
---|
10652 | |
---|
10653 | if(adef->useSecondaryStructure && !adef->readTaxaOnly) |
---|
10654 | { |
---|
10655 | tr->dataVector = tr->extendedDataVector; |
---|
10656 | tr->partitionData = tr->extendedPartitionData; |
---|
10657 | tr->NumberOfModels++; |
---|
10658 | /* might as well rax_free the initial structures here */ |
---|
10659 | |
---|
10660 | } |
---|
10661 | |
---|
10662 | if(!adef->readTaxaOnly) |
---|
10663 | { |
---|
10664 | int |
---|
10665 | countNonSev = 0, |
---|
10666 | countLG4 =0; |
---|
10667 | |
---|
10668 | makeweights(adef, rdta, cdta, tr); |
---|
10669 | makevalues(rdta, cdta, tr, adef); |
---|
10670 | |
---|
10671 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
10672 | { |
---|
10673 | if(!(tr->partitionData[i].dataType == AA_DATA || tr->partitionData[i].dataType == DNA_DATA)) |
---|
10674 | countNonSev++; |
---|
10675 | |
---|
10676 | if(tr->partitionData[i].protModels == LG4 || tr->partitionData[i].protModels == LG4X) |
---|
10677 | countLG4++; |
---|
10678 | |
---|
10679 | if(tr->partitionData[i].dataType == AA_DATA) |
---|
10680 | { |
---|
10681 | if(tr->partitionData[i].protModels == GTR || tr->partitionData[i].protModels == GTR_UNLINKED) |
---|
10682 | countGTR++; |
---|
10683 | else |
---|
10684 | countOtherModel++; |
---|
10685 | } |
---|
10686 | } |
---|
10687 | |
---|
10688 | if(countLG4 > 0) |
---|
10689 | { |
---|
10690 | if(tr->saveMemory) |
---|
10691 | { |
---|
10692 | printf("Error: the LG4 substitution model does not work in combination with the \"-U\" memory saving flag!\n\n"); |
---|
10693 | errorExit(-1); |
---|
10694 | } |
---|
10695 | |
---|
10696 | if(adef->useInvariant) |
---|
10697 | { |
---|
10698 | printf("Error: the LG4 substitution model does not work for proportion of invariavble sites estimates!\n\n"); |
---|
10699 | errorExit(-1); |
---|
10700 | } |
---|
10701 | |
---|
10702 | if(isCat(adef)) |
---|
10703 | { |
---|
10704 | printf("Error: the LG4 substitution model does not work with the CAT model of rate heterogeneity!\n\n"); |
---|
10705 | errorExit(-1); |
---|
10706 | } |
---|
10707 | } |
---|
10708 | |
---|
10709 | if(tr->saveMemory && countNonSev > 0) |
---|
10710 | { |
---|
10711 | printf("\nError, you want to use the SEV-based memory saving technique for large gappy datasets with missing data.\n"); |
---|
10712 | printf("However, this is only implelemented for DNA and protein data partitions, one of your partitions is neither DNA\n"); |
---|
10713 | printf("nor protein data ... exiting to prevent bad things from happening ;-) \n\n"); |
---|
10714 | |
---|
10715 | errorExit(-1); |
---|
10716 | } |
---|
10717 | |
---|
10718 | |
---|
10719 | if(countGTR > 0 && countOtherModel > 0) |
---|
10720 | { |
---|
10721 | printf("Error, it is only allowed to conduct partitioned AA analyses\n"); |
---|
10722 | printf("with a GTR model of AA substitution, if not all AA partitions are assigned\n"); |
---|
10723 | printf("the GTR or GTR_UNLINKED model.\n\n"); |
---|
10724 | |
---|
10725 | printf("The following partitions do not use GTR:\n"); |
---|
10726 | |
---|
10727 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
10728 | { |
---|
10729 | if(tr->partitionData[i].dataType == AA_DATA && (tr->partitionData[i].protModels != GTR || tr->partitionData[i].protModels != GTR_UNLINKED)) |
---|
10730 | printf("Partition %s\n", tr->partitionData[i].partitionName); |
---|
10731 | } |
---|
10732 | printf("exiting ...\n"); |
---|
10733 | errorExit(-1); |
---|
10734 | } |
---|
10735 | |
---|
10736 | if(countGTR > 0 && tr->NumberOfModels > 1) |
---|
10737 | { |
---|
10738 | FILE *info = myfopen(infoFileName, "ab"); |
---|
10739 | |
---|
10740 | printBoth(info, "You are using the GTR model of AA substitution!\n"); |
---|
10741 | printBoth(info, "GTR parameters for AA substiution will automatically be estimated\n"); |
---|
10742 | printBoth(info, "either jointly (GTR params will be linked) or independently (when using GTR_UNLINKED) across all partitions.\n"); |
---|
10743 | printBoth(info, "WARNING: you may be over-parametrizing the model!\n\n\n"); |
---|
10744 | |
---|
10745 | fclose(info); |
---|
10746 | } |
---|
10747 | } |
---|
10748 | |
---|
10749 | if(adef->mode == CLASSIFY_ML || adef->mode == CLASSIFY_MP) |
---|
10750 | tr->innerNodes = (size_t)(countTaxaInTopology() - 1); |
---|
10751 | else |
---|
10752 | tr->innerNodes = tr->mxtips; |
---|
10753 | |
---|
10754 | |
---|
10755 | setRateHetAndDataIncrement(tr, adef); |
---|
10756 | |
---|
10757 | #ifdef _USE_PTHREADS |
---|
10758 | startPthreads(tr); |
---|
10759 | masterBarrier(THREAD_INIT_PARTITION, tr); |
---|
10760 | if(!adef->readTaxaOnly) |
---|
10761 | masterBarrier(THREAD_ALLOC_LIKELIHOOD, tr); |
---|
10762 | #else |
---|
10763 | if(!adef->readTaxaOnly) |
---|
10764 | allocNodex(tr); |
---|
10765 | #endif |
---|
10766 | |
---|
10767 | printModelAndProgramInfo(tr, adef, argc, argv); |
---|
10768 | |
---|
10769 | switch(adef->mode) |
---|
10770 | { |
---|
10771 | case CLASSIFY_MP: |
---|
10772 | getStartingTree(tr, adef); |
---|
10773 | assert(0); |
---|
10774 | break; |
---|
10775 | case CLASSIFY_ML: |
---|
10776 | if(adef->useBinaryModelFile) |
---|
10777 | { |
---|
10778 | assert(tr->rateHetModel != CAT); |
---|
10779 | readBinaryModel(tr); |
---|
10780 | } |
---|
10781 | else |
---|
10782 | initModel(tr, rdta, cdta, adef); |
---|
10783 | |
---|
10784 | getStartingTree(tr, adef); |
---|
10785 | exit(0); |
---|
10786 | break; |
---|
10787 | case GENERATE_BS: |
---|
10788 | generateBS(tr, adef); |
---|
10789 | exit(0); |
---|
10790 | break; |
---|
10791 | case COMPUTE_ELW: |
---|
10792 | computeELW(tr, adef, bootStrapFile); |
---|
10793 | exit(0); |
---|
10794 | break; |
---|
10795 | case COMPUTE_LHS: |
---|
10796 | initModel(tr, rdta, cdta, adef); |
---|
10797 | computeAllLHs(tr, adef, bootStrapFile); |
---|
10798 | exit(0); |
---|
10799 | break; |
---|
10800 | case COMPUTE_BIPARTITION_CORRELATION: |
---|
10801 | compareBips(tr, bootStrapFile, adef); |
---|
10802 | exit(0); |
---|
10803 | break; |
---|
10804 | case COMPUTE_RF_DISTANCE: |
---|
10805 | computeRF(tr, bootStrapFile, adef); |
---|
10806 | exit(0); |
---|
10807 | break; |
---|
10808 | case BOOTSTOP_ONLY: |
---|
10809 | computeBootStopOnly(tr, bootStrapFile, adef); |
---|
10810 | exit(0); |
---|
10811 | break; |
---|
10812 | case CONSENSUS_ONLY: |
---|
10813 | if(adef->leaveDropMode) |
---|
10814 | computeRogueTaxa(tr, bootStrapFile, adef); |
---|
10815 | else |
---|
10816 | computeConsensusOnly(tr, bootStrapFile, adef, adef->calculateIC); |
---|
10817 | exit(0); |
---|
10818 | break; |
---|
10819 | case DISTANCE_MODE: |
---|
10820 | initModel(tr, rdta, cdta, adef); |
---|
10821 | getStartingTree(tr, adef); |
---|
10822 | computeDistances(tr, adef); |
---|
10823 | break; |
---|
10824 | case PARSIMONY_ADDITION: |
---|
10825 | initModel(tr, rdta, cdta, adef); |
---|
10826 | getStartingTree(tr, adef); |
---|
10827 | printStartingTree(tr, adef, TRUE); |
---|
10828 | break; |
---|
10829 | case PER_SITE_LL: |
---|
10830 | initModel(tr, rdta, cdta, adef); |
---|
10831 | computePerSiteLLs(tr, adef, bootStrapFile); |
---|
10832 | break; |
---|
10833 | case TREE_EVALUATION: |
---|
10834 | initModel(tr, rdta, cdta, adef); |
---|
10835 | |
---|
10836 | getStartingTree(tr, adef); |
---|
10837 | |
---|
10838 | if(adef->likelihoodTest) |
---|
10839 | computeLHTest(tr, adef, bootStrapFile); |
---|
10840 | else |
---|
10841 | { |
---|
10842 | if(adef->useBinaryModelFile) |
---|
10843 | { |
---|
10844 | readBinaryModel(tr); |
---|
10845 | evaluateGenericInitrav(tr, tr->start); |
---|
10846 | treeEvaluate(tr, 2); |
---|
10847 | } |
---|
10848 | else |
---|
10849 | { |
---|
10850 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
10851 | writeBinaryModel(tr); |
---|
10852 | } |
---|
10853 | |
---|
10854 | printLog(tr, adef, TRUE); |
---|
10855 | printResult(tr, adef, TRUE); |
---|
10856 | } |
---|
10857 | |
---|
10858 | break; |
---|
10859 | case ANCESTRAL_STATES: |
---|
10860 | initModel(tr, rdta, cdta, adef); |
---|
10861 | |
---|
10862 | getStartingTree(tr, adef); |
---|
10863 | |
---|
10864 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
10865 | |
---|
10866 | evaluateGenericInitrav(tr, tr->start); |
---|
10867 | |
---|
10868 | computeAncestralStates(tr, tr->likelihood); |
---|
10869 | break; |
---|
10870 | case QUARTET_CALCULATION: |
---|
10871 | computeQuartets(tr, adef, rdta, cdta); |
---|
10872 | break; |
---|
10873 | case THOROUGH_OPTIMIZATION: |
---|
10874 | thoroughTreeOptimization(tr, adef, rdta, cdta); |
---|
10875 | break; |
---|
10876 | case CALC_BIPARTITIONS: |
---|
10877 | calcBipartitions(tr, adef, tree_file, bootStrapFile); |
---|
10878 | break; |
---|
10879 | case CALC_BIPARTITIONS_IC: |
---|
10880 | calcBipartitions_IC(tr, adef, tree_file, bootStrapFile); |
---|
10881 | break; |
---|
10882 | case BIG_RAPID_MODE: |
---|
10883 | if(adef->boot) |
---|
10884 | doBootstrap(tr, adef, rdta, cdta); |
---|
10885 | else |
---|
10886 | { |
---|
10887 | if(adef->rapidBoot) |
---|
10888 | { |
---|
10889 | initModel(tr, rdta, cdta, adef); |
---|
10890 | doAllInOne(tr, adef); |
---|
10891 | } |
---|
10892 | else |
---|
10893 | doInference(tr, adef, rdta, cdta); |
---|
10894 | } |
---|
10895 | break; |
---|
10896 | case MORPH_CALIBRATOR: |
---|
10897 | initModel(tr, rdta, cdta, adef); |
---|
10898 | getStartingTree(tr, adef); |
---|
10899 | evaluateGenericInitrav(tr, tr->start); |
---|
10900 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
10901 | morphologicalCalibration(tr, adef); |
---|
10902 | break; |
---|
10903 | case FAST_SEARCH: |
---|
10904 | fastSearch(tr, adef, rdta, cdta); |
---|
10905 | exit(0); |
---|
10906 | case SH_LIKE_SUPPORTS: |
---|
10907 | shSupports(tr, adef, rdta, cdta); |
---|
10908 | break; |
---|
10909 | case EPA_SITE_SPECIFIC_BIAS: |
---|
10910 | initModel(tr, rdta, cdta, adef); |
---|
10911 | getStartingTree(tr, adef); |
---|
10912 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
10913 | computePlacementBias(tr, adef); |
---|
10914 | break; |
---|
10915 | case OPTIMIZE_BR_LEN_SCALER: |
---|
10916 | initModel(tr, rdta, cdta, adef); |
---|
10917 | |
---|
10918 | getStartingTree(tr, adef); |
---|
10919 | evaluateGenericInitrav(tr, tr->start); |
---|
10920 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
10921 | |
---|
10922 | printBothOpen("Likelihood: %f\n", tr->likelihood); |
---|
10923 | |
---|
10924 | break; |
---|
10925 | case ANCESTRAL_SEQUENCE_TEST: |
---|
10926 | initModel(tr, rdta, cdta, adef); |
---|
10927 | |
---|
10928 | getStartingTree(tr, adef); |
---|
10929 | |
---|
10930 | evaluateGenericInitrav(tr, tr->start); |
---|
10931 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
10932 | |
---|
10933 | ancestralSequenceTest(tr); |
---|
10934 | break; |
---|
10935 | case PLAUSIBILITY_CHECKER: |
---|
10936 | plausibilityChecker(tr, adef); |
---|
10937 | exit(0); |
---|
10938 | break; |
---|
10939 | case ROOT_TREE: |
---|
10940 | rootTree(tr, adef); |
---|
10941 | break; |
---|
10942 | default: |
---|
10943 | assert(0); |
---|
10944 | } |
---|
10945 | |
---|
10946 | finalizeInfoFile(tr, adef); |
---|
10947 | |
---|
10948 | #if (defined(_WAYNE_MPI) || defined (_QUARTET_MPI)) |
---|
10949 | MPI_Finalize(); |
---|
10950 | #endif |
---|
10951 | |
---|
10952 | return 0; |
---|
10953 | } |
---|
10954 | |
---|
10955 | |
---|