1 | /* RAxML-VI-HPC (version 2.2) a program for sequential and parallel estimation of phylogenetic trees |
---|
2 | * Copyright August 2006 by Alexandros Stamatakis |
---|
3 | * |
---|
4 | * Partially derived from |
---|
5 | * fastDNAml, a program for estimation of phylogenetic trees from sequences by Gary J. Olsen |
---|
6 | * |
---|
7 | * and |
---|
8 | * |
---|
9 | * Programs of the PHYLIP package by Joe Felsenstein. |
---|
10 | * |
---|
11 | * This program is free software; you may redistribute it and/or modify its |
---|
12 | * under the terms of the GNU General Public License as published by the Free |
---|
13 | * Software Foundation; either version 2 of the License, or (at your option) |
---|
14 | * any later version. |
---|
15 | * |
---|
16 | * This program is distributed in the hope that it will be useful, but |
---|
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
---|
18 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
---|
19 | * for more details. |
---|
20 | * |
---|
21 | * |
---|
22 | * For any other enquiries send an Email to Alexandros Stamatakis |
---|
23 | * Alexandros.Stamatakis@epfl.ch |
---|
24 | * |
---|
25 | * When publishing work that is based on the results from RAxML-VI-HPC please cite: |
---|
26 | * |
---|
27 | * Alexandros Stamatakis:"RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands of taxa and mixed models". |
---|
28 | * Bioinformatics 2006; doi: 10.1093/bioinformatics/btl446 |
---|
29 | */ |
---|
30 | |
---|
31 | #ifdef WIN32 |
---|
32 | #include <direct.h> |
---|
33 | #endif |
---|
34 | |
---|
35 | #ifndef WIN32 |
---|
36 | #include <sys/times.h> |
---|
37 | #include <sys/types.h> |
---|
38 | #include <sys/time.h> |
---|
39 | #include <unistd.h> |
---|
40 | #endif |
---|
41 | |
---|
42 | #include <math.h> |
---|
43 | #include <time.h> |
---|
44 | #include <stdlib.h> |
---|
45 | #include <stdio.h> |
---|
46 | #include <ctype.h> |
---|
47 | #include <string.h> |
---|
48 | #include <stdarg.h> |
---|
49 | #include <limits.h> |
---|
50 | |
---|
51 | #if (defined(_WAYNE_MPI) || defined (_QUARTET_MPI)) |
---|
52 | #include <mpi.h> |
---|
53 | #endif |
---|
54 | |
---|
55 | |
---|
56 | |
---|
57 | #ifdef _USE_PTHREADS |
---|
58 | #include <pthread.h> |
---|
59 | |
---|
60 | #endif |
---|
61 | |
---|
62 | #if ! (defined(__ppc) || defined(__powerpc__) || defined(PPC)) |
---|
63 | #include <xmmintrin.h> |
---|
64 | /* |
---|
65 | special bug fix, enforces denormalized numbers to be flushed to zero, |
---|
66 | without this program is a tiny bit faster though. |
---|
67 | #include <emmintrin.h> |
---|
68 | #define MM_DAZ_MASK 0x0040 |
---|
69 | #define MM_DAZ_ON 0x0040 |
---|
70 | #define MM_DAZ_OFF 0x0000 |
---|
71 | */ |
---|
72 | #endif |
---|
73 | |
---|
74 | #include "axml.h" |
---|
75 | #include "globalVariables.h" |
---|
76 | |
---|
77 | |
---|
78 | #define _PORTABLE_PTHREADS |
---|
79 | |
---|
80 | |
---|
81 | /***************** UTILITY FUNCTIONS **************************/ |
---|
82 | |
---|
83 | |
---|
/* Returns the absolute value of x by delegating to the C library fabs(). */
double FABS(double x)
{
  const double
    magnitude = fabs(x);

  return magnitude;
}
---|
94 | |
---|
95 | |
---|
96 | |
---|
97 | |
---|
98 | |
---|
99 | FILE *getNumberOfTrees(tree *tr, char *fileName, analdef *adef) |
---|
100 | { |
---|
101 | FILE |
---|
102 | *f = myfopen(fileName, "r"); |
---|
103 | |
---|
104 | int |
---|
105 | trees = 0, |
---|
106 | ch; |
---|
107 | |
---|
108 | while((ch = fgetc(f)) != EOF) |
---|
109 | if(ch == ';') |
---|
110 | trees++; |
---|
111 | |
---|
112 | assert(trees > 0); |
---|
113 | |
---|
114 | tr->numberOfTrees = trees; |
---|
115 | |
---|
116 | if(!adef->allInOne) |
---|
117 | printBothOpen("\n\nFound %d trees in File %s\n\n", trees, fileName); |
---|
118 | |
---|
119 | |
---|
120 | rewind(f); |
---|
121 | |
---|
122 | return f; |
---|
123 | } |
---|
124 | |
---|
/* Writes one printf-style formatted message twice: first to the stream f,
   then to stdout. */
static void printBoth(FILE *f, const char* format, ... )
{
  va_list
    fileArgs,
    consoleArgs;

  va_start(fileArgs, format);

  /* the argument list is consumed once per output, hence the copy */
  va_copy(consoleArgs, fileArgs);

  vfprintf(f, format, fileArgs);
  va_end(fileArgs);

  vprintf(format, consoleArgs);
  va_end(consoleArgs);
}
---|
136 | |
---|
137 | void printBothOpen(const char* format, ... ) |
---|
138 | { |
---|
139 | #ifdef _QUARTET_MPI |
---|
140 | if(processID == 0) |
---|
141 | #endif |
---|
142 | { |
---|
143 | FILE *f = myfopen(infoFileName, "ab"); |
---|
144 | |
---|
145 | va_list args; |
---|
146 | va_start(args, format); |
---|
147 | vfprintf(f, format, args ); |
---|
148 | va_end(args); |
---|
149 | |
---|
150 | va_start(args, format); |
---|
151 | vprintf(format, args ); |
---|
152 | va_end(args); |
---|
153 | |
---|
154 | fclose(f); |
---|
155 | } |
---|
156 | } |
---|
157 | |
---|
158 | void printBothOpenMPI(const char* format, ... ) |
---|
159 | { |
---|
160 | #ifdef _WAYNE_MPI |
---|
161 | if(processID == 0) |
---|
162 | #endif |
---|
163 | { |
---|
164 | FILE *f = myfopen(infoFileName, "ab"); |
---|
165 | |
---|
166 | va_list args; |
---|
167 | va_start(args, format); |
---|
168 | vfprintf(f, format, args ); |
---|
169 | va_end(args); |
---|
170 | |
---|
171 | va_start(args, format); |
---|
172 | vprintf(format, args ); |
---|
173 | va_end(args); |
---|
174 | |
---|
175 | fclose(f); |
---|
176 | } |
---|
177 | } |
---|
178 | |
---|
179 | |
---|
180 | boolean getSmoothFreqs(int dataType) |
---|
181 | { |
---|
182 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
183 | |
---|
184 | return pLengths[dataType].smoothFrequencies; |
---|
185 | } |
---|
186 | |
---|
187 | const unsigned int *getBitVector(int dataType) |
---|
188 | { |
---|
189 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
190 | |
---|
191 | return pLengths[dataType].bitVector; |
---|
192 | } |
---|
193 | |
---|
194 | |
---|
195 | int getStates(int dataType) |
---|
196 | { |
---|
197 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
198 | |
---|
199 | return pLengths[dataType].states; |
---|
200 | } |
---|
201 | |
---|
202 | unsigned char getUndetermined(int dataType) |
---|
203 | { |
---|
204 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
205 | |
---|
206 | return pLengths[dataType].undetermined; |
---|
207 | } |
---|
208 | |
---|
209 | |
---|
210 | |
---|
211 | char getInverseMeaning(int dataType, unsigned char state) |
---|
212 | { |
---|
213 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
214 | |
---|
215 | return pLengths[dataType].inverseMeaning[state]; |
---|
216 | } |
---|
217 | |
---|
218 | partitionLengths *getPartitionLengths(pInfo *p) |
---|
219 | { |
---|
220 | int |
---|
221 | dataType = p->dataType, |
---|
222 | states = p->states, |
---|
223 | tipLength = p->maxTipStates; |
---|
224 | |
---|
225 | assert(states != -1 && tipLength != -1); |
---|
226 | |
---|
227 | assert(MIN_MODEL < dataType && dataType < MAX_MODEL); |
---|
228 | |
---|
229 | pLength.leftLength = pLength.rightLength = states * states; |
---|
230 | pLength.eignLength = states -1; |
---|
231 | pLength.evLength = states * states; |
---|
232 | pLength.eiLength = states * states - states; |
---|
233 | pLength.substRatesLength = (states * states - states) / 2; |
---|
234 | pLength.frequenciesLength = states; |
---|
235 | pLength.tipVectorLength = tipLength * states; |
---|
236 | pLength.symmetryVectorLength = (states * states - states) / 2; |
---|
237 | pLength.frequencyGroupingLength = states; |
---|
238 | pLength.nonGTR = FALSE; |
---|
239 | |
---|
240 | return (&pLengths[dataType]); |
---|
241 | } |
---|
242 | |
---|
243 | |
---|
244 | |
---|
245 | static boolean isCat(analdef *adef) |
---|
246 | { |
---|
247 | if(adef->model == M_PROTCAT || adef->model == M_GTRCAT || adef->model == M_BINCAT || adef->model == M_32CAT || adef->model == M_64CAT) |
---|
248 | return TRUE; |
---|
249 | else |
---|
250 | return FALSE; |
---|
251 | } |
---|
252 | |
---|
253 | static boolean isGamma(analdef *adef) |
---|
254 | { |
---|
255 | if(adef->model == M_PROTGAMMA || adef->model == M_GTRGAMMA || adef->model == M_BINGAMMA || |
---|
256 | adef->model == M_32GAMMA || adef->model == M_64GAMMA) |
---|
257 | return TRUE; |
---|
258 | else |
---|
259 | return FALSE; |
---|
260 | } |
---|
261 | |
---|
262 | |
---|
263 | static int stateAnalyzer(tree *tr, int model, int maxStates) |
---|
264 | { |
---|
265 | boolean |
---|
266 | counter[256], |
---|
267 | previous, |
---|
268 | inputError = FALSE; |
---|
269 | |
---|
270 | int |
---|
271 | lower = tr->partitionData[model].lower, |
---|
272 | upper = tr->partitionData[model].upper, |
---|
273 | i, |
---|
274 | j, |
---|
275 | states = 0; |
---|
276 | |
---|
277 | for(i = 0; i < 256; i++) |
---|
278 | counter[i] = FALSE; |
---|
279 | |
---|
280 | for(i = 0; i < tr->rdta->numsp; i++) |
---|
281 | { |
---|
282 | unsigned char *yptr = &(tr->rdta->y0[((size_t)i) * ((size_t)tr->originalCrunchedLength)]); |
---|
283 | |
---|
284 | for(j = lower; j < upper; j++) |
---|
285 | if(yptr[j] != getUndetermined(GENERIC_32)) |
---|
286 | counter[yptr[j]] = TRUE; |
---|
287 | |
---|
288 | } |
---|
289 | |
---|
290 | for(i = 0; i < maxStates; i++) |
---|
291 | { |
---|
292 | if(counter[i]) |
---|
293 | states++; |
---|
294 | } |
---|
295 | |
---|
296 | |
---|
297 | previous = counter[0]; |
---|
298 | |
---|
299 | for(i = 1; i < 256; i++) |
---|
300 | { |
---|
301 | if(previous == FALSE && counter[i] == TRUE) |
---|
302 | { |
---|
303 | inputError = TRUE; |
---|
304 | break; |
---|
305 | } |
---|
306 | else |
---|
307 | { |
---|
308 | if(previous == TRUE && counter[i] == FALSE) |
---|
309 | previous = FALSE; |
---|
310 | } |
---|
311 | } |
---|
312 | |
---|
313 | if(inputError) |
---|
314 | { |
---|
315 | printf("Multi State Error, characters must be used in the order they are available, i.e.\n"); |
---|
316 | printf("0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V\n"); |
---|
317 | printf("You are using the following characters: \n"); |
---|
318 | for(i = 0; i < 256; i++) |
---|
319 | if(counter[i]) |
---|
320 | printf("%c ", inverseMeaningGeneric32[i]); |
---|
321 | printf("\n"); |
---|
322 | exit(-1); |
---|
323 | } |
---|
324 | |
---|
325 | return states; |
---|
326 | } |
---|
327 | |
---|
328 | |
---|
329 | |
---|
330 | |
---|
331 | static void setRateHetAndDataIncrement(tree *tr, analdef *adef) |
---|
332 | { |
---|
333 | int model; |
---|
334 | |
---|
335 | if(isCat(adef)) |
---|
336 | tr->rateHetModel = CAT; |
---|
337 | else |
---|
338 | { |
---|
339 | if(adef->useInvariant) |
---|
340 | tr->rateHetModel = GAMMA_I; |
---|
341 | else |
---|
342 | tr->rateHetModel = GAMMA; |
---|
343 | } |
---|
344 | |
---|
345 | switch(tr->rateHetModel) |
---|
346 | { |
---|
347 | case GAMMA: |
---|
348 | case GAMMA_I: |
---|
349 | tr->discreteRateCategories = 4; |
---|
350 | break; |
---|
351 | case CAT: |
---|
352 | if((adef->boot && !adef->bootstrapBranchLengths) || (adef->mode == CLASSIFY_ML) || (tr->catOnly)) |
---|
353 | tr->discreteRateCategories = 1; |
---|
354 | else |
---|
355 | tr->discreteRateCategories = 4; |
---|
356 | break; |
---|
357 | default: |
---|
358 | assert(0); |
---|
359 | } |
---|
360 | |
---|
361 | if(adef->bootstrapBranchLengths) |
---|
362 | assert(tr->discreteRateCategories == 4); |
---|
363 | |
---|
364 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
365 | { |
---|
366 | int |
---|
367 | states = -1, |
---|
368 | maxTipStates = getUndetermined(tr->partitionData[model].dataType) + 1; |
---|
369 | |
---|
370 | switch(tr->partitionData[model].dataType) |
---|
371 | { |
---|
372 | case BINARY_DATA: |
---|
373 | case DNA_DATA: |
---|
374 | case AA_DATA: |
---|
375 | case SECONDARY_DATA: |
---|
376 | case SECONDARY_DATA_6: |
---|
377 | case SECONDARY_DATA_7: |
---|
378 | states = getStates(tr->partitionData[model].dataType); |
---|
379 | break; |
---|
380 | case GENERIC_32: |
---|
381 | case GENERIC_64: |
---|
382 | states = stateAnalyzer(tr, model, getStates(tr->partitionData[model].dataType)); |
---|
383 | break; |
---|
384 | default: |
---|
385 | assert(0); |
---|
386 | } |
---|
387 | |
---|
388 | tr->partitionData[model].states = states; |
---|
389 | tr->partitionData[model].maxTipStates = maxTipStates; |
---|
390 | } |
---|
391 | } |
---|
392 | |
---|
393 | |
---|
/* Returns the current wall-clock time in seconds as a double. The value is
   meant for computing elapsed time as the difference of two calls.

   On WIN32 the result is time(NULL), i.e. whole seconds since the epoch.
   A value built only from the minute and second fields of the local time
   would wrap to zero every hour and yield wrong (even negative) differences
   for runs that cross an hour boundary.

   Elsewhere gettimeofday() is used, giving microsecond resolution. */
double gettime(void)
{
#ifdef WIN32
  return (double)time(NULL);
#else
  struct timeval
    now;

  gettimeofday(&now, NULL);

  return now.tv_sec + now.tv_usec * 0.000001;
#endif
}
---|
408 | |
---|
/* Derives an integer from the current time, named for use as a random seed.

   On WIN32 this is the number of seconds elapsed in the current year
   according to local time (day of year, hour, minute, second).
   Elsewhere it is the sum of the seconds and microseconds fields returned by
   gettimeofday(), converted to int. */
int gettimeSrand(void)
{
#ifdef WIN32
  time_t
    now = time(NULL);

  struct tm
    local = *localtime(&now);

  return 24*60*60*local.tm_yday + 60*60*local.tm_hour + 60*local.tm_min + local.tm_sec;
#else
  struct timeval
    now;

  gettimeofday(&now, NULL);

  return now.tv_sec + now.tv_usec;
#endif
}
---|
423 | |
---|
/* Pseudo random number generator working on a 32-bit state held in *seed.

   The state is split into three limbs (12, 12 and 8 bits) and multiplied by
   a constant that is itself split into the limbs 1549 and 406; carries are
   propagated from limb to limb. The new state is written back to *seed and
   also returned scaled into [0, 1): 0.00390625 is 1/256 and
   0.000244140625 is 1/4096. */
double randum (long *seed)
{
  const long
    mulLow = 1549,
    mulMid = 406;

  const long
    oldLow  = *seed & 0xFFF,
    oldMid  = (*seed >> 12) & 0xFFF,
    oldHigh = (*seed >> 24) & 0xFF;

  long
    acc,
    newLow,
    newMid,
    newHigh;

  acc     = mulLow * oldLow;
  newLow  = acc & 0xFFF;

  acc   >>= 12;
  acc    += mulLow * oldMid + mulMid * oldLow;
  newMid  = acc & 0xFFF;

  acc   >>= 12;
  acc    += mulLow * oldHigh + mulMid * oldMid;
  newHigh = acc & 0xFF;

  *seed = newHigh << 24 | newMid << 12 | newLow;

  return 0.00390625 * (newHigh + 0.000244140625 * (newMid + 0.000244140625 * newLow));
}
---|
448 | |
---|
/* Returns 1 if filename can be opened for reading ("rb"), 0 otherwise.
   The probe handle is closed again before returning. */
int filexists(char *filename)
{
  FILE
    *probe = fopen(filename, "rb");

  if(probe == NULL)
    return 0;

  fclose(probe);

  return 1;
}
---|
465 | |
---|
466 | |
---|
467 | FILE *myfopen(const char *path, const char *mode) |
---|
468 | { |
---|
469 | FILE *fp = fopen(path, mode); |
---|
470 | |
---|
471 | if(strcmp(mode,"r") == 0 || strcmp(mode,"rb") == 0) |
---|
472 | { |
---|
473 | if(fp) |
---|
474 | return fp; |
---|
475 | else |
---|
476 | { |
---|
477 | if(processID == 0) |
---|
478 | printf("The file %s you want to open for reading does not exist, exiting ...\n", path); |
---|
479 | errorExit(-1); |
---|
480 | return (FILE *)NULL; |
---|
481 | } |
---|
482 | } |
---|
483 | else |
---|
484 | { |
---|
485 | if(fp) |
---|
486 | return fp; |
---|
487 | else |
---|
488 | { |
---|
489 | if(processID == 0) |
---|
490 | printf("The file %s RAxML wants to open for writing or appending can not be opened [mode: %s], exiting ...\n", |
---|
491 | path, mode); |
---|
492 | errorExit(-1); |
---|
493 | return (FILE *)NULL; |
---|
494 | } |
---|
495 | } |
---|
496 | |
---|
497 | |
---|
498 | } |
---|
499 | |
---|
500 | |
---|
501 | |
---|
502 | |
---|
503 | |
---|
504 | /********************* END UTILITY FUNCTIONS ********************/ |
---|
505 | |
---|
506 | |
---|
507 | /******************************some functions for the likelihood computation ****************************/ |
---|
508 | |
---|
509 | |
---|
510 | boolean isTip(int number, int maxTips) |
---|
511 | { |
---|
512 | assert(number > 0); |
---|
513 | |
---|
514 | if(number <= maxTips) |
---|
515 | return TRUE; |
---|
516 | else |
---|
517 | return FALSE; |
---|
518 | } |
---|
519 | |
---|
520 | |
---|
521 | |
---|
522 | |
---|
523 | |
---|
524 | |
---|
525 | |
---|
526 | |
---|
527 | void getxnode (nodeptr p) |
---|
528 | { |
---|
529 | nodeptr s; |
---|
530 | |
---|
531 | if ((s = p->next)->x || (s = s->next)->x) |
---|
532 | { |
---|
533 | p->x = s->x; |
---|
534 | s->x = 0; |
---|
535 | } |
---|
536 | |
---|
537 | assert(p->x); |
---|
538 | } |
---|
539 | |
---|
540 | |
---|
541 | |
---|
542 | |
---|
543 | |
---|
544 | void hookup (nodeptr p, nodeptr q, double *z, int numBranches) |
---|
545 | { |
---|
546 | int i; |
---|
547 | |
---|
548 | p->back = q; |
---|
549 | q->back = p; |
---|
550 | |
---|
551 | for(i = 0; i < numBranches; i++) |
---|
552 | p->z[i] = q->z[i] = z[i]; |
---|
553 | } |
---|
554 | |
---|
555 | void hookupDefault (nodeptr p, nodeptr q, int numBranches) |
---|
556 | { |
---|
557 | int i; |
---|
558 | |
---|
559 | p->back = q; |
---|
560 | q->back = p; |
---|
561 | |
---|
562 | for(i = 0; i < numBranches; i++) |
---|
563 | p->z[i] = q->z[i] = defaultz; |
---|
564 | } |
---|
565 | |
---|
566 | |
---|
567 | /***********************reading and initializing input ******************/ |
---|
568 | |
---|
569 | static void rax_getline_insptr_valid(char **lineptr, size_t *n, size_t ins_ptr ) |
---|
570 | { |
---|
571 | const size_t |
---|
572 | n_inc = 1024; |
---|
573 | |
---|
574 | if(ins_ptr >= *n) |
---|
575 | { |
---|
576 | assert( *n <= (SSIZE_MAX - n_inc)); |
---|
577 | |
---|
578 | *n += n_inc; |
---|
579 | |
---|
580 | *lineptr = (char*)rax_realloc((void*)(*lineptr), *n * sizeof(char), FALSE); |
---|
581 | |
---|
582 | assert(*lineptr != 0); |
---|
583 | } |
---|
584 | } |
---|
585 | |
---|
/* getline()-like reader: reads one line from h into the growable buffer
   *lineptr (capacity *n) and returns the number of characters stored, not
   counting the terminating 0 byte.

   - Errors are handled by asserts, not by return codes.
   - A NULL *lineptr means "no buffer yet"; *n is then reset to 0.
   - The line terminator is stored in the buffer. A '\r' is stored as '\n'
     and ends the line by itself, so a "\r\n" pair produces the line followed
     by an additional line that contains only '\n'.
   - At end of file, -1 is returned if nothing was read for the current line;
     otherwise the characters read so far are returned as a line without a
     trailing '\n'. */
static ssize_t rax_getline(char **lineptr, size_t *n, FILE *h)
{
  size_t
    used = 0;

  int
    c;

  assert(h != (FILE*)NULL);

  if(*lineptr == (char *)NULL)
    *n = 0;

  do
    {
      c = fgetc(h);

      if(c == EOF)
        {
          if(used == 0)
            return -1;

          /* unterminated last line */
          break;
        }

      /* treat a carriage return as a line feed */
      if(c == '\r')
        c = '\n';

      /* store the character, growing the buffer if required */
      rax_getline_insptr_valid(lineptr, n, used);
      (*lineptr)[used] = c;
      used++;
    }
  while(c != '\n');

  /* terminating 0 byte */
  rax_getline_insptr_valid(lineptr, n, used);
  (*lineptr)[used] = 0;

  return ((ssize_t)used);
}
---|
641 | |
---|
642 | |
---|
643 | static void getnums (rawdata *rdta, analdef *adef) |
---|
644 | { |
---|
645 | if(fscanf(INFILE, "%d %d", & rdta->numsp, & rdta->sites) != 2) |
---|
646 | { |
---|
647 | char |
---|
648 | *line = NULL; |
---|
649 | |
---|
650 | size_t |
---|
651 | len = 0; |
---|
652 | |
---|
653 | ssize_t |
---|
654 | read; |
---|
655 | |
---|
656 | int |
---|
657 | sequenceLength = 0, |
---|
658 | sequences = 0, |
---|
659 | taxa = 0, |
---|
660 | sites =0; |
---|
661 | |
---|
662 | if(processID == 0) |
---|
663 | { |
---|
664 | printf("\nRAxML can't, parse the alignment file as phylip file \n"); |
---|
665 | printf("it will now try to parse it as FASTA file\n\n"); |
---|
666 | } |
---|
667 | |
---|
668 | while((read = rax_getline(&line, &len, INFILE)) != -1) |
---|
669 | { |
---|
670 | ssize_t |
---|
671 | i = 0; |
---|
672 | |
---|
673 | while((i < read - 1) && (line[i] == ' ' || line[i] == '\t')) |
---|
674 | i++; |
---|
675 | |
---|
676 | if(line[i] == '>') |
---|
677 | { |
---|
678 | if(taxa == 1) |
---|
679 | sequenceLength = sites; |
---|
680 | |
---|
681 | if(taxa > 0) |
---|
682 | { |
---|
683 | if(sites == 0 && processID == 0) |
---|
684 | { |
---|
685 | printf("Fasta parsing error, RAxML was expecting sequence data before: %s\n", line); |
---|
686 | errorExit(-1); |
---|
687 | } |
---|
688 | assert(sites > 0); |
---|
689 | sequences++; |
---|
690 | } |
---|
691 | |
---|
692 | if(taxa > 0) |
---|
693 | { |
---|
694 | if(sequenceLength != sites && processID == 0) |
---|
695 | { |
---|
696 | printf("Fasta parsing error, RAxML expects an alignment.\n"); |
---|
697 | printf("the sequence before taxon %s: seems to have a different length\n", line); |
---|
698 | errorExit(-1); |
---|
699 | } |
---|
700 | assert(sequenceLength == sites); |
---|
701 | } |
---|
702 | |
---|
703 | taxa++; |
---|
704 | |
---|
705 | sites = 0; |
---|
706 | } |
---|
707 | else |
---|
708 | { |
---|
709 | while(i < read - 1) |
---|
710 | { |
---|
711 | if(!(line[i] == ' ' || line[i] == '\t')) |
---|
712 | { |
---|
713 | sites++; |
---|
714 | } |
---|
715 | i++; |
---|
716 | } |
---|
717 | } |
---|
718 | } |
---|
719 | |
---|
720 | if(sites > 0) |
---|
721 | sequences++; |
---|
722 | if(taxa != sequences && processID == 0) |
---|
723 | { |
---|
724 | printf("Fasta parsing error, the number of taxa %d and sequences %d are not equal!\n", taxa, sequences); |
---|
725 | errorExit(-1); |
---|
726 | } |
---|
727 | assert(taxa == sequences); |
---|
728 | |
---|
729 | if(sequenceLength != sites && processID == 0) |
---|
730 | { |
---|
731 | printf("Fasta parsing error, RAxML expects an alignment.\n"); |
---|
732 | printf("the last sequence in the alignment seems to have a different length\n"); |
---|
733 | errorExit(-1); |
---|
734 | } |
---|
735 | |
---|
736 | assert(sites == sequenceLength); |
---|
737 | |
---|
738 | if(line) |
---|
739 | rax_free(line); |
---|
740 | |
---|
741 | rewind(INFILE); |
---|
742 | |
---|
743 | adef->alignmentFileType = FASTA; |
---|
744 | |
---|
745 | rdta->numsp = taxa; |
---|
746 | rdta->sites = sites; |
---|
747 | } |
---|
748 | |
---|
749 | |
---|
750 | |
---|
751 | if (rdta->numsp < 4) |
---|
752 | { |
---|
753 | if(processID == 0) |
---|
754 | printf("TOO FEW SPECIES\n"); |
---|
755 | errorExit(-1); |
---|
756 | } |
---|
757 | |
---|
758 | if (rdta->sites < 1) |
---|
759 | { |
---|
760 | if(processID == 0) |
---|
761 | printf("TOO FEW SITES\n"); |
---|
762 | errorExit(-1); |
---|
763 | } |
---|
764 | |
---|
765 | return; |
---|
766 | } |
---|
767 | |
---|
768 | |
---|
769 | |
---|
770 | |
---|
771 | |
---|
772 | boolean whitechar (int ch) |
---|
773 | { |
---|
774 | return (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r'); |
---|
775 | } |
---|
776 | |
---|
777 | |
---|
/* Converts the character stored in *chptr to upper case in place if it is a
   lower-case letter; every other value is left untouched.
   The letters are tested as the three ranges a-i, j-r and s-z. */
static void uppercase (int *chptr)
{
  const int
    c = *chptr;

  const int
    isLowerLetter = (c >= 'a' && c <= 'i') ||
                    (c >= 'j' && c <= 'r') ||
                    (c >= 's' && c <= 'z');

  if(isLowerLetter)
    *chptr = c - 'a' + 'A';
}
---|
787 | |
---|
788 | |
---|
789 | |
---|
790 | |
---|
791 | static void getyspace (rawdata *rdta) |
---|
792 | { |
---|
793 | size_t size = 4 * ((size_t)(rdta->sites / 4 + 1)); |
---|
794 | int i; |
---|
795 | unsigned char *y0; |
---|
796 | |
---|
797 | rdta->y = (unsigned char **) rax_malloc((rdta->numsp + 1) * sizeof(unsigned char *)); |
---|
798 | assert(rdta->y); |
---|
799 | |
---|
800 | y0 = (unsigned char *) rax_malloc(((size_t)(rdta->numsp + 1)) * size * sizeof(unsigned char)); |
---|
801 | assert(y0); |
---|
802 | |
---|
803 | rdta->y0 = y0; |
---|
804 | |
---|
805 | for (i = 0; i <= rdta->numsp; i++) |
---|
806 | { |
---|
807 | rdta->y[i] = y0; |
---|
808 | y0 += size; |
---|
809 | } |
---|
810 | |
---|
811 | return; |
---|
812 | } |
---|
813 | |
---|
814 | |
---|
/* Pseudo random number generator with static internal state. Each call
   advances three components and returns their sum:
   an additive counter, a shift/xor register (shifts 13, 17, 5) and an
   add-with-carry pair limited to 31 bits.
   The state lives in function-local statics, so the sequence is the same in
   every run and calls share state across the whole program. */
static unsigned int KISS32(void)
{
  static unsigned int
    counter  = 123456789,
    shiftReg = 362436069,
    lagOlder = 21288629,
    lagNewer = 14921776,
    carry    = 0;

  unsigned int
    sum;

  /* additive counter */
  counter += 545925293;

  /* shift/xor register */
  shiftReg ^= (shiftReg << 13);
  shiftReg ^= (shiftReg >> 17);
  shiftReg ^= (shiftReg << 5);

  /* add with carry: bit 31 of the sum is the next carry */
  sum      = lagOlder + lagNewer + carry;
  lagOlder = lagNewer;
  carry    = (sum >> 31);
  lagNewer = sum & 0x7FFFFFFFu;

  return (counter + shiftReg + lagNewer);
}
---|
837 | |
---|
838 | static boolean setupTree (tree *tr, analdef *adef) |
---|
839 | { |
---|
840 | nodeptr p0, p, q; |
---|
841 | int |
---|
842 | i, |
---|
843 | j, |
---|
844 | tips, |
---|
845 | inter; |
---|
846 | |
---|
847 | |
---|
848 | |
---|
849 | tr->storedBrLens = (double*)NULL; |
---|
850 | |
---|
851 | if(!adef->readTaxaOnly) |
---|
852 | { |
---|
853 | tr->bigCutoff = FALSE; |
---|
854 | |
---|
855 | tr->patternPosition = (int*)NULL; |
---|
856 | tr->columnPosition = (int*)NULL; |
---|
857 | |
---|
858 | tr->maxCategories = MAX(4, adef->categories); |
---|
859 | |
---|
860 | tr->partitionContributions = (double *)rax_malloc(sizeof(double) * tr->NumberOfModels); |
---|
861 | |
---|
862 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
863 | tr->partitionContributions[i] = -1.0; |
---|
864 | |
---|
865 | tr->perPartitionLH = (double *)rax_malloc(sizeof(double) * tr->NumberOfModels); |
---|
866 | tr->storedPerPartitionLH = (double *)rax_malloc(sizeof(double) * tr->NumberOfModels); |
---|
867 | |
---|
868 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
869 | { |
---|
870 | tr->perPartitionLH[i] = 0.0; |
---|
871 | tr->storedPerPartitionLH[i] = 0.0; |
---|
872 | } |
---|
873 | |
---|
874 | if(adef->grouping) |
---|
875 | tr->grouped = TRUE; |
---|
876 | else |
---|
877 | tr->grouped = FALSE; |
---|
878 | |
---|
879 | if(adef->constraint) |
---|
880 | tr->constrained = TRUE; |
---|
881 | else |
---|
882 | tr->constrained = FALSE; |
---|
883 | |
---|
884 | tr->treeID = 0; |
---|
885 | } |
---|
886 | |
---|
887 | tips = tr->mxtips; |
---|
888 | inter = tr->mxtips - 1; |
---|
889 | |
---|
890 | if(!adef->readTaxaOnly) |
---|
891 | { |
---|
892 | tr->yVector = (unsigned char **) rax_malloc((tr->mxtips + 1) * sizeof(unsigned char *)); |
---|
893 | |
---|
894 | tr->fracchanges = (double *)rax_malloc(tr->NumberOfModels * sizeof(double)); |
---|
895 | |
---|
896 | tr->rawFracchanges = (double *)rax_malloc(tr->NumberOfModels * sizeof(double)); |
---|
897 | |
---|
898 | tr->likelihoods = (double *)rax_malloc(adef->multipleRuns * sizeof(double)); |
---|
899 | } |
---|
900 | |
---|
901 | tr->numberOfTrees = -1; |
---|
902 | |
---|
903 | |
---|
904 | |
---|
905 | tr->treeStringLength = tr->mxtips * (nmlngth+128) + 256 + tr->mxtips * 2; |
---|
906 | |
---|
907 | tr->tree_string = (char*)rax_calloc(tr->treeStringLength, sizeof(char)); |
---|
908 | |
---|
909 | /*TODO, must that be so long ?*/ |
---|
910 | |
---|
911 | if(!adef->readTaxaOnly) |
---|
912 | { |
---|
913 | |
---|
914 | tr->td[0].count = 0; |
---|
915 | tr->td[0].ti = (traversalInfo *)rax_malloc(sizeof(traversalInfo) * tr->mxtips); |
---|
916 | |
---|
917 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
918 | { |
---|
919 | tr->fracchanges[i] = -1.0; |
---|
920 | tr->rawFracchanges[i] = -1.0; |
---|
921 | } |
---|
922 | |
---|
923 | tr->fracchange = -1.0; |
---|
924 | tr->rawFracchange = -1.0; |
---|
925 | |
---|
926 | tr->constraintVector = (int *)rax_malloc((2 * tr->mxtips) * sizeof(int)); |
---|
927 | |
---|
928 | tr->nameList = (char **)rax_malloc(sizeof(char *) * (tips + 1)); |
---|
929 | } |
---|
930 | |
---|
931 | if (!(p0 = (nodeptr) rax_malloc((tips + 3*inter) * sizeof(node)))) |
---|
932 | { |
---|
933 | printf("ERROR: Unable to obtain sufficient tree memory\n"); |
---|
934 | return FALSE; |
---|
935 | } |
---|
936 | |
---|
937 | if (!(tr->nodep = (nodeptr *) rax_malloc((2*tr->mxtips) * sizeof(nodeptr)))) |
---|
938 | { |
---|
939 | printf("ERROR: Unable to obtain sufficient tree memory, too\n"); |
---|
940 | return FALSE; |
---|
941 | } |
---|
942 | |
---|
943 | tr->nodep[0] = (node *) NULL; /* Use as 1-based array */ |
---|
944 | |
---|
945 | for (i = 1; i <= tips; i++) |
---|
946 | { |
---|
947 | p = p0++; |
---|
948 | |
---|
949 | p->hash = KISS32(); /* hast table stuff */ |
---|
950 | p->x = 0; |
---|
951 | p->number = i; |
---|
952 | p->next = p; |
---|
953 | p->back = (node *)NULL; |
---|
954 | p->bInf = (branchInfo *)NULL; |
---|
955 | |
---|
956 | |
---|
957 | |
---|
958 | |
---|
959 | |
---|
960 | |
---|
961 | tr->nodep[i] = p; |
---|
962 | } |
---|
963 | |
---|
964 | for (i = tips + 1; i <= tips + inter; i++) |
---|
965 | { |
---|
966 | q = (node *) NULL; |
---|
967 | for (j = 1; j <= 3; j++) |
---|
968 | { |
---|
969 | p = p0++; |
---|
970 | if(j == 1) |
---|
971 | p->x = 1; |
---|
972 | else |
---|
973 | p->x = 0; |
---|
974 | p->number = i; |
---|
975 | p->next = q; |
---|
976 | p->bInf = (branchInfo *)NULL; |
---|
977 | p->back = (node *) NULL; |
---|
978 | p->hash = 0; |
---|
979 | |
---|
980 | |
---|
981 | |
---|
982 | |
---|
983 | |
---|
984 | |
---|
985 | q = p; |
---|
986 | } |
---|
987 | p->next->next->next = p; |
---|
988 | tr->nodep[i] = p; |
---|
989 | } |
---|
990 | |
---|
991 | tr->likelihood = unlikely; |
---|
992 | tr->start = (node *) NULL; |
---|
993 | |
---|
994 | |
---|
995 | |
---|
996 | tr->ntips = 0; |
---|
997 | tr->nextnode = 0; |
---|
998 | |
---|
999 | if(!adef->readTaxaOnly) |
---|
1000 | { |
---|
1001 | for(i = 0; i < tr->numBranches; i++) |
---|
1002 | tr->partitionSmoothed[i] = FALSE; |
---|
1003 | } |
---|
1004 | |
---|
1005 | return TRUE; |
---|
1006 | } |
---|
1007 | |
---|
1008 | |
---|
/* Validates a taxon name held in buffer, where len is the length of the name
   including its terminating 0 byte.

   The first len - 1 characters must not contain a 0 byte, tab, line feed,
   carriage return, blank or any of  : , ( ) ; [ ] '
   On the first illegal character an error is printed and the program exits
   with status -1. Finally buffer[len - 1] is asserted to be the terminator. */
static void checkTaxonName(char *buffer, int len)
{
  static const char
    illegal[] = {'\0', '\t', '\n', '\r', ' ', ':', ',', '(', ')', ';', '[', ']', '\''};

  int
    pos;

  for(pos = 0; pos < len - 1; pos++)
    {
      /* the table contains a 0 byte, hence memchr with an explicit size */
      if(memchr(illegal, buffer[pos], sizeof(illegal)) == NULL)
        continue;

      printf("ERROR: Taxon Name \"%s\" is invalid at position %d, it contains illegal character %c\n", buffer, pos, buffer[pos]);
      printf("Illegal characters in taxon-names are: tabulators, carriage returns, spaces, \":\", \",\", \")\", \"(\", \";\", \"]\", \"[\", \"\'\" \n");
      printf("Exiting\n");
      exit(-1);
    }

  assert(buffer[len - 1] == '\0');
}
---|
1049 | |
---|
/* Prints the file content around the current read position of f to stdout:
   up to 20 characters before the position and up to 20 characters after it,
   stopping early at end of file. The read position of f is changed by this
   function; it ends up after the last character printed. */
static void printParsingErrorContext(FILE *f)
{
  const long
    contextWidth = 20;

  const long
    currentPos  = ftell(f),
    windowEnd   = currentPos + contextWidth,
    windowStart = (currentPos - contextWidth > 0) ? (currentPos - contextWidth) : 0;

  long
    pos;

  fseek(f, windowStart, SEEK_SET);

  printf("Printing error context:\n\n");

  for(pos = windowStart; pos < windowEnd; pos++)
    {
      const int
        ch = getc(f);

      if(ch == EOF)
        break;

      printf("%c", ch);
    }

  printf("\n\n");
}
---|
1076 | |
---|
1077 | static boolean getdata(analdef *adef, rawdata *rdta, tree *tr) |
---|
1078 | { |
---|
1079 | int |
---|
1080 | i, |
---|
1081 | j, |
---|
1082 | basesread, |
---|
1083 | basesnew, |
---|
1084 | ch, my_i, meaning, |
---|
1085 | len, |
---|
1086 | meaningAA[256], |
---|
1087 | meaningDNA[256], |
---|
1088 | meaningBINARY[256], |
---|
1089 | meaningGeneric32[256], |
---|
1090 | meaningGeneric64[256]; |
---|
1091 | |
---|
1092 | boolean |
---|
1093 | allread, |
---|
1094 | firstpass; |
---|
1095 | |
---|
1096 | char |
---|
1097 | buffer[nmlngth + 2]; |
---|
1098 | |
---|
1099 | unsigned char |
---|
1100 | genericChars32[32] = {'0', '1', '2', '3', '4', '5', '6', '7', |
---|
1101 | '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', |
---|
1102 | 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', |
---|
1103 | 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V'}; |
---|
1104 | unsigned long |
---|
1105 | total = 0, |
---|
1106 | gaps = 0; |
---|
1107 | |
---|
1108 | for (i = 0; i < 256; i++) |
---|
1109 | { |
---|
1110 | meaningAA[i] = -1; |
---|
1111 | meaningDNA[i] = -1; |
---|
1112 | meaningBINARY[i] = -1; |
---|
1113 | meaningGeneric32[i] = -1; |
---|
1114 | meaningGeneric64[i] = -1; |
---|
1115 | } |
---|
1116 | |
---|
1117 | /* generic 32 data */ |
---|
1118 | |
---|
1119 | for(i = 0; i < 32; i++) |
---|
1120 | meaningGeneric32[genericChars32[i]] = i; |
---|
1121 | meaningGeneric32['-'] = getUndetermined(GENERIC_32); |
---|
1122 | meaningGeneric32['?'] = getUndetermined(GENERIC_32); |
---|
1123 | |
---|
1124 | /* AA data */ |
---|
1125 | |
---|
1126 | meaningAA['A'] = 0; /* alanine */ |
---|
1127 | meaningAA['R'] = 1; /* arginine */ |
---|
1128 | meaningAA['N'] = 2; /* asparagine*/ |
---|
1129 | meaningAA['D'] = 3; /* aspartic */ |
---|
1130 | meaningAA['C'] = 4; /* cysteine */ |
---|
1131 | meaningAA['Q'] = 5; /* glutamine */ |
---|
1132 | meaningAA['E'] = 6; /* glutamic */ |
---|
1133 | meaningAA['G'] = 7; /* glycine */ |
---|
1134 | meaningAA['H'] = 8; /* histidine */ |
---|
1135 | meaningAA['I'] = 9; /* isoleucine */ |
---|
1136 | meaningAA['L'] = 10; /* leucine */ |
---|
1137 | meaningAA['K'] = 11; /* lysine */ |
---|
1138 | meaningAA['M'] = 12; /* methionine */ |
---|
1139 | meaningAA['F'] = 13; /* phenylalanine */ |
---|
1140 | meaningAA['P'] = 14; /* proline */ |
---|
1141 | meaningAA['S'] = 15; /* serine */ |
---|
1142 | meaningAA['T'] = 16; /* threonine */ |
---|
1143 | meaningAA['W'] = 17; /* tryptophan */ |
---|
1144 | meaningAA['Y'] = 18; /* tyrosine */ |
---|
1145 | meaningAA['V'] = 19; /* valine */ |
---|
1146 | meaningAA['B'] = 20; /* asparagine, aspartic 2 and 3*/ |
---|
1147 | meaningAA['Z'] = 21; /*21 glutamine glutamic 5 and 6*/ |
---|
1148 | |
---|
1149 | meaningAA['X'] = |
---|
1150 | meaningAA['?'] = |
---|
1151 | meaningAA['*'] = |
---|
1152 | meaningAA['-'] = |
---|
1153 | getUndetermined(AA_DATA); |
---|
1154 | |
---|
1155 | /* DNA data */ |
---|
1156 | |
---|
1157 | meaningDNA['A'] = 1; |
---|
1158 | meaningDNA['B'] = 14; |
---|
1159 | meaningDNA['C'] = 2; |
---|
1160 | meaningDNA['D'] = 13; |
---|
1161 | meaningDNA['G'] = 4; |
---|
1162 | meaningDNA['H'] = 11; |
---|
1163 | meaningDNA['K'] = 12; |
---|
1164 | meaningDNA['M'] = 3; |
---|
1165 | meaningDNA['R'] = 5; |
---|
1166 | meaningDNA['S'] = 6; |
---|
1167 | meaningDNA['T'] = 8; |
---|
1168 | meaningDNA['U'] = 8; |
---|
1169 | meaningDNA['V'] = 7; |
---|
1170 | meaningDNA['W'] = 9; |
---|
1171 | meaningDNA['Y'] = 10; |
---|
1172 | |
---|
1173 | meaningDNA['N'] = |
---|
1174 | meaningDNA['O'] = |
---|
1175 | meaningDNA['X'] = |
---|
1176 | meaningDNA['-'] = |
---|
1177 | meaningDNA['?'] = |
---|
1178 | getUndetermined(DNA_DATA); |
---|
1179 | |
---|
1180 | /* BINARY DATA */ |
---|
1181 | |
---|
1182 | meaningBINARY['0'] = 1; |
---|
1183 | meaningBINARY['1'] = 2; |
---|
1184 | |
---|
1185 | meaningBINARY['-'] = |
---|
1186 | meaningBINARY['?'] = |
---|
1187 | getUndetermined(BINARY_DATA); |
---|
1188 | |
---|
1189 | |
---|
1190 | /*******************************************************************/ |
---|
1191 | |
---|
1192 | basesread = basesnew = 0; |
---|
1193 | |
---|
1194 | allread = FALSE; |
---|
1195 | firstpass = TRUE; |
---|
1196 | ch = ' '; |
---|
1197 | |
---|
1198 | while (! allread) |
---|
1199 | { |
---|
1200 | for(i = 1; i <= tr->mxtips; i++) |
---|
1201 | { |
---|
1202 | if(firstpass) |
---|
1203 | { |
---|
1204 | ch = getc(INFILE); |
---|
1205 | |
---|
1206 | while(ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r') |
---|
1207 | ch = getc(INFILE); |
---|
1208 | |
---|
1209 | my_i = 0; |
---|
1210 | |
---|
1211 | do |
---|
1212 | { |
---|
1213 | buffer[my_i] = ch; |
---|
1214 | ch = getc(INFILE); |
---|
1215 | my_i++; |
---|
1216 | if(my_i >= nmlngth) |
---|
1217 | { |
---|
1218 | if(processID == 0) |
---|
1219 | { |
---|
1220 | printf("Taxon Name too long at taxon %d, adapt constant nmlngth in\n", i); |
---|
1221 | printf("axml.h, current setting %d\n", nmlngth); |
---|
1222 | } |
---|
1223 | errorExit(-1); |
---|
1224 | } |
---|
1225 | } |
---|
1226 | while(ch != ' ' && ch != '\n' && ch != '\t' && ch != '\r'); |
---|
1227 | |
---|
1228 | buffer[my_i] = '\0'; |
---|
1229 | len = strlen(buffer) + 1; |
---|
1230 | checkTaxonName(buffer, len); |
---|
1231 | tr->nameList[i] = (char *)rax_malloc(sizeof(char) * len); |
---|
1232 | strcpy(tr->nameList[i], buffer); |
---|
1233 | |
---|
1234 | while(ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r') |
---|
1235 | ch = getc(INFILE); |
---|
1236 | |
---|
1237 | ungetc(ch, INFILE); |
---|
1238 | } |
---|
1239 | |
---|
1240 | j = basesread; |
---|
1241 | |
---|
1242 | while((j < rdta->sites) && ((ch = getc(INFILE)) != EOF) && (ch != '\n') && (ch != '\r')) |
---|
1243 | { |
---|
1244 | uppercase(& ch); |
---|
1245 | |
---|
1246 | assert(tr->dataVector[j + 1] != -1); |
---|
1247 | |
---|
1248 | switch(tr->dataVector[j + 1]) |
---|
1249 | { |
---|
1250 | case BINARY_DATA: |
---|
1251 | meaning = meaningBINARY[ch]; |
---|
1252 | break; |
---|
1253 | case DNA_DATA: |
---|
1254 | case SECONDARY_DATA: |
---|
1255 | case SECONDARY_DATA_6: |
---|
1256 | case SECONDARY_DATA_7: |
---|
1257 | /* |
---|
1258 | still dealing with DNA/RNA here, hence just act if as they where DNA characters |
---|
1259 | corresponding column merging for sec struct models will take place later |
---|
1260 | */ |
---|
1261 | meaning = meaningDNA[ch]; |
---|
1262 | break; |
---|
1263 | case AA_DATA: |
---|
1264 | meaning = meaningAA[ch]; |
---|
1265 | break; |
---|
1266 | case GENERIC_32: |
---|
1267 | meaning = meaningGeneric32[ch]; |
---|
1268 | break; |
---|
1269 | case GENERIC_64: |
---|
1270 | meaning = meaningGeneric64[ch]; |
---|
1271 | break; |
---|
1272 | default: |
---|
1273 | assert(0); |
---|
1274 | } |
---|
1275 | |
---|
1276 | if (meaning != -1) |
---|
1277 | { |
---|
1278 | j++; |
---|
1279 | rdta->y[i][j] = ch; |
---|
1280 | } |
---|
1281 | else |
---|
1282 | { |
---|
1283 | if(!whitechar(ch)) |
---|
1284 | { |
---|
1285 | printf("ERROR: Bad base (%c) at site %d of sequence %d\n", |
---|
1286 | ch, j + 1, i); |
---|
1287 | |
---|
1288 | printParsingErrorContext(INFILE); |
---|
1289 | |
---|
1290 | return FALSE; |
---|
1291 | } |
---|
1292 | } |
---|
1293 | } |
---|
1294 | |
---|
1295 | if (ch == EOF) |
---|
1296 | { |
---|
1297 | printf("ERROR: End-of-file at site %d of sequence %d\n", j + 1, i); |
---|
1298 | |
---|
1299 | printParsingErrorContext(INFILE); |
---|
1300 | |
---|
1301 | return FALSE; |
---|
1302 | } |
---|
1303 | |
---|
1304 | if (! firstpass && (j == basesread)) |
---|
1305 | i--; |
---|
1306 | else |
---|
1307 | { |
---|
1308 | if (i == 1) |
---|
1309 | basesnew = j; |
---|
1310 | else |
---|
1311 | if (j != basesnew) |
---|
1312 | { |
---|
1313 | printf("ERROR: Sequences out of alignment\n"); |
---|
1314 | printf("%d (instead of %d) residues read in sequence %d %s\n", |
---|
1315 | j - basesread, basesnew - basesread, i, tr->nameList[i]); |
---|
1316 | |
---|
1317 | printParsingErrorContext(INFILE); |
---|
1318 | |
---|
1319 | return FALSE; |
---|
1320 | } |
---|
1321 | } |
---|
1322 | while (ch != '\n' && ch != EOF && ch != '\r') ch = getc(INFILE); /* flush line *//* PC-LINEBREAK*/ |
---|
1323 | } |
---|
1324 | |
---|
1325 | firstpass = FALSE; |
---|
1326 | basesread = basesnew; |
---|
1327 | allread = (basesread >= rdta->sites); |
---|
1328 | } |
---|
1329 | |
---|
1330 | for(j = 1; j <= tr->mxtips; j++) |
---|
1331 | for(i = 1; i <= rdta->sites; i++) |
---|
1332 | { |
---|
1333 | assert(tr->dataVector[i] != -1); |
---|
1334 | |
---|
1335 | switch(tr->dataVector[i]) |
---|
1336 | { |
---|
1337 | case BINARY_DATA: |
---|
1338 | meaning = meaningBINARY[rdta->y[j][i]]; |
---|
1339 | if(meaning == getUndetermined(BINARY_DATA)) |
---|
1340 | gaps++; |
---|
1341 | break; |
---|
1342 | |
---|
1343 | case SECONDARY_DATA: |
---|
1344 | case SECONDARY_DATA_6: |
---|
1345 | case SECONDARY_DATA_7: |
---|
1346 | assert(tr->secondaryStructurePairs[i - 1] != -1); |
---|
1347 | assert(i - 1 == tr->secondaryStructurePairs[tr->secondaryStructurePairs[i - 1]]); |
---|
1348 | /* |
---|
1349 | don't worry too much about undetermined column count here for sec-struct, just count |
---|
1350 | DNA/RNA gaps here and worry about the rest later-on, falling through to DNA again :-) |
---|
1351 | */ |
---|
1352 | case DNA_DATA: |
---|
1353 | meaning = meaningDNA[rdta->y[j][i]]; |
---|
1354 | if(meaning == getUndetermined(DNA_DATA)) |
---|
1355 | gaps++; |
---|
1356 | break; |
---|
1357 | |
---|
1358 | case AA_DATA: |
---|
1359 | meaning = meaningAA[rdta->y[j][i]]; |
---|
1360 | if(meaning == getUndetermined(AA_DATA)) |
---|
1361 | gaps++; |
---|
1362 | break; |
---|
1363 | |
---|
1364 | case GENERIC_32: |
---|
1365 | meaning = meaningGeneric32[rdta->y[j][i]]; |
---|
1366 | if(meaning == getUndetermined(GENERIC_32)) |
---|
1367 | gaps++; |
---|
1368 | break; |
---|
1369 | |
---|
1370 | case GENERIC_64: |
---|
1371 | meaning = meaningGeneric64[rdta->y[j][i]]; |
---|
1372 | if(meaning == getUndetermined(GENERIC_64)) |
---|
1373 | gaps++; |
---|
1374 | break; |
---|
1375 | default: |
---|
1376 | assert(0); |
---|
1377 | } |
---|
1378 | |
---|
1379 | total++; |
---|
1380 | rdta->y[j][i] = meaning; |
---|
1381 | } |
---|
1382 | |
---|
1383 | adef->gapyness = (double)gaps / (double)total; |
---|
1384 | |
---|
1385 | return TRUE; |
---|
1386 | } |
---|
1387 | |
---|
1388 | static void parseFasta(analdef *adef, rawdata *rdta, tree *tr) |
---|
1389 | { |
---|
1390 | int |
---|
1391 | index, |
---|
1392 | meaning, |
---|
1393 | meaningAA[256], |
---|
1394 | meaningDNA[256], |
---|
1395 | meaningBINARY[256], |
---|
1396 | meaningGeneric32[256], |
---|
1397 | meaningGeneric64[256]; |
---|
1398 | |
---|
1399 | char |
---|
1400 | buffer[nmlngth + 2]; |
---|
1401 | |
---|
1402 | unsigned char |
---|
1403 | genericChars32[32] = {'0', '1', '2', '3', '4', '5', '6', '7', |
---|
1404 | '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', |
---|
1405 | 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', |
---|
1406 | 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V'}; |
---|
1407 | unsigned long |
---|
1408 | total = 0, |
---|
1409 | gaps = 0; |
---|
1410 | |
---|
1411 | for(index = 0; index < 256; index++) |
---|
1412 | { |
---|
1413 | meaningAA[index] = -1; |
---|
1414 | meaningDNA[index] = -1; |
---|
1415 | meaningBINARY[index] = -1; |
---|
1416 | meaningGeneric32[index] = -1; |
---|
1417 | meaningGeneric64[index] = -1; |
---|
1418 | } |
---|
1419 | |
---|
1420 | /* generic 32 data */ |
---|
1421 | |
---|
1422 | for(index = 0; index < 32; index++) |
---|
1423 | meaningGeneric32[genericChars32[index]] = index; |
---|
1424 | |
---|
1425 | meaningGeneric32['-'] = getUndetermined(GENERIC_32); |
---|
1426 | meaningGeneric32['?'] = getUndetermined(GENERIC_32); |
---|
1427 | |
---|
1428 | /* AA data */ |
---|
1429 | |
---|
1430 | meaningAA['A'] = 0; /* alanine */ |
---|
1431 | meaningAA['R'] = 1; /* arginine */ |
---|
1432 | meaningAA['N'] = 2; /* asparagine*/ |
---|
1433 | meaningAA['D'] = 3; /* aspartic */ |
---|
1434 | meaningAA['C'] = 4; /* cysteine */ |
---|
1435 | meaningAA['Q'] = 5; /* glutamine */ |
---|
1436 | meaningAA['E'] = 6; /* glutamic */ |
---|
1437 | meaningAA['G'] = 7; /* glycine */ |
---|
1438 | meaningAA['H'] = 8; /* histidine */ |
---|
1439 | meaningAA['I'] = 9; /* isoleucine */ |
---|
1440 | meaningAA['L'] = 10; /* leucine */ |
---|
1441 | meaningAA['K'] = 11; /* lysine */ |
---|
1442 | meaningAA['M'] = 12; /* methionine */ |
---|
1443 | meaningAA['F'] = 13; /* phenylalanine */ |
---|
1444 | meaningAA['P'] = 14; /* proline */ |
---|
1445 | meaningAA['S'] = 15; /* serine */ |
---|
1446 | meaningAA['T'] = 16; /* threonine */ |
---|
1447 | meaningAA['W'] = 17; /* tryptophan */ |
---|
1448 | meaningAA['Y'] = 18; /* tyrosine */ |
---|
1449 | meaningAA['V'] = 19; /* valine */ |
---|
1450 | meaningAA['B'] = 20; /* asparagine, aspartic 2 and 3*/ |
---|
1451 | meaningAA['Z'] = 21; /*21 glutamine glutamic 5 and 6*/ |
---|
1452 | |
---|
1453 | meaningAA['X'] = |
---|
1454 | meaningAA['?'] = |
---|
1455 | meaningAA['*'] = |
---|
1456 | meaningAA['-'] = |
---|
1457 | getUndetermined(AA_DATA); |
---|
1458 | |
---|
1459 | /* DNA data */ |
---|
1460 | |
---|
1461 | meaningDNA['A'] = 1; |
---|
1462 | meaningDNA['B'] = 14; |
---|
1463 | meaningDNA['C'] = 2; |
---|
1464 | meaningDNA['D'] = 13; |
---|
1465 | meaningDNA['G'] = 4; |
---|
1466 | meaningDNA['H'] = 11; |
---|
1467 | meaningDNA['K'] = 12; |
---|
1468 | meaningDNA['M'] = 3; |
---|
1469 | meaningDNA['R'] = 5; |
---|
1470 | meaningDNA['S'] = 6; |
---|
1471 | meaningDNA['T'] = 8; |
---|
1472 | meaningDNA['U'] = 8; |
---|
1473 | meaningDNA['V'] = 7; |
---|
1474 | meaningDNA['W'] = 9; |
---|
1475 | meaningDNA['Y'] = 10; |
---|
1476 | |
---|
1477 | meaningDNA['N'] = |
---|
1478 | meaningDNA['O'] = |
---|
1479 | meaningDNA['X'] = |
---|
1480 | meaningDNA['-'] = |
---|
1481 | meaningDNA['?'] = |
---|
1482 | getUndetermined(DNA_DATA); |
---|
1483 | |
---|
1484 | /* BINARY DATA */ |
---|
1485 | |
---|
1486 | meaningBINARY['0'] = 1; |
---|
1487 | meaningBINARY['1'] = 2; |
---|
1488 | |
---|
1489 | meaningBINARY['-'] = |
---|
1490 | meaningBINARY['?'] = |
---|
1491 | getUndetermined(BINARY_DATA); |
---|
1492 | |
---|
1493 | |
---|
1494 | /*******************************************************************/ |
---|
1495 | |
---|
1496 | { |
---|
1497 | char |
---|
1498 | *line = NULL; |
---|
1499 | |
---|
1500 | size_t |
---|
1501 | len = 0; |
---|
1502 | |
---|
1503 | ssize_t |
---|
1504 | read; |
---|
1505 | |
---|
1506 | int |
---|
1507 | sequenceLength = 0, |
---|
1508 | sequences = 0, |
---|
1509 | taxa = 0, |
---|
1510 | sites = 0; |
---|
1511 | |
---|
1512 | |
---|
1513 | while((read = rax_getline(&line, &len, INFILE)) != -1) |
---|
1514 | { |
---|
1515 | ssize_t |
---|
1516 | i = 0; |
---|
1517 | |
---|
1518 | while((i < read - 1) && (line[i] == ' ' || line[i] == '\t')) |
---|
1519 | i++; |
---|
1520 | |
---|
1521 | if(line[i] == '>') |
---|
1522 | { |
---|
1523 | int |
---|
1524 | nameCount = 0, |
---|
1525 | nameLength; |
---|
1526 | |
---|
1527 | |
---|
1528 | |
---|
1529 | if(taxa == 1) |
---|
1530 | sequenceLength = sites; |
---|
1531 | |
---|
1532 | if(taxa > 0) |
---|
1533 | { |
---|
1534 | assert(sites > 0); |
---|
1535 | sequences++; |
---|
1536 | } |
---|
1537 | |
---|
1538 | if(taxa > 0) |
---|
1539 | assert(sequenceLength == sites); |
---|
1540 | |
---|
1541 | taxa++; |
---|
1542 | |
---|
1543 | i++; |
---|
1544 | |
---|
1545 | while((i < read - 1) && (line[i] == ' ' || line[i] == '\t')) |
---|
1546 | i++; |
---|
1547 | |
---|
1548 | while((i < read - 1) && !(line[i] == ' ' || line[i] == '\t')) |
---|
1549 | { |
---|
1550 | buffer[nameCount] = line[i]; |
---|
1551 | nameCount++; |
---|
1552 | i++; |
---|
1553 | } |
---|
1554 | |
---|
1555 | if(nameCount >= nmlngth) |
---|
1556 | { |
---|
1557 | if(processID == 0) |
---|
1558 | { |
---|
1559 | printf("Taxon Name too long at taxon %d, adapt constant nmlngth in\n", taxa); |
---|
1560 | printf("axml.h, current setting %d\n", nmlngth); |
---|
1561 | } |
---|
1562 | errorExit(-1); |
---|
1563 | } |
---|
1564 | |
---|
1565 | buffer[nameCount] = '\0'; |
---|
1566 | nameLength = strlen(buffer) + 1; |
---|
1567 | checkTaxonName(buffer, nameLength); |
---|
1568 | tr->nameList[taxa] = (char *)rax_malloc(sizeof(char) * nameLength); |
---|
1569 | strcpy(tr->nameList[taxa], buffer); |
---|
1570 | |
---|
1571 | sites = 0; |
---|
1572 | } |
---|
1573 | else |
---|
1574 | { |
---|
1575 | while(i < read - 1) |
---|
1576 | { |
---|
1577 | if(!(line[i] == ' ' || line[i] == '\t')) |
---|
1578 | { |
---|
1579 | int |
---|
1580 | ch = line[i]; |
---|
1581 | |
---|
1582 | uppercase(&ch); |
---|
1583 | |
---|
1584 | assert(tr->dataVector[sites + 1] != -1); |
---|
1585 | |
---|
1586 | switch(tr->dataVector[sites + 1]) |
---|
1587 | { |
---|
1588 | case BINARY_DATA: |
---|
1589 | meaning = meaningBINARY[ch]; |
---|
1590 | break; |
---|
1591 | case DNA_DATA: |
---|
1592 | case SECONDARY_DATA: |
---|
1593 | case SECONDARY_DATA_6: |
---|
1594 | case SECONDARY_DATA_7: |
---|
1595 | meaning = meaningDNA[ch]; |
---|
1596 | break; |
---|
1597 | case AA_DATA: |
---|
1598 | meaning = meaningAA[ch]; |
---|
1599 | break; |
---|
1600 | case GENERIC_32: |
---|
1601 | meaning = meaningGeneric32[ch]; |
---|
1602 | break; |
---|
1603 | case GENERIC_64: |
---|
1604 | meaning = meaningGeneric64[ch]; |
---|
1605 | break; |
---|
1606 | default: |
---|
1607 | assert(0); |
---|
1608 | } |
---|
1609 | |
---|
1610 | if (meaning != -1) |
---|
1611 | rdta->y[taxa][sites + 1] = ch; |
---|
1612 | else |
---|
1613 | { |
---|
1614 | if(processID == 0) |
---|
1615 | { |
---|
1616 | printf("ERROR: Bad base (%c) at site %d of sequence %d\n", |
---|
1617 | ch, sites + 1, taxa); |
---|
1618 | } |
---|
1619 | errorExit(-1); |
---|
1620 | } |
---|
1621 | |
---|
1622 | sites++; |
---|
1623 | } |
---|
1624 | i++; |
---|
1625 | } |
---|
1626 | } |
---|
1627 | } |
---|
1628 | |
---|
1629 | if(sites > 0) |
---|
1630 | sequences++; |
---|
1631 | |
---|
1632 | /* the assertions below should never fail, the have already been checked in getNums */ |
---|
1633 | |
---|
1634 | assert(taxa == sequences); |
---|
1635 | assert(sites == sequenceLength); |
---|
1636 | |
---|
1637 | if(line) |
---|
1638 | rax_free(line); |
---|
1639 | } |
---|
1640 | |
---|
1641 | |
---|
1642 | { |
---|
1643 | int |
---|
1644 | i, |
---|
1645 | j; |
---|
1646 | |
---|
1647 | for(j = 1; j <= tr->mxtips; j++) |
---|
1648 | for(i = 1; i <= rdta->sites; i++) |
---|
1649 | { |
---|
1650 | assert(tr->dataVector[i] != -1); |
---|
1651 | |
---|
1652 | switch(tr->dataVector[i]) |
---|
1653 | { |
---|
1654 | case BINARY_DATA: |
---|
1655 | meaning = meaningBINARY[rdta->y[j][i]]; |
---|
1656 | if(meaning == getUndetermined(BINARY_DATA)) |
---|
1657 | gaps++; |
---|
1658 | break; |
---|
1659 | |
---|
1660 | case SECONDARY_DATA: |
---|
1661 | case SECONDARY_DATA_6: |
---|
1662 | case SECONDARY_DATA_7: |
---|
1663 | assert(tr->secondaryStructurePairs[i - 1] != -1); |
---|
1664 | assert(i - 1 == tr->secondaryStructurePairs[tr->secondaryStructurePairs[i - 1]]); |
---|
1665 | /* |
---|
1666 | don't worry too much about undetermined column count here for sec-struct, just count |
---|
1667 | DNA/RNA gaps here and worry about the rest later-on, falling through to DNA again :-) |
---|
1668 | */ |
---|
1669 | case DNA_DATA: |
---|
1670 | meaning = meaningDNA[rdta->y[j][i]]; |
---|
1671 | if(meaning == getUndetermined(DNA_DATA)) |
---|
1672 | gaps++; |
---|
1673 | break; |
---|
1674 | |
---|
1675 | case AA_DATA: |
---|
1676 | meaning = meaningAA[rdta->y[j][i]]; |
---|
1677 | if(meaning == getUndetermined(AA_DATA)) |
---|
1678 | gaps++; |
---|
1679 | break; |
---|
1680 | |
---|
1681 | case GENERIC_32: |
---|
1682 | meaning = meaningGeneric32[rdta->y[j][i]]; |
---|
1683 | if(meaning == getUndetermined(GENERIC_32)) |
---|
1684 | gaps++; |
---|
1685 | break; |
---|
1686 | |
---|
1687 | case GENERIC_64: |
---|
1688 | meaning = meaningGeneric64[rdta->y[j][i]]; |
---|
1689 | if(meaning == getUndetermined(GENERIC_64)) |
---|
1690 | gaps++; |
---|
1691 | break; |
---|
1692 | default: |
---|
1693 | assert(0); |
---|
1694 | } |
---|
1695 | |
---|
1696 | total++; |
---|
1697 | rdta->y[j][i] = meaning; |
---|
1698 | } |
---|
1699 | } |
---|
1700 | |
---|
1701 | adef->gapyness = (double)gaps / (double)total; |
---|
1702 | |
---|
1703 | return; |
---|
1704 | } |
---|
1705 | |
---|
1706 | |
---|
1707 | |
---|
1708 | static void inputweights (rawdata *rdta) |
---|
1709 | { |
---|
1710 | int i, w, fres; |
---|
1711 | FILE *weightFile; |
---|
1712 | int *wv = (int *)rax_malloc(sizeof(int) * rdta->sites); |
---|
1713 | |
---|
1714 | weightFile = myfopen(weightFileName, "rb"); |
---|
1715 | |
---|
1716 | i = 0; |
---|
1717 | |
---|
1718 | while((fres = fscanf(weightFile,"%d", &w)) != EOF) |
---|
1719 | { |
---|
1720 | if(!fres) |
---|
1721 | { |
---|
1722 | if(processID == 0) |
---|
1723 | printf("error reading weight file probably encountered a non-integer weight value\n"); |
---|
1724 | errorExit(-1); |
---|
1725 | } |
---|
1726 | wv[i] = w; |
---|
1727 | i++; |
---|
1728 | } |
---|
1729 | |
---|
1730 | if(i != rdta->sites) |
---|
1731 | { |
---|
1732 | if(processID == 0) |
---|
1733 | printf("number %d of weights not equal to number %d of alignment columns\n", i, rdta->sites); |
---|
1734 | errorExit(-1); |
---|
1735 | } |
---|
1736 | |
---|
1737 | for(i = 1; i <= rdta->sites; i++) |
---|
1738 | rdta->wgt[i] = wv[i - 1]; |
---|
1739 | |
---|
1740 | fclose(weightFile); |
---|
1741 | rax_free(wv); |
---|
1742 | } |
---|
1743 | |
---|
1744 | |
---|
1745 | |
---|
1746 | static void getinput(analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr) |
---|
1747 | { |
---|
1748 | int i; |
---|
1749 | |
---|
1750 | if(!adef->readTaxaOnly) |
---|
1751 | { |
---|
1752 | INFILE = myfopen(seq_file, "rb"); |
---|
1753 | |
---|
1754 | getnums(rdta, adef); |
---|
1755 | } |
---|
1756 | |
---|
1757 | tr->mxtips = rdta->numsp; |
---|
1758 | |
---|
1759 | if(!adef->readTaxaOnly) |
---|
1760 | { |
---|
1761 | rdta->wgt = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1762 | cdta->alias = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1763 | cdta->aliaswgt = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1764 | cdta->rateCategory = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1765 | tr->model = (int *) rax_calloc((rdta->sites + 1), sizeof(int)); |
---|
1766 | tr->initialDataVector = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1767 | tr->extendedDataVector = (int *) rax_malloc((rdta->sites + 1) * sizeof(int)); |
---|
1768 | cdta->patrat = (double *) rax_malloc((rdta->sites + 1) * sizeof(double)); |
---|
1769 | cdta->patratStored = (double *) rax_malloc((rdta->sites + 1) * sizeof(double)); |
---|
1770 | |
---|
1771 | |
---|
1772 | |
---|
1773 | if(!adef->useWeightFile) |
---|
1774 | { |
---|
1775 | for (i = 1; i <= rdta->sites; i++) |
---|
1776 | rdta->wgt[i] = 1; |
---|
1777 | } |
---|
1778 | else |
---|
1779 | { |
---|
1780 | assert(!adef->useSecondaryStructure); |
---|
1781 | inputweights(rdta); |
---|
1782 | } |
---|
1783 | } |
---|
1784 | |
---|
1785 | tr->multiBranch = 0; |
---|
1786 | tr->numBranches = 1; |
---|
1787 | |
---|
1788 | if(!adef->readTaxaOnly) |
---|
1789 | { |
---|
1790 | if(adef->useMultipleModel) |
---|
1791 | { |
---|
1792 | int ref; |
---|
1793 | |
---|
1794 | parsePartitions(adef, rdta, tr); |
---|
1795 | |
---|
1796 | for(i = 1; i <= rdta->sites; i++) |
---|
1797 | { |
---|
1798 | ref = tr->model[i]; |
---|
1799 | tr->initialDataVector[i] = tr->initialPartitionData[ref].dataType; |
---|
1800 | } |
---|
1801 | } |
---|
1802 | else |
---|
1803 | { |
---|
1804 | int |
---|
1805 | dataType = -1; |
---|
1806 | |
---|
1807 | tr->initialPartitionData = (pInfo*)rax_malloc(sizeof(pInfo)); |
---|
1808 | tr->initialPartitionData[0].partitionName = (char*)rax_malloc(128 * sizeof(char)); |
---|
1809 | strcpy(tr->initialPartitionData[0].partitionName, "No Name Provided"); |
---|
1810 | |
---|
1811 | tr->initialPartitionData[0].protModels = adef->proteinMatrix; |
---|
1812 | if(adef->protEmpiricalFreqs) |
---|
1813 | tr->initialPartitionData[0].usePredefinedProtFreqs = FALSE; |
---|
1814 | else |
---|
1815 | tr->initialPartitionData[0].usePredefinedProtFreqs = TRUE; |
---|
1816 | |
---|
1817 | |
---|
1818 | |
---|
1819 | tr->NumberOfModels = 1; |
---|
1820 | |
---|
1821 | if(adef->model == M_PROTCAT || adef->model == M_PROTGAMMA) |
---|
1822 | dataType = AA_DATA; |
---|
1823 | if(adef->model == M_GTRCAT || adef->model == M_GTRGAMMA) |
---|
1824 | dataType = DNA_DATA; |
---|
1825 | if(adef->model == M_BINCAT || adef->model == M_BINGAMMA) |
---|
1826 | dataType = BINARY_DATA; |
---|
1827 | if(adef->model == M_32CAT || adef->model == M_32GAMMA) |
---|
1828 | dataType = GENERIC_32; |
---|
1829 | if(adef->model == M_64CAT || adef->model == M_64GAMMA) |
---|
1830 | dataType = GENERIC_64; |
---|
1831 | |
---|
1832 | |
---|
1833 | |
---|
1834 | assert(dataType == BINARY_DATA || dataType == DNA_DATA || dataType == AA_DATA || |
---|
1835 | dataType == GENERIC_32 || dataType == GENERIC_64); |
---|
1836 | |
---|
1837 | tr->initialPartitionData[0].dataType = dataType; |
---|
1838 | |
---|
1839 | if(dataType == AA_DATA && adef->userProteinModel) |
---|
1840 | { |
---|
1841 | tr->initialPartitionData[0].protModels = PROT_FILE; |
---|
1842 | tr->initialPartitionData[0].usePredefinedProtFreqs = TRUE; |
---|
1843 | strcpy(tr->initialPartitionData[0].proteinSubstitutionFileName, proteinModelFileName); |
---|
1844 | } |
---|
1845 | |
---|
1846 | for(i = 0; i <= rdta->sites; i++) |
---|
1847 | { |
---|
1848 | tr->initialDataVector[i] = dataType; |
---|
1849 | tr->model[i] = 0; |
---|
1850 | } |
---|
1851 | } |
---|
1852 | |
---|
1853 | if(adef->useSecondaryStructure) |
---|
1854 | { |
---|
1855 | memcpy(tr->extendedDataVector, tr->initialDataVector, (rdta->sites + 1) * sizeof(int)); |
---|
1856 | |
---|
1857 | tr->extendedPartitionData =(pInfo*)rax_malloc(sizeof(pInfo) * tr->NumberOfModels); |
---|
1858 | |
---|
1859 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
1860 | { |
---|
1861 | tr->extendedPartitionData[i].partitionName = (char*)rax_malloc((strlen(tr->initialPartitionData[i].partitionName) + 1) * sizeof(char)); |
---|
1862 | strcpy(tr->extendedPartitionData[i].partitionName, tr->initialPartitionData[i].partitionName); |
---|
1863 | strcpy(tr->extendedPartitionData[i].proteinSubstitutionFileName, tr->initialPartitionData[i].proteinSubstitutionFileName); |
---|
1864 | tr->extendedPartitionData[i].dataType = tr->initialPartitionData[i].dataType; |
---|
1865 | tr->extendedPartitionData[i].protModels = tr->initialPartitionData[i].protModels; |
---|
1866 | tr->extendedPartitionData[i].usePredefinedProtFreqs = tr->initialPartitionData[i].usePredefinedProtFreqs; |
---|
1867 | } |
---|
1868 | |
---|
1869 | parseSecondaryStructure(tr, adef, rdta->sites); |
---|
1870 | |
---|
1871 | tr->dataVector = tr->extendedDataVector; |
---|
1872 | tr->partitionData = tr->extendedPartitionData; |
---|
1873 | } |
---|
1874 | else |
---|
1875 | { |
---|
1876 | tr->dataVector = tr->initialDataVector; |
---|
1877 | tr->partitionData = tr->initialPartitionData; |
---|
1878 | } |
---|
1879 | |
---|
1880 | |
---|
1881 | |
---|
1882 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
1883 | if(tr->partitionData[i].dataType == AA_DATA && tr->partitionData[i].protModels == PROT_FILE) |
---|
1884 | parseProteinModel(tr->partitionData[i].externalAAMatrix, tr->partitionData[i].proteinSubstitutionFileName); |
---|
1885 | |
---|
1886 | |
---|
1887 | |
---|
1888 | tr->executeModel = (boolean *)rax_malloc(sizeof(boolean) * tr->NumberOfModels); |
---|
1889 | |
---|
1890 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
1891 | tr->executeModel[i] = TRUE; |
---|
1892 | |
---|
1893 | getyspace(rdta); |
---|
1894 | } |
---|
1895 | |
---|
1896 | setupTree(tr, adef); |
---|
1897 | |
---|
1898 | |
---|
1899 | if(!adef->readTaxaOnly) |
---|
1900 | { |
---|
1901 | switch(adef->alignmentFileType) |
---|
1902 | { |
---|
1903 | case PHYLIP: |
---|
1904 | if(!getdata(adef, rdta, tr)) |
---|
1905 | { |
---|
1906 | printf("Problem reading alignment file \n"); |
---|
1907 | errorExit(1); |
---|
1908 | } |
---|
1909 | break; |
---|
1910 | case FASTA: |
---|
1911 | parseFasta(adef, rdta, tr); |
---|
1912 | break; |
---|
1913 | default: |
---|
1914 | assert(0); |
---|
1915 | } |
---|
1916 | |
---|
1917 | tr->nameHash = initStringHashTable(10 * tr->mxtips); |
---|
1918 | for(i = 1; i <= tr->mxtips; i++) |
---|
1919 | addword(tr->nameList[i], tr->nameHash, i); |
---|
1920 | |
---|
1921 | fclose(INFILE); |
---|
1922 | } |
---|
1923 | } |
---|
1924 | |
---|
1925 | |
---|
1926 | |
---|
1927 | static unsigned char buildStates(int secModel, unsigned char v1, unsigned char v2) |
---|
1928 | { |
---|
1929 | unsigned char new = 0; |
---|
1930 | |
---|
1931 | switch(secModel) |
---|
1932 | { |
---|
1933 | case SECONDARY_DATA: |
---|
1934 | new = v1; |
---|
1935 | new = new << 4; |
---|
1936 | new = new | v2; |
---|
1937 | break; |
---|
1938 | case SECONDARY_DATA_6: |
---|
1939 | { |
---|
1940 | int |
---|
1941 | meaningDNA[256], |
---|
1942 | i; |
---|
1943 | |
---|
1944 | const unsigned char |
---|
1945 | allowedStates[6][2] = {{'A','T'}, {'C', 'G'}, {'G', 'C'}, {'G','T'}, {'T', 'A'}, {'T', 'G'}}; |
---|
1946 | |
---|
1947 | const unsigned char |
---|
1948 | finalBinaryStates[6] = {1, 2, 4, 8, 16, 32}; |
---|
1949 | |
---|
1950 | unsigned char |
---|
1951 | intermediateBinaryStates[6]; |
---|
1952 | |
---|
1953 | int length = 6; |
---|
1954 | |
---|
1955 | for(i = 0; i < 256; i++) |
---|
1956 | meaningDNA[i] = -1; |
---|
1957 | |
---|
1958 | meaningDNA['A'] = 1; |
---|
1959 | meaningDNA['B'] = 14; |
---|
1960 | meaningDNA['C'] = 2; |
---|
1961 | meaningDNA['D'] = 13; |
---|
1962 | meaningDNA['G'] = 4; |
---|
1963 | meaningDNA['H'] = 11; |
---|
1964 | meaningDNA['K'] = 12; |
---|
1965 | meaningDNA['M'] = 3; |
---|
1966 | meaningDNA['N'] = 15; |
---|
1967 | meaningDNA['O'] = 15; |
---|
1968 | meaningDNA['R'] = 5; |
---|
1969 | meaningDNA['S'] = 6; |
---|
1970 | meaningDNA['T'] = 8; |
---|
1971 | meaningDNA['U'] = 8; |
---|
1972 | meaningDNA['V'] = 7; |
---|
1973 | meaningDNA['W'] = 9; |
---|
1974 | meaningDNA['X'] = 15; |
---|
1975 | meaningDNA['Y'] = 10; |
---|
1976 | meaningDNA['-'] = 15; |
---|
1977 | meaningDNA['?'] = 15; |
---|
1978 | |
---|
1979 | for(i = 0; i < length; i++) |
---|
1980 | { |
---|
1981 | unsigned char n1 = meaningDNA[allowedStates[i][0]]; |
---|
1982 | unsigned char n2 = meaningDNA[allowedStates[i][1]]; |
---|
1983 | |
---|
1984 | new = n1; |
---|
1985 | new = new << 4; |
---|
1986 | new = new | n2; |
---|
1987 | |
---|
1988 | intermediateBinaryStates[i] = new; |
---|
1989 | } |
---|
1990 | |
---|
1991 | new = v1; |
---|
1992 | new = new << 4; |
---|
1993 | new = new | v2; |
---|
1994 | |
---|
1995 | for(i = 0; i < length; i++) |
---|
1996 | { |
---|
1997 | if(new == intermediateBinaryStates[i]) |
---|
1998 | break; |
---|
1999 | } |
---|
2000 | if(i < length) |
---|
2001 | new = finalBinaryStates[i]; |
---|
2002 | else |
---|
2003 | { |
---|
2004 | new = 0; |
---|
2005 | for(i = 0; i < length; i++) |
---|
2006 | { |
---|
2007 | if(v1 & meaningDNA[allowedStates[i][0]]) |
---|
2008 | { |
---|
2009 | /*printf("Adding %c%c\n", allowedStates[i][0], allowedStates[i][1]);*/ |
---|
2010 | new |= finalBinaryStates[i]; |
---|
2011 | } |
---|
2012 | if(v2 & meaningDNA[allowedStates[i][1]]) |
---|
2013 | { |
---|
2014 | /*printf("Adding %c%c\n", allowedStates[i][0], allowedStates[i][1]);*/ |
---|
2015 | new |= finalBinaryStates[i]; |
---|
2016 | } |
---|
2017 | } |
---|
2018 | } |
---|
2019 | } |
---|
2020 | break; |
---|
2021 | case SECONDARY_DATA_7: |
---|
2022 | { |
---|
2023 | int |
---|
2024 | meaningDNA[256], |
---|
2025 | i; |
---|
2026 | |
---|
2027 | const unsigned char |
---|
2028 | allowedStates[6][2] = {{'A','T'}, {'C', 'G'}, {'G', 'C'}, {'G','T'}, {'T', 'A'}, {'T', 'G'}}; |
---|
2029 | |
---|
2030 | const unsigned char |
---|
2031 | finalBinaryStates[7] = {1, 2, 4, 8, 16, 32, 64}; |
---|
2032 | |
---|
2033 | unsigned char |
---|
2034 | intermediateBinaryStates[7]; |
---|
2035 | |
---|
2036 | for(i = 0; i < 256; i++) |
---|
2037 | meaningDNA[i] = -1; |
---|
2038 | |
---|
2039 | meaningDNA['A'] = 1; |
---|
2040 | meaningDNA['B'] = 14; |
---|
2041 | meaningDNA['C'] = 2; |
---|
2042 | meaningDNA['D'] = 13; |
---|
2043 | meaningDNA['G'] = 4; |
---|
2044 | meaningDNA['H'] = 11; |
---|
2045 | meaningDNA['K'] = 12; |
---|
2046 | meaningDNA['M'] = 3; |
---|
2047 | meaningDNA['N'] = 15; |
---|
2048 | meaningDNA['O'] = 15; |
---|
2049 | meaningDNA['R'] = 5; |
---|
2050 | meaningDNA['S'] = 6; |
---|
2051 | meaningDNA['T'] = 8; |
---|
2052 | meaningDNA['U'] = 8; |
---|
2053 | meaningDNA['V'] = 7; |
---|
2054 | meaningDNA['W'] = 9; |
---|
2055 | meaningDNA['X'] = 15; |
---|
2056 | meaningDNA['Y'] = 10; |
---|
2057 | meaningDNA['-'] = 15; |
---|
2058 | meaningDNA['?'] = 15; |
---|
2059 | |
---|
2060 | |
---|
2061 | for(i = 0; i < 6; i++) |
---|
2062 | { |
---|
2063 | unsigned char n1 = meaningDNA[allowedStates[i][0]]; |
---|
2064 | unsigned char n2 = meaningDNA[allowedStates[i][1]]; |
---|
2065 | |
---|
2066 | new = n1; |
---|
2067 | new = new << 4; |
---|
2068 | new = new | n2; |
---|
2069 | |
---|
2070 | intermediateBinaryStates[i] = new; |
---|
2071 | } |
---|
2072 | |
---|
2073 | new = v1; |
---|
2074 | new = new << 4; |
---|
2075 | new = new | v2; |
---|
2076 | |
---|
2077 | for(i = 0; i < 6; i++) |
---|
2078 | { |
---|
2079 | /* exact match */ |
---|
2080 | if(new == intermediateBinaryStates[i]) |
---|
2081 | break; |
---|
2082 | } |
---|
2083 | if(i < 6) |
---|
2084 | new = finalBinaryStates[i]; |
---|
2085 | else |
---|
2086 | { |
---|
2087 | /* distinguish between exact mismatches and partial mismatches */ |
---|
2088 | |
---|
2089 | for(i = 0; i < 6; i++) |
---|
2090 | if((v1 & meaningDNA[allowedStates[i][0]]) && (v2 & meaningDNA[allowedStates[i][1]])) |
---|
2091 | break; |
---|
2092 | if(i < 6) |
---|
2093 | { |
---|
2094 | /* printf("partial mismatch\n"); */ |
---|
2095 | |
---|
2096 | new = 0; |
---|
2097 | for(i = 0; i < 6; i++) |
---|
2098 | { |
---|
2099 | if((v1 & meaningDNA[allowedStates[i][0]]) && (v2 & meaningDNA[allowedStates[i][1]])) |
---|
2100 | { |
---|
2101 | /*printf("Adding %c%c\n", allowedStates[i][0], allowedStates[i][1]);*/ |
---|
2102 | new |= finalBinaryStates[i]; |
---|
2103 | } |
---|
2104 | else |
---|
2105 | new |= finalBinaryStates[6]; |
---|
2106 | } |
---|
2107 | } |
---|
2108 | else |
---|
2109 | new = finalBinaryStates[6]; |
---|
2110 | } |
---|
2111 | } |
---|
2112 | break; |
---|
2113 | default: |
---|
2114 | assert(0); |
---|
2115 | } |
---|
2116 | |
---|
2117 | return new; |
---|
2118 | |
---|
2119 | } |
---|
2120 | |
---|
2121 | |
---|
2122 | |
---|
2123 | static void adaptRdataToSecondary(tree *tr, rawdata *rdta) |
---|
2124 | { |
---|
2125 | int *alias = (int*)rax_calloc(rdta->sites, sizeof(int)); |
---|
2126 | int i, j, realPosition; |
---|
2127 | |
---|
2128 | for(i = 0; i < rdta->sites; i++) |
---|
2129 | alias[i] = -1; |
---|
2130 | |
---|
2131 | for(i = 0, realPosition = 0; i < rdta->sites; i++) |
---|
2132 | { |
---|
2133 | int partner = tr->secondaryStructurePairs[i]; |
---|
2134 | if(partner != -1) |
---|
2135 | { |
---|
2136 | assert(tr->dataVector[i+1] == SECONDARY_DATA || tr->dataVector[i+1] == SECONDARY_DATA_6 || tr->dataVector[i+1] == SECONDARY_DATA_7); |
---|
2137 | |
---|
2138 | if(i < partner) |
---|
2139 | { |
---|
2140 | for(j = 1; j <= rdta->numsp; j++) |
---|
2141 | { |
---|
2142 | unsigned char v1 = rdta->y[j][i+1]; |
---|
2143 | unsigned char v2 = rdta->y[j][partner+1]; |
---|
2144 | |
---|
2145 | assert(i+1 < partner+1); |
---|
2146 | |
---|
2147 | rdta->y[j][i+1] = buildStates(tr->dataVector[i+1], v1, v2); |
---|
2148 | } |
---|
2149 | alias[realPosition] = i; |
---|
2150 | realPosition++; |
---|
2151 | } |
---|
2152 | } |
---|
2153 | else |
---|
2154 | { |
---|
2155 | alias[realPosition] = i; |
---|
2156 | realPosition++; |
---|
2157 | } |
---|
2158 | } |
---|
2159 | |
---|
2160 | assert(rdta->sites - realPosition == tr->numberOfSecondaryColumns / 2); |
---|
2161 | |
---|
2162 | rdta->sites = realPosition; |
---|
2163 | |
---|
2164 | for(i = 0; i < rdta->sites; i++) |
---|
2165 | { |
---|
2166 | assert(alias[i] != -1); |
---|
2167 | tr->model[i+1] = tr->model[alias[i]+1]; |
---|
2168 | tr->dataVector[i+1] = tr->dataVector[alias[i]+1]; |
---|
2169 | rdta->wgt[i+1] = rdta->wgt[alias[i]+1]; |
---|
2170 | |
---|
2171 | for(j = 1; j <= rdta->numsp; j++) |
---|
2172 | rdta->y[j][i+1] = rdta->y[j][alias[i]+1]; |
---|
2173 | } |
---|
2174 | |
---|
2175 | rax_free(alias); |
---|
2176 | } |
---|
2177 | |
---|
2178 | static void sitesort(rawdata *rdta, cruncheddata *cdta, tree *tr, analdef *adef) |
---|
2179 | { |
---|
2180 | int gap, i, j, jj, jg, k, n, nsp; |
---|
2181 | int |
---|
2182 | *index, |
---|
2183 | *category = (int*)NULL; |
---|
2184 | |
---|
2185 | boolean flip, tied; |
---|
2186 | unsigned char **data; |
---|
2187 | |
---|
2188 | if(adef->useSecondaryStructure) |
---|
2189 | { |
---|
2190 | assert(tr->NumberOfModels > 1 && adef->useMultipleModel); |
---|
2191 | |
---|
2192 | adaptRdataToSecondary(tr, rdta); |
---|
2193 | } |
---|
2194 | |
---|
2195 | if(adef->useMultipleModel) |
---|
2196 | category = tr->model; |
---|
2197 | |
---|
2198 | |
---|
2199 | index = cdta->alias; |
---|
2200 | data = rdta->y; |
---|
2201 | n = rdta->sites; |
---|
2202 | nsp = rdta->numsp; |
---|
2203 | index[0] = -1; |
---|
2204 | |
---|
2205 | |
---|
2206 | if(adef->compressPatterns) |
---|
2207 | { |
---|
2208 | for (gap = n / 2; gap > 0; gap /= 2) |
---|
2209 | { |
---|
2210 | for (i = gap + 1; i <= n; i++) |
---|
2211 | { |
---|
2212 | j = i - gap; |
---|
2213 | |
---|
2214 | do |
---|
2215 | { |
---|
2216 | jj = index[j]; |
---|
2217 | jg = index[j+gap]; |
---|
2218 | if(adef->useMultipleModel) |
---|
2219 | { |
---|
2220 | assert(category[jj] != -1 && |
---|
2221 | category[jg] != -1); |
---|
2222 | |
---|
2223 | flip = (category[jj] > category[jg]); |
---|
2224 | tied = (category[jj] == category[jg]); |
---|
2225 | } |
---|
2226 | else |
---|
2227 | { |
---|
2228 | flip = 0; |
---|
2229 | tied = 1; |
---|
2230 | } |
---|
2231 | |
---|
2232 | for (k = 1; (k <= nsp) && tied; k++) |
---|
2233 | { |
---|
2234 | flip = (data[k][jj] > data[k][jg]); |
---|
2235 | tied = (data[k][jj] == data[k][jg]); |
---|
2236 | } |
---|
2237 | |
---|
2238 | if (flip) |
---|
2239 | { |
---|
2240 | index[j] = jg; |
---|
2241 | index[j+gap] = jj; |
---|
2242 | j -= gap; |
---|
2243 | } |
---|
2244 | } |
---|
2245 | while (flip && (j > 0)); |
---|
2246 | } |
---|
2247 | } |
---|
2248 | } |
---|
2249 | } |
---|
2250 | |
---|
2251 | |
---|
2252 | static void sitecombcrunch (rawdata *rdta, cruncheddata *cdta, tree *tr, analdef *adef) |
---|
2253 | { |
---|
2254 | boolean |
---|
2255 | tied; |
---|
2256 | |
---|
2257 | int |
---|
2258 | i, |
---|
2259 | sitei, |
---|
2260 | j, |
---|
2261 | sitej, |
---|
2262 | k, |
---|
2263 | *aliasModel = (int*)NULL, |
---|
2264 | *aliasSuperModel = (int*)NULL; |
---|
2265 | |
---|
2266 | tr->origNumSitePerModel = (int*)rax_calloc(tr->NumberOfModels, sizeof(int)); |
---|
2267 | |
---|
2268 | for(i = 1; i <= rdta->sites; i++) |
---|
2269 | tr->origNumSitePerModel[tr->model[i]]++; |
---|
2270 | |
---|
2271 | if(adef->useMultipleModel) |
---|
2272 | { |
---|
2273 | aliasSuperModel = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1)); |
---|
2274 | aliasModel = (int*)rax_malloc(sizeof(int) * (rdta->sites + 1)); |
---|
2275 | } |
---|
2276 | |
---|
2277 | cdta->alias[0] = cdta->alias[1]; |
---|
2278 | cdta->aliaswgt[0] = 0; |
---|
2279 | |
---|
2280 | if(adef->mode == PER_SITE_LL || adef->mode == ANCESTRAL_STATES) |
---|
2281 | { |
---|
2282 | tr->patternPosition = (int*)rax_malloc(sizeof(int) * rdta->sites); |
---|
2283 | tr->columnPosition = (int*)rax_malloc(sizeof(int) * rdta->sites); |
---|
2284 | |
---|
2285 | for(i = 0; i < rdta->sites; i++) |
---|
2286 | { |
---|
2287 | tr->patternPosition[i] = -1; |
---|
2288 | tr->columnPosition[i] = -1; |
---|
2289 | } |
---|
2290 | } |
---|
2291 | |
---|
2292 | i = 0; |
---|
2293 | for (j = 1; j <= rdta->sites; j++) |
---|
2294 | { |
---|
2295 | sitei = cdta->alias[i]; |
---|
2296 | sitej = cdta->alias[j]; |
---|
2297 | if(!adef->compressPatterns) |
---|
2298 | tied = 0; |
---|
2299 | else |
---|
2300 | { |
---|
2301 | if(adef->useMultipleModel) |
---|
2302 | { |
---|
2303 | tied = (tr->model[sitei] == tr->model[sitej]); |
---|
2304 | if(tied) |
---|
2305 | assert(tr->dataVector[sitei] == tr->dataVector[sitej]); |
---|
2306 | } |
---|
2307 | else |
---|
2308 | tied = 1; |
---|
2309 | } |
---|
2310 | |
---|
2311 | for (k = 1; tied && (k <= rdta->numsp); k++) |
---|
2312 | tied = (rdta->y[k][sitei] == rdta->y[k][sitej]); |
---|
2313 | |
---|
2314 | if (tied) |
---|
2315 | { |
---|
2316 | if(adef->mode == PER_SITE_LL || adef->mode == ANCESTRAL_STATES) |
---|
2317 | { |
---|
2318 | tr->patternPosition[j - 1] = i; |
---|
2319 | tr->columnPosition[j - 1] = sitej; |
---|
2320 | /* printf("Pattern %d from column %d also at site %d\n", i, sitei, sitej); */ |
---|
2321 | } |
---|
2322 | |
---|
2323 | |
---|
2324 | cdta->aliaswgt[i] += rdta->wgt[sitej]; |
---|
2325 | |
---|
2326 | if(adef->useMultipleModel) |
---|
2327 | { |
---|
2328 | aliasModel[i] = tr->model[sitej]; |
---|
2329 | aliasSuperModel[i] = tr->dataVector[sitej]; |
---|
2330 | } |
---|
2331 | } |
---|
2332 | else |
---|
2333 | { |
---|
2334 | if (cdta->aliaswgt[i] > 0) i++; |
---|
2335 | |
---|
2336 | if(adef->mode == PER_SITE_LL || adef->mode == ANCESTRAL_STATES) |
---|
2337 | { |
---|
2338 | tr->patternPosition[j - 1] = i; |
---|
2339 | tr->columnPosition[j - 1] = sitej; |
---|
2340 | /*printf("Pattern %d is from cloumn %d\n", i, sitej);*/ |
---|
2341 | } |
---|
2342 | |
---|
2343 | cdta->aliaswgt[i] = rdta->wgt[sitej]; |
---|
2344 | cdta->alias[i] = sitej; |
---|
2345 | if(adef->useMultipleModel) |
---|
2346 | { |
---|
2347 | aliasModel[i] = tr->model[sitej]; |
---|
2348 | aliasSuperModel[i] = tr->dataVector[sitej]; |
---|
2349 | } |
---|
2350 | } |
---|
2351 | } |
---|
2352 | |
---|
2353 | cdta->endsite = i; |
---|
2354 | if (cdta->aliaswgt[i] > 0) cdta->endsite++; |
---|
2355 | |
---|
2356 | if(adef->mode == PER_SITE_LL || adef->mode == ANCESTRAL_STATES) |
---|
2357 | { |
---|
2358 | for(i = 0; i < rdta->sites; i++) |
---|
2359 | { |
---|
2360 | int p = tr->patternPosition[i]; |
---|
2361 | int c = tr->columnPosition[i]; |
---|
2362 | |
---|
2363 | assert(p >= 0 && p < cdta->endsite); |
---|
2364 | assert(c >= 1 && c <= rdta->sites); |
---|
2365 | } |
---|
2366 | } |
---|
2367 | |
---|
2368 | |
---|
2369 | if(adef->useMultipleModel) |
---|
2370 | { |
---|
2371 | for(i = 0; i <= rdta->sites; i++) |
---|
2372 | { |
---|
2373 | tr->model[i] = aliasModel[i]; |
---|
2374 | tr->dataVector[i] = aliasSuperModel[i]; |
---|
2375 | } |
---|
2376 | } |
---|
2377 | |
---|
2378 | if(adef->useMultipleModel) |
---|
2379 | { |
---|
2380 | rax_free(aliasModel); |
---|
2381 | rax_free(aliasSuperModel); |
---|
2382 | } |
---|
2383 | } |
---|
2384 | |
---|
2385 | |
---|
2386 | static boolean makeweights (analdef *adef, rawdata *rdta, cruncheddata *cdta, tree *tr) |
---|
2387 | { |
---|
2388 | int i; |
---|
2389 | |
---|
2390 | for (i = 1; i <= rdta->sites; i++) |
---|
2391 | cdta->alias[i] = i; |
---|
2392 | |
---|
2393 | sitesort(rdta, cdta, tr, adef); |
---|
2394 | sitecombcrunch(rdta, cdta, tr, adef); |
---|
2395 | |
---|
2396 | return TRUE; |
---|
2397 | } |
---|
2398 | |
---|
2399 | |
---|
2400 | |
---|
2401 | |
---|
2402 | static boolean makevalues(rawdata *rdta, cruncheddata *cdta, tree *tr, analdef *adef) |
---|
2403 | { |
---|
2404 | int i, j, model, fullSites = 0, modelCounter; |
---|
2405 | |
---|
2406 | unsigned char |
---|
2407 | *y = (unsigned char *)rax_malloc(((size_t)rdta->numsp) * ((size_t)cdta->endsite) * sizeof(unsigned char)), |
---|
2408 | *yBUF = (unsigned char *)rax_malloc( ((size_t)rdta->numsp) * ((size_t)cdta->endsite) * sizeof(unsigned char)); |
---|
2409 | |
---|
2410 | for (i = 1; i <= rdta->numsp; i++) |
---|
2411 | for (j = 0; j < cdta->endsite; j++) |
---|
2412 | y[(((size_t)(i - 1)) * ((size_t)cdta->endsite)) + j] = rdta->y[i][cdta->alias[j]]; |
---|
2413 | |
---|
2414 | rax_free(rdta->y0); |
---|
2415 | rax_free(rdta->y); |
---|
2416 | |
---|
2417 | rdta->y0 = y; |
---|
2418 | memcpy(yBUF, y, ((size_t)rdta->numsp) * ((size_t)cdta->endsite) * sizeof(unsigned char)); |
---|
2419 | rdta->yBUF = yBUF; |
---|
2420 | |
---|
2421 | if(!adef->useMultipleModel) |
---|
2422 | tr->NumberOfModels = 1; |
---|
2423 | |
---|
2424 | if(adef->useMultipleModel) |
---|
2425 | { |
---|
2426 | tr->partitionData[0].lower = 0; |
---|
2427 | |
---|
2428 | model = tr->model[0]; |
---|
2429 | modelCounter = 0; |
---|
2430 | |
---|
2431 | i = 1; |
---|
2432 | |
---|
2433 | while(i < cdta->endsite) |
---|
2434 | { |
---|
2435 | if(tr->model[i] != model) |
---|
2436 | { |
---|
2437 | tr->partitionData[modelCounter].upper = i; |
---|
2438 | tr->partitionData[modelCounter + 1].lower = i; |
---|
2439 | |
---|
2440 | model = tr->model[i]; |
---|
2441 | modelCounter++; |
---|
2442 | } |
---|
2443 | i++; |
---|
2444 | } |
---|
2445 | |
---|
2446 | tr->partitionData[tr->NumberOfModels - 1].upper = cdta->endsite; |
---|
2447 | |
---|
2448 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
2449 | tr->partitionData[i].width = tr->partitionData[i].upper - tr->partitionData[i].lower; |
---|
2450 | |
---|
2451 | model = tr->model[0]; |
---|
2452 | modelCounter = 0; |
---|
2453 | tr->model[0] = modelCounter; |
---|
2454 | i = 1; |
---|
2455 | |
---|
2456 | while(i < cdta->endsite) |
---|
2457 | { |
---|
2458 | if(tr->model[i] != model) |
---|
2459 | { |
---|
2460 | model = tr->model[i]; |
---|
2461 | modelCounter++; |
---|
2462 | tr->model[i] = modelCounter; |
---|
2463 | } |
---|
2464 | else |
---|
2465 | tr->model[i] = modelCounter; |
---|
2466 | i++; |
---|
2467 | } |
---|
2468 | } |
---|
2469 | else |
---|
2470 | { |
---|
2471 | tr->partitionData[0].lower = 0; |
---|
2472 | tr->partitionData[0].upper = cdta->endsite; |
---|
2473 | tr->partitionData[0].width = tr->partitionData[0].upper - tr->partitionData[0].lower; |
---|
2474 | } |
---|
2475 | |
---|
2476 | tr->rdta = rdta; |
---|
2477 | tr->cdta = cdta; |
---|
2478 | |
---|
2479 | tr->invariant = (int *)rax_malloc(cdta->endsite * sizeof(int)); |
---|
2480 | tr->originalDataVector = (int *)rax_malloc(cdta->endsite * sizeof(int)); |
---|
2481 | tr->originalModel = (int *)rax_malloc(cdta->endsite * sizeof(int)); |
---|
2482 | tr->originalWeights = (int *)rax_malloc(cdta->endsite * sizeof(int)); |
---|
2483 | |
---|
2484 | memcpy(tr->originalModel, tr->model, cdta->endsite * sizeof(int)); |
---|
2485 | memcpy(tr->originalDataVector, tr->dataVector, cdta->endsite * sizeof(int)); |
---|
2486 | memcpy(tr->originalWeights, tr->cdta->aliaswgt, cdta->endsite * sizeof(int)); |
---|
2487 | |
---|
2488 | |
---|
2489 | tr->originalCrunchedLength = tr->cdta->endsite; |
---|
2490 | for(i = 0; i < tr->cdta->endsite; i++) |
---|
2491 | fullSites += tr->cdta->aliaswgt[i]; |
---|
2492 | |
---|
2493 | tr->fullSites = fullSites; |
---|
2494 | |
---|
2495 | for(i = 0; i < rdta->numsp; i++) |
---|
2496 | tr->yVector[i + 1] = &(rdta->y0[((size_t)tr->originalCrunchedLength) * ((size_t)i)]); |
---|
2497 | |
---|
2498 | return TRUE; |
---|
2499 | } |
---|
2500 | |
---|
2501 | |
---|
2502 | |
---|
2503 | |
---|
2504 | |
---|
2505 | |
---|
2506 | |
---|
2507 | |
---|
/*
 * Compare the first n bytes of two encoded sequences.
 * Returns 1 when they are identical (also when n <= 0), 0 otherwise.
 */
static int sequenceSimilarity(unsigned char *tipJ, unsigned char *tipK, int n)
{
  /* nothing to compare: treated as identical */
  if(n <= 0)
    return 1;

  return (memcmp(tipJ, tipK, (size_t)n) == 0) ? 1 : 0;
}
---|
2518 | |
---|
2519 | static void checkSequences(tree *tr, rawdata *rdta, analdef *adef) |
---|
2520 | { |
---|
2521 | int n = tr->mxtips + 1; |
---|
2522 | int i, j; |
---|
2523 | int *omissionList = (int *)rax_calloc(n, sizeof(int)); |
---|
2524 | int *undeterminedList = (int *)rax_calloc((rdta->sites + 1), sizeof(int)); |
---|
2525 | int *modelList = (int *)rax_malloc((rdta->sites + 1)* sizeof(int)); |
---|
2526 | int count = 0; |
---|
2527 | int countNameDuplicates = 0; |
---|
2528 | int countUndeterminedColumns = 0; |
---|
2529 | int countOnlyGaps = 0; |
---|
2530 | int modelCounter = 1; |
---|
2531 | unsigned char *tipI, *tipJ; |
---|
2532 | |
---|
2533 | for(i = 1; i < n; i++) |
---|
2534 | { |
---|
2535 | for(j = i + 1; j < n; j++) |
---|
2536 | if(strcmp(tr->nameList[i], tr->nameList[j]) == 0) |
---|
2537 | { |
---|
2538 | countNameDuplicates++; |
---|
2539 | if(processID == 0) |
---|
2540 | printBothOpen("Sequence names of taxon %d and %d are identical, they are both called %s\n", i, j, tr->nameList[i]); |
---|
2541 | } |
---|
2542 | } |
---|
2543 | |
---|
2544 | if(countNameDuplicates > 0) |
---|
2545 | { |
---|
2546 | if(processID == 0) |
---|
2547 | printBothOpen("ERROR: Found %d taxa that had equal names in the alignment, exiting...\n", countNameDuplicates); |
---|
2548 | errorExit(-1); |
---|
2549 | } |
---|
2550 | |
---|
2551 | if(adef->checkForUndeterminedSequences) |
---|
2552 | { |
---|
2553 | for(i = 1; i < n; i++) |
---|
2554 | { |
---|
2555 | j = 1; |
---|
2556 | |
---|
2557 | while(j <= rdta->sites) |
---|
2558 | { |
---|
2559 | if(rdta->y[i][j] != getUndetermined(tr->dataVector[j])) |
---|
2560 | break; |
---|
2561 | |
---|
2562 | j++; |
---|
2563 | } |
---|
2564 | |
---|
2565 | if(j == (rdta->sites + 1)) |
---|
2566 | { |
---|
2567 | if(processID == 0) |
---|
2568 | printBothOpen("ERROR: Sequence %s consists entirely of undetermined values which will be treated as missing data\n", |
---|
2569 | tr->nameList[i]); |
---|
2570 | |
---|
2571 | countOnlyGaps++; |
---|
2572 | } |
---|
2573 | } |
---|
2574 | |
---|
2575 | if(countOnlyGaps > 0) |
---|
2576 | { |
---|
2577 | if(processID == 0) |
---|
2578 | printBothOpen("ERROR: Found %d sequences that consist entirely of undetermined values, exiting...\n", countOnlyGaps); |
---|
2579 | |
---|
2580 | errorExit(-1); |
---|
2581 | } |
---|
2582 | } |
---|
2583 | |
---|
2584 | for(i = 0; i <= rdta->sites; i++) |
---|
2585 | modelList[i] = -1; |
---|
2586 | |
---|
2587 | for(i = 1; i <= rdta->sites; i++) |
---|
2588 | { |
---|
2589 | j = 1; |
---|
2590 | |
---|
2591 | while(j < n) |
---|
2592 | { |
---|
2593 | if(rdta->y[j][i] != getUndetermined(tr->dataVector[i])) |
---|
2594 | break; |
---|
2595 | |
---|
2596 | |
---|
2597 | j++; |
---|
2598 | } |
---|
2599 | |
---|
2600 | if(j == n) |
---|
2601 | { |
---|
2602 | undeterminedList[i] = 1; |
---|
2603 | |
---|
2604 | if(processID == 0) |
---|
2605 | printBothOpen("IMPORTANT WARNING: Alignment column %d contains only undetermined values which will be treated as missing data\n", i); |
---|
2606 | |
---|
2607 | countUndeterminedColumns++; |
---|
2608 | } |
---|
2609 | else |
---|
2610 | { |
---|
2611 | if(adef->useMultipleModel) |
---|
2612 | { |
---|
2613 | modelList[modelCounter] = tr->model[i]; |
---|
2614 | modelCounter++; |
---|
2615 | } |
---|
2616 | } |
---|
2617 | } |
---|
2618 | |
---|
2619 | |
---|
2620 | for(i = 1; i < n; i++) |
---|
2621 | { |
---|
2622 | if(omissionList[i] == 0) |
---|
2623 | { |
---|
2624 | tipI = &(rdta->y[i][1]); |
---|
2625 | |
---|
2626 | for(j = i + 1; j < n; j++) |
---|
2627 | { |
---|
2628 | if(omissionList[j] == 0) |
---|
2629 | { |
---|
2630 | tipJ = &(rdta->y[j][1]); |
---|
2631 | if(sequenceSimilarity(tipI, tipJ, rdta->sites)) |
---|
2632 | { |
---|
2633 | if(processID == 0) |
---|
2634 | printBothOpen("\n\nIMPORTANT WARNING: Sequences %s and %s are exactly identical\n", tr->nameList[i], tr->nameList[j]); |
---|
2635 | |
---|
2636 | omissionList[j] = 1; |
---|
2637 | count++; |
---|
2638 | } |
---|
2639 | } |
---|
2640 | } |
---|
2641 | } |
---|
2642 | } |
---|
2643 | |
---|
2644 | if(count > 0 || countUndeterminedColumns > 0) |
---|
2645 | { |
---|
2646 | char noDupFile[2048]; |
---|
2647 | char noDupModels[2048]; |
---|
2648 | char noDupSecondary[2048]; |
---|
2649 | |
---|
2650 | if(count > 0 &&processID == 0) |
---|
2651 | { |
---|
2652 | printBothOpen("\nIMPORTANT WARNING\n"); |
---|
2653 | |
---|
2654 | printBothOpen("Found %d %s that %s exactly identical to other sequences in the alignment.\n", count, (count == 1)?"sequence":"sequences", (count == 1)?"is":"are"); |
---|
2655 | |
---|
2656 | printBothOpen("Normally they should be excluded from the analysis.\n\n"); |
---|
2657 | } |
---|
2658 | |
---|
2659 | if(countUndeterminedColumns > 0 && processID == 0) |
---|
2660 | { |
---|
2661 | printBothOpen("\nIMPORTANT WARNING\n"); |
---|
2662 | |
---|
2663 | printBothOpen("Found %d %s that %s only undetermined values which will be treated as missing data.\n", |
---|
2664 | countUndeterminedColumns, (countUndeterminedColumns == 1)?"column":"columns", (countUndeterminedColumns == 1)?"contains":"contain"); |
---|
2665 | |
---|
2666 | printBothOpen("Normally these columns should be excluded from the analysis.\n\n"); |
---|
2667 | } |
---|
2668 | |
---|
2669 | strcpy(noDupFile, seq_file); |
---|
2670 | strcat(noDupFile, ".reduced"); |
---|
2671 | |
---|
2672 | strcpy(noDupModels, modelFileName); |
---|
2673 | strcat(noDupModels, ".reduced"); |
---|
2674 | |
---|
2675 | strcpy(noDupSecondary, secondaryStructureFileName); |
---|
2676 | strcat(noDupSecondary, ".reduced"); |
---|
2677 | |
---|
2678 | if(processID == 0) |
---|
2679 | { |
---|
2680 | if(adef->useSecondaryStructure) |
---|
2681 | { |
---|
2682 | if(countUndeterminedColumns && !filexists(noDupSecondary)) |
---|
2683 | { |
---|
2684 | FILE *newFile = myfopen(noDupSecondary, "wb"); |
---|
2685 | int count2; |
---|
2686 | |
---|
2687 | printBothOpen("\nJust in case you might need it, a secondary structure file with \n"); |
---|
2688 | printBothOpen("structure assignments for undetermined columns removed is printed to file %s\n",noDupSecondary); |
---|
2689 | |
---|
2690 | for(i = 1, count2 = 0; i <= rdta->sites; i++) |
---|
2691 | { |
---|
2692 | if(undeterminedList[i] == 0) |
---|
2693 | fprintf(newFile, "%c", tr->secondaryStructureInput[i - 1]); |
---|
2694 | else |
---|
2695 | count2++; |
---|
2696 | } |
---|
2697 | |
---|
2698 | assert(count2 == countUndeterminedColumns); |
---|
2699 | |
---|
2700 | fprintf(newFile,"\n"); |
---|
2701 | |
---|
2702 | fclose(newFile); |
---|
2703 | } |
---|
2704 | else |
---|
2705 | { |
---|
2706 | if(countUndeterminedColumns) |
---|
2707 | { |
---|
2708 | printBothOpen("\nA secondary structure file with model assignments for undetermined\n"); |
---|
2709 | printBothOpen("columns removed has already been printed to file %s\n",noDupSecondary); |
---|
2710 | } |
---|
2711 | } |
---|
2712 | } |
---|
2713 | |
---|
2714 | |
---|
2715 | if(adef->useMultipleModel && !filexists(noDupModels) && countUndeterminedColumns) |
---|
2716 | { |
---|
2717 | FILE *newFile = myfopen(noDupModels, "wb"); |
---|
2718 | |
---|
2719 | printBothOpen("\nJust in case you might need it, a mixed model file with \n"); |
---|
2720 | printBothOpen("model assignments for undetermined columns removed is printed to file %s\n",noDupModels); |
---|
2721 | |
---|
2722 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
2723 | { |
---|
2724 | boolean modelStillExists = FALSE; |
---|
2725 | |
---|
2726 | for(j = 1; (j <= rdta->sites) && (!modelStillExists); j++) |
---|
2727 | { |
---|
2728 | if(modelList[j] == i) |
---|
2729 | modelStillExists = TRUE; |
---|
2730 | } |
---|
2731 | |
---|
2732 | if(modelStillExists) |
---|
2733 | { |
---|
2734 | int k = 1; |
---|
2735 | int lower, upper; |
---|
2736 | int parts = 0; |
---|
2737 | |
---|
2738 | |
---|
2739 | switch(tr->partitionData[i].dataType) |
---|
2740 | { |
---|
2741 | case AA_DATA: |
---|
2742 | { |
---|
2743 | char |
---|
2744 | AAmodel[1024]; |
---|
2745 | |
---|
2746 | if(tr->partitionData[i].protModels != PROT_FILE) |
---|
2747 | { |
---|
2748 | strcpy(AAmodel, protModels[tr->partitionData[i].protModels]); |
---|
2749 | if(tr->partitionData[i].usePredefinedProtFreqs == FALSE) |
---|
2750 | strcat(AAmodel, "F"); |
---|
2751 | |
---|
2752 | fprintf(newFile, "%s, ", AAmodel); |
---|
2753 | } |
---|
2754 | else |
---|
2755 | fprintf(newFile, "[%s], ", tr->partitionData[i].proteinSubstitutionFileName); |
---|
2756 | } |
---|
2757 | break; |
---|
2758 | case DNA_DATA: |
---|
2759 | fprintf(newFile, "DNA, "); |
---|
2760 | break; |
---|
2761 | case BINARY_DATA: |
---|
2762 | fprintf(newFile, "BIN, "); |
---|
2763 | break; |
---|
2764 | case GENERIC_32: |
---|
2765 | fprintf(newFile, "MULTI, "); |
---|
2766 | break; |
---|
2767 | case GENERIC_64: |
---|
2768 | fprintf(newFile, "CODON, "); |
---|
2769 | break; |
---|
2770 | default: |
---|
2771 | assert(0); |
---|
2772 | } |
---|
2773 | |
---|
2774 | fprintf(newFile, "%s = ", tr->partitionData[i].partitionName); |
---|
2775 | |
---|
2776 | while(k <= rdta->sites) |
---|
2777 | { |
---|
2778 | if(modelList[k] == i) |
---|
2779 | { |
---|
2780 | lower = k; |
---|
2781 | while((modelList[k + 1] == i) && (k <= rdta->sites)) |
---|
2782 | k++; |
---|
2783 | upper = k; |
---|
2784 | |
---|
2785 | if(lower == upper) |
---|
2786 | { |
---|
2787 | if(parts == 0) |
---|
2788 | fprintf(newFile, "%d", lower); |
---|
2789 | else |
---|
2790 | fprintf(newFile, ",%d", lower); |
---|
2791 | } |
---|
2792 | else |
---|
2793 | { |
---|
2794 | if(parts == 0) |
---|
2795 | fprintf(newFile, "%d-%d", lower, upper); |
---|
2796 | else |
---|
2797 | fprintf(newFile, ",%d-%d", lower, upper); |
---|
2798 | } |
---|
2799 | parts++; |
---|
2800 | } |
---|
2801 | k++; |
---|
2802 | } |
---|
2803 | fprintf(newFile, "\n"); |
---|
2804 | } |
---|
2805 | } |
---|
2806 | fclose(newFile); |
---|
2807 | } |
---|
2808 | else |
---|
2809 | { |
---|
2810 | if(adef->useMultipleModel) |
---|
2811 | { |
---|
2812 | printBothOpen("\nA mixed model file with model assignments for undetermined\n"); |
---|
2813 | printBothOpen("columns removed has already been printed to file %s\n",noDupModels); |
---|
2814 | } |
---|
2815 | } |
---|
2816 | |
---|
2817 | |
---|
2818 | if(!filexists(noDupFile)) |
---|
2819 | { |
---|
2820 | FILE *newFile; |
---|
2821 | |
---|
2822 | printBothOpen("Just in case you might need it, an alignment file with \n"); |
---|
2823 | if(count && !countUndeterminedColumns) |
---|
2824 | printBothOpen("sequence duplicates removed is printed to file %s\n", noDupFile); |
---|
2825 | if(!count && countUndeterminedColumns) |
---|
2826 | printBothOpen("undetermined columns removed is printed to file %s\n", noDupFile); |
---|
2827 | if(count && countUndeterminedColumns) |
---|
2828 | printBothOpen("sequence duplicates and undetermined columns removed is printed to file %s\n", noDupFile); |
---|
2829 | |
---|
2830 | newFile = myfopen(noDupFile, "wb"); |
---|
2831 | |
---|
2832 | fprintf(newFile, "%d %d\n", tr->mxtips - count, rdta->sites - countUndeterminedColumns); |
---|
2833 | |
---|
2834 | for(i = 1; i < n; i++) |
---|
2835 | { |
---|
2836 | if(!omissionList[i]) |
---|
2837 | { |
---|
2838 | fprintf(newFile, "%s ", tr->nameList[i]); |
---|
2839 | tipI = &(rdta->y[i][1]); |
---|
2840 | |
---|
2841 | for(j = 0; j < rdta->sites; j++) |
---|
2842 | { |
---|
2843 | if(undeterminedList[j + 1] == 0) |
---|
2844 | fprintf(newFile, "%c", getInverseMeaning(tr->dataVector[j + 1], tipI[j])); |
---|
2845 | } |
---|
2846 | |
---|
2847 | fprintf(newFile, "\n"); |
---|
2848 | } |
---|
2849 | } |
---|
2850 | |
---|
2851 | fclose(newFile); |
---|
2852 | } |
---|
2853 | else |
---|
2854 | { |
---|
2855 | if(count && !countUndeterminedColumns) |
---|
2856 | printBothOpen("An alignment file with sequence duplicates removed has already\n"); |
---|
2857 | if(!count && countUndeterminedColumns) |
---|
2858 | printBothOpen("An alignment file with undetermined columns removed has already\n"); |
---|
2859 | if(count && countUndeterminedColumns) |
---|
2860 | printBothOpen("An alignment file with undetermined columns and sequence duplicates removed has already\n"); |
---|
2861 | |
---|
2862 | printBothOpen("been printed to file %s\n", noDupFile); |
---|
2863 | } |
---|
2864 | } |
---|
2865 | } |
---|
2866 | |
---|
2867 | rax_free(undeterminedList); |
---|
2868 | rax_free(omissionList); |
---|
2869 | rax_free(modelList); |
---|
2870 | } |
---|
2871 | |
---|
2872 | |
---|
2873 | |
---|
2874 | |
---|
2875 | |
---|
2876 | |
---|
2877 | |
---|
2878 | static void generateBS(tree *tr, analdef *adef) |
---|
2879 | { |
---|
2880 | int |
---|
2881 | i, |
---|
2882 | j, |
---|
2883 | k, |
---|
2884 | w; |
---|
2885 | |
---|
2886 | char outName[1024], buf[16]; |
---|
2887 | FILE *of; |
---|
2888 | |
---|
2889 | assert(adef->boot != 0); |
---|
2890 | |
---|
2891 | for(i = 0; i < adef->multipleRuns; i++) |
---|
2892 | { |
---|
2893 | int |
---|
2894 | count = 0; |
---|
2895 | |
---|
2896 | computeNextReplicate(tr, &adef->boot, (int*)NULL, (int*)NULL, FALSE, FALSE); |
---|
2897 | |
---|
2898 | count = 0; |
---|
2899 | for(j = 0; j < tr->cdta->endsite; j++) |
---|
2900 | count += tr->cdta->aliaswgt[j]; |
---|
2901 | |
---|
2902 | assert(count == tr->rdta->sites); |
---|
2903 | |
---|
2904 | strcpy(outName, workdir); |
---|
2905 | strcat(outName, seq_file); |
---|
2906 | strcat(outName, ".BS"); |
---|
2907 | sprintf(buf, "%d", i); |
---|
2908 | strcat(outName, buf); |
---|
2909 | printf("Printing replicate %d to %s\n", i, outName); |
---|
2910 | |
---|
2911 | of = myfopen(outName, "wb"); |
---|
2912 | |
---|
2913 | fprintf(of, "%d %d\n", tr->mxtips, count); |
---|
2914 | |
---|
2915 | for(j = 1; j <= tr->mxtips; j++) |
---|
2916 | { |
---|
2917 | unsigned char *tip = tr->yVector[tr->nodep[j]->number]; |
---|
2918 | fprintf(of, "%s ", tr->nameList[j]); |
---|
2919 | |
---|
2920 | for(k = 0; k < tr->cdta->endsite; k++) |
---|
2921 | { |
---|
2922 | for(w = 0; w < tr->cdta->aliaswgt[k]; w++) |
---|
2923 | fprintf(of, "%c", getInverseMeaning(tr->dataVector[k], tip[k])); |
---|
2924 | } |
---|
2925 | |
---|
2926 | fprintf(of, "\n"); |
---|
2927 | } |
---|
2928 | fclose(of); |
---|
2929 | } |
---|
2930 | } |
---|
2931 | |
---|
2932 | |
---|
2933 | |
---|
2934 | |
---|
2935 | |
---|
2936 | static void splitMultiGene(tree *tr, rawdata *rdta) |
---|
2937 | { |
---|
2938 | int i, l; |
---|
2939 | int n = rdta->sites + 1; |
---|
2940 | int *modelFilter = (int *)rax_malloc(sizeof(int) * n); |
---|
2941 | int length, k; |
---|
2942 | unsigned char *tip; |
---|
2943 | FILE *outf; |
---|
2944 | char outFileName[2048]; |
---|
2945 | |
---|
2946 | /* char buf[16]; */ |
---|
2947 | |
---|
2948 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
2949 | { |
---|
2950 | strcpy(outFileName, seq_file); |
---|
2951 | |
---|
2952 | /*sprintf(buf, "%d", i);*/ |
---|
2953 | /*strcat(outFileName, ".GENE.");*/ |
---|
2954 | |
---|
2955 | strcat(outFileName, "."); |
---|
2956 | strcat(outFileName, tr->partitionData[i].partitionName); |
---|
2957 | strcat(outFileName, ".phy"); |
---|
2958 | |
---|
2959 | /*strcat(outFileName, buf);*/ |
---|
2960 | |
---|
2961 | outf = myfopen(outFileName, "wb"); |
---|
2962 | |
---|
2963 | length = 0; |
---|
2964 | |
---|
2965 | for(k = 1; k < n; k++) |
---|
2966 | { |
---|
2967 | if(tr->model[k] == i) |
---|
2968 | { |
---|
2969 | modelFilter[k] = 1; |
---|
2970 | length++; |
---|
2971 | } |
---|
2972 | else |
---|
2973 | modelFilter[k] = -1; |
---|
2974 | } |
---|
2975 | |
---|
2976 | fprintf(outf, "%d %d\n", rdta->numsp, length); |
---|
2977 | |
---|
2978 | for(l = 1; l <= rdta->numsp; l++) |
---|
2979 | { |
---|
2980 | fprintf(outf, "%s ", tr->nameList[l]); |
---|
2981 | |
---|
2982 | tip = &(rdta->y[l][0]); |
---|
2983 | |
---|
2984 | for(k = 1; k < n; k++) |
---|
2985 | { |
---|
2986 | if(modelFilter[k] == 1) |
---|
2987 | fprintf(outf, "%c", getInverseMeaning(tr->dataVector[k], tip[k])); |
---|
2988 | } |
---|
2989 | fprintf(outf, "\n"); |
---|
2990 | |
---|
2991 | } |
---|
2992 | |
---|
2993 | fclose(outf); |
---|
2994 | |
---|
2995 | printf("Wrote individual gene/partition alignment to file %s\n", outFileName); |
---|
2996 | } |
---|
2997 | |
---|
2998 | rax_free(modelFilter); |
---|
2999 | printf("Wrote all %d individual gene/partition alignments\n", tr->NumberOfModels); |
---|
3000 | printf("Exiting normally\n"); |
---|
3001 | } |
---|
3002 | |
---|
3003 | |
---|
3004 | static int countTaxaInTopology(void) |
---|
3005 | { |
---|
3006 | FILE |
---|
3007 | *f = myfopen(tree_file, "rb"); |
---|
3008 | |
---|
3009 | int |
---|
3010 | c, |
---|
3011 | taxaCount = 0; |
---|
3012 | |
---|
3013 | while((c = fgetc(f)) != EOF) |
---|
3014 | { |
---|
3015 | if(c == '(' || c == ',') |
---|
3016 | { |
---|
3017 | c = fgetc(f); |
---|
3018 | if(c == '(' || c == ',') |
---|
3019 | ungetc(c, f); |
---|
3020 | else |
---|
3021 | { |
---|
3022 | do |
---|
3023 | { |
---|
3024 | c = fgetc(f); |
---|
3025 | } |
---|
3026 | while(c != ':' && c != ')' && c != ','); |
---|
3027 | |
---|
3028 | taxaCount++; |
---|
3029 | |
---|
3030 | ungetc(c, f); |
---|
3031 | } |
---|
3032 | } |
---|
3033 | } |
---|
3034 | |
---|
3035 | printBothOpen("Found a total of %d taxa in tree file %s\n", taxaCount, tree_file); |
---|
3036 | |
---|
3037 | fclose(f); |
---|
3038 | |
---|
3039 | return taxaCount; |
---|
3040 | } |
---|
3041 | |
---|
3042 | |
---|
3043 | |
---|
3044 | |
---|
3045 | |
---|
3046 | |
---|
3047 | |
---|
3048 | static void allocPartitions(tree *tr) |
---|
3049 | { |
---|
3050 | int |
---|
3051 | i, |
---|
3052 | maxCategories = tr->maxCategories; |
---|
3053 | |
---|
3054 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
3055 | { |
---|
3056 | const partitionLengths |
---|
3057 | *pl = getPartitionLengths(&(tr->partitionData[i])); |
---|
3058 | |
---|
3059 | if(tr->useFastScaling) |
---|
3060 | tr->partitionData[i].globalScaler = (unsigned int *)rax_calloc(2 * tr->mxtips, sizeof(unsigned int)); |
---|
3061 | |
---|
3062 | |
---|
3063 | tr->partitionData[i].left = (double *)rax_malloc(pl->leftLength * (maxCategories + 1) * sizeof(double)); |
---|
3064 | tr->partitionData[i].right = (double *)rax_malloc(pl->rightLength * (maxCategories + 1) * sizeof(double)); |
---|
3065 | tr->partitionData[i].EIGN = (double*)rax_malloc(pl->eignLength * sizeof(double)); |
---|
3066 | tr->partitionData[i].EV = (double*)rax_malloc(pl->evLength * sizeof(double)); |
---|
3067 | tr->partitionData[i].EI = (double*)rax_malloc(pl->eiLength * sizeof(double)); |
---|
3068 | tr->partitionData[i].substRates = (double *)rax_malloc(pl->substRatesLength * sizeof(double)); |
---|
3069 | tr->partitionData[i].frequencies = (double*)rax_malloc(pl->frequenciesLength * sizeof(double)); |
---|
3070 | tr->partitionData[i].tipVector = (double *)rax_malloc(pl->tipVectorLength * sizeof(double)); |
---|
3071 | |
---|
3072 | |
---|
3073 | if(tr->partitionData[i].protModels == LG4 || tr->partitionData[i].protModels == LG4X) |
---|
3074 | { |
---|
3075 | int |
---|
3076 | k; |
---|
3077 | |
---|
3078 | for(k = 0; k < 4; k++) |
---|
3079 | { |
---|
3080 | tr->partitionData[i].EIGN_LG4[k] = (double*)rax_malloc(pl->eignLength * sizeof(double)); |
---|
3081 | tr->partitionData[i].EV_LG4[k] = (double*)rax_malloc(pl->evLength * sizeof(double)); |
---|
3082 | tr->partitionData[i].EI_LG4[k] = (double*)rax_malloc(pl->eiLength * sizeof(double)); |
---|
3083 | tr->partitionData[i].substRates_LG4[k] = (double *)rax_malloc(pl->substRatesLength * sizeof(double)); |
---|
3084 | tr->partitionData[i].frequencies_LG4[k] = (double*)rax_malloc(pl->frequenciesLength * sizeof(double)); |
---|
3085 | tr->partitionData[i].tipVector_LG4[k] = (double *)rax_malloc(pl->tipVectorLength * sizeof(double)); |
---|
3086 | } |
---|
3087 | } |
---|
3088 | |
---|
3089 | |
---|
3090 | tr->partitionData[i].symmetryVector = (int *)rax_malloc(pl->symmetryVectorLength * sizeof(int)); |
---|
3091 | tr->partitionData[i].frequencyGrouping = (int *)rax_malloc(pl->frequencyGroupingLength * sizeof(int)); |
---|
3092 | tr->partitionData[i].perSiteRates = (double *)rax_malloc(sizeof(double) * tr->maxCategories); |
---|
3093 | tr->partitionData[i].unscaled_perSiteRates = (double *)rax_malloc(sizeof(double) * tr->maxCategories); |
---|
3094 | |
---|
3095 | |
---|
3096 | tr->partitionData[i].nonGTR = FALSE; |
---|
3097 | |
---|
3098 | |
---|
3099 | |
---|
3100 | tr->partitionData[i].gammaRates = (double*)rax_malloc(sizeof(double) * 4); |
---|
3101 | tr->partitionData[i].yVector = (unsigned char **)rax_malloc(sizeof(unsigned char*) * (tr->mxtips + 1)); |
---|
3102 | |
---|
3103 | |
---|
3104 | tr->partitionData[i].xVector = (double **)rax_malloc(sizeof(double*) * tr->innerNodes); |
---|
3105 | tr->partitionData[i].xSpaceVector = (size_t *)rax_calloc(tr->innerNodes, sizeof(size_t)); |
---|
3106 | |
---|
3107 | tr->partitionData[i].expVector = (int **)rax_malloc(sizeof(int*) * tr->innerNodes); |
---|
3108 | tr->partitionData[i].expSpaceVector = (size_t *)rax_calloc(tr->innerNodes, sizeof(size_t)); |
---|
3109 | |
---|
3110 | tr->partitionData[i].mxtips = tr->mxtips; |
---|
3111 | |
---|
3112 | |
---|
3113 | |
---|
3114 | |
---|
3115 | #ifndef _USE_PTHREADS |
---|
3116 | { |
---|
3117 | int j; |
---|
3118 | |
---|
3119 | for(j = 1; j <= tr->mxtips; j++) |
---|
3120 | tr->partitionData[i].yVector[j] = &(tr->yVector[j][tr->partitionData[i].lower]); |
---|
3121 | } |
---|
3122 | #endif |
---|
3123 | |
---|
3124 | } |
---|
3125 | } |
---|
3126 | |
---|
3127 | #ifndef _USE_PTHREADS |
---|
3128 | |
---|
3129 | |
---|
3130 | |
---|
3131 | |
---|
3132 | |
---|
3133 | static void allocNodex (tree *tr) |
---|
3134 | { |
---|
3135 | size_t |
---|
3136 | i, |
---|
3137 | model, |
---|
3138 | offset, |
---|
3139 | memoryRequirements = 0; |
---|
3140 | |
---|
3141 | allocPartitions(tr); |
---|
3142 | |
---|
3143 | for(model = 0; model < (size_t)tr->NumberOfModels; model++) |
---|
3144 | { |
---|
3145 | size_t |
---|
3146 | width = tr->partitionData[model].upper - tr->partitionData[model].lower; |
---|
3147 | |
---|
3148 | int |
---|
3149 | undetermined, |
---|
3150 | j; |
---|
3151 | |
---|
3152 | memoryRequirements += (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states) * width; |
---|
3153 | |
---|
3154 | tr->partitionData[model].gapVectorLength = ((int)width / 32) + 1; |
---|
3155 | |
---|
3156 | tr->partitionData[model].gapVector = (unsigned int*)rax_calloc(tr->partitionData[model].gapVectorLength * 2 * tr->mxtips, sizeof(unsigned int)); |
---|
3157 | |
---|
3158 | |
---|
3159 | tr->partitionData[model].initialGapVectorSize = tr->partitionData[model].gapVectorLength * 2 * tr->mxtips * sizeof(int); |
---|
3160 | |
---|
3161 | /* always multiply by 4 due to frequent switching between CAT and GAMMA in standard RAxML */ |
---|
3162 | |
---|
3163 | tr->partitionData[model].gapColumn = (double *)rax_malloc(((size_t)tr->innerNodes) * |
---|
3164 | ((size_t)4) * |
---|
3165 | ((size_t)(tr->partitionData[model].states)) * |
---|
3166 | sizeof(double)); |
---|
3167 | |
---|
3168 | undetermined = getUndetermined(tr->partitionData[model].dataType); |
---|
3169 | |
---|
3170 | for(j = 1; j <= tr->mxtips; j++) |
---|
3171 | for(i = 0; i < width; i++) |
---|
3172 | if(tr->partitionData[model].yVector[j][i] == undetermined) |
---|
3173 | tr->partitionData[model].gapVector[tr->partitionData[model].gapVectorLength * j + i / 32] |= mask32[i % 32]; |
---|
3174 | } |
---|
3175 | |
---|
3176 | tr->perSiteLL = (double *)rax_malloc((size_t)tr->cdta->endsite * sizeof(double)); |
---|
3177 | assert(tr->perSiteLL != NULL); |
---|
3178 | |
---|
3179 | tr->sumBuffer = (double *)rax_malloc(memoryRequirements * sizeof(double)); |
---|
3180 | assert(tr->sumBuffer != NULL); |
---|
3181 | |
---|
3182 | offset = 0; |
---|
3183 | |
---|
3184 | /* C-OPT for initial testing tr->NumberOfModels will be 1 */ |
---|
3185 | |
---|
3186 | for(model = 0; model < (size_t)tr->NumberOfModels; model++) |
---|
3187 | { |
---|
3188 | size_t |
---|
3189 | lower = tr->partitionData[model].lower, |
---|
3190 | width = tr->partitionData[model].upper - lower; |
---|
3191 | |
---|
3192 | /* TODO all of this must be reset/adapted when fixModelIndices is called ! */ |
---|
3193 | |
---|
3194 | |
---|
3195 | tr->partitionData[model].sumBuffer = &tr->sumBuffer[offset]; |
---|
3196 | |
---|
3197 | |
---|
3198 | tr->partitionData[model].perSiteLL = &tr->perSiteLL[lower]; |
---|
3199 | |
---|
3200 | |
---|
3201 | tr->partitionData[model].wgt = &tr->cdta->aliaswgt[lower]; |
---|
3202 | tr->partitionData[model].invariant = &tr->invariant[lower]; |
---|
3203 | tr->partitionData[model].rateCategory = &tr->cdta->rateCategory[lower]; |
---|
3204 | |
---|
3205 | offset += (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states) * width; |
---|
3206 | } |
---|
3207 | |
---|
3208 | for(i = 0; i < tr->innerNodes; i++) |
---|
3209 | { |
---|
3210 | for(model = 0; model < (size_t)tr->NumberOfModels; model++) |
---|
3211 | { |
---|
3212 | tr->partitionData[model].expVector[i] = (int*)NULL; |
---|
3213 | tr->partitionData[model].xVector[i] = (double*)NULL; |
---|
3214 | } |
---|
3215 | } |
---|
3216 | } |
---|
3217 | |
---|
3218 | #endif |
---|
3219 | |
---|
3220 | |
---|
3221 | static void initAdef(analdef *adef) |
---|
3222 | { |
---|
3223 | adef->useSecondaryStructure = FALSE; |
---|
3224 | adef->bootstrapBranchLengths = FALSE; |
---|
3225 | adef->model = M_GTRCAT; |
---|
3226 | adef->max_rearrange = 21; |
---|
3227 | adef->stepwidth = 5; |
---|
3228 | adef->initial = adef->bestTrav = 10; |
---|
3229 | adef->initialSet = FALSE; |
---|
3230 | adef->restart = FALSE; |
---|
3231 | adef->mode = BIG_RAPID_MODE; |
---|
3232 | adef->categories = 25; |
---|
3233 | adef->boot = 0; |
---|
3234 | adef->rapidBoot = 0; |
---|
3235 | adef->useWeightFile = FALSE; |
---|
3236 | adef->checkpoints = 0; |
---|
3237 | adef->startingTreeOnly = 0; |
---|
3238 | adef->multipleRuns = 1; |
---|
3239 | adef->useMultipleModel = FALSE; |
---|
3240 | adef->likelihoodEpsilon = 0.1; |
---|
3241 | adef->constraint = FALSE; |
---|
3242 | adef->grouping = FALSE; |
---|
3243 | adef->randomStartingTree = FALSE; |
---|
3244 | adef->parsimonySeed = 0; |
---|
3245 | adef->proteinMatrix = JTT; |
---|
3246 | adef->protEmpiricalFreqs = 0; |
---|
3247 | adef->outgroup = FALSE; |
---|
3248 | adef->useInvariant = FALSE; |
---|
3249 | adef->permuteTreeoptimize = FALSE; |
---|
3250 | adef->useInvariant = FALSE; |
---|
3251 | adef->allInOne = FALSE; |
---|
3252 | adef->likelihoodTest = FALSE; |
---|
3253 | adef->perGeneBranchLengths = FALSE; |
---|
3254 | adef->generateBS = FALSE; |
---|
3255 | adef->bootStopping = FALSE; |
---|
3256 | adef->gapyness = 0.0; |
---|
3257 | adef->similarityFilterMode = 0; |
---|
3258 | adef->useExcludeFile = FALSE; |
---|
3259 | adef->userProteinModel = FALSE; |
---|
3260 | adef->computeELW = FALSE; |
---|
3261 | adef->computeDistance = FALSE; |
---|
3262 | adef->compressPatterns = TRUE; |
---|
3263 | adef->readTaxaOnly = FALSE; |
---|
3264 | adef->useBinaryModelFile = FALSE; |
---|
3265 | adef->leaveDropMode = FALSE; |
---|
3266 | adef->slidingWindowSize = 100; |
---|
3267 | adef->checkForUndeterminedSequences = TRUE; |
---|
3268 | adef->useQuartetGrouping = FALSE; |
---|
3269 | adef->alignmentFileType = PHYLIP; |
---|
3270 | adef->calculateIC = FALSE; |
---|
3271 | adef->verboseIC = FALSE; |
---|
3272 | adef->stepwiseAdditionOnly = FALSE; |
---|
3273 | } |
---|
3274 | |
---|
3275 | |
---|
3276 | |
---|
3277 | |
---|
3278 | static int modelExists(char *model, analdef *adef) |
---|
3279 | { |
---|
3280 | int i; |
---|
3281 | char thisModel[1024]; |
---|
3282 | |
---|
3283 | /********** BINARY ********************/ |
---|
3284 | |
---|
3285 | if(strcmp(model, "BINGAMMAI\0") == 0) |
---|
3286 | { |
---|
3287 | adef->model = M_BINGAMMA; |
---|
3288 | adef->useInvariant = TRUE; |
---|
3289 | return 1; |
---|
3290 | } |
---|
3291 | |
---|
3292 | if(strcmp(model, "BINGAMMA\0") == 0) |
---|
3293 | { |
---|
3294 | adef->model = M_BINGAMMA; |
---|
3295 | adef->useInvariant = FALSE; |
---|
3296 | return 1; |
---|
3297 | } |
---|
3298 | |
---|
3299 | if(strcmp(model, "BINCAT\0") == 0) |
---|
3300 | { |
---|
3301 | adef->model = M_BINCAT; |
---|
3302 | adef->useInvariant = FALSE; |
---|
3303 | return 1; |
---|
3304 | } |
---|
3305 | |
---|
3306 | if(strcmp(model, "BINCATI\0") == 0) |
---|
3307 | { |
---|
3308 | adef->model = M_BINCAT; |
---|
3309 | adef->useInvariant = TRUE; |
---|
3310 | return 1; |
---|
3311 | } |
---|
3312 | |
---|
3313 | /*********** 32 state ****************************/ |
---|
3314 | |
---|
3315 | if(strcmp(model, "MULTIGAMMAI\0") == 0) |
---|
3316 | { |
---|
3317 | adef->model = M_32GAMMA; |
---|
3318 | adef->useInvariant = TRUE; |
---|
3319 | return 1; |
---|
3320 | } |
---|
3321 | |
---|
3322 | if(strcmp(model, "MULTIGAMMA\0") == 0) |
---|
3323 | { |
---|
3324 | adef->model = M_32GAMMA; |
---|
3325 | adef->useInvariant = FALSE; |
---|
3326 | return 1; |
---|
3327 | } |
---|
3328 | |
---|
3329 | if(strcmp(model, "MULTICAT\0") == 0) |
---|
3330 | { |
---|
3331 | adef->model = M_32CAT; |
---|
3332 | adef->useInvariant = FALSE; |
---|
3333 | return 1; |
---|
3334 | } |
---|
3335 | |
---|
3336 | if(strcmp(model, "MULTICATI\0") == 0) |
---|
3337 | { |
---|
3338 | adef->model = M_32CAT; |
---|
3339 | adef->useInvariant = TRUE; |
---|
3340 | return 1; |
---|
3341 | } |
---|
3342 | |
---|
3343 | /*********** 64 state ****************************/ |
---|
3344 | |
---|
3345 | if(strcmp(model, "CODONGAMMAI\0") == 0) |
---|
3346 | { |
---|
3347 | adef->model = M_64GAMMA; |
---|
3348 | adef->useInvariant = TRUE; |
---|
3349 | return 1; |
---|
3350 | } |
---|
3351 | |
---|
3352 | if(strcmp(model, "CODONGAMMA\0") == 0) |
---|
3353 | { |
---|
3354 | adef->model = M_64GAMMA; |
---|
3355 | adef->useInvariant = FALSE; |
---|
3356 | return 1; |
---|
3357 | } |
---|
3358 | |
---|
3359 | if(strcmp(model, "CODONCAT\0") == 0) |
---|
3360 | { |
---|
3361 | adef->model = M_64CAT; |
---|
3362 | adef->useInvariant = FALSE; |
---|
3363 | return 1; |
---|
3364 | } |
---|
3365 | |
---|
3366 | if(strcmp(model, "CODONCATI\0") == 0) |
---|
3367 | { |
---|
3368 | adef->model = M_64CAT; |
---|
3369 | adef->useInvariant = TRUE; |
---|
3370 | return 1; |
---|
3371 | } |
---|
3372 | |
---|
3373 | |
---|
3374 | /*********** DNA **********************/ |
---|
3375 | |
---|
3376 | if(strcmp(model, "GTRGAMMAI\0") == 0) |
---|
3377 | { |
---|
3378 | adef->model = M_GTRGAMMA; |
---|
3379 | adef->useInvariant = TRUE; |
---|
3380 | return 1; |
---|
3381 | } |
---|
3382 | |
---|
3383 | if(strcmp(model, "GTRGAMMA\0") == 0) |
---|
3384 | { |
---|
3385 | adef->model = M_GTRGAMMA; |
---|
3386 | adef->useInvariant = FALSE; |
---|
3387 | return 1; |
---|
3388 | } |
---|
3389 | |
---|
3390 | |
---|
3391 | |
---|
3392 | if(strcmp(model, "GTRCAT\0") == 0) |
---|
3393 | { |
---|
3394 | adef->model = M_GTRCAT; |
---|
3395 | adef->useInvariant = FALSE; |
---|
3396 | return 1; |
---|
3397 | } |
---|
3398 | |
---|
3399 | |
---|
3400 | |
---|
3401 | if(strcmp(model, "GTRCATI\0") == 0) |
---|
3402 | { |
---|
3403 | adef->model = M_GTRCAT; |
---|
3404 | adef->useInvariant = TRUE; |
---|
3405 | return 1; |
---|
3406 | } |
---|
3407 | |
---|
3408 | |
---|
3409 | |
---|
3410 | |
---|
3411 | /*************** AA GTR ********************/ |
---|
3412 | |
---|
3413 | /* TODO empirical FREQS */ |
---|
3414 | |
---|
3415 | if(strcmp(model, "PROTCATGTR\0") == 0) |
---|
3416 | { |
---|
3417 | adef->model = M_PROTCAT; |
---|
3418 | adef->proteinMatrix = GTR; |
---|
3419 | adef->useInvariant = FALSE; |
---|
3420 | adef->protEmpiricalFreqs = 1; |
---|
3421 | return 1; |
---|
3422 | } |
---|
3423 | |
---|
3424 | if(strcmp(model, "PROTCATIGTR\0") == 0) |
---|
3425 | { |
---|
3426 | adef->model = M_PROTCAT; |
---|
3427 | adef->proteinMatrix = GTR; |
---|
3428 | adef->useInvariant = TRUE; |
---|
3429 | return 1; |
---|
3430 | } |
---|
3431 | |
---|
3432 | if(strcmp(model, "PROTGAMMAGTR\0") == 0) |
---|
3433 | { |
---|
3434 | adef->model = M_PROTGAMMA; |
---|
3435 | adef->proteinMatrix = GTR; |
---|
3436 | adef->useInvariant = FALSE; |
---|
3437 | adef->protEmpiricalFreqs = 1; |
---|
3438 | return 1; |
---|
3439 | } |
---|
3440 | |
---|
3441 | if(strcmp(model, "PROTGAMMAIGTR\0") == 0) |
---|
3442 | { |
---|
3443 | adef->model = M_PROTGAMMA; |
---|
3444 | adef->proteinMatrix = GTR; |
---|
3445 | adef->useInvariant = TRUE; |
---|
3446 | adef->protEmpiricalFreqs = 1; |
---|
3447 | return 1; |
---|
3448 | } |
---|
3449 | |
---|
3450 | /*************** AA GTR_UNLINKED ********************/ |
---|
3451 | |
---|
3452 | if(strcmp(model, "PROTCATGTR_UNLINKED\0") == 0) |
---|
3453 | { |
---|
3454 | printf("Advisory: GTR_UNLINKED only has an effect if specified in the partition file\n"); |
---|
3455 | |
---|
3456 | adef->model = M_PROTCAT; |
---|
3457 | adef->proteinMatrix = GTR_UNLINKED; |
---|
3458 | adef->useInvariant = FALSE; |
---|
3459 | adef->protEmpiricalFreqs = 1; |
---|
3460 | return 1; |
---|
3461 | } |
---|
3462 | |
---|
3463 | if(strcmp(model, "PROTCATIGTR_UNLINKED\0") == 0) |
---|
3464 | { |
---|
3465 | printf("Advisory: GTR_UNLINKED only has an effect if specified in the partition file\n"); |
---|
3466 | |
---|
3467 | adef->model = M_PROTCAT; |
---|
3468 | adef->proteinMatrix = GTR_UNLINKED; |
---|
3469 | adef->useInvariant = TRUE; |
---|
3470 | adef->protEmpiricalFreqs = 1; |
---|
3471 | return 1; |
---|
3472 | } |
---|
3473 | |
---|
3474 | if(strcmp(model, "PROTGAMMAGTR_UNLINKED\0") == 0) |
---|
3475 | { |
---|
3476 | printf("Advisory: GTR_UNLINKED only has an effect if specified in the partition file\n"); |
---|
3477 | |
---|
3478 | adef->model = M_PROTGAMMA; |
---|
3479 | adef->proteinMatrix = GTR_UNLINKED; |
---|
3480 | adef->useInvariant = FALSE; |
---|
3481 | adef->protEmpiricalFreqs = 1; |
---|
3482 | return 1; |
---|
3483 | } |
---|
3484 | |
---|
3485 | if(strcmp(model, "PROTGAMMAIGTR_UNLINKED\0") == 0) |
---|
3486 | { |
---|
3487 | printf("Advisory: GTR_UNLINKED only has an effect if specified in the partition file\n"); |
---|
3488 | |
---|
3489 | adef->model = M_PROTGAMMA; |
---|
3490 | adef->proteinMatrix = GTR_UNLINKED; |
---|
3491 | adef->useInvariant = TRUE; |
---|
3492 | return 1; |
---|
3493 | } |
---|
3494 | |
---|
3495 | /****************** AA ************************/ |
---|
3496 | |
---|
3497 | for(i = 0; i < NUM_PROT_MODELS - 2; i++) |
---|
3498 | { |
---|
3499 | /* check CAT */ |
---|
3500 | |
---|
3501 | strcpy(thisModel, "PROTCAT"); |
---|
3502 | strcat(thisModel, protModels[i]); |
---|
3503 | |
---|
3504 | if(strcmp(model, thisModel) == 0) |
---|
3505 | { |
---|
3506 | adef->model = M_PROTCAT; |
---|
3507 | adef->proteinMatrix = i; |
---|
3508 | return 1; |
---|
3509 | } |
---|
3510 | |
---|
3511 | /* check CATF */ |
---|
3512 | |
---|
3513 | strcpy(thisModel, "PROTCAT"); |
---|
3514 | strcat(thisModel, protModels[i]); |
---|
3515 | strcat(thisModel, "F"); |
---|
3516 | |
---|
3517 | if(strcmp(model, thisModel) == 0) |
---|
3518 | { |
---|
3519 | adef->model = M_PROTCAT; |
---|
3520 | adef->proteinMatrix = i; |
---|
3521 | adef->protEmpiricalFreqs = 1; |
---|
3522 | return 1; |
---|
3523 | } |
---|
3524 | |
---|
3525 | |
---|
3526 | /* check CATI */ |
---|
3527 | |
---|
3528 | strcpy(thisModel, "PROTCATI"); |
---|
3529 | strcat(thisModel, protModels[i]); |
---|
3530 | |
---|
3531 | if(strcmp(model, thisModel) == 0) |
---|
3532 | { |
---|
3533 | adef->model = M_PROTCAT; |
---|
3534 | adef->proteinMatrix = i; |
---|
3535 | adef->useInvariant = TRUE; |
---|
3536 | return 1; |
---|
3537 | } |
---|
3538 | |
---|
3539 | /* check CATIF */ |
---|
3540 | |
---|
3541 | strcpy(thisModel, "PROTCATI"); |
---|
3542 | strcat(thisModel, protModels[i]); |
---|
3543 | strcat(thisModel, "F"); |
---|
3544 | |
---|
3545 | if(strcmp(model, thisModel) == 0) |
---|
3546 | { |
---|
3547 | adef->model = M_PROTCAT; |
---|
3548 | adef->proteinMatrix = i; |
---|
3549 | adef->protEmpiricalFreqs = 1; |
---|
3550 | adef->useInvariant = TRUE; |
---|
3551 | return 1; |
---|
3552 | } |
---|
3553 | |
---|
3554 | |
---|
3555 | /****************check GAMMA ************************/ |
---|
3556 | |
---|
3557 | strcpy(thisModel, "PROTGAMMA"); |
---|
3558 | strcat(thisModel, protModels[i]); |
---|
3559 | |
---|
3560 | if(strcmp(model, thisModel) == 0) |
---|
3561 | { |
---|
3562 | adef->model = M_PROTGAMMA; |
---|
3563 | adef->proteinMatrix = i; |
---|
3564 | adef->useInvariant = FALSE; |
---|
3565 | return 1; |
---|
3566 | } |
---|
3567 | |
---|
3568 | |
---|
3569 | |
---|
3570 | |
---|
3571 | /*check GAMMAI*/ |
---|
3572 | |
---|
3573 | strcpy(thisModel, "PROTGAMMAI"); |
---|
3574 | strcat(thisModel, protModels[i]); |
---|
3575 | |
---|
3576 | if(strcmp(model, thisModel) == 0) |
---|
3577 | { |
---|
3578 | adef->model = M_PROTGAMMA; |
---|
3579 | adef->proteinMatrix = i; |
---|
3580 | adef->useInvariant = TRUE; |
---|
3581 | return 1; |
---|
3582 | } |
---|
3583 | |
---|
3584 | |
---|
3585 | /* check GAMMAmodelF */ |
---|
3586 | |
---|
3587 | strcpy(thisModel, "PROTGAMMA"); |
---|
3588 | strcat(thisModel, protModels[i]); |
---|
3589 | strcat(thisModel, "F"); |
---|
3590 | |
---|
3591 | if(strcmp(model, thisModel) == 0) |
---|
3592 | { |
---|
3593 | adef->model = M_PROTGAMMA; |
---|
3594 | adef->proteinMatrix = i; |
---|
3595 | adef->protEmpiricalFreqs = 1; |
---|
3596 | adef->useInvariant = FALSE; |
---|
3597 | return 1; |
---|
3598 | } |
---|
3599 | |
---|
3600 | |
---|
3601 | /* check GAMMAImodelF */ |
---|
3602 | |
---|
3603 | strcpy(thisModel, "PROTGAMMAI"); |
---|
3604 | strcat(thisModel, protModels[i]); |
---|
3605 | strcat(thisModel, "F"); |
---|
3606 | |
---|
3607 | if(strcmp(model, thisModel) == 0) |
---|
3608 | { |
---|
3609 | adef->model = M_PROTGAMMA; |
---|
3610 | adef->proteinMatrix = i; |
---|
3611 | adef->protEmpiricalFreqs = 1; |
---|
3612 | adef->useInvariant = TRUE; |
---|
3613 | return 1; |
---|
3614 | } |
---|
3615 | |
---|
3616 | } |
---|
3617 | |
---|
3618 | /*********************************************************************************/ |
---|
3619 | |
---|
3620 | |
---|
3621 | |
---|
3622 | return 0; |
---|
3623 | } |
---|
3624 | |
---|
3625 | |
---|
3626 | |
---|
/*
 * Small getopt()-style command line scanner that keeps the argument index
 * and the option argument in caller-supplied variables.
 *
 * argc, argv  the argument vector to scan
 * opts        the accepted option characters; a character followed by ':'
 *             takes an argument
 * optind      in/out: index of the argv element to look at next
 * optarg      out: set to the option's argument, or to 0 for an option
 *             that takes none; not modified when '?' or -1 is returned
 *
 * Returns the option character, '?' after printing a message for an
 * unknown option or a missing argument, and -1 when argv[*optind] is not
 * an option word (index out of range, no leading '-', a lone "-") or is
 * the end-of-options marker "--", which is skipped.
 *
 * The position inside a word of clustered options ("-ab") is kept in a
 * function-local static, so only one argument vector can be scanned at a
 * time.
 */
static int mygetopt(int argc, char **argv, const char *opts, int *optind, char **optarg)
{
  static int
    sp = 1;

  int
    c;

  const char
    *cp;

  if(sp == 1)
    {
      if(*optind >= argc || argv[*optind][0] != '-' || argv[*optind][1] == '\0')
        return -1;

      /* This test has to be made at the start of a word: "--" can never be
         seen while sp > 1, so testing it in that case made it dead code and
         "--" was reported as an illegal option. */
      if(strcmp(argv[*optind], "--") == 0)
        {
          *optind = *optind + 1;
          return -1;
        }
    }

  c = argv[*optind][sp];

  if(c == ':' || (cp = strchr(opts, c)) == NULL)
    {
      printf(": illegal option -- %c \n", c);

      /* step over the offending character, moving on to the next word if it was the last one */
      if(argv[*optind][++sp] == '\0')
        {
          *optind = *optind + 1;
          sp = 1;
        }

      return '?';
    }

  if(cp[1] == ':')
    {
      /* option with argument: either the rest of this word or the next word */
      if(argv[*optind][sp + 1] != '\0')
        {
          *optarg = &argv[*optind][sp + 1];
          *optind = *optind + 1;
        }
      else
        {
          *optind = *optind + 1;

          if(*optind >= argc)
            {
              printf(": option requires an argument -- %c\n", c);
              sp = 1;
              return '?';
            }

          *optarg = argv[*optind];
          *optind = *optind + 1;
        }

      sp = 1;
    }
  else
    {
      /* option without argument: advance within the word or to the next word */
      if(argv[*optind][++sp] == '\0')
        {
          sp = 1;
          *optind = *optind + 1;
        }

      *optarg = 0;
    }

  return c;
}
3699 | |
---|
3700 | static void checkOutgroups(tree *tr, analdef *adef) |
---|
3701 | { |
---|
3702 | if(adef->outgroup) |
---|
3703 | { |
---|
3704 | boolean found; |
---|
3705 | int i, j; |
---|
3706 | |
---|
3707 | for(j = 0; j < tr->numberOfOutgroups; j++) |
---|
3708 | { |
---|
3709 | found = FALSE; |
---|
3710 | for(i = 1; (i <= tr->mxtips) && !found; i++) |
---|
3711 | { |
---|
3712 | if(strcmp(tr->nameList[i], tr->outgroups[j]) == 0) |
---|
3713 | { |
---|
3714 | tr->outgroupNums[j] = i; |
---|
3715 | found = TRUE; |
---|
3716 | } |
---|
3717 | } |
---|
3718 | if(!found) |
---|
3719 | { |
---|
3720 | printf("Error, the outgroup name \"%s\" you specified can not be found in the alignment, exiting ....\n", tr->outgroups[j]); |
---|
3721 | errorExit(-1); |
---|
3722 | } |
---|
3723 | } |
---|
3724 | } |
---|
3725 | |
---|
3726 | } |
---|
3727 | |
---|
3728 | static void parseOutgroups(char *outgr, tree *tr) |
---|
3729 | { |
---|
3730 | int count = 1, i, k; |
---|
3731 | char name[nmlngth]; |
---|
3732 | |
---|
3733 | i = 0; |
---|
3734 | while(outgr[i] != '\0') |
---|
3735 | { |
---|
3736 | if(outgr[i] == ',') |
---|
3737 | count++; |
---|
3738 | i++; |
---|
3739 | } |
---|
3740 | |
---|
3741 | tr->numberOfOutgroups = count; |
---|
3742 | |
---|
3743 | tr->outgroups = (char **)rax_malloc(sizeof(char *) * count); |
---|
3744 | |
---|
3745 | for(i = 0; i < tr->numberOfOutgroups; i++) |
---|
3746 | tr->outgroups[i] = (char *)rax_malloc(sizeof(char) * nmlngth); |
---|
3747 | |
---|
3748 | tr->outgroupNums = (int *)rax_malloc(sizeof(int) * count); |
---|
3749 | |
---|
3750 | i = 0; |
---|
3751 | k = 0; |
---|
3752 | count = 0; |
---|
3753 | while(outgr[i] != '\0') |
---|
3754 | { |
---|
3755 | if(outgr[i] == ',') |
---|
3756 | { |
---|
3757 | name[k] = '\0'; |
---|
3758 | strcpy(tr->outgroups[count], name); |
---|
3759 | count++; |
---|
3760 | k = 0; |
---|
3761 | } |
---|
3762 | else |
---|
3763 | { |
---|
3764 | name[k] = outgr[i]; |
---|
3765 | k++; |
---|
3766 | } |
---|
3767 | i++; |
---|
3768 | } |
---|
3769 | |
---|
3770 | name[k] = '\0'; |
---|
3771 | strcpy(tr->outgroups[count], name); |
---|
3772 | |
---|
3773 | /*for(i = 0; i < tr->numberOfOutgroups; i++) |
---|
3774 | printf("%d %s \n", i, tr->outgroups[i]);*/ |
---|
3775 | |
---|
3776 | |
---|
3777 | /*printf("%s \n", name);*/ |
---|
3778 | } |
---|
3779 | |
---|
3780 | |
---|
3781 | /*********************************** OUTGROUP STUFF END *********************************************************/ |
---|
3782 | |
---|
3783 | |
---|
3784 | static void printVersionInfo(boolean terminal, FILE *infoFile) |
---|
3785 | { |
---|
3786 | char |
---|
3787 | text[7][1024]; |
---|
3788 | |
---|
3789 | int |
---|
3790 | i; |
---|
3791 | |
---|
3792 | sprintf(text[0], "\n\nThis is %s version %s released by Alexandros Stamatakis on %s.\n\n", programName, programVersion, programDate); |
---|
3793 | sprintf(text[1], "With greatly appreciated code contributions by:\n"); |
---|
3794 | sprintf(text[2], "Andre Aberer (HITS)\n"); |
---|
3795 | sprintf(text[3], "Simon Berger (HITS)\n"); |
---|
3796 | sprintf(text[4], "Nick Pattengale (Sandia)\n"); |
---|
3797 | sprintf(text[5], "Wayne Pfeiffer (SDSC)\n"); |
---|
3798 | sprintf(text[6], "Akifumi S. Tanabe (NRIFS)\n\n"); |
---|
3799 | |
---|
3800 | for(i = 0; i < 7; i++) |
---|
3801 | { |
---|
3802 | if(terminal) |
---|
3803 | printf("%s", text[i]); |
---|
3804 | else |
---|
3805 | printBoth(infoFile, text[i]); |
---|
3806 | } |
---|
3807 | |
---|
3808 | } |
---|
3809 | |
---|
3810 | static void printMinusFUsage(void) |
---|
3811 | { |
---|
3812 | printf("\n"); |
---|
3813 | printf(" \"-f a\": rapid Bootstrap analysis and search for best-scoring ML tree in one program run\n"); |
---|
3814 | |
---|
3815 | printf(" \"-f A\": compute marginal ancestral states on a ROOTED reference tree provided with \"t\"\n"); |
---|
3816 | |
---|
3817 | printf(" \"-f b\": draw bipartition information on a tree provided with \"-t\" based on multiple trees\n"); |
---|
3818 | printf(" (e.g., from a bootstrap) in a file specifed by \"-z\"\n"); |
---|
3819 | |
---|
3820 | printf(" \"-f B\": optimize br-len scaler and other model parameters (GTR, alpha, etc.) on a tree provided with \"-t\".\n"); |
---|
3821 | printf(" The tree needs to contain branch lengths. The branch lengths will not be optimized, just scaled by a single common value.\n"); |
---|
3822 | |
---|
3823 | |
---|
3824 | printf(" \"-f c\": check if the alignment can be properly read by RAxML\n"); |
---|
3825 | |
---|
3826 | printf(" \"-f C\": ancestral sequence test for Jiajie, users will also need to provide a list of taxon names via -Y separated by whitespaces\n"); |
---|
3827 | |
---|
3828 | printf(" \"-f d\": new rapid hill-climbing \n"); |
---|
3829 | printf(" DEFAULT: ON\n"); |
---|
3830 | |
---|
3831 | printf(" \"-f e\": optimize model+branch lengths for given input tree under GAMMA/GAMMAI only\n"); |
---|
3832 | |
---|
3833 | |
---|
3834 | |
---|
3835 | printf(" \"-f E\": execute very fast experimental tree search, at present only for testing\n"); |
---|
3836 | |
---|
3837 | printf(" \"-f F\": execute fast experimental tree search, at present only for testing\n"); |
---|
3838 | |
---|
3839 | printf(" \"-f g\": compute per site log Likelihoods for one ore more trees passed via\n"); |
---|
3840 | printf(" \"-z\" and write them to a file that can be read by CONSEL\n"); |
---|
3841 | printf(" The model parameters will be estimated on the first tree only!\n"); |
---|
3842 | |
---|
3843 | printf(" \"-f G\": compute per site log Likelihoods for one ore more trees passed via\n"); |
---|
3844 | printf(" \"-z\" and write them to a file that can be read by CONSEL.\n"); |
---|
3845 | printf(" The model parameters will be re-estimated for each tree\n"); |
---|
3846 | |
---|
3847 | printf(" \"-f h\": compute log likelihood test (SH-test) between best tree passed via \"-t\"\n"); |
---|
3848 | printf(" and a bunch of other trees passed via \"-z\" \n"); |
---|
3849 | printf(" The model parameters will be estimated on the first tree only!\n"); |
---|
3850 | |
---|
3851 | printf(" \"-f H\": compute log likelihood test (SH-test) between best tree passed via \"-t\"\n"); |
---|
3852 | printf(" and a bunch of other trees passed via \"-z\" \n"); |
---|
3853 | printf(" The model parameters will be re-estimated for each tree\n"); |
---|
3854 | |
---|
3855 | printf(" \"-f i\": calculate IC and TC scores (Salichos and Rokas 2013) on a tree provided with \"-t\" based on multiple trees\n"); |
---|
3856 | printf(" (e.g., from a bootstrap) in a file specifed by \"-z\"\n"); |
---|
3857 | |
---|
3858 | printf(" \"-f I\": a simple tree rooting algorithm for unrooted trees.\n"); |
---|
3859 | printf(" It roots the tree by rooting it at the branch that best balances the subtree lengths\n"); |
---|
3860 | printf(" (sum over branches in the subtrees) of the left and right subtree.\n"); |
---|
3861 | printf(" A branch with an optimal balance does not always exist!\n"); |
---|
3862 | printf(" You need to specify the tree you want to root via \"-t\".\n"); |
---|
3863 | |
---|
3864 | printf(" \"-f j\": generate a bunch of bootstrapped alignment files from an original alignemnt file.\n"); |
---|
3865 | printf(" You need to specify a seed with \"-b\" and the number of replicates with \"-#\" \n"); |
---|
3866 | |
---|
3867 | printf(" \"-f J\": Compute SH-like support values on a given tree passed via \"-t\".\n"); |
---|
3868 | |
---|
3869 | printf(" \"-f m\": compare bipartitions between two bunches of trees passed via \"-t\" and \"-z\" \n"); |
---|
3870 | printf(" respectively. This will return the Pearson correlation between all bipartitions found\n"); |
---|
3871 | printf(" in the two tree files. A file called RAxML_bipartitionFrequencies.outpuFileName\n"); |
---|
3872 | printf(" will be printed that contains the pair-wise bipartition frequencies of the two sets\n"); |
---|
3873 | |
---|
3874 | printf(" \"-f n\": compute the log likelihood score of all trees contained in a tree file provided by\n"); |
---|
3875 | printf(" \"-z\" under GAMMA or GAMMA+P-Invar\n"); |
---|
3876 | printf(" The model parameters will be estimated on the first tree only!\n"); |
---|
3877 | |
---|
3878 | printf(" \"-f N\": compute the log likelihood score of all trees contained in a tree file provided by\n"); |
---|
3879 | printf(" \"-z\" under GAMMA or GAMMA+P-Invar\n"); |
---|
3880 | printf(" The model parameters will be re-estimated for each tree\n"); |
---|
3881 | |
---|
3882 | |
---|
3883 | printf(" \"-f o\": old and slower rapid hill-climbing without heuristic cutoff\n"); |
---|
3884 | |
---|
3885 | printf(" \"-f p\": perform pure stepwise MP addition of new sequences to an incomplete starting tree and exit\n"); |
---|
3886 | |
---|
3887 | printf(" \"-f q\": fast quartet calculator\n"); |
---|
3888 | |
---|
3889 | printf(" \"-f r\": compute pairwise Robinson-Foulds (RF) distances between all pairs of trees in a tree file passed via \"-z\" \n"); |
---|
3890 | printf(" if the trees have node labales represented as integer support values the program will also compute two flavors of\n"); |
---|
3891 | printf(" the weighted Robinson-Foulds (WRF) distance\n"); |
---|
3892 | |
---|
3893 | printf(" \"-f R\": compute all pairwise Robinson-Foulds (RF) distances between a large reference tree passed via \"-t\" \n"); |
---|
3894 | printf(" and many smaller trees (that must have a subset of the taxa of the large tree) passed via \"-z\".\n"); |
---|
3895 | printf(" This option is intended for checking the plausibility of very large phylogenies that can not be inspected\n"); |
---|
3896 | printf(" visually any more.\n"); |
---|
3897 | |
---|
3898 | printf(" \"-f s\": split up a multi-gene partitioned alignment into the respective subalignments \n"); |
---|
3899 | |
---|
3900 | printf(" \"-f S\": compute site-specific placement bias using a leave one out test inspired by the evolutionary placement algorithm\n"); |
---|
3901 | |
---|
3902 | printf(" \"-f t\": do randomized tree searches on one fixed starting tree\n"); |
---|
3903 | |
---|
3904 | printf(" \"-f T\": do final thorough optimization of ML tree from rapid bootstrap search in stand-alone mode\n"); |
---|
3905 | |
---|
3906 | printf(" \"-f u\": execute morphological weight calibration using maximum likelihood, this will return a weight vector.\n"); |
---|
3907 | printf(" you need to provide a morphological alignment and a reference tree via \"-t\" \n"); |
---|
3908 | |
---|
3909 | printf(" \"-f v\": classify a bunch of environmental sequences into a reference tree using thorough read insertions\n"); |
---|
3910 | printf(" you will need to start RAxML with a non-comprehensive reference tree and an alignment containing all sequences (reference + query)\n"); |
---|
3911 | |
---|
3912 | printf(" \"-f V\": classify a bunch of environmental sequences into a reference tree using thorough read insertions\n"); |
---|
3913 | printf(" you will need to start RAxML with a non-comprehensive reference tree and an alignment containing all sequences (reference + query)\n"); |
---|
3914 | printf(" WARNING: this is a test implementation for more efficient handling of multi-gene/whole-genome datasets!\n"); |
---|
3915 | |
---|
3916 | printf(" \"-f w\": compute ELW test on a bunch of trees passed via \"-z\" \n"); |
---|
3917 | printf(" The model parameters will be estimated on the first tree only!\n"); |
---|
3918 | |
---|
3919 | printf(" \"-f W\": compute ELW test on a bunch of trees passed via \"-z\" \n"); |
---|
3920 | printf(" The model parameters will be re-estimated for each tree\n"); |
---|
3921 | |
---|
3922 | printf(" \"-f x\": compute pair-wise ML distances, ML model parameters will be estimated on an MP \n"); |
---|
3923 | printf(" starting tree or a user-defined tree passed via \"-t\", only allowed for GAMMA-based\n"); |
---|
3924 | printf(" models of rate heterogeneity\n"); |
---|
3925 | |
---|
3926 | printf(" \"-f y\": classify a bunch of environmental sequences into a reference tree using parsimony\n"); |
---|
3927 | printf(" you will need to start RAxML with a non-comprehensive reference tree and an alignment containing all sequences (reference + query)\n"); |
---|
3928 | |
---|
3929 | printf("\n"); |
---|
3930 | printf(" DEFAULT for \"-f\": new rapid hill climbing\n"); |
---|
3931 | |
---|
3932 | printf("\n"); |
---|
3933 | } |
---|
3934 | |
---|
3935 | |
---|
3936 | static void printREADME(void) |
---|
3937 | { |
---|
3938 | printVersionInfo(TRUE, (FILE*)NULL); |
---|
3939 | printf("\n"); |
---|
3940 | printf("Please also consult the RAxML-manual\n"); |
---|
3941 | printf("\nTo report bugs send an email to stamatak@cs.tum.edu\n"); |
---|
3942 | printf("Please send me all input files, the exact invocation, details of the HW and operating system,\n"); |
---|
3943 | printf("as well as all error messages printed to screen.\n\n\n"); |
---|
3944 | |
---|
3945 | printf("raxmlHPC[-SSE3|-PTHREADS|-PTHREADS-SSE3|-HYBRID|-HYBRID-SSE3]\n"); |
---|
3946 | printf(" -s sequenceFileName -n outputFileName -m substitutionModel\n"); |
---|
3947 | printf(" [-a weightFileName] [-A secondaryStructureSubstModel]\n"); |
---|
3948 | printf(" [-b bootstrapRandomNumberSeed] [-B wcCriterionThreshold]\n"); |
---|
3949 | printf(" [-c numberOfCategories] [-C] [-d] [-D]\n"); |
---|
3950 | printf(" [-e likelihoodEpsilon] [-E excludeFileName]\n"); |
---|
3951 | printf(" [-f a|A|b|B|c|C|d|e|E|F|g|G|h|H|i|I|j|J|m|n|N|o|p|q|r|R|s|S|t|T|u|v|V|w|W|x|y] [-F]\n"); |
---|
3952 | printf(" [-g groupingFileName] [-G placementThreshold] [-h]\n"); |
---|
3953 | printf(" [-i initialRearrangementSetting] [-I autoFC|autoMR|autoMRE|autoMRE_IGN]\n"); |
---|
3954 | printf(" [-j] [-J MR|MR_DROP|MRE|STRICT|STRICT_DROP|T_<PERCENT>] [-k] [-K] \n"); |
---|
3955 | printf(" [-L MR|MRE|T_<PERCENT>] [-M]\n"); |
---|
3956 | printf(" [-o outGroupName1[,outGroupName2[,...]]][-O]\n"); |
---|
3957 | printf(" [-p parsimonyRandomSeed] [-P proteinModel]\n"); |
---|
3958 | printf(" [-q multipleModelFileName] [-r binaryConstraintTree]\n"); |
---|
3959 | printf(" [-R binaryModelParamFile] [-S secondaryStructureFile] [-t userStartingTree]\n"); |
---|
3960 | printf(" [-T numberOfThreads] [-u] [-U] [-v] [-V] [-w outputDirectory] [-W slidingWindowSize]\n"); |
---|
3961 | printf(" [-x rapidBootstrapRandomNumberSeed] [-X] [-y] [-Y quartetGroupingFileName|ancestralSequenceCandidatesFileName]\n"); |
---|
3962 | printf(" [-z multipleTreesFile] [-#|-N numberOfRuns|autoFC|autoMR|autoMRE|autoMRE_IGN]\n"); |
---|
3963 | printf("\n"); |
---|
3964 | printf(" -a Specify a column weight file name to assign individual weights to each column of \n"); |
---|
3965 | printf(" the alignment. Those weights must be integers separated by any type and number \n"); |
---|
3966 | printf(" of whitespaces whithin a separate file, see file \"example_weights\" for an example.\n"); |
---|
3967 | printf("\n"); |
---|
3968 | printf(" -A Specify one of the secondary structure substitution models implemented in RAxML.\n"); |
---|
3969 | printf(" The same nomenclature as in the PHASE manual is used, available models: \n"); |
---|
3970 | printf(" S6A, S6B, S6C, S6D, S6E, S7A, S7B, S7C, S7D, S7E, S7F, S16, S16A, S16B\n"); |
---|
3971 | printf("\n"); |
---|
3972 | printf(" DEFAULT: 16-state GTR model (S16)\n"); |
---|
3973 | printf("\n"); |
---|
3974 | printf(" -b Specify an integer number (random seed) and turn on bootstrapping\n"); |
---|
3975 | printf("\n"); |
---|
3976 | printf(" DEFAULT: OFF\n"); |
---|
3977 | printf("\n"); |
---|
3978 | printf(" -B specify a floating point number between 0.0 and 1.0 that will be used as cutoff threshold \n"); |
---|
3979 | printf(" for the MR-based bootstopping criteria. The recommended setting is 0.03.\n"); |
---|
3980 | printf("\n"); |
---|
3981 | printf(" DEFAULT: 0.03 (recommended empirically determined setting)\n"); |
---|
3982 | printf("\n"); |
---|
3983 | printf(" -c Specify number of distinct rate catgories for RAxML when modelOfEvolution\n"); |
---|
3984 | printf(" is set to GTRCAT or GTRMIX\n"); |
---|
3985 | printf(" Individual per-site rates are categorized into numberOfCategories rate \n"); |
---|
3986 | printf(" categories to accelerate computations. \n"); |
---|
3987 | printf("\n"); |
---|
3988 | printf(" DEFAULT: 25\n"); |
---|
3989 | printf("\n"); |
---|
3990 | printf(" -C Enable verbose output for the \"-L\" and \"-f i\" options. This will produce more, as well as more verbose output files\n"); |
---|
3991 | printf("\n"); |
---|
3992 | printf(" DEFAULT: OFF\n"); |
---|
3993 | printf("\n"); |
---|
3994 | printf(" -d start ML optimization from random starting tree \n"); |
---|
3995 | printf("\n"); |
---|
3996 | printf(" DEFAULT: OFF\n"); |
---|
3997 | printf("\n"); |
---|
3998 | printf(" -D ML search convergence criterion. This will break off ML searches if the relative \n"); |
---|
3999 | printf(" Robinson-Foulds distance between the trees obtained from two consecutive lazy SPR cycles\n"); |
---|
4000 | printf(" is smaller or equal to 1%s. Usage recommended for very large datasets in terms of taxa.\n", "%"); |
---|
4001 | printf(" On trees with more than 500 taxa this will yield execution time improvements of approximately 50%s\n", "%"); |
---|
4002 | printf(" While yielding only slightly worse trees.\n"); |
---|
4003 | printf("\n"); |
---|
4004 | printf(" DEFAULT: OFF\n"); |
---|
4005 | printf("\n"); |
---|
4006 | printf(" -e set model optimization precision in log likelihood units for final\n"); |
---|
4007 | printf(" optimization of tree topology under MIX/MIXI or GAMMA/GAMMAI\n"); |
---|
4008 | printf("\n"); |
---|
4009 | printf(" DEFAULT: 0.1 for models not using proportion of invariant sites estimate\n"); |
---|
4010 | printf(" 0.001 for models using proportion of invariant sites estimate\n"); |
---|
4011 | printf("\n"); |
---|
4012 | printf(" -E specify an exclude file name, that contains a specification of alignment positions you wish to exclude.\n"); |
---|
4013 | printf(" Format is similar to Nexus, the file shall contain entries like \"100-200 300-400\", to exclude a\n"); |
---|
4014 | printf(" single column write, e.g., \"100-100\", if you use a mixed model, an appropriatly adapted model file\n"); |
---|
4015 | printf(" will be written.\n"); |
---|
4016 | printf("\n"); |
---|
4017 | printf(" -f select algorithm:\n"); |
---|
4018 | |
---|
4019 | printMinusFUsage(); |
---|
4020 | |
---|
4021 | printf("\n"); |
---|
4022 | printf(" -F enable ML tree searches under CAT model for very large trees without switching to \n"); |
---|
4023 | printf(" GAMMA in the end (saves memory).\n"); |
---|
4024 | printf(" This option can also be used with the GAMMA models in order to avoid the thorough optimization \n"); |
---|
4025 | printf(" of the best-scoring ML tree in the end.\n"); |
---|
4026 | printf("\n"); |
---|
4027 | printf(" DEFAULT: OFF\n"); |
---|
4028 | printf("\n"); |
---|
4029 | printf(" -g specify the file name of a multifurcating constraint tree\n"); |
---|
4030 | printf(" this tree does not need to be comprehensive, i.e. must not contain all taxa\n"); |
---|
4031 | printf("\n"); |
---|
4032 | printf(" -G enable the ML-based evolutionary placement algorithm heuristics\n"); |
---|
4033 | printf(" by specifiyng a threshold value (fraction of insertion branches to be evaluated\n"); |
---|
4034 | printf(" using slow insertions under ML).\n"); |
---|
4035 | printf("\n"); |
---|
4036 | printf(" -h Display this help message.\n"); |
---|
4037 | printf("\n"); |
---|
4038 | printf(" -i Initial rearrangement setting for the subsequent application of topological \n"); |
---|
4039 | printf(" changes phase\n"); |
---|
4040 | printf("\n"); |
---|
4041 | printf(" -I a posteriori bootstopping analysis. Use:\n"); |
---|
4042 | printf(" \"-I autoFC\" for the frequency-based criterion\n"); |
---|
4043 | printf(" \"-I autoMR\" for the majority-rule consensus tree criterion\n"); |
---|
4044 | printf(" \"-I autoMRE\" for the extended majority-rule consensus tree criterion\n"); |
---|
4045 | printf(" \"-I autoMRE_IGN\" for metrics similar to MRE, but include bipartitions under the threshold whether they are compatible\n"); |
---|
4046 | printf(" or not. This emulates MRE but is faster to compute.\n"); |
---|
4047 | printf(" You also need to pass a tree file containg several bootstrap replicates via \"-z\" \n"); |
---|
4048 | printf("\n"); |
---|
4049 | printf(" -j Specifies that intermediate tree files shall be written to file during the standard ML and BS tree searches.\n"); |
---|
4050 | printf("\n"); |
---|
4051 | printf(" DEFAULT: OFF\n"); |
---|
4052 | printf("\n"); |
---|
4053 | printf(" -J Compute majority rule consensus tree with \"-J MR\" or extended majority rule consensus tree with \"-J MRE\"\n"); |
---|
4054 | printf(" or strict consensus tree with \"-J STRICT\". For a custom consensus treshold >= 50%%, specify T_<NUM>, where 100 >= NUM >= 50.\n"); |
---|
4055 | printf(" Options \"-J STRICT_DROP\" and \"-J MR_DROP\" will execute an algorithm that identifies dropsets which contain\n"); |
---|
4056 | printf(" rogue taxa as proposed by Pattengale et al. in the paper \"Uncovering hidden phylogenetic consensus\".\n"); |
---|
4057 | printf(" You will also need to provide a tree file containing several UNROOTED trees via \"-z\"\n"); |
---|
4058 | printf("\n"); |
---|
4059 | printf(" -k Specifies that bootstrapped trees should be printed with branch lengths.\n"); |
---|
4060 | printf(" The bootstraps will run a bit longer, because model parameters will be optimized\n"); |
---|
4061 | printf(" at the end of each run under GAMMA or GAMMA+P-Invar respectively.\n"); |
---|
4062 | printf("\n"); |
---|
4063 | printf(" DEFAULT: OFF\n"); |
---|
4064 | printf("\n"); |
---|
4065 | printf(" -K Specify one of the multi-state substitution models (max 32 states) implemented in RAxML.\n"); |
---|
4066 | printf(" Available models are: ORDERED, MK, GTR\n"); |
---|
4067 | printf("\n"); |
---|
4068 | printf(" DEFAULT: GTR model \n"); |
---|
4069 | printf("\n"); |
---|
4070 | printf(" -L Compute consensus trees labelled by IC supports and the overall TC value as proposed in Salichos and Rokas 2013.\n"); |
---|
4071 | printf(" Compute a majority rule consensus tree with \"-L MR\" or an extended majority rule consensus tree with \"-L MRE\".\n"); |
---|
4072 | printf(" For a custom consensus treshold >= 50%%, specify \"-L T_<NUM>\", where 100 >= NUM >= 50.\n"); |
---|
4073 | printf(" You will of course also need to provide a tree file containing several UNROOTED trees via \"-z\"!\n"); |
---|
4074 | printf("\n"); |
---|
4075 | printf(" -m Model of Binary (Morphological), Nucleotide, Multi-State, or Amino Acid Substitution: \n"); |
---|
4076 | printf("\n"); |
---|
4077 | printf(" BINARY:\n\n"); |
---|
4078 | printf(" \"-m BINCAT\" : Optimization of site-specific\n"); |
---|
4079 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4080 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4081 | printf(" automatically under BINGAMMA, depending on the tree search option\n"); |
---|
4082 | printf(" \"-m BINCATI\" : Optimization of site-specific\n"); |
---|
4083 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4084 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4085 | printf(" automatically under BINGAMMAI, depending on the tree search option \n"); |
---|
4086 | printf(" \"-m BINGAMMA\" : GAMMA model of rate \n"); |
---|
4087 | printf(" heterogeneity (alpha parameter will be estimated)\n"); |
---|
4088 | printf(" \"-m BINGAMMAI\" : Same as BINGAMMA, but with estimate of proportion of invariable sites\n"); |
---|
4089 | printf("\n"); |
---|
4090 | printf(" NUCLEOTIDES:\n\n"); |
---|
4091 | printf(" \"-m GTRCAT\" : GTR + Optimization of substitution rates + Optimization of site-specific\n"); |
---|
4092 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4093 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4094 | printf(" under GTRGAMMA, depending on the tree search option\n"); |
---|
4095 | printf(" \"-m GTRCATI\" : GTR + Optimization of substitution rates + Optimization of site-specific\n"); |
---|
4096 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4097 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4098 | printf(" under GTRGAMMAI, depending on the tree search option\n"); |
---|
4099 | printf(" \"-m GTRGAMMA\" : GTR + Optimization of substitution rates + GAMMA model of rate \n"); |
---|
4100 | printf(" heterogeneity (alpha parameter will be estimated)\n"); |
---|
4101 | printf(" \"-m GTRGAMMAI\" : Same as GTRGAMMA, but with estimate of proportion of invariable sites \n"); |
---|
4102 | printf("\n"); |
---|
4103 | printf(" MULTI-STATE:\n\n"); |
---|
4104 | printf(" \"-m MULTICAT\" : Optimization of site-specific\n"); |
---|
4105 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4106 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4107 | printf(" automatically under MULTIGAMMA, depending on the tree search option\n"); |
---|
4108 | printf(" \"-m MULTICATI\" : Optimization of site-specific\n"); |
---|
4109 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4110 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4111 | printf(" automatically under MULTIGAMMAI, depending on the tree search option \n"); |
---|
4112 | printf(" \"-m MULTIGAMMA\" : GAMMA model of rate \n"); |
---|
4113 | printf(" heterogeneity (alpha parameter will be estimated)\n"); |
---|
4114 | printf(" \"-m MULTIGAMMAI\" : Same as MULTIGAMMA, but with estimate of proportion of invariable sites\n"); |
---|
4115 | printf("\n"); |
---|
4116 | printf(" You can use up to 32 distinct character states to encode multi-state regions, they must be used in the following order:\n"); |
---|
4117 | printf(" 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V\n"); |
---|
4118 | printf(" i.e., if you have 6 distinct character states you would use 0, 1, 2, 3, 4, 5 to encode these.\n"); |
---|
4119 | printf(" The substitution model for the multi-state regions can be selected via the \"-K\" option\n"); |
---|
4120 | printf("\n"); |
---|
4121 | printf(" AMINO ACIDS:\n\n"); |
---|
4122 | printf(" \"-m PROTCATmatrixName[F]\" : specified AA matrix + Optimization of substitution rates + Optimization of site-specific\n"); |
---|
4123 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4124 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4125 | printf(" automatically under PROTGAMMAmatrixName[f], depending on the tree search option\n"); |
---|
4126 | printf(" \"-m PROTCATImatrixName[F]\" : specified AA matrix + Optimization of substitution rates + Optimization of site-specific\n"); |
---|
4127 | printf(" evolutionary rates which are categorized into numberOfCategories distinct \n"); |
---|
4128 | printf(" rate categories for greater computational efficiency. Final tree might be evaluated\n"); |
---|
4129 | printf(" automatically under PROTGAMMAImatrixName[f], depending on the tree search option\n"); |
---|
4130 | printf(" \"-m PROTGAMMAmatrixName[F]\" : specified AA matrix + Optimization of substitution rates + GAMMA model of rate \n"); |
---|
4131 | printf(" heterogeneity (alpha parameter will be estimated)\n"); |
---|
4132 | printf(" \"-m PROTGAMMAImatrixName[F]\" : Same as PROTGAMMAmatrixName[F], but with estimate of proportion of invariable sites \n"); |
---|
4133 | printf("\n"); |
---|
4134 | printf(" Available AA substitution models:\n"); |
---|
4135 | printf(" "); |
---|
4136 | |
---|
4137 | { |
---|
4138 | int |
---|
4139 | i; |
---|
4140 | |
---|
4141 | for(i = 0; i < NUM_PROT_MODELS - 1; i++) |
---|
4142 | { |
---|
4143 | if(i > 0 && (i % 8 == 0)) |
---|
4144 | { |
---|
4145 | printf("\n"); |
---|
4146 | printf(" "); |
---|
4147 | } |
---|
4148 | printf("%s, ", protModels[i]); |
---|
4149 | } |
---|
4150 | |
---|
4151 | printf("%s\n", protModels[i]); |
---|
4152 | } |
---|
4153 | |
---|
4154 | printf(" With the optional \"F\" appendix you can specify if you want to use empirical base frequencies\n"); |
---|
4155 | printf(" Please note that for mixed models you can in addition specify the per-gene AA model in\n"); |
---|
4156 | printf(" the mixed model file (see manual for details). Also note that if you estimate AA GTR parameters on a partitioned\n"); |
---|
4157 | printf(" dataset, they will be linked (estimated jointly) across all partitions to avoid over-parametrization\n"); |
---|
4158 | printf("\n"); |
---|
4159 | printf(" -M Switch on estimation of individual per-partition branch lengths. Only has effect when used in combination with \"-q\"\n"); |
---|
4160 | printf(" Branch lengths for individual partitions will be printed to separate files\n"); |
---|
4161 | printf(" A weighted average of the branch lengths is computed by using the respective partition lengths\n"); |
---|
4162 | printf("\n"), |
---|
4163 | printf(" DEFAULT: OFF\n"); |
---|
4164 | printf("\n"); |
---|
4165 | printf(" -n Specifies the name of the output file.\n"); |
---|
4166 | printf("\n"); |
---|
4167 | printf(" -o Specify the name of a single outgrpoup or a comma-separated list of outgroups, eg \"-o Rat\" \n"); |
---|
4168 | printf(" or \"-o Rat,Mouse\", in case that multiple outgroups are not monophyletic the first name \n"); |
---|
4169 | printf(" in the list will be selected as outgroup, don't leave spaces between taxon names!\n"); |
---|
4170 | printf("\n"); |
---|
4171 | printf(" -O Disable check for completely undetermined sequence in alignment.\n"); |
---|
4172 | printf(" The program will not exit with an error message when \"-O\" is specified.\n"); |
---|
4173 | printf("\n"); |
---|
4174 | printf(" DEFAULT: check enabled\n"); |
---|
4175 | printf("\n"); |
---|
4176 | printf(" -p Specify a random number seed for the parsimony inferences. This allows you to reproduce your results\n"); |
---|
4177 | printf(" and will help me debug the program.\n"); |
---|
4178 | printf("\n"); |
---|
4179 | printf(" -P Specify the file name of a user-defined AA (Protein) substitution model. This file must contain\n"); |
---|
4180 | printf(" 420 entries, the first 400 being the AA substitution rates (this must be a symmetric matrix) and the\n"); |
---|
4181 | printf(" last 20 are the empirical base frequencies\n"); |
---|
4182 | printf("\n"); |
---|
4183 | printf(" -q Specify the file name which contains the assignment of models to alignment\n"); |
---|
4184 | printf(" partitions for multiple models of substitution. For the syntax of this file\n"); |
---|
4185 | printf(" please consult the manual.\n"); |
---|
4186 | printf("\n"); |
---|
4187 | printf(" -r Specify the file name of a binary constraint tree.\n"); |
---|
4188 | printf(" this tree does not need to be comprehensive, i.e. must not contain all taxa\n"); |
---|
4189 | printf("\n"); |
---|
4190 | printf(" -R Specify the file name of a binary model parameter file that has previously been generated\n"); |
---|
4191 | printf(" with RAxML using the -f e tree evaluation option. The file name should be: \n"); |
---|
4192 | printf(" RAxML_binaryModelParameters.runID\n"); |
---|
4193 | printf("\n"); |
---|
4194 | printf(" -s Specify the name of the alignment data file in PHYLIP format\n"); |
---|
4195 | printf("\n"); |
---|
4196 | printf(" -S Specify the name of a secondary structure file. The file can contain \".\" for \n"); |
---|
4197 | printf(" alignment columns that do not form part of a stem and characters \"()<>[]{}\" to define \n"); |
---|
4198 | printf(" stem regions and pseudoknots\n"); |
---|
4199 | printf("\n"); |
---|
4200 | printf(" -t Specify a user starting tree file name in Newick format\n"); |
---|
4201 | printf("\n"); |
---|
4202 | printf(" -T PTHREADS VERSION ONLY! Specify the number of threads you want to run.\n"); |
---|
4203 | printf(" Make sure to set \"-T\" to at most the number of CPUs you have on your machine,\n"); |
---|
4204 | printf(" otherwise, there will be a huge performance decrease!\n"); |
---|
4205 | printf("\n"); |
---|
4206 | printf(" -u use the median for the discrete approximation of the GAMMA model of rate heterogeneity\n"); |
---|
4207 | printf("\n"); |
---|
4208 | printf(" DEFAULT: OFF\n"); |
---|
4209 | printf("\n"); |
---|
4210 | printf(" -U Try to save memory by using SEV-based implementation for gap columns on large gappy alignments\n"); |
---|
4211 | printf(" The technique is described here: http://www.biomedcentral.com/1471-2105/12/470\n"); |
---|
4212 | printf(" This will only work for DNA and/or PROTEIN data and only with the SSE3 or AVX-vextorized version of the code.\n"); |
---|
4213 | printf("\n"); |
---|
4214 | printf(" -v Display version information\n"); |
---|
4215 | printf("\n"); |
---|
4216 | printf(" -V Disable rate heterogeneity among sites model and use one without rate heterogeneity instead.\n"); |
---|
4217 | printf(" Only works if you specify the CAT model of rate heterogeneity.\n"); |
---|
4218 | printf("\n"); |
---|
4219 | printf(" DEFAULT: use rate heterogeneity\n"); |
---|
4220 | printf("\n"); |
---|
4221 | printf(" -w FULL (!) path to the directory into which RAxML shall write its output files\n"); |
---|
4222 | printf("\n"); |
---|
4223 | printf(" DEFAULT: current directory\n"); |
---|
4224 | printf("\n"); |
---|
4225 | printf(" -W Sliding window size for leave-one-out site-specific placement bias algorithm\n"); |
---|
4226 | printf(" only effective when used in combination with \"-f S\" \n"); |
---|
4227 | printf("\n"); |
---|
4228 | printf(" DEFAULT: 100 sites\n"); |
---|
4229 | printf("\n"); |
---|
4230 | printf(" -x Specify an integer number (random seed) and turn on rapid bootstrapping\n"); |
---|
4231 | printf(" CAUTION: unlike in version 7.0.4 RAxML will conduct rapid BS replicates under \n"); |
---|
4232 | printf(" the model of rate heterogeneity you specified via \"-m\" and not by default under CAT\n"); |
---|
4233 | printf("\n"); |
---|
4234 | printf(" -X Same as the \"-y\" option below, however the parsimony search is more superficial.\n"); |
---|
4235 | printf(" RAxML will only do a randomized stepwise addition order parsimony tree reconstruction\n"); |
---|
4236 | printf(" without performing any additional SPRs.\n"); |
---|
4237 | printf(" This may be helpful for very broad whole-genome datasets, since this can generate topologically\n"); |
---|
4238 | printf(" more different starting trees.\n"); |
---|
4239 | printf("\n"); |
---|
4240 | printf(" DEFAULT: OFF\n"); |
---|
4241 | printf("\n"); |
---|
4242 | printf(" -y If you want to only compute a parsimony starting tree with RAxML specify \"-y\",\n"); |
---|
4243 | printf(" the program will exit after computation of the starting tree\n"); |
---|
4244 | printf("\n"); |
---|
4245 | printf(" DEFAULT: OFF\n"); |
---|
4246 | printf("\n"); |
---|
4247 | printf(" -Y Pass a quartet grouping file name defining four groups from which to draw quartets\n"); |
---|
4248 | printf(" The file input format must contain 4 groups in the following form:\n"); |
---|
4249 | printf(" (Chicken, Human, Loach), (Cow, Carp), (Mouse, Rat, Seal), (Whale, Frog);\n"); |
---|
4250 | printf(" Only works in combination with -f q !\n"); |
---|
4251 | printf("\n"); |
---|
4252 | printf(" -z Specify the file name of a file containing multiple trees e.g. from a bootstrap\n"); |
---|
4253 | printf(" that shall be used to draw bipartition values onto a tree provided with \"-t\",\n"); |
---|
4254 | printf(" It can also be used to compute per site log likelihoods in combination with \"-f g\"\n"); |
---|
4255 | printf(" and to read a bunch of trees for a couple of other options (\"-f h\", \"-f m\", \"-f n\").\n"); |
---|
4256 | printf("\n"); |
---|
4257 | printf(" -#|-N Specify the number of alternative runs on distinct starting trees\n"); |
---|
4258 | printf(" In combination with the \"-b\" option, this will invoke a multiple bootstrap analysis\n"); |
---|
4259 | printf(" Note that \"-N\" has been added as an alternative since \"-#\" sometimes caused problems\n"); |
---|
4260 | printf(" with certain MPI job submission systems, since \"-#\" is often used to start comments.\n"); |
---|
4261 | printf(" If you want to use the bootstopping criteria specify \"-# autoMR\" or \"-# autoMRE\" or \"-# autoMRE_IGN\"\n"); |
---|
4262 | printf(" for the majority-rule tree based criteria (see -I option) or \"-# autoFC\" for the frequency-based criterion.\n"); |
---|
4263 | printf(" Bootstopping will only work in combination with \"-x\" or \"-b\"\n"); |
---|
4264 | printf("\n"); |
---|
4265 | printf(" DEFAULT: 1 single analysis\n"); |
---|
4266 | printf("\n\n\n\n"); |
---|
4267 | |
---|
4268 | } |
---|
4269 | |
---|
4270 | |
---|
4271 | |
---|
4272 | |
---|
/*
 * Validate the run id that was passed via "-n".
 *
 * id points to a buffer of 128 chars.  The id is rejected, with a message on
 * stdout, when
 *   - it contains a '/' character,
 *   - no terminating '\0' is found within the 128 chars (id too long), or
 *   - it is the empty string.
 *
 * A rejected id terminates the program: assert(0) aborts in regular builds,
 * and the exit(-1) behind it is only reached when assertions are compiled
 * out via NDEBUG.  A valid id is left untouched and the function returns.
 */
static void analyzeRunId(char id[128])
{
  int
    i;

  /* the index is tested before id[i] is read, so a buffer without a
     terminating '\0' is never read beyond its 128 chars */
  for(i = 0; i < 128 && id[i] != '\0'; i++)
    {
      if(id[i] == '/')
	{
	  printf("Error character %c not allowed in run ID\n", id[i]);
	  assert(0);
	  exit(-1);
	}
    }

  /* the scan stopped at the end of the buffer, not at a terminator */
  if(i >= 128)
    {
      printf("Error: run id after \"-n\" is too long, it has %d characters please use a shorter one\n", i);
      assert(0);
      exit(-1);
    }

  if(i == 0)
    {
      printf("Error: please provide a string for the run id after \"-n\" \n");
      assert(0);
      exit(-1);
    }
}
---|
4302 | |
---|
4303 | static void get_args(int argc, char *argv[], analdef *adef, tree *tr) |
---|
4304 | { |
---|
4305 | boolean |
---|
4306 | bad_opt =FALSE, |
---|
4307 | resultDirSet = FALSE; |
---|
4308 | |
---|
4309 | char |
---|
4310 | resultDir[1024] = "", |
---|
4311 | aut[256], |
---|
4312 | *optarg, |
---|
4313 | model[2048] = "", |
---|
4314 | secondaryModel[2048] = "", |
---|
4315 | multiStateModel[2048] = "", |
---|
4316 | modelChar; |
---|
4317 | |
---|
4318 | double |
---|
4319 | likelihoodEpsilon, |
---|
4320 | wcThreshold, |
---|
4321 | fastEPAthreshold; |
---|
4322 | |
---|
4323 | int |
---|
4324 | optind = 1, |
---|
4325 | c, |
---|
4326 | nameSet = 0, |
---|
4327 | alignmentSet = 0, |
---|
4328 | multipleRuns = 0, |
---|
4329 | constraintSet = 0, |
---|
4330 | treeSet = 0, |
---|
4331 | groupSet = 0, |
---|
4332 | modelSet = 0, |
---|
4333 | treesSet = 0; |
---|
4334 | |
---|
4335 | boolean |
---|
4336 | bSeedSet = FALSE, |
---|
4337 | xSeedSet = FALSE, |
---|
4338 | multipleRunsSet = FALSE, |
---|
4339 | yFileSet = FALSE; |
---|
4340 | |
---|
4341 | run_id[0] = 0; |
---|
4342 | workdir[0] = 0; |
---|
4343 | seq_file[0] = 0; |
---|
4344 | tree_file[0] = 0; |
---|
4345 | model[0] = 0; |
---|
4346 | weightFileName[0] = 0; |
---|
4347 | modelFileName[0] = 0; |
---|
4348 | |
---|
4349 | /*********** tr inits **************/ |
---|
4350 | |
---|
4351 | #ifdef _USE_PTHREADS |
---|
4352 | NumberOfThreads = 0; |
---|
4353 | #endif |
---|
4354 | |
---|
4355 | |
---|
4356 | tr->useFastScaling = TRUE; |
---|
4357 | tr->bootStopCriterion = -1; |
---|
4358 | tr->wcThreshold = 0.03; |
---|
4359 | tr->doCutoff = TRUE; |
---|
4360 | tr->secondaryStructureModel = SEC_16; /* default setting */ |
---|
4361 | tr->searchConvergenceCriterion = FALSE; |
---|
4362 | tr->catOnly = FALSE; |
---|
4363 | tr->useEpaHeuristics = FALSE; |
---|
4364 | tr->fastEPAthreshold = -1.0; |
---|
4365 | tr->multiStateModel = GTR_MULTI_STATE; |
---|
4366 | tr->saveMemory = FALSE; |
---|
4367 | tr->useGammaMedian = FALSE; |
---|
4368 | tr->noRateHet = FALSE; |
---|
4369 | tr->perPartitionEPA = FALSE; |
---|
4370 | tr->useBrLenScaler = FALSE; |
---|
4371 | /********* tr inits end*************/ |
---|
4372 | |
---|
4373 | |
---|
4374 | while(!bad_opt && |
---|
4375 | ((c = mygetopt(argc,argv,"R:T:E:N:B:L:P:S:Y:A:G:H:I:J:K:W:l:x:z:g:r:e:a:b:c:f:i:m:t:w:s:n:o:q:#:p:vudyjhkMDFQUOVCX", &optind, &optarg))!=-1)) |
---|
4376 | { |
---|
4377 | switch(c) |
---|
4378 | { |
---|
4379 | case 'Y': |
---|
4380 | adef->useQuartetGrouping = TRUE; |
---|
4381 | yFileSet = TRUE; |
---|
4382 | strcpy(quartetGroupingFileName, optarg); |
---|
4383 | break; |
---|
4384 | case 'V': |
---|
4385 | tr->noRateHet = TRUE; |
---|
4386 | break; |
---|
4387 | case 'u': |
---|
4388 | tr->useGammaMedian = TRUE; |
---|
4389 | break; |
---|
4390 | case 'O': |
---|
4391 | adef->checkForUndeterminedSequences = FALSE; |
---|
4392 | break; |
---|
4393 | case 'W': |
---|
4394 | sscanf(optarg,"%d", &(adef->slidingWindowSize)); |
---|
4395 | if(adef->slidingWindowSize <= 0) |
---|
4396 | { |
---|
4397 | printf("You can't use a sliding window size smaller than 1, you specified %d\n", adef->slidingWindowSize); |
---|
4398 | exit(-1); |
---|
4399 | } |
---|
4400 | if(adef->slidingWindowSize <= 10) |
---|
4401 | { |
---|
4402 | printf("You specified a small sliding window size of %d sites\n", adef->slidingWindowSize); |
---|
4403 | printf("Are you sure you want to do this?\n"); |
---|
4404 | } |
---|
4405 | if(adef->slidingWindowSize >= 500) |
---|
4406 | { |
---|
4407 | printf("You specified a large sliding window size of %d sites\n", adef->slidingWindowSize); |
---|
4408 | printf("Are you sure you want to do this?\n"); |
---|
4409 | } |
---|
4410 | break; |
---|
4411 | case 'U': |
---|
4412 | tr->saveMemory = TRUE; |
---|
4413 | #if (!defined(__SIM_SSE3) && !defined(__AVX)) |
---|
4414 | printf("\nmemory saving option -U does only work with the AVX and SSE3 vectorized versions of the code\n"); |
---|
4415 | printf("please remove this option and execute the program again\n"); |
---|
4416 | printf("exiting ....\n\n"); |
---|
4417 | errorExit(0); |
---|
4418 | #endif |
---|
4419 | break; |
---|
4420 | case 'R': |
---|
4421 | adef->useBinaryModelFile = TRUE; |
---|
4422 | strcpy(binaryModelParamsInputFileName, optarg); |
---|
4423 | break; |
---|
4424 | case 'K': |
---|
4425 | { |
---|
4426 | const char *modelList[3] = { "ORDERED", "MK", "GTR"}; |
---|
4427 | const int states[3] = {ORDERED_MULTI_STATE, MK_MULTI_STATE, GTR_MULTI_STATE}; |
---|
4428 | int i; |
---|
4429 | |
---|
4430 | sscanf(optarg, "%s", multiStateModel); |
---|
4431 | |
---|
4432 | for(i = 0; i < 3; i++) |
---|
4433 | if(strcmp(multiStateModel, modelList[i]) == 0) |
---|
4434 | break; |
---|
4435 | |
---|
4436 | if(i < 3) |
---|
4437 | tr->multiStateModel = states[i]; |
---|
4438 | else |
---|
4439 | { |
---|
4440 | printf("The multi-state model %s you want to use does not exist, exiting .... \n", multiStateModel); |
---|
4441 | errorExit(0); |
---|
4442 | } |
---|
4443 | |
---|
4444 | |
---|
4445 | } |
---|
4446 | break; |
---|
4447 | case 'A': |
---|
4448 | { |
---|
4449 | const char *modelList[21] = { "S6A", "S6B", "S6C", "S6D", "S6E", "S7A", "S7B", "S7C", "S7D", "S7E", "S7F", "S16", "S16A", "S16B", "S16C", |
---|
4450 | "S16D", "S16E", "S16F", "S16I", "S16J", "S16K"}; |
---|
4451 | int i; |
---|
4452 | |
---|
4453 | sscanf(optarg, "%s", secondaryModel); |
---|
4454 | |
---|
4455 | for(i = 0; i < 21; i++) |
---|
4456 | if(strcmp(secondaryModel, modelList[i]) == 0) |
---|
4457 | break; |
---|
4458 | |
---|
4459 | if(i < 21) |
---|
4460 | tr->secondaryStructureModel = i; |
---|
4461 | else |
---|
4462 | { |
---|
4463 | printf("The secondary structure model %s you want to use does not exist, exiting .... \n", secondaryModel); |
---|
4464 | errorExit(0); |
---|
4465 | } |
---|
4466 | } |
---|
4467 | break; |
---|
4468 | case 'B': |
---|
4469 | sscanf(optarg,"%lf", &wcThreshold); |
---|
4470 | tr->wcThreshold = wcThreshold; |
---|
4471 | if(wcThreshold <= 0.0 || wcThreshold >= 1.0) |
---|
4472 | { |
---|
4473 | printf("\nBootstrap threshold must be set to values between 0.0 and 1.0, you just set it to %f\n", wcThreshold); |
---|
4474 | exit(-1); |
---|
4475 | } |
---|
4476 | if(wcThreshold < 0.01 || wcThreshold > 0.05) |
---|
4477 | { |
---|
4478 | printf("\n\nWARNING, reasonable settings for Bootstopping threshold with MR-based criteria range between 0.01 and 0.05.\n"); |
---|
4479 | printf("You are just setting it to %f, the most reasonable empirically determined setting is 0.03 \n\n", wcThreshold); |
---|
4480 | } |
---|
4481 | break; |
---|
4482 | case 'D': |
---|
4483 | tr->searchConvergenceCriterion = TRUE; |
---|
4484 | break; |
---|
4485 | case 'E': |
---|
4486 | strcpy(excludeFileName, optarg); |
---|
4487 | adef->useExcludeFile = TRUE; |
---|
4488 | break; |
---|
4489 | case 'F': |
---|
4490 | tr->catOnly = TRUE; |
---|
4491 | break; |
---|
4492 | case 'G': |
---|
4493 | tr->useEpaHeuristics = TRUE; |
---|
4494 | |
---|
4495 | sscanf(optarg,"%lf", &fastEPAthreshold); |
---|
4496 | tr->fastEPAthreshold = fastEPAthreshold; |
---|
4497 | |
---|
4498 | if(fastEPAthreshold <= 0.0 || fastEPAthreshold >= 1.0) |
---|
4499 | { |
---|
4500 | printf("\nHeuristic EPA threshold must be set to values between 0.0 and 1.0, you just set it to %f\n", fastEPAthreshold); |
---|
4501 | exit(-1); |
---|
4502 | } |
---|
4503 | if(fastEPAthreshold < 0.015625 || fastEPAthreshold > 0.5) |
---|
4504 | { |
---|
4505 | printf("\n\nWARNING, reasonable settings for heuristic EPA threshold range between 0.015625 (1/64) and 0.5 (1/2).\n"); |
---|
4506 | printf("You are just setting it to %f\n\n", fastEPAthreshold); |
---|
4507 | } |
---|
4508 | #ifdef _USE_PTHREADS |
---|
4509 | tr->useFastScaling = FALSE; |
---|
4510 | #endif |
---|
4511 | break; |
---|
4512 | |
---|
4513 | case 'I': |
---|
4514 | adef->readTaxaOnly = TRUE; |
---|
4515 | adef->mode = BOOTSTOP_ONLY; |
---|
4516 | if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "autoFC") == 0) || (strcmp(aut, "autoMR") == 0) || |
---|
4517 | (strcmp(aut, "autoMRE") == 0) || (strcmp(aut, "autoMRE_IGN") == 0))) |
---|
4518 | { |
---|
4519 | if((strcmp(aut, "autoFC") == 0)) |
---|
4520 | tr->bootStopCriterion = FREQUENCY_STOP; |
---|
4521 | if((strcmp(aut, "autoMR") == 0)) |
---|
4522 | tr->bootStopCriterion = MR_STOP; |
---|
4523 | if((strcmp(aut, "autoMRE") == 0)) |
---|
4524 | tr->bootStopCriterion = MRE_STOP; |
---|
4525 | if((strcmp(aut, "autoMRE_IGN") == 0)) |
---|
4526 | tr->bootStopCriterion = MRE_IGN_STOP; |
---|
4527 | } |
---|
4528 | else |
---|
4529 | { |
---|
4530 | if(processID == 0) |
---|
4531 | printf("Use -I a posteriori bootstop option either as \"-I autoFC\" or \"-I autoMR\" or \"-I autoMRE\" or \"-I autoMRE_IGN\"\n"); |
---|
4532 | errorExit(0); |
---|
4533 | } |
---|
4534 | break; |
---|
4535 | case 'J': |
---|
4536 | adef->readTaxaOnly = TRUE; |
---|
4537 | adef->mode = CONSENSUS_ONLY; |
---|
4538 | adef->calculateIC = FALSE; |
---|
4539 | |
---|
4540 | if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "MR") == 0) || (strcmp(aut, "MRE") == 0) || (strcmp(aut, "STRICT") == 0) || |
---|
4541 | (strcmp(aut, "STRICT_DROP") == 0) || (strcmp(aut, "MR_DROP") == 0))) |
---|
4542 | { |
---|
4543 | if((strcmp(aut, "MR") == 0)) |
---|
4544 | tr->consensusType = MR_CONSENSUS; |
---|
4545 | if((strcmp(aut, "MR_DROP") == 0)) |
---|
4546 | { |
---|
4547 | tr->consensusType = MR_CONSENSUS; |
---|
4548 | adef->leaveDropMode = TRUE; |
---|
4549 | } |
---|
4550 | |
---|
4551 | if((strcmp(aut, "MRE") == 0)) |
---|
4552 | tr->consensusType = MRE_CONSENSUS; |
---|
4553 | |
---|
4554 | |
---|
4555 | if((strcmp(aut, "STRICT") == 0)) |
---|
4556 | tr->consensusType = STRICT_CONSENSUS; |
---|
4557 | if((strcmp(aut, "STRICT_DROP") == 0)) |
---|
4558 | { |
---|
4559 | tr->consensusType = STRICT_CONSENSUS; |
---|
4560 | adef->leaveDropMode = TRUE; |
---|
4561 | } |
---|
4562 | } |
---|
4563 | else |
---|
4564 | { |
---|
4565 | if( (sscanf( optarg, "%s", aut) > 0) && optarg[0] == 'T' && optarg[1] == '_') |
---|
4566 | { |
---|
4567 | tr->consensusType = USER_DEFINED; |
---|
4568 | sscanf(optarg + 2,"%d", &tr->consensusUserThreshold); |
---|
4569 | |
---|
4570 | if(tr->consensusUserThreshold < 50 || tr->consensusUserThreshold > 100) |
---|
4571 | { |
---|
4572 | printf("Please specify a custom threshold c, with 50 <= c <= 100\n" ); |
---|
4573 | errorExit(0); |
---|
4574 | } |
---|
4575 | } |
---|
4576 | else |
---|
4577 | { |
---|
4578 | if(processID == 0) |
---|
4579 | printf("Use -J consensus tree option either as \"-J MR\" or \"-J MRE\" or \"-J STRICT\" or \"-J MR_DROP\" or \"-J STRICT_DROP\" or T_<NUM>, where NUM >= 50\n"); |
---|
4580 | errorExit(0); |
---|
4581 | } |
---|
4582 | } |
---|
4583 | break; |
---|
4584 | case 'C': |
---|
4585 | adef->verboseIC = TRUE; |
---|
4586 | break; |
---|
4587 | case 'L': |
---|
4588 | adef->readTaxaOnly = TRUE; |
---|
4589 | adef->mode = CONSENSUS_ONLY; |
---|
4590 | adef->leaveDropMode = FALSE; |
---|
4591 | adef->calculateIC = TRUE; |
---|
4592 | |
---|
4593 | if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "MR") == 0) || (strcmp(aut, "MRE") == 0))) |
---|
4594 | { |
---|
4595 | if((strcmp(aut, "MR") == 0)) |
---|
4596 | tr->consensusType = MR_CONSENSUS; |
---|
4597 | |
---|
4598 | if((strcmp(aut, "MRE") == 0)) |
---|
4599 | tr->consensusType = MRE_CONSENSUS; |
---|
4600 | } |
---|
4601 | else |
---|
4602 | { |
---|
4603 | if((sscanf( optarg, "%s", aut) > 0) && optarg[0] == 'T' && optarg[1] == '_') |
---|
4604 | { |
---|
4605 | tr->consensusType = USER_DEFINED; |
---|
4606 | sscanf(optarg + 2,"%d", &tr->consensusUserThreshold); |
---|
4607 | |
---|
4608 | if(tr->consensusUserThreshold < 50 || tr->consensusUserThreshold > 100) |
---|
4609 | { |
---|
4610 | printf("Please specify a custom threshold c, with 50 <= c <= 100\n" ); |
---|
4611 | errorExit(0); |
---|
4612 | } |
---|
4613 | } |
---|
4614 | else |
---|
4615 | { |
---|
4616 | if(processID == 0) |
---|
4617 | printf("Use -L consensus tree option including IC/TC score computation either as \"-L MR\" or \"-L MRE\" or \"-L T_<NUM>\", where NUM >= 50\n"); |
---|
4618 | errorExit(0); |
---|
4619 | } |
---|
4620 | } |
---|
4621 | break; |
---|
4622 | case 'M': |
---|
4623 | adef->perGeneBranchLengths = TRUE; |
---|
4624 | break; |
---|
4625 | case 'P': |
---|
4626 | strcpy(proteinModelFileName, optarg); |
---|
4627 | adef->userProteinModel = TRUE; |
---|
4628 | /*parseProteinModel(adef->externalAAMatrix, proteinModelFileName);*/ |
---|
4629 | break; |
---|
4630 | case 'S': |
---|
4631 | adef->useSecondaryStructure = TRUE; |
---|
4632 | strcpy(secondaryStructureFileName, optarg); |
---|
4633 | break; |
---|
4634 | case 'T': |
---|
4635 | #ifdef _USE_PTHREADS |
---|
4636 | sscanf(optarg,"%d", &NumberOfThreads); |
---|
4637 | #else |
---|
4638 | if(processID == 0) |
---|
4639 | { |
---|
4640 | printf("Option -T does not have any effect with the sequential or parallel MPI version.\n"); |
---|
4641 | printf("It is used to specify the number of threads for the Pthreads-based parallelization\n"); |
---|
4642 | } |
---|
4643 | #endif |
---|
4644 | break; |
---|
4645 | case 'o': |
---|
4646 | { |
---|
4647 | char *outgroups; |
---|
4648 | outgroups = (char*)rax_malloc(sizeof(char) * (strlen(optarg) + 1)); |
---|
4649 | strcpy(outgroups, optarg); |
---|
4650 | parseOutgroups(outgroups, tr); |
---|
4651 | rax_free(outgroups); |
---|
4652 | adef->outgroup = TRUE; |
---|
4653 | } |
---|
4654 | break; |
---|
4655 | case 'k': |
---|
4656 | adef->bootstrapBranchLengths = TRUE; |
---|
4657 | break; |
---|
4658 | case 'z': |
---|
4659 | strcpy(bootStrapFile, optarg); |
---|
4660 | treesSet = 1; |
---|
4661 | break; |
---|
4662 | case 'd': |
---|
4663 | adef->randomStartingTree = TRUE; |
---|
4664 | break; |
---|
4665 | case 'g': |
---|
4666 | strcpy(tree_file, optarg); |
---|
4667 | adef->grouping = TRUE; |
---|
4668 | adef->restart = TRUE; |
---|
4669 | groupSet = 1; |
---|
4670 | break; |
---|
4671 | case 'r': |
---|
4672 | strcpy(tree_file, optarg); |
---|
4673 | adef->restart = TRUE; |
---|
4674 | adef->constraint = TRUE; |
---|
4675 | constraintSet = 1; |
---|
4676 | break; |
---|
4677 | case 'e': |
---|
4678 | sscanf(optarg,"%lf", &likelihoodEpsilon); |
---|
4679 | adef->likelihoodEpsilon = likelihoodEpsilon; |
---|
4680 | break; |
---|
4681 | case 'q': |
---|
4682 | strcpy(modelFileName,optarg); |
---|
4683 | adef->useMultipleModel = TRUE; |
---|
4684 | break; |
---|
4685 | case 'p': |
---|
4686 | sscanf(optarg,"%ld", &(adef->parsimonySeed)); |
---|
4687 | if(adef->parsimonySeed <= 0) |
---|
4688 | { |
---|
4689 | printf("Parsimony seed specified via -p must be greater than zero\n"); |
---|
4690 | errorExit(-1); |
---|
4691 | } |
---|
4692 | break; |
---|
4693 | case 'N': |
---|
4694 | case '#': |
---|
4695 | if(sscanf(optarg,"%d", &multipleRuns) > 0) |
---|
4696 | { |
---|
4697 | adef->multipleRuns = multipleRuns; |
---|
4698 | } |
---|
4699 | else |
---|
4700 | { |
---|
4701 | if((sscanf(optarg,"%s", aut) > 0) && ((strcmp(aut, "autoFC") == 0) || (strcmp(aut, "autoMR") == 0) || |
---|
4702 | (strcmp(aut, "autoMRE") == 0) || (strcmp(aut, "autoMRE_IGN") == 0))) |
---|
4703 | |
---|
4704 | { |
---|
4705 | adef->bootStopping = TRUE; |
---|
4706 | adef->multipleRuns = 1000; |
---|
4707 | |
---|
4708 | if((strcmp(aut, "autoFC") == 0)) |
---|
4709 | tr->bootStopCriterion = FREQUENCY_STOP; |
---|
4710 | if((strcmp(aut, "autoMR") == 0)) |
---|
4711 | tr->bootStopCriterion = MR_STOP; |
---|
4712 | if((strcmp(aut, "autoMRE") == 0)) |
---|
4713 | tr->bootStopCriterion = MRE_STOP; |
---|
4714 | if((strcmp(aut, "autoMRE_IGN") == 0)) |
---|
4715 | tr->bootStopCriterion = MRE_IGN_STOP; |
---|
4716 | } |
---|
4717 | else |
---|
4718 | { |
---|
4719 | if(processID == 0) |
---|
4720 | { |
---|
4721 | printf("Use -# or -N option either with an integer, e.g., -# 100 or with -# autoFC or -# autoMR or -# autoMRE or -# autoMRE_IGN\n"); |
---|
4722 | printf("or -N 100 or -N autoFC or -N autoMR or -N autoMRE or -N autoMRE_IGN respectively, note that auto will not work for the\n"); |
---|
4723 | printf("MPI-based parallel version\n"); |
---|
4724 | } |
---|
4725 | errorExit(0); |
---|
4726 | } |
---|
4727 | } |
---|
4728 | multipleRunsSet = TRUE; |
---|
4729 | break; |
---|
4730 | case 'v': |
---|
4731 | printVersionInfo(TRUE, (FILE*)NULL); |
---|
4732 | errorExit(0); |
---|
4733 | case 'y': |
---|
4734 | adef->stepwiseAdditionOnly = FALSE; |
---|
4735 | adef->startingTreeOnly = 1; |
---|
4736 | break; |
---|
4737 | case 'X': |
---|
4738 | adef->stepwiseAdditionOnly = TRUE; |
---|
4739 | adef->startingTreeOnly = 1; |
---|
4740 | break; |
---|
4741 | case 'h': |
---|
4742 | printREADME(); |
---|
4743 | errorExit(0); |
---|
4744 | case 'j': |
---|
4745 | adef->checkpoints = 1; |
---|
4746 | break; |
---|
4747 | case 'a': |
---|
4748 | strcpy(weightFileName,optarg); |
---|
4749 | adef->useWeightFile = TRUE; |
---|
4750 | break; |
---|
4751 | case 'b': |
---|
4752 | sscanf(optarg,"%ld", &adef->boot); |
---|
4753 | if(adef->boot <= 0) |
---|
4754 | { |
---|
4755 | printf("Bootstrap seed specified via -b must be greater than zero\n"); |
---|
4756 | errorExit(-1); |
---|
4757 | } |
---|
4758 | bSeedSet = TRUE; |
---|
4759 | break; |
---|
4760 | case 'x': |
---|
4761 | sscanf(optarg,"%ld", &adef->rapidBoot); |
---|
4762 | if(adef->rapidBoot <= 0) |
---|
4763 | { |
---|
4764 | printf("Bootstrap seed specified via -x must be greater than zero\n"); |
---|
4765 | errorExit(-1); |
---|
4766 | } |
---|
4767 | xSeedSet = TRUE; |
---|
4768 | break; |
---|
4769 | case 'c': |
---|
4770 | sscanf(optarg, "%d", &adef->categories); |
---|
4771 | break; |
---|
4772 | case 'f': |
---|
4773 | sscanf(optarg, "%c", &modelChar); |
---|
4774 | switch(modelChar) |
---|
4775 | { |
---|
4776 | case 'A': |
---|
4777 | adef->mode = ANCESTRAL_STATES; |
---|
4778 | /*adef->compressPatterns = FALSE;*/ |
---|
4779 | break; |
---|
4780 | case 'a': |
---|
4781 | adef->allInOne = TRUE; |
---|
4782 | adef->mode = BIG_RAPID_MODE; |
---|
4783 | tr->doCutoff = TRUE; |
---|
4784 | break; |
---|
4785 | case 'b': |
---|
4786 | adef->readTaxaOnly = TRUE; |
---|
4787 | adef->mode = CALC_BIPARTITIONS; |
---|
4788 | break; |
---|
4789 | case 'B': |
---|
4790 | adef->mode = OPTIMIZE_BR_LEN_SCALER; |
---|
4791 | adef->perGeneBranchLengths = TRUE; |
---|
4792 | tr->useBrLenScaler = TRUE; |
---|
4793 | break; |
---|
4794 | case 'c': |
---|
4795 | adef->mode = CHECK_ALIGNMENT; |
---|
4796 | break; |
---|
4797 | case 'C': |
---|
4798 | adef->mode = ANCESTRAL_SEQUENCE_TEST; |
---|
4799 | tr->useFastScaling = FALSE; |
---|
4800 | break; |
---|
4801 | case 'd': |
---|
4802 | adef->mode = BIG_RAPID_MODE; |
---|
4803 | tr->doCutoff = TRUE; |
---|
4804 | break; |
---|
4805 | case 'e': |
---|
4806 | adef->mode = TREE_EVALUATION; |
---|
4807 | break; |
---|
4808 | case 'E': |
---|
4809 | adef->mode = FAST_SEARCH; |
---|
4810 | adef->veryFast = TRUE; |
---|
4811 | break; |
---|
4812 | case 'F': |
---|
4813 | adef->mode = FAST_SEARCH; |
---|
4814 | adef->veryFast = FALSE; |
---|
4815 | break; |
---|
4816 | case 'g': |
---|
4817 | tr->useFastScaling = FALSE; |
---|
4818 | tr->optimizeAllTrees = FALSE; |
---|
4819 | adef->mode = PER_SITE_LL; |
---|
4820 | break; |
---|
4821 | case 'G': |
---|
4822 | tr->useFastScaling = FALSE; |
---|
4823 | tr->optimizeAllTrees = TRUE; |
---|
4824 | adef->mode = PER_SITE_LL; |
---|
4825 | break; |
---|
4826 | case 'h': |
---|
4827 | tr->optimizeAllTrees = FALSE; |
---|
4828 | adef->mode = TREE_EVALUATION; |
---|
4829 | adef->likelihoodTest = TRUE; |
---|
4830 | tr->useFastScaling = FALSE; |
---|
4831 | break; |
---|
4832 | case 'H': |
---|
4833 | tr->optimizeAllTrees = TRUE; |
---|
4834 | adef->mode = TREE_EVALUATION; |
---|
4835 | adef->likelihoodTest = TRUE; |
---|
4836 | tr->useFastScaling = FALSE; |
---|
4837 | break; |
---|
4838 | case 'i': |
---|
4839 | adef->readTaxaOnly = TRUE; |
---|
4840 | adef->mode = CALC_BIPARTITIONS_IC; |
---|
4841 | break; |
---|
4842 | case 'I': |
---|
4843 | adef->mode = ROOT_TREE; |
---|
4844 | adef->readTaxaOnly = TRUE; |
---|
4845 | break; |
---|
4846 | case 'j': |
---|
4847 | adef->mode = GENERATE_BS; |
---|
4848 | adef->generateBS = TRUE; |
---|
4849 | break; |
---|
4850 | case 'J': |
---|
4851 | adef->mode = SH_LIKE_SUPPORTS; |
---|
4852 | tr->useFastScaling = FALSE; |
---|
4853 | break; |
---|
4854 | case 'm': |
---|
4855 | adef->readTaxaOnly = TRUE; |
---|
4856 | adef->mode = COMPUTE_BIPARTITION_CORRELATION; |
---|
4857 | break; |
---|
4858 | case 'n': |
---|
4859 | tr->optimizeAllTrees = FALSE; |
---|
4860 | adef->mode = COMPUTE_LHS; |
---|
4861 | break; |
---|
4862 | case 'N': |
---|
4863 | tr->optimizeAllTrees = TRUE; |
---|
4864 | adef->mode = COMPUTE_LHS; |
---|
4865 | break; |
---|
4866 | case 'o': |
---|
4867 | adef->mode = BIG_RAPID_MODE; |
---|
4868 | tr->doCutoff = FALSE; |
---|
4869 | break; |
---|
4870 | case 'p': |
---|
4871 | adef->mode = PARSIMONY_ADDITION; |
---|
4872 | break; |
---|
4873 | case 'q': |
---|
4874 | adef->mode = QUARTET_CALCULATION; |
---|
4875 | break; |
---|
4876 | case 'r': |
---|
4877 | adef->readTaxaOnly = TRUE; |
---|
4878 | adef->mode = COMPUTE_RF_DISTANCE; |
---|
4879 | break; |
---|
4880 | case 'R': |
---|
4881 | adef->readTaxaOnly = TRUE; |
---|
4882 | adef->mode = PLAUSIBILITY_CHECKER; |
---|
4883 | break; |
---|
4884 | case 's': |
---|
4885 | adef->mode = SPLIT_MULTI_GENE; |
---|
4886 | break; |
---|
4887 | case 'S': |
---|
4888 | adef->mode = EPA_SITE_SPECIFIC_BIAS; |
---|
4889 | tr->useFastScaling = FALSE; |
---|
4890 | adef->compressPatterns = FALSE; |
---|
4891 | break; |
---|
4892 | case 't': |
---|
4893 | adef->mode = BIG_RAPID_MODE; |
---|
4894 | tr->doCutoff = TRUE; |
---|
4895 | adef->permuteTreeoptimize = TRUE; |
---|
4896 | break; |
---|
4897 | case 'T': |
---|
4898 | adef->mode = THOROUGH_OPTIMIZATION; |
---|
4899 | break; |
---|
4900 | case 'u': |
---|
4901 | adef->mode = MORPH_CALIBRATOR; |
---|
4902 | tr->useFastScaling = FALSE; |
---|
4903 | adef->compressPatterns = FALSE; |
---|
4904 | break; |
---|
4905 | case 'v': |
---|
4906 | adef->mode = CLASSIFY_ML; |
---|
4907 | |
---|
4908 | tr->perPartitionEPA = FALSE; |
---|
4909 | #ifdef _PAVLOS |
---|
4910 | adef->compressPatterns = FALSE; |
---|
4911 | #endif |
---|
4912 | #ifdef _USE_PTHREADS |
---|
4913 | tr->useFastScaling = FALSE; |
---|
4914 | #endif |
---|
4915 | break; |
---|
4916 | |
---|
4917 | case 'V': |
---|
4918 | adef->mode = CLASSIFY_ML; |
---|
4919 | |
---|
4920 | |
---|
4921 | tr->perPartitionEPA = TRUE; |
---|
4922 | #ifdef _PAVLOS |
---|
4923 | adef->compressPatterns = FALSE; |
---|
4924 | #endif |
---|
4925 | #ifdef _USE_PTHREADS |
---|
4926 | tr->useFastScaling = FALSE; |
---|
4927 | #endif |
---|
4928 | break; |
---|
4929 | case 'w': |
---|
4930 | adef->mode = COMPUTE_ELW; |
---|
4931 | adef->computeELW = TRUE; |
---|
4932 | tr->optimizeAllTrees = FALSE; |
---|
4933 | break; |
---|
4934 | case 'W': |
---|
4935 | adef->mode = COMPUTE_ELW; |
---|
4936 | adef->computeELW = TRUE; |
---|
4937 | tr->optimizeAllTrees = TRUE; |
---|
4938 | break; |
---|
4939 | case 'x': |
---|
4940 | adef->mode = DISTANCE_MODE; |
---|
4941 | adef->computeDistance = TRUE; |
---|
4942 | break; |
---|
4943 | case 'y': |
---|
4944 | adef->mode = CLASSIFY_MP; |
---|
4945 | break; |
---|
4946 | default: |
---|
4947 | { |
---|
4948 | if(processID == 0) |
---|
4949 | { |
---|
4950 | printf("Error select one of the following algorithms via -f :\n"); |
---|
4951 | printMinusFUsage(); |
---|
4952 | } |
---|
4953 | errorExit(-1); |
---|
4954 | } |
---|
4955 | } |
---|
4956 | break; |
---|
4957 | case 'i': |
---|
4958 | sscanf(optarg, "%d", &adef->initial); |
---|
4959 | adef->initialSet = TRUE; |
---|
4960 | break; |
---|
4961 | case 'n': |
---|
4962 | strcpy(run_id,optarg); |
---|
4963 | analyzeRunId(run_id); |
---|
4964 | nameSet = 1; |
---|
4965 | break; |
---|
4966 | case 'w': |
---|
4967 | strcpy(resultDir, optarg); |
---|
4968 | resultDirSet = TRUE; |
---|
4969 | break; |
---|
4970 | case 't': |
---|
4971 | strcpy(tree_file, optarg); |
---|
4972 | adef->restart = TRUE; |
---|
4973 | treeSet = 1; |
---|
4974 | break; |
---|
4975 | case 's': |
---|
4976 | strcpy(seq_file, optarg); |
---|
4977 | alignmentSet = 1; |
---|
4978 | break; |
---|
4979 | case 'm': |
---|
4980 | strcpy(model,optarg); |
---|
4981 | if(modelExists(model, adef) == 0) |
---|
4982 | { |
---|
4983 | if(processID == 0) |
---|
4984 | { |
---|
4985 | printf("Model %s does not exist\n\n", model); |
---|
4986 | printf("For BINARY data use: BINCAT or BINGAMMA or\n"); |
---|
4987 | printf(" BINCATI or BINGAMMAI \n"); |
---|
4988 | printf("For DNA data use: GTRCAT or GTRGAMMA or\n"); |
---|
4989 | printf(" GTRCATI or GTRGAMMAI \n"); |
---|
4990 | printf("For AA data use: PROTCATmatrixName[F] or PROTGAMMAmatrixName[F] or\n"); |
---|
4991 | printf(" PROTCATImatrixName[F] or PROTGAMMAImatrixName[F] \n"); |
---|
4992 | printf("The AA substitution matrix can be one of the following: \n"); |
---|
4993 | |
---|
4994 | { |
---|
4995 | int |
---|
4996 | i; |
---|
4997 | |
---|
4998 | for(i = 0; i < NUM_PROT_MODELS - 1; i++) |
---|
4999 | { |
---|
5000 | if(i % 8 == 0) |
---|
5001 | printf("\n"); |
---|
5002 | printf("%s, ", protModels[i]); |
---|
5003 | } |
---|
5004 | |
---|
5005 | printf("%s\n\n", protModels[i]); |
---|
5006 | } |
---|
5007 | |
---|
5008 | printf("With the optional \"F\" appendix you can specify if you want to use empirical base frequencies\n"); |
---|
5009 | printf("Please note that for mixed models you can in addition specify the per-gene model in\n"); |
---|
5010 | printf("the mixed model file (see manual for details)\n"); |
---|
5011 | } |
---|
5012 | errorExit(-1); |
---|
5013 | } |
---|
5014 | else |
---|
5015 | modelSet = 1; |
---|
5016 | break; |
---|
5017 | default: |
---|
5018 | errorExit(-1); |
---|
5019 | } |
---|
5020 | } |
---|
5021 | |
---|
5022 | |
---|
5023 | |
---|
5024 | #ifdef _USE_PTHREADS |
---|
5025 | if(NumberOfThreads < 2) |
---|
5026 | { |
---|
5027 | printf("\nThe number of threads is currently set to %d\n", NumberOfThreads); |
---|
5028 | printf("Specify the number of threads to run via -T numberOfThreads\n"); |
---|
5029 | printf("NumberOfThreads must be set to an integer value greater than 1\n\n"); |
---|
5030 | errorExit(-1); |
---|
5031 | } |
---|
5032 | #endif |
---|
5033 | |
---|
5034 | #ifdef _QUARTET_MPI |
---|
5035 | if(adef->mode != QUARTET_CALCULATION) |
---|
5036 | { |
---|
5037 | if(processID == 0) |
---|
5038 | { |
---|
5039 | printf("you are using the dedicated RAxML MPI version for parallel quartet computations\n"); |
---|
5040 | printf("However you are not using the quartet option \"-f q\", raxml will exit now ...\n"); |
---|
5041 | } |
---|
5042 | |
---|
5043 | errorExit(-1); |
---|
5044 | } |
---|
5045 | if(!adef->useBinaryModelFile) |
---|
5046 | { |
---|
5047 | if(processID == 0) |
---|
5048 | { |
---|
5049 | printf("you are using the dedicated RAxML MPI version for parallel quartet computations\n"); |
---|
5050 | printf("However you must provide a binary model file via \"-R\" when using the MPI version, raxml will exit now ...\n"); |
---|
5051 | } |
---|
5052 | |
---|
5053 | errorExit(-1); |
---|
5054 | } |
---|
5055 | |
---|
5056 | #endif |
---|
5057 | |
---|
5058 | if(adef->mode == ANCESTRAL_SEQUENCE_TEST && !yFileSet) |
---|
5059 | { |
---|
5060 | if(!yFileSet) |
---|
5061 | { |
---|
5062 | printf("Error, for using the ancestral sequence test you have to provide a ancestral taxon name\n"); |
---|
5063 | printf("candidate file via \"-Y\" \n"); |
---|
5064 | errorExit(-1); |
---|
5065 | } |
---|
5066 | |
---|
5067 | if(!treeSet) |
---|
5068 | { |
---|
5069 | printf("Error, for using the ancestral sequence test you have to provide a tree file\n"); |
---|
5070 | printf("via \"-t\" \n"); |
---|
5071 | errorExit(-1); |
---|
5072 | } |
---|
5073 | } |
---|
5074 | |
---|
5075 | if(tr->catOnly && adef->rapidBoot) |
---|
5076 | { |
---|
5077 | printf("Error, you can not use \"-F\" in conjunction with the rapid bootstrapping option!\n"); |
---|
5078 | printf("it will only work with standard ML tree searches\n"); |
---|
5079 | errorExit(-1); |
---|
5080 | } |
---|
5081 | |
---|
5082 | if(tr->catOnly && adef->boot) |
---|
5083 | { |
---|
5084 | printf("Error, you can not use \"-F\" in conjunction with the standard bootstrapping option!\n"); |
---|
5085 | printf("it will only work with standard ML tree searches\n"); |
---|
5086 | errorExit(-1); |
---|
5087 | } |
---|
5088 | |
---|
5089 | |
---|
5090 | if(bSeedSet && xSeedSet) |
---|
5091 | { |
---|
5092 | printf("Error, you can't seed random seeds by using -x and -b at the same time\n"); |
---|
5093 | printf("use either -x or -b, exiting ......\n"); |
---|
5094 | errorExit(-1); |
---|
5095 | } |
---|
5096 | |
---|
5097 | if(bSeedSet || xSeedSet) |
---|
5098 | { |
---|
5099 | if(!multipleRunsSet) |
---|
5100 | { |
---|
5101 | printf("Error, you have specified a random number seed via -x or -b for some sort of bootstrapping,\n"); |
---|
5102 | printf("but you have not specified a number of replicates via -N or -#, exiting ....\n"); |
---|
5103 | errorExit(-1); |
---|
5104 | } |
---|
5105 | |
---|
5106 | if(adef->multipleRuns == 1) |
---|
5107 | { |
---|
5108 | printf("WARNING, you have specified a random number seed via -x or -b for some sort of bootstrapping,\n"); |
---|
5109 | printf("but you have specified a number of replicates via -N or -# euqal to one\n"); |
---|
5110 | printf("Are you really sure that this is what you want to do?\n"); |
---|
5111 | } |
---|
5112 | |
---|
5113 | |
---|
5114 | } |
---|
5115 | |
---|
5116 | |
---|
5117 | |
---|
5118 | |
---|
5119 | if(adef->computeELW) |
---|
5120 | { |
---|
5121 | if(processID == 0) |
---|
5122 | { |
---|
5123 | if(adef->boot == 0) |
---|
5124 | { |
---|
5125 | printf("Error, you must specify a bootstrap seed via \"-b\" to compute ELW statistics\n"); |
---|
5126 | errorExit(-1); |
---|
5127 | } |
---|
5128 | |
---|
5129 | if(adef->multipleRuns < 2) |
---|
5130 | { |
---|
5131 | printf("Error, you must specify the number of BS replicates via \"-#\" or \"-N\" to compute ELW statistics\n"); |
---|
5132 | printf("it should be larger than 1, recommended setting is 100\n"); |
---|
5133 | errorExit(-1); |
---|
5134 | } |
---|
5135 | |
---|
5136 | if(!treesSet) |
---|
5137 | { |
---|
5138 | printf("Error, you must specify an input file containing several candidate trees\n"); |
---|
5139 | printf("via \"-z\" to compute ELW statistics.\n"); |
---|
5140 | errorExit(-1); |
---|
5141 | } |
---|
5142 | |
---|
5143 | if(!isGamma(adef)) |
---|
5144 | { |
---|
5145 | printf("Error ELW test can only be conducted undetr GAMMA or GAMMA+P-Invar models\n"); |
---|
5146 | errorExit(-1); |
---|
5147 | } |
---|
5148 | } |
---|
5149 | } |
---|
5150 | |
---|
5151 | |
---|
5152 | if(isGamma(adef) && tr->noRateHet) |
---|
5153 | { |
---|
5154 | printf("\n\nError: using a model without any rate heterogeneity (enabled via \"-V\") only works if you specify a CAT model\n"); |
---|
5155 | printf("via the \"-m\" switch, exiting ....\n\n"); |
---|
5156 | errorExit(-1); |
---|
5157 | } |
---|
5158 | |
---|
5159 | if(((!adef->boot) && (!adef->rapidBoot)) && adef->bootStopping) |
---|
5160 | { |
---|
5161 | if(processID == 0) |
---|
5162 | { |
---|
5163 | printf("Can't use automatic bootstopping without actually doing a Bootstrap\n"); |
---|
5164 | printf("Specify either -x randomNumberSeed (rapid) or -b randomNumberSeed (standard)\n"); |
---|
5165 | errorExit(-1); |
---|
5166 | } |
---|
5167 | } |
---|
5168 | |
---|
5169 | if(adef->boot && adef->rapidBoot) |
---|
5170 | { |
---|
5171 | if(processID == 0) |
---|
5172 | { |
---|
5173 | printf("Can't use standard and rapid BOOTSTRAP simultaneously\n"); |
---|
5174 | errorExit(-1); |
---|
5175 | } |
---|
5176 | } |
---|
5177 | |
---|
5178 | if(adef->rapidBoot) |
---|
5179 | { |
---|
5180 | if(processID == 0 && (adef->restart || treesSet) && !(groupSet || constraintSet)) |
---|
5181 | { |
---|
5182 | printf("Error, starting tree(s) will be ignored by rapid Bootstrapping\n"); |
---|
5183 | errorExit(-1); |
---|
5184 | } |
---|
5185 | } |
---|
5186 | |
---|
5187 | if(adef->allInOne && (adef->rapidBoot == 0)) |
---|
5188 | { |
---|
5189 | if(processID == 0) |
---|
5190 | { |
---|
5191 | printf("Error, to carry out an ML search after a rapid BS inference you must specify a random number seed with -x\n"); |
---|
5192 | errorExit(-1); |
---|
5193 | } |
---|
5194 | } |
---|
5195 | |
---|
5196 | |
---|
5197 | |
---|
5198 | |
---|
5199 | if(adef->mode == PER_SITE_LL) |
---|
5200 | { |
---|
5201 | if(!isGamma(adef)) |
---|
5202 | { |
---|
5203 | if(processID == 0) |
---|
5204 | printf("\n ERROR: Computation of per-site log LHs is only allowed under GAMMA model of rate heterogeneity!\n"); |
---|
5205 | errorExit(-1); |
---|
5206 | } |
---|
5207 | |
---|
5208 | if(!treesSet) |
---|
5209 | { |
---|
5210 | if(processID == 0) |
---|
5211 | printf("\n ERROR: For Computation of per-site log LHs you need to specify several input trees with \"-z\"\n"); |
---|
5212 | errorExit(-1); |
---|
5213 | } |
---|
5214 | } |
---|
5215 | |
---|
5216 | if(adef->mode == FAST_SEARCH && (adef->grouping || adef->constraint)) |
---|
5217 | { |
---|
5218 | if(processID == 0) |
---|
5219 | printf("\n ERROR: Fast ML search algorithms -f F and -f E can not take as input constraint trees specified via -g or -r, since they will be ignored\n"); |
---|
5220 | errorExit(-1); |
---|
5221 | } |
---|
5222 | |
---|
5223 | if(adef->mode == SPLIT_MULTI_GENE && (!adef->useMultipleModel)) |
---|
5224 | { |
---|
5225 | if(processID == 0) |
---|
5226 | { |
---|
5227 | printf("\n Error, you are trying to split a multi-gene alignment into individual genes with the \"-f s\" option\n"); |
---|
5228 | printf("Without specifying a multiple model file with \"-q modelFileName\" \n"); |
---|
5229 | } |
---|
5230 | errorExit(-1); |
---|
5231 | } |
---|
5232 | |
---|
5233 | if(adef->mode == ROOT_TREE && !treeSet) |
---|
5234 | { |
---|
5235 | if(processID == 0) |
---|
5236 | printf("\n Error, for the tree rooting algorithm you need to specify a file containing the tree you want to root via \"-t\"\n"); |
---|
5237 | errorExit(-1); |
---|
5238 | } |
---|
5239 | |
---|
5240 | if((adef->mode == CALC_BIPARTITIONS || adef->mode == CALC_BIPARTITIONS_IC) && !treesSet) |
---|
5241 | { |
---|
5242 | if(processID == 0) |
---|
5243 | printf("\n Error, in bipartition and IC computation mode you must specify a file containing multiple trees with the \"-z\" option\n"); |
---|
5244 | errorExit(-1); |
---|
5245 | } |
---|
5246 | |
---|
5247 | if((adef->mode == CALC_BIPARTITIONS || adef->mode == CALC_BIPARTITIONS_IC) && !adef->restart) |
---|
5248 | { |
---|
5249 | if(processID == 0) |
---|
5250 | printf("\n Error, in bipartition and IC computation mode you must specify a tree on which bipartition information will be drawn with the \"-t\" option\n"); |
---|
5251 | errorExit(-1); |
---|
5252 | } |
---|
5253 | |
---|
5254 | if(!modelSet) |
---|
5255 | { |
---|
5256 | if(processID == 0) |
---|
5257 | printf("\n Error, you must specify a model of substitution with the \"-m\" option\n"); |
---|
5258 | errorExit(-1); |
---|
5259 | } |
---|
5260 | |
---|
5261 | if(adef->computeDistance) |
---|
5262 | { |
---|
5263 | if(isCat(adef)) |
---|
5264 | { |
---|
5265 | if(processID == 0) |
---|
5266 | printf("\n Error pairwise distance computation only allowed for GAMMA-based models of rate heterogeneity\n"); |
---|
5267 | errorExit(-1); |
---|
5268 | } |
---|
5269 | |
---|
5270 | if(adef->restart) |
---|
5271 | { |
---|
5272 | if(adef->randomStartingTree) |
---|
5273 | { |
---|
5274 | if(processID == 0) |
---|
5275 | printf("\n Error pairwise distance computation not allowed for random starting trees\n"); |
---|
5276 | errorExit(-1); |
---|
5277 | } |
---|
5278 | |
---|
5279 | if(adef->constraint) |
---|
5280 | { |
---|
5281 | if(processID == 0) |
---|
5282 | printf("\n Error pairwise distance computation not allowed for binary backbone constraint tree\n"); |
---|
5283 | errorExit(-1); |
---|
5284 | } |
---|
5285 | |
---|
5286 | if(adef->grouping) |
---|
5287 | { |
---|
5288 | if(processID == 0) |
---|
5289 | printf("\n Error pairwise distance computation not allowed for constraint tree\n"); |
---|
5290 | errorExit(-1); |
---|
5291 | } |
---|
5292 | |
---|
5293 | } |
---|
5294 | |
---|
5295 | if(adef->boot || adef->rapidBoot) |
---|
5296 | { |
---|
5297 | if(processID == 0) |
---|
5298 | printf("\n Bootstrapping not implemented for pairwise distance computation\n"); |
---|
5299 | errorExit(-1); |
---|
5300 | } |
---|
5301 | } |
---|
5302 | |
---|
5303 | |
---|
5304 | |
---|
5305 | |
---|
5306 | |
---|
5307 | |
---|
5308 | |
---|
5309 | |
---|
5310 | if(!adef->restart && adef->mode == PARSIMONY_ADDITION) |
---|
5311 | { |
---|
5312 | if(processID == 0) |
---|
5313 | { |
---|
5314 | printf("\n You need to specify an incomplete binary input tree with \"-t\" to execute \n"); |
---|
5315 | printf(" RAxML MP stepwise addition with \"-f p\"\n"); |
---|
5316 | } |
---|
5317 | errorExit(-1); |
---|
5318 | } |
---|
5319 | |
---|
5320 | |
---|
5321 | |
---|
5322 | if(adef->restart && adef->randomStartingTree) |
---|
5323 | { |
---|
5324 | if(processID == 0) |
---|
5325 | { |
---|
5326 | if(adef->constraint) |
---|
5327 | { |
---|
5328 | printf("\n Error you specified a binary constraint tree with -r AND the computation\n"); |
---|
5329 | printf("of a random starting tree with -d for the same run\n"); |
---|
5330 | } |
---|
5331 | else |
---|
5332 | { |
---|
5333 | if(adef->grouping) |
---|
5334 | { |
---|
5335 | printf("\n Error you specified a multifurcating constraint tree with -g AND the computation\n"); |
---|
5336 | printf("of a random starting tree with -d for the same run\n"); |
---|
5337 | } |
---|
5338 | else |
---|
5339 | { |
---|
5340 | printf("\n Error you specified a starting tree with -t AND the computation\n"); |
---|
5341 | printf("of a random starting tree with -d for the same run\n"); |
---|
5342 | } |
---|
5343 | } |
---|
5344 | } |
---|
5345 | errorExit(-1); |
---|
5346 | } |
---|
5347 | |
---|
5348 | if(adef->outgroup && adef->mode == ANCESTRAL_STATES) |
---|
5349 | { |
---|
5350 | if(processID == 0) |
---|
5351 | { |
---|
5352 | printf("\n Specifying an outgroup for ancestral state reconstruction is not allowed\n"); |
---|
5353 | printf(" You already need to specify a rooted input tree for computing ancestral states anyway.\n\n"); |
---|
5354 | } |
---|
5355 | errorExit(-1); |
---|
5356 | } |
---|
5357 | |
---|
5358 | if(!treeSet && adef->mode == ANCESTRAL_STATES) |
---|
5359 | { |
---|
5360 | if(processID == 0) |
---|
5361 | printf("\n Error you need to specify a ROOTED binary reference tree for ancestral state computations\n"); |
---|
5362 | errorExit(-1); |
---|
5363 | } |
---|
5364 | |
---|
5365 | if(treeSet && constraintSet) |
---|
5366 | { |
---|
5367 | if(processID == 0) |
---|
5368 | printf("\n Error you specified a binary constraint tree AND a starting tree for the same run\n"); |
---|
5369 | errorExit(-1); |
---|
5370 | } |
---|
5371 | |
---|
5372 | |
---|
5373 | if(treeSet && groupSet) |
---|
5374 | { |
---|
5375 | if(processID == 0) |
---|
5376 | printf("\n Error you specified a multifurcating constraint tree AND a starting tree for the same run\n"); |
---|
5377 | errorExit(-1); |
---|
5378 | } |
---|
5379 | |
---|
5380 | |
---|
5381 | if(groupSet && constraintSet) |
---|
5382 | { |
---|
5383 | if(processID == 0) |
---|
5384 | printf("\n Error you specified a bifurcating constraint tree AND a multifurcating constraint tree for the same run\n"); |
---|
5385 | errorExit(-1); |
---|
5386 | } |
---|
5387 | |
---|
5388 | if(adef->restart && adef->startingTreeOnly) |
---|
5389 | { |
---|
5390 | if(processID == 0) |
---|
5391 | { |
---|
5392 | printf("\n Error conflicting options: you want to compute only a parsimony starting tree with -y\n"); |
---|
5393 | printf(" while you actually specified a starting tree with -t %s\n", tree_file); |
---|
5394 | } |
---|
5395 | errorExit(-1); |
---|
5396 | } |
---|
5397 | |
---|
5398 | if((adef->mode == TREE_EVALUATION || adef->mode == OPTIMIZE_BR_LEN_SCALER) && (!adef->restart)) |
---|
5399 | { |
---|
5400 | if(processID == 0) |
---|
5401 | printf("\n Error: please specify a treefile for the tree you want to evaluate with -t\n"); |
---|
5402 | errorExit(-1); |
---|
5403 | } |
---|
5404 | |
---|
5405 | #ifdef _WAYNE_MPI |
---|
5406 | |
---|
5407 | if(adef->mode == SPLIT_MULTI_GENE) |
---|
5408 | { |
---|
5409 | if(processID == 0) |
---|
5410 | printf("Multi gene alignment splitting (-f s) not implemented for the MPI-Version\n"); |
---|
5411 | errorExit(-1); |
---|
5412 | } |
---|
5413 | |
---|
5414 | if(adef->mode == TREE_EVALUATION) |
---|
5415 | { |
---|
5416 | if(processID == 0) |
---|
5417 | printf("Tree Evaluation mode (-f e) not implemented for the MPI-Version\n"); |
---|
5418 | errorExit(-1); |
---|
5419 | } |
---|
5420 | |
---|
5421 | if(adef->mode == OPTIMIZE_BR_LEN_SCALER) |
---|
5422 | { |
---|
5423 | if(processID == 0) |
---|
5424 | printf("Branch length scaler optimization mode (-f B) not implemented for the MPI-Version\n"); |
---|
5425 | errorExit(-1); |
---|
5426 | } |
---|
5427 | |
---|
5428 | if(adef->mode == CALC_BIPARTITIONS) |
---|
5429 | { |
---|
5430 | if(processID == 0) |
---|
5431 | printf("Computation of bipartitions (-f b) not implemented for the MPI-Version\n"); |
---|
5432 | errorExit(-1); |
---|
5433 | } |
---|
5434 | |
---|
5435 | if(adef->mode == CALC_BIPARTITIONS_IC) |
---|
5436 | { |
---|
5437 | if(processID == 0) |
---|
5438 | printf("Computation of IC and TC scores (-f i) not implemented for the MPI-Version\n"); |
---|
5439 | errorExit(-1); |
---|
5440 | } |
---|
5441 | |
---|
5442 | if(adef->multipleRuns == 1) |
---|
5443 | { |
---|
5444 | if(processID == 0) |
---|
5445 | { |
---|
5446 | printf("Error: you are running the parallel MPI program but only want to compute one tree\n"); |
---|
5447 | printf("For the MPI version you must specify a number of trees greater than 1 with the -# or -N option\n"); |
---|
5448 | } |
---|
5449 | errorExit(-1); |
---|
5450 | } |
---|
5451 | |
---|
5452 | #endif |
---|
5453 | |
---|
5454 | if((adef->mode == TREE_EVALUATION || adef->mode == OPTIMIZE_BR_LEN_SCALER) && (isCat(adef))) |
---|
5455 | { |
---|
5456 | if(processID == 0) |
---|
5457 | { |
---|
5458 | printf("\n Warning: tree evaluation with CAT model of rate heterogeneity\n"); |
---|
5459 | printf("Only compare likelihood values for identical rate category assignments\n"); |
---|
5460 | printf("CAT-based Branch lengths are on average shorter by factor 0.5 than GAMMA-based branch lengths\n"); |
---|
5461 | printf("... but highly correlated with GAMMA branch lengths\n"); |
---|
5462 | } |
---|
5463 | } |
---|
5464 | |
---|
5465 | if(!nameSet) |
---|
5466 | { |
---|
5467 | if(processID == 0) |
---|
5468 | printf("\n Error: please specify a name for this run with -n\n"); |
---|
5469 | errorExit(-1); |
---|
5470 | } |
---|
5471 | |
---|
5472 | if(! alignmentSet && !adef->readTaxaOnly) |
---|
5473 | { |
---|
5474 | if(processID == 0) |
---|
5475 | printf("\n Error: please specify an alignment for this run with -s\n"); |
---|
5476 | errorExit(-1); |
---|
5477 | } |
---|
5478 | |
---|
5479 | |
---|
5480 | { |
---|
5481 | #ifdef WIN32 |
---|
5482 | const |
---|
5483 | char *separator = "\\"; |
---|
5484 | #else |
---|
5485 | const |
---|
5486 | char *separator = "/"; |
---|
5487 | #endif |
---|
5488 | |
---|
5489 | if(resultDirSet) |
---|
5490 | { |
---|
5491 | char |
---|
5492 | dir[1024] = ""; |
---|
5493 | |
---|
5494 | #ifndef WIN32 |
---|
5495 | if(resultDir[0] != separator[0]) |
---|
5496 | strcat(dir, separator); |
---|
5497 | #endif |
---|
5498 | |
---|
5499 | strcat(dir, resultDir); |
---|
5500 | |
---|
5501 | if(dir[strlen(dir) - 1] != separator[0]) |
---|
5502 | strcat(dir, separator); |
---|
5503 | strcpy(workdir, dir); |
---|
5504 | } |
---|
5505 | else |
---|
5506 | { |
---|
5507 | char |
---|
5508 | dir[1024] = "", |
---|
5509 | *result = getcwd(dir, sizeof(dir)); |
---|
5510 | |
---|
5511 | assert(result != (char*)NULL); |
---|
5512 | |
---|
5513 | if(dir[strlen(dir) - 1] != separator[0]) |
---|
5514 | strcat(dir, separator); |
---|
5515 | |
---|
5516 | strcpy(workdir, dir); |
---|
5517 | } |
---|
5518 | } |
---|
5519 | |
---|
5520 | return; |
---|
5521 | } |
---|
5522 | |
---|
5523 | |
---|
5524 | |
---|
5525 | |
---|
/*
 * Terminate the program with exit status e.
 *
 * When the program is built with _WAYNE_MPI or _QUARTET_MPI defined,
 * MPI_Finalize() is invoked before the process exits.
 * This function does not return to its caller.
 */
void errorExit(int e)
{
#if defined(_WAYNE_MPI) || defined(_QUARTET_MPI)
  MPI_Finalize();
#endif

  exit(e);
}
---|
5536 | |
---|
5537 | |
---|
5538 | |
---|
5539 | static void makeFileNames(void) |
---|
5540 | { |
---|
5541 | int infoFileExists = 0; |
---|
5542 | |
---|
5543 | strcpy(verboseSplitsFileName, workdir); |
---|
5544 | strcpy(permFileName, workdir); |
---|
5545 | strcpy(resultFileName, workdir); |
---|
5546 | strcpy(logFileName, workdir); |
---|
5547 | strcpy(checkpointFileName, workdir); |
---|
5548 | strcpy(infoFileName, workdir); |
---|
5549 | strcpy(randomFileName, workdir); |
---|
5550 | strcpy(bootstrapFileName, workdir); |
---|
5551 | strcpy(bipartitionsFileName, workdir); |
---|
5552 | strcpy(bipartitionsFileNameBranchLabels, workdir); |
---|
5553 | strcpy(icFileNameBranchLabels, workdir); |
---|
5554 | strcpy(ratesFileName, workdir); |
---|
5555 | strcpy(lengthFileName, workdir); |
---|
5556 | strcpy(lengthFileNameModel, workdir); |
---|
5557 | strcpy(perSiteLLsFileName, workdir); |
---|
5558 | strcpy(binaryModelParamsOutputFileName, workdir); |
---|
5559 | |
---|
5560 | strcat(verboseSplitsFileName, "RAxML_verboseSplits."); |
---|
5561 | strcat(permFileName, "RAxML_parsimonyTree."); |
---|
5562 | strcat(resultFileName, "RAxML_result."); |
---|
5563 | strcat(logFileName, "RAxML_log."); |
---|
5564 | strcat(checkpointFileName, "RAxML_checkpoint."); |
---|
5565 | strcat(infoFileName, "RAxML_info."); |
---|
5566 | strcat(randomFileName, "RAxML_randomTree."); |
---|
5567 | strcat(bootstrapFileName, "RAxML_bootstrap."); |
---|
5568 | strcat(bipartitionsFileName, "RAxML_bipartitions."); |
---|
5569 | strcat(bipartitionsFileNameBranchLabels, "RAxML_bipartitionsBranchLabels."); |
---|
5570 | strcat(icFileNameBranchLabels, "RAxML_IC_Score_BranchLabels."); |
---|
5571 | strcat(ratesFileName, "RAxML_perSiteRates."); |
---|
5572 | strcat(lengthFileName, "RAxML_treeLength."); |
---|
5573 | strcat(lengthFileNameModel, "RAxML_treeLengthModel."); |
---|
5574 | strcat(perSiteLLsFileName, "RAxML_perSiteLLs."); |
---|
5575 | strcat(binaryModelParamsOutputFileName, "RAxML_binaryModelParameters."); |
---|
5576 | |
---|
5577 | strcat(verboseSplitsFileName, run_id); |
---|
5578 | strcat(permFileName, run_id); |
---|
5579 | strcat(resultFileName, run_id); |
---|
5580 | strcat(logFileName, run_id); |
---|
5581 | strcat(checkpointFileName, run_id); |
---|
5582 | strcat(infoFileName, run_id); |
---|
5583 | strcat(randomFileName, run_id); |
---|
5584 | strcat(bootstrapFileName, run_id); |
---|
5585 | strcat(bipartitionsFileName, run_id); |
---|
5586 | strcat(bipartitionsFileNameBranchLabels, run_id); |
---|
5587 | strcat(icFileNameBranchLabels, run_id); |
---|
5588 | strcat(ratesFileName, run_id); |
---|
5589 | strcat(lengthFileName, run_id); |
---|
5590 | strcat(lengthFileNameModel, run_id); |
---|
5591 | strcat(perSiteLLsFileName, run_id); |
---|
5592 | strcat(binaryModelParamsOutputFileName, run_id); |
---|
5593 | |
---|
5594 | #ifdef _WAYNE_MPI |
---|
5595 | { |
---|
5596 | char buf[64]; |
---|
5597 | |
---|
5598 | strcpy(bootstrapFileNamePID, bootstrapFileName); |
---|
5599 | strcat(bootstrapFileNamePID, ".PID."); |
---|
5600 | sprintf(buf, "%d", processID); |
---|
5601 | strcat(bootstrapFileNamePID, buf); |
---|
5602 | } |
---|
5603 | #endif |
---|
5604 | |
---|
5605 | if(processID == 0) |
---|
5606 | { |
---|
5607 | infoFileExists = filexists(infoFileName); |
---|
5608 | |
---|
5609 | if(infoFileExists) |
---|
5610 | { |
---|
5611 | printf("RAxML output files with the run ID <%s> already exist \n", run_id); |
---|
5612 | printf("in directory %s ...... exiting\n", workdir); |
---|
5613 | |
---|
5614 | exit(-1); |
---|
5615 | } |
---|
5616 | } |
---|
5617 | } |
---|
5618 | |
---|
5619 | |
---|
5620 | |
---|
5621 | |
---|
5622 | |
---|
5623 | |
---|
5624 | |
---|
5625 | |
---|
5626 | |
---|
5627 | /***********************reading and initializing input ******************/ |
---|
5628 | |
---|
5629 | |
---|
5630 | /********************PRINTING various INFO **************************************/ |
---|
5631 | |
---|
5632 | |
---|
5633 | void printBaseFrequencies(tree *tr) |
---|
5634 | { |
---|
5635 | if(processID == 0) |
---|
5636 | { |
---|
5637 | int |
---|
5638 | model; |
---|
5639 | |
---|
5640 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
5641 | { |
---|
5642 | int i; |
---|
5643 | |
---|
5644 | printBothOpen("Partition: %d with name: %s\n", model, tr->partitionData[model].partitionName); |
---|
5645 | printBothOpen("Base frequencies: "); |
---|
5646 | |
---|
5647 | if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X) |
---|
5648 | { |
---|
5649 | int |
---|
5650 | k; |
---|
5651 | |
---|
5652 | printBothOpen("\n"); |
---|
5653 | |
---|
5654 | for(k = 0; k < 4; k++) |
---|
5655 | { |
---|
5656 | printBothOpen("LG4 %d: ", k); |
---|
5657 | for(i = 0; i < tr->partitionData[model].states; i++) |
---|
5658 | printBothOpen("%1.3f ", tr->partitionData[model].frequencies_LG4[k][i]); |
---|
5659 | printBothOpen("\n"); |
---|
5660 | } |
---|
5661 | } |
---|
5662 | else |
---|
5663 | { |
---|
5664 | for(i = 0; i < tr->partitionData[model].states; i++) |
---|
5665 | printBothOpen("%1.3f ", tr->partitionData[model].frequencies[i]); |
---|
5666 | } |
---|
5667 | |
---|
5668 | printBothOpen("\n\n"); |
---|
5669 | } |
---|
5670 | } |
---|
5671 | } |
---|
5672 | |
---|
5673 | static void printModelAndProgramInfo(tree *tr, analdef *adef, int argc, char *argv[]) |
---|
5674 | { |
---|
5675 | if(processID == 0) |
---|
5676 | { |
---|
5677 | int i, model; |
---|
5678 | FILE *infoFile = myfopen(infoFileName, "ab"); |
---|
5679 | char modelType[128]; |
---|
5680 | |
---|
5681 | if(!adef->readTaxaOnly) |
---|
5682 | { |
---|
5683 | if(adef->useInvariant) |
---|
5684 | strcpy(modelType, "GAMMA+P-Invar"); |
---|
5685 | else |
---|
5686 | strcpy(modelType, "GAMMA"); |
---|
5687 | } |
---|
5688 | |
---|
5689 | printVersionInfo(FALSE, infoFile); |
---|
5690 | |
---|
5691 | |
---|
5692 | |
---|
5693 | if(!adef->readTaxaOnly) |
---|
5694 | { |
---|
5695 | if(!adef->compressPatterns) |
---|
5696 | printBoth(infoFile, "\nAlignment has %d columns\n\n", tr->cdta->endsite); |
---|
5697 | else |
---|
5698 | printBoth(infoFile, "\nAlignment has %d distinct alignment patterns\n\n", tr->cdta->endsite); |
---|
5699 | |
---|
5700 | if(adef->useInvariant) |
---|
5701 | printBoth(infoFile, "Found %d invariant alignment patterns that correspond to %d columns \n", tr->numberOfInvariableColumns, tr->weightOfInvariableColumns); |
---|
5702 | |
---|
5703 | printBoth(infoFile, "Proportion of gaps and completely undetermined characters in this alignment: %3.2f%s\n", 100.0 * adef->gapyness, "%"); |
---|
5704 | } |
---|
5705 | |
---|
5706 | switch(adef->mode) |
---|
5707 | { |
---|
5708 | case DISTANCE_MODE: |
---|
5709 | printBoth(infoFile, "\nRAxML Computation of pairwise distances\n\n"); |
---|
5710 | break; |
---|
5711 | case TREE_EVALUATION : |
---|
5712 | printBoth(infoFile, "\nRAxML Model Optimization up to an accuracy of %f log likelihood units\n\n", adef->likelihoodEpsilon); |
---|
5713 | break; |
---|
5714 | case BIG_RAPID_MODE: |
---|
5715 | if(adef->rapidBoot) |
---|
5716 | { |
---|
5717 | if(adef->allInOne) |
---|
5718 | printBoth(infoFile, "\nRAxML rapid bootstrapping and subsequent ML search\n\n"); |
---|
5719 | else |
---|
5720 | printBoth(infoFile, "\nRAxML rapid bootstrapping algorithm\n\n"); |
---|
5721 | } |
---|
5722 | else |
---|
5723 | printBoth(infoFile, "\nRAxML rapid hill-climbing mode\n\n"); |
---|
5724 | break; |
---|
5725 | case CALC_BIPARTITIONS: |
---|
5726 | printBoth(infoFile, "\nRAxML Bipartition Computation: Drawing support values from trees in file %s onto tree in file %s\n\n", |
---|
5727 | bootStrapFile, tree_file); |
---|
5728 | break; |
---|
5729 | case CALC_BIPARTITIONS_IC: |
---|
5730 | printBoth(infoFile, "\nRAxML IC and TC score Computation: Computing IC and TC scores induced by trees in file %s w.r.t. tree in file %s\n\n", |
---|
5731 | bootStrapFile, tree_file); |
---|
5732 | break; |
---|
5733 | case PER_SITE_LL: |
---|
5734 | printBoth(infoFile, "\nRAxML computation of per-site log likelihoods\n"); |
---|
5735 | break; |
---|
5736 | case PARSIMONY_ADDITION: |
---|
5737 | printBoth(infoFile, "\nRAxML stepwise MP addition to incomplete starting tree\n\n"); |
---|
5738 | break; |
---|
5739 | case CLASSIFY_ML: |
---|
5740 | printBoth(infoFile, "\nRAxML likelihood-based placement algorithm\n\n"); |
---|
5741 | break; |
---|
5742 | case CLASSIFY_MP: |
---|
5743 | printBoth(infoFile, "\nRAxML parsimony-based placement algorithm\n\n"); |
---|
5744 | break; |
---|
5745 | case GENERATE_BS: |
---|
5746 | printBoth(infoFile, "\nRAxML BS replicate generation\n\n"); |
---|
5747 | break; |
---|
5748 | case COMPUTE_ELW: |
---|
5749 | printBoth(infoFile, "\nRAxML ELW test\n\n"); |
---|
5750 | break; |
---|
5751 | case BOOTSTOP_ONLY: |
---|
5752 | printBoth(infoFile, "\nRAxML a posteriori Bootstrap convergence assessment\n\n"); |
---|
5753 | break; |
---|
5754 | case CONSENSUS_ONLY: |
---|
5755 | if(adef->leaveDropMode) |
---|
5756 | printBoth(infoFile, "\nRAxML rogue taxa computation by Andre Aberer (HITS)\n\n"); |
---|
5757 | else |
---|
5758 | printBoth(infoFile, "\nRAxML consensus tree computation\n\n"); |
---|
5759 | break; |
---|
5760 | case COMPUTE_LHS: |
---|
5761 | printBoth(infoFile, "\nRAxML computation of likelihoods for a set of trees\n\n"); |
---|
5762 | break; |
---|
5763 | case COMPUTE_BIPARTITION_CORRELATION: |
---|
5764 | printBoth(infoFile, "\nRAxML computation of bipartition support correlation on two sets of trees\n\n"); |
---|
5765 | break; |
---|
5766 | case COMPUTE_RF_DISTANCE: |
---|
5767 | printBoth(infoFile, "\nRAxML computation of RF distances for all pairs of trees in a set of trees\n\n"); |
---|
5768 | break; |
---|
5769 | case MORPH_CALIBRATOR: |
---|
5770 | printBoth(infoFile, "\nRAxML morphological calibrator using Maximum Likelihood\n\n"); |
---|
5771 | break; |
---|
5772 | case FAST_SEARCH: |
---|
5773 | printBoth(infoFile, "\nRAxML experimental very fast tree search\n\n"); |
---|
5774 | break; |
---|
5775 | case SH_LIKE_SUPPORTS: |
---|
5776 | printBoth(infoFile, "\nRAxML computation of SH-like support values on a given tree\n\n"); |
---|
5777 | break; |
---|
5778 | case EPA_SITE_SPECIFIC_BIAS: |
---|
5779 | printBoth(infoFile, "\nRAxML exprimental site-specfific phylogenetic placement bias analysis algorithm\n\n"); |
---|
5780 | break; |
---|
5781 | case ANCESTRAL_STATES: |
---|
5782 | printBoth(infoFile, "\nRAxML marginal ancestral state computation\n\n"); |
---|
5783 | break; |
---|
5784 | case QUARTET_CALCULATION: |
---|
5785 | printBoth(infoFile, "\nRAxML quartet computation\n\n"); |
---|
5786 | break; |
---|
5787 | case THOROUGH_OPTIMIZATION: |
---|
5788 | printBoth(infoFile, "\nRAxML thorough tree optimization\n\n"); |
---|
5789 | break; |
---|
5790 | case OPTIMIZE_BR_LEN_SCALER : |
---|
5791 | printBoth(infoFile, "\nRAxML Branch length scaler and other model parameter optimization up to an accuracy of %f log likelihood units\n\n", adef->likelihoodEpsilon); |
---|
5792 | break; |
---|
5793 | case ANCESTRAL_SEQUENCE_TEST: |
---|
5794 | printBoth(infoFile, "\nRAxML ancestral sequence test for Jiajie\n\n"); |
---|
5795 | break; |
---|
5796 | case PLAUSIBILITY_CHECKER: |
---|
5797 | printBoth(infoFile, "\nRAxML large-tree plausibility-checker\n\n"); |
---|
5798 | break; |
---|
5799 | case ROOT_TREE: |
---|
5800 | printBoth(infoFile, "\nRAxML tree rooting algorithm\n\n"); |
---|
5801 | break; |
---|
5802 | default: |
---|
5803 | assert(0); |
---|
5804 | } |
---|
5805 | |
---|
5806 | |
---|
5807 | if(!adef->readTaxaOnly) |
---|
5808 | { |
---|
5809 | if(adef->perGeneBranchLengths) |
---|
5810 | printBoth(infoFile, "Using %d distinct models/data partitions with individual per partition branch length optimization\n\n\n", tr->NumberOfModels); |
---|
5811 | else |
---|
5812 | printBoth(infoFile, "Using %d distinct models/data partitions with joint branch length optimization\n\n\n", tr->NumberOfModels); |
---|
5813 | } |
---|
5814 | |
---|
5815 | if(adef->mode == BIG_RAPID_MODE) |
---|
5816 | { |
---|
5817 | if(adef->rapidBoot) |
---|
5818 | { |
---|
5819 | if(adef->allInOne) |
---|
5820 | printBoth(infoFile, "\nExecuting %d rapid bootstrap inferences and thereafter a thorough ML search \n\n", adef->multipleRuns); |
---|
5821 | else |
---|
5822 | printBoth(infoFile, "\nExecuting %d rapid bootstrap inferences\n\n", adef->multipleRuns); |
---|
5823 | } |
---|
5824 | else |
---|
5825 | { |
---|
5826 | if(adef->boot) |
---|
5827 | printBoth(infoFile, "Executing %d non-parametric bootstrap inferences\n\n", adef->multipleRuns); |
---|
5828 | else |
---|
5829 | { |
---|
5830 | char treeType[1024]; |
---|
5831 | |
---|
5832 | if(adef->restart) |
---|
5833 | strcpy(treeType, "user-specifed"); |
---|
5834 | else |
---|
5835 | { |
---|
5836 | if(adef->randomStartingTree) |
---|
5837 | strcpy(treeType, "distinct complete random"); |
---|
5838 | else |
---|
5839 | strcpy(treeType, "distinct randomized MP"); |
---|
5840 | } |
---|
5841 | |
---|
5842 | printBoth(infoFile, "Executing %d inferences on the original alignment using %d %s trees\n\n", |
---|
5843 | adef->multipleRuns, adef->multipleRuns, treeType); |
---|
5844 | } |
---|
5845 | } |
---|
5846 | } |
---|
5847 | |
---|
5848 | |
---|
5849 | if(!adef->readTaxaOnly) |
---|
5850 | { |
---|
5851 | printBoth(infoFile, "All free model parameters will be estimated by RAxML\n"); |
---|
5852 | |
---|
5853 | |
---|
5854 | if(tr->rateHetModel == GAMMA || tr->rateHetModel == GAMMA_I) |
---|
5855 | printBoth(infoFile, "%s model of rate heteorgeneity, ML estimate of alpha-parameter\n\n", modelType); |
---|
5856 | else |
---|
5857 | { |
---|
5858 | printBoth(infoFile, "ML estimate of %d per site rate categories\n\n", adef->categories); |
---|
5859 | if(adef->mode != CLASSIFY_ML && adef->mode != CLASSIFY_MP) |
---|
5860 | printBoth(infoFile, "Likelihood of final tree will be evaluated and optimized under %s\n\n", modelType); |
---|
5861 | } |
---|
5862 | |
---|
5863 | if(adef->mode != CLASSIFY_ML && adef->mode != CLASSIFY_MP) |
---|
5864 | printBoth(infoFile, "%s Model parameters will be estimated up to an accuracy of %2.10f Log Likelihood units\n\n", |
---|
5865 | modelType, adef->likelihoodEpsilon); |
---|
5866 | |
---|
5867 | |
---|
5868 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
5869 | { |
---|
5870 | printBoth(infoFile, "Partition: %d\n", model); |
---|
5871 | printBoth(infoFile, "Alignment Patterns: %d\n", tr->partitionData[model].upper - tr->partitionData[model].lower); |
---|
5872 | printBoth(infoFile, "Name: %s\n", tr->partitionData[model].partitionName); |
---|
5873 | |
---|
5874 | switch(tr->partitionData[model].dataType) |
---|
5875 | { |
---|
5876 | case DNA_DATA: |
---|
5877 | printBoth(infoFile, "DataType: DNA\n"); |
---|
5878 | printBoth(infoFile, "Substitution Matrix: GTR\n"); |
---|
5879 | break; |
---|
5880 | case AA_DATA: |
---|
5881 | assert(tr->partitionData[model].protModels >= 0 && tr->partitionData[model].protModels < NUM_PROT_MODELS); |
---|
5882 | printBoth(infoFile, "DataType: AA\n"); |
---|
5883 | if(tr->partitionData[model].protModels != PROT_FILE) |
---|
5884 | { |
---|
5885 | printBoth(infoFile, "Substitution Matrix: %s\n", protModels[tr->partitionData[model].protModels]); |
---|
5886 | printBoth(infoFile, "Using %s base frequencies\n", (tr->partitionData[model].usePredefinedProtFreqs == TRUE)?"fixed":"empirical"); |
---|
5887 | } |
---|
5888 | else |
---|
5889 | { |
---|
5890 | printBoth(infoFile, "Substitution Matrix File name: %s\n", tr->partitionData[model].proteinSubstitutionFileName); |
---|
5891 | printBoth(infoFile, "Using base frequencies as provided in the model file\n"); |
---|
5892 | } |
---|
5893 | break; |
---|
5894 | case BINARY_DATA: |
---|
5895 | printBoth(infoFile, "DataType: BINARY/MORPHOLOGICAL\n"); |
---|
5896 | printBoth(infoFile, "Substitution Matrix: Uncorrected\n"); |
---|
5897 | break; |
---|
5898 | case SECONDARY_DATA: |
---|
5899 | printBoth(infoFile, "DataType: SECONDARY STRUCTURE\n"); |
---|
5900 | printBoth(infoFile, "Substitution Matrix: %s\n", secondaryModelList[tr->secondaryStructureModel]); |
---|
5901 | break; |
---|
5902 | case SECONDARY_DATA_6: |
---|
5903 | printBoth(infoFile, "DataType: SECONDARY STRUCTURE 6 STATE\n"); |
---|
5904 | printBoth(infoFile, "Substitution Matrix: %s\n", secondaryModelList[tr->secondaryStructureModel]); |
---|
5905 | break; |
---|
5906 | case SECONDARY_DATA_7: |
---|
5907 | printBoth(infoFile, "DataType: SECONDARY STRUCTURE 7 STATE\n"); |
---|
5908 | printBoth(infoFile, "Substitution Matrix: %s\n", secondaryModelList[tr->secondaryStructureModel]); |
---|
5909 | break; |
---|
5910 | case GENERIC_32: |
---|
5911 | printBoth(infoFile, "DataType: Multi-State with %d distinct states in use (maximum 32)\n",tr->partitionData[model].states); |
---|
5912 | switch(tr->multiStateModel) |
---|
5913 | { |
---|
5914 | case ORDERED_MULTI_STATE: |
---|
5915 | printBoth(infoFile, "Substitution Matrix: Ordered Likelihood\n"); |
---|
5916 | break; |
---|
5917 | case MK_MULTI_STATE: |
---|
5918 | printBoth(infoFile, "Substitution Matrix: MK model\n"); |
---|
5919 | break; |
---|
5920 | case GTR_MULTI_STATE: |
---|
5921 | printBoth(infoFile, "Substitution Matrix: GTR\n"); |
---|
5922 | break; |
---|
5923 | default: |
---|
5924 | assert(0); |
---|
5925 | } |
---|
5926 | break; |
---|
5927 | case GENERIC_64: |
---|
5928 | printBoth(infoFile, "DataType: Codon\n"); |
---|
5929 | break; |
---|
5930 | default: |
---|
5931 | assert(0); |
---|
5932 | } |
---|
5933 | printBoth(infoFile, "\n\n\n"); |
---|
5934 | } |
---|
5935 | } |
---|
5936 | |
---|
5937 | printBoth(infoFile, "\n"); |
---|
5938 | |
---|
5939 | printBoth(infoFile, "RAxML was called as follows:\n\n"); |
---|
5940 | for(i = 0; i < argc; i++) |
---|
5941 | printBoth(infoFile,"%s ", argv[i]); |
---|
5942 | printBoth(infoFile,"\n\n\n"); |
---|
5943 | |
---|
5944 | fclose(infoFile); |
---|
5945 | } |
---|
5946 | } |
---|
5947 | |
---|
5948 | void printResult(tree *tr, analdef *adef, boolean finalPrint) |
---|
5949 | { |
---|
5950 | FILE *logFile; |
---|
5951 | char temporaryFileName[1024] = "", treeID[64] = ""; |
---|
5952 | |
---|
5953 | strcpy(temporaryFileName, resultFileName); |
---|
5954 | |
---|
5955 | switch(adef->mode) |
---|
5956 | { |
---|
5957 | case MORPH_CALIBRATOR: |
---|
5958 | break; |
---|
5959 | case TREE_EVALUATION: |
---|
5960 | |
---|
5961 | |
---|
5962 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, finalPrint, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
5963 | |
---|
5964 | logFile = myfopen(temporaryFileName, "wb"); |
---|
5965 | fprintf(logFile, "%s", tr->tree_string); |
---|
5966 | fclose(logFile); |
---|
5967 | |
---|
5968 | if(adef->perGeneBranchLengths) |
---|
5969 | printTreePerGene(tr, adef, temporaryFileName, "wb"); |
---|
5970 | |
---|
5971 | |
---|
5972 | break; |
---|
5973 | case BIG_RAPID_MODE: |
---|
5974 | if(!adef->boot) |
---|
5975 | { |
---|
5976 | if(adef->multipleRuns > 1) |
---|
5977 | { |
---|
5978 | sprintf(treeID, "%d", tr->treeID); |
---|
5979 | strcat(temporaryFileName, ".RUN."); |
---|
5980 | strcat(temporaryFileName, treeID); |
---|
5981 | } |
---|
5982 | |
---|
5983 | |
---|
5984 | if(finalPrint) |
---|
5985 | { |
---|
5986 | switch(tr->rateHetModel) |
---|
5987 | { |
---|
5988 | case GAMMA: |
---|
5989 | case GAMMA_I: |
---|
5990 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, finalPrint, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
5991 | |
---|
5992 | logFile = myfopen(temporaryFileName, "wb"); |
---|
5993 | fprintf(logFile, "%s", tr->tree_string); |
---|
5994 | fclose(logFile); |
---|
5995 | |
---|
5996 | if(adef->perGeneBranchLengths) |
---|
5997 | printTreePerGene(tr, adef, temporaryFileName, "wb"); |
---|
5998 | break; |
---|
5999 | case CAT: |
---|
6000 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE); |
---|
6001 | |
---|
6002 | logFile = myfopen(temporaryFileName, "wb"); |
---|
6003 | fprintf(logFile, "%s", tr->tree_string); |
---|
6004 | fclose(logFile); |
---|
6005 | |
---|
6006 | break; |
---|
6007 | default: |
---|
6008 | assert(0); |
---|
6009 | } |
---|
6010 | } |
---|
6011 | else |
---|
6012 | { |
---|
6013 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE); |
---|
6014 | logFile = myfopen(temporaryFileName, "wb"); |
---|
6015 | fprintf(logFile, "%s", tr->tree_string); |
---|
6016 | fclose(logFile); |
---|
6017 | } |
---|
6018 | } |
---|
6019 | break; |
---|
6020 | default: |
---|
6021 | printf("FATAL ERROR call to printResult from undefined STATE %d\n", adef->mode); |
---|
6022 | exit(-1); |
---|
6023 | break; |
---|
6024 | } |
---|
6025 | } |
---|
6026 | |
---|
6027 | void printBootstrapResult(tree *tr, analdef *adef, boolean finalPrint) |
---|
6028 | { |
---|
6029 | FILE |
---|
6030 | *logFile; |
---|
6031 | #ifdef _WAYNE_MPI |
---|
6032 | char |
---|
6033 | *fileName = bootstrapFileNamePID; |
---|
6034 | #else |
---|
6035 | char |
---|
6036 | *fileName = bootstrapFileName; |
---|
6037 | #endif |
---|
6038 | |
---|
6039 | if(adef->mode == BIG_RAPID_MODE && (adef->boot || adef->rapidBoot)) |
---|
6040 | { |
---|
6041 | if(adef->bootstrapBranchLengths) |
---|
6042 | { |
---|
6043 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, finalPrint, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
6044 | |
---|
6045 | logFile = myfopen(fileName, "ab"); |
---|
6046 | fprintf(logFile, "%s", tr->tree_string); |
---|
6047 | fclose(logFile); |
---|
6048 | |
---|
6049 | if(adef->perGeneBranchLengths) |
---|
6050 | printTreePerGene(tr, adef, fileName, "ab"); |
---|
6051 | } |
---|
6052 | else |
---|
6053 | { |
---|
6054 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE); |
---|
6055 | |
---|
6056 | logFile = myfopen(fileName, "ab"); |
---|
6057 | fprintf(logFile, "%s", tr->tree_string); |
---|
6058 | fclose(logFile); |
---|
6059 | } |
---|
6060 | } |
---|
6061 | else |
---|
6062 | { |
---|
6063 | printf("FATAL ERROR in printBootstrapResult\n"); |
---|
6064 | exit(-1); |
---|
6065 | } |
---|
6066 | } |
---|
6067 | |
---|
6068 | |
---|
6069 | |
---|
6070 | void printBipartitionResult(tree *tr, analdef *adef, boolean finalPrint, boolean printIC) |
---|
6071 | { |
---|
6072 | if(processID == 0 || adef->allInOne) |
---|
6073 | { |
---|
6074 | FILE |
---|
6075 | *logFile; |
---|
6076 | |
---|
6077 | if(!printIC) |
---|
6078 | { |
---|
6079 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, TRUE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, printIC, FALSE); |
---|
6080 | |
---|
6081 | logFile = myfopen(bipartitionsFileName, "ab"); |
---|
6082 | |
---|
6083 | fprintf(logFile, "%s", tr->tree_string); |
---|
6084 | fclose(logFile); |
---|
6085 | } |
---|
6086 | |
---|
6087 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, TRUE, FALSE, printIC, FALSE); |
---|
6088 | |
---|
6089 | if(printIC) |
---|
6090 | logFile = myfopen(icFileNameBranchLabels, "ab"); |
---|
6091 | else |
---|
6092 | logFile = myfopen(bipartitionsFileNameBranchLabels, "ab"); |
---|
6093 | |
---|
6094 | fprintf(logFile, "%s", tr->tree_string); |
---|
6095 | fclose(logFile); |
---|
6096 | } |
---|
6097 | } |
---|
6098 | |
---|
6099 | |
---|
6100 | |
---|
6101 | void printLog(tree *tr, analdef *adef, boolean finalPrint) |
---|
6102 | { |
---|
6103 | FILE *logFile; |
---|
6104 | char temporaryFileName[1024] = "", checkPoints[1024] = "", treeID[64] = ""; |
---|
6105 | double lh, t; |
---|
6106 | |
---|
6107 | lh = tr->likelihood; |
---|
6108 | t = gettime() - masterTime; |
---|
6109 | |
---|
6110 | strcpy(temporaryFileName, logFileName); |
---|
6111 | strcpy(checkPoints, checkpointFileName); |
---|
6112 | |
---|
6113 | switch(adef->mode) |
---|
6114 | { |
---|
6115 | case TREE_EVALUATION: |
---|
6116 | logFile = myfopen(temporaryFileName, "ab"); |
---|
6117 | |
---|
6118 | printf("%f %f\n", t, lh); |
---|
6119 | fprintf(logFile, "%f %f\n", t, lh); |
---|
6120 | |
---|
6121 | fclose(logFile); |
---|
6122 | break; |
---|
6123 | case BIG_RAPID_MODE: |
---|
6124 | if(adef->boot || adef->rapidBoot) |
---|
6125 | { |
---|
6126 | /* testing only printf("%f %f\n", t, lh);*/ |
---|
6127 | /* NOTHING PRINTED so far */ |
---|
6128 | } |
---|
6129 | else |
---|
6130 | { |
---|
6131 | if(adef->multipleRuns > 1) |
---|
6132 | { |
---|
6133 | sprintf(treeID, "%d", tr->treeID); |
---|
6134 | strcat(temporaryFileName, ".RUN."); |
---|
6135 | strcat(temporaryFileName, treeID); |
---|
6136 | |
---|
6137 | strcat(checkPoints, ".RUN."); |
---|
6138 | strcat(checkPoints, treeID); |
---|
6139 | } |
---|
6140 | |
---|
6141 | |
---|
6142 | if(!adef->checkpoints) |
---|
6143 | { |
---|
6144 | logFile = myfopen(temporaryFileName, "ab"); |
---|
6145 | |
---|
6146 | fprintf(logFile, "%f %f\n", t, lh); |
---|
6147 | |
---|
6148 | fclose(logFile); |
---|
6149 | } |
---|
6150 | else |
---|
6151 | { |
---|
6152 | logFile = myfopen(temporaryFileName, "ab"); |
---|
6153 | |
---|
6154 | fprintf(logFile, "%f %f %d\n", t, lh, tr->checkPointCounter); |
---|
6155 | |
---|
6156 | fclose(logFile); |
---|
6157 | |
---|
6158 | strcat(checkPoints, "."); |
---|
6159 | |
---|
6160 | sprintf(treeID, "%d", tr->checkPointCounter); |
---|
6161 | strcat(checkPoints, treeID); |
---|
6162 | |
---|
6163 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE); |
---|
6164 | |
---|
6165 | logFile = myfopen(checkPoints, "ab"); |
---|
6166 | fprintf(logFile, "%s", tr->tree_string); |
---|
6167 | fclose(logFile); |
---|
6168 | |
---|
6169 | tr->checkPointCounter++; |
---|
6170 | } |
---|
6171 | } |
---|
6172 | break; |
---|
6173 | case MORPH_CALIBRATOR: |
---|
6174 | break; |
---|
6175 | default: |
---|
6176 | assert(0); |
---|
6177 | } |
---|
6178 | } |
---|
6179 | |
---|
6180 | |
---|
6181 | |
---|
6182 | void printStartingTree(tree *tr, analdef *adef, boolean finalPrint) |
---|
6183 | { |
---|
6184 | if(adef->boot) |
---|
6185 | { |
---|
6186 | /* not printing starting trees for bootstrap */ |
---|
6187 | } |
---|
6188 | else |
---|
6189 | { |
---|
6190 | FILE *treeFile; |
---|
6191 | char temporaryFileName[1024] = "", treeID[64] = ""; |
---|
6192 | |
---|
6193 | Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, finalPrint, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE); |
---|
6194 | |
---|
6195 | if(adef->randomStartingTree) |
---|
6196 | strcpy(temporaryFileName, randomFileName); |
---|
6197 | else |
---|
6198 | strcpy(temporaryFileName, permFileName); |
---|
6199 | |
---|
6200 | if(adef->multipleRuns > 1) |
---|
6201 | { |
---|
6202 | sprintf(treeID, "%d", tr->treeID); |
---|
6203 | strcat(temporaryFileName, ".RUN."); |
---|
6204 | strcat(temporaryFileName, treeID); |
---|
6205 | } |
---|
6206 | |
---|
6207 | treeFile = myfopen(temporaryFileName, "ab"); |
---|
6208 | fprintf(treeFile, "%s", tr->tree_string); |
---|
6209 | fclose(treeFile); |
---|
6210 | } |
---|
6211 | } |
---|
6212 | |
---|
6213 | void writeInfoFile(analdef *adef, tree *tr, double t) |
---|
6214 | { |
---|
6215 | |
---|
6216 | { |
---|
6217 | switch(adef->mode) |
---|
6218 | { |
---|
6219 | case TREE_EVALUATION: |
---|
6220 | break; |
---|
6221 | case BIG_RAPID_MODE: |
---|
6222 | if(adef->boot || adef->rapidBoot) |
---|
6223 | { |
---|
6224 | if(!adef->initialSet) |
---|
6225 | printBothOpen("Bootstrap[%d]: Time %f seconds, bootstrap likelihood %f, best rearrangement setting %d\n", tr->treeID, t, tr->likelihood, adef->bestTrav); |
---|
6226 | else |
---|
6227 | printBothOpen("Bootstrap[%d]: Time %f seconds, bootstrap likelihood %f\n", tr->treeID, t, tr->likelihood); |
---|
6228 | } |
---|
6229 | else |
---|
6230 | { |
---|
6231 | int model; |
---|
6232 | char modelType[128]; |
---|
6233 | |
---|
6234 | switch(tr->rateHetModel) |
---|
6235 | { |
---|
6236 | case GAMMA_I: |
---|
6237 | strcpy(modelType, "GAMMA+P-Invar"); |
---|
6238 | break; |
---|
6239 | case GAMMA: |
---|
6240 | strcpy(modelType, "GAMMA"); |
---|
6241 | break; |
---|
6242 | case CAT: |
---|
6243 | strcpy(modelType, "CAT"); |
---|
6244 | break; |
---|
6245 | default: |
---|
6246 | assert(0); |
---|
6247 | } |
---|
6248 | |
---|
6249 | if(!adef->initialSet) |
---|
6250 | printBothOpen("Inference[%d]: Time %f %s-based likelihood %f, best rearrangement setting %d\n", |
---|
6251 | tr->treeID, t, modelType, tr->likelihood, adef->bestTrav); |
---|
6252 | else |
---|
6253 | printBothOpen("Inference[%d]: Time %f %s-based likelihood %f\n", |
---|
6254 | tr->treeID, t, modelType, tr->likelihood); |
---|
6255 | |
---|
6256 | { |
---|
6257 | FILE *infoFile = myfopen(infoFileName, "ab"); |
---|
6258 | |
---|
6259 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
6260 | { |
---|
6261 | fprintf(infoFile, "alpha[%d]: %f ", model, tr->partitionData[model].alpha); |
---|
6262 | if(adef->useInvariant) |
---|
6263 | fprintf(infoFile, "invar[%d]: %f ", model, tr->partitionData[model].propInvariant); |
---|
6264 | |
---|
6265 | if(tr->partitionData[model].dataType == DNA_DATA) |
---|
6266 | { |
---|
6267 | int |
---|
6268 | k, |
---|
6269 | states = tr->partitionData[model].states, |
---|
6270 | rates = ((states * states - states) / 2); |
---|
6271 | |
---|
6272 | fprintf(infoFile, "rates[%d] ac ag at cg ct gt: ", model); |
---|
6273 | for(k = 0; k < rates; k++) |
---|
6274 | fprintf(infoFile, "%f ", tr->partitionData[model].substRates[k]); |
---|
6275 | } |
---|
6276 | |
---|
6277 | } |
---|
6278 | |
---|
6279 | fprintf(infoFile, "\n"); |
---|
6280 | fclose(infoFile); |
---|
6281 | } |
---|
6282 | } |
---|
6283 | break; |
---|
6284 | default: |
---|
6285 | assert(0); |
---|
6286 | } |
---|
6287 | } |
---|
6288 | } |
---|
6289 | |
---|
/* Prints the n frequencies in f, one per line, each labelled with the
   corresponding entry of names. */
static void printFreqs(int n, double *f, const char **names)
{
  int
    state = 0;

  while(state < n)
    {
      printBothOpen("freq pi(%s): %f\n", names[state], f[state]);
      state++;
    }
}
---|
6297 | |
---|
/* Prints one line per unordered state pair (i < j) of an n-state model.
   The rates are taken consecutively from r, except for the final pair
   (n - 2, n - 1), for which the constant 1.0 is printed and r is not
   read. */
static void printRatesDNA_BIN(int n, double *r, const char **names)
{
  int
    from,
    to,
    pos = 0;

  for(from = 0; from < n; from++)
    for(to = from + 1; to < n; to++, pos++)
      {
        const int
          isLastPair = (from == n - 2) && (to == n - 1);

        printBothOpen("rate %s <-> %s: %f\n", names[from], names[to], isLastPair ? 1.0 : r[pos]);
      }
}
---|
6314 | |
---|
/* Prints one line per unordered state pair (i < j) of an n-state model,
   taking the rates consecutively from r. */
static void printRatesRest(int n, double *r, const char **names)
{
  int
    from,
    to,
    pos = 0;

  for(from = 0; from < n; from++)
    for(to = from + 1; to < n; to++, pos++)
      printBothOpen("rate %s <-> %s: %f\n", names[from], names[to], r[pos]);
}
---|
6328 | |
---|
6329 | |
---|
6330 | void getDataTypeString(tree *tr, int model, char typeOfData[1024]) |
---|
6331 | { |
---|
6332 | switch(tr->partitionData[model].dataType) |
---|
6333 | { |
---|
6334 | case AA_DATA: |
---|
6335 | strcpy(typeOfData,"AA"); |
---|
6336 | break; |
---|
6337 | case DNA_DATA: |
---|
6338 | strcpy(typeOfData,"DNA"); |
---|
6339 | break; |
---|
6340 | case BINARY_DATA: |
---|
6341 | strcpy(typeOfData,"BINARY/MORPHOLOGICAL"); |
---|
6342 | break; |
---|
6343 | case SECONDARY_DATA: |
---|
6344 | strcpy(typeOfData,"SECONDARY 16 STATE MODEL USING "); |
---|
6345 | strcat(typeOfData, secondaryModelList[tr->secondaryStructureModel]); |
---|
6346 | break; |
---|
6347 | case SECONDARY_DATA_6: |
---|
6348 | strcpy(typeOfData,"SECONDARY 6 STATE MODEL USING "); |
---|
6349 | strcat(typeOfData, secondaryModelList[tr->secondaryStructureModel]); |
---|
6350 | break; |
---|
6351 | case SECONDARY_DATA_7: |
---|
6352 | strcpy(typeOfData,"SECONDARY 7 STATE MODEL USING "); |
---|
6353 | strcat(typeOfData, secondaryModelList[tr->secondaryStructureModel]); |
---|
6354 | break; |
---|
6355 | case GENERIC_32: |
---|
6356 | strcpy(typeOfData,"Multi-State"); |
---|
6357 | break; |
---|
6358 | case GENERIC_64: |
---|
6359 | strcpy(typeOfData,"Codon"); |
---|
6360 | break; |
---|
6361 | default: |
---|
6362 | assert(0); |
---|
6363 | } |
---|
6364 | } |
---|
6365 | |
---|
6366 | |
---|
6367 | |
---|
6368 | void printModelParams(tree *tr, analdef *adef) |
---|
6369 | { |
---|
6370 | int |
---|
6371 | model; |
---|
6372 | |
---|
6373 | double |
---|
6374 | *f = (double*)NULL, |
---|
6375 | *r = (double*)NULL; |
---|
6376 | |
---|
6377 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
6378 | { |
---|
6379 | double tl; |
---|
6380 | char typeOfData[1024]; |
---|
6381 | |
---|
6382 | getDataTypeString(tr, model, typeOfData); |
---|
6383 | |
---|
6384 | printBothOpen("Model Parameters of Partition %d, Name: %s, Type of Data: %s\n", |
---|
6385 | model, tr->partitionData[model].partitionName, typeOfData); |
---|
6386 | printBothOpen("alpha: %f\n", tr->partitionData[model].alpha); |
---|
6387 | |
---|
6388 | if(adef->useInvariant) |
---|
6389 | printBothOpen("invar: %f\n", tr->partitionData[model].propInvariant); |
---|
6390 | |
---|
6391 | if(tr->useBrLenScaler) |
---|
6392 | printBothOpen("Branch length scaler: %f\n", tr->partitionData[model].brLenScaler); |
---|
6393 | |
---|
6394 | if(adef->perGeneBranchLengths) |
---|
6395 | tl = treeLength(tr, model); |
---|
6396 | else |
---|
6397 | tl = treeLength(tr, 0); |
---|
6398 | |
---|
6399 | printBothOpen("Tree-Length: %f\n", tl); |
---|
6400 | |
---|
6401 | f = tr->partitionData[model].frequencies; |
---|
6402 | r = tr->partitionData[model].substRates; |
---|
6403 | |
---|
6404 | switch(tr->partitionData[model].dataType) |
---|
6405 | { |
---|
6406 | case AA_DATA: |
---|
6407 | { |
---|
6408 | const char *freqNames[20] = {"A", "R", "N","D", "C", "Q", "E", "G", |
---|
6409 | "H", "I", "L", "K", "M", "F", "P", "S", |
---|
6410 | "T", "W", "Y", "V"}; |
---|
6411 | |
---|
6412 | if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X) |
---|
6413 | { |
---|
6414 | int |
---|
6415 | k; |
---|
6416 | |
---|
6417 | for(k = 0; k < 4; k++) |
---|
6418 | { |
---|
6419 | printBothOpen("LGM %d\n", k); |
---|
6420 | printRatesRest(20, tr->partitionData[model].substRates_LG4[k], freqNames); |
---|
6421 | printBothOpen("\n"); |
---|
6422 | printFreqs(20, tr->partitionData[model].frequencies_LG4[k], freqNames); |
---|
6423 | } |
---|
6424 | } |
---|
6425 | else |
---|
6426 | { |
---|
6427 | printRatesRest(20, r, freqNames); |
---|
6428 | printBothOpen("\n"); |
---|
6429 | printFreqs(20, f, freqNames); |
---|
6430 | } |
---|
6431 | } |
---|
6432 | break; |
---|
6433 | case GENERIC_32: |
---|
6434 | { |
---|
6435 | const char *freqNames[32] = {"0", "1", "2", "3", "4", "5", "6", "7", |
---|
6436 | "8", "9", "A", "B", "C", "D", "E", "F", |
---|
6437 | "G", "H", "I", "J", "K", "L", "M", "N", |
---|
6438 | "O", "P", "Q", "R", "S", "T", "U", "V"}; |
---|
6439 | |
---|
6440 | printRatesRest(32, r, freqNames); |
---|
6441 | printBothOpen("\n"); |
---|
6442 | printFreqs(32, f, freqNames); |
---|
6443 | } |
---|
6444 | break; |
---|
6445 | case GENERIC_64: |
---|
6446 | assert(0); |
---|
6447 | break; |
---|
6448 | case DNA_DATA: |
---|
6449 | { |
---|
6450 | const char *freqNames[4] = {"A", "C", "G", "T"}; |
---|
6451 | |
---|
6452 | printRatesDNA_BIN(4, r, freqNames); |
---|
6453 | printBothOpen("\n"); |
---|
6454 | printFreqs(4, f, freqNames); |
---|
6455 | } |
---|
6456 | break; |
---|
6457 | case SECONDARY_DATA_6: |
---|
6458 | { |
---|
6459 | const char *freqNames[6] = {"AU", "CG", "GC", "GU", "UA", "UG"}; |
---|
6460 | |
---|
6461 | printRatesRest(6, r, freqNames); |
---|
6462 | printBothOpen("\n"); |
---|
6463 | printFreqs(6, f, freqNames); |
---|
6464 | } |
---|
6465 | break; |
---|
6466 | case SECONDARY_DATA_7: |
---|
6467 | { |
---|
6468 | const char *freqNames[7] = {"AU", "CG", "GC", "GU", "UA", "UG", "REST"}; |
---|
6469 | |
---|
6470 | printRatesRest(7, r, freqNames); |
---|
6471 | printBothOpen("\n"); |
---|
6472 | printFreqs(7, f, freqNames); |
---|
6473 | } |
---|
6474 | break; |
---|
6475 | case SECONDARY_DATA: |
---|
6476 | { |
---|
6477 | const char *freqNames[16] = {"AA", "AC", "AG", "AU", "CA", "CC", "CG", "CU", |
---|
6478 | "GA", "GC", "GG", "GU", "UA", "UC", "UG", "UU"}; |
---|
6479 | |
---|
6480 | printRatesRest(16, r, freqNames); |
---|
6481 | printBothOpen("\n"); |
---|
6482 | printFreqs(16, f, freqNames); |
---|
6483 | } |
---|
6484 | break; |
---|
6485 | case BINARY_DATA: |
---|
6486 | { |
---|
6487 | const char *freqNames[2] = {"0", "1"}; |
---|
6488 | |
---|
6489 | printRatesDNA_BIN(2, r, freqNames); |
---|
6490 | printBothOpen("\n"); |
---|
6491 | printFreqs(2, f, freqNames); |
---|
6492 | } |
---|
6493 | break; |
---|
6494 | default: |
---|
6495 | assert(0); |
---|
6496 | } |
---|
6497 | |
---|
6498 | printBothOpen("\n"); |
---|
6499 | } |
---|
6500 | } |
---|
6501 | |
---|
6502 | static void finalizeInfoFile(tree *tr, analdef *adef) |
---|
6503 | { |
---|
6504 | if(processID == 0) |
---|
6505 | { |
---|
6506 | double t; |
---|
6507 | |
---|
6508 | t = gettime() - masterTime; |
---|
6509 | |
---|
6510 | switch(adef->mode) |
---|
6511 | { |
---|
6512 | case TREE_EVALUATION : |
---|
6513 | case OPTIMIZE_BR_LEN_SCALER: |
---|
6514 | |
---|
6515 | if(adef->mode == OPTIMIZE_BR_LEN_SCALER) |
---|
6516 | printBothOpen("\n\nOverall Time for Tree Evaluation with branch length scalers: %f\n", t); |
---|
6517 | else |
---|
6518 | printBothOpen("\n\nOverall Time for Tree Evaluation %f\n", t); |
---|
6519 | |
---|
6520 | printBothOpen("Final GAMMA likelihood: %f\n", tr->likelihood); |
---|
6521 | |
---|
6522 | { |
---|
6523 | boolean |
---|
6524 | linkedProteinGTR = FALSE; |
---|
6525 | |
---|
6526 | int |
---|
6527 | model, |
---|
6528 | params = 0, |
---|
6529 | paramsBrLen = 0; |
---|
6530 | |
---|
6531 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
6532 | { |
---|
6533 | switch(tr->partitionData[model].dataType) |
---|
6534 | { |
---|
6535 | case AA_DATA: |
---|
6536 | if(tr->partitionData[model].protModels == GTR_UNLINKED) |
---|
6537 | params += 189; |
---|
6538 | |
---|
6539 | if(tr->partitionData[model].protModels == GTR) |
---|
6540 | linkedProteinGTR = TRUE; |
---|
6541 | |
---|
6542 | if(!tr->partitionData[model].usePredefinedProtFreqs) |
---|
6543 | params += 19; |
---|
6544 | break; |
---|
6545 | case GENERIC_32: |
---|
6546 | { // Warning: weird scope starts here! |
---|
6547 | int |
---|
6548 | states1 = tr->partitionData[model].states; |
---|
6549 | |
---|
6550 | /* frequencies */ |
---|
6551 | |
---|
6552 | params += (states1 - 1); |
---|
6553 | |
---|
6554 | switch(tr->multiStateModel) |
---|
6555 | { |
---|
6556 | case ORDERED_MULTI_STATE: |
---|
6557 | break; |
---|
6558 | case MK_MULTI_STATE: |
---|
6559 | params += (states1 - 1); |
---|
6560 | break; |
---|
6561 | case GTR_MULTI_STATE: |
---|
6562 | params += ((((states1 * states1) - states1) / 2) - 1); |
---|
6563 | break; |
---|
6564 | default: |
---|
6565 | assert(0); |
---|
6566 | } |
---|
6567 | break; |
---|
6568 | case GENERIC_64: |
---|
6569 | assert(0); |
---|
6570 | break; |
---|
6571 | case DNA_DATA: |
---|
6572 | params += 5 + 3; |
---|
6573 | break; |
---|
6574 | case SECONDARY_DATA_6: |
---|
6575 | case SECONDARY_DATA_7: |
---|
6576 | case SECONDARY_DATA: |
---|
6577 | { |
---|
6578 | int |
---|
6579 | states2 = tr->partitionData[model].states; |
---|
6580 | |
---|
6581 | switch(tr->secondaryStructureModel) |
---|
6582 | { |
---|
6583 | case SEC_6_A: |
---|
6584 | params += ((((states2 * states2) - states2) / 2) - 1); /*rates*/ |
---|
6585 | params += (states2 - 1); /* frequencies */ |
---|
6586 | break; |
---|
6587 | case SEC_6_B: |
---|
6588 | params += 1; /*rates */ |
---|
6589 | params += 5; /* frequencies */ |
---|
6590 | break; |
---|
6591 | case SEC_6_C: |
---|
6592 | params += 1; /*rates */ |
---|
6593 | params += 2; /* frequencies */ |
---|
6594 | break; |
---|
6595 | case SEC_6_D: |
---|
6596 | params += 1; /*rates */ |
---|
6597 | params += 1; /* frequencies */ |
---|
6598 | break; |
---|
6599 | case SEC_6_E: |
---|
6600 | params += 1; /*rates */ |
---|
6601 | params += 5; /* frequencies */ |
---|
6602 | break; |
---|
6603 | case SEC_7_A: |
---|
6604 | params += ((((states2 * states2) - states2) / 2) - 1); /*rates*/ |
---|
6605 | params += (states2 - 1); /* frequencies */ |
---|
6606 | break; |
---|
6607 | case SEC_7_B: |
---|
6608 | params += 20; /*rates */ |
---|
6609 | params += 3; /* frequencies */ |
---|
6610 | break; |
---|
6611 | case SEC_7_C: |
---|
6612 | params += 9; /*rates */ |
---|
6613 | params += 6; /* frequencies */ |
---|
6614 | break; |
---|
6615 | case SEC_7_D: |
---|
6616 | params += 3; /*rates */ |
---|
6617 | params += 6; /* frequencies */ |
---|
6618 | break; |
---|
6619 | case SEC_7_E: |
---|
6620 | params += 1; /*rates */ |
---|
6621 | params += 6; /* frequencies */ |
---|
6622 | break; |
---|
6623 | case SEC_7_F: |
---|
6624 | params += 3; /*rates */ |
---|
6625 | params += 3; /* frequencies */ |
---|
6626 | break; |
---|
6627 | case SEC_16: |
---|
6628 | params += ((((states2 * states2) - states2) / 2) - 1); /*rates*/ |
---|
6629 | params += (states2 - 1); /* frequencies */ |
---|
6630 | break; |
---|
6631 | case SEC_16_A: |
---|
6632 | params += 4; /*rates */ |
---|
6633 | params += 15; /* frequencies */ |
---|
6634 | break; |
---|
6635 | case SEC_16_B: |
---|
6636 | params += 0; /*rates */ |
---|
6637 | params += 15; /* frequencies */ |
---|
6638 | break; |
---|
6639 | case SEC_16_C: |
---|
6640 | case SEC_16_D: |
---|
6641 | case SEC_16_E: |
---|
6642 | case SEC_16_F: |
---|
6643 | case SEC_16_I: |
---|
6644 | case SEC_16_J: |
---|
6645 | case SEC_16_K: |
---|
6646 | assert(0); |
---|
6647 | default: |
---|
6648 | assert(0); |
---|
6649 | } |
---|
6650 | } |
---|
6651 | break; |
---|
6652 | case BINARY_DATA: |
---|
6653 | params += 1; |
---|
6654 | break; |
---|
6655 | default: |
---|
6656 | assert(0); |
---|
6657 | } // Warning: weird scope ends here! |
---|
6658 | } |
---|
6659 | |
---|
6660 | if(adef->useInvariant) |
---|
6661 | params += 2; |
---|
6662 | else /* GAMMA */ |
---|
6663 | params += 1; |
---|
6664 | } |
---|
6665 | |
---|
6666 | if(linkedProteinGTR) |
---|
6667 | params += 189; |
---|
6668 | |
---|
6669 | if(adef->mode == TREE_EVALUATION) |
---|
6670 | { |
---|
6671 | if(tr->multiBranch) |
---|
6672 | paramsBrLen = params + tr->NumberOfModels * (2 * tr->mxtips - 3); |
---|
6673 | else |
---|
6674 | paramsBrLen = params + 2 * tr->mxtips - 3; |
---|
6675 | } |
---|
6676 | else |
---|
6677 | { |
---|
6678 | paramsBrLen = params + tr->NumberOfModels; |
---|
6679 | } |
---|
6680 | |
---|
6681 | printBothOpen("\n"); |
---|
6682 | |
---|
6683 | |
---|
6684 | printBothOpen("Number of free parameters for AIC-TEST(BR-LEN): %d\n", paramsBrLen); |
---|
6685 | printBothOpen("Number of free parameters for AIC-TEST(NO-BR-LEN): %d\n", params); |
---|
6686 | |
---|
6687 | |
---|
6688 | printBothOpen("\n\n"); |
---|
6689 | |
---|
6690 | printModelParams(tr, adef); |
---|
6691 | |
---|
6692 | if(adef->mode == TREE_EVALUATION) |
---|
6693 | { |
---|
6694 | printBothOpen("Final tree written to: %s\n", resultFileName); |
---|
6695 | printBothOpen("Execution Log File written to: %s\n", logFileName); |
---|
6696 | } |
---|
6697 | |
---|
6698 | } |
---|
6699 | break; |
---|
6700 | case BIG_RAPID_MODE: |
---|
6701 | if(adef->boot) |
---|
6702 | { |
---|
6703 | printBothOpen("\n\nOverall Time for %d Bootstraps %f\n", adef->multipleRuns, t); |
---|
6704 | printBothOpen("\n\nAverage Time per Bootstrap %f\n", (double)(t/((double)adef->multipleRuns))); |
---|
6705 | printBothOpen("All %d bootstrapped trees written to: %s\n", adef->multipleRuns, bootstrapFileName); |
---|
6706 | } |
---|
6707 | else |
---|
6708 | { |
---|
6709 | if(adef->multipleRuns > 1) |
---|
6710 | { |
---|
6711 | double avgLH = 0; |
---|
6712 | double bestLH = unlikely; |
---|
6713 | int i, bestI = 0; |
---|
6714 | |
---|
6715 | for(i = 0; i < adef->multipleRuns; i++) |
---|
6716 | { |
---|
6717 | avgLH += tr->likelihoods[i]; |
---|
6718 | if(tr->likelihoods[i] > bestLH) |
---|
6719 | { |
---|
6720 | bestLH = tr->likelihoods[i]; |
---|
6721 | bestI = i; |
---|
6722 | } |
---|
6723 | } |
---|
6724 | avgLH /= ((double)adef->multipleRuns); |
---|
6725 | |
---|
6726 | printBothOpen("\n\nOverall Time for %d Inferences %f\n", adef->multipleRuns, t); |
---|
6727 | printBothOpen("Average Time per Inference %f\n", (double)(t/((double)adef->multipleRuns))); |
---|
6728 | printBothOpen("Average Likelihood : %f\n", avgLH); |
---|
6729 | printBothOpen("\n"); |
---|
6730 | printBothOpen("Best Likelihood in run number %d: likelihood %f\n\n", bestI, bestLH); |
---|
6731 | |
---|
6732 | if(adef->checkpoints) |
---|
6733 | printBothOpen("Checkpoints written to: %s.RUN.%d.* to %d.*\n", checkpointFileName, 0, adef->multipleRuns - 1); |
---|
6734 | if(!adef->restart) |
---|
6735 | { |
---|
6736 | if(adef->randomStartingTree) |
---|
6737 | printBothOpen("Random starting trees written to: %s.RUN.%d to %d\n", randomFileName, 0, adef->multipleRuns - 1); |
---|
6738 | else |
---|
6739 | printBothOpen("Parsimony starting trees written to: %s.RUN.%d to %d\n", permFileName, 0, adef->multipleRuns - 1); |
---|
6740 | } |
---|
6741 | printBothOpen("Final trees written to: %s.RUN.%d to %d\n", resultFileName, 0, adef->multipleRuns - 1); |
---|
6742 | printBothOpen("Execution Log Files written to: %s.RUN.%d to %d\n", logFileName, 0, adef->multipleRuns - 1); |
---|
6743 | printBothOpen("Execution information file written to: %s\n", infoFileName); |
---|
6744 | } |
---|
6745 | else |
---|
6746 | { |
---|
6747 | printBothOpen("\n\nOverall Time for 1 Inference %f\n", t); |
---|
6748 | printBothOpen("Likelihood : %f\n", tr->likelihood); |
---|
6749 | printBothOpen("\n\n"); |
---|
6750 | |
---|
6751 | if(adef->checkpoints) |
---|
6752 | printBothOpen("Checkpoints written to: %s.*\n", checkpointFileName); |
---|
6753 | if(!adef->restart) |
---|
6754 | { |
---|
6755 | if(adef->randomStartingTree) |
---|
6756 | printBothOpen("Random starting tree written to: %s\n", randomFileName); |
---|
6757 | else |
---|
6758 | printBothOpen("Parsimony starting tree written to: %s\n", permFileName); |
---|
6759 | } |
---|
6760 | printBothOpen("Final tree written to: %s\n", resultFileName); |
---|
6761 | printBothOpen("Execution Log File written to: %s\n", logFileName); |
---|
6762 | printBothOpen("Execution information file written to: %s\n",infoFileName); |
---|
6763 | } |
---|
6764 | } |
---|
6765 | |
---|
6766 | break; |
---|
6767 | case CALC_BIPARTITIONS: |
---|
6768 | printBothOpen("\n\nTime for Computation of Bipartitions %f\n", t); |
---|
6769 | printBothOpen("Tree with bipartitions written to file: %s\n", bipartitionsFileName); |
---|
6770 | printBothOpen("Tree with bipartitions as branch labels written to file: %s\n", bipartitionsFileNameBranchLabels); |
---|
6771 | printBothOpen("Execution information file written to : %s\n",infoFileName); |
---|
6772 | break; |
---|
6773 | case CALC_BIPARTITIONS_IC: |
---|
6774 | printBothOpen("\n\nTime for Computation of TC and IC scores %f\n", t); |
---|
6775 | printBothOpen("Tree with IC scores as branch labels written to file: %s\n", icFileNameBranchLabels); |
---|
6776 | printBothOpen("Execution information file written to : %s\n",infoFileName); |
---|
6777 | break; |
---|
6778 | case PER_SITE_LL: |
---|
6779 | printBothOpen("\n\nTime for Optimization of per-site log likelihoods %f\n", t); |
---|
6780 | printBothOpen("Per-site Log Likelihoods written to File %s in Tree-Puzzle format\n", perSiteLLsFileName); |
---|
6781 | printBothOpen("Execution information file written to : %s\n",infoFileName); |
---|
6782 | |
---|
6783 | break; |
---|
6784 | case PARSIMONY_ADDITION: |
---|
6785 | printBothOpen("\n\nTime for MP stepwise addition %f\n", t); |
---|
6786 | printBothOpen("Execution information file written to : %s\n",infoFileName); |
---|
6787 | printBothOpen("Complete parsimony tree written to: %s\n", permFileName); |
---|
6788 | break; |
---|
6789 | case ANCESTRAL_STATES: |
---|
6790 | printBothOpen("\n\nTime for marginal ancestral state computation: %f\n\n", t); |
---|
6791 | break; |
---|
6792 | case QUARTET_CALCULATION: |
---|
6793 | printBothOpen("\n\nOverall Time for quartet computation: %f\n\n", t); |
---|
6794 | break; |
---|
6795 | case THOROUGH_OPTIMIZATION: |
---|
6796 | printBothOpen("\n\nTime for thorough tree optimization: %f\n\n", t); |
---|
6797 | break; |
---|
6798 | case ROOT_TREE: |
---|
6799 | printBothOpen("\n\nTime for tree rooting: %f\n\n", t); |
---|
6800 | break; |
---|
6801 | default: |
---|
6802 | assert(0); |
---|
6803 | } |
---|
6804 | } |
---|
6805 | |
---|
6806 | } |
---|
6807 | |
---|
6808 | |
---|
6809 | /************************************************************************************/ |
---|
6810 | |
---|
6811 | |
---|
6812 | #ifdef _USE_PTHREADS |
---|
6813 | |
---|
6814 | |
---|
6815 | |
---|
6816 | |
---|
6817 | |
---|
6818 | |
---|
6819 | static void computeFraction(tree *localTree, int tid, int n) |
---|
6820 | { |
---|
6821 | int |
---|
6822 | model; |
---|
6823 | |
---|
6824 | size_t |
---|
6825 | i; |
---|
6826 | |
---|
6827 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
6828 | { |
---|
6829 | int width = 0; |
---|
6830 | |
---|
6831 | for(i = localTree->partitionData[model].lower; i < localTree->partitionData[model].upper; i++) |
---|
6832 | if(i % (size_t)n == (size_t)tid) |
---|
6833 | width++; |
---|
6834 | |
---|
6835 | localTree->partitionData[model].width = width; |
---|
6836 | } |
---|
6837 | } |
---|
6838 | |
---|
6839 | |
---|
6840 | |
---|
6841 | static void threadFixModelIndices(tree *tr, tree *localTree, int tid, int n) |
---|
6842 | { |
---|
6843 | size_t |
---|
6844 | model, |
---|
6845 | j, |
---|
6846 | i, |
---|
6847 | globalCounter = 0, |
---|
6848 | localCounter = 0, |
---|
6849 | offset, |
---|
6850 | countOffset, |
---|
6851 | myLength = 0; |
---|
6852 | |
---|
6853 | for(model = 0; model < (size_t)localTree->NumberOfModels; model++) |
---|
6854 | { |
---|
6855 | localTree->partitionData[model].lower = tr->partitionData[model].lower; |
---|
6856 | localTree->partitionData[model].upper = tr->partitionData[model].upper; |
---|
6857 | } |
---|
6858 | |
---|
6859 | computeFraction(localTree, tid, n); |
---|
6860 | |
---|
6861 | for(model = 0, offset = 0, countOffset = 0; model < (size_t)localTree->NumberOfModels; model++) |
---|
6862 | { |
---|
6863 | localTree->partitionData[model].sumBuffer = &localTree->sumBuffer[offset]; |
---|
6864 | localTree->partitionData[model].perSiteLL = &localTree->perSiteLLPtr[countOffset]; |
---|
6865 | localTree->partitionData[model].wgt = &localTree->wgtPtr[countOffset]; |
---|
6866 | localTree->partitionData[model].invariant = &localTree->invariantPtr[countOffset]; |
---|
6867 | localTree->partitionData[model].rateCategory = &localTree->rateCategoryPtr[countOffset]; |
---|
6868 | |
---|
6869 | countOffset += localTree->partitionData[model].width; |
---|
6870 | |
---|
6871 | offset += (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states) * (size_t)(localTree->partitionData[model].width); |
---|
6872 | } |
---|
6873 | |
---|
6874 | myLength = countOffset; |
---|
6875 | |
---|
6876 | |
---|
6877 | /* figure in data */ |
---|
6878 | |
---|
6879 | for(i = 0; i < (size_t)localTree->mxtips; i++) |
---|
6880 | { |
---|
6881 | for(model = 0, offset = 0, countOffset = 0; model < (size_t)localTree->NumberOfModels; model++) |
---|
6882 | { |
---|
6883 | localTree->partitionData[model].yVector[i+1] = &localTree->y_ptr[i * myLength + countOffset]; |
---|
6884 | countOffset += localTree->partitionData[model].width; |
---|
6885 | } |
---|
6886 | assert(countOffset == myLength); |
---|
6887 | } |
---|
6888 | |
---|
6889 | |
---|
6890 | |
---|
6891 | for(model = 0, globalCounter = 0; model < (size_t)localTree->NumberOfModels; model++) |
---|
6892 | { |
---|
6893 | for(localCounter = 0, i = (size_t)localTree->partitionData[model].lower; i < (size_t)localTree->partitionData[model].upper; i++) |
---|
6894 | { |
---|
6895 | if(i % (size_t)n == (size_t)tid) |
---|
6896 | { |
---|
6897 | localTree->partitionData[model].wgt[localCounter] = tr->cdta->aliaswgt[globalCounter]; |
---|
6898 | localTree->partitionData[model].invariant[localCounter] = tr->invariant[globalCounter]; |
---|
6899 | localTree->partitionData[model].rateCategory[localCounter] = tr->cdta->rateCategory[globalCounter]; |
---|
6900 | |
---|
6901 | for(j = 1; j <= (size_t)localTree->mxtips; j++) |
---|
6902 | localTree->partitionData[model].yVector[j][localCounter] = tr->yVector[j][globalCounter]; |
---|
6903 | |
---|
6904 | localCounter++; |
---|
6905 | } |
---|
6906 | globalCounter++; |
---|
6907 | } |
---|
6908 | } |
---|
6909 | |
---|
6910 | for(model = 0; model < (size_t)localTree->NumberOfModels; model++) |
---|
6911 | { |
---|
6912 | int |
---|
6913 | undetermined = getUndetermined(localTree->partitionData[model].dataType); |
---|
6914 | |
---|
6915 | size_t |
---|
6916 | width = localTree->partitionData[model].width; |
---|
6917 | |
---|
6918 | localTree->partitionData[model].gapVectorLength = ((int)width / 32) + 1; |
---|
6919 | |
---|
6920 | memset(localTree->partitionData[model].gapVector, 0, localTree->partitionData[model].initialGapVectorSize); |
---|
6921 | |
---|
6922 | for(j = 1; j <= (size_t)(localTree->mxtips); j++) |
---|
6923 | for(i = 0; i < width; i++) |
---|
6924 | if(localTree->partitionData[model].yVector[j][i] == undetermined) |
---|
6925 | localTree->partitionData[model].gapVector[localTree->partitionData[model].gapVectorLength * j + i / 32] |= mask32[i % 32]; |
---|
6926 | } |
---|
6927 | } |
---|
6928 | |
---|
6929 | |
---|
6930 | static void initPartition(tree *tr, tree *localTree, int tid) |
---|
6931 | { |
---|
6932 | int model; |
---|
6933 | |
---|
6934 | localTree->threadID = tid; |
---|
6935 | |
---|
6936 | if(tid > 0) |
---|
6937 | { |
---|
6938 | int totalLength = 0; |
---|
6939 | |
---|
6940 | localTree->useGammaMedian = tr->useGammaMedian; |
---|
6941 | localTree->saveMemory = tr->saveMemory; |
---|
6942 | localTree->innerNodes = tr->innerNodes; |
---|
6943 | localTree->useFastScaling = tr->useFastScaling; |
---|
6944 | localTree->perPartitionEPA = tr->perPartitionEPA; |
---|
6945 | localTree->maxCategories = tr->maxCategories; |
---|
6946 | |
---|
6947 | localTree->originalCrunchedLength = tr->originalCrunchedLength; |
---|
6948 | localTree->NumberOfModels = tr->NumberOfModels; |
---|
6949 | localTree->mxtips = tr->mxtips; |
---|
6950 | localTree->multiBranch = tr->multiBranch; |
---|
6951 | |
---|
6952 | localTree->nameList = tr->nameList; |
---|
6953 | localTree->numBranches = tr->numBranches; |
---|
6954 | localTree->lhs = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength); |
---|
6955 | localTree->executeModel = (boolean*)rax_malloc(sizeof(boolean) * localTree->NumberOfModels); |
---|
6956 | localTree->perPartitionLH = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels); |
---|
6957 | localTree->storedPerPartitionLH = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels); |
---|
6958 | |
---|
6959 | localTree->fracchanges = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels); |
---|
6960 | localTree->rawFracchanges = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels); |
---|
6961 | |
---|
6962 | localTree->partitionContributions = (double*)rax_malloc(sizeof(double) * localTree->NumberOfModels); |
---|
6963 | |
---|
6964 | localTree->partitionData = (pInfo*)rax_malloc(sizeof(pInfo) * localTree->NumberOfModels); |
---|
6965 | |
---|
6966 | /* extend for multi-branch */ |
---|
6967 | localTree->td[0].count = 0; |
---|
6968 | localTree->td[0].ti = (traversalInfo *)rax_malloc(sizeof(traversalInfo) * localTree->mxtips); |
---|
6969 | |
---|
6970 | localTree->cdta = (cruncheddata*)rax_malloc(sizeof(cruncheddata)); |
---|
6971 | localTree->cdta->patrat = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength); |
---|
6972 | localTree->cdta->patratStored = (double*)rax_malloc(sizeof(double) * localTree->originalCrunchedLength); |
---|
6973 | |
---|
6974 | localTree->discreteRateCategories = tr->discreteRateCategories; |
---|
6975 | |
---|
6976 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
6977 | { |
---|
6978 | localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories; |
---|
6979 | localTree->partitionData[model].states = tr->partitionData[model].states; |
---|
6980 | localTree->partitionData[model].maxTipStates = tr->partitionData[model].maxTipStates; |
---|
6981 | localTree->partitionData[model].dataType = tr->partitionData[model].dataType; |
---|
6982 | localTree->partitionData[model].protModels = tr->partitionData[model].protModels; |
---|
6983 | localTree->partitionData[model].usePredefinedProtFreqs = tr->partitionData[model].usePredefinedProtFreqs; |
---|
6984 | localTree->partitionData[model].mxtips = tr->partitionData[model].mxtips; |
---|
6985 | localTree->partitionData[model].lower = tr->partitionData[model].lower; |
---|
6986 | localTree->partitionData[model].upper = tr->partitionData[model].upper; |
---|
6987 | localTree->executeModel[model] = TRUE; |
---|
6988 | localTree->perPartitionLH[model] = 0.0; |
---|
6989 | localTree->storedPerPartitionLH[model] = 0.0; |
---|
6990 | totalLength += (localTree->partitionData[model].upper - localTree->partitionData[model].lower); |
---|
6991 | } |
---|
6992 | |
---|
6993 | assert(totalLength == localTree->originalCrunchedLength); |
---|
6994 | } |
---|
6995 | |
---|
6996 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
6997 | localTree->partitionData[model].width = 0; |
---|
6998 | } |
---|
6999 | |
---|
7000 | |
---|
7001 | static void allocNodex(tree *tr, int tid, int n) |
---|
7002 | { |
---|
7003 | size_t |
---|
7004 | model, |
---|
7005 | memoryRequirements = 0, |
---|
7006 | myLength = 0; |
---|
7007 | |
---|
7008 | computeFraction(tr, tid, n); |
---|
7009 | |
---|
7010 | allocPartitions(tr); |
---|
7011 | |
---|
7012 | |
---|
7013 | for(model = 0; model < (size_t)tr->NumberOfModels; model++) |
---|
7014 | { |
---|
7015 | size_t |
---|
7016 | width = tr->partitionData[model].width, |
---|
7017 | i; |
---|
7018 | |
---|
7019 | myLength += width; |
---|
7020 | |
---|
7021 | memoryRequirements += (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states) * width; |
---|
7022 | |
---|
7023 | tr->partitionData[model].gapVectorLength = ((int)width / 32) + 1; |
---|
7024 | |
---|
7025 | tr->partitionData[model].gapVector = (unsigned int*)rax_calloc(tr->partitionData[model].gapVectorLength * 2 * tr->mxtips, sizeof(unsigned int)); |
---|
7026 | |
---|
7027 | tr->partitionData[model].initialGapVectorSize = tr->partitionData[model].gapVectorLength * 2 * tr->mxtips * sizeof(int); |
---|
7028 | |
---|
7029 | /* always multiply by 4 due to frequent switching between CAT and GAMMA in standard RAxML */ |
---|
7030 | |
---|
7031 | tr->partitionData[model].gapColumn = (double *)rax_malloc( |
---|
7032 | ((size_t)(tr->innerNodes)) * |
---|
7033 | ((size_t)(4)) * |
---|
7034 | ((size_t)(tr->partitionData[model].states)) * |
---|
7035 | sizeof(double)); |
---|
7036 | for(i = 0; i < tr->innerNodes; i++) |
---|
7037 | { |
---|
7038 | tr->partitionData[model].xVector[i] = (double*)NULL; |
---|
7039 | tr->partitionData[model].expVector[i] = (int*)NULL; |
---|
7040 | } |
---|
7041 | } |
---|
7042 | |
---|
7043 | if(tid == 0) |
---|
7044 | { |
---|
7045 | tr->perSiteLL = (double *)rax_malloc((size_t)tr->cdta->endsite * sizeof(double)); |
---|
7046 | assert(tr->perSiteLL != NULL); |
---|
7047 | } |
---|
7048 | |
---|
7049 | tr->sumBuffer = (double *)rax_malloc(memoryRequirements * sizeof(double)); |
---|
7050 | assert(tr->sumBuffer != NULL); |
---|
7051 | |
---|
7052 | tr->y_ptr = (unsigned char *)rax_malloc(myLength * (size_t)(tr->mxtips) * sizeof(unsigned char)); |
---|
7053 | assert(tr->y_ptr != NULL); |
---|
7054 | |
---|
7055 | tr->perSiteLLPtr = (double*) rax_malloc(myLength * sizeof(double)); |
---|
7056 | assert(tr->perSiteLLPtr != NULL); |
---|
7057 | |
---|
7058 | tr->wgtPtr = (int*) rax_malloc(myLength * sizeof(int)); |
---|
7059 | assert(tr->wgtPtr != NULL); |
---|
7060 | |
---|
7061 | tr->invariantPtr = (int*) rax_malloc(myLength * sizeof(int)); |
---|
7062 | assert(tr->invariantPtr != NULL); |
---|
7063 | |
---|
7064 | tr->rateCategoryPtr = (int*) rax_malloc(myLength * sizeof(int)); |
---|
7065 | assert(tr->rateCategoryPtr != NULL); |
---|
7066 | } |
---|
7067 | |
---|
7068 | |
---|
7069 | |
---|
7070 | |
---|
7071 | |
---|
7072 | |
---|
7073 | inline static void sendTraversalInfo(tree *localTree, tree *tr) |
---|
7074 | { |
---|
7075 | localTree->td[0] = tr->td[0]; |
---|
7076 | } |
---|
7077 | |
---|
7078 | |
---|
7079 | static void collectDouble(double *dst, double *src, tree *tr, int n, int tid) |
---|
7080 | { |
---|
7081 | int |
---|
7082 | model; |
---|
7083 | |
---|
7084 | size_t |
---|
7085 | i; |
---|
7086 | |
---|
7087 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
7088 | { |
---|
7089 | for(i = tr->partitionData[model].lower; i < tr->partitionData[model].upper; i++) |
---|
7090 | { |
---|
7091 | if(i % (size_t)n == (size_t)tid) |
---|
7092 | dst[i] = src[i]; |
---|
7093 | } |
---|
7094 | } |
---|
7095 | } |
---|
7096 | |
---|
7097 | |
---|
7098 | static void broadcastPerSiteRates(tree *tr, tree *localTree) |
---|
7099 | { |
---|
7100 | int |
---|
7101 | i = 0, |
---|
7102 | model = 0; |
---|
7103 | |
---|
7104 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7105 | { |
---|
7106 | localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories; |
---|
7107 | |
---|
7108 | for(i = 0; i < localTree->partitionData[model].numberOfCategories; i++) |
---|
7109 | { |
---|
7110 | localTree->partitionData[model].perSiteRates[i] = tr->partitionData[model].perSiteRates[i]; |
---|
7111 | localTree->partitionData[model].unscaled_perSiteRates[i] = tr->partitionData[model].unscaled_perSiteRates[i]; |
---|
7112 | } |
---|
7113 | } |
---|
7114 | |
---|
7115 | } |
---|
7116 | |
---|
7117 | static void copyLG4(tree *localTree, tree *tr, int model, const partitionLengths *pl) |
---|
7118 | { |
---|
7119 | if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X) |
---|
7120 | { |
---|
7121 | int |
---|
7122 | k; |
---|
7123 | |
---|
7124 | for(k = 0; k < 4; k++) |
---|
7125 | { |
---|
7126 | memcpy(localTree->partitionData[model].EIGN_LG4[k], tr->partitionData[model].EIGN_LG4[k], pl->eignLength * sizeof(double)); |
---|
7127 | memcpy(localTree->partitionData[model].EV_LG4[k], tr->partitionData[model].EV_LG4[k], pl->evLength * sizeof(double)); |
---|
7128 | memcpy(localTree->partitionData[model].EI_LG4[k], tr->partitionData[model].EI_LG4[k], pl->eiLength * sizeof(double)); |
---|
7129 | memcpy(localTree->partitionData[model].substRates_LG4[k], tr->partitionData[model].substRates_LG4[k], pl->substRatesLength * sizeof(double)); |
---|
7130 | memcpy(localTree->partitionData[model].frequencies_LG4[k], tr->partitionData[model].frequencies_LG4[k], pl->frequenciesLength * sizeof(double)); |
---|
7131 | memcpy(localTree->partitionData[model].tipVector_LG4[k], tr->partitionData[model].tipVector_LG4[k], pl->tipVectorLength * sizeof(double)); |
---|
7132 | } |
---|
7133 | } |
---|
7134 | } |
---|
7135 | |
---|
7136 | static void execFunction(tree *tr, tree *localTree, int tid, int n) |
---|
7137 | { |
---|
7138 | double volatile result; |
---|
7139 | |
---|
7140 | size_t |
---|
7141 | i; |
---|
7142 | |
---|
7143 | int |
---|
7144 | currentJob, |
---|
7145 | model, |
---|
7146 | localCounter, |
---|
7147 | globalCounter; |
---|
7148 | |
---|
7149 | currentJob = threadJob >> 16; |
---|
7150 | |
---|
7151 | switch(currentJob) |
---|
7152 | { |
---|
7153 | case THREAD_INIT_PARTITION: |
---|
7154 | initPartition(tr, localTree, tid); |
---|
7155 | break; |
---|
7156 | case THREAD_ALLOC_LIKELIHOOD: |
---|
7157 | allocNodex(localTree, tid, n); |
---|
7158 | threadFixModelIndices(tr, localTree, tid, n); |
---|
7159 | break; |
---|
7160 | case THREAD_FIX_MODEL_INDICES: |
---|
7161 | threadFixModelIndices(tr, localTree, tid, n); |
---|
7162 | break; |
---|
7163 | case THREAD_EVALUATE: |
---|
7164 | sendTraversalInfo(localTree, tr); |
---|
7165 | result = evaluateIterative(localTree, FALSE); |
---|
7166 | |
---|
7167 | if(localTree->NumberOfModels > 1) |
---|
7168 | { |
---|
7169 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7170 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
7171 | } |
---|
7172 | else |
---|
7173 | reductionBuffer[tid] = result; |
---|
7174 | |
---|
7175 | if(tid > 0) |
---|
7176 | { |
---|
7177 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7178 | localTree->executeModel[model] = TRUE; |
---|
7179 | } |
---|
7180 | break; |
---|
7181 | case THREAD_NEWVIEW_MASKED: |
---|
7182 | sendTraversalInfo(localTree, tr); |
---|
7183 | memcpy(localTree->executeModel, tr->executeModel, sizeof(boolean) * localTree->NumberOfModels); |
---|
7184 | newviewIterative(localTree); |
---|
7185 | if(tid > 0) |
---|
7186 | { |
---|
7187 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7188 | localTree->executeModel[model] = TRUE; |
---|
7189 | } |
---|
7190 | break; |
---|
7191 | case THREAD_NEWVIEW: |
---|
7192 | sendTraversalInfo(localTree, tr); |
---|
7193 | newviewIterative(localTree); |
---|
7194 | break; |
---|
7195 | case THREAD_MAKENEWZ_FIRST: |
---|
7196 | { |
---|
7197 | volatile double |
---|
7198 | dlnLdlz[NUM_BRANCHES], |
---|
7199 | d2lnLdlz2[NUM_BRANCHES]; |
---|
7200 | |
---|
7201 | sendTraversalInfo(localTree, tr); |
---|
7202 | if(tid > 0) |
---|
7203 | { |
---|
7204 | memcpy(localTree->coreLZ, tr->coreLZ, sizeof(double) * localTree->numBranches); |
---|
7205 | memcpy(localTree->executeModel, tr->executeModel, sizeof(boolean) * localTree->NumberOfModels); |
---|
7206 | } |
---|
7207 | |
---|
7208 | makenewzIterative(localTree); |
---|
7209 | execCore(localTree, dlnLdlz, d2lnLdlz2); |
---|
7210 | |
---|
7211 | if(!tr->multiBranch) |
---|
7212 | { |
---|
7213 | reductionBuffer[tid] = dlnLdlz[0]; |
---|
7214 | reductionBufferTwo[tid] = d2lnLdlz2[0]; |
---|
7215 | } |
---|
7216 | else |
---|
7217 | { |
---|
7218 | for(i = 0; i < (size_t)localTree->NumberOfModels; i++) |
---|
7219 | { |
---|
7220 | reductionBuffer[tid * localTree->NumberOfModels + i] = dlnLdlz[i]; |
---|
7221 | reductionBufferTwo[tid * localTree->NumberOfModels + i] = d2lnLdlz2[i]; |
---|
7222 | } |
---|
7223 | } |
---|
7224 | |
---|
7225 | if(tid > 0) |
---|
7226 | { |
---|
7227 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7228 | localTree->executeModel[model] = TRUE; |
---|
7229 | } |
---|
7230 | } |
---|
7231 | break; |
---|
7232 | case THREAD_MAKENEWZ: |
---|
7233 | { |
---|
7234 | volatile double |
---|
7235 | dlnLdlz[NUM_BRANCHES], |
---|
7236 | d2lnLdlz2[NUM_BRANCHES]; |
---|
7237 | |
---|
7238 | memcpy(localTree->coreLZ, tr->coreLZ, sizeof(double) * localTree->numBranches); |
---|
7239 | memcpy(localTree->executeModel, tr->executeModel, sizeof(boolean) * localTree->NumberOfModels); |
---|
7240 | |
---|
7241 | execCore(localTree, dlnLdlz, d2lnLdlz2); |
---|
7242 | |
---|
7243 | if(!tr->multiBranch) |
---|
7244 | { |
---|
7245 | reductionBuffer[tid] = dlnLdlz[0]; |
---|
7246 | reductionBufferTwo[tid] = d2lnLdlz2[0]; |
---|
7247 | } |
---|
7248 | else |
---|
7249 | { |
---|
7250 | for(i = 0; i < (size_t)localTree->NumberOfModels; i++) |
---|
7251 | { |
---|
7252 | reductionBuffer[tid * localTree->NumberOfModels + i] = dlnLdlz[i]; |
---|
7253 | reductionBufferTwo[tid * localTree->NumberOfModels + i] = d2lnLdlz2[i]; |
---|
7254 | } |
---|
7255 | } |
---|
7256 | if(tid > 0) |
---|
7257 | { |
---|
7258 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7259 | localTree->executeModel[model] = TRUE; |
---|
7260 | } |
---|
7261 | } |
---|
7262 | break; |
---|
7263 | case THREAD_COPY_RATES: |
---|
7264 | if(tid > 0) |
---|
7265 | { |
---|
7266 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7267 | { |
---|
7268 | const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model])); |
---|
7269 | |
---|
7270 | memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double)); |
---|
7271 | memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double)); |
---|
7272 | memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double)); |
---|
7273 | memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double)); |
---|
7274 | |
---|
7275 | copyLG4(localTree, tr, model, pl); |
---|
7276 | } |
---|
7277 | } |
---|
7278 | break; |
---|
7279 | case THREAD_OPT_RATE: |
---|
7280 | if(tid > 0) |
---|
7281 | { |
---|
7282 | memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean)); |
---|
7283 | |
---|
7284 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7285 | { |
---|
7286 | const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model])); |
---|
7287 | |
---|
7288 | memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double)); |
---|
7289 | memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double)); |
---|
7290 | memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double)); |
---|
7291 | memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double)); |
---|
7292 | |
---|
7293 | copyLG4(localTree, tr, model, pl); |
---|
7294 | } |
---|
7295 | } |
---|
7296 | |
---|
7297 | result = evaluateIterative(localTree, FALSE); |
---|
7298 | |
---|
7299 | |
---|
7300 | if(localTree->NumberOfModels > 1) |
---|
7301 | { |
---|
7302 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7303 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
7304 | } |
---|
7305 | else |
---|
7306 | reductionBuffer[tid] = result; |
---|
7307 | |
---|
7308 | |
---|
7309 | if(tid > 0) |
---|
7310 | { |
---|
7311 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7312 | localTree->executeModel[model] = TRUE; |
---|
7313 | } |
---|
7314 | break; |
---|
7315 | case THREAD_COPY_INVAR: |
---|
7316 | if(tid > 0) |
---|
7317 | { |
---|
7318 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7319 | localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant; |
---|
7320 | } |
---|
7321 | break; |
---|
7322 | case THREAD_OPT_INVAR: |
---|
7323 | if(tid > 0) |
---|
7324 | { |
---|
7325 | memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean)); |
---|
7326 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7327 | localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant; |
---|
7328 | } |
---|
7329 | |
---|
7330 | result = evaluateIterative(localTree, FALSE); |
---|
7331 | |
---|
7332 | if(localTree->NumberOfModels > 1) |
---|
7333 | { |
---|
7334 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7335 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
7336 | } |
---|
7337 | else |
---|
7338 | reductionBuffer[tid] = result; |
---|
7339 | |
---|
7340 | if(tid > 0) |
---|
7341 | { |
---|
7342 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7343 | localTree->executeModel[model] = TRUE; |
---|
7344 | } |
---|
7345 | break; |
---|
7346 | case THREAD_COPY_ALPHA: |
---|
7347 | if(tid > 0) |
---|
7348 | { |
---|
7349 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7350 | { |
---|
7351 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
7352 | localTree->partitionData[model].alpha = tr->partitionData[model].alpha; |
---|
7353 | } |
---|
7354 | } |
---|
7355 | break; |
---|
7356 | case THREAD_OPT_ALPHA: |
---|
7357 | if(tid > 0) |
---|
7358 | { |
---|
7359 | memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean)); |
---|
7360 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7361 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
7362 | } |
---|
7363 | |
---|
7364 | result = evaluateIterative(localTree, FALSE); |
---|
7365 | |
---|
7366 | |
---|
7367 | if(localTree->NumberOfModels > 1) |
---|
7368 | { |
---|
7369 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7370 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
7371 | } |
---|
7372 | else |
---|
7373 | reductionBuffer[tid] = result; |
---|
7374 | |
---|
7375 | if(tid > 0) |
---|
7376 | { |
---|
7377 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7378 | localTree->executeModel[model] = TRUE; |
---|
7379 | } |
---|
7380 | break; |
---|
7381 | case THREAD_RESET_MODEL: |
---|
7382 | if(tid > 0) |
---|
7383 | { |
---|
7384 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7385 | { |
---|
7386 | const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model])); |
---|
7387 | |
---|
7388 | memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double)); |
---|
7389 | memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double)); |
---|
7390 | memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double)); |
---|
7391 | memcpy(localTree->partitionData[model].substRates, tr->partitionData[model].substRates, pl->substRatesLength * sizeof(double)); |
---|
7392 | memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double)); |
---|
7393 | memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double)); |
---|
7394 | |
---|
7395 | copyLG4(localTree, tr, model, pl); |
---|
7396 | |
---|
7397 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
7398 | localTree->partitionData[model].alpha = tr->partitionData[model].alpha; |
---|
7399 | localTree->partitionData[model].brLenScaler = tr->partitionData[model].brLenScaler; |
---|
7400 | localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant; |
---|
7401 | } |
---|
7402 | } |
---|
7403 | break; |
---|
7404 | case THREAD_COPY_INIT_MODEL: |
---|
7405 | if(tid > 0) |
---|
7406 | { |
---|
7407 | localTree->rateHetModel = tr->rateHetModel; |
---|
7408 | |
---|
7409 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7410 | { |
---|
7411 | const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model])); |
---|
7412 | |
---|
7413 | memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double)); |
---|
7414 | memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double)); |
---|
7415 | memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double)); |
---|
7416 | memcpy(localTree->partitionData[model].substRates, tr->partitionData[model].substRates, pl->substRatesLength * sizeof(double)); |
---|
7417 | memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double)); |
---|
7418 | memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double)); |
---|
7419 | |
---|
7420 | copyLG4(localTree, tr, model, pl); |
---|
7421 | |
---|
7422 | memcpy(localTree->partitionData[model].weights, tr->partitionData[model].weights, sizeof(double) * 4); |
---|
7423 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
7424 | localTree->partitionData[model].alpha = tr->partitionData[model].alpha; |
---|
7425 | localTree->partitionData[model].brLenScaler = tr->partitionData[model].brLenScaler; |
---|
7426 | localTree->partitionData[model].propInvariant = tr->partitionData[model].propInvariant; |
---|
7427 | localTree->partitionData[model].lower = tr->partitionData[model].lower; |
---|
7428 | localTree->partitionData[model].upper = tr->partitionData[model].upper; |
---|
7429 | |
---|
7430 | localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories; |
---|
7431 | } |
---|
7432 | |
---|
7433 | memcpy(localTree->cdta->patrat, tr->cdta->patrat, localTree->originalCrunchedLength * sizeof(double)); |
---|
7434 | memcpy(localTree->cdta->patratStored, tr->cdta->patratStored, localTree->originalCrunchedLength * sizeof(double)); |
---|
7435 | } |
---|
7436 | |
---|
7437 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7438 | { |
---|
7439 | int |
---|
7440 | localIndex; |
---|
7441 | |
---|
7442 | for(i = localTree->partitionData[model].lower, localIndex = 0; i < localTree->partitionData[model].upper; i++) |
---|
7443 | if(i % (size_t)n == (size_t)tid) |
---|
7444 | { |
---|
7445 | localTree->partitionData[model].wgt[localIndex] = tr->cdta->aliaswgt[i]; |
---|
7446 | localTree->partitionData[model].invariant[localIndex] = tr->invariant[i]; |
---|
7447 | |
---|
7448 | localIndex++; |
---|
7449 | } |
---|
7450 | } |
---|
7451 | break; |
---|
7452 | case THREAD_RATE_CATS: |
---|
7453 | sendTraversalInfo(localTree, tr); |
---|
7454 | if(tid > 0) |
---|
7455 | { |
---|
7456 | localTree->lower_spacing = tr->lower_spacing; |
---|
7457 | localTree->upper_spacing = tr->upper_spacing; |
---|
7458 | } |
---|
7459 | |
---|
7460 | optRateCatPthreads(localTree, localTree->lower_spacing, localTree->upper_spacing, localTree->lhs, n, tid); |
---|
7461 | |
---|
7462 | if(tid > 0) |
---|
7463 | { |
---|
7464 | collectDouble(tr->cdta->patrat, localTree->cdta->patrat, localTree, n, tid); |
---|
7465 | collectDouble(tr->cdta->patratStored, localTree->cdta->patratStored, localTree, n, tid); |
---|
7466 | collectDouble(tr->lhs, localTree->lhs, localTree, n, tid); |
---|
7467 | } |
---|
7468 | break; |
---|
7469 | case THREAD_COPY_RATE_CATS: |
---|
7470 | if(tid > 0) |
---|
7471 | { |
---|
7472 | memcpy(localTree->cdta->patrat, tr->cdta->patrat, localTree->originalCrunchedLength * sizeof(double)); |
---|
7473 | memcpy(localTree->cdta->patratStored, tr->cdta->patratStored, localTree->originalCrunchedLength * sizeof(double)); |
---|
7474 | broadcastPerSiteRates(tr, localTree); |
---|
7475 | } |
---|
7476 | |
---|
7477 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7478 | { |
---|
7479 | localTree->partitionData[model].numberOfCategories = tr->partitionData[model].numberOfCategories; |
---|
7480 | |
---|
7481 | for(localCounter = 0, i = localTree->partitionData[model].lower; i < localTree->partitionData[model].upper; i++) |
---|
7482 | { |
---|
7483 | if(i % (size_t)n == (size_t)tid) |
---|
7484 | { |
---|
7485 | localTree->partitionData[model].rateCategory[localCounter] = tr->cdta->rateCategory[i]; |
---|
7486 | localCounter++; |
---|
7487 | } |
---|
7488 | } |
---|
7489 | } |
---|
7490 | break; |
---|
7491 | case THREAD_CAT_TO_GAMMA: |
---|
7492 | if(tid > 0) |
---|
7493 | localTree->rateHetModel = tr->rateHetModel; |
---|
7494 | break; |
---|
7495 | case THREAD_GAMMA_TO_CAT: |
---|
7496 | if(tid > 0) |
---|
7497 | localTree->rateHetModel = tr->rateHetModel; |
---|
7498 | break; |
---|
7499 | case THREAD_EVALUATE_VECTOR: |
---|
7500 | sendTraversalInfo(localTree, tr); |
---|
7501 | result = evaluateIterative(localTree, TRUE); |
---|
7502 | |
---|
7503 | if(localTree->NumberOfModels > 1) |
---|
7504 | { |
---|
7505 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7506 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
7507 | } |
---|
7508 | else |
---|
7509 | reductionBuffer[tid] = result; |
---|
7510 | |
---|
7511 | if(tid > 0) |
---|
7512 | { |
---|
7513 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7514 | localTree->executeModel[model] = TRUE; |
---|
7515 | } |
---|
7516 | |
---|
7517 | for(model = 0, globalCounter = 0; model < localTree->NumberOfModels; model++) |
---|
7518 | { |
---|
7519 | for(localCounter = 0, i = localTree->partitionData[model].lower; i < localTree->partitionData[model].upper; i++) |
---|
7520 | { |
---|
7521 | if(i % (size_t)n == (size_t)tid) |
---|
7522 | { |
---|
7523 | tr->perSiteLL[globalCounter] = localTree->partitionData[model].perSiteLL[localCounter]; |
---|
7524 | localCounter++; |
---|
7525 | } |
---|
7526 | globalCounter++; |
---|
7527 | } |
---|
7528 | } |
---|
7529 | break; |
---|
7530 | case THREAD_COPY_PARAMS: |
---|
7531 | if(tid > 0) |
---|
7532 | { |
---|
7533 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7534 | { |
---|
7535 | const partitionLengths *pl = getPartitionLengths(&(tr->partitionData[model])); |
---|
7536 | |
---|
7537 | memcpy(localTree->partitionData[model].EIGN, tr->partitionData[model].EIGN, pl->eignLength * sizeof(double)); |
---|
7538 | memcpy(localTree->partitionData[model].EV, tr->partitionData[model].EV, pl->evLength * sizeof(double)); |
---|
7539 | memcpy(localTree->partitionData[model].EI, tr->partitionData[model].EI, pl->eiLength * sizeof(double)); |
---|
7540 | memcpy(localTree->partitionData[model].substRates, tr->partitionData[model].substRates, pl->substRatesLength * sizeof(double)); |
---|
7541 | memcpy(localTree->partitionData[model].frequencies, tr->partitionData[model].frequencies, pl->frequenciesLength * sizeof(double)); |
---|
7542 | memcpy(localTree->partitionData[model].tipVector, tr->partitionData[model].tipVector, pl->tipVectorLength * sizeof(double)); |
---|
7543 | |
---|
7544 | copyLG4(localTree, tr, model, pl); |
---|
7545 | |
---|
7546 | } |
---|
7547 | } |
---|
7548 | break; |
---|
7549 | case THREAD_INIT_EPA: |
---|
7550 | if(tid > 0) |
---|
7551 | { |
---|
7552 | localTree->leftRootNode = tr->leftRootNode; |
---|
7553 | localTree->rightRootNode = tr->rightRootNode; |
---|
7554 | localTree->wasRooted = tr->wasRooted; |
---|
7555 | localTree->bInf = tr->bInf; |
---|
7556 | localTree->numberOfBranches = tr->numberOfBranches; |
---|
7557 | localTree->contiguousVectorLength = tr->contiguousVectorLength; |
---|
7558 | localTree->contiguousScalingLength = tr->contiguousScalingLength; |
---|
7559 | localTree->inserts = tr->inserts; |
---|
7560 | localTree->numberOfTipsForInsertion = tr->numberOfTipsForInsertion; |
---|
7561 | localTree->fracchange = tr->fracchange; |
---|
7562 | localTree->rawFracchange = tr->rawFracchange; |
---|
7563 | |
---|
7564 | memcpy(localTree->partitionContributions, tr->partitionContributions, sizeof(double) * localTree->NumberOfModels); |
---|
7565 | |
---|
7566 | memcpy(localTree->fracchanges, tr->fracchanges, sizeof(double) * localTree->NumberOfModels); |
---|
7567 | |
---|
7568 | memcpy(localTree->rawFracchanges, tr->rawFracchanges, sizeof(double) * localTree->NumberOfModels); |
---|
7569 | |
---|
7570 | |
---|
7571 | if(localTree->perPartitionEPA) |
---|
7572 | { |
---|
7573 | localTree->readPartition = (int *)rax_malloc(sizeof(int) * (size_t)localTree->numberOfTipsForInsertion); |
---|
7574 | memcpy(localTree->readPartition, tr->readPartition, sizeof(int) * (size_t)localTree->numberOfTipsForInsertion); |
---|
7575 | } |
---|
7576 | |
---|
7577 | } |
---|
7578 | |
---|
7579 | localTree->temporarySumBuffer = (double *)rax_malloc(sizeof(double) * localTree->contiguousVectorLength); |
---|
7580 | localTree->temporaryVector = (double *)rax_malloc(sizeof(double) * localTree->contiguousVectorLength); |
---|
7581 | |
---|
7582 | localTree->temporaryScaling = (int *)rax_malloc(sizeof(int) * localTree->contiguousScalingLength); |
---|
7583 | |
---|
7584 | |
---|
7585 | localTree->contiguousWgt = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength); |
---|
7586 | localTree->contiguousInvariant = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength); |
---|
7587 | |
---|
7588 | |
---|
7589 | memcpy(localTree->contiguousWgt , tr->cdta->aliaswgt, sizeof(int) * localTree->contiguousScalingLength); |
---|
7590 | memcpy(localTree->contiguousInvariant , tr->invariant, sizeof(int) * localTree->contiguousScalingLength); |
---|
7591 | |
---|
7592 | if(tid > 0) |
---|
7593 | broadcastPerSiteRates(tr, localTree); |
---|
7594 | |
---|
7595 | |
---|
7596 | localTree->contiguousRateCategory = (int*)rax_malloc(sizeof(int) * localTree->contiguousScalingLength); |
---|
7597 | |
---|
7598 | |
---|
7599 | memcpy(localTree->contiguousRateCategory, tr->cdta->rateCategory, sizeof(int) * localTree->contiguousScalingLength); |
---|
7600 | |
---|
7601 | localTree->contiguousTips = tr->yVector; |
---|
7602 | |
---|
7603 | break; |
---|
7604 | case THREAD_GATHER_LIKELIHOOD: |
---|
7605 | { |
---|
7606 | int |
---|
7607 | branchCounter = tr->branchCounter; |
---|
7608 | |
---|
7609 | double |
---|
7610 | *leftContigousVector = localTree->bInf[branchCounter].epa->left, |
---|
7611 | *rightContigousVector = localTree->bInf[branchCounter].epa->right; |
---|
7612 | |
---|
7613 | int |
---|
7614 | *leftContigousScalingVector = localTree->bInf[branchCounter].epa->leftScaling, |
---|
7615 | *rightContigousScalingVector = localTree->bInf[branchCounter].epa->rightScaling, |
---|
7616 | rightNumber = localTree->bInf[branchCounter].epa->rightNodeNumber, |
---|
7617 | leftNumber = localTree->bInf[branchCounter].epa->leftNodeNumber; |
---|
7618 | |
---|
7619 | size_t |
---|
7620 | globalColumnCount = 0, |
---|
7621 | globalCount = 0; |
---|
7622 | |
---|
7623 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7624 | { |
---|
7625 | size_t |
---|
7626 | blockRequirements; |
---|
7627 | |
---|
7628 | double |
---|
7629 | *leftStridedVector = (double *)NULL, |
---|
7630 | *rightStridedVector = (double *)NULL; |
---|
7631 | |
---|
7632 | int |
---|
7633 | *leftStridedScalingVector = (int *)NULL, |
---|
7634 | *rightStridedScalingVector = (int *)NULL; |
---|
7635 | |
---|
7636 | size_t |
---|
7637 | localColumnCount = 0, |
---|
7638 | localCount = 0; |
---|
7639 | |
---|
7640 | if(!isTip(leftNumber, localTree->mxtips)) |
---|
7641 | { |
---|
7642 | leftStridedVector = localTree->partitionData[model].xVector[leftNumber - localTree->mxtips - 1]; |
---|
7643 | leftStridedScalingVector = localTree->partitionData[model].expVector[leftNumber - localTree->mxtips - 1]; |
---|
7644 | } |
---|
7645 | |
---|
7646 | if(!isTip(rightNumber, localTree->mxtips)) |
---|
7647 | { |
---|
7648 | rightStridedVector = localTree->partitionData[model].xVector[rightNumber - localTree->mxtips - 1]; |
---|
7649 | rightStridedScalingVector = localTree->partitionData[model].expVector[rightNumber - localTree->mxtips - 1]; |
---|
7650 | } |
---|
7651 | |
---|
7652 | assert(!(isTip(leftNumber, localTree->mxtips) && isTip(rightNumber, localTree->mxtips))); |
---|
7653 | |
---|
7654 | blockRequirements = (size_t)(tr->discreteRateCategories) * (size_t)(tr->partitionData[model].states); |
---|
7655 | |
---|
7656 | for(globalColumnCount = localTree->partitionData[model].lower; globalColumnCount < localTree->partitionData[model].upper; globalColumnCount++) |
---|
7657 | { |
---|
7658 | if(globalColumnCount % (size_t)n == (size_t)tid) |
---|
7659 | { |
---|
7660 | if(leftStridedVector) |
---|
7661 | { |
---|
7662 | memcpy(&leftContigousVector[globalCount], &leftStridedVector[localCount], sizeof(double) * blockRequirements); |
---|
7663 | leftContigousScalingVector[globalColumnCount] = leftStridedScalingVector[localColumnCount]; |
---|
7664 | } |
---|
7665 | |
---|
7666 | if(rightStridedVector) |
---|
7667 | { |
---|
7668 | memcpy(&rightContigousVector[globalCount], &rightStridedVector[localCount], sizeof(double) * blockRequirements); |
---|
7669 | rightContigousScalingVector[globalColumnCount] = rightStridedScalingVector[localColumnCount]; |
---|
7670 | } |
---|
7671 | |
---|
7672 | localColumnCount++; |
---|
7673 | localCount += blockRequirements; |
---|
7674 | } |
---|
7675 | |
---|
7676 | |
---|
7677 | |
---|
7678 | globalCount += blockRequirements; |
---|
7679 | } |
---|
7680 | |
---|
7681 | assert(localColumnCount == localTree->partitionData[model].width); |
---|
7682 | assert(localCount == (localTree->partitionData[model].width * (int)blockRequirements)); |
---|
7683 | |
---|
7684 | } |
---|
7685 | } |
---|
7686 | break; |
---|
7687 | case THREAD_INSERT_CLASSIFY: |
---|
7688 | case THREAD_INSERT_CLASSIFY_THOROUGH: |
---|
7689 | { |
---|
7690 | int |
---|
7691 | branchNumber; |
---|
7692 | |
---|
7693 | boolean |
---|
7694 | done = FALSE; |
---|
7695 | |
---|
7696 | while(!done) |
---|
7697 | { |
---|
7698 | pthread_mutex_lock(&mutex); |
---|
7699 | |
---|
7700 | if(NumberOfJobs == 0) |
---|
7701 | done = TRUE; |
---|
7702 | else |
---|
7703 | { |
---|
7704 | branchNumber = localTree->numberOfBranches - NumberOfJobs; |
---|
7705 | NumberOfJobs--; |
---|
7706 | } |
---|
7707 | |
---|
7708 | pthread_mutex_unlock(&mutex); |
---|
7709 | |
---|
7710 | if(!done) |
---|
7711 | { |
---|
7712 | switch(currentJob) |
---|
7713 | { |
---|
7714 | case THREAD_INSERT_CLASSIFY: |
---|
7715 | addTraverseRobIterative(localTree, branchNumber); |
---|
7716 | break; |
---|
7717 | case THREAD_INSERT_CLASSIFY_THOROUGH: |
---|
7718 | testInsertThoroughIterative(localTree, branchNumber); |
---|
7719 | break; |
---|
7720 | default: |
---|
7721 | assert(0); |
---|
7722 | } |
---|
7723 | |
---|
7724 | } |
---|
7725 | } |
---|
7726 | } |
---|
7727 | break; |
---|
7728 | case THREAD_PREPARE_BIPS_FOR_PRINT: |
---|
7729 | { |
---|
7730 | int |
---|
7731 | i = 0, |
---|
7732 | j = 0; |
---|
7733 | |
---|
7734 | boolean |
---|
7735 | done = FALSE; |
---|
7736 | |
---|
7737 | while(!done) |
---|
7738 | { |
---|
7739 | pthread_mutex_lock(&mutex); |
---|
7740 | |
---|
7741 | if(NumberOfJobs == 0) |
---|
7742 | done = TRUE; |
---|
7743 | else |
---|
7744 | { |
---|
7745 | i = tr->consensusBipLen - NumberOfJobs; |
---|
7746 | NumberOfJobs--; |
---|
7747 | } |
---|
7748 | |
---|
7749 | pthread_mutex_unlock(&mutex); |
---|
7750 | |
---|
7751 | if( ! done) |
---|
7752 | { |
---|
7753 | entry |
---|
7754 | *bipA = tr->consensusBips[i] ; |
---|
7755 | |
---|
7756 | unsigned int |
---|
7757 | firstIndex = 0; |
---|
7758 | |
---|
7759 | while(firstIndex < tr->bitVectorLength && bipA->bitVector[firstIndex] == 0 ) |
---|
7760 | firstIndex++; |
---|
7761 | |
---|
7762 | |
---|
7763 | for(j = i + 1; j < tr->consensusBipLen; j++) |
---|
7764 | { |
---|
7765 | entry |
---|
7766 | *bipB = tr->consensusBips[j]; |
---|
7767 | |
---|
7768 | if(bipA->amountTips < bipB->amountTips && |
---|
7769 | issubset(bipA->bitVector, bipB->bitVector, tr->bitVectorLength, firstIndex)) |
---|
7770 | { |
---|
7771 | /* i is child of j */ |
---|
7772 | List |
---|
7773 | *elem = (List*) rax_malloc(sizeof(List)); |
---|
7774 | |
---|
7775 | elem->value = rax_calloc(1, sizeof(int)); |
---|
7776 | |
---|
7777 | *(int*)elem->value = i; |
---|
7778 | |
---|
7779 | pthread_mutex_lock(tr->mutexesForHashing[j]); /* LOCKED */ |
---|
7780 | |
---|
7781 | tr->hasAncestor[i] = TRUE; |
---|
7782 | |
---|
7783 | elem->next = tr->listOfDirectChildren[j]; |
---|
7784 | tr->listOfDirectChildren[j] = elem; |
---|
7785 | |
---|
7786 | pthread_mutex_unlock(tr->mutexesForHashing[j]); /* UNLOCKED */ |
---|
7787 | |
---|
7788 | break; /* each node has only 1 parent -> nothing more to do */ |
---|
7789 | } |
---|
7790 | } |
---|
7791 | } |
---|
7792 | } |
---|
7793 | } |
---|
7794 | break; |
---|
7795 | case THREAD_MRE_COMPUTE: |
---|
7796 | { |
---|
7797 | if(tid > 0) |
---|
7798 | { |
---|
7799 | /* worker threads */ |
---|
7800 | boolean done = FALSE; |
---|
7801 | int localEntryCount = (int) tr->h->entryCount; /* problem? */ |
---|
7802 | while(!done ) |
---|
7803 | { |
---|
7804 | int acquiredJobs = 0; |
---|
7805 | int jobId = -1; |
---|
7806 | |
---|
7807 | /* get new job */ |
---|
7808 | |
---|
7809 | pthread_mutex_lock(&mutex) ; /* START LOCK */ |
---|
7810 | |
---|
7811 | if( NumberOfJobs == 0 ) |
---|
7812 | { |
---|
7813 | /* finish */ |
---|
7814 | done = TRUE; |
---|
7815 | } |
---|
7816 | else |
---|
7817 | if( localEntryCount - NumberOfJobs + tr->recommendedAmountJobs < tr->sectionEnd) |
---|
7818 | { |
---|
7819 | /* try to acquire the recommended amount of jobs */ |
---|
7820 | jobId = localEntryCount - NumberOfJobs; |
---|
7821 | acquiredJobs = tr->recommendedAmountJobs; |
---|
7822 | NumberOfJobs -= acquiredJobs; |
---|
7823 | } |
---|
7824 | else |
---|
7825 | if( localEntryCount - NumberOfJobs < (signed int)tr->sectionEnd) |
---|
7826 | { |
---|
7827 | /* at least get one job */ |
---|
7828 | jobId = tr->h->entryCount - NumberOfJobs; |
---|
7829 | acquiredJobs = 1; |
---|
7830 | NumberOfJobs--; |
---|
7831 | } |
---|
7832 | |
---|
7833 | pthread_mutex_unlock(&mutex); /* END LOCK */ |
---|
7834 | |
---|
7835 | if(*(tr->len) >= tr->maxBips) |
---|
7836 | break; |
---|
7837 | |
---|
7838 | /* check all */ |
---|
7839 | while(acquiredJobs > 0) |
---|
7840 | { |
---|
7841 | boolean |
---|
7842 | compatflag = TRUE; |
---|
7843 | |
---|
7844 | entry |
---|
7845 | *currentEntry = tr->sbw[jobId]; |
---|
7846 | |
---|
7847 | int k; |
---|
7848 | |
---|
7849 | if(!((unsigned int)tr->mr_thresh < currentEntry->supportFromTreeset[0])) |
---|
7850 | { |
---|
7851 | for(k = *(tr->len); k > 0; k--) |
---|
7852 | { |
---|
7853 | if(! compatible(tr->sbi[k-1], currentEntry, tr->bitVectorLength)) |
---|
7854 | { |
---|
7855 | compatflag = FALSE; |
---|
7856 | break; |
---|
7857 | } |
---|
7858 | } |
---|
7859 | } |
---|
7860 | if(compatflag) |
---|
7861 | tr->bipStatus[jobId - tr->sectionEnd + tr->bipStatusLen] = MRE_POSSIBLE_CANDIDATE; /* ready to check */ |
---|
7862 | else |
---|
7863 | tr->bipStatus[jobId - tr->sectionEnd + tr->bipStatusLen] = MRE_EXCLUDED; /* can be omitted */ |
---|
7864 | |
---|
7865 | acquiredJobs--; |
---|
7866 | jobId++; |
---|
7867 | } |
---|
7868 | } |
---|
7869 | } |
---|
7870 | else |
---|
7871 | /* master thread */ |
---|
7872 | { |
---|
7873 | /* check in a looping manner, if bipartitions could be added */ |
---|
7874 | |
---|
7875 | int |
---|
7876 | highestToCheck, |
---|
7877 | tmpCounter = 0; |
---|
7878 | |
---|
7879 | double |
---|
7880 | density = 0.0; |
---|
7881 | |
---|
7882 | while(TRUE) |
---|
7883 | { |
---|
7884 | /* get highest bip to check */ |
---|
7885 | highestToCheck = 0; |
---|
7886 | while(highestToCheck < tr->bipStatusLen) |
---|
7887 | { |
---|
7888 | /* waits busily as long as there is nothing to do */ |
---|
7889 | /* printf("%d is highest to check\n", highestToCheck); */ |
---|
7890 | if( ! tr->bipStatus[highestToCheck] ) |
---|
7891 | highestToCheck = 0; |
---|
7892 | else |
---|
7893 | if(tr->bipStatus[highestToCheck] == MRE_POSSIBLE_CANDIDATE) |
---|
7894 | break; |
---|
7895 | else |
---|
7896 | highestToCheck++; |
---|
7897 | } |
---|
7898 | |
---|
7899 | /* try to finish */ |
---|
7900 | if( tmpCounter >= tr->maxBips || |
---|
7901 | (highestToCheck == tr->bipStatusLen /* end of buffer that is examined */ |
---|
7902 | && (unsigned int)tr->sectionEnd == tr->h->entryCount /* the end of the buffer is also the hashtable */ |
---|
7903 | && tr->bipStatus[highestToCheck-1] > MRE_POSSIBLE_CANDIDATE)) |
---|
7904 | { |
---|
7905 | /* the last entry in buffer was already processed */ |
---|
7906 | *(tr->len) = tmpCounter; /* for the workers to finish */ |
---|
7907 | break; /* master says goodbye */ |
---|
7908 | } |
---|
7909 | |
---|
7910 | /* reset section (resp. the buffer to be checked) */ |
---|
7911 | else |
---|
7912 | if( highestToCheck == tr->bipStatusLen) |
---|
7913 | { |
---|
7914 | int |
---|
7915 | newSectionEnd, |
---|
7916 | min, |
---|
7917 | max; |
---|
7918 | |
---|
7919 | *(tr->len) = tmpCounter; /* reset counter for workers */ |
---|
7920 | tr->entriesOfSection = &(tr->sbw[tr->sectionEnd ]); |
---|
7921 | |
---|
7922 | /* find new section end: tries to find a new window |
---|
7923 | size (and resp. sectionEnd) s.t. the expected |
---|
7924 | amount of work for master and workers is the same. |
---|
7925 | */ |
---|
7926 | density /= tr->bipStatusLen; |
---|
7927 | |
---|
7928 | /* I am not entirely sure, if this makes the code really incredible faster... */ |
---|
7929 | max = 5 * (NumberOfThreads-1); |
---|
7930 | min = 1; |
---|
7931 | tr->recommendedAmountJobs = (int)(max + (min - max) * density); /* recommend an amount of jobs to be calculate per thread between min and max */ |
---|
7932 | |
---|
7933 | if(density) |
---|
7934 | { |
---|
7935 | int |
---|
7936 | tmp = MAX((2 * tmpCounter * SECTION_CONSTANT / (NumberOfThreads * density)), /* the above discussed formula */ |
---|
7937 | NumberOfThreads * MRE_MIN_AMOUNT_JOBS_PER_THREAD ); /* we need at least a bit work */ |
---|
7938 | newSectionEnd = MIN(tr->sectionEnd + tmp, (int)(tr->h->entryCount)); |
---|
7939 | } |
---|
7940 | else |
---|
7941 | newSectionEnd = tr->h->entryCount; |
---|
7942 | |
---|
7943 | density = 0.0; |
---|
7944 | |
---|
7945 | tr->bipStatusLen = newSectionEnd - tr->sectionEnd; |
---|
7946 | rax_free(tr->bipStatus); |
---|
7947 | /* printf("%d\n" ,tr->bipStatusLen); */ |
---|
7948 | tr->bipStatus = (int*)rax_calloc(tr->bipStatusLen, sizeof(int)); |
---|
7949 | tr->sectionEnd = newSectionEnd; |
---|
7950 | continue; |
---|
7951 | } |
---|
7952 | |
---|
7953 | assert( tr->bipStatus[highestToCheck] == MRE_POSSIBLE_CANDIDATE); |
---|
7954 | |
---|
7955 | for(i = highestToCheck; i > 0; i--) /* checking new bip */ |
---|
7956 | { |
---|
7957 | assert(tr->bipStatus[i-1] == MRE_ADDED || tr->bipStatus[i-1] == MRE_EXCLUDED); |
---|
7958 | |
---|
7959 | if(tr->bipStatus[i-1] == MRE_ADDED |
---|
7960 | && ! compatible(tr->entriesOfSection[i-1], tr->entriesOfSection[highestToCheck], tr->bitVectorLength)) |
---|
7961 | { |
---|
7962 | tr->bipStatus[highestToCheck] = MRE_EXCLUDED; |
---|
7963 | break; |
---|
7964 | } |
---|
7965 | } |
---|
7966 | |
---|
7967 | if(i == 0) /* accepting */ |
---|
7968 | { |
---|
7969 | tr->bipStatus[highestToCheck] = MRE_ADDED; |
---|
7970 | tr->sbi[tmpCounter] = tr->entriesOfSection[highestToCheck]; |
---|
7971 | tmpCounter++; |
---|
7972 | density++; |
---|
7973 | } |
---|
7974 | } |
---|
7975 | } |
---|
7976 | } |
---|
7977 | break; |
---|
7978 | case THREAD_NEWVIEW_ANCESTRAL: |
---|
7979 | sendTraversalInfo(localTree, tr); |
---|
7980 | newviewIterativeAncestral(localTree); |
---|
7981 | break; |
---|
7982 | case THREAD_GATHER_ANCESTRAL: |
---|
7983 | { |
---|
7984 | double |
---|
7985 | *contigousVector = tr->ancestralStates; |
---|
7986 | |
---|
7987 | size_t |
---|
7988 | globalColumnCount = 0, |
---|
7989 | globalCount = 0; |
---|
7990 | |
---|
7991 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
7992 | { |
---|
7993 | size_t |
---|
7994 | rateHet, |
---|
7995 | blockRequirements; |
---|
7996 | |
---|
7997 | |
---|
7998 | size_t |
---|
7999 | localColumnCount = 0, |
---|
8000 | localCount = 0; |
---|
8001 | |
---|
8002 | double |
---|
8003 | *stridedVector = localTree->partitionData[model].sumBuffer; |
---|
8004 | |
---|
8005 | if(tr->rateHetModel == CAT) |
---|
8006 | rateHet = 1; |
---|
8007 | else |
---|
8008 | rateHet = 4; |
---|
8009 | |
---|
8010 | blockRequirements = (size_t)(rateHet) * (size_t)(tr->partitionData[model].states); |
---|
8011 | |
---|
8012 | for(globalColumnCount = localTree->partitionData[model].lower; globalColumnCount < localTree->partitionData[model].upper; globalColumnCount++) |
---|
8013 | { |
---|
8014 | if(globalColumnCount % (size_t)n == (size_t)tid) |
---|
8015 | { |
---|
8016 | memcpy(&contigousVector[globalCount], &stridedVector[localCount], sizeof(double) * blockRequirements); |
---|
8017 | |
---|
8018 | localColumnCount++; |
---|
8019 | localCount += blockRequirements; |
---|
8020 | } |
---|
8021 | |
---|
8022 | globalCount += blockRequirements; |
---|
8023 | } |
---|
8024 | |
---|
8025 | assert(localColumnCount == localTree->partitionData[model].width); |
---|
8026 | assert(localCount == (localTree->partitionData[model].width * (int)blockRequirements)); |
---|
8027 | } |
---|
8028 | } |
---|
8029 | break; |
---|
8030 | case THREAD_OPT_SCALER: |
---|
8031 | if(tid > 0) |
---|
8032 | { |
---|
8033 | memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean)); |
---|
8034 | |
---|
8035 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8036 | localTree->partitionData[model].brLenScaler = tr->partitionData[model].brLenScaler; |
---|
8037 | } |
---|
8038 | |
---|
8039 | result = evaluateIterative(localTree, FALSE); |
---|
8040 | |
---|
8041 | if(localTree->NumberOfModels > 1) |
---|
8042 | { |
---|
8043 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8044 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
8045 | } |
---|
8046 | else |
---|
8047 | reductionBuffer[tid] = result; |
---|
8048 | |
---|
8049 | if(tid > 0) |
---|
8050 | { |
---|
8051 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8052 | localTree->executeModel[model] = TRUE; |
---|
8053 | } |
---|
8054 | break; |
---|
8055 | case THREAD_COPY_LG4X_RATES: |
---|
8056 | if(tid > 0) |
---|
8057 | { |
---|
8058 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8059 | { |
---|
8060 | memcpy(localTree->partitionData[model].weights, tr->partitionData[model].weights, sizeof(double) * 4); |
---|
8061 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
8062 | } |
---|
8063 | } |
---|
8064 | break; |
---|
8065 | case THREAD_OPT_LG4X_RATES: |
---|
8066 | if(tid > 0) |
---|
8067 | { |
---|
8068 | memcpy(localTree->executeModel, tr->executeModel, localTree->NumberOfModels * sizeof(boolean)); |
---|
8069 | |
---|
8070 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8071 | { |
---|
8072 | memcpy(localTree->partitionData[model].weights, tr->partitionData[model].weights, sizeof(double) * 4); |
---|
8073 | memcpy(localTree->partitionData[model].gammaRates, tr->partitionData[model].gammaRates, sizeof(double) * 4); |
---|
8074 | } |
---|
8075 | } |
---|
8076 | |
---|
8077 | |
---|
8078 | result = evaluateIterative(localTree, FALSE); |
---|
8079 | |
---|
8080 | if(localTree->NumberOfModels > 1) |
---|
8081 | { |
---|
8082 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8083 | reductionBuffer[tid * localTree->NumberOfModels + model] = localTree->perPartitionLH[model]; |
---|
8084 | } |
---|
8085 | else |
---|
8086 | reductionBuffer[tid] = result; |
---|
8087 | |
---|
8088 | if(tid > 0) |
---|
8089 | { |
---|
8090 | for(model = 0; model < localTree->NumberOfModels; model++) |
---|
8091 | localTree->executeModel[model] = TRUE; |
---|
8092 | } |
---|
8093 | break; |
---|
8094 | default: |
---|
8095 | printf("Job %d\n", currentJob); |
---|
8096 | assert(0); |
---|
8097 | } |
---|
8098 | } |
---|
8099 | |
---|
8100 | |
---|
8101 | |
---|
8102 | |
---|
8103 | void masterBarrier(int jobType, tree *tr) |
---|
8104 | { |
---|
8105 | const int |
---|
8106 | n = NumberOfThreads; |
---|
8107 | |
---|
8108 | int |
---|
8109 | i, |
---|
8110 | sum; |
---|
8111 | |
---|
8112 | jobCycle = !jobCycle; |
---|
8113 | threadJob = (jobType << 16) + jobCycle; |
---|
8114 | |
---|
8115 | execFunction(tr, tr, 0, n); |
---|
8116 | |
---|
8117 | |
---|
8118 | do |
---|
8119 | { |
---|
8120 | for(i = 1, sum = 1; i < n; i++) |
---|
8121 | sum += barrierBuffer[i]; |
---|
8122 | } |
---|
8123 | while(sum < n); |
---|
8124 | |
---|
8125 | for(i = 1; i < n; i++) |
---|
8126 | barrierBuffer[i] = 0; |
---|
8127 | } |
---|
8128 | |
---|
8129 | #ifndef _PORTABLE_PTHREADS |
---|
8130 | |
---|
/*
 * Requests that the calling thread may only run on the CPU with index tid.
 *
 * When pthread_setaffinity_np() reports an error, an explanatory message is
 * written via printBothOpen() and assert(0) is triggered.
 */
static void pinToCore(int tid)
{
  cpu_set_t
    allowedCpus;

  int
    rc;

  CPU_ZERO(&allowedCpus);
  CPU_SET(tid, &allowedCpus);

  rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &allowedCpus);

  if(rc == 0)
    return;

  printBothOpen("\n\nThere was a problem finding a physical core for thread number %d to run on.\n", tid);
  printBothOpen("Probably this happend because you are trying to run more threads than you have cores available,\n");
  printBothOpen("which is a thing you should never ever do again, good bye .... \n\n");
  assert(0);
}
---|
8146 | |
---|
8147 | #endif |
---|
8148 | |
---|
8149 | static void *likelihoodThread(void *tData) |
---|
8150 | { |
---|
8151 | threadData *td = (threadData*)tData; |
---|
8152 | tree |
---|
8153 | *tr = td->tr, |
---|
8154 | *localTree = (tree *)rax_malloc(sizeof(tree)); |
---|
8155 | int |
---|
8156 | myCycle = 0; |
---|
8157 | |
---|
8158 | const int |
---|
8159 | n = NumberOfThreads, |
---|
8160 | tid = td->threadNumber; |
---|
8161 | |
---|
8162 | #ifndef _PORTABLE_PTHREADS |
---|
8163 | pinToCore(tid); |
---|
8164 | #endif |
---|
8165 | |
---|
8166 | printf("\nThis is RAxML Worker Pthread Number: %d\n", tid); |
---|
8167 | |
---|
8168 | while(1) |
---|
8169 | { |
---|
8170 | while (myCycle == threadJob); |
---|
8171 | myCycle = threadJob; |
---|
8172 | |
---|
8173 | execFunction(tr, localTree, tid, n); |
---|
8174 | |
---|
8175 | |
---|
8176 | barrierBuffer[tid] = 1; |
---|
8177 | } |
---|
8178 | |
---|
8179 | return (void*)NULL; |
---|
8180 | } |
---|
8181 | |
---|
8182 | static void startPthreads(tree *tr) |
---|
8183 | { |
---|
8184 | pthread_t *threads; |
---|
8185 | pthread_attr_t attr; |
---|
8186 | int rc, t; |
---|
8187 | threadData *tData; |
---|
8188 | |
---|
8189 | jobCycle = 0; |
---|
8190 | threadJob = 0; |
---|
8191 | |
---|
8192 | printf("\nThis is the RAxML Master Pthread\n"); |
---|
8193 | |
---|
8194 | pthread_attr_init(&attr); |
---|
8195 | pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); |
---|
8196 | |
---|
8197 | pthread_mutex_init(&mutex , (pthread_mutexattr_t *)NULL); |
---|
8198 | |
---|
8199 | threads = (pthread_t *)rax_malloc(NumberOfThreads * sizeof(pthread_t)); |
---|
8200 | tData = (threadData *)rax_malloc(NumberOfThreads * sizeof(threadData)); |
---|
8201 | |
---|
8202 | |
---|
8203 | reductionBuffer = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels); |
---|
8204 | reductionBufferTwo = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels); |
---|
8205 | reductionBufferThree = (volatile double *)rax_malloc(sizeof(volatile double) * NumberOfThreads * tr->NumberOfModels); |
---|
8206 | reductionBufferParsimony = (volatile int *)rax_malloc(sizeof(volatile int) * NumberOfThreads); |
---|
8207 | |
---|
8208 | |
---|
8209 | barrierBuffer = (volatile char *)rax_malloc(sizeof(volatile char) * NumberOfThreads); |
---|
8210 | |
---|
8211 | for(t = 0; t < NumberOfThreads; t++) |
---|
8212 | barrierBuffer[t] = 0; |
---|
8213 | |
---|
8214 | |
---|
8215 | branchInfos = (volatile branchInfo **)rax_malloc(sizeof(volatile branchInfo *) * NumberOfThreads); |
---|
8216 | |
---|
8217 | for(t = 1; t < NumberOfThreads; t++) |
---|
8218 | { |
---|
8219 | tData[t].tr = tr; |
---|
8220 | tData[t].threadNumber = t; |
---|
8221 | rc = pthread_create(&threads[t], &attr, likelihoodThread, (void *)(&tData[t])); |
---|
8222 | if(rc) |
---|
8223 | { |
---|
8224 | printf("ERROR; return code from pthread_create() is %d\n", rc); |
---|
8225 | exit(-1); |
---|
8226 | } |
---|
8227 | } |
---|
8228 | } |
---|
8229 | |
---|
8230 | |
---|
8231 | |
---|
8232 | #endif |
---|
8233 | |
---|
8234 | |
---|
8235 | /*************************************************************************************************************************************************************/ |
---|
8236 | |
---|
8237 | static int elwCompare(const void *p1, const void *p2) |
---|
8238 | { |
---|
8239 | elw *rc1 = (elw *)p1; |
---|
8240 | elw *rc2 = (elw *)p2; |
---|
8241 | |
---|
8242 | double i = rc1->weight; |
---|
8243 | double j = rc2->weight; |
---|
8244 | |
---|
8245 | if (i > j) |
---|
8246 | return (-1); |
---|
8247 | if (i < j) |
---|
8248 | return (1); |
---|
8249 | return (0); |
---|
8250 | } |
---|
8251 | |
---|
8252 | static int elwCompareLikelihood(const void *p1, const void *p2) |
---|
8253 | { |
---|
8254 | elw *rc1 = (elw *)p1; |
---|
8255 | elw *rc2 = (elw *)p2; |
---|
8256 | |
---|
8257 | double i = rc1->lh; |
---|
8258 | double j = rc2->lh; |
---|
8259 | |
---|
8260 | if (i > j) |
---|
8261 | return (-1); |
---|
8262 | if (i < j) |
---|
8263 | return (1); |
---|
8264 | return (0); |
---|
8265 | } |
---|
8266 | |
---|
8267 | static void computeLHTest(tree *tr, analdef *adef, char *bootStrapFileName) |
---|
8268 | { |
---|
8269 | int |
---|
8270 | i; |
---|
8271 | |
---|
8272 | double |
---|
8273 | bestLH, |
---|
8274 | currentLH, |
---|
8275 | weightSum = 0.0; |
---|
8276 | |
---|
8277 | FILE |
---|
8278 | *treeFile = getNumberOfTrees(tr, bootStrapFileName, adef); |
---|
8279 | |
---|
8280 | double |
---|
8281 | *bestVector = (double*)rax_malloc(sizeof(double) * tr->cdta->endsite); |
---|
8282 | |
---|
8283 | for(i = 0; i < tr->cdta->endsite; i++) |
---|
8284 | weightSum += (double)(tr->cdta->aliaswgt[i]); |
---|
8285 | |
---|
8286 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8287 | printBothOpen("Model optimization, best Tree: %f\n", tr->likelihood); |
---|
8288 | bestLH = tr->likelihood; |
---|
8289 | |
---|
8290 | evaluateGenericVector(tr, tr->start); |
---|
8291 | memcpy(bestVector, tr->perSiteLL, tr->cdta->endsite * sizeof(double)); |
---|
8292 | |
---|
8293 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8294 | { |
---|
8295 | int |
---|
8296 | j; |
---|
8297 | |
---|
8298 | double |
---|
8299 | temp, |
---|
8300 | wtemp, |
---|
8301 | sum = 0.0, |
---|
8302 | sum2 = 0.0, |
---|
8303 | sd; |
---|
8304 | |
---|
8305 | treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE); |
---|
8306 | |
---|
8307 | |
---|
8308 | if(tr->optimizeAllTrees) |
---|
8309 | { |
---|
8310 | treeEvaluate(tr, 1); |
---|
8311 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
8312 | } |
---|
8313 | else |
---|
8314 | treeEvaluate(tr, 2); |
---|
8315 | |
---|
8316 | tr->start = tr->nodep[1]; |
---|
8317 | |
---|
8318 | currentLH = tr->likelihood; |
---|
8319 | |
---|
8320 | if(currentLH > bestLH) |
---|
8321 | printBothOpen("Better tree found %d at %f\n", i, currentLH); |
---|
8322 | |
---|
8323 | evaluateGenericVector(tr, tr->start); |
---|
8324 | |
---|
8325 | sum = 0.0; |
---|
8326 | sum2 = 0.0; |
---|
8327 | |
---|
8328 | for (j = 0; j < tr->cdta->endsite; j++) |
---|
8329 | { |
---|
8330 | temp = bestVector[j] - tr->perSiteLL[j]; |
---|
8331 | wtemp = tr->cdta->aliaswgt[j] * temp; |
---|
8332 | sum += wtemp; |
---|
8333 | sum2 += wtemp * temp; |
---|
8334 | } |
---|
8335 | |
---|
8336 | sd = sqrt( weightSum * (sum2 - sum*sum / weightSum) / (weightSum - 1) ); |
---|
8337 | /* this is for a 5% p level */ |
---|
8338 | |
---|
8339 | printBothOpen("Tree: %d Likelihood: %f D(LH): %f SD: %f Significantly Worse: %s (5%s), %s (2%s), %s (1%s)\n", |
---|
8340 | i, currentLH, currentLH - bestLH, sd, |
---|
8341 | (sum > 1.95996 * sd) ? "Yes" : " No", "%", |
---|
8342 | (sum > 2.326 * sd) ? "Yes" : " No", "%", |
---|
8343 | (sum > 2.57583 * sd) ? "Yes" : " No", "%"); |
---|
8344 | } |
---|
8345 | |
---|
8346 | |
---|
8347 | rax_free(bestVector); |
---|
8348 | fclose(treeFile); |
---|
8349 | exit(0); |
---|
8350 | } |
---|
8351 | |
---|
8352 | static void computePerSiteLLs(tree *tr, analdef *adef, char *bootStrapFileName) |
---|
8353 | { |
---|
8354 | int |
---|
8355 | i; |
---|
8356 | |
---|
8357 | FILE |
---|
8358 | *treeFile = getNumberOfTrees(tr, bootStrapFileName, adef), |
---|
8359 | *tlf = myfopen(perSiteLLsFileName, "wb"); |
---|
8360 | |
---|
8361 | double |
---|
8362 | *unsortedSites = (double*)rax_malloc(sizeof(double) * tr->rdta->sites); |
---|
8363 | |
---|
8364 | |
---|
8365 | |
---|
8366 | fprintf(tlf, " %d %d\n", tr->numberOfTrees, tr->rdta->sites); |
---|
8367 | |
---|
8368 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8369 | { |
---|
8370 | int |
---|
8371 | k, |
---|
8372 | j; |
---|
8373 | |
---|
8374 | treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE); |
---|
8375 | assert(tr->ntips == tr->mxtips); |
---|
8376 | |
---|
8377 | if(i == 0) |
---|
8378 | { |
---|
8379 | if(adef->useBinaryModelFile) |
---|
8380 | { |
---|
8381 | readBinaryModel(tr); |
---|
8382 | evaluateGenericInitrav(tr, tr->start); |
---|
8383 | treeEvaluate(tr, 2); |
---|
8384 | } |
---|
8385 | else |
---|
8386 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8387 | } |
---|
8388 | else |
---|
8389 | { |
---|
8390 | if(tr->optimizeAllTrees) |
---|
8391 | { |
---|
8392 | treeEvaluate(tr, 1); |
---|
8393 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
8394 | } |
---|
8395 | else |
---|
8396 | treeEvaluate(tr, 2); |
---|
8397 | } |
---|
8398 | |
---|
8399 | tr->start = tr->nodep[1]; |
---|
8400 | |
---|
8401 | evaluateGenericVector(tr, tr->start); |
---|
8402 | |
---|
8403 | printBothOpen("Tree %d: %f\n", i, tr->likelihood); |
---|
8404 | |
---|
8405 | fprintf(tlf, "tr%d\t", i + 1); |
---|
8406 | |
---|
8407 | for(j = 0; j < tr->cdta->endsite; j++) |
---|
8408 | { |
---|
8409 | for(k = 0; k < tr->rdta->sites; k++) |
---|
8410 | if(j == tr->patternPosition[k]) |
---|
8411 | unsortedSites[tr->columnPosition[k] - 1] = tr->perSiteLL[j]; |
---|
8412 | } |
---|
8413 | |
---|
8414 | for(j = 0; j < tr->rdta->sites; j++) |
---|
8415 | fprintf(tlf, "%f ", unsortedSites[j]); |
---|
8416 | |
---|
8417 | fprintf(tlf, "\n"); |
---|
8418 | } |
---|
8419 | |
---|
8420 | fclose(treeFile); |
---|
8421 | |
---|
8422 | rax_free(unsortedSites); |
---|
8423 | fclose(tlf); |
---|
8424 | } |
---|
8425 | |
---|
8426 | |
---|
8427 | static double cumulativeTreeLength(tree *tr, analdef *adef) |
---|
8428 | { |
---|
8429 | double tl = 0.0; |
---|
8430 | |
---|
8431 | if(adef->perGeneBranchLengths) |
---|
8432 | { |
---|
8433 | int |
---|
8434 | accWgt = 0, |
---|
8435 | model; |
---|
8436 | |
---|
8437 | double |
---|
8438 | accLength = 0.0; |
---|
8439 | |
---|
8440 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
8441 | { |
---|
8442 | int |
---|
8443 | wgt = 0, |
---|
8444 | i, |
---|
8445 | lower, |
---|
8446 | upper; |
---|
8447 | |
---|
8448 | double |
---|
8449 | tlm; |
---|
8450 | |
---|
8451 | tlm = treeLength(tr, model); |
---|
8452 | |
---|
8453 | lower = tr->partitionData[model].lower; |
---|
8454 | upper = tr->partitionData[model].upper; |
---|
8455 | |
---|
8456 | for(i = lower; i < upper; i++) |
---|
8457 | wgt += tr->cdta->aliaswgt[i]; |
---|
8458 | |
---|
8459 | accLength += ((double)wgt) * tlm; |
---|
8460 | accWgt += wgt; |
---|
8461 | } |
---|
8462 | |
---|
8463 | tl = accLength / ((double)accWgt); |
---|
8464 | |
---|
8465 | } |
---|
8466 | else |
---|
8467 | tl = treeLength(tr, 0); |
---|
8468 | |
---|
8469 | |
---|
8470 | return tl; |
---|
8471 | } |
---|
8472 | |
---|
8473 | static void computeAllLHs(tree *tr, analdef *adef, char *bootStrapFileName) |
---|
8474 | { |
---|
8475 | int |
---|
8476 | i; |
---|
8477 | |
---|
8478 | double |
---|
8479 | bestLH = unlikely; |
---|
8480 | |
---|
8481 | bestlist |
---|
8482 | *bestT; |
---|
8483 | |
---|
8484 | FILE |
---|
8485 | *treeFile = getNumberOfTrees(tr, bootStrapFileName, adef), |
---|
8486 | *result = myfopen(resultFileName, "wb"); |
---|
8487 | |
---|
8488 | elw |
---|
8489 | *list; |
---|
8490 | |
---|
8491 | INFILE = getNumberOfTrees(tr, bootStrapFileName, adef); |
---|
8492 | |
---|
8493 | bestT = (bestlist *) rax_malloc(sizeof(bestlist)); |
---|
8494 | bestT->ninit = 0; |
---|
8495 | initBestTree(bestT, 1, tr->mxtips); |
---|
8496 | |
---|
8497 | list = (elw *)rax_malloc(sizeof(elw) * tr->numberOfTrees); |
---|
8498 | |
---|
8499 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8500 | { |
---|
8501 | treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE); |
---|
8502 | resetBranches(tr); |
---|
8503 | |
---|
8504 | if(i == 0) |
---|
8505 | { |
---|
8506 | |
---|
8507 | if(adef->useBinaryModelFile) |
---|
8508 | { |
---|
8509 | readBinaryModel(tr); |
---|
8510 | evaluateGenericInitrav(tr, tr->start); |
---|
8511 | treeEvaluate(tr, 2); |
---|
8512 | } |
---|
8513 | else |
---|
8514 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8515 | |
---|
8516 | printBothOpen("Model optimization on first Tree: %f\n", tr->likelihood); |
---|
8517 | } |
---|
8518 | else |
---|
8519 | { |
---|
8520 | evaluateGenericInitrav(tr, tr->start); |
---|
8521 | |
---|
8522 | /* |
---|
8523 | treeEvaluateProgressive(tr); |
---|
8524 | treeEvaluateRandom(tr, 2); |
---|
8525 | */ |
---|
8526 | if(tr->optimizeAllTrees) |
---|
8527 | { |
---|
8528 | treeEvaluate(tr, 1); |
---|
8529 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
8530 | } |
---|
8531 | else |
---|
8532 | treeEvaluate(tr, 2); |
---|
8533 | } |
---|
8534 | |
---|
8535 | list[i].tree = i; |
---|
8536 | list[i].lh = tr->likelihood; |
---|
8537 | |
---|
8538 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
8539 | |
---|
8540 | fprintf(result, "%s", tr->tree_string); |
---|
8541 | |
---|
8542 | saveBestTree(bestT, tr); |
---|
8543 | |
---|
8544 | if(tr->likelihood > bestLH) |
---|
8545 | bestLH = tr->likelihood; |
---|
8546 | |
---|
8547 | printBothOpen("Tree %d Likelihood %f Tree-Length %f\n", i, tr->likelihood, cumulativeTreeLength(tr, adef)); |
---|
8548 | } |
---|
8549 | |
---|
8550 | qsort(list, tr->numberOfTrees, sizeof(elw), elwCompareLikelihood); |
---|
8551 | |
---|
8552 | printBothOpen("\n"); |
---|
8553 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8554 | printBothOpen("%d %f\n", list[i].tree, list[i].lh); |
---|
8555 | |
---|
8556 | printBothOpen("\n"); |
---|
8557 | |
---|
8558 | /* |
---|
8559 | recallBestTree(bestT, 1, tr); |
---|
8560 | evaluateGeneric(tr, tr->start); |
---|
8561 | printf("Model optimization, %f <-> %f\n", bestLH, tr->likelihood); |
---|
8562 | fprintf(infoFile, "Model optimization, %f <-> %f\n", bestLH, tr->likelihood); |
---|
8563 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8564 | treeEvaluate(tr, 2); |
---|
8565 | printf("Model optimization, %f <-> %f\n", bestLH, tr->likelihood); |
---|
8566 | fprintf(infoFile, "Model optimization, %f <-> %f\n", bestLH, tr->likelihood); |
---|
8567 | */ |
---|
8568 | |
---|
8569 | printBothOpen("\nAll evaluated trees with branch lengths written to File: %s\n", resultFileName); |
---|
8570 | printBothOpen("\nTotal execution time: %f\n", gettime() - masterTime); |
---|
8571 | |
---|
8572 | |
---|
8573 | fclose(result); |
---|
8574 | exit(0); |
---|
8575 | } |
---|
8576 | |
---|
8577 | |
---|
8578 | |
---|
8579 | |
---|
8580 | static void computeELW(tree *tr, analdef *adef, char *bootStrapFileName) |
---|
8581 | { |
---|
8582 | FILE |
---|
8583 | *treeFile = getNumberOfTrees(tr, bootStrapFileName, adef); |
---|
8584 | |
---|
8585 | int |
---|
8586 | bestIndex = -1, |
---|
8587 | i, |
---|
8588 | k, |
---|
8589 | *originalRateCategories = (int*)rax_malloc(tr->cdta->endsite * sizeof(int)), |
---|
8590 | *originalInvariant = (int*)rax_malloc(tr->cdta->endsite * sizeof(int)); |
---|
8591 | |
---|
8592 | long |
---|
8593 | startSeed; |
---|
8594 | |
---|
8595 | double |
---|
8596 | best = unlikely, |
---|
8597 | **lhs, |
---|
8598 | **lhweights, |
---|
8599 | sum = 0.0; |
---|
8600 | |
---|
8601 | elw |
---|
8602 | *bootweights, |
---|
8603 | **rankTest; |
---|
8604 | |
---|
8605 | initModel(tr, tr->rdta, tr->cdta, adef); |
---|
8606 | |
---|
8607 | if(tr->numberOfTrees < 2) |
---|
8608 | { |
---|
8609 | printBothOpen("Error, there is only one tree in file %s which you want to use to conduct an ELW test\n", bootStrapFileName); |
---|
8610 | |
---|
8611 | exit(-1); |
---|
8612 | } |
---|
8613 | |
---|
8614 | bootweights = (elw *)rax_malloc(sizeof(elw) * tr->numberOfTrees); |
---|
8615 | |
---|
8616 | rankTest = (elw **)rax_malloc(sizeof(elw *) * adef->multipleRuns); |
---|
8617 | |
---|
8618 | for(k = 0; k < adef->multipleRuns; k++) |
---|
8619 | rankTest[k] = (elw *)rax_malloc(sizeof(elw) * tr->numberOfTrees); |
---|
8620 | |
---|
8621 | lhs = (double **)rax_malloc(sizeof(double *) * tr->numberOfTrees); |
---|
8622 | |
---|
8623 | for(k = 0; k < tr->numberOfTrees; k++) |
---|
8624 | lhs[k] = (double *)rax_calloc(adef->multipleRuns, sizeof(double)); |
---|
8625 | |
---|
8626 | |
---|
8627 | lhweights = (double **)rax_malloc(sizeof(double *) * tr->numberOfTrees); |
---|
8628 | |
---|
8629 | for(k = 0; k < tr->numberOfTrees; k++) |
---|
8630 | lhweights[k] = (double *)rax_calloc(adef->multipleRuns, sizeof(double)); |
---|
8631 | |
---|
8632 | /* read in the first tree and optimize ML params on it */ |
---|
8633 | |
---|
8634 | treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE); |
---|
8635 | |
---|
8636 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8637 | rewind(treeFile); |
---|
8638 | |
---|
8639 | printBothOpen("Model optimization, first Tree: %f\n", tr->likelihood); |
---|
8640 | |
---|
8641 | memcpy(originalRateCategories, tr->cdta->rateCategory, sizeof(int) * tr->cdta->endsite); |
---|
8642 | memcpy(originalInvariant, tr->invariant, sizeof(int) * tr->cdta->endsite); |
---|
8643 | |
---|
8644 | assert(adef->boot > 0); |
---|
8645 | |
---|
8646 | /* TODO this is ugly, should be passed as param to computenextreplicate() */ |
---|
8647 | |
---|
8648 | startSeed = adef->boot; |
---|
8649 | |
---|
8650 | |
---|
8651 | /* |
---|
8652 | now read the trees one by one, do a couple of BS replicates and re-compute their likelihood |
---|
8653 | for every replicate |
---|
8654 | */ |
---|
8655 | |
---|
8656 | /* loop over all trees */ |
---|
8657 | |
---|
8658 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8659 | { |
---|
8660 | |
---|
8661 | /* read in new tree */ |
---|
8662 | |
---|
8663 | treeReadLen(treeFile, tr, FALSE, FALSE, FALSE, adef, TRUE, FALSE); |
---|
8664 | |
---|
8665 | if(tr->optimizeAllTrees) |
---|
8666 | { |
---|
8667 | treeEvaluate(tr, 1); |
---|
8668 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
8669 | } |
---|
8670 | else |
---|
8671 | treeEvaluate(tr, 2.0); |
---|
8672 | |
---|
8673 | printBothOpen("Original tree %d likelihood %f\n", i, tr->likelihood); |
---|
8674 | |
---|
8675 | if(tr->likelihood > best) |
---|
8676 | { |
---|
8677 | best = tr->likelihood; |
---|
8678 | bestIndex = i; |
---|
8679 | } |
---|
8680 | /* reset branches to default values */ |
---|
8681 | |
---|
8682 | resetBranches(tr); |
---|
8683 | |
---|
8684 | /* reset BS random seed, we want to use the same replicates for every tree */ |
---|
8685 | |
---|
8686 | adef->rapidBoot = startSeed; |
---|
8687 | |
---|
8688 | for(k = 0; k < adef->multipleRuns; k++) |
---|
8689 | { |
---|
8690 | /* compute the next BS replicate, i.e., re-sample alignment columns */ |
---|
8691 | |
---|
8692 | computeNextReplicate(tr, &adef->rapidBoot, originalRateCategories, originalInvariant, TRUE, TRUE); |
---|
8693 | |
---|
8694 | evaluateGenericInitrav(tr, tr->start); |
---|
8695 | |
---|
8696 | /* if this is the first replicate for this tree do a slightly more thorough br-len opt */ |
---|
8697 | /* we don't re-estimate ML model params (except branches) for every replicate to make things a bit faster */ |
---|
8698 | |
---|
8699 | if(k == 0) |
---|
8700 | treeEvaluate(tr, 2.0); |
---|
8701 | else |
---|
8702 | treeEvaluate(tr, 0.5); |
---|
8703 | |
---|
8704 | /* store the likelihood of replicate k for tree i */ |
---|
8705 | lhs[i][k] = tr->likelihood; |
---|
8706 | |
---|
8707 | rankTest[k][i].lh = tr->likelihood; |
---|
8708 | rankTest[k][i].tree = i; |
---|
8709 | } |
---|
8710 | |
---|
8711 | /* restore the original alignment to start BS procedure for the next tree */ |
---|
8712 | |
---|
8713 | reductionCleanup(tr, originalRateCategories, originalInvariant); |
---|
8714 | } |
---|
8715 | |
---|
8716 | assert(bestIndex >= 0 && best != unlikely); |
---|
8717 | |
---|
8718 | printBothOpen("Best-Scoring tree is tree %d with score %f\n", bestIndex, best); |
---|
8719 | |
---|
8720 | |
---|
8721 | /* now loop over all replicates */ |
---|
8722 | |
---|
8723 | for(k = 0; k < adef->multipleRuns; k++) |
---|
8724 | { |
---|
8725 | /* find best score for this replicate */ |
---|
8726 | |
---|
8727 | for(i = 0, best = unlikely; i < tr->numberOfTrees; i++) |
---|
8728 | if(lhs[i][k] > best) |
---|
8729 | best = lhs[i][k]; |
---|
8730 | |
---|
8731 | /* compute exponential weights w.r.t. the best likelihood for replicate k */ |
---|
8732 | |
---|
8733 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8734 | lhweights[i][k] = exp(lhs[i][k] - best); |
---|
8735 | |
---|
8736 | /* sum over all exponential weights */ |
---|
8737 | |
---|
8738 | for(i = 0, sum = 0.0; i < tr->numberOfTrees; i++) |
---|
8739 | sum += lhweights[i][k]; |
---|
8740 | |
---|
8741 | /* and normalize by the sum */ |
---|
8742 | |
---|
8743 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8744 | lhweights[i][k] = lhweights[i][k] / sum; |
---|
8745 | |
---|
8746 | } |
---|
8747 | |
---|
8748 | /* now loop over all trees */ |
---|
8749 | |
---|
8750 | for(i = 0; i < tr->numberOfTrees; i++) |
---|
8751 | { |
---|
8752 | |
---|
8753 | /* loop to sum over all replicate weights for tree i */ |
---|
8754 | |
---|
8755 | for(k = 0, sum = 0.0; k < adef->multipleRuns; k++) |
---|
8756 | sum += lhweights[i][k]; |
---|
8757 | |
---|
8758 | /* set the weight and the index of the respective tree */ |
---|
8759 | |
---|
8760 | bootweights[i].weight = sum / ((double)adef->multipleRuns); |
---|
8761 | bootweights[i].tree = i; |
---|
8762 | } |
---|
8763 | |
---|
8764 | /* now just sort the tree collection by weights */ |
---|
8765 | |
---|
8766 | qsort(bootweights, tr->numberOfTrees, sizeof(elw), elwCompare); |
---|
8767 | |
---|
8768 | printBothOpen("Tree\t Posterior Probability \t Cumulative posterior probability\n"); |
---|
8769 | |
---|
8770 | /* loop over the sorted array of trees and print out statistics */ |
---|
8771 | |
---|
8772 | for(i = 0, sum = 0.0; i < tr->numberOfTrees; i++) |
---|
8773 | { |
---|
8774 | sum += bootweights[i].weight; |
---|
8775 | |
---|
8776 | printBothOpen("%d\t\t %f \t\t %f\n", bootweights[i].tree, bootweights[i].weight, sum); |
---|
8777 | } |
---|
8778 | |
---|
8779 | |
---|
8780 | /* |
---|
8781 | if(0) |
---|
8782 | { |
---|
8783 | // now compute the super-duper rank test |
---|
8784 | |
---|
8785 | printBothOpen("\n\nNow also computing the super-duper rank test, though I still don't\n"); |
---|
8786 | printBothOpen("understand what it actually means. What this thing does is to initially determine\n"); |
---|
8787 | printBothOpen("the best-scoring ML tree on the original alignment and then the scores of the input\n"); |
---|
8788 | printBothOpen("trees on the number of specified Bootstrap replicates. Then it sorts the scores of the trees\n"); |
---|
8789 | printBothOpen("for every bootstrap replicate and determines the rank of the best-scoring tree on every BS\n"); |
---|
8790 | printBothOpen("replicate. It then prints out how many positions in the sorted lists of thz BS replicates \n"); |
---|
8791 | printBothOpen("must be included in order for the best scoring tree to appear 95 and 99 times respectively.\n"); |
---|
8792 | printBothOpen("This gives some intuition about how variable the score order of the trees will be under\n"); |
---|
8793 | printBothOpen("slight alterations of the data.\n\n"); |
---|
8794 | |
---|
8795 | // sort all BS replicates accodring to likelihood scores |
---|
8796 | |
---|
8797 | for(i = 0; i < adef->multipleRuns; i++) |
---|
8798 | qsort(rankTest[i], tr->numberOfTrees, sizeof(elw), elwCompareLikelihood); |
---|
8799 | |
---|
8800 | |
---|
8801 | // search for our best-scoring tree in every sorted array of likelihood scores |
---|
8802 | |
---|
8803 | for(i = 0; i < adef->multipleRuns; i++) |
---|
8804 | { |
---|
8805 | for(k = 0; k < tr->numberOfTrees; k++) |
---|
8806 | { |
---|
8807 | if(rankTest[i][k].tree == bestIndex) |
---|
8808 | countBest[k]++; |
---|
8809 | } |
---|
8810 | } |
---|
8811 | |
---|
8812 | for(k = 0; k < tr->numberOfTrees; k++) |
---|
8813 | { |
---|
8814 | if(k > 0) |
---|
8815 | countBest[k] += countBest[k - 1]; |
---|
8816 | |
---|
8817 | printBothOpen("Number of Occurences of best-scoring tree for %d BS replicates up to position %d in sorted list: %d\n", |
---|
8818 | adef->multipleRuns, k, countBest[k]); |
---|
8819 | |
---|
8820 | if(cutOff95 == -1 && countBest[k] <= (int)((double)adef->multipleRuns * 0.95 + 0.5)) |
---|
8821 | cutOff95 = k; |
---|
8822 | |
---|
8823 | if(cutOff99 == -1 && countBest[k] <= (int)((double)adef->multipleRuns * 0.99 + 0.5)) |
---|
8824 | cutOff99 = k; |
---|
8825 | } |
---|
8826 | |
---|
8827 | assert(countBest[k-1] == adef->multipleRuns); |
---|
8828 | assert(cutOff95 >= 0 && cutOff99 >= 0); |
---|
8829 | |
---|
8830 | printBothOpen("\n95%s cutoff reached after including %d out of %d sorted likelihood columns\n", "%", countBest[cutOff95], adef->multipleRuns); |
---|
8831 | |
---|
8832 | printBothOpen("99%s cutoff reached after including %d out of %d sorted likelihood columns\n\n", "%", countBest[cutOff99], adef->multipleRuns); |
---|
8833 | } |
---|
8834 | */ |
---|
8835 | |
---|
8836 | printBothOpen("\nTotal execution time: %f\n\n", gettime() - masterTime); |
---|
8837 | |
---|
8838 | rax_free(originalRateCategories); |
---|
8839 | rax_free(originalInvariant); |
---|
8840 | fclose(treeFile); |
---|
8841 | |
---|
8842 | exit(0); |
---|
8843 | } |
---|
8844 | |
---|
8845 | |
---|
8846 | |
---|
8847 | static void computeDistances(tree *tr, analdef *adef) |
---|
8848 | { |
---|
8849 | int i, j, modelCounter; |
---|
8850 | double z0[NUM_BRANCHES]; |
---|
8851 | double result[NUM_BRANCHES]; |
---|
8852 | double t; |
---|
8853 | char distanceFileName[1024]; |
---|
8854 | |
---|
8855 | FILE |
---|
8856 | *out; |
---|
8857 | |
---|
8858 | strcpy(distanceFileName, workdir); |
---|
8859 | strcat(distanceFileName, "RAxML_distances."); |
---|
8860 | strcat(distanceFileName, run_id); |
---|
8861 | |
---|
8862 | out = myfopen(distanceFileName, "wb"); |
---|
8863 | |
---|
8864 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8865 | |
---|
8866 | printBothOpen("\nLog Likelihood Score after parameter optimization: %f\n\n", tr->likelihood); |
---|
8867 | printBothOpen("\nComputing pairwise ML-distances ...\n"); |
---|
8868 | |
---|
8869 | for(modelCounter = 0; modelCounter < tr->NumberOfModels; modelCounter++) |
---|
8870 | z0[modelCounter] = defaultz; |
---|
8871 | |
---|
8872 | t = gettime(); |
---|
8873 | |
---|
8874 | for(i = 1; i <= tr->mxtips; i++) |
---|
8875 | for(j = i + 1; j <= tr->mxtips; j++) |
---|
8876 | { |
---|
8877 | double z, x; |
---|
8878 | |
---|
8879 | makenewzGenericDistance(tr, 10, z0, result, i, j); |
---|
8880 | |
---|
8881 | if(tr->multiBranch) |
---|
8882 | { |
---|
8883 | int k; |
---|
8884 | |
---|
8885 | for(k = 0, x = 0.0; k < tr->numBranches; k++) |
---|
8886 | { |
---|
8887 | assert(tr->partitionContributions[k] != -1.0); |
---|
8888 | assert(tr->fracchanges[k] != -1.0); |
---|
8889 | z = result[k]; |
---|
8890 | if (z < zmin) |
---|
8891 | z = zmin; |
---|
8892 | x += (-log(z) * tr->fracchanges[k]) * tr->partitionContributions[k]; |
---|
8893 | } |
---|
8894 | } |
---|
8895 | else |
---|
8896 | { |
---|
8897 | z = result[0]; |
---|
8898 | if (z < zmin) |
---|
8899 | z = zmin; |
---|
8900 | x = -log(z) * tr->fracchange; |
---|
8901 | } |
---|
8902 | |
---|
8903 | /*printf("%s-%s \t %f\n", tr->nameList[i], tr->nameList[j], x);*/ |
---|
8904 | fprintf(out, "%s %s \t %f\n", tr->nameList[i], tr->nameList[j], x); |
---|
8905 | } |
---|
8906 | |
---|
8907 | fclose(out); |
---|
8908 | |
---|
8909 | t = gettime() - t; |
---|
8910 | |
---|
8911 | printBothOpen("\nTime for pair-wise ML distance computation of %d distances: %f seconds\n", |
---|
8912 | (tr->mxtips * tr->mxtips - tr->mxtips) / 2, t); |
---|
8913 | printBothOpen("\nDistances written to file: %s\n", distanceFileName); |
---|
8914 | |
---|
8915 | |
---|
8916 | |
---|
8917 | exit(0); |
---|
8918 | } |
---|
8919 | |
---|
8920 | |
---|
8921 | |
---|
8922 | static void morphologicalCalibration(tree *tr, analdef *adef) |
---|
8923 | { |
---|
8924 | int |
---|
8925 | replicates = adef->multipleRuns, |
---|
8926 | i, |
---|
8927 | *significanceCounter = (int*)rax_malloc(sizeof(int) * tr->cdta->endsite); |
---|
8928 | |
---|
8929 | double |
---|
8930 | *reference = (double*)rax_malloc(sizeof(double) * tr->cdta->endsite); |
---|
8931 | |
---|
8932 | char |
---|
8933 | integerFileName[1024] = ""; |
---|
8934 | |
---|
8935 | FILE |
---|
8936 | *integerFile; |
---|
8937 | |
---|
8938 | if(replicates == 1) |
---|
8939 | { |
---|
8940 | printBothOpen("You did not specify the number of random trees to be generated by \"-#\" !\n"); |
---|
8941 | printBothOpen("Automatically setting it to 100.\n"); |
---|
8942 | replicates = 100; |
---|
8943 | } |
---|
8944 | |
---|
8945 | printBothOpen("Likelihood on Reference tree: %f\n\n", tr->likelihood); |
---|
8946 | |
---|
8947 | evaluateGenericVector(tr, tr->start); |
---|
8948 | |
---|
8949 | for(i = 0; i < tr->cdta->endsite; i++) |
---|
8950 | significanceCounter[i] = 0; |
---|
8951 | |
---|
8952 | memcpy(reference, tr->perSiteLL, tr->cdta->endsite * sizeof(double)); |
---|
8953 | |
---|
8954 | for(i = 0; i < replicates; i++) |
---|
8955 | { |
---|
8956 | int k; |
---|
8957 | |
---|
8958 | printBothOpen("Testing Random Tree [%d]\n", i); |
---|
8959 | makeRandomTree(tr, adef); |
---|
8960 | evaluateGenericInitrav(tr, tr->start); |
---|
8961 | treeEvaluate(tr, 2); |
---|
8962 | |
---|
8963 | /* |
---|
8964 | don't really need modOpt here |
---|
8965 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
8966 | */ |
---|
8967 | |
---|
8968 | evaluateGenericVector(tr, tr->start); |
---|
8969 | |
---|
8970 | |
---|
8971 | for(k = 0; k < tr->cdta->endsite; k++) |
---|
8972 | if(tr->perSiteLL[k] <= reference[k]) |
---|
8973 | significanceCounter[k] = significanceCounter[k] + 1; |
---|
8974 | } |
---|
8975 | |
---|
8976 | strcpy(integerFileName, workdir); |
---|
8977 | strcat(integerFileName, "RAxML_weights."); |
---|
8978 | strcat(integerFileName, run_id); |
---|
8979 | |
---|
8980 | integerFile = myfopen(integerFileName, "wb"); |
---|
8981 | |
---|
8982 | for(i = 0; i < tr->cdta->endsite; i++) |
---|
8983 | fprintf(integerFile, "%d ", significanceCounter[i]); |
---|
8984 | |
---|
8985 | fclose(integerFile); |
---|
8986 | |
---|
8987 | printBothOpen("RAxML calibrated integer weight file written to: %s\n", integerFileName); |
---|
8988 | |
---|
8989 | exit(0); |
---|
8990 | } |
---|
8991 | |
---|
8992 | |
---|
8993 | |
---|
8994 | |
---|
/*
  qsort() comparator for arrays of C strings (elements of type char *).

  a and b point to the array elements, i.e. to the string pointers.
  Returns a negative value, zero, or a positive value when the first string
  sorts before, equal to, or after the second one, as qsort() requires.
  (The former hand-written comparison only ever returned 0 or 1 and reported
  equal strings as "greater", so the resulting order was not reliable.)
*/
static int sortLex(const void *a, const void *b)
{
  const char
    *aPtr = *(const char * const *)a,
    *bPtr = *(const char * const *)b;

  return strcmp(aPtr, bPtr);
}
---|
9012 | |
---|
9013 | |
---|
9014 | static void extractTaxaFromTopology(tree *tr, rawdata *rdta, cruncheddata *cdta, char fileName[1024]) |
---|
9015 | { |
---|
9016 | FILE |
---|
9017 | *f = myfopen(fileName, "rb"); |
---|
9018 | |
---|
9019 | char |
---|
9020 | **nameList, |
---|
9021 | buffer[nmlngth + 2]; |
---|
9022 | |
---|
9023 | int |
---|
9024 | i = 0, |
---|
9025 | c, |
---|
9026 | taxaSize = 1024, |
---|
9027 | taxaCount = 0; |
---|
9028 | |
---|
9029 | nameList = (char**)rax_malloc(sizeof(char*) * taxaSize); |
---|
9030 | |
---|
9031 | while((c = fgetc(f)) != ';') |
---|
9032 | { |
---|
9033 | if(c == '(' || c == ',') |
---|
9034 | { |
---|
9035 | c = fgetc(f); |
---|
9036 | if(c == '(' || c == ',') |
---|
9037 | ungetc(c, f); |
---|
9038 | else |
---|
9039 | { |
---|
9040 | i = 0; |
---|
9041 | |
---|
9042 | do |
---|
9043 | { |
---|
9044 | buffer[i++] = c; |
---|
9045 | c = fgetc(f); |
---|
9046 | } |
---|
9047 | while(c != ':' && c != ')' && c != ','); |
---|
9048 | buffer[i] = '\0'; |
---|
9049 | |
---|
9050 | if(taxaCount == taxaSize) |
---|
9051 | { |
---|
9052 | taxaSize *= 2; |
---|
9053 | nameList = (char **)rax_realloc(nameList, sizeof(char*) * taxaSize, FALSE); |
---|
9054 | } |
---|
9055 | |
---|
9056 | nameList[taxaCount] = (char*)rax_malloc(sizeof(char) * (strlen(buffer) + 1)); |
---|
9057 | strcpy(nameList[taxaCount], buffer); |
---|
9058 | |
---|
9059 | taxaCount++; |
---|
9060 | |
---|
9061 | ungetc(c, f); |
---|
9062 | } |
---|
9063 | } |
---|
9064 | } |
---|
9065 | |
---|
9066 | |
---|
9067 | /* BEGIN ensuring no taxon occurs twice */ |
---|
9068 | { |
---|
9069 | char |
---|
9070 | **taxList = (char **)rax_malloc(sizeof(char *) * (size_t)taxaCount); |
---|
9071 | |
---|
9072 | for(i = 0; i < taxaCount; ++i) |
---|
9073 | taxList[i] = nameList[i]; |
---|
9074 | |
---|
9075 | qsort(taxList, taxaCount, sizeof(char**), sortLex); |
---|
9076 | |
---|
9077 | for(i = 1; i < taxaCount; ++i) |
---|
9078 | if(strcmp(taxList[i], taxList[i-1]) == 0) |
---|
9079 | { |
---|
9080 | printf("A taxon labelled by %s appears twice in the first tree of tree collection %s, exiting ...\n", buffer, bootStrapFile); |
---|
9081 | exit(-1); |
---|
9082 | } |
---|
9083 | |
---|
9084 | rax_free(taxList); |
---|
9085 | } |
---|
9086 | /* END */ |
---|
9087 | |
---|
9088 | |
---|
9089 | printf("Found a total of %d taxa in first tree of tree collection %s\n", taxaCount, bootStrapFile); |
---|
9090 | printf("Expecting all remaining trees in collection to have the same taxon set\n"); |
---|
9091 | |
---|
9092 | rdta->numsp = taxaCount; |
---|
9093 | |
---|
9094 | tr->nameList = (char **)rax_malloc(sizeof(char *) * (taxaCount + 1)); |
---|
9095 | for(i = 1; i <= taxaCount; i++) |
---|
9096 | tr->nameList[i] = nameList[i - 1]; |
---|
9097 | |
---|
9098 | rax_free(nameList); |
---|
9099 | |
---|
9100 | tr->rdta = rdta; |
---|
9101 | tr->cdta = cdta; |
---|
9102 | |
---|
9103 | if (rdta->numsp < 4) |
---|
9104 | { |
---|
9105 | printf("TOO FEW SPECIES, tree contains only %d species\n", rdta->numsp); |
---|
9106 | assert(0); |
---|
9107 | } |
---|
9108 | |
---|
9109 | tr->nameHash = initStringHashTable(10 * taxaCount); |
---|
9110 | for(i = 1; i <= taxaCount; i++) |
---|
9111 | addword(tr->nameList[i], tr->nameHash, i); |
---|
9112 | |
---|
9113 | fclose(f); |
---|
9114 | } |
---|
9115 | |
---|
9116 | |
---|
/*
  fwrite() wrapper that asserts all nmemb elements of `size` bytes were
  written to stream.

  The check must be a comparison: the former `assert(bytes_written = nmemb)`
  assigned nmemb, so short writes went unnoticed and a request for zero
  elements aborted the program.
*/
static void myfwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
{
  size_t
    items_written = fwrite(ptr, size, nmemb, stream);

  assert(items_written == nmemb);

  /* avoids an unused-variable warning when assertions are compiled out */
  (void)items_written;
}
---|
9124 | |
---|
9125 | |
---|
9126 | static void writeLG4(tree *tr, int model, int dataType, FILE *f, partitionLengths p_lengths[MAX_MODEL]) |
---|
9127 | { |
---|
9128 | if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X) |
---|
9129 | { |
---|
9130 | int |
---|
9131 | k; |
---|
9132 | |
---|
9133 | for(k = 0; k < 4; k++) |
---|
9134 | { |
---|
9135 | myfwrite(tr->partitionData[model].EIGN_LG4[k], sizeof(double), p_lengths[dataType].eignLength, f); |
---|
9136 | myfwrite(tr->partitionData[model].EV_LG4[k], sizeof(double), p_lengths[dataType].evLength, f); |
---|
9137 | myfwrite(tr->partitionData[model].EI_LG4[k], sizeof(double), p_lengths[dataType].eiLength, f); |
---|
9138 | myfwrite(tr->partitionData[model].frequencies_LG4[k], sizeof(double), p_lengths[dataType].frequenciesLength, f); |
---|
9139 | myfwrite(tr->partitionData[model].tipVector_LG4[k], sizeof(double), p_lengths[dataType].tipVectorLength, f); |
---|
9140 | myfwrite(tr->partitionData[model].substRates_LG4[k], sizeof(double), p_lengths[dataType].substRatesLength, f); |
---|
9141 | } |
---|
9142 | } |
---|
9143 | } |
---|
9144 | |
---|
9145 | |
---|
9146 | void writeBinaryModel(tree *tr) |
---|
9147 | { |
---|
9148 | int |
---|
9149 | model; |
---|
9150 | |
---|
9151 | FILE |
---|
9152 | *f = myfopen(binaryModelParamsOutputFileName, "w"); |
---|
9153 | |
---|
9154 | /* cdta */ |
---|
9155 | |
---|
9156 | myfwrite(tr->cdta->rateCategory, sizeof(int), tr->rdta->sites + 1, f); |
---|
9157 | myfwrite(tr->cdta->patrat, sizeof(double), tr->rdta->sites + 1, f); |
---|
9158 | myfwrite(tr->cdta->patratStored, sizeof(double), tr->rdta->sites + 1, f); |
---|
9159 | |
---|
9160 | /* partition contributions for fracchange */ |
---|
9161 | |
---|
9162 | myfwrite(tr->partitionContributions, sizeof(double), tr->NumberOfModels, f); |
---|
9163 | |
---|
9164 | /* fracchange */ |
---|
9165 | |
---|
9166 | myfwrite(&tr->fracchange, sizeof(double), 1, f); |
---|
9167 | myfwrite(tr->fracchanges, sizeof(double), (size_t)tr->NumberOfModels, f); |
---|
9168 | |
---|
9169 | myfwrite(&tr->rawFracchange, sizeof(double), 1, f); |
---|
9170 | myfwrite(tr->rawFracchanges, sizeof(double), (size_t)tr->NumberOfModels, f); |
---|
9171 | |
---|
9172 | /* pInfo */ |
---|
9173 | |
---|
9174 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
9175 | { |
---|
9176 | int |
---|
9177 | dataType = tr->partitionData[model].dataType; |
---|
9178 | |
---|
9179 | myfwrite(tr->partitionData[model].weightExponents, sizeof(double), 4, f); |
---|
9180 | myfwrite(tr->partitionData[model].weights, sizeof(double), 4, f); |
---|
9181 | |
---|
9182 | myfwrite(tr->partitionData[model].gammaRates, sizeof(double), 4, f); |
---|
9183 | |
---|
9184 | myfwrite(tr->partitionData[model].EIGN, sizeof(double), pLengths[dataType].eignLength, f); |
---|
9185 | myfwrite(tr->partitionData[model].EV, sizeof(double), pLengths[dataType].evLength, f); |
---|
9186 | myfwrite(tr->partitionData[model].EI, sizeof(double), pLengths[dataType].eiLength, f); |
---|
9187 | |
---|
9188 | myfwrite(tr->partitionData[model].frequencies, sizeof(double), pLengths[dataType].frequenciesLength, f); |
---|
9189 | myfwrite(tr->partitionData[model].tipVector, sizeof(double), pLengths[dataType].tipVectorLength, f); |
---|
9190 | myfwrite(tr->partitionData[model].substRates, sizeof(double), pLengths[dataType].substRatesLength, f); |
---|
9191 | myfwrite(&(tr->partitionData[model].alpha), sizeof(double), 1, f); |
---|
9192 | myfwrite(&(tr->partitionData[model].propInvariant), sizeof(double), 1, f); |
---|
9193 | |
---|
9194 | myfwrite(&(tr->partitionData[model].numberOfCategories), sizeof(int), 1, f); |
---|
9195 | |
---|
9196 | myfwrite(&(tr->partitionData[model].protModels), sizeof(int), 1, f); |
---|
9197 | myfwrite(&(tr->partitionData[model].autoProtModels), sizeof(int), 1, f); |
---|
9198 | |
---|
9199 | myfwrite(tr->partitionData[model].perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f); |
---|
9200 | myfwrite(tr->partitionData[model].unscaled_perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f); |
---|
9201 | |
---|
9202 | writeLG4(tr, model, dataType, f, pLengths); |
---|
9203 | } |
---|
9204 | |
---|
9205 | printBothOpen("\nModel parameters (binary file format) written to: %s\n", binaryModelParamsOutputFileName); |
---|
9206 | |
---|
9207 | fclose(f); |
---|
9208 | } |
---|
9209 | |
---|
/*
  fread() wrapper: reads nmemb elements of `size` bytes from stream into
  ptr and asserts that every requested element was actually read.
*/
static void myfread(void *ptr, size_t size, size_t nmemb, FILE *stream)
{
  const size_t
    items_read = fread(ptr, size, nmemb, stream);

  assert(items_read == nmemb);
}
---|
9219 | |
---|
9220 | |
---|
9221 | static void readLG4(tree *tr, int model, int dataType, FILE *f, partitionLengths p_lengths[MAX_MODEL]) |
---|
9222 | { |
---|
9223 | if(tr->partitionData[model].protModels == LG4 || tr->partitionData[model].protModels == LG4X) |
---|
9224 | { |
---|
9225 | int |
---|
9226 | k; |
---|
9227 | |
---|
9228 | for(k = 0; k < 4; k++) |
---|
9229 | { |
---|
9230 | myfread(tr->partitionData[model].EIGN_LG4[k], sizeof(double), p_lengths[dataType].eignLength, f); |
---|
9231 | myfread(tr->partitionData[model].EV_LG4[k], sizeof(double), p_lengths[dataType].evLength, f); |
---|
9232 | myfread(tr->partitionData[model].EI_LG4[k], sizeof(double), p_lengths[dataType].eiLength, f); |
---|
9233 | myfread(tr->partitionData[model].frequencies_LG4[k], sizeof(double), p_lengths[dataType].frequenciesLength, f); |
---|
9234 | myfread(tr->partitionData[model].tipVector_LG4[k], sizeof(double), p_lengths[dataType].tipVectorLength, f); |
---|
9235 | myfread(tr->partitionData[model].substRates_LG4[k], sizeof(double), p_lengths[dataType].substRatesLength, f); |
---|
9236 | } |
---|
9237 | } |
---|
9238 | } |
---|
9239 | |
---|
9240 | void readBinaryModel(tree *tr) |
---|
9241 | { |
---|
9242 | FILE |
---|
9243 | *f; |
---|
9244 | |
---|
9245 | |
---|
9246 | printBothOpen("\nRAxML is reading a binary model file and not optimizing model params\n"); |
---|
9247 | |
---|
9248 | f = fopen(binaryModelParamsInputFileName, "r"); |
---|
9249 | |
---|
9250 | /* cdta */ |
---|
9251 | |
---|
9252 | myfread(tr->cdta->rateCategory, sizeof(int), (size_t)(tr->rdta->sites + 1), f); |
---|
9253 | myfread(tr->cdta->patrat, sizeof(double), (size_t)(tr->rdta->sites + 1), f); |
---|
9254 | myfread(tr->cdta->patratStored, sizeof(double), (size_t)(tr->rdta->sites + 1), f); |
---|
9255 | |
---|
9256 | /* partition contributions for fracchange */ |
---|
9257 | |
---|
9258 | myfread(tr->partitionContributions, sizeof(double), tr->NumberOfModels, f); |
---|
9259 | |
---|
9260 | /* fracchange */ |
---|
9261 | |
---|
9262 | myfread(&tr->fracchange, sizeof(double), 1, f); |
---|
9263 | myfread(tr->fracchanges, sizeof(double), (size_t)tr->NumberOfModels, f); |
---|
9264 | |
---|
9265 | myfread(&tr->rawFracchange, sizeof(double), 1, f); |
---|
9266 | myfread(tr->rawFracchanges, sizeof(double), (size_t)tr->NumberOfModels, f); |
---|
9267 | |
---|
9268 | /* pInfo */ |
---|
9269 | |
---|
9270 | for(int model = 0; model < tr->NumberOfModels; model++) |
---|
9271 | { |
---|
9272 | int |
---|
9273 | dataType = tr->partitionData[model].dataType; |
---|
9274 | |
---|
9275 | myfread(tr->partitionData[model].weightExponents, sizeof(double), 4, f); |
---|
9276 | myfread(tr->partitionData[model].weights, sizeof(double), 4, f); |
---|
9277 | |
---|
9278 | myfread(tr->partitionData[model].gammaRates, sizeof(double), 4, f); |
---|
9279 | |
---|
9280 | myfread(tr->partitionData[model].EIGN, sizeof(double), (size_t)(pLengths[dataType].eignLength), f); |
---|
9281 | myfread(tr->partitionData[model].EV, sizeof(double), (size_t)(pLengths[dataType].evLength), f); |
---|
9282 | myfread(tr->partitionData[model].EI, sizeof(double), (size_t)(pLengths[dataType].eiLength), f); |
---|
9283 | |
---|
9284 | myfread(tr->partitionData[model].frequencies, sizeof(double), (size_t)(pLengths[dataType].frequenciesLength), f); |
---|
9285 | myfread(tr->partitionData[model].tipVector, sizeof(double), (size_t)(pLengths[dataType].tipVectorLength), f); |
---|
9286 | myfread(tr->partitionData[model].substRates, sizeof(double), (size_t)(pLengths[dataType].substRatesLength), f); |
---|
9287 | |
---|
9288 | myfread(&(tr->partitionData[model].alpha), sizeof(double), 1, f); |
---|
9289 | myfread(&(tr->partitionData[model].propInvariant), sizeof(double), 1, f); |
---|
9290 | |
---|
9291 | myfread(&(tr->partitionData[model].numberOfCategories), sizeof(int), 1, f); |
---|
9292 | |
---|
9293 | myfread(&(tr->partitionData[model].protModels), sizeof(int), 1, f); |
---|
9294 | myfread(&(tr->partitionData[model].autoProtModels), sizeof(int), 1, f); |
---|
9295 | |
---|
9296 | myfread(tr->partitionData[model].perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f); |
---|
9297 | myfread(tr->partitionData[model].unscaled_perSiteRates, sizeof(double), tr->partitionData[model].numberOfCategories, f); |
---|
9298 | |
---|
9299 | readLG4(tr, model, dataType, f, pLengths); |
---|
9300 | } |
---|
9301 | |
---|
9302 | #ifdef _USE_PTHREADS |
---|
9303 | masterBarrier(THREAD_COPY_INIT_MODEL, tr); |
---|
9304 | //masterBarrier(THREAD_RESET_MODEL, tr); |
---|
9305 | #endif |
---|
9306 | |
---|
9307 | if(tr->rateHetModel == CAT) |
---|
9308 | { |
---|
9309 | #ifdef _USE_PTHREADS |
---|
9310 | masterBarrier(THREAD_COPY_RATE_CATS, tr); |
---|
9311 | #else |
---|
9312 | { |
---|
9313 | size_t |
---|
9314 | i; |
---|
9315 | int |
---|
9316 | model; |
---|
9317 | |
---|
9318 | for(model = 0; model < tr->NumberOfModels; model++) |
---|
9319 | { |
---|
9320 | int |
---|
9321 | localCounter = 0; |
---|
9322 | |
---|
9323 | for(i = tr->partitionData[model].lower; i < tr->partitionData[model].upper; i++, localCounter++) |
---|
9324 | tr->partitionData[model].rateCategory[localCounter] = tr->cdta->rateCategory[i]; |
---|
9325 | } |
---|
9326 | } |
---|
9327 | #endif |
---|
9328 | } |
---|
9329 | |
---|
9330 | fclose(f); |
---|
9331 | } |
---|
9332 | |
---|
9333 | |
---|
9334 | |
---|
9335 | |
---|
/* Count the bits that are set in n by inspecting the lowest bit and
   shifting it out until no set bit is left. */
static int iterated_bitcount(unsigned int n)
{
  int
    bitsSet = 0;

  for(; n != 0; n >>= 1)
    {
      if(n & 0x1u)
        bitsSet++;
    }

  return bitsSet;
}
---|
9349 | |
---|
/* lookup table with one entry per 16-bit value; after compute_bits_in_16bits()
   has run, entry v holds the number of set bits of v */
static char bits_in_16bits [0x1u << 16];

/* Fill bits_in_16bits[] for all 2^16 possible 16-bit values using
   iterated_bitcount(). The code insists on a 4-byte unsigned int. */
static void compute_bits_in_16bits(void)
{
  unsigned int
    value = 0;

  assert(sizeof(unsigned int) == 4);

  while(value < (0x1u << 16))
    {
      bits_in_16bits[value] = iterated_bitcount(value);
      value++;
    }
}
---|
9363 | |
---|
9364 | unsigned int precomputed16_bitcount (unsigned int n) |
---|
9365 | { |
---|
9366 | /* works only for 32-bit int*/ |
---|
9367 | |
---|
9368 | return bits_in_16bits [n & 0xffffu] |
---|
9369 | + bits_in_16bits [(n >> 16) & 0xffffu] ; |
---|
9370 | } |
---|
9371 | |
---|
9372 | /* functions to compute likelihoods on quartets */ |
---|
9373 | |
---|
9374 | |
---|
9375 | /* a parser error function */ |
---|
9376 | |
---|
/* Report which symbol the quartet grouping parser expected (c is printed
   as a character) on standard output and then trigger a failing assertion. */
static void parseError(int c)
{
  fprintf(stdout, "Quartet grouping parser expecting symbol: %c\n", c);

  assert(0);
}
---|
9382 | |
---|
9383 | /* parser for the taxon grouping format, one has to specify 4 groups in a newick-like |
---|
9384 | format from which quartets (a substantially smaller number compared to ungrouped quartets) |
---|
9385 | will be drawn */ |
---|
9386 | |
---|
9387 | static void groupingParser(char *quartetGroupFileName, int *groups[4], int groupSize[4], tree *tr) |
---|
9388 | { |
---|
9389 | FILE |
---|
9390 | *f = myfopen(quartetGroupFileName, "r"); |
---|
9391 | |
---|
9392 | int |
---|
9393 | taxonCounter = 0, |
---|
9394 | n, |
---|
9395 | state = 0, |
---|
9396 | groupCounter = 0, |
---|
9397 | ch, |
---|
9398 | i; |
---|
9399 | |
---|
9400 | printf("%s\n", quartetGroupFileName); |
---|
9401 | |
---|
9402 | for(i = 0; i < 4; i++) |
---|
9403 | { |
---|
9404 | groups[i] = (int*)rax_malloc(sizeof(int) * (tr->mxtips + 1)); |
---|
9405 | groupSize[i] = 0; |
---|
9406 | } |
---|
9407 | |
---|
9408 | while((ch = getc(f)) != EOF) |
---|
9409 | { |
---|
9410 | if(!whitechar(ch)) |
---|
9411 | { |
---|
9412 | switch(state) |
---|
9413 | { |
---|
9414 | case 0: |
---|
9415 | if(ch != '(') |
---|
9416 | parseError('('); |
---|
9417 | state = 1; |
---|
9418 | break; |
---|
9419 | case 1: |
---|
9420 | ungetc(ch, f); |
---|
9421 | n = treeFindTipName(f, tr, FALSE); |
---|
9422 | if(n <= 0 || n > tr->mxtips) |
---|
9423 | printf("parsing error, raxml is expecting to read a taxon name, found \"%c\" instead\n", ch); |
---|
9424 | assert(n > 0 && n <= tr->mxtips); |
---|
9425 | taxonCounter++; |
---|
9426 | groups[groupCounter][groupSize[groupCounter]] = n; |
---|
9427 | groupSize[groupCounter] = groupSize[groupCounter] + 1; |
---|
9428 | state = 2; |
---|
9429 | break; |
---|
9430 | case 2: |
---|
9431 | if(ch == ',') |
---|
9432 | state = 1; |
---|
9433 | else |
---|
9434 | { |
---|
9435 | if(ch == ')') |
---|
9436 | { |
---|
9437 | groupCounter++; |
---|
9438 | state = 3; |
---|
9439 | } |
---|
9440 | else |
---|
9441 | parseError('?'); |
---|
9442 | } |
---|
9443 | break; |
---|
9444 | case 3: |
---|
9445 | if(groupCounter == 4) |
---|
9446 | { |
---|
9447 | if(ch == ';') |
---|
9448 | state = 4; |
---|
9449 | else |
---|
9450 | parseError(';'); |
---|
9451 | } |
---|
9452 | else |
---|
9453 | { |
---|
9454 | if(ch != ',') |
---|
9455 | parseError(','); |
---|
9456 | state = 0; |
---|
9457 | } |
---|
9458 | break; |
---|
9459 | case 4: |
---|
9460 | printf("Error: extra char after ; %c\n", ch); |
---|
9461 | assert(0); |
---|
9462 | default: |
---|
9463 | assert(0); |
---|
9464 | } |
---|
9465 | } |
---|
9466 | } |
---|
9467 | |
---|
9468 | assert(state == 4); |
---|
9469 | assert(groupCounter == 4); |
---|
9470 | assert(taxonCounter == tr->mxtips); |
---|
9471 | |
---|
9472 | printBothOpen("Successfully parsed quartet groups\n\n"); |
---|
9473 | |
---|
9474 | /* print out the taxa that have been assigned to the 4 groups */ |
---|
9475 | |
---|
9476 | for(i = 0; i < 4; i++) |
---|
9477 | { |
---|
9478 | int |
---|
9479 | j; |
---|
9480 | |
---|
9481 | printBothOpen("group %d has %d members\n", i, groupSize[i]); |
---|
9482 | |
---|
9483 | for(j = 0; j < groupSize[i]; j++) |
---|
9484 | printBothOpen("%s\n", tr->nameList[groups[i][j]]); |
---|
9485 | |
---|
9486 | printBothOpen("\n"); |
---|
9487 | } |
---|
9488 | |
---|
9489 | fclose(f); |
---|
9490 | } |
---|
9491 | |
---|
9492 | |
---|
9493 | static double quartetLikelihood(tree *tr, nodeptr p1, nodeptr p2, nodeptr p3, nodeptr p4, nodeptr q1, nodeptr q2) |
---|
9494 | { |
---|
9495 | /* |
---|
9496 | build a quartet tree, where q1 and q2 are the inner nodes and p1, p2, p3, p4 |
---|
9497 | are the tips of the quartet where the sequence data is located. |
---|
9498 | |
---|
9499 | initially set all branch lengths to the default value. |
---|
9500 | */ |
---|
9501 | |
---|
9502 | /* |
---|
9503 | for the tree and node data structure used, please see one of the last chapter's of Joe |
---|
9504 | Felsensteins book. |
---|
9505 | */ |
---|
9506 | |
---|
9507 | hookupDefault(q1, q2, tr->numBranches); |
---|
9508 | |
---|
9509 | hookupDefault(q1->next, p1, tr->numBranches); |
---|
9510 | hookupDefault(q1->next->next, p2, tr->numBranches); |
---|
9511 | |
---|
9512 | hookupDefault(q2->next, p3, tr->numBranches); |
---|
9513 | hookupDefault(q2->next->next, p4, tr->numBranches); |
---|
9514 | |
---|
9515 | /* now compute the likelihood vectors at the two inner nodes of the tree, |
---|
9516 | here the virtual root is located between the two inner nodes q1 and q2. |
---|
9517 | */ |
---|
9518 | |
---|
9519 | newviewGeneric(tr, q1); |
---|
9520 | newviewGeneric(tr, q2); |
---|
9521 | |
---|
9522 | /* call a function that is also used for NNIs that iteratively optimizes all |
---|
9523 | 5 branch lengths in the tree. |
---|
9524 | |
---|
9525 | Note that 16 is an important tuning parameter, this integer value determines |
---|
9526 | how many times we visit all branches until we give up further optimizing the branch length |
---|
9527 | configuration. |
---|
9528 | */ |
---|
9529 | |
---|
9530 | nniSmooth(tr, q1, 16); |
---|
9531 | |
---|
9532 | /* now compute the log likelihood of the tree for the virtual root located between inner nodes q1 and q2 */ |
---|
9533 | |
---|
9534 | /* debugging code |
---|
9535 | { |
---|
9536 | double l; |
---|
9537 | */ |
---|
9538 | |
---|
9539 | evaluateGeneric(tr, q1->back->next->next); |
---|
9540 | |
---|
9541 | /* debugging code |
---|
9542 | |
---|
9543 | l = tr->likelihood; |
---|
9544 | |
---|
9545 | newviewGeneric(tr, q1); |
---|
9546 | newviewGeneric(tr, q2); |
---|
9547 | evaluateGeneric(tr, q1); |
---|
9548 | |
---|
9549 | |
---|
9550 | assert(ABS(l - tr->likelihood) < 0.00001); |
---|
9551 | } |
---|
9552 | */ |
---|
9553 | |
---|
9554 | return (tr->likelihood); |
---|
9555 | } |
---|
9556 | |
---|
9557 | #ifdef _QUARTET_MPI |
---|
9558 | |
---|
/* Message a worker process sends to the master for one set of four taxa.
   For each of the three quartet topologies k = 1, 2, 3 the fields
   ak, bk, ck, dk hold the four taxon numbers in the order in which they are
   printed ("a b | c d") and lk holds the likelihood computed for it. */
typedef struct
{
  int a1, b1, c1, d1;
  int a2, b2, c2, d2;
  int a3, b3, c3, d3;

  double l1, l2, l3;
} quartetResult;

/* a quartet message is the raw bytes of one quartetResult */
#define QUARTET_MESSAGE_SIZE sizeof(quartetResult)

/* MPI message tags: a quartetResult follows / the sending worker has finished */
#define QUARTET_MESSAGE 0
#define I_AM_DONE 1
---|
9584 | |
---|
9585 | static void startQuartetMaster(tree *tr, FILE *f) |
---|
9586 | { |
---|
9587 | quartetResult |
---|
9588 | *qr = (quartetResult *)rax_malloc(sizeof(quartetResult)); |
---|
9589 | |
---|
9590 | MPI_Status |
---|
9591 | status, |
---|
9592 | recvStatus; |
---|
9593 | |
---|
9594 | int |
---|
9595 | dummy, |
---|
9596 | workersDone = 0; |
---|
9597 | |
---|
9598 | assert(processID == 0); |
---|
9599 | |
---|
9600 | while(1) |
---|
9601 | { |
---|
9602 | MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); |
---|
9603 | |
---|
9604 | switch(status.MPI_TAG) |
---|
9605 | { |
---|
9606 | case QUARTET_MESSAGE: |
---|
9607 | MPI_Recv((void *)(qr), QUARTET_MESSAGE_SIZE, MPI_BYTE, status.MPI_SOURCE, QUARTET_MESSAGE, MPI_COMM_WORLD, &recvStatus); |
---|
9608 | fprintf(f, "%d %d | %d %d: %f\n", qr->a1, qr->b1, qr->c1, qr->d1, qr->l1); |
---|
9609 | fprintf(f, "%d %d | %d %d: %f\n", qr->a2, qr->b2, qr->c2, qr->d2, qr->l2); |
---|
9610 | fprintf(f, "%d %d | %d %d: %f\n", qr->a3, qr->b3, qr->c3, qr->d3, qr->l3); |
---|
9611 | break; |
---|
9612 | case I_AM_DONE: |
---|
9613 | MPI_Recv(&dummy, 1, MPI_INT, status.MPI_SOURCE, I_AM_DONE, MPI_COMM_WORLD, &recvStatus); |
---|
9614 | workersDone++; |
---|
9615 | if(workersDone == processes -1) |
---|
9616 | goto END_IT; |
---|
9617 | break; |
---|
9618 | default: |
---|
9619 | assert(0); |
---|
9620 | } |
---|
9621 | } |
---|
9622 | |
---|
9623 | END_IT: |
---|
9624 | rax_free(qr); |
---|
9625 | return; |
---|
9626 | } |
---|
9627 | |
---|
9628 | #endif |
---|
9629 | |
---|
9630 | static void computeAllThreeQuartets(tree *tr, nodeptr q1, nodeptr q2, int t1, int t2, int t3, int t4, FILE *f) |
---|
9631 | { |
---|
9632 | /* set the tip nodes to different sequences |
---|
9633 | with the tip indices t1, t2, t3, t4 */ |
---|
9634 | |
---|
9635 | nodeptr |
---|
9636 | p1 = tr->nodep[t1], |
---|
9637 | p2 = tr->nodep[t2], |
---|
9638 | p3 = tr->nodep[t3], |
---|
9639 | p4 = tr->nodep[t4]; |
---|
9640 | |
---|
9641 | double |
---|
9642 | l; |
---|
9643 | |
---|
9644 | #ifdef _QUARTET_MPI |
---|
9645 | quartetResult |
---|
9646 | *qr = (quartetResult *)rax_malloc(sizeof(quartetResult)); |
---|
9647 | #endif |
---|
9648 | |
---|
9649 | /* first quartet */ |
---|
9650 | |
---|
9651 | /* compute the likelihood of tree ((p1, p2), (p3, p4)) */ |
---|
9652 | |
---|
9653 | l = quartetLikelihood(tr, p1, p2, p3, p4, q1, q2); |
---|
9654 | |
---|
9655 | #ifndef _QUARTET_MPI |
---|
9656 | fprintf(f, "%d %d | %d %d: %f\n", p1->number, p2->number, p3->number, p4->number, l); |
---|
9657 | #else |
---|
9658 | qr->a1 = p1->number; |
---|
9659 | qr->b1 = p2->number; |
---|
9660 | qr->c1 = p3->number; |
---|
9661 | qr->d1 = p4->number; |
---|
9662 | qr->l1 = l; |
---|
9663 | #endif |
---|
9664 | /* second quartet */ |
---|
9665 | |
---|
9666 | /* compute the likelihood of tree ((p1, p3), (p2, p4)) */ |
---|
9667 | |
---|
9668 | l = quartetLikelihood(tr, p1, p3, p2, p4, q1, q2); |
---|
9669 | |
---|
9670 | #ifndef _QUARTET_MPI |
---|
9671 | fprintf(f, "%d %d | %d %d: %f\n", p1->number, p3->number, p2->number, p4->number, l); |
---|
9672 | #else |
---|
9673 | qr->a2 = p1->number; |
---|
9674 | qr->b2 = p3->number; |
---|
9675 | qr->c2 = p2->number; |
---|
9676 | qr->d2 = p4->number; |
---|
9677 | qr->l2 = l; |
---|
9678 | #endif |
---|
9679 | /* third quartet */ |
---|
9680 | |
---|
9681 | /* compute the likelihood of tree ((p1, p4), (p2, p3)) */ |
---|
9682 | |
---|
9683 | l = quartetLikelihood(tr, p1, p4, p2, p3, q1, q2); |
---|
9684 | |
---|
9685 | #ifndef _QUARTET_MPI |
---|
9686 | fprintf(f, "%d %d | %d %d: %f\n", p1->number, p4->number, p2->number, p3->number, l); |
---|
9687 | #else |
---|
9688 | qr->a3 = p1->number; |
---|
9689 | qr->b3 = p4->number; |
---|
9690 | qr->c3 = p2->number; |
---|
9691 | qr->d3 = p3->number; |
---|
9692 | qr->l3 = l; |
---|
9693 | |
---|
9694 | MPI_Send((void *)qr, QUARTET_MESSAGE_SIZE, MPI_BYTE, 0, QUARTET_MESSAGE, MPI_COMM_WORLD); |
---|
9695 | |
---|
9696 | assert(processID > 0); |
---|
9697 | rax_free(qr); |
---|
9698 | #endif |
---|
9699 | } |
---|
9700 | |
---|
9701 | /* the three quartet options: all quartets, randomly sub-sample a certain number n of quartets, |
---|
9702 | subsample all quartets from 4 pre-defined groups of quartets */ |
---|
9703 | |
---|
/* quartet flavor: evaluate all possible sets of four taxa */
#define ALL_QUARTETS 0
/* quartet flavor: evaluate a randomly sub-sampled number of sets of four taxa */
#define RANDOM_QUARTETS 1
/* quartet flavor: evaluate all sets that take one taxon from each of 4 pre-defined groups */
#define GROUPED_QUARTETS 2
---|
9707 | |
---|
9708 | |
---|
9709 | |
---|
9710 | static void computeQuartets(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta) |
---|
9711 | { |
---|
9712 | /* some indices for generating quartets in an arbitrary way */ |
---|
9713 | |
---|
9714 | int |
---|
9715 | flavor = ALL_QUARTETS, |
---|
9716 | i, |
---|
9717 | t1, |
---|
9718 | t2, |
---|
9719 | t3, |
---|
9720 | t4, |
---|
9721 | *groups[4], |
---|
9722 | groupSize[4]; |
---|
9723 | |
---|
9724 | double |
---|
9725 | fraction = 0.0, |
---|
9726 | t; |
---|
9727 | |
---|
9728 | unsigned long int |
---|
9729 | randomQuartets = (unsigned long int)(adef->multipleRuns), |
---|
9730 | quartetCounter = 0, |
---|
9731 | numberOfQuartets = ((unsigned long int)tr->mxtips * ((unsigned long int)tr->mxtips - 1) * ((unsigned long int)tr->mxtips - 2) * ((unsigned long int)tr->mxtips - 3)) / 24; |
---|
9732 | |
---|
9733 | /* use two inner nodes for building quartet trees */ |
---|
9734 | |
---|
9735 | nodeptr |
---|
9736 | q1 = tr->nodep[tr->mxtips + 1], |
---|
9737 | q2 = tr->nodep[tr->mxtips + 2]; |
---|
9738 | |
---|
9739 | char |
---|
9740 | quartetFileName[1024]; |
---|
9741 | |
---|
9742 | FILE |
---|
9743 | *f; |
---|
9744 | |
---|
9745 | /* build output file name */ |
---|
9746 | |
---|
9747 | strcpy(quartetFileName, workdir); |
---|
9748 | strcat(quartetFileName, "RAxML_quartets."); |
---|
9749 | strcat(quartetFileName, run_id); |
---|
9750 | |
---|
9751 | /* open output file */ |
---|
9752 | |
---|
9753 | |
---|
9754 | |
---|
9755 | #ifdef _QUARTET_MPI |
---|
9756 | if(processID == 0) |
---|
9757 | #endif |
---|
9758 | f = myfopen(quartetFileName, "w"); |
---|
9759 | |
---|
9760 | /* initialize model parameters */ |
---|
9761 | |
---|
9762 | initModel(tr, rdta, cdta, adef); |
---|
9763 | |
---|
9764 | |
---|
9765 | |
---|
9766 | if(!adef->useBinaryModelFile) |
---|
9767 | { |
---|
9768 | #ifdef _QUARTET_MPI |
---|
9769 | assert(0); |
---|
9770 | #endif |
---|
9771 | |
---|
9772 | /* get a starting tree: either reads in a tree or computes a randomized stepwise addition parsimony tree */ |
---|
9773 | |
---|
9774 | getStartingTree(tr, adef); |
---|
9775 | |
---|
9776 | /* optimize model parameters on that comprehensive tree that can subsequently be used for qyartet building */ |
---|
9777 | |
---|
9778 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
9779 | |
---|
9780 | printBothOpen("Time for parsing input tree or building parsimony tree and optimizing model parameters: %f\n\n", gettime() - masterTime); |
---|
9781 | } |
---|
9782 | else |
---|
9783 | { |
---|
9784 | readBinaryModel(tr); |
---|
9785 | |
---|
9786 | printBothOpen("Time for reading model parameters: %f\n\n", gettime() - masterTime); |
---|
9787 | } |
---|
9788 | |
---|
9789 | |
---|
9790 | /* figure out which flavor of quartets we want to compute */ |
---|
9791 | |
---|
9792 | if(adef->useQuartetGrouping) |
---|
9793 | { |
---|
9794 | flavor = GROUPED_QUARTETS; |
---|
9795 | groupingParser(quartetGroupingFileName, groups, groupSize, tr); |
---|
9796 | } |
---|
9797 | else |
---|
9798 | { |
---|
9799 | if(randomQuartets > numberOfQuartets) |
---|
9800 | randomQuartets = 1; |
---|
9801 | |
---|
9802 | if(randomQuartets == 1) |
---|
9803 | flavor = ALL_QUARTETS; |
---|
9804 | else |
---|
9805 | { |
---|
9806 | fraction = (double)randomQuartets / (double)numberOfQuartets; |
---|
9807 | flavor = RANDOM_QUARTETS; |
---|
9808 | } |
---|
9809 | } |
---|
9810 | |
---|
9811 | /* print some output on what we are doing*/ |
---|
9812 | |
---|
9813 | switch(flavor) |
---|
9814 | { |
---|
9815 | case ALL_QUARTETS: |
---|
9816 | printBothOpen("There are %u quartet sets for which RAxML will evaluate all %u quartet trees\n", numberOfQuartets, numberOfQuartets * 3); |
---|
9817 | break; |
---|
9818 | case RANDOM_QUARTETS: |
---|
9819 | printBothOpen("There are %u quartet sets for which RAxML will randomly sub-sambple %u sets (%f per cent), i.e., compute %u quartet trees\n", numberOfQuartets, randomQuartets, 100 * fraction, randomQuartets * 3); |
---|
9820 | break; |
---|
9821 | case GROUPED_QUARTETS: |
---|
9822 | printBothOpen("There are 4 quartet groups from which RAxML will evaluate all %u quartet trees\n", (unsigned int)groupSize[0] * (unsigned int)groupSize[1] * (unsigned int)groupSize[2] * (unsigned int)groupSize[3] * 3); |
---|
9823 | break; |
---|
9824 | default: |
---|
9825 | assert(0); |
---|
9826 | } |
---|
9827 | |
---|
9828 | /* print taxon name to taxon number correspondance table to output file */ |
---|
9829 | #ifdef _QUARTET_MPI |
---|
9830 | if(processID == 0) |
---|
9831 | #endif |
---|
9832 | { |
---|
9833 | fprintf(f, "Taxon names and indices:\n\n"); |
---|
9834 | |
---|
9835 | for(i = 1; i <= tr->mxtips; i++) |
---|
9836 | { |
---|
9837 | fprintf(f, "%s %d\n", tr->nameList[i], i); |
---|
9838 | assert(tr->nodep[i]->number == i); |
---|
9839 | } |
---|
9840 | |
---|
9841 | fprintf(f, "\n\n"); |
---|
9842 | } |
---|
9843 | |
---|
9844 | |
---|
9845 | t = gettime(); |
---|
9846 | |
---|
9847 | /* do a loop to generate some quartets to test. |
---|
9848 | note that tip nodes/sequences in RAxML are indexed from 1,...,n |
---|
9849 | and not from 0,...,n-1 as one might expect |
---|
9850 | |
---|
9851 | tr->mxtips is the maximum number of tips in the alignment/tree |
---|
9852 | */ |
---|
9853 | |
---|
9854 | #ifdef _QUARTET_MPI |
---|
9855 | if(processID > 0) |
---|
9856 | #endif |
---|
9857 | { |
---|
9858 | switch(flavor) |
---|
9859 | { |
---|
9860 | case ALL_QUARTETS: |
---|
9861 | { |
---|
9862 | assert(randomQuartets == 1); |
---|
9863 | |
---|
9864 | /* compute all possible quartets */ |
---|
9865 | |
---|
9866 | for(t1 = 1; t1 <= tr->mxtips; t1++) |
---|
9867 | for(t2 = t1 + 1; t2 <= tr->mxtips; t2++) |
---|
9868 | for(t3 = t2 + 1; t3 <= tr->mxtips; t3++) |
---|
9869 | for(t4 = t3 + 1; t4 <= tr->mxtips; t4++) |
---|
9870 | { |
---|
9871 | #ifdef _QUARTET_MPI |
---|
9872 | if((quartetCounter % (unsigned long int)(processes - 1)) == (unsigned long int)(processID - 1)) |
---|
9873 | #endif |
---|
9874 | computeAllThreeQuartets(tr, q1, q2, t1, t2, t3, t4, f); |
---|
9875 | quartetCounter++; |
---|
9876 | } |
---|
9877 | |
---|
9878 | assert(quartetCounter == numberOfQuartets); |
---|
9879 | } |
---|
9880 | break; |
---|
9881 | case RANDOM_QUARTETS: |
---|
9882 | { |
---|
9883 | /* randomly sub-sample a fraction of all quartets */ |
---|
9884 | |
---|
9885 | for(t1 = 1; t1 <= tr->mxtips; t1++) |
---|
9886 | for(t2 = t1 + 1; t2 <= tr->mxtips; t2++) |
---|
9887 | for(t3 = t2 + 1; t3 <= tr->mxtips; t3++) |
---|
9888 | for(t4 = t3 + 1; t4 <= tr->mxtips; t4++) |
---|
9889 | { |
---|
9890 | double |
---|
9891 | r = randum(&adef->parsimonySeed); |
---|
9892 | |
---|
9893 | if(r < fraction) |
---|
9894 | { |
---|
9895 | #ifdef _QUARTET_MPI |
---|
9896 | if((quartetCounter % (unsigned long int)(processes - 1)) == (unsigned long int)(processID - 1)) |
---|
9897 | #endif |
---|
9898 | computeAllThreeQuartets(tr, q1, q2, t1, t2, t3, t4, f); |
---|
9899 | quartetCounter++; |
---|
9900 | } |
---|
9901 | |
---|
9902 | if(quartetCounter == randomQuartets) |
---|
9903 | goto DONE; |
---|
9904 | } |
---|
9905 | |
---|
9906 | DONE: |
---|
9907 | assert(quartetCounter == randomQuartets); |
---|
9908 | } |
---|
9909 | break; |
---|
9910 | case GROUPED_QUARTETS: |
---|
9911 | { |
---|
9912 | /* compute all quartets that can be built out of the four pre-defined groups */ |
---|
9913 | |
---|
9914 | for(t1 = 0; t1 < groupSize[0]; t1++) |
---|
9915 | for(t2 = 0; t2 < groupSize[1]; t2++) |
---|
9916 | for(t3 = 0; t3 < groupSize[2]; t3++) |
---|
9917 | for(t4 = 0; t4 < groupSize[3]; t4++) |
---|
9918 | { |
---|
9919 | int |
---|
9920 | i1 = groups[0][t1], |
---|
9921 | i2 = groups[1][t2], |
---|
9922 | i3 = groups[2][t3], |
---|
9923 | i4 = groups[3][t4]; |
---|
9924 | |
---|
9925 | #ifdef _QUARTET_MPI |
---|
9926 | if((quartetCounter % (unsigned long int)(processes - 1)) == (unsigned long int)(processID - 1)) |
---|
9927 | #endif |
---|
9928 | computeAllThreeQuartets(tr, q1, q2, i1, i2, i3, i4, f); |
---|
9929 | quartetCounter++; |
---|
9930 | } |
---|
9931 | |
---|
9932 | printBothOpen("\nComputed all %u possible grouped quartets\n", quartetCounter); |
---|
9933 | } |
---|
9934 | break; |
---|
9935 | default: |
---|
9936 | assert(0); |
---|
9937 | } |
---|
9938 | } |
---|
9939 | #ifdef _QUARTET_MPI |
---|
9940 | if(processID == 0) |
---|
9941 | startQuartetMaster(tr, f); |
---|
9942 | else |
---|
9943 | { |
---|
9944 | int |
---|
9945 | dummy; |
---|
9946 | |
---|
9947 | MPI_Send(&dummy, 1, MPI_INT, 0, I_AM_DONE, MPI_COMM_WORLD); |
---|
9948 | } |
---|
9949 | #endif |
---|
9950 | |
---|
9951 | t = gettime() - t; |
---|
9952 | |
---|
9953 | printBothOpen("\nPure quartet computation time: %f secs\n", t); |
---|
9954 | |
---|
9955 | printBothOpen("\nAll quartets and corresponding likelihoods written to file %s\n", quartetFileName); |
---|
9956 | |
---|
9957 | #ifdef _QUARTET_MPI |
---|
9958 | if(processID == 0) |
---|
9959 | #endif |
---|
9960 | fclose(f); |
---|
9961 | } |
---|
9962 | |
---|
9963 | static void thoroughTreeOptimization(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta) |
---|
9964 | { |
---|
9965 | char |
---|
9966 | bestTreeFileName[1024]; |
---|
9967 | |
---|
9968 | FILE |
---|
9969 | *f; |
---|
9970 | |
---|
9971 | initModel(tr, rdta, cdta, adef); |
---|
9972 | |
---|
9973 | getStartingTree(tr, adef); |
---|
9974 | |
---|
9975 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
9976 | |
---|
9977 | Thorough = 1; |
---|
9978 | tr->doCutoff = FALSE; |
---|
9979 | |
---|
9980 | printBothOpen("\nStart likelihood: %f\n\n", tr->likelihood); |
---|
9981 | |
---|
9982 | treeOptimizeThorough(tr, 1, 10); |
---|
9983 | evaluateGenericInitrav(tr, tr->start); |
---|
9984 | |
---|
9985 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
9986 | |
---|
9987 | printBothOpen("End likelihood: %f\n\n", tr->likelihood); |
---|
9988 | |
---|
9989 | printModelParams(tr, adef); |
---|
9990 | |
---|
9991 | strcpy(bestTreeFileName, workdir); |
---|
9992 | strcat(bestTreeFileName, "RAxML_bestTree."); |
---|
9993 | strcat(bestTreeFileName, run_id); |
---|
9994 | |
---|
9995 | Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE); |
---|
9996 | f = myfopen(bestTreeFileName, "wb"); |
---|
9997 | fprintf(f, "%s", tr->tree_string); |
---|
9998 | fclose(f); |
---|
9999 | |
---|
10000 | printBothOpen("Best-scoring ML tree written to: %s\n\n", bestTreeFileName); |
---|
10001 | } |
---|
10002 | |
---|
10003 | static void evaluateSD(tree *tr, double bestLH, double *bestVector, double weightSum, int configuration, int i, FILE *f) |
---|
10004 | { |
---|
10005 | double |
---|
10006 | sum = 0.0, |
---|
10007 | sum2 = 0.0, |
---|
10008 | sd, |
---|
10009 | currentLH; |
---|
10010 | |
---|
10011 | int |
---|
10012 | k; |
---|
10013 | |
---|
10014 | evaluateGenericInitrav(tr, tr->start); |
---|
10015 | evaluateGenericVector(tr, tr->start); |
---|
10016 | |
---|
10017 | currentLH = tr->likelihood; |
---|
10018 | |
---|
10019 | printBothOpen("Configuration %d Likelihood: %f\n", configuration, tr->likelihood); |
---|
10020 | |
---|
10021 | fprintf(f, "tr%d\t", configuration); |
---|
10022 | |
---|
10023 | if(currentLH > bestLH) |
---|
10024 | printBothOpen("WARNING tree with ancestral sequence taxon %s has a better likelihood %f > %f than the reference tree!\n", tr->nameList[i], currentLH, bestLH); |
---|
10025 | |
---|
10026 | for (k = 0; k < tr->cdta->endsite; k++) |
---|
10027 | { |
---|
10028 | int |
---|
10029 | w; |
---|
10030 | |
---|
10031 | double |
---|
10032 | temp = bestVector[k] - tr->perSiteLL[k], |
---|
10033 | wtemp = tr->cdta->aliaswgt[k] * temp; |
---|
10034 | |
---|
10035 | for(w = 0; w < tr->cdta->aliaswgt[k]; w++) |
---|
10036 | fprintf(f, "%f ", tr->perSiteLL[k]); |
---|
10037 | |
---|
10038 | sum += wtemp; |
---|
10039 | sum2 += wtemp * temp; |
---|
10040 | } |
---|
10041 | |
---|
10042 | fprintf(f, "\n"); |
---|
10043 | |
---|
10044 | sd = sqrt( weightSum * (sum2 - sum * sum / weightSum) / (weightSum - 1) ); |
---|
10045 | |
---|
10046 | printBothOpen("Ancestral Taxon: %s Likelihood: %f D(LH): %f SD: %f \nSignificantly Worse: %s (5%s), %s (2%s), %s (1%s)\n", |
---|
10047 | tr->nameList[i], currentLH, currentLH - bestLH, sd, |
---|
10048 | (sum > 1.95996 * sd) ? "Yes" : " No", "%", |
---|
10049 | (sum > 2.326 * sd) ? "Yes" : " No", "%", |
---|
10050 | (sum > 2.57583 * sd) ? "Yes" : " No", "%"); |
---|
10051 | |
---|
10052 | printBothOpen("\n"); |
---|
10053 | } |
---|
10054 | |
---|
10055 | static void ancestralSequenceTest(tree *tr) |
---|
10056 | { |
---|
10057 | int |
---|
10058 | ch, |
---|
10059 | i, |
---|
10060 | *candidateAncestorList = (int *)rax_calloc((tr->mxtips + 1), sizeof(int)), |
---|
10061 | numberOfCandidateAncestors = 0; |
---|
10062 | |
---|
10063 | double |
---|
10064 | bestLH, |
---|
10065 | weightSum = 0.0, |
---|
10066 | *bestVector = (double*)rax_malloc(sizeof(double) * tr->cdta->endsite); |
---|
10067 | |
---|
10068 | { |
---|
10069 | FILE |
---|
10070 | *f = myfopen(quartetGroupingFileName, "r"); |
---|
10071 | |
---|
10072 | assert(tr->useFastScaling == FALSE); |
---|
10073 | |
---|
10074 | for(i = 0; i < tr->cdta->endsite; i++) |
---|
10075 | weightSum += (double)(tr->cdta->aliaswgt[i]); |
---|
10076 | |
---|
10077 | evaluateGenericInitrav(tr, tr->start); |
---|
10078 | evaluateGenericVector(tr, tr->start); |
---|
10079 | |
---|
10080 | bestLH = tr->likelihood; |
---|
10081 | |
---|
10082 | memcpy(bestVector, tr->perSiteLL, tr->cdta->endsite * sizeof(double)); |
---|
10083 | |
---|
10084 | printBothOpen("Likelihood of reference tree: %f\n\n\n", tr->likelihood); |
---|
10085 | |
---|
10086 | while((ch = getc(f)) != EOF) |
---|
10087 | { |
---|
10088 | if(!whitechar(ch)) |
---|
10089 | { |
---|
10090 | int |
---|
10091 | n; |
---|
10092 | |
---|
10093 | ungetc(ch, f); |
---|
10094 | |
---|
10095 | n = treeFindTipName(f, tr, FALSE); |
---|
10096 | |
---|
10097 | if(n <= 0 || n > tr->mxtips) |
---|
10098 | printf("parsing error, raxml is expecting to read a taxon name that is contained in the reference tree you passed!\n"); |
---|
10099 | |
---|
10100 | assert(n > 0 && n <= tr->mxtips); |
---|
10101 | |
---|
10102 | candidateAncestorList[n] = 1; |
---|
10103 | numberOfCandidateAncestors++; |
---|
10104 | } |
---|
10105 | } |
---|
10106 | |
---|
10107 | fclose(f); |
---|
10108 | } |
---|
10109 | |
---|
10110 | for(i = 1; i <= tr->mxtips; i++) |
---|
10111 | { |
---|
10112 | if(candidateAncestorList[i]) |
---|
10113 | { |
---|
10114 | nodeptr |
---|
10115 | p = tr->nodep[i], |
---|
10116 | q = p->back, |
---|
10117 | l = q->next, |
---|
10118 | r = q->next->next; |
---|
10119 | |
---|
10120 | int |
---|
10121 | k; |
---|
10122 | |
---|
10123 | double |
---|
10124 | attachmentBranch[NUM_BRANCHES], |
---|
10125 | leftBranch[NUM_BRANCHES], |
---|
10126 | rightBranch[NUM_BRANCHES]; |
---|
10127 | |
---|
10128 | FILE |
---|
10129 | *f; |
---|
10130 | |
---|
10131 | char |
---|
10132 | fileName[1024]; |
---|
10133 | |
---|
10134 | strcpy(fileName, workdir); |
---|
10135 | strcat(fileName, "RAxML_ancestralTest."); |
---|
10136 | strcat(fileName, tr->nameList[i]); |
---|
10137 | strcat(fileName, "."); |
---|
10138 | strcat(fileName, run_id); |
---|
10139 | |
---|
10140 | f = myfopen(fileName, "w"); |
---|
10141 | |
---|
10142 | fprintf(f, " 3 %d\n", tr->rdta->sites); |
---|
10143 | |
---|
10144 | assert(strcmp(tr->nameList[i], tr->nameList[p->number]) == 0); |
---|
10145 | |
---|
10146 | printBothOpen("Checking if %s is a candidate ancestor\n\n", tr->nameList[i]); |
---|
10147 | printBothOpen("Per site log likelihoods for the three configurations will be written to file %s\n\n", fileName); |
---|
10148 | |
---|
10149 | memcpy(attachmentBranch, p->z, sizeof(double) * NUM_BRANCHES); |
---|
10150 | memcpy(leftBranch, l->z, sizeof(double) * NUM_BRANCHES); |
---|
10151 | memcpy(rightBranch, r->z, sizeof(double) * NUM_BRANCHES); |
---|
10152 | |
---|
10153 | |
---|
10154 | //configuration 1 |
---|
10155 | |
---|
10156 | for(k = 0; k < NUM_BRANCHES; k++) |
---|
10157 | p->z[k] = q->z[k] = zmax; |
---|
10158 | |
---|
10159 | evaluateSD(tr, bestLH, bestVector, weightSum, 1, i, f); |
---|
10160 | |
---|
10161 | memcpy(p->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10162 | memcpy(p->back->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10163 | |
---|
10164 | evaluateGenericInitrav(tr, tr->start); |
---|
10165 | assert(tr->likelihood == bestLH); |
---|
10166 | |
---|
10167 | //configuration 2 |
---|
10168 | |
---|
10169 | for(k = 0; k < NUM_BRANCHES; k++) |
---|
10170 | { |
---|
10171 | p->z[k] = q->z[k] = zmax; |
---|
10172 | l->z[k] = l->back->z[k] = zmax; |
---|
10173 | } |
---|
10174 | |
---|
10175 | evaluateSD(tr, bestLH, bestVector, weightSum, 2, i, f); |
---|
10176 | |
---|
10177 | memcpy(p->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10178 | memcpy(p->back->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10179 | memcpy(l->z, leftBranch, sizeof(double) * NUM_BRANCHES); |
---|
10180 | memcpy(l->back->z, leftBranch, sizeof(double) * NUM_BRANCHES); |
---|
10181 | |
---|
10182 | evaluateGenericInitrav(tr, tr->start); |
---|
10183 | assert(tr->likelihood == bestLH); |
---|
10184 | |
---|
10185 | //configuration 3 |
---|
10186 | |
---|
10187 | for(k = 0; k < NUM_BRANCHES; k++) |
---|
10188 | { |
---|
10189 | p->z[k] = q->z[k] = zmax; |
---|
10190 | r->z[k] = r->back->z[k] = zmax; |
---|
10191 | } |
---|
10192 | |
---|
10193 | evaluateSD(tr, bestLH, bestVector, weightSum, 3, i, f); |
---|
10194 | |
---|
10195 | memcpy(p->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10196 | memcpy(p->back->z, attachmentBranch, sizeof(double) * NUM_BRANCHES); |
---|
10197 | memcpy(r->z, rightBranch, sizeof(double) * NUM_BRANCHES); |
---|
10198 | memcpy(r->back->z, rightBranch, sizeof(double) * NUM_BRANCHES); |
---|
10199 | |
---|
10200 | evaluateGenericInitrav(tr, tr->start); |
---|
10201 | assert(tr->likelihood == bestLH); |
---|
10202 | |
---|
10203 | printBothOpen("\n\n"); |
---|
10204 | fclose(f); |
---|
10205 | } |
---|
10206 | } |
---|
10207 | |
---|
10208 | printBothOpen("good-bye\n\n"); |
---|
10209 | |
---|
10210 | rax_free(candidateAncestorList); |
---|
10211 | rax_free(bestVector); |
---|
10212 | exit(0); |
---|
10213 | } |
---|
10214 | |
---|
10215 | static double distancesInitial(nodeptr p, double *distances, tree *tr, boolean fullTraversal) |
---|
10216 | { |
---|
10217 | if(isTip(p->number, tr->mxtips)) |
---|
10218 | return p->z[0]; |
---|
10219 | else |
---|
10220 | { |
---|
10221 | double |
---|
10222 | acc = 0.0; |
---|
10223 | |
---|
10224 | nodeptr |
---|
10225 | q; |
---|
10226 | |
---|
10227 | if(fullTraversal || !p->x) |
---|
10228 | { |
---|
10229 | q = p->next; |
---|
10230 | |
---|
10231 | while(q != p) |
---|
10232 | { |
---|
10233 | acc += distancesInitial(q->back, distances, tr, fullTraversal); |
---|
10234 | q = q->next; |
---|
10235 | } |
---|
10236 | |
---|
10237 | distances[p->number] = acc; |
---|
10238 | p->x = 1; |
---|
10239 | p->next->x = 0; |
---|
10240 | p->next->next->x = 0; |
---|
10241 | } |
---|
10242 | else |
---|
10243 | acc = distances[p->number]; |
---|
10244 | |
---|
10245 | return acc + p->z[0]; |
---|
10246 | } |
---|
10247 | } |
---|
10248 | |
---|
10249 | |
---|
10250 | |
---|
10251 | static void distancesNewview(nodeptr p, double *distances, tree *tr, nodeptr *rootBranch, double *minimum) |
---|
10252 | { |
---|
10253 | nodeptr |
---|
10254 | q; |
---|
10255 | |
---|
10256 | double |
---|
10257 | left = 0.0, |
---|
10258 | right = 0.0; |
---|
10259 | |
---|
10260 | if(isTip(p->number, tr->mxtips)) |
---|
10261 | { |
---|
10262 | q = p->back; |
---|
10263 | |
---|
10264 | if(!isTip(q->number, tr->mxtips)) |
---|
10265 | { |
---|
10266 | if(!q->x) |
---|
10267 | distancesInitial(q, distances, tr, FALSE); |
---|
10268 | left = distances[q->number]; |
---|
10269 | } |
---|
10270 | |
---|
10271 | if(left <= p->z[0]) |
---|
10272 | { |
---|
10273 | //the balanced root is in this branch |
---|
10274 | *rootBranch = p; |
---|
10275 | *minimum = 0.0; |
---|
10276 | } |
---|
10277 | else |
---|
10278 | { |
---|
10279 | double |
---|
10280 | diff = left - p->z[0]; |
---|
10281 | |
---|
10282 | if(diff < *minimum) |
---|
10283 | { |
---|
10284 | *minimum = diff; |
---|
10285 | *rootBranch = p; |
---|
10286 | } |
---|
10287 | } |
---|
10288 | } |
---|
10289 | else |
---|
10290 | { |
---|
10291 | q = p->back; |
---|
10292 | |
---|
10293 | if(!isTip(q->number, tr->mxtips)) |
---|
10294 | { |
---|
10295 | if(!q->x) |
---|
10296 | distancesInitial(q, distances, tr, FALSE); |
---|
10297 | |
---|
10298 | left = distances[q->number]; |
---|
10299 | } |
---|
10300 | else |
---|
10301 | left = 0.0; |
---|
10302 | |
---|
10303 | if(!isTip(p->number, tr->mxtips)) |
---|
10304 | { |
---|
10305 | if(!p->x) |
---|
10306 | distancesInitial(p, distances, tr, FALSE); |
---|
10307 | |
---|
10308 | right = distances[p->number]; |
---|
10309 | } |
---|
10310 | else |
---|
10311 | right = 0.0; |
---|
10312 | |
---|
10313 | if(ABS(left - right) <= p->z[0]) |
---|
10314 | { |
---|
10315 | *rootBranch = p; |
---|
10316 | *minimum = 0.0; |
---|
10317 | } |
---|
10318 | else |
---|
10319 | { |
---|
10320 | double |
---|
10321 | diff; |
---|
10322 | |
---|
10323 | if(left > right) |
---|
10324 | diff = left - (right + p->z[0]); |
---|
10325 | else |
---|
10326 | diff = right - (left + p->z[0]); |
---|
10327 | |
---|
10328 | if(*minimum > diff) |
---|
10329 | { |
---|
10330 | *minimum = diff; |
---|
10331 | *rootBranch = p; |
---|
10332 | } |
---|
10333 | } |
---|
10334 | |
---|
10335 | q = p->next; |
---|
10336 | |
---|
10337 | while(q != p) |
---|
10338 | { |
---|
10339 | distancesNewview(q->back, distances, tr, rootBranch, minimum); |
---|
10340 | q = q->next; |
---|
10341 | } |
---|
10342 | } |
---|
10343 | } |
---|
10344 | |
---|
10345 | static void printTreeRec(FILE *f, nodeptr p, tree *tr, boolean rootDescendant, boolean printBranchLabels) |
---|
10346 | { |
---|
10347 | if(isTip(p->number, tr->mxtips)) |
---|
10348 | { |
---|
10349 | if(rootDescendant) |
---|
10350 | fprintf(f, "%s", tr->nameList[p->number]); |
---|
10351 | else |
---|
10352 | fprintf(f, "%s:%f", tr->nameList[p->number], p->z[0]); |
---|
10353 | } |
---|
10354 | else |
---|
10355 | { |
---|
10356 | fprintf(f, "("); |
---|
10357 | printTreeRec(f, p->next->back, tr, FALSE, printBranchLabels); |
---|
10358 | fprintf(f, ","); |
---|
10359 | printTreeRec(f, p->next->next->back, tr, FALSE, printBranchLabels); |
---|
10360 | |
---|
10361 | if(rootDescendant) |
---|
10362 | fprintf(f, ")"); |
---|
10363 | else |
---|
10364 | { |
---|
10365 | if(printBranchLabels && !isTip(p->number, tr->mxtips) && !isTip(p->back->number, tr->mxtips)) |
---|
10366 | { |
---|
10367 | assert(p->support == p->back->support); |
---|
10368 | fprintf(f, "):%f[%d]", p->z[0], p->support); |
---|
10369 | } |
---|
10370 | else |
---|
10371 | fprintf(f, "):%f", p->z[0]); |
---|
10372 | } |
---|
10373 | } |
---|
10374 | } |
---|
10375 | |
---|
10376 | static void printTree(nodeptr p, tree *tr, double *distances, FILE *f, boolean printBranchLabels) |
---|
10377 | { |
---|
10378 | double |
---|
10379 | leftRoot, |
---|
10380 | rightRoot, |
---|
10381 | thisBranch = p->z[0], |
---|
10382 | left = 0.0, |
---|
10383 | right = 0.0; |
---|
10384 | |
---|
10385 | nodeptr |
---|
10386 | q = p->back; |
---|
10387 | |
---|
10388 | if(!isTip(p->number, tr->mxtips)) |
---|
10389 | { |
---|
10390 | if(!p->x) |
---|
10391 | distancesInitial(p, distances, tr, FALSE); |
---|
10392 | |
---|
10393 | left = distances[p->number]; |
---|
10394 | } |
---|
10395 | else |
---|
10396 | left = 0.0; |
---|
10397 | |
---|
10398 | if(!isTip(q->number, tr->mxtips)) |
---|
10399 | { |
---|
10400 | if(!q->x) |
---|
10401 | distancesInitial(q, distances, tr, FALSE); |
---|
10402 | |
---|
10403 | right = distances[q->number]; |
---|
10404 | } |
---|
10405 | else |
---|
10406 | left = 0.0; |
---|
10407 | |
---|
10408 | //printf("left %f right %f thisBranch %f\n", left, right, thisBranch); |
---|
10409 | |
---|
10410 | if(ABS(left - right) <= thisBranch) |
---|
10411 | { |
---|
10412 | if(left < right) |
---|
10413 | { |
---|
10414 | leftRoot = (right + thisBranch - left) / 2.0; |
---|
10415 | rightRoot = thisBranch - leftRoot; |
---|
10416 | } |
---|
10417 | else |
---|
10418 | { |
---|
10419 | rightRoot = (left + thisBranch - right) / 2.0; |
---|
10420 | leftRoot = thisBranch - rightRoot; |
---|
10421 | } |
---|
10422 | } |
---|
10423 | else |
---|
10424 | { |
---|
10425 | if(left < right) |
---|
10426 | { |
---|
10427 | leftRoot = thisBranch; |
---|
10428 | rightRoot = 0.0; |
---|
10429 | } |
---|
10430 | else |
---|
10431 | { |
---|
10432 | leftRoot = 0.0; |
---|
10433 | rightRoot = thisBranch; |
---|
10434 | } |
---|
10435 | } |
---|
10436 | |
---|
10437 | //descend into right subtree and print it |
---|
10438 | |
---|
10439 | fprintf(f, "("); |
---|
10440 | printTreeRec(f, p, tr, TRUE, printBranchLabels); |
---|
10441 | |
---|
10442 | //finished right subtree, print attachment branch of right subtree |
---|
10443 | //noew descent into left subtree |
---|
10444 | |
---|
10445 | if(printBranchLabels && !isTip(p->number, tr->mxtips) && !isTip(q->number, tr->mxtips)) |
---|
10446 | { |
---|
10447 | assert(p->support == q->support); |
---|
10448 | fprintf(f, ":%f[%d], ", leftRoot, p->support); |
---|
10449 | } |
---|
10450 | else |
---|
10451 | fprintf(f, ":%f, ", leftRoot); |
---|
10452 | printTreeRec(f, q, tr, TRUE, printBranchLabels); |
---|
10453 | |
---|
10454 | //finished left subtree, now print its branch to the root node |
---|
10455 | //and we are done |
---|
10456 | |
---|
10457 | if(printBranchLabels && !isTip(p->number, tr->mxtips) && !isTip(q->number, tr->mxtips)) |
---|
10458 | { |
---|
10459 | assert(p->support == q->support); |
---|
10460 | fprintf(f, ":%f[%d]);", rightRoot, q->support); |
---|
10461 | } |
---|
10462 | else |
---|
10463 | fprintf(f, ":%f);", rightRoot); |
---|
10464 | } |
---|
10465 | |
---|
10466 | static void rootTree(tree *tr, analdef *adef) |
---|
10467 | { |
---|
10468 | int |
---|
10469 | i; |
---|
10470 | |
---|
10471 | double |
---|
10472 | checkDistances, |
---|
10473 | minimum, |
---|
10474 | *distances = (double *)rax_malloc(sizeof(double) * 2 * tr->mxtips); |
---|
10475 | |
---|
10476 | char |
---|
10477 | rootedTreeFile[1024]; |
---|
10478 | |
---|
10479 | FILE |
---|
10480 | *f = myfopen(tree_file, "r"); |
---|
10481 | |
---|
10482 | nodeptr |
---|
10483 | rootBranch; |
---|
10484 | |
---|
10485 | boolean |
---|
10486 | printBranchLabels = FALSE; |
---|
10487 | |
---|
10488 | for(i = 0; i < 2 * tr->mxtips; i++) |
---|
10489 | distances[i] = 0.0; |
---|
10490 | |
---|
10491 | strcpy(rootedTreeFile, workdir); |
---|
10492 | strcat(rootedTreeFile, "RAxML_rootedTree."); |
---|
10493 | strcat(rootedTreeFile, run_id); |
---|
10494 | |
---|
10495 | treeReadLen(f, tr, TRUE, FALSE, TRUE, adef, TRUE, TRUE); |
---|
10496 | |
---|
10497 | if(tr->branchLabelCounter > 0) |
---|
10498 | { |
---|
10499 | assert(tr->branchLabelCounter == (tr->ntips - 3)); |
---|
10500 | printBranchLabels = TRUE; |
---|
10501 | printBothOpen("\nYour input tree contains branch labels, these will also be printed in the output tree ...\n\n"); |
---|
10502 | } |
---|
10503 | |
---|
10504 | fclose(f); |
---|
10505 | |
---|
10506 | minimum = checkDistances = distancesInitial(tr->start->back, distances, tr, TRUE); |
---|
10507 | |
---|
10508 | //printf("Tree Lenght: %f\n", checkDistances); |
---|
10509 | |
---|
10510 | f = myfopen(rootedTreeFile, "w"); |
---|
10511 | |
---|
10512 | distancesNewview(tr->start->back, distances, tr, &rootBranch, &minimum); |
---|
10513 | |
---|
10514 | printTree(rootBranch, tr, distances, f, printBranchLabels); |
---|
10515 | |
---|
10516 | fclose(f); |
---|
10517 | |
---|
10518 | printBothOpen("RAxML-rooted tree using subtree length-balance printed to file:\n%s\n", rootedTreeFile); |
---|
10519 | |
---|
10520 | rax_free(distances); |
---|
10521 | } |
---|
10522 | |
---|
10523 | int main (int argc, char *argv[]) |
---|
10524 | { |
---|
10525 | rawdata *rdta; |
---|
10526 | cruncheddata *cdta; |
---|
10527 | tree *tr; |
---|
10528 | analdef *adef; |
---|
10529 | int |
---|
10530 | i, |
---|
10531 | countGTR = 0, |
---|
10532 | countOtherModel = 0; |
---|
10533 | |
---|
10534 | #if (defined(_USE_PTHREADS) && !defined(_PORTABLE_PTHREADS)) |
---|
10535 | pinToCore(0); |
---|
10536 | #endif |
---|
10537 | |
---|
10538 | #if (defined(_WAYNE_MPI) || defined (_QUARTET_MPI)) |
---|
10539 | MPI_Init(&argc, &argv); |
---|
10540 | MPI_Comm_rank(MPI_COMM_WORLD, &processID); |
---|
10541 | MPI_Comm_size(MPI_COMM_WORLD, &processes); |
---|
10542 | printf("\nThis is RAxML MPI Process Number: %d\n", processID); |
---|
10543 | #else |
---|
10544 | processID = 0; |
---|
10545 | #endif |
---|
10546 | |
---|
10547 | masterTime = gettime(); |
---|
10548 | |
---|
10549 | globalArgc = argc; |
---|
10550 | globalArgv = (char **)rax_malloc(sizeof(char *) * argc); |
---|
10551 | for(i = 0; i < argc; i++) |
---|
10552 | globalArgv[i] = argv[i]; |
---|
10553 | |
---|
10554 | |
---|
10555 | |
---|
10556 | #if ! (defined(__ppc) || defined(__powerpc__) || defined(PPC)) |
---|
10557 | |
---|
10558 | /* |
---|
10559 | David Defour's command |
---|
10560 | _mm_setcsr( _mm_getcsr() | (_MM_FLUSH_ZERO_ON | MM_DAZ_ON)); |
---|
10561 | */ |
---|
10562 | |
---|
10563 | _mm_setcsr( _mm_getcsr() | _MM_FLUSH_ZERO_ON); |
---|
10564 | |
---|
10565 | #endif |
---|
10566 | |
---|
10567 | adef = (analdef *)rax_malloc(sizeof(analdef)); |
---|
10568 | rdta = (rawdata *)rax_malloc(sizeof(rawdata)); |
---|
10569 | cdta = (cruncheddata *)rax_malloc(sizeof(cruncheddata)); |
---|
10570 | tr = (tree *)rax_malloc(sizeof(tree)); |
---|
10571 | |
---|
10572 | /* initialize lookup table for fast bit counter */ |
---|
10573 | |
---|
10574 | compute_bits_in_16bits(); |
---|
10575 | |
---|
10576 | initAdef(adef); |
---|
10577 | get_args(argc,argv, adef, tr); |
---|
10578 | |
---|
10579 | |
---|
10580 | if(adef->readTaxaOnly) |
---|
10581 | { |
---|
10582 | if(adef->mode == PLAUSIBILITY_CHECKER || adef->mode == ROOT_TREE) |
---|
10583 | extractTaxaFromTopology(tr, rdta, cdta, tree_file); |
---|
10584 | else |
---|
10585 | extractTaxaFromTopology(tr, rdta, cdta, bootStrapFile); |
---|
10586 | } |
---|
10587 | |
---|
10588 | getinput(adef, rdta, cdta, tr); |
---|
10589 | |
---|
10590 | checkOutgroups(tr, adef); |
---|
10591 | makeFileNames(); |
---|
10592 | |
---|
10593 | #if (defined(_WAYNE_MPI) || defined (_QUARTET_MPI)) |
---|
10594 | MPI_Barrier(MPI_COMM_WORLD); |
---|
10595 | #endif |
---|
10596 | |
---|
10597 | if(adef->useInvariant && adef->likelihoodEpsilon > 0.001) |
---|
10598 | { |
---|
10599 | printBothOpen("\nYou are using a proportion of Invariable sites estimate, although I don't\n"); |
---|
10600 | printBothOpen("like it. The likelihood epsilon \"-f e\" will be automatically lowered to 0.001\n"); |
---|
10601 | printBothOpen("to avoid unfavorable effects caused by simultaneous optimization of alpha and P-Invar\n"); |
---|
10602 | |
---|
10603 | adef->likelihoodEpsilon = 0.001; |
---|
10604 | } |
---|
10605 | |
---|
10606 | |
---|
10607 | /* |
---|
10608 | switch back to model without secondary structure for all this |
---|
10609 | checking stuff |
---|
10610 | */ |
---|
10611 | |
---|
10612 | if(adef->useSecondaryStructure) |
---|
10613 | { |
---|
10614 | tr->dataVector = tr->initialDataVector; |
---|
10615 | tr->partitionData = tr->initialPartitionData; |
---|
10616 | tr->NumberOfModels--; |
---|
10617 | } |
---|
10618 | |
---|
10619 | if(adef->useExcludeFile) |
---|
10620 | { |
---|
10621 | handleExcludeFile(tr, adef, rdta); |
---|
10622 | exit(0); |
---|
10623 | } |
---|
10624 | |
---|
10625 | |
---|
10626 | if(!adef->readTaxaOnly && adef->mode != FAST_SEARCH && adef->mode != SH_LIKE_SUPPORTS) |
---|
10627 | checkSequences(tr, rdta, adef); |
---|
10628 | |
---|
10629 | |
---|
10630 | if(adef->mode == SPLIT_MULTI_GENE) |
---|
10631 | { |
---|
10632 | splitMultiGene(tr, rdta); |
---|
10633 | exit(0); |
---|
10634 | } |
---|
10635 | |
---|
10636 | if(adef->mode == CHECK_ALIGNMENT) |
---|
10637 | { |
---|
10638 | printf("Alignment format can be read by RAxML \n"); |
---|
10639 | exit(0); |
---|
10640 | } |
---|
10641 | |
---|
10642 | /* |
---|
10643 | switch back to model with secondary structure for all this |
---|
10644 | checking stuff |
---|
10645 | */ |
---|
10646 | |
---|
10647 | if(adef->useSecondaryStructure && !adef->readTaxaOnly) |
---|
10648 | { |
---|
10649 | tr->dataVector = tr->extendedDataVector; |
---|
10650 | tr->partitionData = tr->extendedPartitionData; |
---|
10651 | tr->NumberOfModels++; |
---|
10652 | /* might as well rax_free the initial structures here */ |
---|
10653 | |
---|
10654 | } |
---|
10655 | |
---|
10656 | if(!adef->readTaxaOnly) |
---|
10657 | { |
---|
10658 | int |
---|
10659 | countNonSev = 0, |
---|
10660 | countLG4 =0; |
---|
10661 | |
---|
10662 | makeweights(adef, rdta, cdta, tr); |
---|
10663 | makevalues(rdta, cdta, tr, adef); |
---|
10664 | |
---|
10665 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
10666 | { |
---|
10667 | if(!(tr->partitionData[i].dataType == AA_DATA || tr->partitionData[i].dataType == DNA_DATA)) |
---|
10668 | countNonSev++; |
---|
10669 | |
---|
10670 | if(tr->partitionData[i].protModels == LG4 || tr->partitionData[i].protModels == LG4X) |
---|
10671 | countLG4++; |
---|
10672 | |
---|
10673 | if(tr->partitionData[i].dataType == AA_DATA) |
---|
10674 | { |
---|
10675 | if(tr->partitionData[i].protModels == GTR || tr->partitionData[i].protModels == GTR_UNLINKED) |
---|
10676 | countGTR++; |
---|
10677 | else |
---|
10678 | countOtherModel++; |
---|
10679 | } |
---|
10680 | } |
---|
10681 | |
---|
10682 | if(countLG4 > 0) |
---|
10683 | { |
---|
10684 | if(tr->saveMemory) |
---|
10685 | { |
---|
10686 | printf("Error: the LG4 substitution model does not work in combination with the \"-U\" memory saving flag!\n\n"); |
---|
10687 | errorExit(-1); |
---|
10688 | } |
---|
10689 | |
---|
10690 | if(adef->useInvariant) |
---|
10691 | { |
---|
10692 | printf("Error: the LG4 substitution model does not work for proportion of invariavble sites estimates!\n\n"); |
---|
10693 | errorExit(-1); |
---|
10694 | } |
---|
10695 | |
---|
10696 | if(isCat(adef)) |
---|
10697 | { |
---|
10698 | printf("Error: the LG4 substitution model does not work with the CAT model of rate heterogeneity!\n\n"); |
---|
10699 | errorExit(-1); |
---|
10700 | } |
---|
10701 | } |
---|
10702 | |
---|
10703 | if(tr->saveMemory && countNonSev > 0) |
---|
10704 | { |
---|
10705 | printf("\nError, you want to use the SEV-based memory saving technique for large gappy datasets with missing data.\n"); |
---|
10706 | printf("However, this is only implelemented for DNA and protein data partitions, one of your partitions is neither DNA\n"); |
---|
10707 | printf("nor protein data ... exiting to prevent bad things from happening ;-) \n\n"); |
---|
10708 | |
---|
10709 | errorExit(-1); |
---|
10710 | } |
---|
10711 | |
---|
10712 | |
---|
10713 | if(countGTR > 0 && countOtherModel > 0) |
---|
10714 | { |
---|
10715 | printf("Error, it is only allowed to conduct partitioned AA analyses\n"); |
---|
10716 | printf("with a GTR model of AA substitution, if not all AA partitions are assigned\n"); |
---|
10717 | printf("the GTR or GTR_UNLINKED model.\n\n"); |
---|
10718 | |
---|
10719 | printf("The following partitions do not use GTR:\n"); |
---|
10720 | |
---|
10721 | for(i = 0; i < tr->NumberOfModels; i++) |
---|
10722 | { |
---|
10723 | if(tr->partitionData[i].dataType == AA_DATA && (tr->partitionData[i].protModels != GTR || tr->partitionData[i].protModels != GTR_UNLINKED)) |
---|
10724 | printf("Partition %s\n", tr->partitionData[i].partitionName); |
---|
10725 | } |
---|
10726 | printf("exiting ...\n"); |
---|
10727 | errorExit(-1); |
---|
10728 | } |
---|
10729 | |
---|
10730 | if(countGTR > 0 && tr->NumberOfModels > 1) |
---|
10731 | { |
---|
10732 | FILE *Info = myfopen(infoFileName, "ab"); |
---|
10733 | |
---|
10734 | printBoth(Info, "You are using the GTR model of AA substitution!\n"); |
---|
10735 | printBoth(Info, "GTR parameters for AA substiution will automatically be estimated\n"); |
---|
10736 | printBoth(Info, "either jointly (GTR params will be linked) or independently (when using GTR_UNLINKED) across all partitions.\n"); |
---|
10737 | printBoth(Info, "WARNING: you may be over-parametrizing the model!\n\n\n"); |
---|
10738 | |
---|
10739 | fclose(Info); |
---|
10740 | } |
---|
10741 | } |
---|
10742 | |
---|
10743 | if(adef->mode == CLASSIFY_ML || adef->mode == CLASSIFY_MP) |
---|
10744 | tr->innerNodes = (size_t)(countTaxaInTopology() - 1); |
---|
10745 | else |
---|
10746 | tr->innerNodes = tr->mxtips; |
---|
10747 | |
---|
10748 | |
---|
10749 | setRateHetAndDataIncrement(tr, adef); |
---|
10750 | |
---|
10751 | #ifdef _USE_PTHREADS |
---|
10752 | startPthreads(tr); |
---|
10753 | masterBarrier(THREAD_INIT_PARTITION, tr); |
---|
10754 | if(!adef->readTaxaOnly) |
---|
10755 | masterBarrier(THREAD_ALLOC_LIKELIHOOD, tr); |
---|
10756 | #else |
---|
10757 | if(!adef->readTaxaOnly) |
---|
10758 | allocNodex(tr); |
---|
10759 | #endif |
---|
10760 | |
---|
10761 | printModelAndProgramInfo(tr, adef, argc, argv); |
---|
10762 | |
---|
10763 | switch(adef->mode) |
---|
10764 | { |
---|
10765 | case CLASSIFY_MP: |
---|
10766 | getStartingTree(tr, adef); |
---|
10767 | assert(0); |
---|
10768 | break; |
---|
10769 | case CLASSIFY_ML: |
---|
10770 | if(adef->useBinaryModelFile) |
---|
10771 | { |
---|
10772 | assert(tr->rateHetModel != CAT); |
---|
10773 | readBinaryModel(tr); |
---|
10774 | } |
---|
10775 | else |
---|
10776 | initModel(tr, rdta, cdta, adef); |
---|
10777 | |
---|
10778 | getStartingTree(tr, adef); |
---|
10779 | exit(0); |
---|
10780 | break; |
---|
10781 | case GENERATE_BS: |
---|
10782 | generateBS(tr, adef); |
---|
10783 | exit(0); |
---|
10784 | break; |
---|
10785 | case COMPUTE_ELW: |
---|
10786 | computeELW(tr, adef, bootStrapFile); |
---|
10787 | exit(0); |
---|
10788 | break; |
---|
10789 | case COMPUTE_LHS: |
---|
10790 | initModel(tr, rdta, cdta, adef); |
---|
10791 | computeAllLHs(tr, adef, bootStrapFile); |
---|
10792 | exit(0); |
---|
10793 | break; |
---|
10794 | case COMPUTE_BIPARTITION_CORRELATION: |
---|
10795 | compareBips(tr, bootStrapFile, adef); |
---|
10796 | exit(0); |
---|
10797 | break; |
---|
10798 | case COMPUTE_RF_DISTANCE: |
---|
10799 | computeRF(tr, bootStrapFile, adef); |
---|
10800 | exit(0); |
---|
10801 | break; |
---|
10802 | case BOOTSTOP_ONLY: |
---|
10803 | computeBootStopOnly(tr, bootStrapFile, adef); |
---|
10804 | exit(0); |
---|
10805 | break; |
---|
10806 | case CONSENSUS_ONLY: |
---|
10807 | if(adef->leaveDropMode) |
---|
10808 | computeRogueTaxa(tr, bootStrapFile, adef); |
---|
10809 | else |
---|
10810 | computeConsensusOnly(tr, bootStrapFile, adef, adef->calculateIC); |
---|
10811 | exit(0); |
---|
10812 | break; |
---|
10813 | case DISTANCE_MODE: |
---|
10814 | initModel(tr, rdta, cdta, adef); |
---|
10815 | getStartingTree(tr, adef); |
---|
10816 | computeDistances(tr, adef); |
---|
10817 | break; |
---|
10818 | case PARSIMONY_ADDITION: |
---|
10819 | initModel(tr, rdta, cdta, adef); |
---|
10820 | getStartingTree(tr, adef); |
---|
10821 | printStartingTree(tr, adef, TRUE); |
---|
10822 | break; |
---|
10823 | case PER_SITE_LL: |
---|
10824 | initModel(tr, rdta, cdta, adef); |
---|
10825 | computePerSiteLLs(tr, adef, bootStrapFile); |
---|
10826 | break; |
---|
10827 | case TREE_EVALUATION: |
---|
10828 | initModel(tr, rdta, cdta, adef); |
---|
10829 | |
---|
10830 | getStartingTree(tr, adef); |
---|
10831 | |
---|
10832 | if(adef->likelihoodTest) |
---|
10833 | computeLHTest(tr, adef, bootStrapFile); |
---|
10834 | else |
---|
10835 | { |
---|
10836 | if(adef->useBinaryModelFile) |
---|
10837 | { |
---|
10838 | readBinaryModel(tr); |
---|
10839 | evaluateGenericInitrav(tr, tr->start); |
---|
10840 | treeEvaluate(tr, 2); |
---|
10841 | } |
---|
10842 | else |
---|
10843 | { |
---|
10844 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
10845 | writeBinaryModel(tr); |
---|
10846 | } |
---|
10847 | |
---|
10848 | printLog(tr, adef, TRUE); |
---|
10849 | printResult(tr, adef, TRUE); |
---|
10850 | } |
---|
10851 | |
---|
10852 | break; |
---|
10853 | case ANCESTRAL_STATES: |
---|
10854 | initModel(tr, rdta, cdta, adef); |
---|
10855 | |
---|
10856 | getStartingTree(tr, adef); |
---|
10857 | |
---|
10858 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
10859 | |
---|
10860 | evaluateGenericInitrav(tr, tr->start); |
---|
10861 | |
---|
10862 | computeAncestralStates(tr, tr->likelihood); |
---|
10863 | break; |
---|
10864 | case QUARTET_CALCULATION: |
---|
10865 | computeQuartets(tr, adef, rdta, cdta); |
---|
10866 | break; |
---|
10867 | case THOROUGH_OPTIMIZATION: |
---|
10868 | thoroughTreeOptimization(tr, adef, rdta, cdta); |
---|
10869 | break; |
---|
10870 | case CALC_BIPARTITIONS: |
---|
10871 | calcBipartitions(tr, adef, tree_file, bootStrapFile); |
---|
10872 | break; |
---|
10873 | case CALC_BIPARTITIONS_IC: |
---|
10874 | calcBipartitions_IC(tr, adef, tree_file, bootStrapFile); |
---|
10875 | break; |
---|
10876 | case BIG_RAPID_MODE: |
---|
10877 | if(adef->boot) |
---|
10878 | doBootstrap(tr, adef, rdta, cdta); |
---|
10879 | else |
---|
10880 | { |
---|
10881 | if(adef->rapidBoot) |
---|
10882 | { |
---|
10883 | initModel(tr, rdta, cdta, adef); |
---|
10884 | doAllInOne(tr, adef); |
---|
10885 | } |
---|
10886 | else |
---|
10887 | doInference(tr, adef, rdta, cdta); |
---|
10888 | } |
---|
10889 | break; |
---|
10890 | case MORPH_CALIBRATOR: |
---|
10891 | initModel(tr, rdta, cdta, adef); |
---|
10892 | getStartingTree(tr, adef); |
---|
10893 | evaluateGenericInitrav(tr, tr->start); |
---|
10894 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
10895 | morphologicalCalibration(tr, adef); |
---|
10896 | break; |
---|
10897 | case FAST_SEARCH: |
---|
10898 | fastSearch(tr, adef, rdta, cdta); |
---|
10899 | exit(0); |
---|
10900 | case SH_LIKE_SUPPORTS: |
---|
10901 | shSupports(tr, adef, rdta, cdta); |
---|
10902 | break; |
---|
10903 | case EPA_SITE_SPECIFIC_BIAS: |
---|
10904 | initModel(tr, rdta, cdta, adef); |
---|
10905 | getStartingTree(tr, adef); |
---|
10906 | modOpt(tr, adef, TRUE, adef->likelihoodEpsilon); |
---|
10907 | computePlacementBias(tr, adef); |
---|
10908 | break; |
---|
10909 | case OPTIMIZE_BR_LEN_SCALER: |
---|
10910 | initModel(tr, rdta, cdta, adef); |
---|
10911 | |
---|
10912 | getStartingTree(tr, adef); |
---|
10913 | evaluateGenericInitrav(tr, tr->start); |
---|
10914 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
10915 | |
---|
10916 | printBothOpen("Likelihood: %f\n", tr->likelihood); |
---|
10917 | |
---|
10918 | break; |
---|
10919 | case ANCESTRAL_SEQUENCE_TEST: |
---|
10920 | initModel(tr, rdta, cdta, adef); |
---|
10921 | |
---|
10922 | getStartingTree(tr, adef); |
---|
10923 | |
---|
10924 | evaluateGenericInitrav(tr, tr->start); |
---|
10925 | modOpt(tr, adef, FALSE, adef->likelihoodEpsilon); |
---|
10926 | |
---|
10927 | ancestralSequenceTest(tr); |
---|
10928 | break; |
---|
10929 | case PLAUSIBILITY_CHECKER: |
---|
10930 | plausibilityChecker(tr, adef); |
---|
10931 | exit(0); |
---|
10932 | break; |
---|
10933 | case ROOT_TREE: |
---|
10934 | rootTree(tr, adef); |
---|
10935 | break; |
---|
10936 | default: |
---|
10937 | assert(0); |
---|
10938 | } |
---|
10939 | |
---|
10940 | finalizeInfoFile(tr, adef); |
---|
10941 | |
---|
10942 | #if (defined(_WAYNE_MPI) || defined (_QUARTET_MPI)) |
---|
10943 | MPI_Finalize(); |
---|
10944 | #endif |
---|
10945 | |
---|
10946 | return 0; |
---|
10947 | } |
---|
10948 | |
---|
10949 | |
---|