Context Navigation

source: branches/nameserver/GDE/RAxML/leaveDropping.c

Visit:

Last change on this file was 13845, checked in by westram, 11 years ago
remove executable flag
File size: 39.9 KB

Line
1	#include <stdlib.h>
2	#include <stdio.h>
3	#include <string.h>
4	#include <math.h>
5	#include <assert.h>
6	#include "axml.h"
7
8
9	/* the define below makes the algorithm run much faster and should therefore always be used :-) */
10
11	#define _SORTED
12
13	/*
14	The define below is usually not required, Andre just
15	used it, to have a one to one comparison/verification with Nick's original
16	python script.
17	The only thing this changes is that taxa are sorted in exactly the same way as
18	in Nick's script. In some cases, since we are dealing with a greedy algorithm
19	the taxon order has an impact on the dropsets size and composition.
20
21
22	#define _COMPARE_TO_NICK
23
24	*/
25
26	extern char run_id[128];
27	extern char workdir[1024];
28	extern double masterTime;
29	extern const unsigned int mask32[];
30
31
32
33	typedef struct hash_el
34	{
35	unsigned int fullKey;
36	void *value;
37	struct hash_el *next;
38	} HashElem;
39
40	typedef struct hash_table
41	{
42	unsigned int tableSize;
43	unsigned int entryCount;
44	void *commonAttributes;
45	unsigned int (hashFunction)(struct hash_table h, void *value);
46	boolean (equalFunction)(struct hash_table hashtable, void entrA, void entryB);
47	HashElem **table;
48	} HashTable;
49
50	typedef struct
51	{
52	HashTable *hashTable;
53	HashElem *hashElem;
54	unsigned int index;
55	} HashTableIterator;
56
57
58
59	typedef struct profile_elem
60	{
61	/* TODO should better be named taxaSet and treeSet */
62	unsigned int *bitVector;
63	unsigned int *treeVector;
64	unsigned int treeVectorSupport;
65	unsigned int id;
66	} ProfileElem;
67
68	typedef struct
69	{
70	unsigned int *dropset;
71	unsigned int sizeOfDropset;
72	unsigned int numberOfMergingPairs;
73	} Dropset;
74
75	typedef struct
76	{
77	unsigned int dropsetVectorLength;
78	unsigned int bipartitionVetorLength;
79	unsigned int *randForTaxa;
80	} DropsetAttr;
81
82	typedef struct
83	{
84	unsigned int bitVectorLength;
85	unsigned int treeVectorLength;
86	unsigned int randForTaxa; / random numbers to hash the vectors */
87	unsigned int lastByte; /* the padding bits */
88	} ProfileElemAttr;
89
90
91	typedef struct
92	{
93	void *arrayTable;
94	void *commonAttributes;
95	unsigned int length;
96	} Array;
97
98
99	#define FLIP_NTH_BIT(bitVector,n) (bitVector[n / MASK_LENGTH] \|= mask32[ n % MASK_LENGTH ])
100	#define UNFLIP_NTH_BIT(bitVeoctor,n) (bitVector[n / MASK_LENGTH] &= ~mask32[ n % MASK_LENGTH ])
101	#define NTH_BIT_IS_SET(bitVector,n) (bitVector[n / MASK_LENGTH] & mask32[n % MASK_LENGTH])
102
103
104	typedef struct
105	{
106	List *elems;
107	unsigned int length;
108	} SparseBitVector;
109
110	/* SWITCHES */
111
112	#define PROFILE_TABLE(x,y) createHashTable(x,y,randomProfileElemHashFunction, bitVectorEqual)
113	#define DROPSET_TABLE(x,y) createHashTable(x,y,randomDropsetHashFunctionSparse, dropsetEqualFunctionSparse)
114	#define SORT_ARRAY(array, elemType, sortFunction) qsort(array->arrayTable, array->length, sizeof(elemType*), sortFunction)
115	#define HASH_TABLE_SIZE(x) (x * LOG(x))
116
117	/* END */
118
119	unsigned int genericBitCount(unsigned int* bitVector, unsigned int bitVectorLength)
120	{
121	unsigned int
122	i,
123	result = 0;
124
125	for(i = 0; i < bitVectorLength; i++)
126	result += BIT_COUNT(bitVector[i]);
127
128	return result;
129	}
130
131
132	#ifdef _ANDRE_UNUSED_FUNCTIONS
133
134	/* stuff added by Andre that is not used however */
135
136	static unsigned int naiveHashFunctionAdapterVersion(unsigned int *bv, unsigned int length)
137	{
138	unsigned int
139	i,
140	result = 0;
141
142	for( i = 0; i < length; ++i)
143	result ^= bv[i];
144
145	/* return enhanceHash(result); */
146
147	return result;
148	}
149
150
151
152	static void searchHashTableSeq(HashTable hashtable, void *value, unsigned int hashValue)
153	{
154	unsigned int
155	position = hashValue % hashtable->tableSize;
156
157	HashElem
158	*elem,
159	result = (HashElem)NULL;
160
161	for(elem = hashtable->table[position];
162	elem;
163	elem = elem->next)
164	{
165	if(elem->fullKey == hashValue &&
166	hashtable->equalFunction(hashtable, elem->value, value))
167	{
168	result = elem->value;
169	break;
170	}
171	}
172
173	return result;
174	}
175
176	static Array* dropsetHashToArray(HashTable *dropsets)
177	{
178	unsigned int
179	count = 0;
180
181	HashTableIterator
182	*hashTableIterator = createHashTableIterator(dropsets);
183
184	Array
185	array = (Array)rax_calloc(1, sizeof(Array));
186
187	array->arrayTable = (void )rax_calloc(dropsets->entryCount, sizeof(Dropset));
188	array->length = dropsets->entryCount;
189
190	do
191	{
192	((Dropset**)array->arrayTable)[count++] = getCurrentValueFromHashTableIterator(hashTableIterator);
193	}
194	while(hashTableIteratorNext(hashTableIterator));
195
196	assert(count == dropsets->entryCount);
197
198	rax_free(hashTableIterator);
199
200	return array;
201	}
202
203	static void printDropset(unsigned int bitVector, unsigned int numberOfTaxa, char *nameList)
204	{
205	unsigned int
206	i;
207
208	printBothOpen("DROPSET: ");
209
210	for(i = 0; i < numberOfTaxa; ++i)
211	if(NTH_BIT_IS_SET(bitVector, i))
212	printBothOpen("%s,", nameList[i+1]);
213
214	printBothOpen("\t");
215	}
216
217	static unsigned int naiveDropsetHashFunction(HashTable hashTable, void value)
218	{
219	unsigned int
220	i,
221	result = 0,
222	length = ((DropsetAttr*)hashTable->commonAttributes)->dropsetVectorLength;
223
224	Dropset
225	dropset = (Dropset)value;
226
227	for(i = 0; i < length; ++i )
228	result ^= dropset->dropset[i];
229
230	return result;
231	}
232
233	static List* appendToList(void value, List list)
234	{
235	List
236	listElem = (List )rax_calloc(1, sizeof(List));
237
238	listElem->value = value;
239	listElem->next = list;
240
241	return listElem;
242	}
243
244	static void insertIntoHashTableSeq(HashTable hashTable, void value, unsigned int index)
245	{
246	HashElem
247	hashElem = (HashElem)rax_calloc(1, sizeof(HashElem));
248
249	hashElem->fullKey = index;
250
251	index = hashElem->fullKey % hashTable->tableSize;
252
253	hashElem->value = value;
254	hashElem->next = hashTable->table[index];
255	hashTable->table[index] = hashElem;
256	}
257	#endif
258
259	static HashTable *createHashTable(unsigned int size,
260	void *commonAttr,
261	unsigned int (hashFunction)(HashTable hash_table, void *value),
262	boolean (equalFunction)(HashTable hash_table, void entryA, void entryB))
263	{
264	static const unsigned int
265	initTable[] = {64, 128, 256, 512, 1024, 2048, 4096,
266	8192, 16384, 32768, 65536, 131072,
267	262144, 524288, 1048576, 2097152,
268	4194304, 8388608, 16777216, 33554432,
269	67108864, 134217728, 268435456,
270	536870912, 1073741824, 2147483648U};
271
272	HashTable
273	hashTable = (HashTable)rax_calloc(1, sizeof(HashTable));
274
275	unsigned int
276	tableSize,
277	i,
278	primeTableLength = sizeof(initTable)/sizeof(initTable[0]),
279	maxSize = (unsigned int)-1;
280
281	hashTable->hashFunction = hashFunction;
282	hashTable->equalFunction = equalFunction;
283	hashTable->commonAttributes = commonAttr;
284
285	assert(size <= maxSize);
286	for(i = 0; initTable[i] < size && i < primeTableLength; ++i);
287	assert(i < primeTableLength);
288
289	tableSize = initTable[i];
290
291	hashTable->table = (HashElem*)rax_calloc(tableSize, sizeof(HashElem));
292	hashTable->tableSize = tableSize;
293	hashTable->entryCount = 0;
294
295	return hashTable;
296	}
297
298
299	/* NOTE: computing hashvalue outside...cannot afford to compute it
300	twice for bit vectors */
301
302	static void searchHashTable(HashTable hashtable, void *value, unsigned int hashValue)
303	{
304	unsigned int
305	position = hashValue % hashtable->tableSize;
306
307	HashElem
308	*elem,
309	result = (HashElem)NULL;
310
311
312
313	for(elem = hashtable->table[position];
314	elem;
315	elem = elem->next)
316	{
317	if(elem->fullKey == hashValue &&
318	hashtable->equalFunction(hashtable, elem->value, value))
319	{
320	result = elem->value;
321	break;
322	}
323	}
324
325
326
327	return result;
328	}
329
330
331
332
333
334
335
336	static void insertIntoHashTable(HashTable hashTable, void value, unsigned int index)
337	{
338	HashElem
339	hashElem = (HashElem)rax_calloc(1, sizeof(HashElem));
340
341	hashElem->fullKey = index;
342
343	index = hashElem->fullKey % hashTable->tableSize;
344
345	hashElem->value = value;
346	hashElem->next = hashTable->table[index];
347	hashTable->table[index] = hashElem;
348	}
349
350
351	static void destroyHashTable(HashTable hashTable, void (rax_freeValue)(void *value))
352	{
353	unsigned
354	int i;
355
356	HashElem
357	*elemA,
358	*elemB,
359	**table = hashTable->table;
360
361	for(i = 0; i < hashTable->tableSize; ++i)
362	{
363	elemA = table[i];
364	while(elemA != NULL)
365	{
366	elemB = elemA;
367	elemA = elemA->next;
368	if(rax_freeValue)
369	rax_freeValue(elemB->value);
370	rax_free(elemB);
371	}
372
373
374
375	}
376
377	rax_free(hashTable->commonAttributes);
378	rax_free(hashTable->table);
379	rax_free(hashTable);
380	}
381
382
383	static void updateEntryCount(HashTable *hashTable)
384	{
385	unsigned int
386	i,
387	result = 0;
388
389	for(i = 0; i < hashTable->tableSize; ++i)
390	{
391	HashElem
392	elem = ((HashElem*)hashTable->table)[i];
393
394	while(elem)
395	{
396	result++;
397	elem = elem->next;
398	}
399	}
400
401	hashTable->entryCount = result;
402	}
403
404
405	static HashTableIterator createHashTableIterator(HashTable hashTable)
406	{
407	unsigned
408	int i;
409
410	HashTableIterator
411	hashTableIterator = (HashTableIterator)rax_calloc(1, sizeof(HashTableIterator));
412
413	hashTableIterator->hashTable = hashTable;
414	hashTableIterator->hashElem = NULL;
415	hashTableIterator->index = hashTable->tableSize;
416
417	if(!hashTable->entryCount)
418	return hashTableIterator;
419
420	for(i = 0; i < hashTable->tableSize; ++i)
421	{
422	if(hashTable->table[i])
423	{
424	hashTableIterator->hashElem = hashTable->table[i];
425	hashTableIterator->index = i;
426	break;
427	}
428	}
429
430	return hashTableIterator;
431	}
432
433	static boolean hashTableIteratorNext(HashTableIterator *hashTableIterator)
434	{
435	unsigned int
436	i,
437	tableSize = hashTableIterator->hashTable->tableSize;
438
439	HashElem
440	*next = hashTableIterator->hashElem->next;
441
442	if(next)
443	{
444	hashTableIterator->hashElem = next;
445	return TRUE;
446	}
447
448	/* TODO test case for this! */
449	/* TODO looks like it should be optimised ... =/ */
450
451	i = hashTableIterator->index + 1;
452
453	if(i >= tableSize)
454	{
455	hashTableIterator->index = i;
456	return FALSE;
457	}
458
459	while((next = hashTableIterator->hashTable->table[i]) == ((HashElem*)NULL))
460	{
461	if( ++i >= tableSize)
462	{
463	hashTableIterator->index = i;
464	return FALSE;
465	}
466	}
467
468	hashTableIterator->index = i;
469	hashTableIterator->hashElem = next;
470
471	return (next != ((HashElem*)NULL));
472	}
473
474
475	/* TODO what about performance of this? */
476
477	static void getCurrentValueFromHashTableIterator(HashTableIterator hashTableIterator)
478	{
479	return ((hashTableIterator->hashElem)
480	? hashTableIterator->hashElem->value
481	: NULL);
482	}
483
484	/* TODO implement remove */
485	/* TODO hash table change */
486
487
488	static unsigned int randomProfileElemHashFunction(HashTable hashTable, void value)
489	{
490	unsigned int
491	i,
492	result = 0,
493	length = ((ProfileElemAttr*)hashTable->commonAttributes)->bitVectorLength;
494
495	ProfileElem
496	profileElem = (ProfileElem)value;
497
498	for(i = 0; i < length * MASK_LENGTH; ++i)
499	if(NTH_BIT_IS_SET(profileElem->bitVector, i))
500	result ^= ((ProfileElemAttr*)hashTable->commonAttributes)->randForTaxa[i];
501
502	return result;
503	}
504
505
506	static boolean bitVectorEqual(HashTable hashtable, void entryA, void *entryB)
507	{
508	unsigned int
509	i,
510	bitVectorLength = ((ProfileElemAttr*)hashtable->commonAttributes)->bitVectorLength;
511
512	unsigned int
513	a = ((ProfileElem)entryA)->bitVector,
514	b = ((ProfileElem)entryB)->bitVector;
515
516	for(i = 0; i < bitVectorLength; ++i)
517	{
518	if(a[i] != b[i])
519	return FALSE;
520	}
521
522	return TRUE;
523	}
524
525
526
527
528	static Array* profileToArray(HashTable *profile, boolean updateFrequencyCount, boolean assignIds)
529	{
530	HashTableIterator*
531	hashTableIterator = createHashTableIterator(profile);
532
533	Array
534	result = (Array)rax_calloc(1, sizeof(Array));
535
536	unsigned int
537	count = 0;
538
539	/* remember to always copy s.t. rax_free() runs w/o problems */
540
541	ProfileElemAttr
542	*profileElemAttr;
543
544	result->commonAttributes = (void*)rax_calloc(1, sizeof(ProfileElemAttr));
545	result->commonAttributes = memcpy(result->commonAttributes, profile->commonAttributes, sizeof(ProfileElemAttr));
546	profileElemAttr = result->commonAttributes;
547
548	result->length = profile->entryCount;
549	result->arrayTable = (void)rax_calloc(profile->entryCount, sizeof(ProfileElem));
550
551	if(!hashTableIterator)
552	return result;
553
554	do
555	{
556	ProfileElem *profileElem = getCurrentValueFromHashTableIterator(hashTableIterator);
557
558	if(updateFrequencyCount)
559	profileElem->treeVectorSupport = genericBitCount(profileElem->treeVector, profileElemAttr->treeVectorLength);
560
561	if(assignIds)
562	profileElem->id = count;
563
564	((ProfileElem**)result->arrayTable)[count] = profileElem;
565	assert(profileElem->bitVector && profileElem->treeVector);
566	count++;
567	}
568	while(hashTableIteratorNext(hashTableIterator));
569
570	assert(count == profile->entryCount);
571
572	rax_free(hashTableIterator);
573
574	return result;
575	}
576
577
578	/* THIS IS AN ADAPTER */
579
580	static Array convertHashtableToArray(hashtable oldHashtable, unsigned int bitVectorLength, unsigned int treeVectorLength,
581	unsigned int *randForTaxa, unsigned int lastByte)
582	{
583	unsigned int
584	count = 0,
585	i;
586
587	entry
588	*ent,
589	*entTmp;
590
591	Array
592	*result;
593
594	ProfileElemAttr
595	attr = (ProfileElemAttr)rax_calloc(1, sizeof(ProfileElemAttr));
596
597	attr->bitVectorLength = bitVectorLength;
598	attr->treeVectorLength = treeVectorLength;
599	attr->randForTaxa = randForTaxa;
600	attr->lastByte = lastByte;
601
602	HashTable
603	*hashTable = PROFILE_TABLE(HASH_TABLE_SIZE(oldHashtable->entryCount), attr);
604
605	for(i = 0; i < oldHashtable->tableSize; ++i)
606	{
607	ent = oldHashtable->table[i];
608	while(ent)
609	{
610	ProfileElem
611	profileElem = (ProfileElem)rax_calloc(1, sizeof(ProfileElem));
612
613	profileElem->bitVector = ent->bitVector;
614	profileElem->treeVector = ent->treeVector;
615
616	insertIntoHashTable(hashTable, profileElem,
617	hashTable->hashFunction(hashTable, profileElem));
618
619	entTmp = ent->next;
620	rax_free(ent);
621	ent = entTmp;
622	}
623	}
624
625	rax_free(oldHashtable);
626
627	updateEntryCount(hashTable);
628
629
630	for(i = 0; i < hashTable->tableSize; ++i)
631	{
632	HashElem
633	elem = ((HashElem*)hashTable->table)[i];
634
635	while(elem)
636	{
637	((ProfileElem*)elem->value)->id = count++;
638	elem = elem->next;
639	}
640	}
641
642	assert(count == hashTable->entryCount);
643
644	result = profileToArray(hashTable, TRUE, TRUE);
645	destroyHashTable(hashTable, NULL);
646
647	return result;
648	}
649
650
651	#ifdef _SORTED
652
653	static int sortProfileElems(const void a, const void b)
654	{
655	unsigned int aFreq = ((ProfileElem*)a)->treeVectorSupport,
656	bFreq = ((ProfileElem*)b)->treeVectorSupport;
657
658	if(aFreq < bFreq)
659	return 1;
660	else if(bFreq < aFreq)
661	return -1;
662	else
663	return 0;
664	}
665
666	#endif
667
668
669	static Array getInfrequentBipartitions(Array oldArray, unsigned int threshold)
670	{
671	Array
672	array = (Array )rax_calloc(1, sizeof(Array));
673
674	ProfileElem**
675	oldArrayTable = oldArray->arrayTable;
676
677	unsigned int
678	i,
679	numberInfrequent = 0,
680	count = 0;
681
682	for(i = 0; i < oldArray->length; ++i)
683	numberInfrequent += (oldArrayTable[i]->treeVectorSupport <= threshold);
684
685	array->commonAttributes = (void*)rax_calloc(1, sizeof(ProfileElemAttr));
686	memcpy(array->commonAttributes, oldArray->commonAttributes, sizeof(ProfileElemAttr));
687	array->length = numberInfrequent;
688	array->arrayTable = (void)rax_calloc(numberInfrequent, sizeof(ProfileElem));
689
690	for(i = 0; i < oldArray->length; ++i)
691	{
692	ProfileElem elem = ((ProfileElem*)oldArray->arrayTable)[i];
693	assert(elem->bitVector && elem->treeVector);
694	if( elem->treeVectorSupport <= threshold)
695	((ProfileElem**)array->arrayTable)[count++] = elem;
696	}
697
698	assert(count == numberInfrequent);
699
700	#ifdef _SORTED
701	SORT_ARRAY(array, ProfileElem, sortProfileElems);
702	#endif
703
704	return array;
705	}
706
707
708	static boolean isUnionOfTreesAboveThreshold( const ProfileElemAttr *profileElemAttr,
709	const ProfileElem elemA, const ProfileElem elemB,
710	unsigned int frequencyThreshold)
711	{
712	unsigned int
713	i,
714	count = 0,
715	length = profileElemAttr->treeVectorLength;
716
717	for(i = 0; i < length; ++i )
718	count += BIT_COUNT(elemA->treeVector[i] \| elemB->treeVector[i]);
719
720	return (count > frequencyThreshold);
721	}
722
723
724
725
726
727	static Dropset getBestDropset(HashTable dropsets
728
729	#ifdef _COMPARE_TO_NICK
730	,char **nameList, unsigned int numberOfTaxa
731	#endif
732
733	)
734	{
735
736	#ifdef _COMPARE_TO_NICK
737	unsigned int j;
738	#endif
739
740	int
741	bestImpact = 0;
742
743	unsigned int
744	i;
745
746	Dropset
747	bestDropset = (Dropset )NULL;
748
749	HashTableIterator
750	*hashTableIterator = createHashTableIterator(dropsets);
751
752	if(!getCurrentValueFromHashTableIterator(hashTableIterator))
753	return NULL;
754
755	do
756	{
757	Dropset
758	*dropset = getCurrentValueFromHashTableIterator(hashTableIterator);
759
760	unsigned int
761	droppedTaxa = dropset->sizeOfDropset,
762	gainedBips = dropset->numberOfMergingPairs;
763
764	int
765	impact = gainedBips - droppedTaxa;
766
767	if(impact > bestImpact)
768	{
769	printBothOpen("found better dropset [gained-dropped=impact] %d\t-\t%d\t=%d\n", gainedBips, droppedTaxa, impact);
770	bestImpact = impact;
771	bestDropset = dropset;
772	}
773
774	#ifdef _COMPARE_TO_NICK
775	else if(impact > 0 && impact == bestImpact)
776	{
777	/* resolve by number of taxa */
778	unsigned int
779	fromBest = bestDropset->sizeOfDropset,
780	fromCurrent = dropset->sizeOfDropset;
781
782	if(fromCurrent < fromBest)
783	{
784	printBothOpen("[SIZE] ");
785	printDropset(dropset->dropset, numberOfTaxa, nameList);
786	printBothOpen(" is shorter than ");
787	printDropset(bestDropset->dropset, numberOfTaxa, nameList);
788	bestDropset = dropset;
789	printBothOpen("\n");
790	}
791	else if(fromBest == fromCurrent)
792	{
793	for(j = 0; j < fromCurrent; ++j)
794	{
795	if( bestDropset->dropset[j] < dropset->dropset[j] )
796	{
797	printBothOpen("[LEX] ");
798	printDropset(bestDropset->dropset, numberOfTaxa, nameList);
799	printBothOpen(" less than ");
800	printDropset(dropset->dropset, numberOfTaxa, nameList);
801	printBothOpen("\n");
802	break;
803	}
804	else if( dropset->dropset[j] < bestDropset->dropset[j] )
805	{
806	printBothOpen("[LEX] ");
807	printDropset(dropset->dropset, numberOfTaxa, nameList);
808	printBothOpen(" less than ");
809	printDropset(bestDropset->dropset, numberOfTaxa, nameList);
810	bestDropset = dropset;
811	printBothOpen("\n");
812	break;
813	}
814	}
815	}
816	}
817	#endif
818	}
819	while(hashTableIteratorNext(hashTableIterator));
820
821	rax_free(hashTableIterator);
822
823	/* we did not find anything */
824	if(!bestDropset)
825	return bestDropset;
826
827	/* else convert to non-sparse representation */
828	unsigned int
829	*bvPtr = bestDropset->dropset;
830
831	bestDropset->dropset = (unsigned int)rax_calloc(((DropsetAttr)dropsets->commonAttributes)->dropsetVectorLength, sizeof(unsigned int));
832
833	for(i = 0; i < bestDropset->sizeOfDropset; ++i)
834	FLIP_NTH_BIT(bestDropset->dropset, bvPtr[i]);
835
836	rax_free(bvPtr);
837
838	return bestDropset;
839	}
840
841
842	static void insertBipartitionPairDropset_helper(HashTable *dropsets,
843	unsigned int *diff, unsigned int diffCount)
844	{
845	Dropset
846	dropset = (Dropset)rax_calloc(1, sizeof(Dropset)),
847	*foundInHashTable;
848	unsigned int
849	i,
850	ctr = 0;
851
852	dropset->dropset = (unsigned int*)rax_calloc(diffCount, sizeof(unsigned int));
853
854	for(i = 0; i < ((DropsetAttr)dropsets->commonAttributes)->dropsetVectorLength MASK_LENGTH; ++i)
855	{
856	if(NTH_BIT_IS_SET(diff, i))
857	{
858	dropset->dropset[ctr] = i;
859	ctr++;
860	if(ctr == diffCount)
861	break;
862	}
863	}
864
865	rax_free(diff);
866
867	assert(ctr == diffCount);
868
869	dropset->numberOfMergingPairs = 1;
870	dropset->sizeOfDropset = diffCount;
871	unsigned int hashValue = dropsets->hashFunction(dropsets, dropset);
872
873	foundInHashTable = (Dropset*)searchHashTable(dropsets, dropset, hashValue);
874
875	if(!foundInHashTable)
876	insertIntoHashTable(dropsets, dropset, hashValue);
877	else
878	{
879	foundInHashTable->numberOfMergingPairs++;
880
881	rax_free(dropset->dropset);
882	rax_free(dropset);
883	}
884	}
885
886
887	static void insertBipartitionPairDropset(HashTable *dropsets,
888	const ProfileElemAttr *profileElemAttr,
889	const ProfileElem elemA, const ProfileElem elemB,
890	const unsigned int *droppedTaxa )
891	{
892	unsigned int
893	i,
894	diffCount1 = 0,
895	diffCount2 = 0,
896	length = profileElemAttr->bitVectorLength,
897	diff1 = (unsigned int)rax_calloc(length, sizeof(unsigned int)),
898	diff2 = (unsigned int)rax_calloc(length, sizeof(unsigned int)),
899	lastByte = profileElemAttr->lastByte;
900
901
902	for(i = 0; i < length; ++i )
903	{
904	diff1[i] = ( elemA->bitVector[i] ^ elemB->bitVector[i] ) ;
905
906	if(i == length - 1)
907	diff2[i] = (elemA->bitVector[i] & ~ droppedTaxa[i]) ^ ~ (elemB->bitVector[i] \| droppedTaxa[i] \| lastByte);
908	else
909	diff2[i] = (elemA->bitVector[i] & ~ droppedTaxa[i]) ^ ~ (elemB->bitVector[i] \| droppedTaxa[i]);
910	}
911
912
913	diffCount1 = genericBitCount(diff1,length);
914	diffCount2 = genericBitCount(diff2,length);
915
916	if (!(diffCount1 && diffCount2))
917	printf("problem with bip %d and %d\n", elemA->id, elemB->id);
918
919	assert( diffCount1 && diffCount2 ) ;
920
921	if(diffCount1 < diffCount2)
922	{
923	insertBipartitionPairDropset_helper(dropsets, diff1, diffCount1);
924	rax_free(diff2);
925	}
926	else if(diffCount1 > diffCount2)
927	{
928	insertBipartitionPairDropset_helper(dropsets, diff2, diffCount2);
929	rax_free(diff1);
930	}
931	else
932	{
933	insertBipartitionPairDropset_helper(dropsets, diff1, diffCount1);
934	insertBipartitionPairDropset_helper(dropsets, diff2, diffCount2);
935	}
936	}
937
938
939
940
941	static unsigned int randomDropsetHashFunctionSparse(HashTable hashTable, void value)
942	{
943	Dropset
944	dropset = (Dropset)value;
945
946	unsigned int
947	i,
948	result = 0,
949	length = dropset->sizeOfDropset,
950	randForTaxa = ((DropsetAttr)hashTable->commonAttributes)->randForTaxa;
951
952	for(i = 0; i < length; ++i)
953	result ^= randForTaxa[dropset->dropset[i]];
954
955	return result;
956	}
957
958
959	static boolean dropsetEqualFunctionSparse(HashTable hashTable, void entryA, void *entryB)
960	{
961	unsigned int i,
962	aLength = ((Dropset*)entryA)->sizeOfDropset,
963	bLength = ((Dropset*)entryB)->sizeOfDropset;
964
965	unsigned int
966	a = ((Dropset)entryA)->dropset,
967	b = ((Dropset)entryB)->dropset;
968
969	if(aLength != bLength)
970	return FALSE;
971
972	for(i = 0; i < aLength; i++)
973	if(a[i] != b[i])
974	return FALSE;
975
976	return TRUE;
977	}
978
979
980	static HashTable* potentialProfileDropsets(Array *infrequentBipartitions,
981	unsigned int frequencyThreshold,
982	unsigned int *droppedTaxa )
983	{
984	DropsetAttr
985	dropsetAttr = (DropsetAttr)rax_calloc(1, sizeof(DropsetAttr));
986
987	dropsetAttr->dropsetVectorLength = ((ProfileElemAttr*)infrequentBipartitions->commonAttributes)->bitVectorLength;
988	dropsetAttr->bipartitionVetorLength = infrequentBipartitions->length;
989	dropsetAttr->randForTaxa = ((ProfileElemAttr*)infrequentBipartitions->commonAttributes)->randForTaxa;
990
991	HashTable
992	*dropsets = DROPSET_TABLE(HASH_TABLE_SIZE(infrequentBipartitions->length), dropsetAttr);
993
994	ProfileElem
995	*elemA,
996	*elemB;
997
998	unsigned int
999	firstCount,
1000	secondCount;
1001
1002	for(firstCount = 0; firstCount < infrequentBipartitions->length; ++firstCount)
1003	{
1004	elemA = ((ProfileElem**)(infrequentBipartitions->arrayTable))[firstCount];
1005	for(secondCount = firstCount + 1; secondCount < infrequentBipartitions->length; ++secondCount)
1006	{
1007	elemB = ((ProfileElem**)(infrequentBipartitions->arrayTable))[secondCount];
1008	assert(elemB->treeVector && elemB->bitVector);
1009
1010	if( elemA->treeVectorSupport + elemB->treeVectorSupport > frequencyThreshold )
1011	{
1012	if( isUnionOfTreesAboveThreshold(infrequentBipartitions->commonAttributes, elemA, elemB, frequencyThreshold))
1013	insertBipartitionPairDropset(dropsets, infrequentBipartitions->commonAttributes, elemA, elemB, droppedTaxa);
1014	}
1015	#ifdef _SORTED
1016	else
1017	break;
1018	#endif
1019	}
1020	}
1021
1022	/* due to parallelisation, we have to compute the entry count a posteriori */
1023	/* TODO rewrite this step for the sequential code */
1024
1025	updateEntryCount(dropsets);
1026
1027	return dropsets;
1028	}
1029
1030
1031	static void destroyProfileElem(ProfileElem *profileElem)
1032	{
1033	rax_free(profileElem->bitVector);
1034	rax_free(profileElem->treeVector);
1035	rax_free(profileElem);
1036	}
1037
1038
1039	static void destroyDropset(void *dropset_)
1040	{
1041	Dropset
1042	dropset = (Dropset)dropset_;
1043
1044	rax_free(dropset->dropset);
1045	rax_free(dropset);
1046	}
1047
1048
1049	static List* getListOfConsensusBips(Array *allBipartitions, unsigned int frequencyThreshold)
1050	{
1051	List*
1052	result = (List*)NULL;
1053
1054	unsigned int
1055	i;
1056
1057	ProfileElem
1058	*elem;
1059
1060	for(i = 0; i < allBipartitions->length; ++i)
1061	{
1062	elem = ((ProfileElem**)allBipartitions->arrayTable)[i];
1063	if(elem->treeVectorSupport > frequencyThreshold)
1064	{
1065	List
1066	tmp = (List)rax_calloc(1, sizeof(List));
1067
1068	tmp->value = elem;
1069	tmp->next = result;
1070	result = tmp;
1071	}
1072	}
1073
1074	return result;
1075	}
1076
1077
1078	static unsigned int getLengthOfList(List* list)
1079	{
1080	unsigned int
1081	result = 0;
1082
1083	for( ; list ; list = list->next)
1084	result++;
1085
1086	return result;
1087	}
1088
1089
1090
1091
1092
1093	static HashTable updateAndInsertElem(ProfileElem elem, HashTable hashTable, unsigned int droppedTaxa, ProfileElemAttr* profileElemAttr)
1094	{
1095	unsigned int
1096	j;
1097
1098	ProfileElem
1099	foundInHashTable, foundComplementInHashTable,
1100	complement = (ProfileElem)rax_calloc(1, sizeof(ProfileElem));
1101
1102	unsigned int
1103	lastByte = ((ProfileElemAttr*)hashTable->commonAttributes)->lastByte;
1104
1105	complement->bitVector = (unsigned int *)rax_calloc(profileElemAttr->bitVectorLength, sizeof(unsigned int));
1106
1107	for(j = 0; j < profileElemAttr->bitVectorLength; ++j )
1108	{
1109	if(j == profileElemAttr->bitVectorLength - 1)
1110	{
1111	elem->bitVector[j] &= ~ ( droppedTaxa[j] \| lastByte );
1112	complement->bitVector[j] = ~ (elem->bitVector[j] \| droppedTaxa[j] \| lastByte);
1113	}
1114	else
1115	{
1116	elem->bitVector[j] &= ~ droppedTaxa[j];
1117	complement->bitVector[j] = ~ (elem->bitVector[j] \| droppedTaxa[j] );
1118	}
1119	}
1120
1121	/* bipartition vanishes */
1122
1123	unsigned int
1124	numberOfElements = genericBitCount(elem->bitVector, profileElemAttr->bitVectorLength),
1125	numberElementsInComplement = genericBitCount(complement->bitVector, profileElemAttr->bitVectorLength);
1126
1127	if(numberOfElements < 2 \|\| numberElementsInComplement < 2)
1128	{
1129	destroyProfileElem(elem);
1130	rax_free(complement->bitVector);
1131	rax_free(complement);
1132	return hashTable;
1133	}
1134
1135	unsigned int
1136	hashValue = hashTable->hashFunction(hashTable, elem);
1137
1138	foundInHashTable = searchHashTable(hashTable, elem, hashValue);
1139
1140	if(!foundInHashTable)
1141	{
1142	unsigned int
1143	complementHashValue = hashTable->hashFunction(hashTable, complement);
1144
1145	foundComplementInHashTable = searchHashTable(hashTable, complement, complementHashValue);
1146
1147	if(foundComplementInHashTable)
1148	foundInHashTable = foundComplementInHashTable;
1149	}
1150
1151	if(foundInHashTable)
1152	{
1153	for (j = 0; j < profileElemAttr->treeVectorLength; ++j )
1154	foundInHashTable->treeVector[j] \|= elem->treeVector[j];
1155
1156	foundInHashTable->treeVectorSupport = genericBitCount(foundInHashTable->treeVector, profileElemAttr->treeVectorLength);
1157
1158	destroyProfileElem(elem);
1159	rax_free(complement->bitVector);
1160	rax_free(complement);
1161	}
1162	else
1163	{
1164	insertIntoHashTable(hashTable, elem, hashValue);
1165	rax_free(complement->bitVector);
1166	rax_free(complement);
1167	}
1168
1169	return hashTable;
1170	}
1171
1172
1173	static Array* restrictProfile( Array infrequentBipartitions, List consensusBipartitions, unsigned int *droppedTaxa )
1174	{
1175	unsigned int
1176	i;
1177
1178	ProfileElemAttr
1179	profileElemAttr = (ProfileElemAttr )rax_calloc(1, sizeof(ProfileElemAttr));
1180
1181	profileElemAttr = memcpy(profileElemAttr, infrequentBipartitions->commonAttributes, sizeof(ProfileElemAttr));
1182
1183	HashTable
1184	*hashTable = PROFILE_TABLE(HASH_TABLE_SIZE(infrequentBipartitions->length + getLengthOfList(consensusBipartitions)),
1185	profileElemAttr);
1186
1187	unsigned int
1188	lengthOfConsensus = getLengthOfList(consensusBipartitions);
1189
1190	Array
1191	*result;
1192
1193	List
1194	*listElem,
1195	*listTmp;
1196
1197	Array
1198	tmpArray = (Array )rax_calloc(1, sizeof(Array));
1199
1200
1201	tmpArray->arrayTable = (void )rax_calloc(infrequentBipartitions->length + lengthOfConsensus, sizeof(ProfileElem));
1202	tmpArray->commonAttributes = (void *)rax_calloc(1, sizeof(ProfileElemAttr));
1203	tmpArray->commonAttributes = memcpy(tmpArray->commonAttributes, infrequentBipartitions->commonAttributes, sizeof(ProfileElemAttr));
1204
1205	for(i = 0; i < infrequentBipartitions->length; ++i)
1206	((ProfileElem)tmpArray->arrayTable)[i] = ((ProfileElem)infrequentBipartitions->arrayTable)[i];
1207
1208	tmpArray->length = infrequentBipartitions->length;
1209
1210	rax_free(infrequentBipartitions->commonAttributes);
1211	rax_free((ProfileElem**)infrequentBipartitions->arrayTable);
1212	rax_free(infrequentBipartitions);
1213
1214	listElem = consensusBipartitions;
1215
1216	while(listElem)
1217	{
1218	((ProfileElem*)tmpArray->arrayTable)[tmpArray->length] = (ProfileElem)listElem->value;
1219	tmpArray->length++;
1220	listTmp = listElem;
1221	listElem = listElem->next;
1222	rax_free(listTmp);
1223	}
1224
1225	for(i = 0; i < tmpArray->length; ++i)
1226	{
1227	hashTable = updateAndInsertElem(((ProfileElem**)tmpArray->arrayTable)[i],
1228	hashTable,
1229	droppedTaxa,
1230	profileElemAttr);
1231	}
1232
1233	rax_free(tmpArray->arrayTable);
1234	rax_free(tmpArray->commonAttributes);
1235	rax_free(tmpArray);
1236
1237	updateEntryCount(hashTable);
1238
1239	result = profileToArray(hashTable, TRUE, FALSE);
1240
1241	destroyHashTable(hashTable, NULL);
1242	return result;
1243	}
1244
1245
1246	/**
1247	Computes in a greedy manner a set of rogue taxa.
1248
1249	@return the dropset
1250	@param profile -- the hashtable used in bipartitionList.c
1251	@param tree -- the standard tree
1252	@param resultBipartitions -- bipartitions after dropping the rogue
1253	taxa. necessary part of the output!
1254	*/
1255
1256	static unsigned int* determineGreedyDropset(hashtable profile, tree tree, Array **resultBipartitions)
1257	{
1258
1259	unsigned int
1260	numberOfTaxa = tree->mxtips,
1261	numberOfTrees = tree->numberOfTrees,
1262	frequencyThreshold,
1263	i,
1264	treeVectorLength = GET_BITVECTOR_LENGTH(numberOfTrees),
1265	bitVectorLength = GET_BITVECTOR_LENGTH(numberOfTaxa),
1266	droppedTaxa = (unsigned int)rax_calloc(bitVectorLength, sizeof(unsigned int));
1267
1268	boolean
1269	droppedTaxaThisRound = FALSE;
1270
1271	HashTable
1272	*dropsets;
1273
1274	Dropset
1275	*bestDropset;
1276
1277	Array
1278	*allBipartitions;
1279
1280
1281
1282	/* initialise random numbers: one for each taxon */
1283	unsigned int
1284	randForTaxa = (unsigned int)rax_calloc(numberOfTaxa, sizeof(unsigned int));
1285
1286	for(i = 0; i < numberOfTaxa; ++i)
1287	randForTaxa[i] = rand();
1288
1289	switch(tree->consensusType)
1290	{
1291	case MR_CONSENSUS:
1292	frequencyThreshold = numberOfTrees / 2;
1293	break;
1294	case STRICT_CONSENSUS:
1295	frequencyThreshold = numberOfTrees - 1;
1296	break;
1297	case MRE_CONSENSUS:
1298	default:
1299	assert(0);
1300	}
1301
1302	/*
1303	prepare a compensator for the padding bits (bits that are
1304	necessarily in the bitvector but do not represent taxa)
1305	*/
1306
1307	unsigned int
1308	lastByte = 0;
1309
1310	for(i = numberOfTaxa; i < MASK_LENGTH * bitVectorLength; ++i)
1311	lastByte \|= mask32[i % MASK_LENGTH];
1312
1313	assert(numberOfTrees > 0 && numberOfTaxa > 0 && bitVectorLength > 0 && treeVectorLength > 0 && frequencyThreshold > 0);
1314
1315	/*
1316	caling adapter: adapts the old hashtable to the new implementation
1317	*/
1318
1319	allBipartitions = convertHashtableToArray(profile, bitVectorLength, treeVectorLength, randForTaxa, lastByte);
1320
1321	printBothOpen("tree->consensusType=%d, numberOfTaxa=%d, bitVectorLength=%d, numberOfTrees=%d, treeVectorLength=%d, frequencyThreshold=%d, profile->entryCount=%d\n\n",
1322	tree->consensusType, numberOfTaxa, bitVectorLength,
1323	numberOfTrees, treeVectorLength, frequencyThreshold,allBipartitions->length);
1324
1325
1326
1327	do
1328	{
1329	printBothOpen("================================================================\n");
1330
1331
1332	Array
1333	*infrequentBipartitions = getInfrequentBipartitions(allBipartitions, frequencyThreshold);
1334
1335	List
1336	*consensusBipartitions = getListOfConsensusBips(allBipartitions, frequencyThreshold);
1337
1338	assert(allBipartitions->length == infrequentBipartitions->length + getLengthOfList(consensusBipartitions));
1339
1340	printBothOpen("divided bips: %d = %d infreq + %d consensus\n", allBipartitions->length, infrequentBipartitions->length, getLengthOfList(consensusBipartitions));
1341
1342	rax_free((ProfileElem**)allBipartitions->arrayTable);
1343	rax_free(allBipartitions->commonAttributes);
1344	rax_free(allBipartitions);
1345
1346	/*
1347	NOTE: this below would be possible and might make sense in some
1348	situations. But as there is no equivalent in Nick's script and
1349	this somehow contradicts the philosophy stated in the paper (to
1350	remove all the rogues), I comment it out.
1351	It would of advantage, if the algorithm is used to get a better
1352	resolved tree. However, when we stop via this criteria, rogues
1353	might remain in the tree. If it is the target to yield a high
1354	number of branches with the highest possible support, then it
1355	would be better, if those rogues were dropped as well.
1356	*/
1357
1358	/*
1359	if(getLengthOfList(consensusBipartitions) >= numberOfTaxa - 3 - genericBitCount(droppedTaxa))
1360	{
1361	printBothOpen("we have enough bips: %d / %d\n", getLengthOfList(consensusBipartitions), numberOfTaxa-3);
1362	break;
1363	}
1364	else
1365	printBothOpen("we have %d bips, we need %d.\n", getLengthOfList(consensusBipartitions), numberOfTaxa-3);
1366	*/
1367
1368
1369
1370
1371
1372	dropsets = potentialProfileDropsets(infrequentBipartitions, frequencyThreshold, droppedTaxa);
1373
1374
1375
1376	#ifdef _COMPARE_TO_NICK
1377	bestDropset = getBestDropset(dropsets , tree->nameList, numberOfTaxa );
1378	#else
1379	bestDropset = getBestDropset(dropsets);
1380	#endif
1381
1382	droppedTaxaThisRound = (bestDropset != NULL);
1383
1384
1385	if(droppedTaxaThisRound)
1386	{
1387	/*
1388	update dropped taxa
1389	*/
1390
1391	for(i = 0 ; i < ((DropsetAttr*)dropsets->commonAttributes)->dropsetVectorLength; ++i)
1392	droppedTaxa[i] \|= bestDropset->dropset[i];
1393
1394	/*
1395	update profile array
1396	*/
1397
1398	allBipartitions = restrictProfile(infrequentBipartitions, consensusBipartitions, droppedTaxa );
1399	}
1400	else
1401	{
1402	allBipartitions = restrictProfile( infrequentBipartitions, consensusBipartitions, droppedTaxa );
1403	*resultBipartitions = allBipartitions;
1404	}
1405
1406	/* just debug */
1407
1408	if(droppedTaxaThisRound)
1409	{
1410	printBothOpen("dropping taxa: " );
1411	for(i = 0; i < numberOfTaxa; ++i)
1412	if(NTH_BIT_IS_SET(bestDropset->dropset, i))
1413	printBothOpen("%s,", tree->nameList[i+1]);
1414	printBothOpen("\n");
1415	}
1416
1417	destroyHashTable(dropsets, destroyDropset);
1418	}
1419	while(droppedTaxaThisRound);
1420
1421	rax_free(randForTaxa);
1422
1423	return droppedTaxa;
1424	}
1425
1426	static unsigned int renormalizeBipartitions(Array bipartitions, unsigned int droppedTaxa, unsigned int numberOfTaxa)
1427	{
1428	unsigned
1429	i,
1430	resultIndex = 0,
1431	bitVectorLength = ((ProfileElemAttr*)bipartitions->commonAttributes)->bitVectorLength,
1432	lastByte = 0;
1433
1434
1435	for(i = numberOfTaxa; i < MASK_LENGTH * bitVectorLength; ++i)
1436	lastByte \|= mask32[i % MASK_LENGTH];
1437
1438	/* if the reference leave did not change, nothing needs to be done */
1439
1440	if(!NTH_BIT_IS_SET(droppedTaxa, 0))
1441	return 0;
1442
1443	/* find adequate new leave */
1444
1445	for(i = 0; i < numberOfTaxa; ++i)
1446	{
1447	if(!NTH_BIT_IS_SET(droppedTaxa,i))
1448	{
1449	resultIndex = i;
1450	break;
1451	}
1452	}
1453
1454	/* is only the case if everything was dropped */
1455
1456	assert(resultIndex);
1457
1458	/* invert bit-vectors according to the reference leave */
1459
1460	for(i = 0; i < bipartitions->length; ++i)
1461	{
1462	ProfileElem
1463	elem = ((ProfileElem*)bipartitions->arrayTable)[i];
1464
1465	if(NTH_BIT_IS_SET(elem->bitVector,resultIndex))
1466	{
1467	unsigned int
1468	j;
1469
1470	for(j = 0; j < bitVectorLength; ++j)
1471	{
1472	elem->bitVector[j] =
1473	(j == bitVectorLength - 1)
1474	? ~ (elem->bitVector[j] \| droppedTaxa[j] \| lastByte)
1475	: ~ (elem->bitVector[j] \| droppedTaxa[j]);
1476	}
1477	}
1478	}
1479
1480	return resultIndex;
1481	}
1482
1483	static hashtable reconvertHashtable(Array bipartitionArray)
1484	{
1485	unsigned int
1486	i,
1487	count = 0;
1488
1489	hashtable
1490	*htable = initHashTable(bipartitionArray->length);
1491
1492	assert(htable->tableSize >= bipartitionArray->length );
1493
1494	for(i = 0; i < bipartitionArray->length; ++i)
1495	{
1496	ProfileElem
1497	elem = ((ProfileElem*)bipartitionArray->arrayTable)[i];
1498
1499	entry
1500	*ent = initEntry();
1501
1502	ent->bitVector = elem->bitVector;
1503	ent->treeVector = elem->treeVector;
1504	ent->bipNumber = count++;
1505	htable->table[i] = ent;
1506	rax_free(elem);
1507	}
1508
1509	htable->entryCount = count;
1510
1511	return htable;
1512	}
1513
1514	static void pruneTaxon(tree *tr, unsigned int k)
1515	{
1516
1517	assert(k > 0 && k <= ((unsigned int)(tr->mxtips)));
1518
1519	nodeptr
1520	p = tr->nodep[k],
1521	q = p->back,
1522	q1 = q->next->back,
1523	q2 = q->next->next->back;
1524
1525
1526	hookupDefault(q1, q2, tr->numBranches);
1527
1528	tr->start = findAnyTip(q1, tr->mxtips);
1529
1530	assert(p != tr->start && q != tr->start);
1531	}
1532
1533
1534
1535
1536	void computeRogueTaxa(tree tr, char treeSetFileName, analdef *adef)
1537	{
1538	char
1539	dropFileName[1024];
1540
1541	hashtable
1542	h = initHashTable(tr->mxtips FC_INIT * 10);
1543
1544	Array
1545	resultBipartitions = (Array )rax_calloc(1, sizeof(Array*));
1546
1547	unsigned int
1548	droppedTaxaNum = 0,
1549	numberOfTrees = 0,
1550	i,
1551	treeVectorLength,
1552	vectorLength,
1553	**bitVectors = initBitVector(tr, &vectorLength),
1554	*droppedTaxa;
1555
1556	FILE
1557	*treeFile = getNumberOfTrees(tr, treeSetFileName, adef),
1558	*outf;
1559
1560	numberOfTrees = tr->numberOfTrees;
1561
1562	assert(adef->leaveDropMode);
1563	assert(sizeof(unsigned char) == 1);
1564
1565	treeVectorLength = GET_BITVECTOR_LENGTH(numberOfTrees);
1566
1567	/* read the trees and process the bipartitions */
1568
1569	for(i = 1; i <= numberOfTrees; i++)
1570	{
1571	int
1572	bCount = 0;
1573
1574	treeReadLen(treeFile, tr, FALSE, FALSE, TRUE, adef, TRUE, FALSE);
1575
1576	assert(tr->mxtips == tr->ntips);
1577
1578	bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips, vectorLength, h, (i - 1), BIPARTITIONS_BOOTSTOP, (branchInfo *)NULL,
1579	&bCount, treeVectorLength, FALSE, FALSE);
1580
1581	assert(bCount == tr->mxtips - 3);
1582	}
1583
1584
1585	rewind(treeFile);
1586
1587	droppedTaxa = determineGreedyDropset(h, tr, resultBipartitions);
1588
1589
1590	renormalizeBipartitions(*resultBipartitions, droppedTaxa, tr->mxtips);
1591
1592	h = reconvertHashtable(*resultBipartitions);
1593
1594	/* TODO output for egrep */
1595
1596	printBothOpen("> ALL dropped taxa: ");
1597
1598	for(i = 0; i < (unsigned int)tr->mxtips; ++i)
1599	{
1600	if(NTH_BIT_IS_SET(droppedTaxa, i))
1601	{
1602	printBothOpen("%d %s,", (i+1), tr->nameList[i + 1]);
1603	droppedTaxaNum++;
1604	}
1605	}
1606	printBothOpen("\n");
1607
1608
1609	printBothOpen("\nDropping %u taxa\n", droppedTaxaNum);
1610
1611	printBothOpen("\nTime for dropset calculation: %f seconds\n", gettime() - masterTime );
1612
1613	if(droppedTaxaNum > 0)
1614	{
1615	strcpy(dropFileName, workdir);
1616	strcat(dropFileName, "RAxML_prunedTrees.");
1617	strcat(dropFileName, run_id);
1618
1619	outf = myfopen(dropFileName, "w");
1620
1621	for(i = 1; i <= numberOfTrees; i++)
1622	{
1623	unsigned int
1624	k;
1625
1626	int
1627	tips = 0;
1628
1629	/printf("Tree %d\n", i);/
1630
1631	treeReadLen(treeFile, tr, FALSE, FALSE, TRUE, adef, TRUE, FALSE);
1632
1633	assert(tr->mxtips == tr->ntips);
1634
1635	for(k = 0; k < (unsigned int)tr->mxtips; k++)
1636	{
1637	if(NTH_BIT_IS_SET(droppedTaxa, k))
1638	pruneTaxon(tr, (k+1));
1639	}
1640
1641	tips = countTips(tr->start, tr->mxtips) + countTips(tr->start->back, tr->mxtips);
1642
1643	assert((unsigned)tips == ((unsigned)tr->mxtips - droppedTaxaNum));
1644
1645	Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE);
1646	fprintf(outf, "%s", tr->tree_string);
1647
1648	/printf("%u %u\n", (unsigned)tips, ((unsigned)tr->mxtips - droppedTaxaNum));/
1649	}
1650
1651
1652	printBothOpen("\nA tree collection, where the taxa from the dropset have been pruned in each tree\n");
1653	printBothOpen("has been written to file %s\n\n", dropFileName);
1654	fclose(outf);
1655	}
1656
1657	printBothOpen("Total execution time: %f\n", gettime() - masterTime);
1658
1659	fclose(treeFile);
1660
1661	freeBitVectors(bitVectors, 2 * tr->mxtips);
1662	rax_free(droppedTaxa);
1663	rax_free(bitVectors);
1664	freeHashTable(h);
1665	rax_free(h);
1666
1667	exit(0);
1668	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: