Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

leaveDropping.c

Visit:

Last change on this file was 19480, checked in by westram, 2 years ago
reintegrates 'gde' into 'trunk' adds: log:branches/gde@19460:19479
File size: 40.0 KB

Line
1	#include <stdlib.h>
2	#include <stdio.h>
3	#include <string.h>
4	#include <math.h>
5	#include <assert.h>
6	#include "axml.h"
7
8
9	/* the define below makes the algorithm run much faster and should therefore always be used :-) */
10
11	#define _SORTED
12
13	/*
14	The define below is usually not required, Andre just
15	used it, to have a one to one comparison/verification with Nick's original
16	python script.
17	The only thing this changes is that taxa are sorted in exactly the same way as
18	in Nick's script. In some cases, since we are dealing with a greedy algorithm
19	the taxon order has an impact on the dropsets size and composition.
20
21
22	#define _COMPARE_TO_NICK
23
24	*/
25
26	extern char run_id[128];
27	extern char workdir[1024];
28	extern double masterTime;
29	extern const unsigned int mask32[];
30
31
32
33	typedef struct hash_el
34	{
35	unsigned int fullKey;
36	void *value;
37	struct hash_el *next;
38	} HashElem;
39
40	typedef struct hash_table
41	{
42	unsigned int tableSize;
43	unsigned int entryCount;
44	void *commonAttributes;
45	unsigned int (hashFunction)(struct hash_table h, void *value);
46	boolean (equalFunction)(struct hash_table hashtable, void entrA, void entryB);
47	HashElem **table;
48	} HashTable;
49
50	typedef struct
51	{
52	HashTable *hashTable;
53	HashElem *hashElem;
54	unsigned int index;
55	} HashTableIterator;
56
57
58
59	typedef struct profile_elem
60	{
61	/* TODO should better be named taxaSet and treeSet */
62	unsigned int *bitVector;
63	unsigned int *treeVector;
64	unsigned int treeVectorSupport;
65	unsigned int id;
66	} ProfileElem;
67
68	typedef struct
69	{
70	unsigned int *dropset;
71	unsigned int sizeOfDropset;
72	unsigned int numberOfMergingPairs;
73	} Dropset;
74
75	typedef struct
76	{
77	unsigned int dropsetVectorLength;
78	unsigned int bipartitionVetorLength;
79	unsigned int *randForTaxa;
80	} DropsetAttr;
81
82	typedef struct
83	{
84	unsigned int bitVectorLength;
85	unsigned int treeVectorLength;
86	unsigned int randForTaxa; / random numbers to hash the vectors */
87	unsigned int lastByte; /* the padding bits */
88	} ProfileElemAttr;
89
90
91	typedef struct
92	{
93	void *arrayTable;
94	void *commonAttributes;
95	unsigned int length;
96	} Array;
97
98
99	#define FLIP_NTH_BIT(bitVector,n) (bitVector[n / MASK_LENGTH] \|= mask32[ n % MASK_LENGTH ])
100	#define UNFLIP_NTH_BIT(bitVeoctor,n) (bitVector[n / MASK_LENGTH] &= ~mask32[ n % MASK_LENGTH ])
101	#define NTH_BIT_IS_SET(bitVector,n) (bitVector[n / MASK_LENGTH] & mask32[n % MASK_LENGTH])
102
103
104	typedef struct
105	{
106	List *elems;
107	unsigned int length;
108	} SparseBitVector;
109
110	/* SWITCHES */
111
112	#define PROFILE_TABLE(x,y) createHashTable(x,y,randomProfileElemHashFunction, bitVectorEqual)
113	#define DROPSET_TABLE(x,y) createHashTable(x,y,randomDropsetHashFunctionSparse, dropsetEqualFunctionSparse)
114	#define SORT_ARRAY(array, elemType, sortFunction) qsort(array->arrayTable, array->length, sizeof(elemType*), sortFunction)
115	#define HASH_TABLE_SIZE(x) (x * LOG(x))
116
117	/* END */
118
119	unsigned int genericBitCount(unsigned int* bitVector, unsigned int bitVectorLength)
120	{
121	unsigned int
122	i,
123	result = 0;
124
125	for(i = 0; i < bitVectorLength; i++)
126	result += BIT_COUNT(bitVector[i]);
127
128	return result;
129	}
130
131
132	#ifdef _ANDRE_UNUSED_FUNCTIONS
133
134	/* stuff added by Andre that is not used however */
135
136	static unsigned int naiveHashFunctionAdapterVersion(unsigned int *bv, unsigned int length)
137	{
138	unsigned int
139	i,
140	result = 0;
141
142	for( i = 0; i < length; ++i)
143	result ^= bv[i];
144
145	/* return enhanceHash(result); */
146
147	return result;
148	}
149
150
151
152	static void searchHashTableSeq(HashTable hashtable, void *value, unsigned int hashValue)
153	{
154	unsigned int
155	position = hashValue % hashtable->tableSize;
156
157	HashElem
158	*elem,
159	result = (HashElem)NULL;
160
161	for(elem = hashtable->table[position];
162	elem;
163	elem = elem->next)
164	{
165	if(elem->fullKey == hashValue &&
166	hashtable->equalFunction(hashtable, elem->value, value))
167	{
168	result = elem->value;
169	break;
170	}
171	}
172
173	return result;
174	}
175
176	static Array* dropsetHashToArray(HashTable *dropsets)
177	{
178	unsigned int
179	count = 0;
180
181	HashTableIterator
182	*hashTableIterator = createHashTableIterator(dropsets);
183
184	Array
185	array = (Array)rax_calloc(1, sizeof(Array));
186
187	array->arrayTable = (void )rax_calloc(dropsets->entryCount, sizeof(Dropset));
188	array->length = dropsets->entryCount;
189
190	do
191	{
192	((Dropset**)array->arrayTable)[count++] = getCurrentValueFromHashTableIterator(hashTableIterator);
193	}
194	while(hashTableIteratorNext(hashTableIterator));
195
196	assert(count == dropsets->entryCount);
197
198	rax_free(hashTableIterator);
199
200	return array;
201	}
202
203	static void printDropset(unsigned int bitVector, unsigned int numberOfTaxa, char *nameList)
204	{
205	unsigned int
206	i;
207
208	printBothOpen("DROPSET: ");
209
210	for(i = 0; i < numberOfTaxa; ++i)
211	if(NTH_BIT_IS_SET(bitVector, i))
212	printBothOpen("%s,", nameList[i+1]);
213
214	printBothOpen("\t");
215	}
216
217	static unsigned int naiveDropsetHashFunction(HashTable hashTable, void value)
218	{
219	unsigned int
220	i,
221	result = 0,
222	length = ((DropsetAttr*)hashTable->commonAttributes)->dropsetVectorLength;
223
224	Dropset
225	dropset = (Dropset)value;
226
227	for(i = 0; i < length; ++i )
228	result ^= dropset->dropset[i];
229
230	return result;
231	}
232
233	static List* appendToList(void value, List list)
234	{
235	List
236	listElem = (List )rax_calloc(1, sizeof(List));
237
238	listElem->value = value;
239	listElem->next = list;
240
241	return listElem;
242	}
243
244	static void insertIntoHashTableSeq(HashTable hashTable, void value, unsigned int index)
245	{
246	HashElem
247	hashElem = (HashElem)rax_calloc(1, sizeof(HashElem));
248
249	hashElem->fullKey = index;
250
251	index = hashElem->fullKey % hashTable->tableSize;
252
253	hashElem->value = value;
254	hashElem->next = hashTable->table[index];
255	hashTable->table[index] = hashElem;
256	}
257	#endif
258
259	static HashTable *createHashTable(unsigned int size,
260	void *commonAttr,
261	unsigned int (hashFunction)(HashTable hash_table, void *value),
262	boolean (equalFunction)(HashTable hash_table, void entryA, void entryB))
263	{
264	static const unsigned int
265	initTable[] = {64, 128, 256, 512, 1024, 2048, 4096,
266	8192, 16384, 32768, 65536, 131072,
267	262144, 524288, 1048576, 2097152,
268	4194304, 8388608, 16777216, 33554432,
269	67108864, 134217728, 268435456,
270	536870912, 1073741824, 2147483648U};
271
272	HashTable
273	hashTable = (HashTable)rax_calloc(1, sizeof(HashTable));
274
275	unsigned int
276	tableSize,
277	i,
278	primeTableLength = sizeof(initTable)/sizeof(initTable[0]),
279	maxSize = (unsigned int)-1;
280
281	hashTable->hashFunction = hashFunction;
282	hashTable->equalFunction = equalFunction;
283	hashTable->commonAttributes = commonAttr;
284
285	assert(size <= maxSize);
286	for(i = 0; initTable[i] < size && i < primeTableLength; ++i);
287	assert(i < primeTableLength);
288
289	tableSize = initTable[i];
290
291	hashTable->table = (HashElem*)rax_calloc(tableSize, sizeof(HashElem));
292	hashTable->tableSize = tableSize;
293	hashTable->entryCount = 0;
294
295	return hashTable;
296	}
297
298
299	/* NOTE: computing hashvalue outside...cannot afford to compute it
300	twice for bit vectors */
301
302	static void searchHashTable(HashTable Hashtable, void *value, unsigned int hashValue)
303	{
304	unsigned int
305	position = hashValue % Hashtable->tableSize;
306
307	HashElem
308	*elem,
309	result = (HashElem)NULL;
310
311
312
313	for(elem = Hashtable->table[position];
314	elem;
315	elem = elem->next)
316	{
317	if(elem->fullKey == hashValue &&
318	Hashtable->equalFunction(Hashtable, elem->value, value))
319	{
320	result = elem->value;
321	break;
322	}
323	}
324
325
326
327	return result;
328	}
329
330
331
332
333
334
335
336	static void insertIntoHashTable(HashTable hashTable, void value, unsigned int index)
337	{
338	HashElem
339	hashElem = (HashElem)rax_calloc(1, sizeof(HashElem));
340
341	hashElem->fullKey = index;
342
343	index = hashElem->fullKey % hashTable->tableSize;
344
345	hashElem->value = value;
346	hashElem->next = hashTable->table[index];
347	hashTable->table[index] = hashElem;
348	}
349
350
351	static void destroyHashTable(HashTable hashTable, void (rax_freeValue)(void *value))
352	{
353	unsigned
354	int i;
355
356	HashElem
357	*elemA,
358	*elemB,
359	**table = hashTable->table;
360
361	for(i = 0; i < hashTable->tableSize; ++i)
362	{
363	elemA = table[i];
364	while(elemA != NULL)
365	{
366	elemB = elemA;
367	elemA = elemA->next;
368	if(rax_freeValue)
369	rax_freeValue(elemB->value);
370	rax_free(elemB);
371	}
372
373
374
375	}
376
377	rax_free(hashTable->commonAttributes);
378	rax_free(hashTable->table);
379	rax_free(hashTable);
380	}
381
382
383	static void updateEntryCount(HashTable *hashTable)
384	{
385	unsigned int
386	i,
387	result = 0;
388
389	for(i = 0; i < hashTable->tableSize; ++i)
390	{
391	HashElem
392	elem = ((HashElem*)hashTable->table)[i];
393
394	while(elem)
395	{
396	result++;
397	elem = elem->next;
398	}
399	}
400
401	hashTable->entryCount = result;
402	}
403
404
405	static HashTableIterator createHashTableIterator(HashTable hashTable)
406	{
407	unsigned
408	int i;
409
410	HashTableIterator
411	hashTableIterator = (HashTableIterator)rax_calloc(1, sizeof(HashTableIterator));
412
413	hashTableIterator->hashTable = hashTable;
414	hashTableIterator->hashElem = NULL;
415	hashTableIterator->index = hashTable->tableSize;
416
417	if(!hashTable->entryCount)
418	return hashTableIterator;
419
420	for(i = 0; i < hashTable->tableSize; ++i)
421	{
422	if(hashTable->table[i])
423	{
424	hashTableIterator->hashElem = hashTable->table[i];
425	hashTableIterator->index = i;
426	break;
427	}
428	}
429
430	return hashTableIterator;
431	}
432
433	static boolean hashTableIteratorNext(HashTableIterator *hashTableIterator)
434	{
435	unsigned int
436	i,
437	tableSize = hashTableIterator->hashTable->tableSize;
438
439	HashElem
440	*next = hashTableIterator->hashElem->next;
441
442	if(next)
443	{
444	hashTableIterator->hashElem = next;
445	return TRUE;
446	}
447
448	/* TODO test case for this! */
449	/* TODO looks like it should be optimised ... =/ */
450
451	i = hashTableIterator->index + 1;
452
453	if(i >= tableSize)
454	{
455	hashTableIterator->index = i;
456	return FALSE;
457	}
458
459	while((next = hashTableIterator->hashTable->table[i]) == ((HashElem*)NULL))
460	{
461	if( ++i >= tableSize)
462	{
463	hashTableIterator->index = i;
464	return FALSE;
465	}
466	}
467
468	hashTableIterator->index = i;
469	hashTableIterator->hashElem = next;
470
471	return (next != ((HashElem*)NULL));
472	}
473
474
475	/* TODO what about performance of this? */
476
477	static void getCurrentValueFromHashTableIterator(HashTableIterator hashTableIterator)
478	{
479	return ((hashTableIterator->hashElem)
480	? hashTableIterator->hashElem->value
481	: NULL);
482	}
483
484	/* TODO implement remove */
485	/* TODO hash table change */
486
487
488	static unsigned int randomProfileElemHashFunction(HashTable hashTable, void value)
489	{
490	unsigned int
491	i,
492	result = 0,
493	length = ((ProfileElemAttr*)hashTable->commonAttributes)->bitVectorLength;
494
495	ProfileElem
496	profileElem = (ProfileElem)value;
497
498	for(i = 0; i < length * MASK_LENGTH; ++i)
499	if(NTH_BIT_IS_SET(profileElem->bitVector, i))
500	result ^= ((ProfileElemAttr*)hashTable->commonAttributes)->randForTaxa[i];
501
502	return result;
503	}
504
505
506	static boolean bitVectorEqual(HashTable Hashtable, void entryA, void *entryB)
507	{
508	unsigned int
509	i,
510	bitVectorLength = ((ProfileElemAttr*)Hashtable->commonAttributes)->bitVectorLength;
511
512	unsigned int
513	a = ((ProfileElem)entryA)->bitVector,
514	b = ((ProfileElem)entryB)->bitVector;
515
516	for(i = 0; i < bitVectorLength; ++i)
517	{
518	if(a[i] != b[i])
519	return FALSE;
520	}
521
522	return TRUE;
523	}
524
525
526
527
528	static Array* profileToArray(HashTable *profile, boolean updateFrequencyCount, boolean assignIds)
529	{
530	HashTableIterator*
531	hashTableIterator = createHashTableIterator(profile);
532
533	Array
534	result = (Array)rax_calloc(1, sizeof(Array));
535
536	unsigned int
537	count = 0;
538
539	/* remember to always copy s.t. rax_free() runs w/o problems */
540
541	ProfileElemAttr
542	*profileElemAttr;
543
544	result->commonAttributes = (void*)rax_calloc(1, sizeof(ProfileElemAttr));
545	result->commonAttributes = memcpy(result->commonAttributes, profile->commonAttributes, sizeof(ProfileElemAttr));
546	profileElemAttr = result->commonAttributes;
547
548	result->length = profile->entryCount;
549	result->arrayTable = (void)rax_calloc(profile->entryCount, sizeof(ProfileElem));
550
551	if(!hashTableIterator)
552	return result;
553
554	do
555	{
556	ProfileElem *profileElem = getCurrentValueFromHashTableIterator(hashTableIterator);
557
558	if(updateFrequencyCount)
559	profileElem->treeVectorSupport = genericBitCount(profileElem->treeVector, profileElemAttr->treeVectorLength);
560
561	if(assignIds)
562	profileElem->id = count;
563
564	((ProfileElem**)result->arrayTable)[count] = profileElem;
565	assert(profileElem->bitVector && profileElem->treeVector);
566	count++;
567	}
568	while(hashTableIteratorNext(hashTableIterator));
569
570	assert(count == profile->entryCount);
571
572	rax_free(hashTableIterator);
573
574	return result;
575	}
576
577
578	/* THIS IS AN ADAPTER */
579
580	static Array convertHashtableToArray(hashtable oldHashtable, unsigned int bitVectorLength, unsigned int treeVectorLength,
581	unsigned int *randForTaxa, unsigned int lastByte)
582	{
583	unsigned int
584	count = 0,
585	i;
586
587	entry
588	*ent,
589	*entTmp;
590
591	Array
592	*result;
593
594	ProfileElemAttr
595	attr = (ProfileElemAttr)rax_calloc(1, sizeof(ProfileElemAttr));
596
597	attr->bitVectorLength = bitVectorLength;
598	attr->treeVectorLength = treeVectorLength;
599	attr->randForTaxa = randForTaxa;
600	attr->lastByte = lastByte;
601
602	HashTable
603	*hashTable = PROFILE_TABLE(HASH_TABLE_SIZE(oldHashtable->entryCount), attr);
604
605	for(i = 0; i < oldHashtable->tableSize; ++i)
606	{
607	ent = oldHashtable->table[i];
608	while(ent)
609	{
610	ProfileElem
611	profileElem = (ProfileElem)rax_calloc(1, sizeof(ProfileElem));
612
613	profileElem->bitVector = ent->bitVector;
614	profileElem->treeVector = ent->treeVector;
615
616	insertIntoHashTable(hashTable, profileElem,
617	hashTable->hashFunction(hashTable, profileElem));
618
619	entTmp = ent->next;
620	rax_free(ent);
621	ent = entTmp;
622	}
623	}
624
625	rax_free(oldHashtable);
626
627	updateEntryCount(hashTable);
628
629
630	for(i = 0; i < hashTable->tableSize; ++i)
631	{
632	HashElem
633	elem = ((HashElem*)hashTable->table)[i];
634
635	while(elem)
636	{
637	((ProfileElem*)elem->value)->id = count++;
638	elem = elem->next;
639	}
640	}
641
642	assert(count == hashTable->entryCount);
643
644	result = profileToArray(hashTable, TRUE, TRUE);
645	destroyHashTable(hashTable, NULL);
646
647	return result;
648	}
649
650
651	#ifdef _SORTED
652
653	static int sortProfileElems(const void a, const void b)
654	{
655	unsigned int aFreq = ((ProfileElem*)a)->treeVectorSupport,
656	bFreq = ((ProfileElem*)b)->treeVectorSupport;
657
658	if(aFreq < bFreq)
659	return 1;
660	else if(bFreq < aFreq)
661	return -1;
662	else
663	return 0;
664	}
665
666	#endif
667
668
669	static Array getInfrequentBipartitions(Array oldArray, unsigned int threshold)
670	{
671	Array
672	array = (Array )rax_calloc(1, sizeof(Array));
673
674	ProfileElem**
675	oldArrayTable = oldArray->arrayTable;
676
677	unsigned int
678	i,
679	numberInfrequent = 0,
680	count = 0;
681
682	for(i = 0; i < oldArray->length; ++i)
683	numberInfrequent += (oldArrayTable[i]->treeVectorSupport <= threshold);
684
685	array->commonAttributes = (void*)rax_calloc(1, sizeof(ProfileElemAttr));
686	memcpy(array->commonAttributes, oldArray->commonAttributes, sizeof(ProfileElemAttr));
687	array->length = numberInfrequent;
688	array->arrayTable = (void)rax_calloc(numberInfrequent, sizeof(ProfileElem));
689
690	for(i = 0; i < oldArray->length; ++i)
691	{
692	ProfileElem elem = ((ProfileElem*)oldArray->arrayTable)[i];
693	assert(elem->bitVector && elem->treeVector);
694	if( elem->treeVectorSupport <= threshold)
695	((ProfileElem**)array->arrayTable)[count++] = elem;
696	}
697
698	assert(count == numberInfrequent);
699
700	#ifdef _SORTED
701	SORT_ARRAY(array, ProfileElem, sortProfileElems);
702	#endif
703
704	return array;
705	}
706
707
708	static boolean isUnionOfTreesAboveThreshold( const ProfileElemAttr *profileElemAttr,
709	const ProfileElem elemA, const ProfileElem elemB,
710	unsigned int frequencyThreshold)
711	{
712	unsigned int
713	i,
714	count = 0,
715	length = profileElemAttr->treeVectorLength;
716
717	for(i = 0; i < length; ++i )
718	count += BIT_COUNT(elemA->treeVector[i] \| elemB->treeVector[i]);
719
720	return (count > frequencyThreshold);
721	}
722
723
724
725
726
727	static Dropset getBestDropset(HashTable dropsets
728
729	#ifdef _COMPARE_TO_NICK
730	,char **nameList, unsigned int numberOfTaxa
731	#endif
732
733	)
734	{
735
736	#ifdef _COMPARE_TO_NICK
737	unsigned int j;
738	#endif
739
740	int
741	bestImpact = 0;
742
743	unsigned int
744	i;
745
746	Dropset
747	bestDropset = (Dropset )NULL;
748
749	HashTableIterator
750	*hashTableIterator = createHashTableIterator(dropsets);
751
752	if(!getCurrentValueFromHashTableIterator(hashTableIterator))
753	return NULL;
754
755	do
756	{
757	Dropset
758	*dropset = getCurrentValueFromHashTableIterator(hashTableIterator);
759
760	unsigned int
761	droppedTaxa = dropset->sizeOfDropset,
762	gainedBips = dropset->numberOfMergingPairs;
763
764	int
765	impact = gainedBips - droppedTaxa;
766
767	if(impact > bestImpact)
768	{
769	printBothOpen("found better dropset [gained-dropped=impact] %d\t-\t%d\t=%d\n", gainedBips, droppedTaxa, impact);
770	bestImpact = impact;
771	bestDropset = dropset;
772	}
773
774	#ifdef _COMPARE_TO_NICK
775	else if(impact > 0 && impact == bestImpact)
776	{
777	/* resolve by number of taxa */
778	unsigned int
779	fromBest = bestDropset->sizeOfDropset,
780	fromCurrent = dropset->sizeOfDropset;
781
782	if(fromCurrent < fromBest)
783	{
784	printBothOpen("[SIZE] ");
785	printDropset(dropset->dropset, numberOfTaxa, nameList);
786	printBothOpen(" is shorter than ");
787	printDropset(bestDropset->dropset, numberOfTaxa, nameList);
788	bestDropset = dropset;
789	printBothOpen("\n");
790	}
791	else if(fromBest == fromCurrent)
792	{
793	for(j = 0; j < fromCurrent; ++j)
794	{
795	if( bestDropset->dropset[j] < dropset->dropset[j] )
796	{
797	printBothOpen("[LEX] ");
798	printDropset(bestDropset->dropset, numberOfTaxa, nameList);
799	printBothOpen(" less than ");
800	printDropset(dropset->dropset, numberOfTaxa, nameList);
801	printBothOpen("\n");
802	break;
803	}
804	else if( dropset->dropset[j] < bestDropset->dropset[j] )
805	{
806	printBothOpen("[LEX] ");
807	printDropset(dropset->dropset, numberOfTaxa, nameList);
808	printBothOpen(" less than ");
809	printDropset(bestDropset->dropset, numberOfTaxa, nameList);
810	bestDropset = dropset;
811	printBothOpen("\n");
812	break;
813	}
814	}
815	}
816	}
817	#endif
818	}
819	while(hashTableIteratorNext(hashTableIterator));
820
821	rax_free(hashTableIterator);
822
823	/* we did not find anything */
824	if(!bestDropset)
825	return bestDropset;
826
827	/* else convert to non-sparse representation */
828	unsigned int
829	*bvPtr = bestDropset->dropset;
830
831	bestDropset->dropset = (unsigned int)rax_calloc(((DropsetAttr)dropsets->commonAttributes)->dropsetVectorLength, sizeof(unsigned int));
832
833	for(i = 0; i < bestDropset->sizeOfDropset; ++i)
834	FLIP_NTH_BIT(bestDropset->dropset, bvPtr[i]);
835
836	rax_free(bvPtr);
837
838	return bestDropset;
839	}
840
841
842	static void insertBipartitionPairDropset_helper(HashTable *dropsets,
843	unsigned int *diff, unsigned int diffCount)
844	{
845	Dropset
846	dropset = (Dropset)rax_calloc(1, sizeof(Dropset)),
847	*foundInHashTable;
848	unsigned int
849	i,
850	ctr = 0;
851
852	dropset->dropset = (unsigned int*)rax_calloc(diffCount, sizeof(unsigned int));
853
854	for(i = 0; i < ((DropsetAttr)dropsets->commonAttributes)->dropsetVectorLength MASK_LENGTH; ++i)
855	{
856	if(NTH_BIT_IS_SET(diff, i))
857	{
858	dropset->dropset[ctr] = i;
859	ctr++;
860	if(ctr == diffCount)
861	break;
862	}
863	}
864
865	rax_free(diff);
866
867	assert(ctr == diffCount);
868
869	dropset->numberOfMergingPairs = 1;
870	dropset->sizeOfDropset = diffCount;
871	unsigned int hashValue = dropsets->hashFunction(dropsets, dropset);
872
873	foundInHashTable = (Dropset*)searchHashTable(dropsets, dropset, hashValue);
874
875	if(!foundInHashTable)
876	insertIntoHashTable(dropsets, dropset, hashValue);
877	else
878	{
879	foundInHashTable->numberOfMergingPairs++;
880
881	rax_free(dropset->dropset);
882	rax_free(dropset);
883	}
884	}
885
886
887	static void insertBipartitionPairDropset(HashTable *dropsets,
888	const ProfileElemAttr *profileElemAttr,
889	const ProfileElem elemA, const ProfileElem elemB,
890	const unsigned int *droppedTaxa )
891	{
892	unsigned int
893	i,
894	diffCount1 = 0,
895	diffCount2 = 0,
896	length = profileElemAttr->bitVectorLength,
897	diff1 = (unsigned int)rax_calloc(length, sizeof(unsigned int)),
898	diff2 = (unsigned int)rax_calloc(length, sizeof(unsigned int)),
899	lastByte = profileElemAttr->lastByte;
900
901
902	for(i = 0; i < length; ++i )
903	{
904	diff1[i] = ( elemA->bitVector[i] ^ elemB->bitVector[i] ) ;
905
906	if(i == length - 1)
907	diff2[i] = (elemA->bitVector[i] & ~ droppedTaxa[i]) ^ ~ (elemB->bitVector[i] \| droppedTaxa[i] \| lastByte);
908	else
909	diff2[i] = (elemA->bitVector[i] & ~ droppedTaxa[i]) ^ ~ (elemB->bitVector[i] \| droppedTaxa[i]);
910	}
911
912
913	diffCount1 = genericBitCount(diff1,length);
914	diffCount2 = genericBitCount(diff2,length);
915
916	if (!(diffCount1 && diffCount2))
917	printf("problem with bip %d and %d\n", elemA->id, elemB->id);
918
919	assert( diffCount1 && diffCount2 ) ;
920
921	if(diffCount1 < diffCount2)
922	{
923	insertBipartitionPairDropset_helper(dropsets, diff1, diffCount1);
924	rax_free(diff2);
925	}
926	else if(diffCount1 > diffCount2)
927	{
928	insertBipartitionPairDropset_helper(dropsets, diff2, diffCount2);
929	rax_free(diff1);
930	}
931	else
932	{
933	insertBipartitionPairDropset_helper(dropsets, diff1, diffCount1);
934	insertBipartitionPairDropset_helper(dropsets, diff2, diffCount2);
935	}
936	}
937
938
939
940
941	static unsigned int randomDropsetHashFunctionSparse(HashTable hashTable, void value)
942	{
943	Dropset
944	dropset = (Dropset)value;
945
946	unsigned int
947	i,
948	result = 0,
949	length = dropset->sizeOfDropset,
950	randForTaxa = ((DropsetAttr)hashTable->commonAttributes)->randForTaxa;
951
952	for(i = 0; i < length; ++i)
953	result ^= randForTaxa[dropset->dropset[i]];
954
955	return result;
956	}
957
958
959	static boolean dropsetEqualFunctionSparse(HashTable hashTable, void entryA, void *entryB)
960	{
961	(void)hashTable; // suppress warning
962	unsigned int i,
963	aLength = ((Dropset*)entryA)->sizeOfDropset,
964	bLength = ((Dropset*)entryB)->sizeOfDropset;
965
966	unsigned int
967	a = ((Dropset)entryA)->dropset,
968	b = ((Dropset)entryB)->dropset;
969
970	if(aLength != bLength)
971	return FALSE;
972
973	for(i = 0; i < aLength; i++)
974	if(a[i] != b[i])
975	return FALSE;
976
977	return TRUE;
978	}
979
980
981	static HashTable* potentialProfileDropsets(Array *infrequentBipartitions,
982	unsigned int frequencyThreshold,
983	unsigned int *droppedTaxa )
984	{
985	DropsetAttr
986	dropsetAttr = (DropsetAttr)rax_calloc(1, sizeof(DropsetAttr));
987
988	dropsetAttr->dropsetVectorLength = ((ProfileElemAttr*)infrequentBipartitions->commonAttributes)->bitVectorLength;
989	dropsetAttr->bipartitionVetorLength = infrequentBipartitions->length;
990	dropsetAttr->randForTaxa = ((ProfileElemAttr*)infrequentBipartitions->commonAttributes)->randForTaxa;
991
992	HashTable
993	*dropsets = DROPSET_TABLE(HASH_TABLE_SIZE(infrequentBipartitions->length), dropsetAttr);
994
995	ProfileElem
996	*elemA,
997	*elemB;
998
999	unsigned int
1000	firstCount,
1001	secondCount;
1002
1003	for(firstCount = 0; firstCount < infrequentBipartitions->length; ++firstCount)
1004	{
1005	elemA = ((ProfileElem**)(infrequentBipartitions->arrayTable))[firstCount];
1006	for(secondCount = firstCount + 1; secondCount < infrequentBipartitions->length; ++secondCount)
1007	{
1008	elemB = ((ProfileElem**)(infrequentBipartitions->arrayTable))[secondCount];
1009	assert(elemB->treeVector && elemB->bitVector);
1010
1011	if( elemA->treeVectorSupport + elemB->treeVectorSupport > frequencyThreshold )
1012	{
1013	if( isUnionOfTreesAboveThreshold(infrequentBipartitions->commonAttributes, elemA, elemB, frequencyThreshold))
1014	insertBipartitionPairDropset(dropsets, infrequentBipartitions->commonAttributes, elemA, elemB, droppedTaxa);
1015	}
1016	#ifdef _SORTED
1017	else
1018	break;
1019	#endif
1020	}
1021	}
1022
1023	/* due to parallelisation, we have to compute the entry count a posteriori */
1024	/* TODO rewrite this step for the sequential code */
1025
1026	updateEntryCount(dropsets);
1027
1028	return dropsets;
1029	}
1030
1031
1032	static void destroyProfileElem(ProfileElem *profileElem)
1033	{
1034	rax_free(profileElem->bitVector);
1035	rax_free(profileElem->treeVector);
1036	rax_free(profileElem);
1037	}
1038
1039
1040	static void destroyDropset(void *dropset_)
1041	{
1042	Dropset
1043	dropset = (Dropset)dropset_;
1044
1045	rax_free(dropset->dropset);
1046	rax_free(dropset);
1047	}
1048
1049
1050	static List* getListOfConsensusBips(Array *allBipartitions, unsigned int frequencyThreshold)
1051	{
1052	List*
1053	result = (List*)NULL;
1054
1055	unsigned int
1056	i;
1057
1058	ProfileElem
1059	*elem;
1060
1061	for(i = 0; i < allBipartitions->length; ++i)
1062	{
1063	elem = ((ProfileElem**)allBipartitions->arrayTable)[i];
1064	if(elem->treeVectorSupport > frequencyThreshold)
1065	{
1066	List
1067	tmp = (List)rax_calloc(1, sizeof(List));
1068
1069	tmp->value = elem;
1070	tmp->next = result;
1071	result = tmp;
1072	}
1073	}
1074
1075	return result;
1076	}
1077
1078
1079	static unsigned int getLengthOfList(List* list)
1080	{
1081	unsigned int
1082	result = 0;
1083
1084	for( ; list ; list = list->next)
1085	result++;
1086
1087	return result;
1088	}
1089
1090
1091
1092
1093
1094	static HashTable updateAndInsertElem(ProfileElem elem, HashTable hashTable, unsigned int droppedTaxa, ProfileElemAttr* profileElemAttr)
1095	{
1096	unsigned int
1097	j;
1098
1099	ProfileElem
1100	foundInHashTable, foundComplementInHashTable,
1101	complement = (ProfileElem)rax_calloc(1, sizeof(ProfileElem));
1102
1103	unsigned int
1104	lastByte = ((ProfileElemAttr*)hashTable->commonAttributes)->lastByte;
1105
1106	complement->bitVector = (unsigned int *)rax_calloc(profileElemAttr->bitVectorLength, sizeof(unsigned int));
1107
1108	for(j = 0; j < profileElemAttr->bitVectorLength; ++j )
1109	{
1110	if(j == profileElemAttr->bitVectorLength - 1)
1111	{
1112	elem->bitVector[j] &= ~ ( droppedTaxa[j] \| lastByte );
1113	complement->bitVector[j] = ~ (elem->bitVector[j] \| droppedTaxa[j] \| lastByte);
1114	}
1115	else
1116	{
1117	elem->bitVector[j] &= ~ droppedTaxa[j];
1118	complement->bitVector[j] = ~ (elem->bitVector[j] \| droppedTaxa[j] );
1119	}
1120	}
1121
1122	/* bipartition vanishes */
1123
1124	unsigned int
1125	numberOfElements = genericBitCount(elem->bitVector, profileElemAttr->bitVectorLength),
1126	numberElementsInComplement = genericBitCount(complement->bitVector, profileElemAttr->bitVectorLength);
1127
1128	if(numberOfElements < 2 \|\| numberElementsInComplement < 2)
1129	{
1130	destroyProfileElem(elem);
1131	rax_free(complement->bitVector);
1132	rax_free(complement);
1133	return hashTable;
1134	}
1135
1136	unsigned int
1137	hashValue = hashTable->hashFunction(hashTable, elem);
1138
1139	foundInHashTable = searchHashTable(hashTable, elem, hashValue);
1140
1141	if(!foundInHashTable)
1142	{
1143	unsigned int
1144	complementHashValue = hashTable->hashFunction(hashTable, complement);
1145
1146	foundComplementInHashTable = searchHashTable(hashTable, complement, complementHashValue);
1147
1148	if(foundComplementInHashTable)
1149	foundInHashTable = foundComplementInHashTable;
1150	}
1151
1152	if(foundInHashTable)
1153	{
1154	for (j = 0; j < profileElemAttr->treeVectorLength; ++j )
1155	foundInHashTable->treeVector[j] \|= elem->treeVector[j];
1156
1157	foundInHashTable->treeVectorSupport = genericBitCount(foundInHashTable->treeVector, profileElemAttr->treeVectorLength);
1158
1159	destroyProfileElem(elem);
1160	rax_free(complement->bitVector);
1161	rax_free(complement);
1162	}
1163	else
1164	{
1165	insertIntoHashTable(hashTable, elem, hashValue);
1166	rax_free(complement->bitVector);
1167	rax_free(complement);
1168	}
1169
1170	return hashTable;
1171	}
1172
1173
1174	static Array* restrictProfile( Array infrequentBipartitions, List consensusBipartitions, unsigned int *droppedTaxa )
1175	{
1176	unsigned int
1177	i;
1178
1179	ProfileElemAttr
1180	profileElemAttr = (ProfileElemAttr )rax_calloc(1, sizeof(ProfileElemAttr));
1181
1182	profileElemAttr = memcpy(profileElemAttr, infrequentBipartitions->commonAttributes, sizeof(ProfileElemAttr));
1183
1184	HashTable
1185	*hashTable = PROFILE_TABLE(HASH_TABLE_SIZE(infrequentBipartitions->length + getLengthOfList(consensusBipartitions)),
1186	profileElemAttr);
1187
1188	unsigned int
1189	lengthOfConsensus = getLengthOfList(consensusBipartitions);
1190
1191	Array
1192	*result;
1193
1194	List
1195	*listElem,
1196	*listTmp;
1197
1198	Array
1199	tmpArray = (Array )rax_calloc(1, sizeof(Array));
1200
1201
1202	tmpArray->arrayTable = (void )rax_calloc(infrequentBipartitions->length + lengthOfConsensus, sizeof(ProfileElem));
1203	tmpArray->commonAttributes = (void *)rax_calloc(1, sizeof(ProfileElemAttr));
1204	tmpArray->commonAttributes = memcpy(tmpArray->commonAttributes, infrequentBipartitions->commonAttributes, sizeof(ProfileElemAttr));
1205
1206	for(i = 0; i < infrequentBipartitions->length; ++i)
1207	((ProfileElem)tmpArray->arrayTable)[i] = ((ProfileElem)infrequentBipartitions->arrayTable)[i];
1208
1209	tmpArray->length = infrequentBipartitions->length;
1210
1211	rax_free(infrequentBipartitions->commonAttributes);
1212	rax_free((ProfileElem**)infrequentBipartitions->arrayTable);
1213	rax_free(infrequentBipartitions);
1214
1215	listElem = consensusBipartitions;
1216
1217	while(listElem)
1218	{
1219	((ProfileElem*)tmpArray->arrayTable)[tmpArray->length] = (ProfileElem)listElem->value;
1220	tmpArray->length++;
1221	listTmp = listElem;
1222	listElem = listElem->next;
1223	rax_free(listTmp);
1224	}
1225
1226	for(i = 0; i < tmpArray->length; ++i)
1227	{
1228	hashTable = updateAndInsertElem(((ProfileElem**)tmpArray->arrayTable)[i],
1229	hashTable,
1230	droppedTaxa,
1231	profileElemAttr);
1232	}
1233
1234	rax_free(tmpArray->arrayTable);
1235	rax_free(tmpArray->commonAttributes);
1236	rax_free(tmpArray);
1237
1238	updateEntryCount(hashTable);
1239
1240	result = profileToArray(hashTable, TRUE, FALSE);
1241
1242	destroyHashTable(hashTable, NULL);
1243	return result;
1244	}
1245
1246
1247	/**
1248	Computes in a greedy manner a set of rogue taxa.
1249
1250	@return the dropset
1251	@param profile -- the hashtable used in bipartitionList.c
1252	@param tree -- the standard tree
1253	@param resultBipartitions -- bipartitions after dropping the rogue
1254	taxa. necessary part of the output!
1255	*/
1256
1257	static unsigned int* determineGreedyDropset(hashtable profile, tree Tree, Array **resultBipartitions)
1258	{
1259
1260	unsigned int
1261	numberOfTaxa = Tree->mxtips,
1262	numberOfTrees = Tree->numberOfTrees,
1263	frequencyThreshold,
1264	i,
1265	treeVectorLength = GET_BITVECTOR_LENGTH(numberOfTrees),
1266	bitVectorLength = GET_BITVECTOR_LENGTH(numberOfTaxa),
1267	droppedTaxa = (unsigned int)rax_calloc(bitVectorLength, sizeof(unsigned int));
1268
1269	boolean
1270	droppedTaxaThisRound = FALSE;
1271
1272	HashTable
1273	*dropsets;
1274
1275	Dropset
1276	*bestDropset;
1277
1278	Array
1279	*allBipartitions;
1280
1281
1282
1283	/* initialise random numbers: one for each taxon */
1284	unsigned int
1285	randForTaxa = (unsigned int)rax_calloc(numberOfTaxa, sizeof(unsigned int));
1286
1287	for(i = 0; i < numberOfTaxa; ++i)
1288	randForTaxa[i] = rand();
1289
1290	switch(Tree->consensusType)
1291	{
1292	case MR_CONSENSUS:
1293	frequencyThreshold = numberOfTrees / 2;
1294	break;
1295	case STRICT_CONSENSUS:
1296	frequencyThreshold = numberOfTrees - 1;
1297	break;
1298	case MRE_CONSENSUS:
1299	default:
1300	assert(0);
1301	}
1302
1303	/*
1304	prepare a compensator for the padding bits (bits that are
1305	necessarily in the bitvector but do not represent taxa)
1306	*/
1307
1308	unsigned int
1309	lastByte = 0;
1310
1311	for(i = numberOfTaxa; i < MASK_LENGTH * bitVectorLength; ++i)
1312	lastByte \|= mask32[i % MASK_LENGTH];
1313
1314	assert(numberOfTrees > 0 && numberOfTaxa > 0 && bitVectorLength > 0 && treeVectorLength > 0 && frequencyThreshold > 0);
1315
1316	/*
1317	caling adapter: adapts the old hashtable to the new implementation
1318	*/
1319
1320	allBipartitions = convertHashtableToArray(profile, bitVectorLength, treeVectorLength, randForTaxa, lastByte);
1321
1322	printBothOpen("Tree->consensusType=%d, numberOfTaxa=%d, bitVectorLength=%d, numberOfTrees=%d, treeVectorLength=%d, frequencyThreshold=%d, profile->entryCount=%d\n\n",
1323	Tree->consensusType, numberOfTaxa, bitVectorLength,
1324	numberOfTrees, treeVectorLength, frequencyThreshold,allBipartitions->length);
1325
1326
1327
1328	do
1329	{
1330	printBothOpen("================================================================\n");
1331
1332
1333	Array
1334	*infrequentBipartitions = getInfrequentBipartitions(allBipartitions, frequencyThreshold);
1335
1336	List
1337	*consensusBipartitions = getListOfConsensusBips(allBipartitions, frequencyThreshold);
1338
1339	assert(allBipartitions->length == infrequentBipartitions->length + getLengthOfList(consensusBipartitions));
1340
1341	printBothOpen("divided bips: %d = %d infreq + %d consensus\n", allBipartitions->length, infrequentBipartitions->length, getLengthOfList(consensusBipartitions));
1342
1343	rax_free((ProfileElem**)allBipartitions->arrayTable);
1344	rax_free(allBipartitions->commonAttributes);
1345	rax_free(allBipartitions);
1346
1347	/*
1348	NOTE: this below would be possible and might make sense in some
1349	situations. But as there is no equivalent in Nick's script and
1350	this somehow contradicts the philosophy stated in the paper (to
1351	remove all the rogues), I comment it out.
1352	It would of advantage, if the algorithm is used to get a better
1353	resolved tree. However, when we stop via this criteria, rogues
1354	might remain in the tree. If it is the target to yield a high
1355	number of branches with the highest possible support, then it
1356	would be better, if those rogues were dropped as well.
1357	*/
1358
1359	/*
1360	if(getLengthOfList(consensusBipartitions) >= numberOfTaxa - 3 - genericBitCount(droppedTaxa))
1361	{
1362	printBothOpen("we have enough bips: %d / %d\n", getLengthOfList(consensusBipartitions), numberOfTaxa-3);
1363	break;
1364	}
1365	else
1366	printBothOpen("we have %d bips, we need %d.\n", getLengthOfList(consensusBipartitions), numberOfTaxa-3);
1367	*/
1368
1369
1370
1371
1372
1373	dropsets = potentialProfileDropsets(infrequentBipartitions, frequencyThreshold, droppedTaxa);
1374
1375
1376
1377	#ifdef _COMPARE_TO_NICK
1378	bestDropset = getBestDropset(dropsets , Tree->nameList, numberOfTaxa );
1379	#else
1380	bestDropset = getBestDropset(dropsets);
1381	#endif
1382
1383	droppedTaxaThisRound = (bestDropset != NULL);
1384
1385
1386	if(droppedTaxaThisRound)
1387	{
1388	/*
1389	update dropped taxa
1390	*/
1391
1392	for(i = 0 ; i < ((DropsetAttr*)dropsets->commonAttributes)->dropsetVectorLength; ++i)
1393	droppedTaxa[i] \|= bestDropset->dropset[i];
1394
1395	/*
1396	update profile array
1397	*/
1398
1399	allBipartitions = restrictProfile(infrequentBipartitions, consensusBipartitions, droppedTaxa );
1400	}
1401	else
1402	{
1403	allBipartitions = restrictProfile( infrequentBipartitions, consensusBipartitions, droppedTaxa );
1404	*resultBipartitions = allBipartitions;
1405	}
1406
1407	/* just debug */
1408
1409	if(droppedTaxaThisRound)
1410	{
1411	printBothOpen("dropping taxa: " );
1412	for(i = 0; i < numberOfTaxa; ++i)
1413	if(NTH_BIT_IS_SET(bestDropset->dropset, i))
1414	printBothOpen("%s,", Tree->nameList[i+1]);
1415	printBothOpen("\n");
1416	}
1417
1418	destroyHashTable(dropsets, destroyDropset);
1419	}
1420	while(droppedTaxaThisRound);
1421
1422	rax_free(randForTaxa);
1423
1424	return droppedTaxa;
1425	}
1426
1427	static unsigned int renormalizeBipartitions(Array bipartitions, unsigned int droppedTaxa, unsigned int numberOfTaxa)
1428	{
1429	unsigned
1430	i,
1431	resultIndex = 0,
1432	bitVectorLength = ((ProfileElemAttr*)bipartitions->commonAttributes)->bitVectorLength,
1433	lastByte = 0;
1434
1435
1436	for(i = numberOfTaxa; i < MASK_LENGTH * bitVectorLength; ++i)
1437	lastByte \|= mask32[i % MASK_LENGTH];
1438
1439	/* if the reference leave did not change, nothing needs to be done */
1440
1441	if(!NTH_BIT_IS_SET(droppedTaxa, 0))
1442	return 0;
1443
1444	/* find adequate new leave */
1445
1446	for(i = 0; i < numberOfTaxa; ++i)
1447	{
1448	if(!NTH_BIT_IS_SET(droppedTaxa,i))
1449	{
1450	resultIndex = i;
1451	break;
1452	}
1453	}
1454
1455	/* is only the case if everything was dropped */
1456
1457	assert(resultIndex);
1458
1459	/* invert bit-vectors according to the reference leave */
1460
1461	for(i = 0; i < bipartitions->length; ++i)
1462	{
1463	ProfileElem
1464	elem = ((ProfileElem*)bipartitions->arrayTable)[i];
1465
1466	if(NTH_BIT_IS_SET(elem->bitVector,resultIndex))
1467	{
1468	unsigned int
1469	j;
1470
1471	for(j = 0; j < bitVectorLength; ++j)
1472	{
1473	elem->bitVector[j] =
1474	(j == bitVectorLength - 1)
1475	? ~ (elem->bitVector[j] \| droppedTaxa[j] \| lastByte)
1476	: ~ (elem->bitVector[j] \| droppedTaxa[j]);
1477	}
1478	}
1479	}
1480
1481	return resultIndex;
1482	}
1483
1484	static hashtable reconvertHashtable(Array bipartitionArray)
1485	{
1486	unsigned int
1487	i,
1488	count = 0;
1489
1490	hashtable
1491	*htable = initHashTable(bipartitionArray->length);
1492
1493	assert(htable->tableSize >= bipartitionArray->length );
1494
1495	for(i = 0; i < bipartitionArray->length; ++i)
1496	{
1497	ProfileElem
1498	elem = ((ProfileElem*)bipartitionArray->arrayTable)[i];
1499
1500	entry
1501	*ent = initEntry();
1502
1503	ent->bitVector = elem->bitVector;
1504	ent->treeVector = elem->treeVector;
1505	ent->bipNumber = count++;
1506	htable->table[i] = ent;
1507	rax_free(elem);
1508	}
1509
1510	htable->entryCount = count;
1511
1512	return htable;
1513	}
1514
1515	static void pruneTaxon(tree *tr, unsigned int k)
1516	{
1517
1518	assert(k > 0 && k <= ((unsigned int)(tr->mxtips)));
1519
1520	nodeptr
1521	p = tr->nodep[k],
1522	q = p->back,
1523	q1 = q->next->back,
1524	q2 = q->next->next->back;
1525
1526
1527	hookupDefault(q1, q2, tr->numBranches);
1528
1529	tr->start = findAnyTip(q1, tr->mxtips);
1530
1531	assert(p != tr->start && q != tr->start);
1532	}
1533
1534
1535
1536
1537	void computeRogueTaxa(tree tr, char treeSetFileName, analdef *adef)
1538	{
1539	char
1540	dropFileName[1024];
1541
1542	hashtable
1543	h = initHashTable(tr->mxtips FC_INIT * 10);
1544
1545	Array
1546	resultBipartitions = (Array )rax_calloc(1, sizeof(Array*));
1547
1548	unsigned int
1549	droppedTaxaNum = 0,
1550	numberOfTrees = 0,
1551	i,
1552	treeVectorLength,
1553	vectorLength,
1554	**bitVectors = initBitVector(tr, &vectorLength),
1555	*droppedTaxa;
1556
1557	FILE
1558	*treeFile = getNumberOfTrees(tr, treeSetFileName, adef),
1559	*outf;
1560
1561	numberOfTrees = tr->numberOfTrees;
1562
1563	assert(adef->leaveDropMode);
1564	assert(sizeof(unsigned char) == 1);
1565
1566	treeVectorLength = GET_BITVECTOR_LENGTH(numberOfTrees);
1567
1568	/* read the trees and process the bipartitions */
1569
1570	for(i = 1; i <= numberOfTrees; i++)
1571	{
1572	int
1573	bCount = 0;
1574
1575	treeReadLen(treeFile, tr, FALSE, FALSE, TRUE, adef, TRUE, FALSE);
1576
1577	assert(tr->mxtips == tr->ntips);
1578
1579	bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips, vectorLength, h, (i - 1), BIPARTITIONS_BOOTSTOP, (branchInfo *)NULL,
1580	&bCount, treeVectorLength, FALSE, FALSE);
1581
1582	assert(bCount == tr->mxtips - 3);
1583	}
1584
1585
1586	rewind(treeFile);
1587
1588	droppedTaxa = determineGreedyDropset(h, tr, resultBipartitions);
1589
1590
1591	renormalizeBipartitions(*resultBipartitions, droppedTaxa, tr->mxtips);
1592
1593	h = reconvertHashtable(*resultBipartitions);
1594
1595	/* TODO output for egrep */
1596
1597	printBothOpen("> ALL dropped taxa: ");
1598
1599	for(i = 0; i < (unsigned int)tr->mxtips; ++i)
1600	{
1601	if(NTH_BIT_IS_SET(droppedTaxa, i))
1602	{
1603	printBothOpen("%d %s,", (i+1), tr->nameList[i + 1]);
1604	droppedTaxaNum++;
1605	}
1606	}
1607	printBothOpen("\n");
1608
1609
1610	printBothOpen("\nDropping %u taxa\n", droppedTaxaNum);
1611
1612	printBothOpen("\nTime for dropset calculation: %f seconds\n", gettime() - masterTime );
1613
1614	if(droppedTaxaNum > 0)
1615	{
1616	strcpy(dropFileName, workdir);
1617	strcat(dropFileName, "RAxML_prunedTrees.");
1618	strcat(dropFileName, run_id);
1619
1620	outf = myfopen(dropFileName, "w");
1621
1622	for(i = 1; i <= numberOfTrees; i++)
1623	{
1624	unsigned int
1625	k;
1626
1627	int
1628	tips = 0;
1629
1630	/printf("Tree %d\n", i);/
1631
1632	treeReadLen(treeFile, tr, FALSE, FALSE, TRUE, adef, TRUE, FALSE);
1633
1634	assert(tr->mxtips == tr->ntips);
1635
1636	for(k = 0; k < (unsigned int)tr->mxtips; k++)
1637	{
1638	if(NTH_BIT_IS_SET(droppedTaxa, k))
1639	pruneTaxon(tr, (k+1));
1640	}
1641
1642	tips = countTips(tr->start, tr->mxtips) + countTips(tr->start->back, tr->mxtips);
1643
1644	assert((unsigned)tips == ((unsigned)tr->mxtips - droppedTaxaNum));
1645
1646	Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE);
1647	fprintf(outf, "%s", tr->tree_string);
1648
1649	/printf("%u %u\n", (unsigned)tips, ((unsigned)tr->mxtips - droppedTaxaNum));/
1650	}
1651
1652
1653	printBothOpen("\nA tree collection, where the taxa from the dropset have been pruned in each tree\n");
1654	printBothOpen("has been written to file %s\n\n", dropFileName);
1655	fclose(outf);
1656	}
1657
1658	printBothOpen("Total execution time: %f\n", gettime() - masterTime);
1659
1660	fclose(treeFile);
1661
1662	freeBitVectors(bitVectors, 2 * tr->mxtips);
1663	rax_free(droppedTaxa);
1664	rax_free(bitVectors);
1665	freeHashTable(h);
1666	rax_free(h);
1667
1668	exit(0);
1669	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/GDE/RAxML/leaveDropping.c

Download in other formats: