Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

rogueEPA.c

Visit:

Last change on this file was 13845, checked in by westram, 11 years ago
remove executable flag
File size: 10.2 KB

Line
1	/* RAxML-VI-HPC (version 2.2) a program for sequential and parallel estimation of phylogenetic trees
2	* Copyright August 2006 by Alexandros Stamatakis
3	*
4	* Partially derived from
5	* fastDNAml, a program for estimation of phylogenetic trees from sequences by Gary J. Olsen
6	*
7	* and
8	*
9	* Programs of the PHYLIP package by Joe Felsenstein.
10	*
11	* This program is free software; you may redistribute it and/or modify its
12	* under the terms of the GNU General Public License as published by the Free
13	* Software Foundation; either version 2 of the License, or (at your option)
14	* any later version.
15	*
16	* This program is distributed in the hope that it will be useful, but
17	* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18	* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19	* for more details.
20	*
21	*
22	* For any other enquiries send an Email to Alexandros Stamatakis
23	* Alexandros.Stamatakis@epfl.ch
24	*
25	* When publishing work that is based on the results from RAxML-VI-HPC please cite:
26	*
27	* Alexandros Stamatakis:"RAxML-VI-HPC: maximum likelihood-based phylogenetic analyses with thousands
28	* of taxa and mixed models".
29	* Bioinformatics 2006; doi: 10.1093/bioinformatics/btl446
30	*/
31
32	#ifndef WIN32
33	#include <unistd.h>
34	#endif
35
36	#include <math.h>
37	#include <time.h>
38	#include <stdlib.h>
39	#include <stdio.h>
40	#include <ctype.h>
41	#include <string.h>
42	#include "axml.h"
43
44	extern char run_id[128];
45	extern char workdir[1024];
46	extern double masterTime;
47
48	/*
49	function below not very interesting, standard RAxML stuff
50	*/
51
52	static double testInsertThorough(tree *tr, nodeptr r, nodeptr q, boolean useVector)
53	{
54	double
55	result,
56	qz[NUM_BRANCHES],
57	z[NUM_BRANCHES];
58
59	nodeptr
60	x = q->back,
61	s = r->back;
62
63	int
64	j;
65
66	for(j = 0; j < tr->numBranches; j++)
67	{
68	qz[j] = q->z[j];
69	z[j] = sqrt(qz[j]);
70
71	if(z[j] < zmin)
72	z[j] = zmin;
73
74	if(z[j] > zmax)
75	z[j] = zmax;
76	}
77
78	hookup(r->next, q, z, tr->numBranches);
79	hookup(r->next->next, x, z, tr->numBranches);
80	hookupDefault(r, s, tr->numBranches);
81
82	newviewGeneric(tr, r);
83
84	localSmooth(tr, r, smoothings);
85
86	if(useVector)
87	result = evaluateGenericVector(tr, r);
88	else
89	result = evaluateGeneric(tr, r);
90
91	hookup(q, x, qz, tr->numBranches);
92
93	r->next->next->back = r->next->back = (nodeptr) NULL;
94
95	return result;
96	}
97
98
99	/*
100	structure to store likelihood and insertion node
101	for each window position
102	*/
103
104	typedef struct
105	{
106	double lh;
107	nodeptr p;
108	} positionData;
109
110
111	/*
112	traverse the tree recursively and insert taxon into each position
113	*/
114
115	static void traverseBias(nodeptr p, nodeptr q, tree tr, int branchCounter, positionData *pd, int windowSize)
116	{
117	double
118	sum = 0.0,
119	result = 0.0;
120
121	int
122	i;
123
124
125	/*
126	compute the likelihood of inserting the tip attached to
127	p between q and q->back
128
129	Actually in testInsertThorough() we compute the per site
130	log likes which are then stored in an array tr->perSiteLL[i]
131	*/
132
133	result = testInsertThorough(tr, p, q, TRUE);
134
135
136	/*
137	stuff below can be removed at some point
138	it just makes me feel better
139	*/
140
141	for(i = 0; i < tr->cdta->endsite; i++)
142	sum += tr->perSiteLL[i];
143
144	assert(ABS(sum - result) < 0.001);
145
146	/*************************************/
147
148	/*
149	for each window position just compute the likelihood over
150	the window.
151
152	If its is better than the current best one, store the likelihood
153	and the insertion node in the data structure
154	*/
155
156	for(i = 0; i < tr->cdta->endsite - windowSize; i++)
157	{
158	int
159	j;
160
161	for(j = i, sum = 0.0; j < i + windowSize; j++)
162	sum += tr->perSiteLL[j];
163
164	if(sum > pd[i].lh)
165	{
166	pd[i].lh = sum;
167	pd[i].p = q;
168	}
169	}
170
171	branchCounter = branchCounter + 1;
172
173
174	/* and here comes the recursion */
175
176	if(!isTip(q->number, tr->rdta->numsp))
177	{
178	traverseBias(p, q->next->back, tr, branchCounter, pd, windowSize);
179	traverseBias(p, q->next->next->back, tr, branchCounter, pd, windowSize);
180	}
181	}
182
183
184	/*
185	functions to compute the node distances between inferred and true
186	placement positions. I think that they are correct.
187	*/
188
189	static int findRec(nodeptr ref, nodeptr best, int mxtips, int value)
190	{
191	if(isTip(ref->number, mxtips))
192	{
193	if(ref == best \|\| ref->back == best)
194	return value;
195	else
196	return 0;
197	}
198	else
199	{
200	int
201	d1,
202	d2;
203
204	if(ref == best \|\| ref->back == best)
205	return value;
206
207	d1 = findRec(ref->next->back, best, mxtips, value + 1);
208	d2 = findRec(ref->next->next->back, best, mxtips, value + 1);
209
210	assert((d1 > 0 && d2 == 0) \|\| (d2 > 0 && d1 == 0) \|\| (d1 == 0 && d2 == 0));
211
212	return (d1 + d2);
213	}
214	}
215
216	static double getNodeDistance(nodeptr ref, nodeptr best, int mxtips)
217	{
218	int
219	d1 = findRec(ref, best, mxtips, 0),
220	d2 = findRec(ref->back, best, mxtips, 0);
221
222	assert((d1 > 0 && d2 == 0) \|\| (d2 > 0 && d1 == 0) \|\| (d1 == 0 && d2 == 0));
223
224	return ((double)(d1 + d2));
225	}
226
227	void computePlacementBias(tree tr, analdef adef)
228	{
229	int
230	windowSize = adef->slidingWindowSize,
231	k,
232	i,
233	tips,
234	numTraversalBranches = (2 * (tr->mxtips - 1)) - 3; /* compute number of branches into which we need to insert once we have removed a taxon */
235
236	char
237	fileName[1024];
238
239	FILE
240	*outFile;
241
242	/* data for each sliding window starting position */
243
244	positionData
245	pd = (positionData )rax_malloc(sizeof(positionData) * (tr->cdta->endsite - windowSize));
246
247	double
248	nodeDistances = (double)rax_calloc(tr->cdta->endsite - windowSize, sizeof(double)), /* array to store node distnces ND for every sliding window position */
249	distances = (double)rax_calloc(tr->cdta->endsite, sizeof(double)); /* array to store avg distances for every site */
250
251	strcpy(fileName, workdir);
252	strcat(fileName, "RAxML_SiteSpecificPlacementBias.");
253	strcat(fileName, run_id);
254
255	outFile = myfopen(fileName, "w");
256
257	printBothOpen("Likelihood of comprehensive tree %f\n\n", tr->likelihood);
258
259	if(windowSize > tr->cdta->endsite)
260	{
261	printBothOpen("The size of your sliding window is %d while the number of sites in the alignment is %d\n\n", windowSize, tr->cdta->endsite);
262	exit(-1);
263	}
264
265	if(windowSize >= (int)(0.9 * tr->cdta->endsite))
266	printBothOpen("WARNING: your sliding window of size %d is only slightly smaller than you alignment that has %d sites\n\n", windowSize, tr->cdta->endsite);
267
268	printBothOpen("Sliding window size: %d\n\n", windowSize);
269
270	/* prune and re-insert on tip at a time into all branches of the remaining tree */
271
272	for(tips = 1; tips <= tr->mxtips; tips++)
273	{
274	nodeptr
275	myStart,
276	p = tr->nodep[tips]->back, /* this is the node at which we are prunung */
277	p1 = p->next->back,
278	p2 = p->next->next->back;
279
280	double
281	pz[NUM_BRANCHES],
282	p1z[NUM_BRANCHES],
283	p2z[NUM_BRANCHES];
284
285	int
286	branchCounter = 0;
287
288	/* reset array values for this tip */
289
290	for(i = 0; i < tr->cdta->endsite; i++)
291	{
292	pd[i].lh = unlikely;
293	pd[i].p = (nodeptr)NULL;
294	}
295
296	/* store the three branch lengths adjacent to the position at which we prune */
297
298	for(i = 0; i < tr->numBranches; i++)
299	{
300	p1z[i] = p1->z[i];
301	p2z[i] = p2->z[i];
302	pz[i] = p->z[i];
303	}
304
305	/* prune the taxon, optimizing the branch between p1 and p2 */
306
307	removeNodeBIG(tr, p, tr->numBranches);
308
309	printBothOpen("Pruning taxon Number %d [%s]\n", tips, tr->nameList[tips]);
310
311	/* find any tip to start traversing the tree */
312
313	myStart = findAnyTip(p1, tr->mxtips);
314
315	/* insert taxon, compute likelihood and remove taxon again from all branches */
316
317	traverseBias(p, myStart->back, tr, &branchCounter, pd, windowSize);
318
319	assert(branchCounter == numTraversalBranches);
320
321	/* for every sliding window position calc ND to the true/correct position at p */
322
323	for(i = 0; i < tr->cdta->endsite - windowSize; i++)
324	nodeDistances[i] = getNodeDistance(p1, pd[i].p, tr->mxtips);
325
326	/* now analyze */
327
328	for(i = 0; i < tr->cdta->endsite; i++)
329	{
330	double
331	d = 0.0;
332
333	int
334	s = 0;
335
336	/*
337	check site position, i.e., doe we have windowSize data points available
338	or fewer because we are at the start or the end of the alignment
339	*/
340
341	/*
342	for each site just accumulate the node distances we have for all
343	sliding windows that passed over this site
344	*/
345
346	if(i < windowSize)
347	{
348	for(k = 0; k <= i; k++, s++)
349	d += nodeDistances[k];
350	}
351	else
352	{
353	if(i < tr->cdta->endsite - windowSize)
354	{
355	for(k = i - windowSize + 1; k <= i; k++, s++)
356	d += nodeDistances[k];
357	}
358	else
359	{
360	for(k = i - windowSize; k < (tr->cdta->endsite - windowSize); k++, s++)
361	d += nodeDistances[k];
362	}
363	}
364
365
366	/*
367	now just divide the accumultaed ND distance by
368	the number of distances we have for this position and then add it to the acc
369	distances over all taxa.
370	I just realized that the version on which I did the tests
371	I sent to Simon I used
372
373	distances[i] = d / ((double)s);
374
375	instead of
376
377	distances[i] += d / ((double)s);
378
379	gamo tin poutana mou
380	*/
381
382	distances[i] += (d / ((double)s));
383	}
384
385
386
387	/*
388	re-connect taxon to its original position
389	*/
390
391	hookup(p->next, p1, p1z, tr->numBranches);
392	hookup(p->next->next, p2, p2z, tr->numBranches);
393	hookup(p, p->back, pz, tr->numBranches);
394
395	/*
396	fix likelihood vectors
397	*/
398
399	newviewGeneric(tr, p);
400	}
401
402	/*
403	now just compute the average ND over all taxa
404	*/
405
406
407	for(i = 0; i < tr->cdta->endsite; i++)
408	{
409	double
410	avg = distances[i] / ((double)tr->mxtips);
411
412	fprintf(outFile, "%d %f\n", i, avg);
413	}
414
415	printBothOpen("\nTime for EPA-based site-specific placement bias calculation: %f\n", gettime() - masterTime);
416	printBothOpen("Site-specific placement bias statistics written to file %s\n", fileName);
417
418	fclose(outFile);
419
420	exit(0);
421	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/lib/GDE/RAxML/rogueEPA.c

Download in other formats: