Context Navigation

source: branches/stable/GDE/TREEPUZZLE/src/ml1.c

Visit:

Last change on this file was 655, checked in by westram, 22 years ago
Mac OSX patches from Ben Hines
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 39.6 KB

Line
1	/*
2	* ml1.c
3	*
4	*
5	* Part of TREE-PUZZLE 5.0 (June 2000)
6	*
7	* (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8	* M. Vingron, and Arndt von Haeseler
9	* (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
10	*
11	* All parts of the source except where indicated are distributed under
12	* the GNU public licence. See http://www.opensource.org for details.
13	*/
14
15
16	/******************************************************************************/
17	/* definitions and prototypes */
18	/******************************************************************************/
19
20	#define EXTERN extern
21
22	/* prototypes */
23	#include <stdio.h>
24	#include <stdlib.h>
25	#include <math.h>
26	#include <ctype.h>
27	#include "util.h"
28	#include "ml.h"
29
30	#define STDOUT stdout
31	#ifndef PARALLEL /* because printf() runs significantly faster */
32	/* than fprintf(stdout) on an Apple Macintosh */
33	/* (HS) */
34	# define FPRINTF printf
35	# define STDOUTFILE
36	#else
37	# define FPRINTF fprintf
38	# define STDOUTFILE STDOUT,
39	#endif
40
41
42	/******************************************************************************/
43	/* compacting sequence data information */
44	/******************************************************************************/
45
46
47	/*************************** internal functions ***************************/
48
49
50	/* make all frequencies a little different */
51	void convfreq(dvector freqemp)
52	{
53	int i, j, maxi=0;
54	double freq, maxfreq, sum;
55
56
57	sum = 0.0;
58	maxfreq = 0.0;
59	for (i = 0; i < tpmradix; i++) {
60	freq = freqemp[i];
61	if (freq < MINFREQ) freqemp[i] = MINFREQ;
62	if (freq > maxfreq) {
63	maxfreq = freq;
64	maxi = i;
65	}
66	sum += freqemp[i];
67	}
68	freqemp[maxi] += 1.0 - sum;
69
70	for (i = 0; i < tpmradix - 1; i++) {
71	for (j = i + 1; j < tpmradix; j++) {
72	if (freqemp[i] == freqemp[j]) {
73	freqemp[i] += MINFDIFF/2.0;
74	freqemp[j] -= MINFDIFF/2.0;
75	}
76	}
77	}
78	}
79
80	/* sort site patters of original input data */
81	void a_radixsort(cmatrix seqchar, ivector ali, int maxspc, int maxsite,
82	int *numptrn)
83	{
84	int i, j, k, l, n, pass;
85	int *awork;
86	int *count;
87
88
89	awork = new_ivector(maxsite);
90	count = new_ivector(tpmradix+1);
91	for (i = 0; i < maxsite; i++)
92	ali[i] = i;
93	for (pass = maxspc - 1; pass >= 0; pass--) {
94	for (j = 0; j < tpmradix+1; j++)
95	count[j] = 0;
96	for (i = 0; i < maxsite; i++)
97	count[(int) seqchar[pass][ali[i]]]++;
98	for (j = 1; j < tpmradix+1; j++)
99	count[j] += count[j-1];
100	for (i = maxsite-1; i >= 0; i--)
101	awork[ --count[(int) seqchar[pass][ali[i]]] ] = ali[i];
102	for (i = 0; i < maxsite; i++)
103	ali[i] = awork[i];
104	}
105	free_ivector(awork);
106	free_ivector(count);
107	n = 1;
108	for (j = 1; j < maxsite; j++) {
109	k = ali[j];
110	l = ali[j-1];
111	for (i = 0; i < maxspc; i++) {
112	if (seqchar[i][l] != seqchar[i][k]) {
113	n++;
114	break;
115	}
116	}
117	}
118	*numptrn = n;
119	}
120
121
122	void condenceseq(cmatrix seqchar, ivector ali, cmatrix seqconint,
123	ivector weight, int maxspc, int maxsite, int numptrn)
124	{
125	int i, j, k, n;
126	int agree_flag; /* boolean */
127
128
129	n = 0;
130	k = ali[n];
131	for (i = 0; i < maxspc; i++) {
132	seqconint[i][n] = seqchar[i][k];
133	}
134	weight[n] = 1;
135	Alias[k] = 0;
136	for (j = 1; j < maxsite; j++) {
137	k = ali[j];
138	agree_flag = TRUE;
139	for (i = 0; i < maxspc; i++) {
140	if (seqconint[i][n] != seqchar[i][k]) {
141	agree_flag = FALSE;
142	break;
143	}
144	}
145	if (agree_flag == FALSE) {
146	n++;
147	for (i = 0; i < maxspc; i++) {
148	seqconint[i][n] = seqchar[i][k];
149	}
150	weight[n] = 1;
151	Alias[k] = n;
152	} else {
153	weight[n]++;
154	Alias[k] = n;
155	}
156	}
157	n++;
158	if (numptrn != n) {
159	/* Problem in condenceseq */
160	FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR A TO DEVELOPERS\n\n\n");
161	exit(1);
162	}
163	}
164
165	void countconstantsites(cmatrix seqpat, ivector weight, int maxspc, int numptrn,
166	int numconst, int numconstpat)
167	{
168	int character, s, i, constflag;
169
170	*numconst = 0;
171	*numconstpat = 0;
172	for (s = 0; s < numptrn; s++) { /* check all patterns */
173	constpat[s] = FALSE;
174	constflag = TRUE;
175	character = seqpat[0][s];
176	for (i = 1; i < maxspc; i++) {
177	if (seqpat[i][s] != character) {
178	constflag = FALSE;
179	break;
180	}
181	}
182	if (character != tpmradix && constflag) {
183	(numconst) = (numconst) + weight[s];
184	(*numconstpat)++;
185	constpat[s] = TRUE;
186	}
187	}
188	}
189
190	/*************************** exported functions ***************************/
191
192
193	void evaluateseqs()
194	{
195	ivector ali;
196
197	convfreq(Freqtpm); /* make all frequencies slightly different */
198	ali = new_ivector(Maxsite);
199	a_radixsort(Seqchar, ali, Maxspc, Maxsite, &Numptrn);
200	Seqpat = new_cmatrix(Maxspc, Numptrn);
201	constpat = new_ivector(Numptrn);
202	Weight = new_ivector(Numptrn);
203	condenceseq(Seqchar, ali, Seqpat, Weight, Maxspc, Maxsite, Numptrn);
204	free_ivector(ali);
205	countconstantsites(Seqpat, Weight, Maxspc, Numptrn, &Numconst, &Numconstpat);
206	fracconstpat = (double) Numconstpat / (double) Numptrn;
207	fracconst = (double) Numconst / (double) Maxsite;
208	}
209
210
211	/******************************************************************************/
212	/* computation of Pij(t) */
213	/******************************************************************************/
214
215
216	/*************************** internal functions ***************************/
217
218
219	void elmhes(dmatrix a, ivector ordr, int n)
220	{
221	int m, j, i;
222	double y, x;
223
224
225	for (i = 0; i < n; i++)
226	ordr[i] = 0;
227	for (m = 2; m < n; m++) {
228	x = 0.0;
229	i = m;
230	for (j = m; j <= n; j++) {
231	if (fabs(a[j - 1][m - 2]) > fabs(x)) {
232	x = a[j - 1][m - 2];
233	i = j;
234	}
235	}
236	ordr[m - 1] = i; /* vector */
237	if (i != m) {
238	for (j = m - 2; j < n; j++) {
239	y = a[i - 1][j];
240	a[i - 1][j] = a[m - 1][j];
241	a[m - 1][j] = y;
242	}
243	for (j = 0; j < n; j++) {
244	y = a[j][i - 1];
245	a[j][i - 1] = a[j][m - 1];
246	a[j][m - 1] = y;
247	}
248	}
249	if (x != 0.0) {
250	for (i = m; i < n; i++) {
251	y = a[i][m - 2];
252	if (y != 0.0) {
253	y /= x;
254	a[i][m - 2] = y;
255	for (j = m - 1; j < n; j++)
256	a[i][j] -= y * a[m - 1][j];
257	for (j = 0; j < n; j++)
258	a[j][m - 1] += y * a[j][i];
259	}
260	}
261	}
262	}
263	}
264
265
266	void eltran(dmatrix a, dmatrix zz, ivector ordr, int n)
267	{
268	int i, j, m;
269
270
271	for (i = 0; i < n; i++) {
272	for (j = i + 1; j < n; j++) {
273	zz[i][j] = 0.0;
274	zz[j][i] = 0.0;
275	}
276	zz[i][i] = 1.0;
277	}
278	if (n <= 2)
279	return;
280	for (m = n - 1; m >= 2; m--) {
281	for (i = m; i < n; i++)
282	zz[i][m - 1] = a[i][m - 2];
283	i = ordr[m - 1];
284	if (i != m) {
285	for (j = m - 1; j < n; j++) {
286	zz[m - 1][j] = zz[i - 1][j];
287	zz[i - 1][j] = 0.0;
288	}
289	zz[i - 1][m - 1] = 1.0;
290	}
291	}
292	}
293
294
/*
 * Complex division: (cr + i*ci) = (ar + i*ai) / (br + i*bi).
 *
 * ar, ai: real and imaginary part of the numerator
 * br, bi: real and imaginary part of the denominator
 * cr, ci: output, real and imaginary part of the quotient
 *
 * All four inputs are first divided by |br| + |bi| to keep the
 * intermediate products in range. No check is made for a zero denominator.
 */
void mcdiv(double ar, double ai, double br, double bi,
	double *cr, double *ci)
{
	double s, ars, ais, brs, bis;

	s = fabs(br) + fabs(bi);
	ars = ar / s;
	ais = ai / s;
	brs = br / s;
	bis = bi / s;
	s = brs * brs + bis * bis;
	*cr = (ars * brs + ais * bis) / s;
	*ci = (ais * brs - ars * bis) / s;
}
310
311
312	void hqr2(int n, int low, int hgh, dmatrix h,
313	dmatrix zz, dvector wr, dvector wi)
314	{
315	int i, j, k, l=0, m, en, na, itn, its;
316	double p=0, q=0, r=0, s=0, t, w, x=0, y, ra, sa, vi, vr, z=0, norm, tst1, tst2;
317	int notlas; /* boolean */
318
319
320	norm = 0.0;
321	k = 1;
322	/* store isolated roots and compute matrix norm */
323	for (i = 0; i < n; i++) {
324	for (j = k - 1; j < n; j++)
325	norm += fabs(h[i][j]);
326	k = i + 1;
327	if (i + 1 < low \|\| i + 1 > hgh) {
328	wr[i] = h[i][i];
329	wi[i] = 0.0;
330	}
331	}
332	en = hgh;
333	t = 0.0;
334	itn = n * 30;
335	while (en >= low) { /* search for next eigenvalues */
336	its = 0;
337	na = en - 1;
338	while (en >= 1) {
339	/* look for single small sub-diagonal element */
340	for (l = en; l > low; l--) {
341	s = fabs(h[l - 2][l - 2]) + fabs(h[l - 1][l - 1]);
342	if (s == 0.0)
343	s = norm;
344	tst1 = s;
345	tst2 = tst1 + fabs(h[l - 1][l - 2]);
346	if (tst2 == tst1)
347	goto L100;
348	}
349	l = low;
350	L100:
351	x = h[en - 1][en - 1]; /* form shift */
352	if (l == en \|\| l == na)
353	break;
354	if (itn == 0) {
355	/* all eigenvalues have not converged */
356	FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR B TO DEVELOPERS\n\n\n");
357	exit(1);
358	}
359	y = h[na - 1][na - 1];
360	w = h[en - 1][na - 1] * h[na - 1][en - 1];
361	/* form exceptional shift */
362	if (its == 10 \|\| its == 20) {
363	t += x;
364	for (i = low - 1; i < en; i++)
365	h[i][i] -= x;
366	s = fabs(h[en - 1][na - 1]) + fabs(h[na - 1][en - 3]);
367	x = 0.75 * s;
368	y = x;
369	w = -0.4375 * s * s;
370	}
371	its++;
372	itn--;
373	/* look for two consecutive small sub-diagonal elements */
374	for (m = en - 2; m >= l; m--) {
375	z = h[m - 1][m - 1];
376	r = x - z;
377	s = y - z;
378	p = (r * s - w) / h[m][m - 1] + h[m - 1][m];
379	q = h[m][m] - z - r - s;
380	r = h[m + 1][m];
381	s = fabs(p) + fabs(q) + fabs(r);
382	p /= s;
383	q /= s;
384	r /= s;
385	if (m == l)
386	break;
387	tst1 = fabs(p) *
388	(fabs(h[m - 2][m - 2]) + fabs(z) + fabs(h[m][m]));
389	tst2 = tst1 + fabs(h[m - 1][m - 2]) * (fabs(q) + fabs(r));
390	if (tst2 == tst1)
391	break;
392	}
393	for (i = m + 2; i <= en; i++) {
394	h[i - 1][i - 3] = 0.0;
395	if (i != m + 2)
396	h[i - 1][i - 4] = 0.0;
397	}
398	for (k = m; k <= na; k++) {
399	notlas = (k != na);
400	if (k != m) {
401	p = h[k - 1][k - 2];
402	q = h[k][k - 2];
403	r = 0.0;
404	if (notlas)
405	r = h[k + 1][k - 2];
406	x = fabs(p) + fabs(q) + fabs(r);
407	if (x != 0.0) {
408	p /= x;
409	q /= x;
410	r /= x;
411	}
412	}
413	if (x != 0.0) {
414	if (p < 0.0) /* sign */
415	s = - sqrt(p * p + q * q + r * r);
416	else
417	s = sqrt(p * p + q * q + r * r);
418	if (k != m)
419	h[k - 1][k - 2] = -s * x;
420	else {
421	if (l != m)
422	h[k - 1][k - 2] = -h[k - 1][k - 2];
423	}
424	p += s;
425	x = p / s;
426	y = q / s;
427	z = r / s;
428	q /= p;
429	r /= p;
430	if (!notlas) {
431	for (j = k - 1; j < n; j++) { /* row modification */
432	p = h[k - 1][j] + q * h[k][j];
433	h[k - 1][j] -= p * x;
434	h[k][j] -= p * y;
435	}
436	j = (en < (k + 3)) ? en : (k + 3); /* min */
437	for (i = 0; i < j; i++) { /* column modification */
438	p = x * h[i][k - 1] + y * h[i][k];
439	h[i][k - 1] -= p;
440	h[i][k] -= p * q;
441	}
442	/* accumulate transformations */
443	for (i = low - 1; i < hgh; i++) {
444	p = x * zz[i][k - 1] + y * zz[i][k];
445	zz[i][k - 1] -= p;
446	zz[i][k] -= p * q;
447	}
448	} else {
449	for (j = k - 1; j < n; j++) { /* row modification */
450	p = h[k - 1][j] + q * h[k][j] + r * h[k + 1][j];
451	h[k - 1][j] -= p * x;
452	h[k][j] -= p * y;
453	h[k + 1][j] -= p * z;
454	}
455	j = (en < (k + 3)) ? en : (k + 3); /* min */
456	for (i = 0; i < j; i++) { /* column modification */
457	p = x * h[i][k - 1] + y * h[i][k] + z * h[i][k + 1];
458	h[i][k - 1] -= p;
459	h[i][k] -= p * q;
460	h[i][k + 1] -= p * r;
461	}
462	/* accumulate transformations */
463	for (i = low - 1; i < hgh; i++) {
464	p = x * zz[i][k - 1] + y * zz[i][k] +
465	z * zz[i][k + 1];
466	zz[i][k - 1] -= p;
467	zz[i][k] -= p * q;
468	zz[i][k + 1] -= p * r;
469	}
470	}
471	}
472	} /* for k */
473	} /* while infinite loop */
474	if (l == en) { /* one root found */
475	h[en - 1][en - 1] = x + t;
476	wr[en - 1] = h[en - 1][en - 1];
477	wi[en - 1] = 0.0;
478	en = na;
479	continue;
480	}
481	y = h[na - 1][na - 1];
482	w = h[en - 1][na - 1] * h[na - 1][en - 1];
483	p = (y - x) / 2.0;
484	q = p * p + w;
485	z = sqrt(fabs(q));
486	h[en - 1][en - 1] = x + t;
487	x = h[en - 1][en - 1];
488	h[na - 1][na - 1] = y + t;
489	if (q >= 0.0) { /* real pair */
490	if (p < 0.0) /* sign */
491	z = p - fabs(z);
492	else
493	z = p + fabs(z);
494	wr[na - 1] = x + z;
495	wr[en - 1] = wr[na - 1];
496	if (z != 0.0)
497	wr[en - 1] = x - w / z;
498	wi[na - 1] = 0.0;
499	wi[en - 1] = 0.0;
500	x = h[en - 1][na - 1];
501	s = fabs(x) + fabs(z);
502	p = x / s;
503	q = z / s;
504	r = sqrt(p * p + q * q);
505	p /= r;
506	q /= r;
507	for (j = na - 1; j < n; j++) { /* row modification */
508	z = h[na - 1][j];
509	h[na - 1][j] = q * z + p * h[en - 1][j];
510	h[en - 1][j] = q * h[en - 1][j] - p * z;
511	}
512	for (i = 0; i < en; i++) { /* column modification */
513	z = h[i][na - 1];
514	h[i][na - 1] = q * z + p * h[i][en - 1];
515	h[i][en - 1] = q * h[i][en - 1] - p * z;
516	}
517	/* accumulate transformations */
518	for (i = low - 1; i < hgh; i++) {
519	z = zz[i][na - 1];
520	zz[i][na - 1] = q * z + p * zz[i][en - 1];
521	zz[i][en - 1] = q * zz[i][en - 1] - p * z;
522	}
523	} else { /* complex pair */
524	wr[na - 1] = x + p;
525	wr[en - 1] = x + p;
526	wi[na - 1] = z;
527	wi[en - 1] = -z;
528	}
529	en -= 2;
530	} /* while en >= low */
531	/* backsubstitute to find vectors of upper triangular form */
532	if (norm != 0.0) {
533	for (en = n; en >= 1; en--) {
534	p = wr[en - 1];
535	q = wi[en - 1];
536	na = en - 1;
537	if (q == 0.0) {/* real vector */
538	m = en;
539	h[en - 1][en - 1] = 1.0;
540	if (na != 0) {
541	for (i = en - 2; i >= 0; i--) {
542	w = h[i][i] - p;
543	r = 0.0;
544	for (j = m - 1; j < en; j++)
545	r += h[i][j] * h[j][en - 1];
546	if (wi[i] < 0.0) {
547	z = w;
548	s = r;
549	} else {
550	m = i + 1;
551	if (wi[i] == 0.0) {
552	t = w;
553	if (t == 0.0) {
554	tst1 = norm;
555	t = tst1;
556	do {
557	t = 0.01 * t;
558	tst2 = norm + t;
559	} while (tst2 > tst1);
560	}
561	h[i][en - 1] = -(r / t);
562	} else { /* solve real equations */
563	x = h[i][i + 1];
564	y = h[i + 1][i];
565	q = (wr[i] - p) * (wr[i] - p) + wi[i] * wi[i];
566	t = (x * s - z * r) / q;
567	h[i][en - 1] = t;
568	if (fabs(x) > fabs(z))
569	h[i + 1][en - 1] = (-r - w * t) / x;
570	else
571	h[i + 1][en - 1] = (-s - y * t) / z;
572	}
573	/* overflow control */
574	t = fabs(h[i][en - 1]);
575	if (t != 0.0) {
576	tst1 = t;
577	tst2 = tst1 + 1.0 / tst1;
578	if (tst2 <= tst1) {
579	for (j = i; j < en; j++)
580	h[j][en - 1] /= t;
581	}
582	}
583	}
584	}
585	}
586	} else if (q > 0.0) {
587	m = na;
588	if (fabs(h[en - 1][na - 1]) > fabs(h[na - 1][en - 1])) {
589	h[na - 1][na - 1] = q / h[en - 1][na - 1];
590	h[na - 1][en - 1] = (p - h[en - 1][en - 1]) /
591	h[en - 1][na - 1];
592	} else
593	mcdiv(0.0, -h[na - 1][en - 1], h[na - 1][na - 1] - p, q,
594	&h[na - 1][na - 1], &h[na - 1][en - 1]);
595	h[en - 1][na - 1] = 0.0;
596	h[en - 1][en - 1] = 1.0;
597	if (en != 2) {
598	for (i = en - 3; i >= 0; i--) {
599	w = h[i][i] - p;
600	ra = 0.0;
601	sa = 0.0;
602	for (j = m - 1; j < en; j++) {
603	ra += h[i][j] * h[j][na - 1];
604	sa += h[i][j] * h[j][en - 1];
605	}
606	if (wi[i] < 0.0) {
607	z = w;
608	r = ra;
609	s = sa;
610	} else {
611	m = i + 1;
612	if (wi[i] == 0.0)
613	mcdiv(-ra, -sa, w, q, &h[i][na - 1],
614	&h[i][en - 1]);
615	else { /* solve complex equations */
616	x = h[i][i + 1];
617	y = h[i + 1][i];
618	vr = (wr[i] - p) * (wr[i] - p);
619	vr = vr + wi[i] * wi[i] - q * q;
620	vi = (wr[i] - p) * 2.0 * q;
621	if (vr == 0.0 && vi == 0.0) {
622	tst1 = norm * (fabs(w) + fabs(q) + fabs(x) +
623	fabs(y) + fabs(z));
624	vr = tst1;
625	do {
626	vr = 0.01 * vr;
627	tst2 = tst1 + vr;
628	} while (tst2 > tst1);
629	}
630	mcdiv(x * r - z * ra + q * sa,
631	x * s - z * sa - q * ra, vr, vi,
632	&h[i][na - 1], &h[i][en - 1]);
633	if (fabs(x) > fabs(z) + fabs(q)) {
634	h[i + 1]
635	[na - 1] = (q * h[i][en - 1] -
636	w * h[i][na - 1] - ra) / x;
637	h[i + 1][en - 1] = (-sa - w * h[i][en - 1] -
638	q * h[i][na - 1]) / x;
639	} else
640	mcdiv(-r - y * h[i][na - 1],
641	-s - y * h[i][en - 1], z, q,
642	&h[i + 1][na - 1], &h[i + 1][en - 1]);
643	}
644	/* overflow control */
645	t = (fabs(h[i][na - 1]) > fabs(h[i][en - 1])) ?
646	fabs(h[i][na - 1]) : fabs(h[i][en - 1]);
647	if (t != 0.0) {
648	tst1 = t;
649	tst2 = tst1 + 1.0 / tst1;
650	if (tst2 <= tst1) {
651	for (j = i; j < en; j++) {
652	h[j][na - 1] /= t;
653	h[j][en - 1] /= t;
654	}
655	}
656	}
657	}
658	}
659	}
660	}
661	}
662	/* end back substitution. vectors of isolated roots */
663	for (i = 0; i < n; i++) {
664	if (i + 1 < low \|\| i + 1 > hgh) {
665	for (j = i; j < n; j++)
666	zz[i][j] = h[i][j];
667	}
668	}
669	/* multiply by transformation matrix to give vectors of
670	* original full matrix. */
671	for (j = n - 1; j >= low - 1; j--) {
672	m = ((j + 1) < hgh) ? (j + 1) : hgh; /* min */
673	for (i = low - 1; i < hgh; i++) {
674	z = 0.0;
675	for (k = low - 1; k < m; k++)
676	z += zz[i][k] * h[k][j];
677	zz[i][j] = z;
678	}
679	}
680	}
681	return;
682	}
683
684
685	/* make rate matrix with 0.01 expected substitutions per unit time */
686	void onepamratematrix(dmatrix a)
687	{
688	int i, j;
689	double delta, temp, sum;
690	dvector m;
691
692	for (i = 0; i < tpmradix; i++)
693	{
694	for (j = 0; j < tpmradix; j++)
695	{
696	a[i][j] = Freqtpm[j]*a[i][j];
697	}
698	}
699
700	m = new_dvector(tpmradix);
701	for (i = 0, sum = 0.0; i < tpmradix; i++)
702	{
703	for (j = 0, temp = 0.0; j < tpmradix; j++)
704	temp += a[i][j];
705	m[i] = temp; /* row sum */
706	sum += tempFreqtpm[i]; / exp. rate */
707	}
708	delta = 0.01 / sum; /* 0.01 subst. per unit time */
709	for (i = 0; i < tpmradix; i++) {
710	for (j = 0; j < tpmradix; j++) {
711	if (i != j)
712	a[i][j] = delta * a[i][j];
713	else
714	a[i][j] = delta * (-m[i]);
715	}
716	}
717	free_dvector(m);
718	}
719
720
721	void eigensystem(dvector eval, dmatrix evec)
722	{
723	dvector evali, forg;
724	dmatrix a, b;
725	ivector ordr;
726	int i, j, k, error;
727	double zero;
728
729
730	ordr = new_ivector(tpmradix);
731	evali = new_dvector(tpmradix);
732	forg = new_dvector(tpmradix);
733	a = new_dmatrix(tpmradix,tpmradix);
734	b = new_dmatrix(tpmradix,tpmradix);
735
736	rtfdata(a, forg); /* get relative transition matrix and frequencies */
737
738	onepamratematrix(a); /* make 1 PAM rate matrix */
739
740	/* copy a to b */
741	for (i = 0; i < tpmradix; i++)
742	for (j = 0; j < tpmradix; j++)
743	b[i][j] = a[i][j];
744
745	elmhes(a, ordr, tpmradix); /* compute eigenvalues and eigenvectors */
746	eltran(a, evec, ordr, tpmradix);
747	hqr2(tpmradix, 1, tpmradix, a, evec, eval, evali);
748
749	/* check eigenvalue equation */
750	error = FALSE;
751	for (j = 0; j < tpmradix; j++) {
752	for (i = 0, zero = 0.0; i < tpmradix; i++) {
753	for (k = 0; k < tpmradix; k++) zero += b[i][k] * evec[k][j];
754	zero -= eval[j] * evec[i][j];
755	if (fabs(zero) > 1.0e-5)
756	error = TRUE;
757	}
758	}
759	if (error)
760	FPRINTF(STDOUTFILE "\nWARNING: Eigensystem doesn't satisfy eigenvalue equation!\n");
761
762	free_ivector(ordr);
763	free_dvector(evali);
764	free_dvector(forg);
765	free_dmatrix(a);
766	free_dmatrix(b);
767	}
768
769
770	void luinverse(dmatrix inmat, dmatrix imtrx, int size)
771	{
772	double eps = 1.0e-20; /* ! */
773	int i, j, k, l, maxi=0, idx, ix, jx;
774	double sum, tmp, maxb, aw;
775	ivector index;
776	double *wk;
777	dmatrix omtrx;
778
779
780	index = new_ivector(tpmradix);
781	omtrx = new_dmatrix(tpmradix,tpmradix);
782
783	/* copy inmat to omtrx */
784	for (i = 0; i < tpmradix; i++)
785	for (j = 0; j < tpmradix; j++)
786	omtrx[i][j] = inmat[i][j];
787
788	wk = (double ) malloc((unsigned)size sizeof(double));
789	aw = 1.0;
790	for (i = 0; i < size; i++) {
791	maxb = 0.0;
792	for (j = 0; j < size; j++) {
793	if (fabs(omtrx[i][j]) > maxb)
794	maxb = fabs(omtrx[i][j]);
795	}
796	if (maxb == 0.0) {
797	/* Singular matrix */
798	FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR C TO DEVELOPERS\n\n\n");
799	exit(1);
800	}
801	wk[i] = 1.0 / maxb;
802	}
803	for (j = 0; j < size; j++) {
804	for (i = 0; i < j; i++) {
805	sum = omtrx[i][j];
806	for (k = 0; k < i; k++)
807	sum -= omtrx[i][k] * omtrx[k][j];
808	omtrx[i][j] = sum;
809	}
810	maxb = 0.0;
811	for (i = j; i < size; i++) {
812	sum = omtrx[i][j];
813	for (k = 0; k < j; k++)
814	sum -= omtrx[i][k] * omtrx[k][j];
815	omtrx[i][j] = sum;
816	tmp = wk[i] * fabs(sum);
817	if (tmp >= maxb) {
818	maxb = tmp;
819	maxi = i;
820	}
821	}
822	if (j != maxi) {
823	for (k = 0; k < size; k++) {
824	tmp = omtrx[maxi][k];
825	omtrx[maxi][k] = omtrx[j][k];
826	omtrx[j][k] = tmp;
827	}
828	aw = -aw;
829	wk[maxi] = wk[j];
830	}
831	index[j] = maxi;
832	if (omtrx[j][j] == 0.0)
833	omtrx[j][j] = eps;
834	if (j != size - 1) {
835	tmp = 1.0 / omtrx[j][j];
836	for (i = j + 1; i < size; i++)
837	omtrx[i][j] *= tmp;
838	}
839	}
840	for (jx = 0; jx < size; jx++) {
841	for (ix = 0; ix < size; ix++)
842	wk[ix] = 0.0;
843	wk[jx] = 1.0;
844	l = -1;
845	for (i = 0; i < size; i++) {
846	idx = index[i];
847	sum = wk[idx];
848	wk[idx] = wk[i];
849	if (l != -1) {
850	for (j = l; j < i; j++)
851	sum -= omtrx[i][j] * wk[j];
852	} else if (sum != 0.0)
853	l = i;
854	wk[i] = sum;
855	}
856	for (i = size - 1; i >= 0; i--) {
857	sum = wk[i];
858	for (j = i + 1; j < size; j++)
859	sum -= omtrx[i][j] * wk[j];
860	wk[i] = sum / omtrx[i][i];
861	}
862	for (ix = 0; ix < size; ix++)
863	imtrx[ix][jx] = wk[ix];
864	}
865	free((char *)wk);
866	wk = NULL;
867	free_ivector(index);
868	free_dmatrix(omtrx);
869	}
870
871
872	void checkevector(dmatrix evec, dmatrix ivec, int nn)
873	{
874	int i, j, ia, ib, ic, error;
875	dmatrix matx;
876	double sum;
877
878
879	matx = new_dmatrix(nn, nn);
880	/* multiply matrix of eigenvectors and its inverse */
881	for (ia = 0; ia < nn; ia++) {
882	for (ic = 0; ic < nn; ic++) {
883	sum = 0.0;
884	for (ib = 0; ib < nn; ib++) sum += evec[ia][ib] * ivec[ib][ic];
885	matx[ia][ic] = sum;
886	}
887	}
888	/* check whether the unitary matrix is obtained */
889	error = FALSE;
890	for (i = 0; i < nn; i++) {
891	for (j = 0; j < nn; j++) {
892	if (i == j) {
893	if (fabs(matx[i][j] - 1.0) > 1.0e-5)
894	error = TRUE;
895	} else {
896	if (fabs(matx[i][j]) > 1.0e-5)
897	error = TRUE;
898	}
899	}
900	}
901	if (error) {
902	FPRINTF(STDOUTFILE "\nWARNING: Inversion of eigenvector matrix not perfect!\n");
903	}
904	free_dmatrix(matx);
905	}
906
907
908	/*************************** exported functions ***************************/
909
910
911	/* compute 1 PAM rate matrix, its eigensystem, and the inverse matrix thereof */
912	void tranprobmat()
913	{
914	eigensystem(Eval, Evec); /* eigensystem of 1 PAM rate matrix */
915	luinverse(Evec, Ievc, tpmradix); /* inverse eigenvectors are in Ievc */
916	checkevector(Evec, Ievc, tpmradix); /* check whether inversion was OK */
917	}
918
919
920	/* compute P(t) */
921	void tprobmtrx(double arc, dmatrix tpr)
922	{
923	register int i, j, k;
924	register double temp;
925
926
927	for (k = 0; k < tpmradix; k++) {
928	temp = exp(arc * Eval[k]);
929	for (j = 0; j < tpmradix; j++)
930	iexp[k][j] = Ievc[k][j] * temp;
931	}
932	for (i = 0; i < tpmradix; i++) {
933	for (j = 0; j < tpmradix; j++) {
934	temp = 0.0;
935	for (k = 0; k < tpmradix; k++)
936	temp += Evec[i][k] * iexp[k][j];
937	tpr[i][j] = fabs(temp);
938	}
939	}
940	}
941
942
943	/******************************************************************************/
944	/* estimation of maximum likelihood distances */
945	/******************************************************************************/
946
947	/* compute total log-likelihood
948	input: likelihoods for each site and non-zero rate
949	output: total log-likelihood (incl. zero rate category) */
950	double comptotloglkl(dmatrix cdl)
951	{
952	int k, r;
953	double loglkl, fv, fv2, sitelkl;
954
955	loglkl = 0.0;
956	fv = 1.0-fracinv;
957	fv2 = (1.0-fracinv)/(double) numcats;
958
959	if (numcats == 1) {
960
961	for (k = 0; k < Numptrn; k++) {
962
963	/* compute likelihood for pattern k */
964	sitelkl = cdl[0][k]*fv;
965	if (constpat[k] == TRUE)
966	sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
967
968	/* total log-likelihood */
969	loglkl += log(sitelkl)*Weight[k];
970
971	}
972
973	} else {
974
975	for (k = 0; k < Numptrn; k++) {
976
977	/* this general routine works always but it's better
978	to run it only when it's really necessary */
979
980	/* compute likelihood for pattern k */
981	sitelkl = 0.0;
982	for (r = 0; r < numcats; r++)
983	sitelkl += cdl[r][k];
984	sitelkl = fv2*sitelkl;
985	if (constpat[k] == TRUE)
986	sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
987
988	/* total log-likelihood */
989	loglkl += log(sitelkl)*Weight[k];
990
991	}
992
993	}
994
995	return loglkl;
996	}
997
998
999	/* computes the site log-likelihoods
1000	input: likelihoods for each site and non-zero rate
1001	output: log-likelihood for each site */
1002	void allsitelkl(dmatrix cdl, dvector aslkl)
1003	{
1004	int k, r;
1005	double fv, fv2, sitelkl;
1006
1007	fv = 1.0-fracinv;
1008	fv2 = (1.0-fracinv)/(double) numcats;
1009
1010	if (numcats == 1) {
1011
1012	for (k = 0; k < Numptrn; k++) {
1013
1014	/* compute likelihood for pattern k */
1015	sitelkl = cdl[0][k]*fv;
1016	if (constpat[k] == TRUE)
1017	sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1018
1019	/* site log-likelihood */
1020	aslkl[k] = log(sitelkl);
1021	}
1022
1023	} else {
1024
1025	for (k = 0; k < Numptrn; k++) {
1026
1027	/* this general routine works always but it's better
1028	to run it only when it's really necessary */
1029
1030	/* compute likelihood for pattern k */
1031	sitelkl = 0.0;
1032	for (r = 0; r < numcats; r++)
1033	sitelkl += cdl[r][k];
1034	sitelkl = fv2*sitelkl;
1035	if (constpat[k] == TRUE)
1036	sitelkl += fracinv*Freqtpm[(int) Seqpat[0][k]];
1037
1038	/* total log-likelihood */
1039	aslkl[k] = log(sitelkl);
1040
1041	}
1042	}
1043	}
1044
1045
1046	/*************************** internal functions ***************************/
1047
1048	/* compute negative log-likelihood of distance arc between sequences seqchi/j */
1049	double pairlkl(double arc)
1050	{
1051	int k, r, ci, cj;
1052	double loglkl, fv, sitelkl;
1053
1054
1055	/* compute tpms */
1056	for (r = 0; r < numcats; r++)
1057	/* compute tpm for rate category r */
1058	tprobmtrx(arc*Rates[r], ltprobr[r]);
1059
1060	loglkl = 0.0;
1061	fv = 1.0-fracinv;
1062
1063	if (numcats == 1) {
1064
1065	for (k = 0; k < Numptrn; k++) {
1066
1067	/* compute likelihood for site k */
1068	ci = seqchi[k];
1069	cj = seqchj[k];
1070	if (ci != tpmradix && cj != tpmradix)
1071	sitelkl = ltprobr[0][ci][cj]*fv;
1072	else
1073	sitelkl = fv;
1074	if (ci == cj && ci != tpmradix)
1075	sitelkl += fracinv*Freqtpm[ci];
1076
1077	/* total log-likelihood */
1078	loglkl += log(sitelkl)*Weight[k];
1079
1080	}
1081
1082	} else {
1083
1084	for (k = 0; k < Numptrn; k++) {
1085
1086	/* this general routine works always but it's better
1087	to run it only when it's really necessary */
1088
1089	/* compute likelihood for site k */
1090	ci = seqchi[k];
1091	cj = seqchj[k];
1092	if (ci != tpmradix && cj != tpmradix) {
1093	sitelkl = 0.0;
1094	for (r = 0; r < numcats; r++)
1095	sitelkl += ltprobr[r][ci][cj];
1096	sitelkl = fv*sitelkl/(double) numcats;
1097	} else
1098	sitelkl = fv;
1099	if (ci == cj && ci != tpmradix)
1100	sitelkl += fracinv*Freqtpm[ci];
1101
1102	/* total log-likelihood */
1103	loglkl += log(sitelkl)*Weight[k];
1104
1105	}
1106
1107	}
1108
1109	/* return negative log-likelihood as we use a minimizing procedure */
1110	return -loglkl;
1111	}
1112
1113
1114	/*************************** exported functions ***************************/
1115
1116
1117	/* maximum likelihood distance between sequence i and j */
1118	double mldistance(int i, int j)
1119	{
1120	double dist, fx, f2x;
1121
1122	if (i == j) return 0.0;
1123
1124	/* use old distance as start value */
1125	dist = Distanmat[i][j];
1126
1127	if (dist == 0.0) return 0.0;
1128
1129	seqchi = Seqpat[i];
1130	seqchj = Seqpat[j];
1131
1132	if (dist <= MINARC) dist = MINARC+1.0;
1133	if (dist >= MAXARC) dist = MAXARC-1.0;
1134
1135	dist = onedimenmin(MINARC, dist, MAXARC, pairlkl, EPSILON, &fx, &f2x);
1136
1137	return dist;
1138	}
1139
1140
1141	/* initialize distance matrix */
1142	void initdistan()
1143	{
1144	int i, j, k, diff, x, y;
1145	double obs, temp;
1146
1147	for (i = 0; i < Maxspc; i++) {
1148	Distanmat[i][i] = 0.0;
1149	for (j = i + 1; j < Maxspc; j++) {
1150	seqchi = Seqpat[i];
1151	seqchj = Seqpat[j];
1152
1153	/* count observed differences */
1154	diff = 0;
1155	for (k = 0; k < Numptrn; k++) {
1156	x = seqchi[k];
1157	y = seqchj[k];
1158	if (x != y &&
1159	x != tpmradix &&
1160	y != tpmradix)
1161	diff += Weight[k];
1162	}
1163	if (diff == 0)
1164	Distanmat[i][j] = 0.0;
1165	else {
1166	/* use generalized JC correction to get first estimate
1167	(for the SH model the observed distance is used) */
1168	/* observed distance */
1169	obs = (double) diff / (double) Maxsite;
1170	temp = 1.0 - (double) obs*tpmradix/(tpmradix-1.0);
1171	if (temp > 0.0 && !(data_optn == 0 && SH_optn))
1172	/* use JC corrected distance */
1173	Distanmat[i][j] = -100.0(tpmradix-1.0)/tpmradix log(temp);
1174	else
1175	/* use observed distance */
1176	Distanmat[i][j] = obs * 100.0;
1177	if (Distanmat[i][j] < MINARC) Distanmat[i][j] = MINARC;
1178	if (Distanmat[i][j] > MAXARC) Distanmat[i][j] = MAXARC;
1179	}
1180	Distanmat[j][i] = Distanmat[i][j];
1181	}
1182	}
1183	}
1184
1185	/* compute distance matrix */
1186	void computedistan()
1187	{
1188	int i, j;
1189
1190	for (i = 0; i < Maxspc; i++)
1191	for (j = i; j < Maxspc; j++) {
1192	Distanmat[i][j] = mldistance(i, j);
1193	Distanmat[j][i] = Distanmat[i][j];
1194	}
1195	}
1196
1197
1198	/******************************************************************************/
1199	/* computation of maximum likelihood edge lengths for a given tree */
1200	/******************************************************************************/
1201
1202
1203	/*************************** internal functions ***************************/
1204
1205
1206	/* multiply partial likelihoods */
1207	void productpartials(Node *op)
1208	{
1209	Node *cp;
1210	int i, j, r;
1211	dcube opc, cpc;
1212
1213	cp = op;
1214	opc = op->partials;
1215	while (cp->isop->isop != op) {
1216	cp = cp->isop;
1217	cpc = cp->partials;
1218	for (r = 0; r < numcats; r++)
1219	for (i = 0; i < Numptrn; i++)
1220	for (j = 0; j < tpmradix; j++)
1221	opc[r][i][j] *= cpc[r][i][j];
1222	}
1223	}
1224
1225
1226	/* compute internal partial likelihoods */
1227	void partialsinternal(Node *op)
1228	{
1229	int i, j, k, r;
1230	double sum;
1231	dcube oprob, cprob;
1232
1233	if (clockmode == 1) { /* clocklike branch lengths */
1234	for (r = 0; r < numcats; r++) {
1235	tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1236	}
1237	} else { /* non-clocklike branch lengths */
1238	for (r = 0; r < numcats; r++) {
1239	tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1240	}
1241	}
1242
1243	oprob = op->partials;
1244	cprob = op->kinp->isop->partials;
1245	for (r = 0; r < numcats; r++) {
1246	for (k = 0; k < Numptrn; k++) {
1247	for (i = 0; i < tpmradix; i++) {
1248	sum = 0.0;
1249	for (j = 0; j < tpmradix; j++)
1250	sum += ltprobr[r][i][j] * cprob[r][k][j];
1251	oprob[r][k][i] = sum;
1252	}
1253	}
1254	}
1255	}
1256
1257
1258	/* compute external partial likelihoods */
1259	void partialsexternal(Node *op)
1260	{
1261	int i, j, k, r;
1262	dcube oprob;
1263	cvector dseqi;
1264
1265	if (clockmode == 1) { /* clocklike branch lengths */
1266	for (r = 0; r < numcats; r++) {
1267	tprobmtrx((op->lengthc)*Rates[r], ltprobr[r]);
1268	}
1269	} else { /* nonclocklike branch lengths */
1270	for (r = 0; r < numcats; r++) {
1271	tprobmtrx((op->length)*Rates[r], ltprobr[r]);
1272	}
1273	}
1274
1275	oprob = op->partials;
1276	dseqi = op->kinp->eprob;
1277	for (r = 0; r < numcats; r++) {
1278	for (k = 0; k < Numptrn; k++) {
1279	if ((j = dseqi[k]) == tpmradix) {
1280	for (i = 0; i < tpmradix; i++)
1281	oprob[r][k][i] = 1.0;
1282	} else {
1283	for (i = 0; i < tpmradix; i++)
1284	oprob[r][k][i] = ltprobr[r][i][j];
1285	}
1286	}
1287	}
1288	}
1289
1290
1291	/* compute all partial likelihoods */
1292	void initpartials(Tree *tr)
1293	{
1294	Node cp, rp;
1295
1296	cp = rp = tr->rootp;
1297	do {
1298	cp = cp->isop->kinp;
1299	if (cp->isop == NULL) { /* external node */
1300	cp = cp->kinp; /* not descen */
1301	partialsexternal(cp);
1302	} else { /* internal node */
1303	if (!cp->descen) {
1304	productpartials(cp->kinp->isop);
1305	partialsinternal(cp);
1306	}
1307	}
1308	} while (cp != rp);
1309	}
1310
1311
1312	/* compute log-likelihood given internal branch with length arc
1313	between partials partiali and partials partialj */
1314	double intlkl(double arc)
1315	{
1316	double sumlk, slk;
1317	int r, s, i, j;
1318	dmatrix cdl;
1319
1320	cdl = Ctree->condlkl;
1321	for (r = 0; r < numcats; r++) {
1322	tprobmtrx(arc*Rates[r], ltprobr[r]);
1323	}
1324	for (r = 0; r < numcats; r++) {
1325	for (s = 0; s < Numptrn; s++) {
1326	sumlk = 0.0;
1327	for (i = 0; i < tpmradix; i++) {
1328	slk = 0.0;
1329	for (j = 0; j < tpmradix; j++)
1330	slk += partialj[r][s][j] * ltprobr[r][i][j];
1331	sumlk += Freqtpm[i] * partiali[r][s][i] * slk;
1332	}
1333	cdl[r][s] = sumlk;
1334	}
1335	}
1336
1337	/* compute total log-likelihood for current tree */
1338	Ctree->lklhd = comptotloglkl(cdl);
1339
1340	return -(Ctree->lklhd); /* we use a minimizing procedure */
1341	}
1342
1343
1344	/* optimize internal branch */
1345	void optinternalbranch(Node *op)
1346	{
1347	double arc, fx, f2x;
1348
1349	partiali = op->isop->partials;
1350	partialj = op->kinp->isop->partials;
1351	arc = op->length; /* nonclocklike branch lengths */
1352	if (arc <= MINARC) arc = MINARC+1.0;
1353	if (arc >= MAXARC) arc = MAXARC-1.0;
1354	arc = onedimenmin(MINARC, arc, MAXARC, intlkl, EPSILON, &fx, &f2x);
1355	op->kinp->length = arc;
1356	op->length = arc;
1357
1358	/* variance of branch length */
1359	f2x = fabs(f2x);
1360	if (1.0/(MAXARC*MAXARC) < f2x)
1361	op->varlen = 1.0/f2x;
1362	else
1363	op->varlen = MAXARC*MAXARC;
1364	}
1365
1366
1367	/* compute log-likelihood given external branch with length arc
1368	between partials partiali and sequence seqchi */
1369	double extlkl(double arc)
1370	{
1371	double sumlk;
1372	int r, s, i, j;
1373	dvector opb;
1374	dmatrix cdl;
1375
1376	cdl = Ctree->condlkl;
1377	for (r = 0; r < numcats; r++) {
1378	tprobmtrx(arc*Rates[r], ltprobr[r]);
1379	}
1380	for (r = 0; r < numcats; r++) {
1381	for (s = 0; s < Numptrn; s++) {
1382	opb = partiali[r][s];
1383	sumlk = 0.0;
1384	if ((j = seqchi[s]) != tpmradix) {
1385	for (i = 0; i < tpmradix; i++)
1386	sumlk += (Freqtpm[i] * (opb[i] * ltprobr[r][i][j]));
1387	} else {
1388	for (i = 0; i < tpmradix; i++)
1389	sumlk += Freqtpm[i] * opb[i];
1390	}
1391	cdl[r][s] = sumlk;
1392	}
1393	}
1394
1395	/* compute total log-likelihood for current tree */
1396	Ctree->lklhd = comptotloglkl(cdl);
1397
1398	return -(Ctree->lklhd); /* we use a minimizing procedure */
1399	}
1400
1401	/* optimize external branch */
1402	void optexternalbranch(Node *op)
1403	{
1404	double arc, fx, f2x;
1405
1406	partiali = op->isop->partials;
1407	seqchi = op->kinp->eprob;
1408	arc = op->length; /* nonclocklike branch lengths */
1409	if (arc <= MINARC) arc = MINARC+1.0;
1410	if (arc >= MAXARC) arc = MAXARC-1.0;
1411	arc = onedimenmin(MINARC, arc, MAXARC, extlkl, EPSILON, &fx, &f2x);
1412	op->kinp->length = arc;
1413	op->length = arc;
1414
1415	/* variance of branch length */
1416	f2x = fabs(f2x);
1417	if (1.0/(MAXARC*MAXARC) < f2x)
1418	op->varlen = 1.0/f2x;
1419	else
1420	op->varlen = MAXARC*MAXARC;
1421	}
1422
1423
1424	/* finish likelihoods for each rate and site */
1425	void finishlkl(Node *op)
1426	{
1427	int r, k, i, j;
1428	double arc, sumlk, slk;
1429	dmatrix cdl;
1430
1431	partiali = op->isop->partials;
1432	partialj = op->kinp->isop->partials;
1433	cdl = Ctree->condlkl;
1434	arc = op->length; /* nonclocklike branch lengths */
1435	for (r = 0; r < numcats; r++) {
1436	tprobmtrx(arc*Rates[r], ltprobr[r]);
1437	}
1438	for (r = 0; r < numcats; r++) {
1439	for (k = 0; k < Numptrn; k++) {
1440	sumlk = 0.0;
1441	for (i = 0; i < tpmradix; i++) {
1442	slk = 0.0;
1443	for (j = 0; j < tpmradix; j++)
1444	slk += partialj[r][k][j] * ltprobr[r][i][j];
1445	sumlk += Freqtpm[i] * partiali[r][k][i] * slk;
1446	}
1447	cdl[r][k] = sumlk;
1448	}
1449	}
1450	}
1451
1452
1453	/*************************** exported functions ***************************/
1454
1455
1456	/* optimize branch lengths to get maximum likelihood (nonclocklike branchs) */
1457	double optlkl(Tree *tr)
1458	{
1459	Node cp, rp;
1460	int nconv;
1461	double lendiff;
1462
1463	clockmode = 0; /* nonclocklike branch lengths */
1464	nconv = 0;
1465	Converg = FALSE;
1466	initpartials(tr);
1467	for (Numit = 1; (Numit <= MAXIT) && (!Converg); Numit++) {
1468
1469	cp = rp = tr->rootp;
1470	do {
1471	cp = cp->isop->kinp;
1472	productpartials(cp->kinp->isop);
1473	if (cp->isop == NULL) { /* external node */
1474	cp = cp->kinp; /* not descen */
1475
1476	lendiff = cp->length;
1477	optexternalbranch(cp);
1478	lendiff = fabs(lendiff - cp->length);
1479	if (lendiff < EPSILON) nconv++;
1480	else nconv = 0;
1481
1482	partialsexternal(cp);
1483	} else { /* internal node */
1484	if (cp->descen) {
1485	partialsinternal(cp);
1486	} else {
1487
1488	lendiff = cp->length;
1489	optinternalbranch(cp);
1490	lendiff = fabs(lendiff - cp->length);
1491	if (lendiff < EPSILON) nconv++;
1492	else nconv = 0;
1493
1494	/* eventually compute likelihoods for each site */
1495	if ((cp->number == Numibrnch-1 && lendiff < EPSILON) \|\|
1496	Numit == MAXIT-1) finishlkl(cp);
1497
1498	partialsinternal(cp);
1499	}
1500	}
1501	if (nconv >= Numbrnch) { /* convergence */
1502	Converg = TRUE;
1503	cp = rp; /* get out of here */
1504	}
1505	} while (cp != rp);
1506	}
1507
1508	/* compute total log-likelihood for current tree */
1509	return comptotloglkl(tr->condlkl);
1510	}
1511
1512
1513	/* compute likelihood of tree for given branch lengths */
1514	double treelkl(Tree *tr)
1515	{
1516	int i, k, r;
1517	Node *cp;
1518	dmatrix cdl;
1519	dcube prob1, prob2;
1520	double sumlk;
1521
1522	/* compute for each site and rate log-likelihoods */
1523	initpartials(tr);
1524	cp = tr->rootp;
1525	productpartials(cp->isop);
1526	prob1 = cp->partials;
1527	prob2 = cp->isop->partials;
1528	cdl = tr->condlkl;
1529	for (r = 0; r < numcats; r++) {
1530	for (k = 0; k < Numptrn; k++) {
1531	sumlk = 0.0;
1532	for (i = 0; i < tpmradix; i++)
1533	sumlk += Freqtpm[i] * (prob1[r][k][i] * prob2[r][k][i]);
1534	cdl[r][k] = sumlk;
1535	}
1536	}
1537
1538	/* return total log-likelihood for current tree */
1539	return comptotloglkl(cdl);
1540	}
1541
1542
1543	/******************************************************************************/
1544	/* least-squares estimate of branch lengths */
1545	/******************************************************************************/
1546
1547
1548	/*************************** internal functions ***************************/
1549
1550
1551	void luequation(dmatrix amat, dvector yvec, int size)
1552	{
1553	double eps = 1.0e-20; /* ! */
1554	int i, j, k, l, maxi=0, idx;
1555	double sum, tmp, maxb, aw;
1556	dvector wk;
1557	ivector index;
1558
1559
1560	wk = new_dvector(size);
1561	index = new_ivector(size);
1562	aw = 1.0;
1563	for (i = 0; i < size; i++) {
1564	maxb = 0.0;
1565	for (j = 0; j < size; j++) {
1566	if (fabs(amat[i][j]) > maxb)
1567	maxb = fabs(amat[i][j]);
1568	}
1569	if (maxb == 0.0) {
1570	/* Singular matrix */
1571	FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR D TO DEVELOPERS\n\n\n");
1572	exit(1);
1573	}
1574	wk[i] = 1.0 / maxb;
1575	}
1576	for (j = 0; j < size; j++) {
1577	for (i = 0; i < j; i++) {
1578	sum = amat[i][j];
1579	for (k = 0; k < i; k++)
1580	sum -= amat[i][k] * amat[k][j];
1581	amat[i][j] = sum;
1582	}
1583	maxb = 0.0;
1584	for (i = j; i < size; i++) {
1585	sum = amat[i][j];
1586	for (k = 0; k < j; k++)
1587	sum -= amat[i][k] * amat[k][j];
1588	amat[i][j] = sum;
1589	tmp = wk[i] * fabs(sum);
1590	if (tmp >= maxb) {
1591	maxb = tmp;
1592	maxi = i;
1593	}
1594	}
1595	if (j != maxi) {
1596	for (k = 0; k < size; k++) {
1597	tmp = amat[maxi][k];
1598	amat[maxi][k] = amat[j][k];
1599	amat[j][k] = tmp;
1600	}
1601	aw = -aw;
1602	wk[maxi] = wk[j];
1603	}
1604	index[j] = maxi;
1605	if (amat[j][j] == 0.0)
1606	amat[j][j] = eps;
1607	if (j != size - 1) {
1608	tmp = 1.0 / amat[j][j];
1609	for (i = j + 1; i < size; i++)
1610	amat[i][j] *= tmp;
1611	}
1612	}
1613	l = -1;
1614	for (i = 0; i < size; i++) {
1615	idx = index[i];
1616	sum = yvec[idx];
1617	yvec[idx] = yvec[i];
1618	if (l != -1) {
1619	for (j = l; j < i; j++)
1620	sum -= amat[i][j] * yvec[j];
1621	} else if (sum != 0.0)
1622	l = i;
1623	yvec[i] = sum;
1624	}
1625	for (i = size - 1; i >= 0; i--) {
1626	sum = yvec[i];
1627	for (j = i + 1; j < size; j++)
1628	sum -= amat[i][j] * yvec[j];
1629	yvec[i] = sum / amat[i][i];
1630	}
1631	free_ivector(index);
1632	free_dvector(wk);
1633	}
1634
1635
1636	/* least square estimation of branch lengths
1637	used for the approximate ML and as starting point
1638	in the calculation of the exact value of the ML */
1639	void lslength(Tree *tr, dvector distanvec, int numspc, int numibrnch, dvector Brnlength)
1640	{
1641	int i, i1, j, j1, j2, k, numbrnch, numpair;
1642	double sum, leng, alllen, rss;
1643	ivector pths;
1644	dmatrix atmt, atamt;
1645	Node ebp, ibp;
1646
1647	numbrnch = numspc + numibrnch;
1648	numpair = (numspc * (numspc - 1)) / 2;
1649	atmt = new_dmatrix(numbrnch, numpair);
1650	atamt = new_dmatrix(numbrnch, numbrnch);
1651	ebp = tr->ebrnchp;
1652	ibp = tr->ibrnchp;
1653	for (i = 0; i < numspc; i++) {
1654	for (j1 = 1, j = 0; j1 < numspc; j1++) {
1655	if (j1 == i) {
1656	for (j2 = 0; j2 < j1; j2++, j++) {
1657	atmt[i][j] = 1.0;
1658	}
1659	} else {
1660	for (j2 = 0; j2 < j1; j2++, j++) {
1661	if (j2 == i)
1662	atmt[i][j] = 1.0;
1663	else
1664	atmt[i][j] = 0.0;
1665	}
1666	}
1667	}
1668	}
1669	for (i1 = 0, i = numspc; i1 < numibrnch; i1++, i++) {
1670	pths = ibp[i1]->paths;
1671	for (j1 = 1, j = 0; j1 < numspc; j1++) {
1672	for (j2 = 0; j2 < j1; j2++, j++) {
1673	if (pths[j1] != pths[j2])
1674	atmt[i][j] = 1.0;
1675	else
1676	atmt[i][j] = 0.0;
1677	}
1678	}
1679	}
1680	for (i = 0; i < numbrnch; i++) {
1681	for (j = 0; j <= i; j++) {
1682	for (k = 0, sum = 0.0; k < numpair; k++)
1683	sum += atmt[i][k] * atmt[j][k];
1684	atamt[i][j] = sum;
1685	atamt[j][i] = sum;
1686	}
1687	}
1688	for (i = 0; i < numbrnch; i++) {
1689	for (k = 0, sum = 0.0; k < numpair; k++)
1690	sum += atmt[i][k] * distanvec[k];
1691	Brnlength[i] = sum;
1692	}
1693	luequation(atamt, Brnlength, numbrnch);
1694	for (i = 0, rss = 0.0; i < numpair; i++) {
1695	sum = distanvec[i];
1696	for (j = 0; j < numbrnch; j++) {
1697	if (atmt[j][i] == 1.0 && Brnlength[j] > 0.0)
1698	sum -= Brnlength[j];
1699	}
1700	rss += sum * sum;
1701	}
1702	tr->rssleast = sqrt(rss);
1703	alllen = 0.0;
1704	for (i = 0; i < numspc; i++) {
1705	leng = Brnlength[i];
1706	alllen += leng;
1707	if (leng < MINARC) leng = MINARC;
1708	if (leng > MAXARC) leng = MAXARC;
1709	if (clockmode) { /* clock */
1710	ebp[i]->lengthc = leng;
1711	ebp[i]->kinp->lengthc = leng;
1712	} else { /* no clock */
1713	ebp[i]->length = leng;
1714	ebp[i]->kinp->length = leng;
1715	}
1716	Brnlength[i] = leng;
1717	}
1718	for (i = 0, j = numspc; i < numibrnch; i++, j++) {
1719	leng = Brnlength[j];
1720	alllen += leng;
1721	if (leng < MINARC) leng = MINARC;
1722	if (leng > MAXARC) leng = MAXARC;
1723	if (clockmode) { /* clock */
1724	ibp[i]->lengthc = leng;
1725	ibp[i]->kinp->lengthc = leng;
1726	} else { /* no clock */
1727	ibp[i]->length = leng;
1728	ibp[i]->kinp->length = leng;
1729	}
1730	Brnlength[j] = leng;
1731	}
1732	free_dmatrix(atmt);
1733	free_dmatrix(atamt);
1734	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: