source: branches/help/GDE/MUSCLE/src/threewaywt.cpp

Last change on this file was 10390, checked in by aboeckma, 11 years ago

added muscle sourcles amd makefile

File size: 10.2 KB
Line 
1#include "muscle.h"
2#include "tree.h"
3#include <math.h>
4
5#define TRACE   0
6
7/***
8Sequence weights derived from a tree using Gotoh's
9three-way method.
10
11        Gotoh (1995) CABIOS 11(5), 543-51.
12
13Each edge e is assigned a weight w(e).
14
15Consider first a tree with three leaves A,B and C
16having branch lengths a, b and c, as follows.
17
18            B
19            |
20            b
21            |
22    A---a---R---c---C
23
24The internal node is denoted by R.
25
26Define:
27
28        S = (ab + ca + ab)
29        x = bc(a + b)(a + c)
30        y = a(b + c)FS
31
32Here F is a tunable normalization factor which is
33approximately 1.0. Then the edge weight for AR
34is computed as:
35
36        w(AR) = sqrt(x/y)
37
38Similar expressions for the other edges follow by
39symmetry.
40
41For a tree with more than three edges, the weight
42of an edge that ends in a leaf is computed from
43the three-way tree that includes the edge and
44its two neighbors. The weight of an internal edge
45is computed as the product of the weights for that
46edge derived from the two three-way subtrees that
47include that edge.
48
49For example, consider the following tree.
50
51       B
52       |
53    A--R--V--C
54          |
55          D
56
57Here, w(RV) is computed as the product of the
58two values for w(RV) derived from the three-way
59trees with leaves ABV and RCD respectively.
60
61The calculation is done using "Gotoh lengths",
62not the real edge lengths.
63
64The Gotoh length G of a directed edge is calculated
65recursively as:
66
67        G = d + LR/(L + R)
68
69where d is the length of the edge, and L and R are
70the Gotoh lengths of the left and right edges adjoining
71the terminal end of the edge. If the edge terminates on
72a leaf, then G=d.
73
74Pairwise sequence weights are computed as the
75product of edge weights on the path that connects
76their leaves.
77
78If the tree is split into two subtrees by deleting
79a given edge e, then the pairwise weights factorize.
80For operations on profiles formed from the two
81subtrees, it is possible to assign a weight to a
82sequence as the product of edge weights on a path
83from e to its leaf.
84***/
85
86// The xxxUnrooted functions present a rooted tree as
87// if it had been unrooted by deleting the root node.
88static unsigned GetFirstNeighborUnrooted(const Tree &tree, unsigned uNode1,
89  unsigned uNode2)
90        {
91        if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
92                Quit("GetFirstNeighborUnrooted, should never be called with root");
93        if (!tree.IsEdge(uNode1, uNode2))
94                {
95                if (!tree.IsRoot(tree.GetParent(uNode1)) ||
96                  !tree.IsRoot(tree.GetParent(uNode2)))
97                        Quit("GetFirstNeighborUnrooted, not edge");
98                const unsigned uRoot = tree.GetRootNodeIndex();
99                return tree.GetFirstNeighbor(uNode1, uRoot);
100                }
101
102        unsigned uNeighbor = tree.GetFirstNeighbor(uNode1, uNode2);
103        if (tree.IsRoot(uNeighbor))
104                return tree.GetFirstNeighbor(uNeighbor, uNode1);
105        return uNeighbor;
106        }
107
108static unsigned GetSecondNeighborUnrooted(const Tree &tree, unsigned uNode1,
109  unsigned uNode2)
110        {
111        if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
112                Quit("GetFirstNeighborUnrooted, should never be called with root");
113        if (!tree.IsEdge(uNode1, uNode2))
114                {
115                if (!tree.IsRoot(tree.GetParent(uNode1)) ||
116                  !tree.IsRoot(tree.GetParent(uNode2)))
117                        Quit("GetFirstNeighborUnrooted, not edge");
118                const unsigned uRoot = tree.GetRootNodeIndex();
119                return tree.GetSecondNeighbor(uNode1, uRoot);
120                }
121
122        unsigned uNeighbor = tree.GetSecondNeighbor(uNode1, uNode2);
123        if (tree.IsRoot(uNeighbor))
124                return tree.GetFirstNeighbor(uNeighbor, uNode1);
125        return uNeighbor;
126        }
127
128static unsigned GetNeighborUnrooted(const Tree &tree, unsigned uNode1,
129  unsigned uSub)
130        {
131        unsigned uNeighbor = tree.GetNeighbor(uNode1, uSub);
132        if (tree.IsRoot(uNeighbor))
133                return tree.GetFirstNeighbor(uNeighbor, uNode1);
134        return uNeighbor;
135        }
136
137static unsigned GetNeighborSubscriptUnrooted(const Tree &tree, unsigned uNode1,
138  unsigned uNode2)
139        {
140        if (tree.IsEdge(uNode1, uNode2))
141                return tree.GetNeighborSubscript(uNode1, uNode2);
142        if (!tree.IsRoot(tree.GetParent(uNode1)) ||
143          !tree.IsRoot(tree.GetParent(uNode2)))
144                Quit("GetNeighborSubscriptUnrooted, not edge");
145        for (unsigned uSub = 0; uSub < 3; ++uSub)
146                if (GetNeighborUnrooted(tree, uNode1, uSub) == uNode2)
147                        return uSub;
148        Quit("GetNeighborSubscriptUnrooted, not a neighbor");
149        return NULL_NEIGHBOR;
150        }
151
152static double GetEdgeLengthUnrooted(const Tree &tree, unsigned uNode1,
153  unsigned uNode2)
154        {
155        if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
156                Quit("GetEdgeLengthUnrooted, should never be called with root");
157        if (!tree.IsEdge(uNode1, uNode2))
158                {
159                if (!tree.IsRoot(tree.GetParent(uNode1)) ||
160                  !tree.IsRoot(tree.GetParent(uNode2)))
161                        Quit("GetEdgeLengthUnrooted, not edge");
162
163                const unsigned uRoot = tree.GetRootNodeIndex();
164                return tree.GetEdgeLength(uNode1, uRoot) +
165                  tree.GetEdgeLength(uNode2, uRoot);
166                }
167        return tree.GetEdgeLength(uNode1, uNode2);
168        }
169
170double GetGotohLength(const Tree &tree, unsigned R, unsigned A)
171        {
172        double dThis = GetEdgeLengthUnrooted(tree, R, A);
173
174// Enforce non-negative edge lengths
175        if (dThis < 0)
176                dThis = 0;
177
178        if (tree.IsLeaf(A))
179                return dThis;
180
181        const unsigned uFirst = GetFirstNeighborUnrooted(tree, A, R);
182        const unsigned uSecond = GetSecondNeighborUnrooted(tree, A, R);
183        const double dFirst = GetGotohLength(tree, A, uFirst);
184        const double dSecond = GetGotohLength(tree, A, uSecond);
185        const double dSum = dFirst + dSecond;
186        const double dThird = dSum == 0 ? 0 : (dFirst*dSecond)/dSum;
187        return dThis + dThird;
188        }
189
190// Return weight of edge A-R in three-way subtree that has
191// leaves A,B,C and internal node R.
192static double GotohWeightThreeWay(const Tree &tree, unsigned A,
193  unsigned B, unsigned C, unsigned R)
194        {
195        const double F = 1.0;
196
197        if (tree.IsLeaf(R))
198                Quit("GotohThreeWay: R must be internal node");
199
200        double a = GetGotohLength(tree, R, A);
201        double b = GetGotohLength(tree, R, B);
202        double c = GetGotohLength(tree, R, C);
203
204        double S = b*c + c*a + a*b;
205        double x = b*c*(a + b)*(a + c);
206        double y = a*(b + c)*F*S;
207
208// y is zero iff all three branch lengths are zero.
209        if (y < 0.001)
210                return 1.0;
211        return sqrt(x/y);
212        }
213
214static double GotohWeightEdge(const Tree &tree, unsigned uNodeIndex1,
215  unsigned uNodeIndex2)
216        {
217        double w1 = 1.0;
218        double w2 = 1.0;
219        if (!tree.IsLeaf(uNodeIndex1))
220                {
221                unsigned R = uNodeIndex1;
222                unsigned A = uNodeIndex2;
223                unsigned B = GetFirstNeighborUnrooted(tree, R, A);
224                unsigned C = GetSecondNeighborUnrooted(tree, R, A);
225                w1 = GotohWeightThreeWay(tree, A, B, C, R);
226                }
227        if (!tree.IsLeaf(uNodeIndex2))
228                {
229                unsigned R = uNodeIndex2;
230                unsigned A = uNodeIndex1;
231                unsigned B = GetFirstNeighborUnrooted(tree, R, A);
232                unsigned C = GetSecondNeighborUnrooted(tree, R, A);
233                w2 = GotohWeightThreeWay(tree, A, B, C, R);
234                }
235        return w1*w2;
236        }
237
238void CalcThreeWayEdgeWeights(const Tree &tree, WEIGHT **EdgeWeights)
239        {
240        const unsigned uNodeCount = tree.GetNodeCount();
241        for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)
242                {
243                if (tree.IsRoot(uNodeIndex1))
244                        continue;
245                for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)
246                        {
247                        const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);
248                        if (NULL_NEIGHBOR == uNodeIndex2)
249                                continue;
250
251                // Avoid computing same edge twice in reversed order
252                        if (uNodeIndex2 < uNodeIndex1)
253                                continue;
254
255                        const WEIGHT w = (WEIGHT) GotohWeightEdge(tree, uNodeIndex1, uNodeIndex2);
256                        const unsigned uSub2 = GetNeighborSubscriptUnrooted(tree, uNodeIndex2, uNodeIndex1);
257#if     DEBUG
258                        {
259                        assert(uNodeIndex2 == GetNeighborUnrooted(tree, uNodeIndex1, uSub1));
260                        assert(uNodeIndex1 == GetNeighborUnrooted(tree, uNodeIndex2, uSub2));
261                        const WEIGHT wRev = (WEIGHT) GotohWeightEdge(tree, uNodeIndex2, uNodeIndex1);
262                        if (!BTEq(w, wRev))
263                                Quit("CalcThreeWayWeights: rev check failed %g %g",
264                                  w, wRev);
265                        }
266#endif
267                        EdgeWeights[uNodeIndex1][uSub1] = w;
268                        EdgeWeights[uNodeIndex2][uSub2] = w;
269                        }
270                }
271        }
272
273static void SetSeqWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,
274  double dPathWeight, WEIGHT *Weights)
275        {
276        if (tree.IsRoot(uNode1) || tree.IsRoot(uNode2))
277                Quit("SetSeqWeights, should never be called with root");
278
279        const double dThisLength = GetEdgeLengthUnrooted(tree, uNode1, uNode2);
280        if (tree.IsLeaf(uNode2))
281                {
282                const unsigned Id = tree.GetLeafId(uNode2);
283                Weights[Id] = (WEIGHT) (dPathWeight + dThisLength);
284                return;
285                }
286        const unsigned uFirst = GetFirstNeighborUnrooted(tree, uNode2, uNode1);
287        const unsigned uSecond = GetSecondNeighborUnrooted(tree, uNode2, uNode1);
288        dPathWeight *= dThisLength;
289        SetSeqWeights(tree, uNode2, uFirst, dPathWeight, Weights);
290        SetSeqWeights(tree, uNode2, uSecond, dPathWeight, Weights);
291        }
292
293void CalcThreeWayWeights(const Tree &tree, unsigned uNode1, unsigned uNode2,
294  WEIGHT *Weights)
295        {
296#if     TRACE
297        Log("CalcThreeWayEdgeWeights\n");
298        tree.LogMe();
299#endif
300
301        if (tree.IsRoot(uNode1))
302                uNode1 = tree.GetFirstNeighbor(uNode1, uNode2);
303        else if (tree.IsRoot(uNode2))
304                uNode2 = tree.GetFirstNeighbor(uNode2, uNode1);
305        const unsigned uNodeCount = tree.GetNodeCount();
306        WEIGHT **EdgeWeights = new WEIGHT *[uNodeCount];
307        for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
308                EdgeWeights[uNodeIndex] = new WEIGHT[3];
309
310        CalcThreeWayEdgeWeights(tree, EdgeWeights);
311
312#if     TRACE
313        {
314        Log("Node1  Node2  Length   Gotoh  EdgeWt\n");
315        Log("-----  -----  ------  ------  ------\n");
316        for (unsigned uNodeIndex1 = 0; uNodeIndex1 < uNodeCount; ++uNodeIndex1)
317                {
318                if (tree.IsRoot(uNodeIndex1))
319                        continue;
320                for (unsigned uSub1 = 0; uSub1 < 3; ++uSub1)
321                        {
322                        const unsigned uNodeIndex2 = GetNeighborUnrooted(tree, uNodeIndex1, uSub1);
323                        if (NULL_NEIGHBOR == uNodeIndex2)
324                                continue;
325                        if (uNodeIndex2 < uNodeIndex1)
326                                continue;
327                        const WEIGHT ew = EdgeWeights[uNodeIndex1][uSub1];
328                        const double d = GetEdgeLengthUnrooted(tree, uNodeIndex1, uNodeIndex2);
329                        const double g = GetGotohLength(tree, uNodeIndex1, uNodeIndex2);
330                        Log("%5u  %5u  %6.3f  %6.3f  %6.3f\n", uNodeIndex1, uNodeIndex2, d, g, ew);
331                        }
332                }
333        }
334#endif
335
336        SetSeqWeights(tree, uNode1, uNode2, 0.0, Weights);
337        SetSeqWeights(tree, uNode2, uNode1, 0.0, Weights);
338
339        for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
340                delete[] EdgeWeights[uNodeIndex];
341        delete[] EdgeWeights;
342        }
Note: See TracBrowser for help on using the repository browser.