1 | #include "muscle.h" |
---|
2 | #include "tree.h" |
---|
3 | #include "seqvect.h" |
---|
4 | #include "profile.h" |
---|
5 | #include "msa.h" |
---|
6 | #include "pwpath.h" |
---|
7 | #include "distfunc.h" |
---|
8 | #include "textfile.h" |
---|
9 | #include "estring.h" |
---|
10 | |
---|
11 | #define TRACE 0 |
---|
12 | #define VALIDATE 0 |
---|
13 | #define TRACE_LENGTH_DELTA 0 |
---|
14 | |
---|
15 | static void LogLeafNames(const Tree &tree, unsigned uNodeIndex) |
---|
16 | { |
---|
17 | const unsigned uNodeCount = tree.GetNodeCount(); |
---|
18 | unsigned *Leaves = new unsigned[uNodeCount]; |
---|
19 | unsigned uLeafCount; |
---|
20 | GetLeaves(tree, uNodeIndex, Leaves, &uLeafCount); |
---|
21 | for (unsigned i = 0; i < uLeafCount; ++i) |
---|
22 | { |
---|
23 | if (i > 0) |
---|
24 | Log(","); |
---|
25 | Log("%s", tree.GetLeafName(Leaves[i])); |
---|
26 | } |
---|
27 | delete[] Leaves; |
---|
28 | } |
---|
29 | |
---|
30 | ProgNode *ProgressiveAlignE(const SeqVect &v, const Tree &GuideTree, MSA &a) |
---|
31 | { |
---|
32 | assert(GuideTree.IsRooted()); |
---|
33 | |
---|
34 | #if TRACE |
---|
35 | Log("GuideTree:\n"); |
---|
36 | GuideTree.LogMe(); |
---|
37 | #endif |
---|
38 | |
---|
39 | const unsigned uSeqCount = v.Length(); |
---|
40 | const unsigned uNodeCount = 2*uSeqCount - 1; |
---|
41 | const unsigned uIterCount = uSeqCount - 1; |
---|
42 | |
---|
43 | WEIGHT *Weights = new WEIGHT[uSeqCount]; |
---|
44 | CalcClustalWWeights(GuideTree, Weights); |
---|
45 | |
---|
46 | ProgNode *ProgNodes = new ProgNode[uNodeCount]; |
---|
47 | |
---|
48 | unsigned uJoin = 0; |
---|
49 | unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode(); |
---|
50 | SetProgressDesc("Align node"); |
---|
51 | do |
---|
52 | { |
---|
53 | if (GuideTree.IsLeaf(uTreeNodeIndex)) |
---|
54 | { |
---|
55 | if (uTreeNodeIndex >= uNodeCount) |
---|
56 | Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount); |
---|
57 | ProgNode &Node = ProgNodes[uTreeNodeIndex]; |
---|
58 | unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex); |
---|
59 | if (uId >= uSeqCount) |
---|
60 | Quit("Seq index out of range"); |
---|
61 | const Seq &s = *(v[uId]); |
---|
62 | Node.m_MSA.FromSeq(s); |
---|
63 | Node.m_MSA.SetSeqId(0, uId); |
---|
64 | Node.m_uLength = Node.m_MSA.GetColCount(); |
---|
65 | Node.m_Weight = Weights[uId]; |
---|
66 | // TODO: Term gaps settable |
---|
67 | Node.m_Prof = ProfileFromMSA(Node.m_MSA); |
---|
68 | Node.m_EstringL = 0; |
---|
69 | Node.m_EstringR = 0; |
---|
70 | #if TRACE |
---|
71 | Log("Leaf id=%u\n", uId); |
---|
72 | Log("MSA=\n"); |
---|
73 | Node.m_MSA.LogMe(); |
---|
74 | Log("Profile (from MSA)=\n"); |
---|
75 | ListProfile(Node.m_Prof, Node.m_uLength, &Node.m_MSA); |
---|
76 | #endif |
---|
77 | } |
---|
78 | else |
---|
79 | { |
---|
80 | Progress(uJoin, uSeqCount - 1); |
---|
81 | ++uJoin; |
---|
82 | |
---|
83 | const unsigned uMergeNodeIndex = uTreeNodeIndex; |
---|
84 | ProgNode &Parent = ProgNodes[uMergeNodeIndex]; |
---|
85 | |
---|
86 | const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex); |
---|
87 | const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex); |
---|
88 | |
---|
89 | if (g_bVerbose) |
---|
90 | { |
---|
91 | Log("Align: ("); |
---|
92 | LogLeafNames(GuideTree, uLeft); |
---|
93 | Log(") ("); |
---|
94 | LogLeafNames(GuideTree, uRight); |
---|
95 | Log(")\n"); |
---|
96 | } |
---|
97 | |
---|
98 | ProgNode &Node1 = ProgNodes[uLeft]; |
---|
99 | ProgNode &Node2 = ProgNodes[uRight]; |
---|
100 | |
---|
101 | #if TRACE |
---|
102 | Log("AlignTwoMSAs:\n"); |
---|
103 | #endif |
---|
104 | AlignTwoProfs( |
---|
105 | Node1.m_Prof, Node1.m_uLength, Node1.m_Weight, |
---|
106 | Node2.m_Prof, Node2.m_uLength, Node2.m_Weight, |
---|
107 | Parent.m_Path, |
---|
108 | &Parent.m_Prof, &Parent.m_uLength); |
---|
109 | #if TRACE_LENGTH_DELTA |
---|
110 | { |
---|
111 | unsigned L = Node1.m_uLength; |
---|
112 | unsigned R = Node2.m_uLength; |
---|
113 | unsigned P = Parent.m_Path.GetEdgeCount(); |
---|
114 | unsigned Max = L > R ? L : R; |
---|
115 | unsigned d = P - Max; |
---|
116 | Log("LD%u;%u;%u;%u\n", L, R, P, d); |
---|
117 | } |
---|
118 | #endif |
---|
119 | PathToEstrings(Parent.m_Path, &Parent.m_EstringL, &Parent.m_EstringR); |
---|
120 | |
---|
121 | Parent.m_Weight = Node1.m_Weight + Node2.m_Weight; |
---|
122 | |
---|
123 | #if VALIDATE |
---|
124 | { |
---|
125 | #if TRACE |
---|
126 | Log("AlignTwoMSAs:\n"); |
---|
127 | #endif |
---|
128 | PWPath TmpPath; |
---|
129 | AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, TmpPath); |
---|
130 | ProfPos *P1 = ProfileFromMSA(Node1.m_MSA, true); |
---|
131 | ProfPos *P2 = ProfileFromMSA(Node2.m_MSA, true); |
---|
132 | unsigned uLength = Parent.m_MSA.GetColCount(); |
---|
133 | ProfPos *TmpProf = ProfileFromMSA(Parent.m_MSA, true); |
---|
134 | |
---|
135 | #if TRACE |
---|
136 | Log("Node1 MSA=\n"); |
---|
137 | Node1.m_MSA.LogMe(); |
---|
138 | |
---|
139 | Log("Node1 prof=\n"); |
---|
140 | ListProfile(Node1.m_Prof, Node1.m_MSA.GetColCount(), &Node1.m_MSA); |
---|
141 | Log("Node1 prof (from MSA)=\n"); |
---|
142 | ListProfile(P1, Node1.m_MSA.GetColCount(), &Node1.m_MSA); |
---|
143 | |
---|
144 | AssertProfsEq(Node1.m_Prof, Node1.m_uLength, P1, Node1.m_MSA.GetColCount()); |
---|
145 | |
---|
146 | Log("Node2 prof=\n"); |
---|
147 | ListProfile(Node2.m_Prof, Node2.m_MSA.GetColCount(), &Node2.m_MSA); |
---|
148 | |
---|
149 | Log("Node2 MSA=\n"); |
---|
150 | Node2.m_MSA.LogMe(); |
---|
151 | |
---|
152 | Log("Node2 prof (from MSA)=\n"); |
---|
153 | ListProfile(P2, Node2.m_MSA.GetColCount(), &Node2.m_MSA); |
---|
154 | |
---|
155 | AssertProfsEq(Node2.m_Prof, Node2.m_uLength, P2, Node2.m_MSA.GetColCount()); |
---|
156 | |
---|
157 | TmpPath.AssertEqual(Parent.m_Path); |
---|
158 | |
---|
159 | Log("Parent MSA=\n"); |
---|
160 | Parent.m_MSA.LogMe(); |
---|
161 | |
---|
162 | Log("Parent prof=\n"); |
---|
163 | ListProfile(Parent.m_Prof, Parent.m_uLength, &Parent.m_MSA); |
---|
164 | |
---|
165 | Log("Parent prof (from MSA)=\n"); |
---|
166 | ListProfile(TmpProf, Parent.m_MSA.GetColCount(), &Parent.m_MSA); |
---|
167 | |
---|
168 | #endif // TRACE |
---|
169 | AssertProfsEq(Parent.m_Prof, Parent.m_uLength, |
---|
170 | TmpProf, Parent.m_MSA.GetColCount()); |
---|
171 | delete[] P1; |
---|
172 | delete[] P2; |
---|
173 | delete[] TmpProf; |
---|
174 | } |
---|
175 | #endif // VALIDATE |
---|
176 | |
---|
177 | Node1.m_MSA.Clear(); |
---|
178 | Node2.m_MSA.Clear(); |
---|
179 | |
---|
180 | // Don't delete profiles, may need them for tree refinement. |
---|
181 | //delete[] Node1.m_Prof; |
---|
182 | //delete[] Node2.m_Prof; |
---|
183 | //Node1.m_Prof = 0; |
---|
184 | //Node2.m_Prof = 0; |
---|
185 | } |
---|
186 | uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex); |
---|
187 | } |
---|
188 | while (NULL_NEIGHBOR != uTreeNodeIndex); |
---|
189 | ProgressStepsDone(); |
---|
190 | |
---|
191 | if (g_bBrenner) |
---|
192 | MakeRootMSABrenner((SeqVect &) v, GuideTree, ProgNodes, a); |
---|
193 | else |
---|
194 | MakeRootMSA(v, GuideTree, ProgNodes, a); |
---|
195 | |
---|
196 | #if VALIDATE |
---|
197 | { |
---|
198 | unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex(); |
---|
199 | const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex]; |
---|
200 | AssertMSAEq(a, RootProgNode.m_MSA); |
---|
201 | } |
---|
202 | #endif |
---|
203 | |
---|
204 | delete[] Weights; |
---|
205 | return ProgNodes; |
---|
206 | } |
---|