1 | #include "muscle.h" |
---|
2 | #include "msa.h" |
---|
3 | #include "pwpath.h" |
---|
4 | #include "profile.h" |
---|
5 | |
---|
6 | #define TRACE 0 |
---|
7 | |
---|
8 | static void LogPP(const ProfPos &PP) |
---|
9 | { |
---|
10 | Log("ResidueGroup %u\n", PP.m_uResidueGroup); |
---|
11 | Log("AllGaps %d\n", PP.m_bAllGaps); |
---|
12 | Log("Occ %.3g\n", PP.m_fOcc); |
---|
13 | Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG); |
---|
14 | Log("Freqs "); |
---|
15 | for (unsigned i = 0; i < 20; ++i) |
---|
16 | if (PP.m_fcCounts[i] > 0) |
---|
17 | Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]); |
---|
18 | Log("\n"); |
---|
19 | } |
---|
20 | |
---|
21 | static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i) |
---|
22 | { |
---|
23 | const ProfPos &PPA = PA[i]; |
---|
24 | const ProfPos &PPB = PB[i]; |
---|
25 | #define eq(x) if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); } |
---|
26 | #define be(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); } |
---|
27 | eq(bAllGaps) |
---|
28 | eq(uResidueGroup) |
---|
29 | |
---|
30 | be(LL) |
---|
31 | be(LG) |
---|
32 | be(GL) |
---|
33 | be(GG) |
---|
34 | be(fOcc) |
---|
35 | be(scoreGapOpen) |
---|
36 | be(scoreGapClose) |
---|
37 | |
---|
38 | for (unsigned j = 0; j < 20; ++j) |
---|
39 | { |
---|
40 | #define eqj(x) if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j); |
---|
41 | #define bej(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j); |
---|
42 | bej(fcCounts[j]); |
---|
43 | // eqj(uSortOrder[j]) // may differ due to ties, don't check? |
---|
44 | bej(AAScores[j]) |
---|
45 | #undef eqj |
---|
46 | #undef bej |
---|
47 | } |
---|
48 | #undef eq |
---|
49 | #undef be |
---|
50 | } |
---|
51 | |
---|
52 | void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
---|
53 | unsigned uLengthB) |
---|
54 | { |
---|
55 | if (uLengthA != uLengthB) |
---|
56 | Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB); |
---|
57 | for (unsigned i = 0; i < uLengthB; ++i) |
---|
58 | AssertProfPosEq(PA, PB, i); |
---|
59 | } |
---|
60 | |
---|
61 | #if DEBUG |
---|
62 | static void ValidateProf(const ProfPos *Prof, unsigned uLength) |
---|
63 | { |
---|
64 | for (unsigned i = 0; i < uLength; ++i) |
---|
65 | { |
---|
66 | const ProfPos &PP = Prof[i]; |
---|
67 | |
---|
68 | FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG; |
---|
69 | assert(BTEq(s1, 1.0)); |
---|
70 | |
---|
71 | if (i > 0) |
---|
72 | { |
---|
73 | const ProfPos &PPPrev = Prof[i-1]; |
---|
74 | FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL; |
---|
75 | FCOUNT s3 = PP.m_LL + PP.m_LG; |
---|
76 | assert(BTEq(s2, s3)); |
---|
77 | } |
---|
78 | if (i < uLength - 1) |
---|
79 | { |
---|
80 | const ProfPos &PPNext = Prof[i+1]; |
---|
81 | FCOUNT s4 = PP.m_LL + PP.m_GL; |
---|
82 | FCOUNT s5 = PPNext.m_LL + PPNext.m_LG; |
---|
83 | assert(BTEq(s4, s5)); |
---|
84 | } |
---|
85 | } |
---|
86 | } |
---|
87 | #else |
---|
88 | #define ValidateProf(Prof, Length) /* empty */ |
---|
89 | #endif |
---|
90 | |
---|
91 | static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos) |
---|
92 | { |
---|
93 | ProfPos &PP = Prof[uPos]; |
---|
94 | SortCounts(PP.m_fcCounts, PP.m_uSortOrder); |
---|
95 | PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts); |
---|
96 | |
---|
97 | // "Occupancy" |
---|
98 | PP.m_fOcc = PP.m_LL + PP.m_GL; |
---|
99 | |
---|
100 | // Frequency of gap-opens in this position (i) |
---|
101 | // Gap open = letter in i-1 and gap in i |
---|
102 | // = iff LG in i |
---|
103 | FCOUNT fcOpen = PP.m_LG; |
---|
104 | |
---|
105 | // Frequency of gap-closes in this position |
---|
106 | // Gap close = gap in i and letter in i+1 |
---|
107 | // = iff GL in i+1 |
---|
108 | FCOUNT fcClose; |
---|
109 | if (uPos + 1 < uLength) |
---|
110 | fcClose = Prof[uPos + 1].m_GL; |
---|
111 | else |
---|
112 | fcClose = PP.m_GG + PP.m_LG; |
---|
113 | |
---|
114 | PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0); |
---|
115 | PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0); |
---|
116 | #if DOUBLE_AFFINE |
---|
117 | PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0); |
---|
118 | PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0); |
---|
119 | #endif |
---|
120 | |
---|
121 | for (unsigned i = 0; i < g_AlphaSize; ++i) |
---|
122 | { |
---|
123 | SCORE scoreSum = 0; |
---|
124 | for (unsigned j = 0; j < g_AlphaSize; ++j) |
---|
125 | scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j]; |
---|
126 | PP.m_AAScores[i] = scoreSum; |
---|
127 | } |
---|
128 | } |
---|
129 | |
---|
130 | void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength) |
---|
131 | { |
---|
132 | for (unsigned i = 0; i < uLength; ++i) |
---|
133 | ScoresFromFreqsPos(Prof, uLength, i); |
---|
134 | } |
---|
135 | |
---|
136 | static void AppendDelete(const MSA &msaA, unsigned &uColIndexA, |
---|
137 | unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined, |
---|
138 | unsigned &uColIndexCombined) |
---|
139 | { |
---|
140 | #if TRACE |
---|
141 | Log("AppendDelete ColIxA=%u ColIxCmb=%u\n", |
---|
142 | uColIndexA, uColIndexCombined); |
---|
143 | #endif |
---|
144 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
---|
145 | { |
---|
146 | char c = msaA.GetChar(uSeqIndexA, uColIndexA); |
---|
147 | msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c); |
---|
148 | } |
---|
149 | |
---|
150 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
---|
151 | msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-'); |
---|
152 | |
---|
153 | ++uColIndexCombined; |
---|
154 | ++uColIndexA; |
---|
155 | } |
---|
156 | |
---|
157 | static void AppendInsert(const MSA &msaB, unsigned &uColIndexB, |
---|
158 | unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined, |
---|
159 | unsigned &uColIndexCombined) |
---|
160 | { |
---|
161 | #if TRACE |
---|
162 | Log("AppendInsert ColIxB=%u ColIxCmb=%u\n", |
---|
163 | uColIndexB, uColIndexCombined); |
---|
164 | #endif |
---|
165 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
---|
166 | msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-'); |
---|
167 | |
---|
168 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
---|
169 | { |
---|
170 | char c = msaB.GetChar(uSeqIndexB, uColIndexB); |
---|
171 | msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c); |
---|
172 | } |
---|
173 | |
---|
174 | ++uColIndexCombined; |
---|
175 | ++uColIndexB; |
---|
176 | } |
---|
177 | |
---|
178 | static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA, |
---|
179 | const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA, |
---|
180 | unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined) |
---|
181 | { |
---|
182 | #if TRACE |
---|
183 | Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n", |
---|
184 | uColIndexA, uColIndexB, uColIndexCombined); |
---|
185 | #endif |
---|
186 | const unsigned uLengthA = msaA.GetColCount(); |
---|
187 | const unsigned uLengthB = msaB.GetColCount(); |
---|
188 | |
---|
189 | unsigned uNewColCount = uColCountA; |
---|
190 | if (uColCountB > uNewColCount) |
---|
191 | uNewColCount = uColCountB; |
---|
192 | |
---|
193 | for (unsigned n = 0; n < uColCountA; ++n) |
---|
194 | { |
---|
195 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
---|
196 | { |
---|
197 | char c = msaA.GetChar(uSeqIndexA, uColIndexA + n); |
---|
198 | c = UnalignChar(c); |
---|
199 | msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c); |
---|
200 | } |
---|
201 | } |
---|
202 | for (unsigned n = uColCountA; n < uNewColCount; ++n) |
---|
203 | { |
---|
204 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
---|
205 | msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.'); |
---|
206 | } |
---|
207 | |
---|
208 | for (unsigned n = 0; n < uColCountB; ++n) |
---|
209 | { |
---|
210 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
---|
211 | { |
---|
212 | char c = msaB.GetChar(uSeqIndexB, uColIndexB + n); |
---|
213 | c = UnalignChar(c); |
---|
214 | msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c); |
---|
215 | } |
---|
216 | } |
---|
217 | for (unsigned n = uColCountB; n < uNewColCount; ++n) |
---|
218 | { |
---|
219 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
---|
220 | msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.'); |
---|
221 | } |
---|
222 | |
---|
223 | uColIndexCombined += uNewColCount; |
---|
224 | uColIndexA += uColCountA; |
---|
225 | uColIndexB += uColCountB; |
---|
226 | } |
---|
227 | |
---|
228 | static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB, |
---|
229 | unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB, |
---|
230 | MSA &msaCombined, unsigned &uColIndexCombined) |
---|
231 | { |
---|
232 | #if TRACE |
---|
233 | Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n", |
---|
234 | uColIndexA, uColIndexB, uColIndexCombined); |
---|
235 | #endif |
---|
236 | |
---|
237 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
---|
238 | { |
---|
239 | char c = msaA.GetChar(uSeqIndexA, uColIndexA); |
---|
240 | msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c); |
---|
241 | } |
---|
242 | |
---|
243 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
---|
244 | { |
---|
245 | char c = msaB.GetChar(uSeqIndexB, uColIndexB); |
---|
246 | msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c); |
---|
247 | } |
---|
248 | |
---|
249 | ++uColIndexA; |
---|
250 | ++uColIndexB; |
---|
251 | ++uColIndexCombined; |
---|
252 | } |
---|
253 | |
---|
254 | void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB, |
---|
255 | MSA &msaCombined) |
---|
256 | { |
---|
257 | msaCombined.Clear(); |
---|
258 | |
---|
259 | #if TRACE |
---|
260 | Log("FastAlignProfiles\n"); |
---|
261 | Log("Template A:\n"); |
---|
262 | msaA.LogMe(); |
---|
263 | Log("Template B:\n"); |
---|
264 | msaB.LogMe(); |
---|
265 | #endif |
---|
266 | |
---|
267 | const unsigned uColCountA = msaA.GetColCount(); |
---|
268 | const unsigned uColCountB = msaB.GetColCount(); |
---|
269 | |
---|
270 | const unsigned uSeqCountA = msaA.GetSeqCount(); |
---|
271 | const unsigned uSeqCountB = msaB.GetSeqCount(); |
---|
272 | |
---|
273 | msaCombined.SetSeqCount(uSeqCountA + uSeqCountB); |
---|
274 | |
---|
275 | // Copy sequence names into combined MSA |
---|
276 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
---|
277 | { |
---|
278 | msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA)); |
---|
279 | msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA)); |
---|
280 | } |
---|
281 | |
---|
282 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
---|
283 | { |
---|
284 | msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB)); |
---|
285 | msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB)); |
---|
286 | } |
---|
287 | |
---|
288 | unsigned uColIndexA = 0; |
---|
289 | unsigned uColIndexB = 0; |
---|
290 | unsigned uColIndexCombined = 0; |
---|
291 | const unsigned uEdgeCount = Path.GetEdgeCount(); |
---|
292 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
---|
293 | { |
---|
294 | const PWEdge &Edge = Path.GetEdge(uEdgeIndex); |
---|
295 | #if TRACE |
---|
296 | Log("\nEdge %u %c%u.%u\n", |
---|
297 | uEdgeIndex, |
---|
298 | Edge.cType, |
---|
299 | Edge.uPrefixLengthA, |
---|
300 | Edge.uPrefixLengthB); |
---|
301 | #endif |
---|
302 | const char cType = Edge.cType; |
---|
303 | const unsigned uPrefixLengthA = Edge.uPrefixLengthA; |
---|
304 | unsigned uColCountA = 0; |
---|
305 | if (uPrefixLengthA > 0) |
---|
306 | { |
---|
307 | const unsigned uNodeIndexA = uPrefixLengthA - 1; |
---|
308 | const unsigned uTplColIndexA = uNodeIndexA; |
---|
309 | if (uTplColIndexA > uColIndexA) |
---|
310 | uColCountA = uTplColIndexA - uColIndexA; |
---|
311 | } |
---|
312 | |
---|
313 | const unsigned uPrefixLengthB = Edge.uPrefixLengthB; |
---|
314 | unsigned uColCountB = 0; |
---|
315 | if (uPrefixLengthB > 0) |
---|
316 | { |
---|
317 | const unsigned uNodeIndexB = uPrefixLengthB - 1; |
---|
318 | const unsigned uTplColIndexB = uNodeIndexB; |
---|
319 | if (uTplColIndexB > uColIndexB) |
---|
320 | uColCountB = uTplColIndexB - uColIndexB; |
---|
321 | } |
---|
322 | |
---|
323 | // TODO: This code looks like a hangover from HMM estimation -- can we delete it? |
---|
324 | assert(uColCountA == 0); |
---|
325 | assert(uColCountB == 0); |
---|
326 | AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB, |
---|
327 | uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); |
---|
328 | |
---|
329 | switch (cType) |
---|
330 | { |
---|
331 | case 'M': |
---|
332 | { |
---|
333 | assert(uPrefixLengthA > 0); |
---|
334 | assert(uPrefixLengthB > 0); |
---|
335 | const unsigned uColA = uPrefixLengthA - 1; |
---|
336 | const unsigned uColB = uPrefixLengthB - 1; |
---|
337 | assert(uColIndexA == uColA); |
---|
338 | assert(uColIndexB == uColB); |
---|
339 | AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB, |
---|
340 | msaCombined, uColIndexCombined); |
---|
341 | break; |
---|
342 | } |
---|
343 | case 'D': |
---|
344 | { |
---|
345 | assert(uPrefixLengthA > 0); |
---|
346 | const unsigned uColA = uPrefixLengthA - 1; |
---|
347 | assert(uColIndexA == uColA); |
---|
348 | AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); |
---|
349 | break; |
---|
350 | } |
---|
351 | case 'I': |
---|
352 | { |
---|
353 | assert(uPrefixLengthB > 0); |
---|
354 | const unsigned uColB = uPrefixLengthB - 1; |
---|
355 | assert(uColIndexB == uColB); |
---|
356 | AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); |
---|
357 | break; |
---|
358 | } |
---|
359 | default: |
---|
360 | assert(false); |
---|
361 | } |
---|
362 | } |
---|
363 | unsigned uInsertColCountA = uColCountA - uColIndexA; |
---|
364 | unsigned uInsertColCountB = uColCountB - uColIndexB; |
---|
365 | |
---|
366 | // TODO: This code looks like a hangover from HMM estimation -- can we delete it? |
---|
367 | assert(uInsertColCountA == 0); |
---|
368 | assert(uInsertColCountB == 0); |
---|
369 | AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB, |
---|
370 | uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); |
---|
371 | |
---|
372 | assert(msaCombined.GetColCount() == uEdgeCount); |
---|
373 | } |
---|
374 | |
---|
375 | static const ProfPos PPStart = |
---|
376 | { |
---|
377 | false, //m_bAllGaps; |
---|
378 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21]; |
---|
379 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20]; |
---|
380 | 1.0, // m_LL; |
---|
381 | 0.0, // m_LG; |
---|
382 | 0.0, // m_GL; |
---|
383 | 0.0, // m_GG; |
---|
384 | { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_ALScores |
---|
385 | 0, // m_uResidueGroup; |
---|
386 | 1.0, // m_fOcc; |
---|
387 | 0.0, // m_fcStartOcc; |
---|
388 | 0.0, // m_fcEndOcc; |
---|
389 | 0.0, // m_scoreGapOpen; |
---|
390 | 0.0, // m_scoreGapClose; |
---|
391 | }; |
---|
392 | |
---|
393 | // MM |
---|
394 | // Ai1 Ai Out |
---|
395 | // X X LL LL |
---|
396 | // X - LG LG |
---|
397 | // - X GL GL |
---|
398 | // - - GG GG |
---|
399 | // |
---|
400 | // Bj1 Bj |
---|
401 | // X X LL LL |
---|
402 | // X - LG LG |
---|
403 | // - X GL GL |
---|
404 | // - - GG GG |
---|
405 | static void SetGapsMM( |
---|
406 | const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA, |
---|
407 | const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB, |
---|
408 | ProfPos *POut, unsigned uColIndexOut) |
---|
409 | { |
---|
410 | const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart; |
---|
411 | const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart; |
---|
412 | ProfPos &PPO = POut[uColIndexOut]; |
---|
413 | |
---|
414 | PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL; |
---|
415 | PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG; |
---|
416 | PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL; |
---|
417 | PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG; |
---|
418 | } |
---|
419 | |
---|
420 | // MD |
---|
421 | // Ai1 Ai Out |
---|
422 | // X X LL LL |
---|
423 | // X - LG LG |
---|
424 | // - X GL GL |
---|
425 | // - - GG GG |
---|
426 | // |
---|
427 | // Bj (-) |
---|
428 | // X - ?L LG |
---|
429 | // - - ?G GG |
---|
430 | static void SetGapsMD( |
---|
431 | const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA, |
---|
432 | const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB, |
---|
433 | ProfPos *POut, unsigned uColIndexOut) |
---|
434 | { |
---|
435 | const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart; |
---|
436 | const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart; |
---|
437 | ProfPos &PPO = POut[uColIndexOut]; |
---|
438 | |
---|
439 | PPO.m_LL = wA*PPA.m_LL; |
---|
440 | PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL); |
---|
441 | PPO.m_GL = wA*PPA.m_GL; |
---|
442 | PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG); |
---|
443 | } |
---|
444 | |
---|
445 | // DD |
---|
446 | // Ai1 Ai Out |
---|
447 | // X X LL LL |
---|
448 | // X - LG LG |
---|
449 | // - X GL GL |
---|
450 | // - - GG GG |
---|
451 | // |
---|
452 | // (-) (-) |
---|
453 | // - - ?? GG |
---|
454 | static void SetGapsDD( |
---|
455 | const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA, |
---|
456 | const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB, |
---|
457 | ProfPos *POut, unsigned uColIndexOut) |
---|
458 | { |
---|
459 | const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart; |
---|
460 | ProfPos &PPO = POut[uColIndexOut]; |
---|
461 | |
---|
462 | PPO.m_LL = wA*PPA.m_LL; |
---|
463 | PPO.m_LG = wA*PPA.m_LG; |
---|
464 | PPO.m_GL = wA*PPA.m_GL; |
---|
465 | PPO.m_GG = wA*PPA.m_GG + wB; |
---|
466 | } |
---|
467 | |
---|
468 | // MI |
---|
469 | // Ai (-) Out |
---|
470 | // X - ?L LG |
---|
471 | // - - ?G GG |
---|
472 | |
---|
473 | // Bj1 Bj |
---|
474 | // X X LL LL |
---|
475 | // X - LG LG |
---|
476 | // - X GL GL |
---|
477 | // - - GG GG |
---|
478 | static void SetGapsMI( |
---|
479 | const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA, |
---|
480 | const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB, |
---|
481 | ProfPos *POut, unsigned uColIndexOut) |
---|
482 | { |
---|
483 | const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart; |
---|
484 | const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart; |
---|
485 | ProfPos &PPO = POut[uColIndexOut]; |
---|
486 | |
---|
487 | PPO.m_LL = wB*PPB.m_LL; |
---|
488 | PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL); |
---|
489 | PPO.m_GL = wB*PPB.m_GL; |
---|
490 | PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG); |
---|
491 | } |
---|
492 | |
---|
493 | // DM |
---|
494 | // Ai1 Ai Out |
---|
495 | // X X LL LL |
---|
496 | // X - LG LG |
---|
497 | // - X GL GL |
---|
498 | // - - GG GG |
---|
499 | // |
---|
500 | // (-) Bj |
---|
501 | // - X ?L GL |
---|
502 | // - - ?G GG |
---|
503 | static void SetGapsDM( |
---|
504 | const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA, |
---|
505 | const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB, |
---|
506 | ProfPos *POut, unsigned uColIndexOut) |
---|
507 | { |
---|
508 | const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart; |
---|
509 | const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart; |
---|
510 | ProfPos &PPO = POut[uColIndexOut]; |
---|
511 | |
---|
512 | PPO.m_LL = wA*PPA.m_LL; |
---|
513 | PPO.m_LG = wA*PPA.m_LG; |
---|
514 | PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL); |
---|
515 | PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG); |
---|
516 | } |
---|
517 | |
---|
518 | // IM |
---|
519 | // (-) Ai Out |
---|
520 | // - X ?L GL |
---|
521 | // - - ?G GG |
---|
522 | |
---|
523 | // Bj1 Bj |
---|
524 | // X X LL LL |
---|
525 | // X - LG LG |
---|
526 | // - X GL GL |
---|
527 | // - - GG GG |
---|
528 | static void SetGapsIM( |
---|
529 | const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA, |
---|
530 | const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB, |
---|
531 | ProfPos *POut, unsigned uColIndexOut) |
---|
532 | { |
---|
533 | const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart; |
---|
534 | const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart; |
---|
535 | ProfPos &PPO = POut[uColIndexOut]; |
---|
536 | |
---|
537 | PPO.m_LL = wB*PPB.m_LL; |
---|
538 | PPO.m_LG = wB*PPB.m_LG; |
---|
539 | PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL); |
---|
540 | PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG); |
---|
541 | } |
---|
542 | |
---|
543 | // ID |
---|
544 | // (-) Ai Out |
---|
545 | // - X ?L GL |
---|
546 | // - - ?G GG |
---|
547 | |
---|
548 | // Bj (-) |
---|
549 | // X - ?L LG |
---|
550 | // - - ?G GG |
---|
551 | static void SetGapsID( |
---|
552 | const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA, |
---|
553 | const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB, |
---|
554 | ProfPos *POut, unsigned uColIndexOut) |
---|
555 | { |
---|
556 | const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart; |
---|
557 | const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart; |
---|
558 | ProfPos &PPO = POut[uColIndexOut]; |
---|
559 | |
---|
560 | PPO.m_LL = 0; |
---|
561 | PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL; |
---|
562 | PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL; |
---|
563 | PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG); |
---|
564 | } |
---|
565 | |
---|
566 | // DI |
---|
567 | // Ai (-) Out |
---|
568 | // X - ?L LG |
---|
569 | // - - ?G GG |
---|
570 | |
---|
571 | // (-) Bj |
---|
572 | // - X ?L GL |
---|
573 | // - - ?G GG |
---|
574 | static void SetGapsDI( |
---|
575 | const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA, |
---|
576 | const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB, |
---|
577 | ProfPos *POut, unsigned uColIndexOut) |
---|
578 | { |
---|
579 | const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart; |
---|
580 | const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart; |
---|
581 | ProfPos &PPO = POut[uColIndexOut]; |
---|
582 | |
---|
583 | PPO.m_LL = 0; |
---|
584 | PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL; |
---|
585 | PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL; |
---|
586 | PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG); |
---|
587 | } |
---|
588 | |
---|
589 | // II |
---|
590 | // (-) (-) Out |
---|
591 | // - - ?? GG |
---|
592 | |
---|
593 | // Bj1 Bj |
---|
594 | // X X LL LL |
---|
595 | // X - LG LG |
---|
596 | // - X GL GL |
---|
597 | // - - GG GG |
---|
598 | static void SetGapsII( |
---|
599 | const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA, |
---|
600 | const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB, |
---|
601 | ProfPos *POut, unsigned uColIndexOut) |
---|
602 | { |
---|
603 | const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart; |
---|
604 | ProfPos &PPO = POut[uColIndexOut]; |
---|
605 | |
---|
606 | PPO.m_LL = wB*PPB.m_LL; |
---|
607 | PPO.m_LG = wB*PPB.m_LG; |
---|
608 | PPO.m_GL = wB*PPB.m_GL; |
---|
609 | PPO.m_GG = wB*PPB.m_GG + wA; |
---|
610 | } |
---|
611 | |
---|
612 | static void SetFreqs( |
---|
613 | const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA, |
---|
614 | const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB, |
---|
615 | ProfPos *POut, unsigned uColIndexOut) |
---|
616 | { |
---|
617 | const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart; |
---|
618 | const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart; |
---|
619 | ProfPos &PPO = POut[uColIndexOut]; |
---|
620 | |
---|
621 | if (g_bNormalizeCounts) |
---|
622 | { |
---|
623 | const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB); |
---|
624 | const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB); |
---|
625 | FCOUNT fTotal = 0; |
---|
626 | for (unsigned i = 0; i < 20; ++i) |
---|
627 | { |
---|
628 | const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i]; |
---|
629 | PPO.m_fcCounts[i] = f; |
---|
630 | fTotal += f; |
---|
631 | } |
---|
632 | if (fTotal > 0) |
---|
633 | for (unsigned i = 0; i < 20; ++i) |
---|
634 | PPO.m_fcCounts[i] /= fTotal; |
---|
635 | } |
---|
636 | else |
---|
637 | { |
---|
638 | for (unsigned i = 0; i < 20; ++i) |
---|
639 | PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i]; |
---|
640 | } |
---|
641 | } |
---|
642 | |
---|
643 | void AlignTwoProfsGivenPath(const PWPath &Path, |
---|
644 | const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA, |
---|
645 | const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB, |
---|
646 | ProfPos **ptrPOut, unsigned *ptruLengthOut) |
---|
647 | { |
---|
648 | #if TRACE |
---|
649 | Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB); |
---|
650 | Path.LogMe(); |
---|
651 | #endif |
---|
652 | assert(BTEq(wA + wB, 1.0)); |
---|
653 | |
---|
654 | unsigned uColIndexA = 0; |
---|
655 | unsigned uColIndexB = 0; |
---|
656 | unsigned uColIndexOut = 0; |
---|
657 | const unsigned uEdgeCount = Path.GetEdgeCount(); |
---|
658 | ProfPos *POut = new ProfPos[uEdgeCount]; |
---|
659 | char cPrevType = 'M'; |
---|
660 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
---|
661 | { |
---|
662 | const PWEdge &Edge = Path.GetEdge(uEdgeIndex); |
---|
663 | const char cType = Edge.cType; |
---|
664 | |
---|
665 | const unsigned uPrefixLengthA = Edge.uPrefixLengthA; |
---|
666 | const unsigned uPrefixLengthB = Edge.uPrefixLengthB; |
---|
667 | |
---|
668 | #if TRACE |
---|
669 | Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n", |
---|
670 | uEdgeIndex, |
---|
671 | Edge.cType, |
---|
672 | Edge.uPrefixLengthA, |
---|
673 | Edge.uPrefixLengthB, |
---|
674 | uColIndexA, |
---|
675 | uColIndexB); |
---|
676 | #endif |
---|
677 | |
---|
678 | POut[uColIndexOut].m_bAllGaps = false; |
---|
679 | switch (cType) |
---|
680 | { |
---|
681 | case 'M': |
---|
682 | { |
---|
683 | assert(uPrefixLengthA > 0); |
---|
684 | assert(uPrefixLengthB > 0); |
---|
685 | SetFreqs( |
---|
686 | PA, uPrefixLengthA, wA, |
---|
687 | PB, uPrefixLengthB, wB, |
---|
688 | POut, uColIndexOut); |
---|
689 | switch (cPrevType) |
---|
690 | { |
---|
691 | case 'M': |
---|
692 | SetGapsMM( |
---|
693 | PA, uPrefixLengthA, wA, |
---|
694 | PB, uPrefixLengthB, wB, |
---|
695 | POut, uColIndexOut); |
---|
696 | break; |
---|
697 | case 'D': |
---|
698 | SetGapsDM( |
---|
699 | PA, uPrefixLengthA, wA, |
---|
700 | PB, uPrefixLengthB, wB, |
---|
701 | POut, uColIndexOut); |
---|
702 | break; |
---|
703 | case 'I': |
---|
704 | SetGapsIM( |
---|
705 | PA, uPrefixLengthA, wA, |
---|
706 | PB, uPrefixLengthB, wB, |
---|
707 | POut, uColIndexOut); |
---|
708 | break; |
---|
709 | default: |
---|
710 | Quit("Bad cPrevType"); |
---|
711 | } |
---|
712 | ++uColIndexA; |
---|
713 | ++uColIndexB; |
---|
714 | ++uColIndexOut; |
---|
715 | break; |
---|
716 | } |
---|
717 | case 'D': |
---|
718 | { |
---|
719 | assert(uPrefixLengthA > 0); |
---|
720 | SetFreqs( |
---|
721 | PA, uPrefixLengthA, wA, |
---|
722 | PB, uPrefixLengthB, 0, |
---|
723 | POut, uColIndexOut); |
---|
724 | switch (cPrevType) |
---|
725 | { |
---|
726 | case 'M': |
---|
727 | SetGapsMD( |
---|
728 | PA, uPrefixLengthA, wA, |
---|
729 | PB, uPrefixLengthB, wB, |
---|
730 | POut, uColIndexOut); |
---|
731 | break; |
---|
732 | case 'D': |
---|
733 | SetGapsDD( |
---|
734 | PA, uPrefixLengthA, wA, |
---|
735 | PB, uPrefixLengthB, wB, |
---|
736 | POut, uColIndexOut); |
---|
737 | break; |
---|
738 | case 'I': |
---|
739 | SetGapsID( |
---|
740 | PA, uPrefixLengthA, wA, |
---|
741 | PB, uPrefixLengthB, wB, |
---|
742 | POut, uColIndexOut); |
---|
743 | break; |
---|
744 | default: |
---|
745 | Quit("Bad cPrevType"); |
---|
746 | } |
---|
747 | ++uColIndexA; |
---|
748 | ++uColIndexOut; |
---|
749 | break; |
---|
750 | } |
---|
751 | case 'I': |
---|
752 | { |
---|
753 | assert(uPrefixLengthB > 0); |
---|
754 | SetFreqs( |
---|
755 | PA, uPrefixLengthA, 0, |
---|
756 | PB, uPrefixLengthB, wB, |
---|
757 | POut, uColIndexOut); |
---|
758 | switch (cPrevType) |
---|
759 | { |
---|
760 | case 'M': |
---|
761 | SetGapsMI( |
---|
762 | PA, uPrefixLengthA, wA, |
---|
763 | PB, uPrefixLengthB, wB, |
---|
764 | POut, uColIndexOut); |
---|
765 | break; |
---|
766 | case 'D': |
---|
767 | SetGapsDI( |
---|
768 | PA, uPrefixLengthA, wA, |
---|
769 | PB, uPrefixLengthB, wB, |
---|
770 | POut, uColIndexOut); |
---|
771 | break; |
---|
772 | case 'I': |
---|
773 | SetGapsII( |
---|
774 | PA, uPrefixLengthA, wA, |
---|
775 | PB, uPrefixLengthB, wB, |
---|
776 | POut, uColIndexOut); |
---|
777 | break; |
---|
778 | default: |
---|
779 | Quit("Bad cPrevType"); |
---|
780 | } |
---|
781 | ++uColIndexB; |
---|
782 | ++uColIndexOut; |
---|
783 | break; |
---|
784 | } |
---|
785 | default: |
---|
786 | assert(false); |
---|
787 | } |
---|
788 | cPrevType = cType; |
---|
789 | } |
---|
790 | assert(uColIndexOut == uEdgeCount); |
---|
791 | |
---|
792 | ProfScoresFromFreqs(POut, uEdgeCount); |
---|
793 | ValidateProf(POut, uEdgeCount); |
---|
794 | |
---|
795 | *ptrPOut = POut; |
---|
796 | *ptruLengthOut = uEdgeCount; |
---|
797 | |
---|
798 | #if TRACE |
---|
799 | Log("AlignTwoProfsGivenPath:\n"); |
---|
800 | ListProfile(POut, uEdgeCount, 0); |
---|
801 | #endif |
---|
802 | } |
---|