1 | #include "muscle.h" |
---|
2 | #include "msa.h" |
---|
3 | #include "profile.h" |
---|
4 | #include "objscore.h" |
---|
5 | |
---|
6 | #if DOUBLE_AFFINE |
---|
7 | |
---|
8 | #define TRACE 0 |
---|
9 | #define TEST_SPFAST 0 |
---|
10 | |
---|
11 | static SCORE GapPenalty(unsigned uLength, bool Term, SCORE g, SCORE e) |
---|
12 | { |
---|
13 | //if (Term) |
---|
14 | // { |
---|
15 | // switch (g_TermGap) |
---|
16 | // { |
---|
17 | // case TERMGAP_Full: |
---|
18 | // return g + (uLength - 1)*e; |
---|
19 | |
---|
20 | // case TERMGAP_Half: |
---|
21 | // return g/2 + (uLength - 1)*e; |
---|
22 | |
---|
23 | // case TERMGAP_Ext: |
---|
24 | // return uLength*e; |
---|
25 | // } |
---|
26 | // Quit("Bad termgap"); |
---|
27 | // } |
---|
28 | //else |
---|
29 | // return g + (uLength - 1)*e; |
---|
30 | //return MINUS_INFINITY; |
---|
31 | return g + (uLength - 1)*e; |
---|
32 | } |
---|
33 | |
---|
34 | static SCORE GapPenalty(unsigned uLength, bool Term) |
---|
35 | { |
---|
36 | SCORE s1 = GapPenalty(uLength, Term, g_scoreGapOpen, g_scoreGapExtend); |
---|
37 | #if DOUBLE_AFFINE |
---|
38 | SCORE s2 = GapPenalty(uLength, Term, g_scoreGapOpen2, g_scoreGapExtend2); |
---|
39 | if (s1 > s2) |
---|
40 | return s1; |
---|
41 | return s2; |
---|
42 | #else |
---|
43 | return s1; |
---|
44 | #endif |
---|
45 | } |
---|
46 | |
---|
47 | static const MSA *g_ptrMSA1; |
---|
48 | static const MSA *g_ptrMSA2; |
---|
49 | static unsigned g_uSeqIndex1; |
---|
50 | static unsigned g_uSeqIndex2; |
---|
51 | |
---|
52 | static void LogGap(unsigned uStart, unsigned uEnd, unsigned uGapLength, |
---|
53 | bool bNTerm, bool bCTerm) |
---|
54 | { |
---|
55 | Log("%16.16s ", ""); |
---|
56 | for (unsigned i = 0; i < uStart; ++i) |
---|
57 | Log(" "); |
---|
58 | unsigned uMyLength = 0; |
---|
59 | for (unsigned i = uStart; i <= uEnd; ++i) |
---|
60 | { |
---|
61 | bool bGap1 = g_ptrMSA1->IsGap(g_uSeqIndex1, i); |
---|
62 | bool bGap2 = g_ptrMSA2->IsGap(g_uSeqIndex2, i); |
---|
63 | if (!bGap1 && !bGap2) |
---|
64 | Quit("Error -- neither gapping"); |
---|
65 | if (bGap1 && bGap2) |
---|
66 | Log("."); |
---|
67 | else |
---|
68 | { |
---|
69 | ++uMyLength; |
---|
70 | Log("-"); |
---|
71 | } |
---|
72 | } |
---|
73 | SCORE s = GapPenalty(uGapLength, bNTerm || bCTerm); |
---|
74 | Log(" L=%d N%d C%d s=%.3g", uGapLength, bNTerm, bCTerm, s); |
---|
75 | Log("\n"); |
---|
76 | if (uMyLength != uGapLength) |
---|
77 | Quit("Lengths differ"); |
---|
78 | |
---|
79 | } |
---|
80 | |
---|
81 | static SCORE ScoreSeqPair(const MSA &msa1, unsigned uSeqIndex1, |
---|
82 | const MSA &msa2, unsigned uSeqIndex2, SCORE *ptrLetters, SCORE *ptrGaps) |
---|
83 | { |
---|
84 | g_ptrMSA1 = &msa1; |
---|
85 | g_ptrMSA2 = &msa2; |
---|
86 | g_uSeqIndex1 = uSeqIndex1; |
---|
87 | g_uSeqIndex2 = uSeqIndex2; |
---|
88 | |
---|
89 | const unsigned uColCount = msa1.GetColCount(); |
---|
90 | const unsigned uColCount2 = msa2.GetColCount(); |
---|
91 | if (uColCount != uColCount2) |
---|
92 | Quit("ScoreSeqPair, different lengths"); |
---|
93 | |
---|
94 | #if TRACE |
---|
95 | Log("ScoreSeqPair\n"); |
---|
96 | Log("%16.16s ", msa1.GetSeqName(uSeqIndex1)); |
---|
97 | for (unsigned i = 0; i < uColCount; ++i) |
---|
98 | Log("%c", msa1.GetChar(uSeqIndex1, i)); |
---|
99 | Log("\n"); |
---|
100 | Log("%16.16s ", msa2.GetSeqName(uSeqIndex2)); |
---|
101 | for (unsigned i = 0; i < uColCount; ++i) |
---|
102 | Log("%c", msa1.GetChar(uSeqIndex2, i)); |
---|
103 | Log("\n"); |
---|
104 | #endif |
---|
105 | |
---|
106 | SCORE scoreTotal = 0; |
---|
107 | |
---|
108 | // Substitution scores |
---|
109 | unsigned uFirstLetter1 = uInsane; |
---|
110 | unsigned uFirstLetter2 = uInsane; |
---|
111 | unsigned uLastLetter1 = uInsane; |
---|
112 | unsigned uLastLetter2 = uInsane; |
---|
113 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
---|
114 | { |
---|
115 | bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex); |
---|
116 | bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex); |
---|
117 | bool bWildcard1 = msa1.IsWildcard(uSeqIndex1, uColIndex); |
---|
118 | bool bWildcard2 = msa2.IsWildcard(uSeqIndex2, uColIndex); |
---|
119 | |
---|
120 | if (!bGap1) |
---|
121 | { |
---|
122 | if (uInsane == uFirstLetter1) |
---|
123 | uFirstLetter1 = uColIndex; |
---|
124 | uLastLetter1 = uColIndex; |
---|
125 | } |
---|
126 | if (!bGap2) |
---|
127 | { |
---|
128 | if (uInsane == uFirstLetter2) |
---|
129 | uFirstLetter2 = uColIndex; |
---|
130 | uLastLetter2 = uColIndex; |
---|
131 | } |
---|
132 | |
---|
133 | if (bGap1 || bGap2 || bWildcard1 || bWildcard2) |
---|
134 | continue; |
---|
135 | |
---|
136 | unsigned uLetter1 = msa1.GetLetter(uSeqIndex1, uColIndex); |
---|
137 | unsigned uLetter2 = msa2.GetLetter(uSeqIndex2, uColIndex); |
---|
138 | |
---|
139 | SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2]; |
---|
140 | scoreTotal += scoreMatch; |
---|
141 | #if TRACE |
---|
142 | Log("%c <-> %c = %7.1f %10.1f\n", |
---|
143 | msa1.GetChar(uSeqIndex1, uColIndex), |
---|
144 | msa2.GetChar(uSeqIndex2, uColIndex), |
---|
145 | scoreMatch, |
---|
146 | scoreTotal); |
---|
147 | #endif |
---|
148 | } |
---|
149 | |
---|
150 | *ptrLetters = scoreTotal; |
---|
151 | |
---|
152 | // Gap penalties |
---|
153 | unsigned uGapLength = uInsane; |
---|
154 | unsigned uGapStartCol = uInsane; |
---|
155 | bool bGapping1 = false; |
---|
156 | bool bGapping2 = false; |
---|
157 | |
---|
158 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
---|
159 | { |
---|
160 | bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex); |
---|
161 | bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex); |
---|
162 | |
---|
163 | if (bGap1 && bGap2) |
---|
164 | continue; |
---|
165 | |
---|
166 | if (bGapping1) |
---|
167 | { |
---|
168 | if (bGap1) |
---|
169 | ++uGapLength; |
---|
170 | else |
---|
171 | { |
---|
172 | bGapping1 = false; |
---|
173 | bool bNTerm = (uFirstLetter2 == uGapStartCol); |
---|
174 | bool bCTerm = (uLastLetter2 + 1 == uColIndex); |
---|
175 | SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm); |
---|
176 | scoreTotal += scoreGap; |
---|
177 | #if TRACE |
---|
178 | LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm); |
---|
179 | Log("GAP %7.1f %10.1f\n", |
---|
180 | scoreGap, |
---|
181 | scoreTotal); |
---|
182 | #endif |
---|
183 | } |
---|
184 | continue; |
---|
185 | } |
---|
186 | else |
---|
187 | { |
---|
188 | if (bGap1) |
---|
189 | { |
---|
190 | uGapStartCol = uColIndex; |
---|
191 | bGapping1 = true; |
---|
192 | uGapLength = 1; |
---|
193 | continue; |
---|
194 | } |
---|
195 | } |
---|
196 | |
---|
197 | if (bGapping2) |
---|
198 | { |
---|
199 | if (bGap2) |
---|
200 | ++uGapLength; |
---|
201 | else |
---|
202 | { |
---|
203 | bGapping2 = false; |
---|
204 | bool bNTerm = (uFirstLetter1 == uGapStartCol); |
---|
205 | bool bCTerm = (uLastLetter1 + 1 == uColIndex); |
---|
206 | SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm); |
---|
207 | scoreTotal += scoreGap; |
---|
208 | #if TRACE |
---|
209 | LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm); |
---|
210 | Log("GAP %7.1f %10.1f\n", |
---|
211 | scoreGap, |
---|
212 | scoreTotal); |
---|
213 | #endif |
---|
214 | } |
---|
215 | } |
---|
216 | else |
---|
217 | { |
---|
218 | if (bGap2) |
---|
219 | { |
---|
220 | uGapStartCol = uColIndex; |
---|
221 | bGapping2 = true; |
---|
222 | uGapLength = 1; |
---|
223 | } |
---|
224 | } |
---|
225 | } |
---|
226 | |
---|
227 | if (bGapping1 || bGapping2) |
---|
228 | { |
---|
229 | SCORE scoreGap = GapPenalty(uGapLength, true); |
---|
230 | scoreTotal += scoreGap; |
---|
231 | #if TRACE |
---|
232 | LogGap(uGapStartCol, uColCount - 1, uGapLength, false, true); |
---|
233 | Log("GAP %7.1f %10.1f\n", |
---|
234 | scoreGap, |
---|
235 | scoreTotal); |
---|
236 | #endif |
---|
237 | } |
---|
238 | *ptrGaps = scoreTotal - *ptrLetters; |
---|
239 | return scoreTotal; |
---|
240 | } |
---|
241 | |
---|
242 | // The usual sum-of-pairs objective score: sum the score |
---|
243 | // of the alignment of each pair of sequences. |
---|
244 | SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps) |
---|
245 | { |
---|
246 | const unsigned uSeqCount = msa.GetSeqCount(); |
---|
247 | SCORE scoreTotal = 0; |
---|
248 | unsigned uPairCount = 0; |
---|
249 | #if TRACE |
---|
250 | msa.LogMe(); |
---|
251 | Log(" Score Weight Weight Total\n"); |
---|
252 | Log("---------- ------ ------ ----------\n"); |
---|
253 | #endif |
---|
254 | SCORE TotalLetters = 0; |
---|
255 | SCORE TotalGaps = 0; |
---|
256 | for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1) |
---|
257 | { |
---|
258 | const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1); |
---|
259 | for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2) |
---|
260 | { |
---|
261 | const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2); |
---|
262 | const WEIGHT w = w1*w2; |
---|
263 | SCORE Letters; |
---|
264 | SCORE Gaps; |
---|
265 | SCORE scorePair = ScoreSeqPair(msa, uSeqIndex1, msa, uSeqIndex2, |
---|
266 | &Letters, &Gaps); |
---|
267 | scoreTotal += w1*w2*scorePair; |
---|
268 | TotalLetters += w1*w2*Letters; |
---|
269 | TotalGaps += w1*w2*Gaps; |
---|
270 | ++uPairCount; |
---|
271 | #if TRACE |
---|
272 | Log("%10.2f %6.3f %6.3f %10.2f %d=%s %d=%s\n", |
---|
273 | scorePair, |
---|
274 | w1, |
---|
275 | w2, |
---|
276 | scorePair*w1*w2, |
---|
277 | uSeqIndex1, |
---|
278 | msa.GetSeqName(uSeqIndex1), |
---|
279 | uSeqIndex2, |
---|
280 | msa.GetSeqName(uSeqIndex2)); |
---|
281 | #endif |
---|
282 | } |
---|
283 | } |
---|
284 | *ptrLetters = TotalLetters; |
---|
285 | *ptrGaps = TotalGaps; |
---|
286 | return scoreTotal; |
---|
287 | } |
---|
288 | |
---|
289 | #endif // DOUBLE_AFFINE |
---|