1 | #include "muscle.h" |
---|
2 | #include "profile.h" |
---|
3 | |
---|
4 | #define TRACE 0 |
---|
5 | |
---|
6 | enum |
---|
7 | { |
---|
8 | LL = 0, |
---|
9 | LG = 1, |
---|
10 | GL = 2, |
---|
11 | GG = 3, |
---|
12 | }; |
---|
13 | |
---|
14 | static const char *GapTypeToStr(int GapType) |
---|
15 | { |
---|
16 | switch (GapType) |
---|
17 | { |
---|
18 | case LL: return "LL"; |
---|
19 | case LG: return "LG"; |
---|
20 | case GL: return "GL"; |
---|
21 | case GG: return "GG"; |
---|
22 | } |
---|
23 | Quit("Invalid gap type"); |
---|
24 | return "?"; |
---|
25 | } |
---|
26 | |
---|
27 | static SCORE GapScoreMatrix[4][4]; |
---|
28 | |
---|
29 | static void InitGapScoreMatrix() |
---|
30 | { |
---|
31 | const SCORE t = (SCORE) 0.2; |
---|
32 | |
---|
33 | GapScoreMatrix[LL][LL] = 0; |
---|
34 | GapScoreMatrix[LL][LG] = g_scoreGapOpen; |
---|
35 | GapScoreMatrix[LL][GL] = 0; |
---|
36 | GapScoreMatrix[LL][GG] = 0; |
---|
37 | |
---|
38 | GapScoreMatrix[LG][LL] = g_scoreGapOpen; |
---|
39 | GapScoreMatrix[LG][LG] = 0; |
---|
40 | GapScoreMatrix[LG][GL] = g_scoreGapOpen; |
---|
41 | GapScoreMatrix[LG][GG] = t*g_scoreGapOpen; // approximation! |
---|
42 | |
---|
43 | GapScoreMatrix[GL][LL] = 0; |
---|
44 | GapScoreMatrix[GL][LG] = g_scoreGapOpen; |
---|
45 | GapScoreMatrix[GL][GL] = 0; |
---|
46 | GapScoreMatrix[GL][GG] = 0; |
---|
47 | |
---|
48 | GapScoreMatrix[GG][LL] = 0; |
---|
49 | GapScoreMatrix[GG][LG] = t*g_scoreGapOpen; // approximation! |
---|
50 | GapScoreMatrix[GG][GL] = 0; |
---|
51 | GapScoreMatrix[GG][GG] = 0; |
---|
52 | |
---|
53 | for (int i = 0; i < 4; ++i) |
---|
54 | for (int j = 0; j < i; ++j) |
---|
55 | if (GapScoreMatrix[i][j] != GapScoreMatrix[j][i]) |
---|
56 | Quit("GapScoreMatrix not symmetrical"); |
---|
57 | } |
---|
58 | |
---|
59 | static SCORE SPColBrute(const MSA &msa, unsigned uColIndex) |
---|
60 | { |
---|
61 | SCORE Sum = 0; |
---|
62 | const unsigned uSeqCount = msa.GetSeqCount(); |
---|
63 | for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1) |
---|
64 | { |
---|
65 | const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1); |
---|
66 | unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex); |
---|
67 | if (uLetter1 >= 20) |
---|
68 | continue; |
---|
69 | for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2) |
---|
70 | { |
---|
71 | const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2); |
---|
72 | unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex); |
---|
73 | if (uLetter2 >= 20) |
---|
74 | continue; |
---|
75 | SCORE t = w1*w2*(*g_ptrScoreMatrix)[uLetter1][uLetter2]; |
---|
76 | #if TRACE |
---|
77 | Log("Check %c %c w1=%.3g w2=%.3g Mx=%.3g t=%.3g\n", |
---|
78 | LetterToCharAmino(uLetter1), |
---|
79 | LetterToCharAmino(uLetter2), |
---|
80 | w1, |
---|
81 | w2, |
---|
82 | (*g_ptrScoreMatrix)[uLetter1][uLetter2], |
---|
83 | t); |
---|
84 | #endif |
---|
85 | Sum += t; |
---|
86 | } |
---|
87 | } |
---|
88 | return Sum; |
---|
89 | } |
---|
90 | |
---|
91 | static SCORE SPGapFreqs(const FCOUNT Freqs[]) |
---|
92 | { |
---|
93 | #if TRACE |
---|
94 | Log("Freqs="); |
---|
95 | for (unsigned i = 0; i < 4; ++i) |
---|
96 | if (Freqs[i] != 0) |
---|
97 | Log(" %s=%.3g", GapTypeToStr(i), Freqs[i]); |
---|
98 | Log("\n"); |
---|
99 | #endif |
---|
100 | |
---|
101 | SCORE TotalOffDiag = 0; |
---|
102 | SCORE TotalDiag = 0; |
---|
103 | for (unsigned i = 0; i < 4; ++i) |
---|
104 | { |
---|
105 | const FCOUNT fi = Freqs[i]; |
---|
106 | if (0 == fi) |
---|
107 | continue; |
---|
108 | const float *Row = GapScoreMatrix[i]; |
---|
109 | SCORE diagt = fi*fi*Row[i]; |
---|
110 | TotalDiag += diagt; |
---|
111 | #if TRACE |
---|
112 | Log("SPFGaps %s %s + Mx=%.3g TotalDiag += %.3g\n", |
---|
113 | GapTypeToStr(i), |
---|
114 | GapTypeToStr(i), |
---|
115 | Row[i], |
---|
116 | diagt); |
---|
117 | #endif |
---|
118 | SCORE Sum = 0; |
---|
119 | for (unsigned j = 0; j < i; ++j) |
---|
120 | { |
---|
121 | SCORE t = Freqs[j]*Row[j]; |
---|
122 | #if TRACE |
---|
123 | if (Freqs[j] != 0) |
---|
124 | Log("SPFGaps %s %s + Mx=%.3g Sum += %.3g\n", |
---|
125 | GapTypeToStr(i), |
---|
126 | GapTypeToStr(j), |
---|
127 | Row[j], |
---|
128 | fi*t); |
---|
129 | #endif |
---|
130 | Sum += t; |
---|
131 | } |
---|
132 | TotalOffDiag += fi*Sum; |
---|
133 | } |
---|
134 | #if TRACE |
---|
135 | Log("SPFGap TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n", |
---|
136 | TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag); |
---|
137 | #endif |
---|
138 | return TotalOffDiag*2 + TotalDiag; |
---|
139 | } |
---|
140 | |
---|
141 | static SCORE SPFreqs(const FCOUNT Freqs[]) |
---|
142 | { |
---|
143 | #if TRACE |
---|
144 | Log("Freqs="); |
---|
145 | for (unsigned i = 0; i < 20; ++i) |
---|
146 | if (Freqs[i] != 0) |
---|
147 | Log(" %c=%.3g", LetterToCharAmino(i), Freqs[i]); |
---|
148 | Log("\n"); |
---|
149 | #endif |
---|
150 | |
---|
151 | SCORE TotalOffDiag = 0; |
---|
152 | SCORE TotalDiag = 0; |
---|
153 | for (unsigned i = 0; i < 20; ++i) |
---|
154 | { |
---|
155 | const FCOUNT fi = Freqs[i]; |
---|
156 | if (0 == fi) |
---|
157 | continue; |
---|
158 | const float *Row = (*g_ptrScoreMatrix)[i]; |
---|
159 | SCORE diagt = fi*fi*Row[i]; |
---|
160 | TotalDiag += diagt; |
---|
161 | #if TRACE |
---|
162 | Log("SPF %c %c + Mx=%.3g TotalDiag += %.3g\n", |
---|
163 | LetterToCharAmino(i), |
---|
164 | LetterToCharAmino(i), |
---|
165 | Row[i], |
---|
166 | diagt); |
---|
167 | #endif |
---|
168 | SCORE Sum = 0; |
---|
169 | for (unsigned j = 0; j < i; ++j) |
---|
170 | { |
---|
171 | SCORE t = Freqs[j]*Row[j]; |
---|
172 | #if TRACE |
---|
173 | if (Freqs[j] != 0) |
---|
174 | Log("SPF %c %c + Mx=%.3g Sum += %.3g\n", |
---|
175 | LetterToCharAmino(i), |
---|
176 | LetterToCharAmino(j), |
---|
177 | Row[j], |
---|
178 | fi*t); |
---|
179 | #endif |
---|
180 | Sum += t; |
---|
181 | } |
---|
182 | TotalOffDiag += fi*Sum; |
---|
183 | } |
---|
184 | #if TRACE |
---|
185 | Log("SPF TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n", |
---|
186 | TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag); |
---|
187 | #endif |
---|
188 | return TotalOffDiag*2 + TotalDiag; |
---|
189 | } |
---|
190 | |
---|
191 | static SCORE ObjScoreSPCol(const MSA &msa, unsigned uColIndex) |
---|
192 | { |
---|
193 | FCOUNT Freqs[20]; |
---|
194 | FCOUNT GapFreqs[4]; |
---|
195 | |
---|
196 | memset(Freqs, 0, sizeof(Freqs)); |
---|
197 | memset(GapFreqs, 0, sizeof(GapFreqs)); |
---|
198 | |
---|
199 | const unsigned uSeqCount = msa.GetSeqCount(); |
---|
200 | #if TRACE |
---|
201 | Log("Weights="); |
---|
202 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
203 | Log(" %u=%.3g", uSeqIndex, msa.GetSeqWeight(uSeqIndex)); |
---|
204 | Log("\n"); |
---|
205 | #endif |
---|
206 | SCORE SelfOverCount = 0; |
---|
207 | SCORE GapSelfOverCount = 0; |
---|
208 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
209 | { |
---|
210 | WEIGHT w = msa.GetSeqWeight(uSeqIndex); |
---|
211 | |
---|
212 | bool bGapThisCol = msa.IsGap(uSeqIndex, uColIndex); |
---|
213 | bool bGapPrevCol = (uColIndex == 0 ? false : msa.IsGap(uSeqIndex, uColIndex - 1)); |
---|
214 | int GapType = bGapThisCol + 2*bGapPrevCol; |
---|
215 | assert(GapType >= 0 && GapType < 4); |
---|
216 | GapFreqs[GapType] += w; |
---|
217 | SCORE gapt = w*w*GapScoreMatrix[GapType][GapType]; |
---|
218 | GapSelfOverCount += gapt; |
---|
219 | |
---|
220 | if (bGapThisCol) |
---|
221 | continue; |
---|
222 | unsigned uLetter = msa.GetLetterEx(uSeqIndex, uColIndex); |
---|
223 | if (uLetter >= 20) |
---|
224 | continue; |
---|
225 | Freqs[uLetter] += w; |
---|
226 | SCORE t = w*w*(*g_ptrScoreMatrix)[uLetter][uLetter]; |
---|
227 | #if TRACE |
---|
228 | Log("FastCol compute freqs & SelfOverCount %c w=%.3g M=%.3g SelfOverCount += %.3g\n", |
---|
229 | LetterToCharAmino(uLetter), w, (*g_ptrScoreMatrix)[uLetter][uLetter], t); |
---|
230 | #endif |
---|
231 | SelfOverCount += t; |
---|
232 | } |
---|
233 | SCORE SPF = SPFreqs(Freqs); |
---|
234 | SCORE Col = SPF - SelfOverCount; |
---|
235 | |
---|
236 | SCORE SPFGaps = SPGapFreqs(GapFreqs); |
---|
237 | SCORE ColGaps = SPFGaps - GapSelfOverCount; |
---|
238 | #if TRACE |
---|
239 | Log("SPF=%.3g - SelfOverCount=%.3g = %.3g\n", SPF, SelfOverCount, Col); |
---|
240 | Log("SPFGaps=%.3g - GapsSelfOverCount=%.3g = %.3g\n", SPFGaps, GapSelfOverCount, ColGaps); |
---|
241 | #endif |
---|
242 | return Col + ColGaps; |
---|
243 | } |
---|
244 | |
---|
245 | SCORE ObjScoreSPDimer(const MSA &msa) |
---|
246 | { |
---|
247 | static bool bGapScoreMatrixInit = false; |
---|
248 | if (!bGapScoreMatrixInit) |
---|
249 | InitGapScoreMatrix(); |
---|
250 | |
---|
251 | SCORE Total = 0; |
---|
252 | const unsigned uSeqCount = msa.GetSeqCount(); |
---|
253 | const unsigned uColCount = msa.GetColCount(); |
---|
254 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
---|
255 | { |
---|
256 | SCORE Col = ObjScoreSPCol(msa, uColIndex); |
---|
257 | #if TRACE |
---|
258 | { |
---|
259 | SCORE ColCheck = SPColBrute(msa, uColIndex); |
---|
260 | Log("FastCol=%.3g CheckCol=%.3g\n", Col, ColCheck); |
---|
261 | } |
---|
262 | #endif |
---|
263 | Total += Col; |
---|
264 | } |
---|
265 | #if TRACE |
---|
266 | Log("Total/2 = %.3g (final result from fast)\n", Total/2); |
---|
267 | #endif |
---|
268 | return Total/2; |
---|
269 | } |
---|