| 1 | #include "muscle.h" |
|---|
| 2 | #include "msa.h" |
|---|
| 3 | #include "profile.h" |
|---|
| 4 | #include "objscore.h" |
|---|
| 5 | |
|---|
| 6 | #define TRACE 0 |
|---|
| 7 | #define TRACE_SEQPAIR 0 |
|---|
| 8 | #define TEST_SPFAST 0 |
|---|
| 9 | |
|---|
| 10 | extern SCOREMATRIX VTML_LA; |
|---|
| 11 | extern SCOREMATRIX PAM200; |
|---|
| 12 | extern SCOREMATRIX PAM200NoCenter; |
|---|
| 13 | extern SCOREMATRIX VTML_SP; |
|---|
| 14 | extern SCOREMATRIX VTML_SPNoCenter; |
|---|
| 15 | extern SCOREMATRIX NUC_SP; |
|---|
| 16 | |
|---|
| 17 | SCORE g_SPScoreLetters; |
|---|
| 18 | SCORE g_SPScoreGaps; |
|---|
| 19 | |
|---|
| 20 | static SCORE TermGapScore(bool Gap) |
|---|
| 21 | { |
|---|
| 22 | switch (g_TermGaps) |
|---|
| 23 | { |
|---|
| 24 | case TERMGAPS_Full: |
|---|
| 25 | return 0; |
|---|
| 26 | |
|---|
| 27 | case TERMGAPS_Half: |
|---|
| 28 | if (Gap) |
|---|
| 29 | return g_scoreGapOpen/2; |
|---|
| 30 | return 0; |
|---|
| 31 | |
|---|
| 32 | case TERMGAPS_Ext: |
|---|
| 33 | if (Gap) |
|---|
| 34 | return g_scoreGapExtend; |
|---|
| 35 | return 0; |
|---|
| 36 | } |
|---|
| 37 | Quit("TermGapScore?!"); |
|---|
| 38 | return 0; |
|---|
| 39 | } |
|---|
| 40 | |
|---|
| 41 | SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1, |
|---|
| 42 | const MSA &msa2, unsigned uSeqIndex2) |
|---|
| 43 | { |
|---|
| 44 | const unsigned uColCount = msa1.GetColCount(); |
|---|
| 45 | const unsigned uColCount2 = msa2.GetColCount(); |
|---|
| 46 | if (uColCount != uColCount2) |
|---|
| 47 | Quit("ScoreSeqPairLetters, different lengths"); |
|---|
| 48 | |
|---|
| 49 | #if TRACE_SEQPAIR |
|---|
| 50 | { |
|---|
| 51 | Log("\n"); |
|---|
| 52 | Log("ScoreSeqPairLetters\n"); |
|---|
| 53 | MSA msaTmp; |
|---|
| 54 | msaTmp.SetSize(2, uColCount); |
|---|
| 55 | msaTmp.CopySeq(0, msa1, uSeqIndex1); |
|---|
| 56 | msaTmp.CopySeq(1, msa2, uSeqIndex2); |
|---|
| 57 | msaTmp.LogMe(); |
|---|
| 58 | } |
|---|
| 59 | #endif |
|---|
| 60 | |
|---|
| 61 | SCORE scoreLetters = 0; |
|---|
| 62 | SCORE scoreGaps = 0; |
|---|
| 63 | bool bGapping1 = false; |
|---|
| 64 | bool bGapping2 = false; |
|---|
| 65 | |
|---|
| 66 | unsigned uColStart = 0; |
|---|
| 67 | bool bLeftTermGap = false; |
|---|
| 68 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
|---|
| 69 | { |
|---|
| 70 | bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex); |
|---|
| 71 | bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex); |
|---|
| 72 | if (!bGap1 || !bGap2) |
|---|
| 73 | { |
|---|
| 74 | if (bGap1 || bGap2) |
|---|
| 75 | bLeftTermGap = true; |
|---|
| 76 | uColStart = uColIndex; |
|---|
| 77 | break; |
|---|
| 78 | } |
|---|
| 79 | } |
|---|
| 80 | |
|---|
| 81 | unsigned uColEnd = uColCount - 1; |
|---|
| 82 | bool bRightTermGap = false; |
|---|
| 83 | for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex) |
|---|
| 84 | { |
|---|
| 85 | bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex); |
|---|
| 86 | bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex); |
|---|
| 87 | if (!bGap1 || !bGap2) |
|---|
| 88 | { |
|---|
| 89 | if (bGap1 || bGap2) |
|---|
| 90 | bRightTermGap = true; |
|---|
| 91 | uColEnd = (unsigned) iColIndex; |
|---|
| 92 | break; |
|---|
| 93 | } |
|---|
| 94 | } |
|---|
| 95 | |
|---|
| 96 | #if TRACE_SEQPAIR |
|---|
| 97 | Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap); |
|---|
| 98 | #endif |
|---|
| 99 | |
|---|
| 100 | for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex) |
|---|
| 101 | { |
|---|
| 102 | unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex); |
|---|
| 103 | if (uLetter1 >= g_AlphaSize) |
|---|
| 104 | continue; |
|---|
| 105 | unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex); |
|---|
| 106 | if (uLetter2 >= g_AlphaSize) |
|---|
| 107 | continue; |
|---|
| 108 | |
|---|
| 109 | SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2]; |
|---|
| 110 | scoreLetters += scoreMatch; |
|---|
| 111 | } |
|---|
| 112 | return scoreLetters; |
|---|
| 113 | } |
|---|
| 114 | |
|---|
| 115 | SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1, |
|---|
| 116 | const MSA &msa2, unsigned uSeqIndex2) |
|---|
| 117 | { |
|---|
| 118 | const unsigned uColCount = msa1.GetColCount(); |
|---|
| 119 | const unsigned uColCount2 = msa2.GetColCount(); |
|---|
| 120 | if (uColCount != uColCount2) |
|---|
| 121 | Quit("ScoreSeqPairGaps, different lengths"); |
|---|
| 122 | |
|---|
| 123 | #if TRACE_SEQPAIR |
|---|
| 124 | { |
|---|
| 125 | Log("\n"); |
|---|
| 126 | Log("ScoreSeqPairGaps\n"); |
|---|
| 127 | MSA msaTmp; |
|---|
| 128 | msaTmp.SetSize(2, uColCount); |
|---|
| 129 | msaTmp.CopySeq(0, msa1, uSeqIndex1); |
|---|
| 130 | msaTmp.CopySeq(1, msa2, uSeqIndex2); |
|---|
| 131 | msaTmp.LogMe(); |
|---|
| 132 | } |
|---|
| 133 | #endif |
|---|
| 134 | |
|---|
| 135 | SCORE scoreGaps = 0; |
|---|
| 136 | bool bGapping1 = false; |
|---|
| 137 | bool bGapping2 = false; |
|---|
| 138 | |
|---|
| 139 | unsigned uColStart = 0; |
|---|
| 140 | bool bLeftTermGap = false; |
|---|
| 141 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
|---|
| 142 | { |
|---|
| 143 | bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex); |
|---|
| 144 | bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex); |
|---|
| 145 | if (!bGap1 || !bGap2) |
|---|
| 146 | { |
|---|
| 147 | if (bGap1 || bGap2) |
|---|
| 148 | bLeftTermGap = true; |
|---|
| 149 | uColStart = uColIndex; |
|---|
| 150 | break; |
|---|
| 151 | } |
|---|
| 152 | } |
|---|
| 153 | |
|---|
| 154 | unsigned uColEnd = uColCount - 1; |
|---|
| 155 | bool bRightTermGap = false; |
|---|
| 156 | for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex) |
|---|
| 157 | { |
|---|
| 158 | bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex); |
|---|
| 159 | bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex); |
|---|
| 160 | if (!bGap1 || !bGap2) |
|---|
| 161 | { |
|---|
| 162 | if (bGap1 || bGap2) |
|---|
| 163 | bRightTermGap = true; |
|---|
| 164 | uColEnd = (unsigned) iColIndex; |
|---|
| 165 | break; |
|---|
| 166 | } |
|---|
| 167 | } |
|---|
| 168 | |
|---|
| 169 | #if TRACE_SEQPAIR |
|---|
| 170 | Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap); |
|---|
| 171 | #endif |
|---|
| 172 | |
|---|
| 173 | for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex) |
|---|
| 174 | { |
|---|
| 175 | bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex); |
|---|
| 176 | bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex); |
|---|
| 177 | |
|---|
| 178 | if (bGap1 && bGap2) |
|---|
| 179 | continue; |
|---|
| 180 | |
|---|
| 181 | if (bGap1) |
|---|
| 182 | { |
|---|
| 183 | if (!bGapping1) |
|---|
| 184 | { |
|---|
| 185 | #if TRACE_SEQPAIR |
|---|
| 186 | Log("Gap open seq 1 col %d\n", uColIndex); |
|---|
| 187 | #endif |
|---|
| 188 | if (uColIndex == uColStart) |
|---|
| 189 | scoreGaps += TermGapScore(true); |
|---|
| 190 | else |
|---|
| 191 | scoreGaps += g_scoreGapOpen; |
|---|
| 192 | bGapping1 = true; |
|---|
| 193 | } |
|---|
| 194 | else |
|---|
| 195 | scoreGaps += g_scoreGapExtend; |
|---|
| 196 | continue; |
|---|
| 197 | } |
|---|
| 198 | |
|---|
| 199 | else if (bGap2) |
|---|
| 200 | { |
|---|
| 201 | if (!bGapping2) |
|---|
| 202 | { |
|---|
| 203 | #if TRACE_SEQPAIR |
|---|
| 204 | Log("Gap open seq 2 col %d\n", uColIndex); |
|---|
| 205 | #endif |
|---|
| 206 | if (uColIndex == uColStart) |
|---|
| 207 | scoreGaps += TermGapScore(true); |
|---|
| 208 | else |
|---|
| 209 | scoreGaps += g_scoreGapOpen; |
|---|
| 210 | bGapping2 = true; |
|---|
| 211 | } |
|---|
| 212 | else |
|---|
| 213 | scoreGaps += g_scoreGapExtend; |
|---|
| 214 | continue; |
|---|
| 215 | } |
|---|
| 216 | |
|---|
| 217 | bGapping1 = false; |
|---|
| 218 | bGapping2 = false; |
|---|
| 219 | } |
|---|
| 220 | |
|---|
| 221 | if (bGapping1 || bGapping2) |
|---|
| 222 | { |
|---|
| 223 | scoreGaps -= g_scoreGapOpen; |
|---|
| 224 | scoreGaps += TermGapScore(true); |
|---|
| 225 | } |
|---|
| 226 | return scoreGaps; |
|---|
| 227 | } |
|---|
| 228 | |
|---|
| 229 | // The usual sum-of-pairs objective score: sum the score |
|---|
| 230 | // of the alignment of each pair of sequences. |
|---|
| 231 | SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[]) |
|---|
| 232 | { |
|---|
| 233 | #if TRACE |
|---|
| 234 | Log("==================ObjScoreSP==============\n"); |
|---|
| 235 | Log("msa=\n"); |
|---|
| 236 | msa.LogMe(); |
|---|
| 237 | #endif |
|---|
| 238 | g_SPScoreLetters = 0; |
|---|
| 239 | g_SPScoreGaps = 0; |
|---|
| 240 | |
|---|
| 241 | if (0 != MatchScore) |
|---|
| 242 | { |
|---|
| 243 | const unsigned uColCount = msa.GetColCount(); |
|---|
| 244 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
|---|
| 245 | MatchScore[uColIndex] = 0; |
|---|
| 246 | } |
|---|
| 247 | |
|---|
| 248 | const unsigned uSeqCount = msa.GetSeqCount(); |
|---|
| 249 | SCORE scoreTotal = 0; |
|---|
| 250 | unsigned uPairCount = 0; |
|---|
| 251 | #if TRACE |
|---|
| 252 | Log("Seq1 Seq2 wt1 wt2 Letters Gaps Unwt.Score Wt.Score Total\n"); |
|---|
| 253 | Log("---- ---- ------ ------ ---------- ---------- ---------- ---------- ----------\n"); |
|---|
| 254 | #endif |
|---|
| 255 | for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1) |
|---|
| 256 | { |
|---|
| 257 | const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1); |
|---|
| 258 | for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2) |
|---|
| 259 | { |
|---|
| 260 | const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2); |
|---|
| 261 | const WEIGHT w = w1*w2; |
|---|
| 262 | |
|---|
| 263 | SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2); |
|---|
| 264 | SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2); |
|---|
| 265 | SCORE scorePair = scoreLetters + scoreGaps; |
|---|
| 266 | ++uPairCount; |
|---|
| 267 | |
|---|
| 268 | scoreTotal += w*scorePair; |
|---|
| 269 | |
|---|
| 270 | g_SPScoreLetters += w*scoreLetters; |
|---|
| 271 | g_SPScoreGaps += w*scoreGaps; |
|---|
| 272 | #if TRACE |
|---|
| 273 | Log("%4d %4d %6.3f %6.3f %10.2f %10.2f %10.2f %10.2f %10.2f >%s >%s\n", |
|---|
| 274 | uSeqIndex1, |
|---|
| 275 | uSeqIndex2, |
|---|
| 276 | w1, |
|---|
| 277 | w2, |
|---|
| 278 | scoreLetters, |
|---|
| 279 | scoreGaps, |
|---|
| 280 | scorePair, |
|---|
| 281 | scorePair*w1*w2, |
|---|
| 282 | scoreTotal, |
|---|
| 283 | msa.GetSeqName(uSeqIndex1), |
|---|
| 284 | msa.GetSeqName(uSeqIndex2)); |
|---|
| 285 | #endif |
|---|
| 286 | } |
|---|
| 287 | } |
|---|
| 288 | #if TEST_SPFAST |
|---|
| 289 | { |
|---|
| 290 | SCORE f = ObjScoreSPFast(msa); |
|---|
| 291 | Log("Fast = %.6g\n", f); |
|---|
| 292 | Log("Brute = %.6g\n", scoreTotal); |
|---|
| 293 | if (BTEq(f, scoreTotal)) |
|---|
| 294 | Log("Agree\n"); |
|---|
| 295 | else |
|---|
| 296 | Log("** DISAGREE **\n"); |
|---|
| 297 | } |
|---|
| 298 | #endif |
|---|
| 299 | // return scoreTotal / uPairCount; |
|---|
| 300 | return scoreTotal; |
|---|
| 301 | } |
|---|
| 302 | |
|---|
| 303 | // Objective score defined as the dynamic programming score. |
|---|
| 304 | // Input is two alignments, which must be of the same length. |
|---|
| 305 | // Result is the same profile-profile score that is optimized |
|---|
| 306 | // by dynamic programming. |
|---|
| 307 | SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[]) |
|---|
| 308 | { |
|---|
| 309 | const unsigned uColCount = msa1.GetColCount(); |
|---|
| 310 | if (msa2.GetColCount() != uColCount) |
|---|
| 311 | Quit("ObjScoreDP, must be same length"); |
|---|
| 312 | |
|---|
| 313 | const unsigned uColCount1 = msa1.GetColCount(); |
|---|
| 314 | const unsigned uColCount2 = msa2.GetColCount(); |
|---|
| 315 | |
|---|
| 316 | const ProfPos *PA = ProfileFromMSA(msa1); |
|---|
| 317 | const ProfPos *PB = ProfileFromMSA(msa2); |
|---|
| 318 | |
|---|
| 319 | return ObjScoreDP_Profs(PA, PB, uColCount1, MatchScore); |
|---|
| 320 | } |
|---|
| 321 | |
|---|
| 322 | SCORE ObjScoreDP_Profs(const ProfPos *PA, const ProfPos *PB, unsigned uColCount, |
|---|
| 323 | SCORE MatchScore[]) |
|---|
| 324 | { |
|---|
| 325 | //#if TRACE |
|---|
| 326 | // Log("Profile 1:\n"); |
|---|
| 327 | // ListProfile(PA, uColCount, &msa1); |
|---|
| 328 | // |
|---|
| 329 | // Log("Profile 2:\n"); |
|---|
| 330 | // ListProfile(PB, uColCount, &msa2); |
|---|
| 331 | //#endif |
|---|
| 332 | |
|---|
| 333 | SCORE scoreTotal = 0; |
|---|
| 334 | |
|---|
| 335 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
|---|
| 336 | { |
|---|
| 337 | const ProfPos &PPA = PA[uColIndex]; |
|---|
| 338 | const ProfPos &PPB = PB[uColIndex]; |
|---|
| 339 | |
|---|
| 340 | SCORE scoreGap = 0; |
|---|
| 341 | SCORE scoreMatch = 0; |
|---|
| 342 | // If gapped column... |
|---|
| 343 | if (PPA.m_bAllGaps && PPB.m_bAllGaps) |
|---|
| 344 | scoreGap = 0; |
|---|
| 345 | else if (PPA.m_bAllGaps) |
|---|
| 346 | { |
|---|
| 347 | if (uColCount - 1 == uColIndex || !PA[uColIndex+1].m_bAllGaps) |
|---|
| 348 | scoreGap = PPB.m_scoreGapClose; |
|---|
| 349 | if (0 == uColIndex || !PA[uColIndex-1].m_bAllGaps) |
|---|
| 350 | scoreGap += PPB.m_scoreGapOpen; |
|---|
| 351 | //if (0 == scoreGap) |
|---|
| 352 | // scoreGap = PPB.m_scoreGapExtend; |
|---|
| 353 | } |
|---|
| 354 | else if (PPB.m_bAllGaps) |
|---|
| 355 | { |
|---|
| 356 | if (uColCount - 1 == uColIndex || !PB[uColIndex+1].m_bAllGaps) |
|---|
| 357 | scoreGap = PPA.m_scoreGapClose; |
|---|
| 358 | if (0 == uColIndex || !PB[uColIndex-1].m_bAllGaps) |
|---|
| 359 | scoreGap += PPA.m_scoreGapOpen; |
|---|
| 360 | //if (0 == scoreGap) |
|---|
| 361 | // scoreGap = PPA.m_scoreGapExtend; |
|---|
| 362 | } |
|---|
| 363 | else |
|---|
| 364 | scoreMatch = ScoreProfPos2(PPA, PPB); |
|---|
| 365 | |
|---|
| 366 | if (0 != MatchScore) |
|---|
| 367 | MatchScore[uColIndex] = scoreMatch; |
|---|
| 368 | |
|---|
| 369 | scoreTotal += scoreMatch + scoreGap; |
|---|
| 370 | |
|---|
| 371 | extern bool g_bTracePPScore; |
|---|
| 372 | extern MSA *g_ptrPPScoreMSA1; |
|---|
| 373 | extern MSA *g_ptrPPScoreMSA2; |
|---|
| 374 | if (g_bTracePPScore) |
|---|
| 375 | { |
|---|
| 376 | const MSA &msa1 = *g_ptrPPScoreMSA1; |
|---|
| 377 | const MSA &msa2 = *g_ptrPPScoreMSA2; |
|---|
| 378 | const unsigned uSeqCount1 = msa1.GetSeqCount(); |
|---|
| 379 | const unsigned uSeqCount2 = msa2.GetSeqCount(); |
|---|
| 380 | |
|---|
| 381 | for (unsigned n = 0; n < uSeqCount1; ++n) |
|---|
| 382 | Log("%c", msa1.GetChar(n, uColIndex)); |
|---|
| 383 | Log(" "); |
|---|
| 384 | for (unsigned n = 0; n < uSeqCount2; ++n) |
|---|
| 385 | Log("%c", msa2.GetChar(n, uColIndex)); |
|---|
| 386 | Log(" %10.3f", scoreMatch); |
|---|
| 387 | if (scoreGap != 0) |
|---|
| 388 | Log(" %10.3f", scoreGap); |
|---|
| 389 | Log("\n"); |
|---|
| 390 | } |
|---|
| 391 | } |
|---|
| 392 | |
|---|
| 393 | delete[] PA; |
|---|
| 394 | delete[] PB; |
|---|
| 395 | |
|---|
| 396 | return scoreTotal; |
|---|
| 397 | } |
|---|
| 398 | |
|---|
| 399 | // Objective score defined as the sum of profile-sequence |
|---|
| 400 | // scores for each sequence in the alignment. The profile |
|---|
| 401 | // is computed from the entire alignment, so this includes |
|---|
| 402 | // the score of each sequence against itself. This is to |
|---|
| 403 | // avoid recomputing the profile each time, so we reduce |
|---|
| 404 | // complexity but introduce a questionable approximation. |
|---|
| 405 | // The goal is to see if we can exploit the apparent |
|---|
| 406 | // improvement in performance of log-expectation score |
|---|
| 407 | // over the usual sum-of-pairs by optimizing this |
|---|
| 408 | // objective score in the iterative refinement stage. |
|---|
| 409 | SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[]) |
|---|
| 410 | { |
|---|
| 411 | if (g_PPScore != PPSCORE_LE) |
|---|
| 412 | Quit("FastScoreMSA_LASimple: LA"); |
|---|
| 413 | |
|---|
| 414 | const unsigned uSeqCount = msa.GetSeqCount(); |
|---|
| 415 | const unsigned uColCount = msa.GetColCount(); |
|---|
| 416 | |
|---|
| 417 | const ProfPos *Prof = ProfileFromMSA(msa); |
|---|
| 418 | |
|---|
| 419 | if (0 != MatchScore) |
|---|
| 420 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
|---|
| 421 | MatchScore[uColIndex] = 0; |
|---|
| 422 | |
|---|
| 423 | SCORE scoreTotal = 0; |
|---|
| 424 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
|---|
| 425 | { |
|---|
| 426 | const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex); |
|---|
| 427 | SCORE scoreSeq = 0; |
|---|
| 428 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
|---|
| 429 | { |
|---|
| 430 | const ProfPos &PP = Prof[uColIndex]; |
|---|
| 431 | if (msa.IsGap(uSeqIndex, uColIndex)) |
|---|
| 432 | { |
|---|
| 433 | bool bOpen = (0 == uColIndex || |
|---|
| 434 | !msa.IsGap(uSeqIndex, uColIndex - 1)); |
|---|
| 435 | bool bClose = (uColCount - 1 == uColIndex || |
|---|
| 436 | !msa.IsGap(uSeqIndex, uColIndex + 1)); |
|---|
| 437 | |
|---|
| 438 | if (bOpen) |
|---|
| 439 | scoreSeq += PP.m_scoreGapOpen; |
|---|
| 440 | if (bClose) |
|---|
| 441 | scoreSeq += PP.m_scoreGapClose; |
|---|
| 442 | //if (!bOpen && !bClose) |
|---|
| 443 | // scoreSeq += PP.m_scoreGapExtend; |
|---|
| 444 | } |
|---|
| 445 | else if (msa.IsWildcard(uSeqIndex, uColIndex)) |
|---|
| 446 | continue; |
|---|
| 447 | else |
|---|
| 448 | { |
|---|
| 449 | unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex); |
|---|
| 450 | const SCORE scoreMatch = PP.m_AAScores[uLetter]; |
|---|
| 451 | if (0 != MatchScore) |
|---|
| 452 | MatchScore[uColIndex] += weightSeq*scoreMatch; |
|---|
| 453 | scoreSeq += scoreMatch; |
|---|
| 454 | } |
|---|
| 455 | } |
|---|
| 456 | scoreTotal += weightSeq*scoreSeq; |
|---|
| 457 | } |
|---|
| 458 | |
|---|
| 459 | delete[] Prof; |
|---|
| 460 | return scoreTotal; |
|---|
| 461 | } |
|---|
| 462 | |
|---|
| 463 | // The XP score is the sum of the score of each pair of |
|---|
| 464 | // sequences between two profiles which are aligned to |
|---|
| 465 | // each other. Notice that for two given profiles aligned |
|---|
| 466 | // in different ways, the difference in XP score must be |
|---|
| 467 | // the same as the difference in SP score because the |
|---|
| 468 | // score of a pair of sequences in one profile doesn't |
|---|
| 469 | // depend on the alignment. |
|---|
| 470 | SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2) |
|---|
| 471 | { |
|---|
| 472 | const unsigned uColCount1 = msa1.GetColCount(); |
|---|
| 473 | const unsigned uColCount2 = msa2.GetColCount(); |
|---|
| 474 | if (uColCount1 != uColCount2) |
|---|
| 475 | Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2); |
|---|
| 476 | |
|---|
| 477 | const unsigned uSeqCount1 = msa1.GetSeqCount(); |
|---|
| 478 | const unsigned uSeqCount2 = msa2.GetSeqCount(); |
|---|
| 479 | |
|---|
| 480 | #if TRACE |
|---|
| 481 | Log(" Score Weight Weight Total\n"); |
|---|
| 482 | Log("---------- ------ ------ ----------\n"); |
|---|
| 483 | #endif |
|---|
| 484 | |
|---|
| 485 | SCORE scoreTotal = 0; |
|---|
| 486 | unsigned uPairCount = 0; |
|---|
| 487 | for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1) |
|---|
| 488 | { |
|---|
| 489 | const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1); |
|---|
| 490 | for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2) |
|---|
| 491 | { |
|---|
| 492 | const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2); |
|---|
| 493 | const WEIGHT w = w1*w2; |
|---|
| 494 | SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2); |
|---|
| 495 | SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2); |
|---|
| 496 | SCORE scorePair = scoreLetters + scoreGaps; |
|---|
| 497 | scoreTotal += w1*w2*scorePair; |
|---|
| 498 | ++uPairCount; |
|---|
| 499 | #if TRACE |
|---|
| 500 | Log("%10.2f %6.3f %6.3f %10.2f >%s >%s\n", |
|---|
| 501 | scorePair, |
|---|
| 502 | w1, |
|---|
| 503 | w2, |
|---|
| 504 | scorePair*w1*w2, |
|---|
| 505 | msa1.GetSeqName(uSeqIndex1), |
|---|
| 506 | msa2.GetSeqName(uSeqIndex2)); |
|---|
| 507 | #endif |
|---|
| 508 | } |
|---|
| 509 | } |
|---|
| 510 | if (0 == uPairCount) |
|---|
| 511 | Quit("0 == uPairCount"); |
|---|
| 512 | |
|---|
| 513 | #if TRACE |
|---|
| 514 | Log("msa1=\n"); |
|---|
| 515 | msa1.LogMe(); |
|---|
| 516 | Log("msa2=\n"); |
|---|
| 517 | msa2.LogMe(); |
|---|
| 518 | Log("XP=%g\n", scoreTotal); |
|---|
| 519 | #endif |
|---|
| 520 | // return scoreTotal / uPairCount; |
|---|
| 521 | return scoreTotal; |
|---|
| 522 | } |
|---|