| 1 | #include "muscle.h" |
|---|
| 2 | #include "msa.h" |
|---|
| 3 | #include "profile.h" |
|---|
| 4 | #include "objscore.h" |
|---|
| 5 | |
|---|
| 6 | #if DOUBLE_AFFINE |
|---|
| 7 | |
|---|
| 8 | #define TRACE 0 |
|---|
| 9 | #define TEST_SPFAST 0 |
|---|
| 10 | |
|---|
| 11 | static SCORE GapPenalty(unsigned uLength, bool Term, SCORE g, SCORE e) |
|---|
| 12 | { |
|---|
| 13 | //if (Term) |
|---|
| 14 | // { |
|---|
| 15 | // switch (g_TermGap) |
|---|
| 16 | // { |
|---|
| 17 | // case TERMGAP_Full: |
|---|
| 18 | // return g + (uLength - 1)*e; |
|---|
| 19 | |
|---|
| 20 | // case TERMGAP_Half: |
|---|
| 21 | // return g/2 + (uLength - 1)*e; |
|---|
| 22 | |
|---|
| 23 | // case TERMGAP_Ext: |
|---|
| 24 | // return uLength*e; |
|---|
| 25 | // } |
|---|
| 26 | // Quit("Bad termgap"); |
|---|
| 27 | // } |
|---|
| 28 | //else |
|---|
| 29 | // return g + (uLength - 1)*e; |
|---|
| 30 | //return MINUS_INFINITY; |
|---|
| 31 | return g + (uLength - 1)*e; |
|---|
| 32 | } |
|---|
| 33 | |
|---|
| 34 | static SCORE GapPenalty(unsigned uLength, bool Term) |
|---|
| 35 | { |
|---|
| 36 | SCORE s1 = GapPenalty(uLength, Term, g_scoreGapOpen, g_scoreGapExtend); |
|---|
| 37 | #if DOUBLE_AFFINE |
|---|
| 38 | SCORE s2 = GapPenalty(uLength, Term, g_scoreGapOpen2, g_scoreGapExtend2); |
|---|
| 39 | if (s1 > s2) |
|---|
| 40 | return s1; |
|---|
| 41 | return s2; |
|---|
| 42 | #else |
|---|
| 43 | return s1; |
|---|
| 44 | #endif |
|---|
| 45 | } |
|---|
| 46 | |
|---|
| 47 | static const MSA *g_ptrMSA1; |
|---|
| 48 | static const MSA *g_ptrMSA2; |
|---|
| 49 | static unsigned g_uSeqIndex1; |
|---|
| 50 | static unsigned g_uSeqIndex2; |
|---|
| 51 | |
|---|
| 52 | static void LogGap(unsigned uStart, unsigned uEnd, unsigned uGapLength, |
|---|
| 53 | bool bNTerm, bool bCTerm) |
|---|
| 54 | { |
|---|
| 55 | Log("%16.16s ", ""); |
|---|
| 56 | for (unsigned i = 0; i < uStart; ++i) |
|---|
| 57 | Log(" "); |
|---|
| 58 | unsigned uMyLength = 0; |
|---|
| 59 | for (unsigned i = uStart; i <= uEnd; ++i) |
|---|
| 60 | { |
|---|
| 61 | bool bGap1 = g_ptrMSA1->IsGap(g_uSeqIndex1, i); |
|---|
| 62 | bool bGap2 = g_ptrMSA2->IsGap(g_uSeqIndex2, i); |
|---|
| 63 | if (!bGap1 && !bGap2) |
|---|
| 64 | Quit("Error -- neither gapping"); |
|---|
| 65 | if (bGap1 && bGap2) |
|---|
| 66 | Log("."); |
|---|
| 67 | else |
|---|
| 68 | { |
|---|
| 69 | ++uMyLength; |
|---|
| 70 | Log("-"); |
|---|
| 71 | } |
|---|
| 72 | } |
|---|
| 73 | SCORE s = GapPenalty(uGapLength, bNTerm || bCTerm); |
|---|
| 74 | Log(" L=%d N%d C%d s=%.3g", uGapLength, bNTerm, bCTerm, s); |
|---|
| 75 | Log("\n"); |
|---|
| 76 | if (uMyLength != uGapLength) |
|---|
| 77 | Quit("Lengths differ"); |
|---|
| 78 | |
|---|
| 79 | } |
|---|
| 80 | |
|---|
| 81 | static SCORE ScoreSeqPair(const MSA &msa1, unsigned uSeqIndex1, |
|---|
| 82 | const MSA &msa2, unsigned uSeqIndex2, SCORE *ptrLetters, SCORE *ptrGaps) |
|---|
| 83 | { |
|---|
| 84 | g_ptrMSA1 = &msa1; |
|---|
| 85 | g_ptrMSA2 = &msa2; |
|---|
| 86 | g_uSeqIndex1 = uSeqIndex1; |
|---|
| 87 | g_uSeqIndex2 = uSeqIndex2; |
|---|
| 88 | |
|---|
| 89 | const unsigned uColCount = msa1.GetColCount(); |
|---|
| 90 | const unsigned uColCount2 = msa2.GetColCount(); |
|---|
| 91 | if (uColCount != uColCount2) |
|---|
| 92 | Quit("ScoreSeqPair, different lengths"); |
|---|
| 93 | |
|---|
| 94 | #if TRACE |
|---|
| 95 | Log("ScoreSeqPair\n"); |
|---|
| 96 | Log("%16.16s ", msa1.GetSeqName(uSeqIndex1)); |
|---|
| 97 | for (unsigned i = 0; i < uColCount; ++i) |
|---|
| 98 | Log("%c", msa1.GetChar(uSeqIndex1, i)); |
|---|
| 99 | Log("\n"); |
|---|
| 100 | Log("%16.16s ", msa2.GetSeqName(uSeqIndex2)); |
|---|
| 101 | for (unsigned i = 0; i < uColCount; ++i) |
|---|
| 102 | Log("%c", msa1.GetChar(uSeqIndex2, i)); |
|---|
| 103 | Log("\n"); |
|---|
| 104 | #endif |
|---|
| 105 | |
|---|
| 106 | SCORE scoreTotal = 0; |
|---|
| 107 | |
|---|
| 108 | // Substitution scores |
|---|
| 109 | unsigned uFirstLetter1 = uInsane; |
|---|
| 110 | unsigned uFirstLetter2 = uInsane; |
|---|
| 111 | unsigned uLastLetter1 = uInsane; |
|---|
| 112 | unsigned uLastLetter2 = uInsane; |
|---|
| 113 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
|---|
| 114 | { |
|---|
| 115 | bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex); |
|---|
| 116 | bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex); |
|---|
| 117 | bool bWildcard1 = msa1.IsWildcard(uSeqIndex1, uColIndex); |
|---|
| 118 | bool bWildcard2 = msa2.IsWildcard(uSeqIndex2, uColIndex); |
|---|
| 119 | |
|---|
| 120 | if (!bGap1) |
|---|
| 121 | { |
|---|
| 122 | if (uInsane == uFirstLetter1) |
|---|
| 123 | uFirstLetter1 = uColIndex; |
|---|
| 124 | uLastLetter1 = uColIndex; |
|---|
| 125 | } |
|---|
| 126 | if (!bGap2) |
|---|
| 127 | { |
|---|
| 128 | if (uInsane == uFirstLetter2) |
|---|
| 129 | uFirstLetter2 = uColIndex; |
|---|
| 130 | uLastLetter2 = uColIndex; |
|---|
| 131 | } |
|---|
| 132 | |
|---|
| 133 | if (bGap1 || bGap2 || bWildcard1 || bWildcard2) |
|---|
| 134 | continue; |
|---|
| 135 | |
|---|
| 136 | unsigned uLetter1 = msa1.GetLetter(uSeqIndex1, uColIndex); |
|---|
| 137 | unsigned uLetter2 = msa2.GetLetter(uSeqIndex2, uColIndex); |
|---|
| 138 | |
|---|
| 139 | SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2]; |
|---|
| 140 | scoreTotal += scoreMatch; |
|---|
| 141 | #if TRACE |
|---|
| 142 | Log("%c <-> %c = %7.1f %10.1f\n", |
|---|
| 143 | msa1.GetChar(uSeqIndex1, uColIndex), |
|---|
| 144 | msa2.GetChar(uSeqIndex2, uColIndex), |
|---|
| 145 | scoreMatch, |
|---|
| 146 | scoreTotal); |
|---|
| 147 | #endif |
|---|
| 148 | } |
|---|
| 149 | |
|---|
| 150 | *ptrLetters = scoreTotal; |
|---|
| 151 | |
|---|
| 152 | // Gap penalties |
|---|
| 153 | unsigned uGapLength = uInsane; |
|---|
| 154 | unsigned uGapStartCol = uInsane; |
|---|
| 155 | bool bGapping1 = false; |
|---|
| 156 | bool bGapping2 = false; |
|---|
| 157 | |
|---|
| 158 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
|---|
| 159 | { |
|---|
| 160 | bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex); |
|---|
| 161 | bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex); |
|---|
| 162 | |
|---|
| 163 | if (bGap1 && bGap2) |
|---|
| 164 | continue; |
|---|
| 165 | |
|---|
| 166 | if (bGapping1) |
|---|
| 167 | { |
|---|
| 168 | if (bGap1) |
|---|
| 169 | ++uGapLength; |
|---|
| 170 | else |
|---|
| 171 | { |
|---|
| 172 | bGapping1 = false; |
|---|
| 173 | bool bNTerm = (uFirstLetter2 == uGapStartCol); |
|---|
| 174 | bool bCTerm = (uLastLetter2 + 1 == uColIndex); |
|---|
| 175 | SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm); |
|---|
| 176 | scoreTotal += scoreGap; |
|---|
| 177 | #if TRACE |
|---|
| 178 | LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm); |
|---|
| 179 | Log("GAP %7.1f %10.1f\n", |
|---|
| 180 | scoreGap, |
|---|
| 181 | scoreTotal); |
|---|
| 182 | #endif |
|---|
| 183 | } |
|---|
| 184 | continue; |
|---|
| 185 | } |
|---|
| 186 | else |
|---|
| 187 | { |
|---|
| 188 | if (bGap1) |
|---|
| 189 | { |
|---|
| 190 | uGapStartCol = uColIndex; |
|---|
| 191 | bGapping1 = true; |
|---|
| 192 | uGapLength = 1; |
|---|
| 193 | continue; |
|---|
| 194 | } |
|---|
| 195 | } |
|---|
| 196 | |
|---|
| 197 | if (bGapping2) |
|---|
| 198 | { |
|---|
| 199 | if (bGap2) |
|---|
| 200 | ++uGapLength; |
|---|
| 201 | else |
|---|
| 202 | { |
|---|
| 203 | bGapping2 = false; |
|---|
| 204 | bool bNTerm = (uFirstLetter1 == uGapStartCol); |
|---|
| 205 | bool bCTerm = (uLastLetter1 + 1 == uColIndex); |
|---|
| 206 | SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm); |
|---|
| 207 | scoreTotal += scoreGap; |
|---|
| 208 | #if TRACE |
|---|
| 209 | LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm); |
|---|
| 210 | Log("GAP %7.1f %10.1f\n", |
|---|
| 211 | scoreGap, |
|---|
| 212 | scoreTotal); |
|---|
| 213 | #endif |
|---|
| 214 | } |
|---|
| 215 | } |
|---|
| 216 | else |
|---|
| 217 | { |
|---|
| 218 | if (bGap2) |
|---|
| 219 | { |
|---|
| 220 | uGapStartCol = uColIndex; |
|---|
| 221 | bGapping2 = true; |
|---|
| 222 | uGapLength = 1; |
|---|
| 223 | } |
|---|
| 224 | } |
|---|
| 225 | } |
|---|
| 226 | |
|---|
| 227 | if (bGapping1 || bGapping2) |
|---|
| 228 | { |
|---|
| 229 | SCORE scoreGap = GapPenalty(uGapLength, true); |
|---|
| 230 | scoreTotal += scoreGap; |
|---|
| 231 | #if TRACE |
|---|
| 232 | LogGap(uGapStartCol, uColCount - 1, uGapLength, false, true); |
|---|
| 233 | Log("GAP %7.1f %10.1f\n", |
|---|
| 234 | scoreGap, |
|---|
| 235 | scoreTotal); |
|---|
| 236 | #endif |
|---|
| 237 | } |
|---|
| 238 | *ptrGaps = scoreTotal - *ptrLetters; |
|---|
| 239 | return scoreTotal; |
|---|
| 240 | } |
|---|
| 241 | |
|---|
| 242 | // The usual sum-of-pairs objective score: sum the score |
|---|
| 243 | // of the alignment of each pair of sequences. |
|---|
| 244 | SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps) |
|---|
| 245 | { |
|---|
| 246 | const unsigned uSeqCount = msa.GetSeqCount(); |
|---|
| 247 | SCORE scoreTotal = 0; |
|---|
| 248 | unsigned uPairCount = 0; |
|---|
| 249 | #if TRACE |
|---|
| 250 | msa.LogMe(); |
|---|
| 251 | Log(" Score Weight Weight Total\n"); |
|---|
| 252 | Log("---------- ------ ------ ----------\n"); |
|---|
| 253 | #endif |
|---|
| 254 | SCORE TotalLetters = 0; |
|---|
| 255 | SCORE TotalGaps = 0; |
|---|
| 256 | for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1) |
|---|
| 257 | { |
|---|
| 258 | const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1); |
|---|
| 259 | for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2) |
|---|
| 260 | { |
|---|
| 261 | const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2); |
|---|
| 262 | const WEIGHT w = w1*w2; |
|---|
| 263 | SCORE Letters; |
|---|
| 264 | SCORE Gaps; |
|---|
| 265 | SCORE scorePair = ScoreSeqPair(msa, uSeqIndex1, msa, uSeqIndex2, |
|---|
| 266 | &Letters, &Gaps); |
|---|
| 267 | scoreTotal += w1*w2*scorePair; |
|---|
| 268 | TotalLetters += w1*w2*Letters; |
|---|
| 269 | TotalGaps += w1*w2*Gaps; |
|---|
| 270 | ++uPairCount; |
|---|
| 271 | #if TRACE |
|---|
| 272 | Log("%10.2f %6.3f %6.3f %10.2f %d=%s %d=%s\n", |
|---|
| 273 | scorePair, |
|---|
| 274 | w1, |
|---|
| 275 | w2, |
|---|
| 276 | scorePair*w1*w2, |
|---|
| 277 | uSeqIndex1, |
|---|
| 278 | msa.GetSeqName(uSeqIndex1), |
|---|
| 279 | uSeqIndex2, |
|---|
| 280 | msa.GetSeqName(uSeqIndex2)); |
|---|
| 281 | #endif |
|---|
| 282 | } |
|---|
| 283 | } |
|---|
| 284 | *ptrLetters = TotalLetters; |
|---|
| 285 | *ptrGaps = TotalGaps; |
|---|
| 286 | return scoreTotal; |
|---|
| 287 | } |
|---|
| 288 | |
|---|
| 289 | #endif // DOUBLE_AFFINE |
|---|