| 1 | #include "muscle.h" |
|---|
| 2 | #include <math.h> |
|---|
| 3 | #include "pwpath.h" |
|---|
| 4 | #include "profile.h" |
|---|
| 5 | #include <stdio.h> |
|---|
| 6 | |
|---|
| 7 | #define TRACE 0 |
|---|
| 8 | |
|---|
| 9 | #if 1 // SINGLE_AFFINE |
|---|
| 10 | |
|---|
| 11 | extern bool g_bKeepSimpleDP; |
|---|
| 12 | extern SCORE *g_DPM; |
|---|
| 13 | extern SCORE *g_DPD; |
|---|
| 14 | extern SCORE *g_DPI; |
|---|
| 15 | extern char *g_TBM; |
|---|
| 16 | extern char *g_TBD; |
|---|
| 17 | extern char *g_TBI; |
|---|
| 18 | |
|---|
| 19 | static const char *LocalScoreToStr(SCORE s) |
|---|
| 20 | { |
|---|
| 21 | static char str[16]; |
|---|
| 22 | if (s < -100000) |
|---|
| 23 | return " *"; |
|---|
| 24 | sprintf(str, "%6.1f", s); |
|---|
| 25 | return str; |
|---|
| 26 | } |
|---|
| 27 | |
|---|
| 28 | static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB, |
|---|
| 29 | unsigned uPrefixCountA, unsigned uPrefixCountB) |
|---|
| 30 | { |
|---|
| 31 | Log(" "); |
|---|
| 32 | for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) |
|---|
| 33 | { |
|---|
| 34 | char c = ' '; |
|---|
| 35 | if (uPrefixLengthB > 0) |
|---|
| 36 | c = ConsensusChar(PB[uPrefixLengthB - 1]); |
|---|
| 37 | Log(" %4u:%c", uPrefixLengthB, c); |
|---|
| 38 | } |
|---|
| 39 | Log("\n"); |
|---|
| 40 | for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA) |
|---|
| 41 | { |
|---|
| 42 | char c = ' '; |
|---|
| 43 | if (uPrefixLengthA > 0) |
|---|
| 44 | c = ConsensusChar(PA[uPrefixLengthA - 1]); |
|---|
| 45 | Log("%4u:%c ", uPrefixLengthA, c); |
|---|
| 46 | for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) |
|---|
| 47 | Log(" %6c", TBM(uPrefixLengthA, uPrefixLengthB)); |
|---|
| 48 | Log("\n"); |
|---|
| 49 | } |
|---|
| 50 | } |
|---|
| 51 | |
|---|
| 52 | static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB, |
|---|
| 53 | unsigned uPrefixCountA, unsigned uPrefixCountB) |
|---|
| 54 | { |
|---|
| 55 | Log(" "); |
|---|
| 56 | for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) |
|---|
| 57 | { |
|---|
| 58 | char c = ' '; |
|---|
| 59 | if (uPrefixLengthB > 0) |
|---|
| 60 | c = ConsensusChar(PB[uPrefixLengthB - 1]); |
|---|
| 61 | Log(" %4u:%c", uPrefixLengthB, c); |
|---|
| 62 | } |
|---|
| 63 | Log("\n"); |
|---|
| 64 | for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA) |
|---|
| 65 | { |
|---|
| 66 | char c = ' '; |
|---|
| 67 | if (uPrefixLengthA > 0) |
|---|
| 68 | c = ConsensusChar(PA[uPrefixLengthA - 1]); |
|---|
| 69 | Log("%4u:%c ", uPrefixLengthA, c); |
|---|
| 70 | for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) |
|---|
| 71 | Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB))); |
|---|
| 72 | Log("\n"); |
|---|
| 73 | } |
|---|
| 74 | } |
|---|
| 75 | |
|---|
| 76 | SCORE GlobalAlignSimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB, |
|---|
| 77 | unsigned uLengthB, PWPath &Path) |
|---|
| 78 | { |
|---|
| 79 | assert(uLengthB > 0 && uLengthA > 0); |
|---|
| 80 | |
|---|
| 81 | SetTermGaps(PA, uLengthA); |
|---|
| 82 | SetTermGaps(PB, uLengthB); |
|---|
| 83 | |
|---|
| 84 | const unsigned uPrefixCountA = uLengthA + 1; |
|---|
| 85 | const unsigned uPrefixCountB = uLengthB + 1; |
|---|
| 86 | |
|---|
| 87 | // Allocate DP matrices |
|---|
| 88 | const size_t LM = uPrefixCountA*uPrefixCountB; |
|---|
| 89 | SCORE *DPL_ = new SCORE[LM]; |
|---|
| 90 | SCORE *DPM_ = new SCORE[LM]; |
|---|
| 91 | SCORE *DPD_ = new SCORE[LM]; |
|---|
| 92 | SCORE *DPI_ = new SCORE[LM]; |
|---|
| 93 | |
|---|
| 94 | char *TBM_ = new char[LM]; |
|---|
| 95 | char *TBD_ = new char[LM]; |
|---|
| 96 | char *TBI_ = new char[LM]; |
|---|
| 97 | |
|---|
| 98 | memset(TBM_, '?', LM); |
|---|
| 99 | memset(TBD_, '?', LM); |
|---|
| 100 | memset(TBI_, '?', LM); |
|---|
| 101 | |
|---|
| 102 | DPM(0, 0) = 0; |
|---|
| 103 | DPD(0, 0) = MINUS_INFINITY; |
|---|
| 104 | DPI(0, 0) = MINUS_INFINITY; |
|---|
| 105 | |
|---|
| 106 | DPM(1, 0) = MINUS_INFINITY; |
|---|
| 107 | DPD(1, 0) = PA[0].m_scoreGapOpen; |
|---|
| 108 | TBD(1, 0) = 'D'; |
|---|
| 109 | DPI(1, 0) = MINUS_INFINITY; |
|---|
| 110 | |
|---|
| 111 | DPM(0, 1) = MINUS_INFINITY; |
|---|
| 112 | DPD(0, 1) = MINUS_INFINITY; |
|---|
| 113 | DPI(0, 1) = PB[0].m_scoreGapOpen; |
|---|
| 114 | TBI(0, 1) = 'I'; |
|---|
| 115 | |
|---|
| 116 | // Empty prefix of B is special case |
|---|
| 117 | for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA) |
|---|
| 118 | { |
|---|
| 119 | // M=LetterA+LetterB, impossible with empty prefix |
|---|
| 120 | DPM(uPrefixLengthA, 0) = MINUS_INFINITY; |
|---|
| 121 | |
|---|
| 122 | // D=LetterA+GapB |
|---|
| 123 | DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend; |
|---|
| 124 | TBD(uPrefixLengthA, 0) = 'D'; |
|---|
| 125 | |
|---|
| 126 | // I=GapA+LetterB, impossible with empty prefix |
|---|
| 127 | DPI(uPrefixLengthA, 0) = MINUS_INFINITY; |
|---|
| 128 | } |
|---|
| 129 | |
|---|
| 130 | // Empty prefix of A is special case |
|---|
| 131 | for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) |
|---|
| 132 | { |
|---|
| 133 | // M=LetterA+LetterB, impossible with empty prefix |
|---|
| 134 | DPM(0, uPrefixLengthB) = MINUS_INFINITY; |
|---|
| 135 | |
|---|
| 136 | // D=LetterA+GapB, impossible with empty prefix |
|---|
| 137 | DPD(0, uPrefixLengthB) = MINUS_INFINITY; |
|---|
| 138 | |
|---|
| 139 | // I=GapA+LetterB |
|---|
| 140 | DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend; |
|---|
| 141 | TBI(0, uPrefixLengthB) = 'I'; |
|---|
| 142 | } |
|---|
| 143 | |
|---|
| 144 | // Special case to agree with NWFast, no D-I transitions so... |
|---|
| 145 | DPD(uLengthA, 0) = MINUS_INFINITY; |
|---|
| 146 | // DPI(0, uLengthB) = MINUS_INFINITY; |
|---|
| 147 | |
|---|
| 148 | // ============ |
|---|
| 149 | // Main DP loop |
|---|
| 150 | // ============ |
|---|
| 151 | SCORE scoreGapCloseB = MINUS_INFINITY; |
|---|
| 152 | for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB) |
|---|
| 153 | { |
|---|
| 154 | const ProfPos &PPB = PB[uPrefixLengthB - 1]; |
|---|
| 155 | |
|---|
| 156 | SCORE scoreGapCloseA = MINUS_INFINITY; |
|---|
| 157 | for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA) |
|---|
| 158 | { |
|---|
| 159 | const ProfPos &PPA = PA[uPrefixLengthA - 1]; |
|---|
| 160 | |
|---|
| 161 | { |
|---|
| 162 | // Match M=LetterA+LetterB |
|---|
| 163 | SCORE scoreLL = ScoreProfPos2(PPA, PPB); |
|---|
| 164 | DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL; |
|---|
| 165 | |
|---|
| 166 | SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1); |
|---|
| 167 | SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA; |
|---|
| 168 | SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB; |
|---|
| 169 | |
|---|
| 170 | SCORE scoreBest; |
|---|
| 171 | if (scoreMM >= scoreDM && scoreMM >= scoreIM) |
|---|
| 172 | { |
|---|
| 173 | scoreBest = scoreMM; |
|---|
| 174 | TBM(uPrefixLengthA, uPrefixLengthB) = 'M'; |
|---|
| 175 | } |
|---|
| 176 | else if (scoreDM >= scoreMM && scoreDM >= scoreIM) |
|---|
| 177 | { |
|---|
| 178 | scoreBest = scoreDM; |
|---|
| 179 | TBM(uPrefixLengthA, uPrefixLengthB) = 'D'; |
|---|
| 180 | } |
|---|
| 181 | else |
|---|
| 182 | { |
|---|
| 183 | assert(scoreIM >= scoreMM && scoreIM >= scoreDM); |
|---|
| 184 | scoreBest = scoreIM; |
|---|
| 185 | TBM(uPrefixLengthA, uPrefixLengthB) = 'I'; |
|---|
| 186 | } |
|---|
| 187 | DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL; |
|---|
| 188 | } |
|---|
| 189 | |
|---|
| 190 | { |
|---|
| 191 | // Delete D=LetterA+GapB |
|---|
| 192 | SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + |
|---|
| 193 | PA[uPrefixLengthA-1].m_scoreGapOpen; |
|---|
| 194 | SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend; |
|---|
| 195 | |
|---|
| 196 | SCORE scoreBest; |
|---|
| 197 | if (scoreMD >= scoreDD) |
|---|
| 198 | { |
|---|
| 199 | scoreBest = scoreMD; |
|---|
| 200 | TBD(uPrefixLengthA, uPrefixLengthB) = 'M'; |
|---|
| 201 | } |
|---|
| 202 | else |
|---|
| 203 | { |
|---|
| 204 | assert(scoreDD >= scoreMD); |
|---|
| 205 | scoreBest = scoreDD; |
|---|
| 206 | TBD(uPrefixLengthA, uPrefixLengthB) = 'D'; |
|---|
| 207 | } |
|---|
| 208 | DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest; |
|---|
| 209 | } |
|---|
| 210 | |
|---|
| 211 | // Insert I=GapA+LetterB |
|---|
| 212 | { |
|---|
| 213 | SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + |
|---|
| 214 | PB[uPrefixLengthB - 1].m_scoreGapOpen; |
|---|
| 215 | SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend; |
|---|
| 216 | |
|---|
| 217 | SCORE scoreBest; |
|---|
| 218 | if (scoreMI >= scoreII) |
|---|
| 219 | { |
|---|
| 220 | scoreBest = scoreMI; |
|---|
| 221 | TBI(uPrefixLengthA, uPrefixLengthB) = 'M'; |
|---|
| 222 | } |
|---|
| 223 | else |
|---|
| 224 | { |
|---|
| 225 | assert(scoreII > scoreMI); |
|---|
| 226 | scoreBest = scoreII; |
|---|
| 227 | TBI(uPrefixLengthA, uPrefixLengthB) = 'I'; |
|---|
| 228 | } |
|---|
| 229 | DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest; |
|---|
| 230 | } |
|---|
| 231 | |
|---|
| 232 | scoreGapCloseA = PPA.m_scoreGapClose; |
|---|
| 233 | } |
|---|
| 234 | scoreGapCloseB = PPB.m_scoreGapClose; |
|---|
| 235 | } |
|---|
| 236 | |
|---|
| 237 | #if TRACE |
|---|
| 238 | Log("\n"); |
|---|
| 239 | Log("Simple DPL:\n"); |
|---|
| 240 | ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB); |
|---|
| 241 | Log("\n"); |
|---|
| 242 | Log("Simple DPM:\n"); |
|---|
| 243 | ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB); |
|---|
| 244 | Log("\n"); |
|---|
| 245 | Log("Simple DPD:\n"); |
|---|
| 246 | ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB); |
|---|
| 247 | Log("\n"); |
|---|
| 248 | Log("Simple DPI:\n"); |
|---|
| 249 | ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB); |
|---|
| 250 | Log("\n"); |
|---|
| 251 | Log("Simple TBM:\n"); |
|---|
| 252 | ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB); |
|---|
| 253 | Log("\n"); |
|---|
| 254 | Log("Simple TBD:\n"); |
|---|
| 255 | ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB); |
|---|
| 256 | Log("\n"); |
|---|
| 257 | Log("Simple TBI:\n"); |
|---|
| 258 | ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB); |
|---|
| 259 | #endif |
|---|
| 260 | |
|---|
| 261 | // Trace-back |
|---|
| 262 | // ========== |
|---|
| 263 | Path.Clear(); |
|---|
| 264 | |
|---|
| 265 | // Find last edge |
|---|
| 266 | SCORE M = DPM(uLengthA, uLengthB); |
|---|
| 267 | SCORE D = DPD(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose; |
|---|
| 268 | SCORE I = DPI(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose; |
|---|
| 269 | char cEdgeType = '?'; |
|---|
| 270 | |
|---|
| 271 | SCORE BestScore = MINUS_INFINITY; |
|---|
| 272 | if (M >= D && M >= I) |
|---|
| 273 | { |
|---|
| 274 | cEdgeType = 'M'; |
|---|
| 275 | BestScore = M; |
|---|
| 276 | } |
|---|
| 277 | else if (D >= M && D >= I) |
|---|
| 278 | { |
|---|
| 279 | cEdgeType = 'D'; |
|---|
| 280 | BestScore = D; |
|---|
| 281 | } |
|---|
| 282 | else |
|---|
| 283 | { |
|---|
| 284 | assert(I >= M && I >= D); |
|---|
| 285 | cEdgeType = 'I'; |
|---|
| 286 | BestScore = I; |
|---|
| 287 | } |
|---|
| 288 | |
|---|
| 289 | #if TRACE |
|---|
| 290 | Log("Simple: MAB=%.4g DAB=%.4g IAB=%.4g best=%c\n", M, D, I, cEdgeType); |
|---|
| 291 | #endif |
|---|
| 292 | |
|---|
| 293 | unsigned PLA = uLengthA; |
|---|
| 294 | unsigned PLB = uLengthB; |
|---|
| 295 | for (;;) |
|---|
| 296 | { |
|---|
| 297 | PWEdge Edge; |
|---|
| 298 | Edge.cType = cEdgeType; |
|---|
| 299 | Edge.uPrefixLengthA = PLA; |
|---|
| 300 | Edge.uPrefixLengthB = PLB; |
|---|
| 301 | #if TRACE |
|---|
| 302 | Log("Prepend %c%d.%d\n", Edge.cType, PLA, PLB); |
|---|
| 303 | #endif |
|---|
| 304 | Path.PrependEdge(Edge); |
|---|
| 305 | |
|---|
| 306 | switch (cEdgeType) |
|---|
| 307 | { |
|---|
| 308 | case 'M': |
|---|
| 309 | assert(PLA > 0); |
|---|
| 310 | assert(PLB > 0); |
|---|
| 311 | cEdgeType = TBM(PLA, PLB); |
|---|
| 312 | --PLA; |
|---|
| 313 | --PLB; |
|---|
| 314 | break; |
|---|
| 315 | |
|---|
| 316 | case 'D': |
|---|
| 317 | assert(PLA > 0); |
|---|
| 318 | cEdgeType = TBD(PLA, PLB); |
|---|
| 319 | --PLA; |
|---|
| 320 | break; |
|---|
| 321 | |
|---|
| 322 | case 'I': |
|---|
| 323 | assert(PLB > 0); |
|---|
| 324 | cEdgeType = TBI(PLA, PLB); |
|---|
| 325 | --PLB; |
|---|
| 326 | break; |
|---|
| 327 | |
|---|
| 328 | default: |
|---|
| 329 | Quit("Invalid edge %c", cEdgeType); |
|---|
| 330 | } |
|---|
| 331 | if (0 == PLA && 0 == PLB) |
|---|
| 332 | break; |
|---|
| 333 | } |
|---|
| 334 | Path.Validate(); |
|---|
| 335 | |
|---|
| 336 | // SCORE Score = TraceBack(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_, Path); |
|---|
| 337 | |
|---|
| 338 | #if TRACE |
|---|
| 339 | SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path); |
|---|
| 340 | Path.LogMe(); |
|---|
| 341 | Log("Score = %s Path = %s\n", LocalScoreToStr(BestScore), LocalScoreToStr(scorePath)); |
|---|
| 342 | #endif |
|---|
| 343 | |
|---|
| 344 | if (g_bKeepSimpleDP) |
|---|
| 345 | { |
|---|
| 346 | g_DPM = DPM_; |
|---|
| 347 | g_DPD = DPD_; |
|---|
| 348 | g_DPI = DPI_; |
|---|
| 349 | |
|---|
| 350 | g_TBM = TBM_; |
|---|
| 351 | g_TBD = TBD_; |
|---|
| 352 | g_TBI = TBI_; |
|---|
| 353 | } |
|---|
| 354 | else |
|---|
| 355 | { |
|---|
| 356 | delete[] DPM_; |
|---|
| 357 | delete[] DPD_; |
|---|
| 358 | delete[] DPI_; |
|---|
| 359 | |
|---|
| 360 | delete[] TBM_; |
|---|
| 361 | delete[] TBD_; |
|---|
| 362 | delete[] TBI_; |
|---|
| 363 | } |
|---|
| 364 | |
|---|
| 365 | return BestScore; |
|---|
| 366 | } |
|---|
| 367 | |
|---|
| 368 | #endif // SINLGLE_AFFINE |
|---|