| 1 | #include "muscle.h" |
|---|
| 2 | #include "profile.h" |
|---|
| 3 | #include "pwpath.h" |
|---|
| 4 | #include "seq.h" |
|---|
| 5 | |
|---|
| 6 | extern SCOREMATRIX VTML_SP; |
|---|
| 7 | |
|---|
| 8 | // #define SUBST(i, j) Subst(seqA, seqB, i, j) |
|---|
| 9 | #define SUBST(i, j) MxRowA[i][seqB.GetLetter(j)] |
|---|
| 10 | |
|---|
| 11 | static SCORE Subst(const Seq &seqA, const Seq &seqB, unsigned i, unsigned j) |
|---|
| 12 | { |
|---|
| 13 | assert(i < seqA.Length()); |
|---|
| 14 | assert(j < seqB.Length()); |
|---|
| 15 | |
|---|
| 16 | unsigned uLetterA = seqA.GetLetter(i); |
|---|
| 17 | unsigned uLetterB = seqB.GetLetter(j); |
|---|
| 18 | return VTML_SP[uLetterA][uLetterB] + g_scoreCenter; |
|---|
| 19 | } |
|---|
| 20 | |
|---|
| 21 | struct DP_MEMORY |
|---|
| 22 | { |
|---|
| 23 | unsigned uLength; |
|---|
| 24 | SCORE *MPrev; |
|---|
| 25 | SCORE *MCurr; |
|---|
| 26 | SCORE *MWork; |
|---|
| 27 | SCORE *DPrev; |
|---|
| 28 | SCORE *DCurr; |
|---|
| 29 | SCORE *DWork; |
|---|
| 30 | SCORE **MxRowA; |
|---|
| 31 | unsigned *LettersB; |
|---|
| 32 | unsigned *uDeletePos; |
|---|
| 33 | int **TraceBack; |
|---|
| 34 | }; |
|---|
| 35 | |
|---|
| 36 | static struct DP_MEMORY DPM; |
|---|
| 37 | |
|---|
| 38 | static void AllocDPMem(unsigned uLengthA, unsigned uLengthB) |
|---|
| 39 | { |
|---|
| 40 | // Max prefix length |
|---|
| 41 | unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1; |
|---|
| 42 | if (uLength < DPM.uLength) |
|---|
| 43 | return; |
|---|
| 44 | |
|---|
| 45 | // Add 256 to allow for future expansion and |
|---|
| 46 | // round up to next multiple of 32. |
|---|
| 47 | uLength += 256; |
|---|
| 48 | uLength += 32 - uLength%32; |
|---|
| 49 | |
|---|
| 50 | const unsigned uOldLength = DPM.uLength; |
|---|
| 51 | if (uOldLength > 0) |
|---|
| 52 | { |
|---|
| 53 | for (unsigned i = 0; i < uOldLength; ++i) |
|---|
| 54 | delete[] DPM.TraceBack[i]; |
|---|
| 55 | |
|---|
| 56 | delete[] DPM.MPrev; |
|---|
| 57 | delete[] DPM.MCurr; |
|---|
| 58 | delete[] DPM.MWork; |
|---|
| 59 | delete[] DPM.DPrev; |
|---|
| 60 | delete[] DPM.DCurr; |
|---|
| 61 | delete[] DPM.DWork; |
|---|
| 62 | delete[] DPM.MxRowA; |
|---|
| 63 | delete[] DPM.LettersB; |
|---|
| 64 | delete[] DPM.uDeletePos; |
|---|
| 65 | delete[] DPM.TraceBack; |
|---|
| 66 | } |
|---|
| 67 | |
|---|
| 68 | DPM.uLength = uLength; |
|---|
| 69 | |
|---|
| 70 | DPM.MPrev = new SCORE[uLength]; |
|---|
| 71 | DPM.MCurr = new SCORE[uLength]; |
|---|
| 72 | DPM.MWork = new SCORE[uLength]; |
|---|
| 73 | |
|---|
| 74 | DPM.DPrev = new SCORE[uLength]; |
|---|
| 75 | DPM.DCurr = new SCORE[uLength]; |
|---|
| 76 | DPM.DWork = new SCORE[uLength]; |
|---|
| 77 | DPM.MxRowA = new SCORE *[uLength]; |
|---|
| 78 | DPM.LettersB = new unsigned[uLength]; |
|---|
| 79 | DPM.uDeletePos = new unsigned[uLength]; |
|---|
| 80 | |
|---|
| 81 | DPM.TraceBack = new int*[uLength]; |
|---|
| 82 | |
|---|
| 83 | for (unsigned i = 0; i < uLength; ++i) |
|---|
| 84 | DPM.TraceBack[i] = new int[uLength]; |
|---|
| 85 | } |
|---|
| 86 | |
|---|
| 87 | static void RowFromSeq(const Seq &s, SCORE *Row[]) |
|---|
| 88 | { |
|---|
| 89 | const unsigned uLength = s.Length(); |
|---|
| 90 | for (unsigned i = 0; i < uLength; ++i) |
|---|
| 91 | { |
|---|
| 92 | char c = s.GetChar(i); |
|---|
| 93 | unsigned uLetter = CharToLetter(c); |
|---|
| 94 | if (uLetter < 20) |
|---|
| 95 | Row[i] = VTML_SP[uLetter]; |
|---|
| 96 | else |
|---|
| 97 | Row[i] = VTML_SP[AX_X]; |
|---|
| 98 | } |
|---|
| 99 | } |
|---|
| 100 | |
|---|
| 101 | static void LettersFromSeq(const Seq &s, unsigned Letters[]) |
|---|
| 102 | { |
|---|
| 103 | const unsigned uLength = s.Length(); |
|---|
| 104 | for (unsigned i = 0; i < uLength; ++i) |
|---|
| 105 | { |
|---|
| 106 | char c = s.GetChar(i); |
|---|
| 107 | unsigned uLetter = CharToLetter(c); |
|---|
| 108 | if (uLetter < 20) |
|---|
| 109 | Letters[i] = uLetter; |
|---|
| 110 | else |
|---|
| 111 | Letters[i] = AX_X; |
|---|
| 112 | } |
|---|
| 113 | } |
|---|
| 114 | |
|---|
| 115 | SCORE GlobalAlignSS(const Seq &seqA, const Seq &seqB, PWPath &Path) |
|---|
| 116 | { |
|---|
| 117 | const unsigned uLengthA = seqA.Length(); |
|---|
| 118 | const unsigned uLengthB = seqB.Length(); |
|---|
| 119 | const unsigned uPrefixCountA = uLengthA + 1; |
|---|
| 120 | const unsigned uPrefixCountB = uLengthB + 1; |
|---|
| 121 | |
|---|
| 122 | AllocDPMem(uLengthA, uLengthB); |
|---|
| 123 | |
|---|
| 124 | SCORE *MPrev = DPM.MPrev; |
|---|
| 125 | SCORE *MCurr = DPM.MCurr; |
|---|
| 126 | SCORE *MWork = DPM.MWork; |
|---|
| 127 | |
|---|
| 128 | SCORE *DPrev = DPM.DPrev; |
|---|
| 129 | SCORE *DCurr = DPM.DCurr; |
|---|
| 130 | SCORE *DWork = DPM.DWork; |
|---|
| 131 | SCORE **MxRowA = DPM.MxRowA; |
|---|
| 132 | unsigned *LettersB = DPM.LettersB; |
|---|
| 133 | |
|---|
| 134 | RowFromSeq(seqA, MxRowA); |
|---|
| 135 | LettersFromSeq(seqB, LettersB); |
|---|
| 136 | |
|---|
| 137 | unsigned *uDeletePos = DPM.uDeletePos; |
|---|
| 138 | |
|---|
| 139 | int **TraceBack = DPM.TraceBack; |
|---|
| 140 | |
|---|
| 141 | #if DEBUG |
|---|
| 142 | for (unsigned i = 0; i < uPrefixCountA; ++i) |
|---|
| 143 | memset(TraceBack[i], 0, uPrefixCountB*sizeof(int)); |
|---|
| 144 | #endif |
|---|
| 145 | |
|---|
| 146 | // Special case for i=0 |
|---|
| 147 | TraceBack[0][0] = 0; |
|---|
| 148 | MPrev[0] = MxRowA[0][LettersB[0]]; |
|---|
| 149 | |
|---|
| 150 | // D(0,0) is -infinity (requires I->D). |
|---|
| 151 | DPrev[0] = MINUS_INFINITY; |
|---|
| 152 | |
|---|
| 153 | for (unsigned j = 1; j < uLengthB; ++j) |
|---|
| 154 | { |
|---|
| 155 | unsigned uLetterB = LettersB[j]; |
|---|
| 156 | |
|---|
| 157 | // Only way to get M(0, j) looks like this: |
|---|
| 158 | // A ----X |
|---|
| 159 | // B XXXXX |
|---|
| 160 | // 0 j |
|---|
| 161 | // So gap-open at j=0, gap-close at j-1. |
|---|
| 162 | MPrev[j] = MxRowA[0][uLetterB] + g_scoreGapOpen/2; // term gaps half |
|---|
| 163 | TraceBack[0][j] = -(int) j; |
|---|
| 164 | |
|---|
| 165 | // Assume no D->I transitions, then can't be a delete if only |
|---|
| 166 | // one letter from A. |
|---|
| 167 | DPrev[j] = MINUS_INFINITY; |
|---|
| 168 | } |
|---|
| 169 | |
|---|
| 170 | SCORE IPrev_j_1; |
|---|
| 171 | for (unsigned i = 1; i < uLengthA; ++i) |
|---|
| 172 | { |
|---|
| 173 | SCORE *ptrMCurr_j = MCurr; |
|---|
| 174 | memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE)); |
|---|
| 175 | |
|---|
| 176 | const SCORE *RowA = MxRowA[i]; |
|---|
| 177 | const SCORE *ptrRowA = MxRowA[i]; |
|---|
| 178 | const SCORE *ptrMCurrEnd = ptrMCurr_j + uLengthB; |
|---|
| 179 | unsigned *ptrLettersB = LettersB; |
|---|
| 180 | for (; ptrMCurr_j != ptrMCurrEnd; ++ptrMCurr_j) |
|---|
| 181 | { |
|---|
| 182 | *ptrMCurr_j = RowA[*ptrLettersB]; |
|---|
| 183 | ++ptrLettersB; |
|---|
| 184 | } |
|---|
| 185 | |
|---|
| 186 | unsigned *ptrDeletePos = uDeletePos; |
|---|
| 187 | |
|---|
| 188 | // Special case for j=0 |
|---|
| 189 | // Only way to get M(i, 0) looks like this: |
|---|
| 190 | // 0 i |
|---|
| 191 | // A XXXXX |
|---|
| 192 | // B ----X |
|---|
| 193 | // So gap-open at i=0, gap-close at i-1. |
|---|
| 194 | ptrMCurr_j = MCurr; |
|---|
| 195 | assert(ptrMCurr_j == &(MCurr[0])); |
|---|
| 196 | *ptrMCurr_j += g_scoreGapOpen/2; // term gaps half |
|---|
| 197 | |
|---|
| 198 | ++ptrMCurr_j; |
|---|
| 199 | |
|---|
| 200 | int *ptrTraceBack_ij = TraceBack[i]; |
|---|
| 201 | *ptrTraceBack_ij++ = (int) i; |
|---|
| 202 | |
|---|
| 203 | SCORE *ptrMPrev_j = MPrev; |
|---|
| 204 | SCORE *ptrDPrev = DPrev; |
|---|
| 205 | SCORE d = *ptrDPrev; |
|---|
| 206 | SCORE DNew = *ptrMPrev_j + g_scoreGapOpen; |
|---|
| 207 | if (DNew > d) |
|---|
| 208 | { |
|---|
| 209 | d = DNew; |
|---|
| 210 | *ptrDeletePos = i; |
|---|
| 211 | } |
|---|
| 212 | |
|---|
| 213 | SCORE *ptrDCurr = DCurr; |
|---|
| 214 | |
|---|
| 215 | assert(ptrDCurr == &(DCurr[0])); |
|---|
| 216 | *ptrDCurr = d; |
|---|
| 217 | |
|---|
| 218 | // Can't have an insert if no letters from B |
|---|
| 219 | IPrev_j_1 = MINUS_INFINITY; |
|---|
| 220 | |
|---|
| 221 | unsigned uInsertPos; |
|---|
| 222 | |
|---|
| 223 | for (unsigned j = 1; j < uLengthB; ++j) |
|---|
| 224 | { |
|---|
| 225 | // Here, MPrev_j is preserved from previous |
|---|
| 226 | // iteration so with current i,j is M[i-1][j-1] |
|---|
| 227 | SCORE MPrev_j = *ptrMPrev_j; |
|---|
| 228 | SCORE INew = MPrev_j + g_scoreGapOpen; |
|---|
| 229 | if (INew > IPrev_j_1) |
|---|
| 230 | { |
|---|
| 231 | IPrev_j_1 = INew; |
|---|
| 232 | uInsertPos = j; |
|---|
| 233 | } |
|---|
| 234 | |
|---|
| 235 | SCORE scoreMax = MPrev_j; |
|---|
| 236 | |
|---|
| 237 | assert(ptrDPrev == &(DPrev[j-1])); |
|---|
| 238 | SCORE scoreD = *ptrDPrev++; |
|---|
| 239 | if (scoreD > scoreMax) |
|---|
| 240 | { |
|---|
| 241 | scoreMax = scoreD; |
|---|
| 242 | assert(ptrDeletePos == &(uDeletePos[j-1])); |
|---|
| 243 | *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos; |
|---|
| 244 | assert(*ptrTraceBack_ij > 0); |
|---|
| 245 | } |
|---|
| 246 | ++ptrDeletePos; |
|---|
| 247 | |
|---|
| 248 | SCORE scoreI = IPrev_j_1; |
|---|
| 249 | if (scoreI > scoreMax) |
|---|
| 250 | { |
|---|
| 251 | scoreMax = scoreI; |
|---|
| 252 | *ptrTraceBack_ij = (int) uInsertPos - (int) j; |
|---|
| 253 | assert(*ptrTraceBack_ij < 0); |
|---|
| 254 | } |
|---|
| 255 | |
|---|
| 256 | *ptrMCurr_j += scoreMax; |
|---|
| 257 | assert(ptrMCurr_j == &(MCurr[j])); |
|---|
| 258 | ++ptrMCurr_j; |
|---|
| 259 | |
|---|
| 260 | MPrev_j = *(++ptrMPrev_j); |
|---|
| 261 | assert(ptrDPrev == &(DPrev[j])); |
|---|
| 262 | SCORE d = *ptrDPrev; |
|---|
| 263 | SCORE DNew = MPrev_j + g_scoreGapOpen; |
|---|
| 264 | if (DNew > d) |
|---|
| 265 | { |
|---|
| 266 | d = DNew; |
|---|
| 267 | assert(ptrDeletePos == &uDeletePos[j]); |
|---|
| 268 | *ptrDeletePos = i; |
|---|
| 269 | } |
|---|
| 270 | assert(ptrDCurr + 1 == &(DCurr[j])); |
|---|
| 271 | *(++ptrDCurr) = d; |
|---|
| 272 | |
|---|
| 273 | ++ptrTraceBack_ij; |
|---|
| 274 | } |
|---|
| 275 | |
|---|
| 276 | Rotate(MPrev, MCurr, MWork); |
|---|
| 277 | Rotate(DPrev, DCurr, DWork); |
|---|
| 278 | } |
|---|
| 279 | |
|---|
| 280 | // Special case for i=uLengthA |
|---|
| 281 | SCORE IPrev = MINUS_INFINITY; |
|---|
| 282 | |
|---|
| 283 | unsigned uInsertPos; |
|---|
| 284 | |
|---|
| 285 | for (unsigned j = 1; j < uLengthB; ++j) |
|---|
| 286 | { |
|---|
| 287 | SCORE INew = MPrev[j-1]; |
|---|
| 288 | if (INew > IPrev) |
|---|
| 289 | { |
|---|
| 290 | uInsertPos = j; |
|---|
| 291 | IPrev = INew; |
|---|
| 292 | } |
|---|
| 293 | } |
|---|
| 294 | |
|---|
| 295 | // Special case for i=uLengthA, j=uLengthB |
|---|
| 296 | SCORE scoreMax = MPrev[uLengthB-1]; |
|---|
| 297 | int iTraceBack = 0; |
|---|
| 298 | |
|---|
| 299 | SCORE scoreD = DPrev[uLengthB-1] - g_scoreGapOpen/2; // term gaps half |
|---|
| 300 | if (scoreD > scoreMax) |
|---|
| 301 | { |
|---|
| 302 | scoreMax = scoreD; |
|---|
| 303 | iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1]; |
|---|
| 304 | } |
|---|
| 305 | |
|---|
| 306 | SCORE scoreI = IPrev - g_scoreGapOpen/2; |
|---|
| 307 | if (scoreI > scoreMax) |
|---|
| 308 | { |
|---|
| 309 | scoreMax = scoreI; |
|---|
| 310 | iTraceBack = (int) uInsertPos - (int) uLengthB; |
|---|
| 311 | } |
|---|
| 312 | |
|---|
| 313 | TraceBack[uLengthA][uLengthB] = iTraceBack; |
|---|
| 314 | |
|---|
| 315 | TraceBackToPath(TraceBack, uLengthA, uLengthB, Path); |
|---|
| 316 | |
|---|
| 317 | return scoreMax; |
|---|
| 318 | } |
|---|