| 1 | #include "muscle.h" |
|---|
| 2 | #include "pwpath.h" |
|---|
| 3 | #include "seq.h" |
|---|
| 4 | #include "textfile.h" |
|---|
| 5 | #include "msa.h" |
|---|
| 6 | |
|---|
| 7 | PWPath::PWPath() |
|---|
| 8 | { |
|---|
| 9 | m_uArraySize = 0; |
|---|
| 10 | m_uEdgeCount = 0; |
|---|
| 11 | m_Edges = 0; |
|---|
| 12 | } |
|---|
| 13 | |
|---|
| 14 | PWPath::~PWPath() |
|---|
| 15 | { |
|---|
| 16 | Clear(); |
|---|
| 17 | } |
|---|
| 18 | |
|---|
| 19 | void PWPath::Clear() |
|---|
| 20 | { |
|---|
| 21 | delete[] m_Edges; |
|---|
| 22 | m_Edges = 0; |
|---|
| 23 | m_uArraySize = 0; |
|---|
| 24 | m_uEdgeCount = 0; |
|---|
| 25 | } |
|---|
| 26 | |
|---|
| 27 | void PWPath::ExpandPath(unsigned uAdditionalEdgeCount) |
|---|
| 28 | { |
|---|
| 29 | PWEdge *OldPath = m_Edges; |
|---|
| 30 | unsigned uEdgeCount = m_uArraySize + uAdditionalEdgeCount; |
|---|
| 31 | |
|---|
| 32 | m_Edges = new PWEdge[uEdgeCount]; |
|---|
| 33 | m_uArraySize = uEdgeCount; |
|---|
| 34 | if (m_uEdgeCount > 0) |
|---|
| 35 | memcpy(m_Edges, OldPath, m_uEdgeCount*sizeof(PWEdge)); |
|---|
| 36 | delete[] OldPath; |
|---|
| 37 | } |
|---|
| 38 | |
|---|
| 39 | void PWPath::AppendEdge(const PWEdge &Edge) |
|---|
| 40 | { |
|---|
| 41 | if (0 == m_uArraySize || m_uEdgeCount + 1 == m_uArraySize) |
|---|
| 42 | ExpandPath(200); |
|---|
| 43 | |
|---|
| 44 | m_Edges[m_uEdgeCount] = Edge; |
|---|
| 45 | ++m_uEdgeCount; |
|---|
| 46 | } |
|---|
| 47 | |
|---|
| 48 | void PWPath::AppendEdge(char cType, unsigned uPrefixLengthA, unsigned uPrefixLengthB) |
|---|
| 49 | { |
|---|
| 50 | PWEdge e; |
|---|
| 51 | e.uPrefixLengthA = uPrefixLengthA; |
|---|
| 52 | e.uPrefixLengthB = uPrefixLengthB; |
|---|
| 53 | e.cType = cType; |
|---|
| 54 | AppendEdge(e); |
|---|
| 55 | } |
|---|
| 56 | |
|---|
| 57 | void PWPath::PrependEdge(const PWEdge &Edge) |
|---|
| 58 | { |
|---|
| 59 | if (0 == m_uArraySize || m_uEdgeCount + 1 == m_uArraySize) |
|---|
| 60 | ExpandPath(1000); |
|---|
| 61 | if (m_uEdgeCount > 0) |
|---|
| 62 | memmove(m_Edges + 1, m_Edges, sizeof(PWEdge)*m_uEdgeCount); |
|---|
| 63 | m_Edges[0] = Edge; |
|---|
| 64 | ++m_uEdgeCount; |
|---|
| 65 | } |
|---|
| 66 | |
|---|
| 67 | const PWEdge &PWPath::GetEdge(unsigned uEdgeIndex) const |
|---|
| 68 | { |
|---|
| 69 | assert(uEdgeIndex < m_uEdgeCount); |
|---|
| 70 | return m_Edges[uEdgeIndex]; |
|---|
| 71 | } |
|---|
| 72 | |
|---|
| 73 | void PWPath::Validate() const |
|---|
| 74 | { |
|---|
| 75 | const unsigned uEdgeCount = GetEdgeCount(); |
|---|
| 76 | if (0 == uEdgeCount) |
|---|
| 77 | return; |
|---|
| 78 | const PWEdge &FirstEdge = GetEdge(0); |
|---|
| 79 | const PWEdge &LastEdge = GetEdge(uEdgeCount - 1); |
|---|
| 80 | unsigned uStartA = FirstEdge.uPrefixLengthA; |
|---|
| 81 | unsigned uStartB = FirstEdge.uPrefixLengthB; |
|---|
| 82 | if (FirstEdge.cType != 'I') |
|---|
| 83 | --uStartA; |
|---|
| 84 | if (FirstEdge.cType != 'D') |
|---|
| 85 | --uStartB; |
|---|
| 86 | |
|---|
| 87 | unsigned uPrefixLengthA = FirstEdge.uPrefixLengthA; |
|---|
| 88 | unsigned uPrefixLengthB = FirstEdge.uPrefixLengthB; |
|---|
| 89 | for (unsigned uEdgeIndex = 1; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
|---|
| 90 | { |
|---|
| 91 | const PWEdge &Edge = GetEdge(uEdgeIndex); |
|---|
| 92 | switch (Edge.cType) |
|---|
| 93 | { |
|---|
| 94 | case 'M': |
|---|
| 95 | if (uPrefixLengthA + 1 != Edge.uPrefixLengthA) |
|---|
| 96 | Quit("PWPath::Validate MA %u", uPrefixLengthA); |
|---|
| 97 | if (uPrefixLengthB + 1 != Edge.uPrefixLengthB) |
|---|
| 98 | Quit("PWPath::Validate MB %u", uPrefixLengthB); |
|---|
| 99 | ++uPrefixLengthA; |
|---|
| 100 | ++uPrefixLengthB; |
|---|
| 101 | break; |
|---|
| 102 | case 'D': |
|---|
| 103 | if (uPrefixLengthA + 1 != Edge.uPrefixLengthA) |
|---|
| 104 | Quit("PWPath::Validate DA %u", uPrefixLengthA); |
|---|
| 105 | if (uPrefixLengthB != Edge.uPrefixLengthB) |
|---|
| 106 | Quit("PWPath::Validate DB %u", uPrefixLengthB); |
|---|
| 107 | ++uPrefixLengthA; |
|---|
| 108 | break; |
|---|
| 109 | case 'I': |
|---|
| 110 | if (uPrefixLengthA != Edge.uPrefixLengthA) |
|---|
| 111 | Quit("PWPath::Validate IA %u", uPrefixLengthA); |
|---|
| 112 | if (uPrefixLengthB + 1 != Edge.uPrefixLengthB) |
|---|
| 113 | Quit("PWPath::Validate IB %u", uPrefixLengthB); |
|---|
| 114 | ++uPrefixLengthB; |
|---|
| 115 | break; |
|---|
| 116 | } |
|---|
| 117 | } |
|---|
| 118 | } |
|---|
| 119 | |
|---|
| 120 | void PWPath::LogMe() const |
|---|
| 121 | { |
|---|
| 122 | for (unsigned uEdgeIndex = 0; uEdgeIndex < GetEdgeCount(); ++uEdgeIndex) |
|---|
| 123 | { |
|---|
| 124 | const PWEdge &Edge = GetEdge(uEdgeIndex); |
|---|
| 125 | if (uEdgeIndex > 0) |
|---|
| 126 | Log(" "); |
|---|
| 127 | Log("%c%d.%d", |
|---|
| 128 | Edge.cType, |
|---|
| 129 | Edge.uPrefixLengthA, |
|---|
| 130 | Edge.uPrefixLengthB); |
|---|
| 131 | if ((uEdgeIndex > 0 && uEdgeIndex%10 == 0) || |
|---|
| 132 | uEdgeIndex == GetEdgeCount() - 1) |
|---|
| 133 | Log("\n"); |
|---|
| 134 | } |
|---|
| 135 | } |
|---|
| 136 | |
|---|
| 137 | void PWPath::Copy(const PWPath &Path) |
|---|
| 138 | { |
|---|
| 139 | Clear(); |
|---|
| 140 | const unsigned uEdgeCount = Path.GetEdgeCount(); |
|---|
| 141 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
|---|
| 142 | { |
|---|
| 143 | const PWEdge &Edge = Path.GetEdge(uEdgeIndex); |
|---|
| 144 | AppendEdge(Edge); |
|---|
| 145 | } |
|---|
| 146 | } |
|---|
| 147 | |
|---|
| 148 | void PWPath::FromMSAPair(const MSA &msaA, const MSA &msaB) |
|---|
| 149 | { |
|---|
| 150 | const unsigned uColCount = msaA.GetColCount(); |
|---|
| 151 | if (uColCount != msaB.GetColCount()) |
|---|
| 152 | Quit("PWPath::FromMSAPair, lengths differ"); |
|---|
| 153 | |
|---|
| 154 | Clear(); |
|---|
| 155 | |
|---|
| 156 | unsigned uPrefixLengthA = 0; |
|---|
| 157 | unsigned uPrefixLengthB = 0; |
|---|
| 158 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
|---|
| 159 | { |
|---|
| 160 | bool bIsGapA = msaA.IsGapColumn(uColIndex); |
|---|
| 161 | bool bIsGapB = msaB.IsGapColumn(uColIndex); |
|---|
| 162 | |
|---|
| 163 | PWEdge Edge; |
|---|
| 164 | char cType; |
|---|
| 165 | if (!bIsGapA && !bIsGapB) |
|---|
| 166 | { |
|---|
| 167 | cType = 'M'; |
|---|
| 168 | ++uPrefixLengthA; |
|---|
| 169 | ++uPrefixLengthB; |
|---|
| 170 | } |
|---|
| 171 | else if (bIsGapA && !bIsGapB) |
|---|
| 172 | { |
|---|
| 173 | cType = 'I'; |
|---|
| 174 | ++uPrefixLengthB; |
|---|
| 175 | } |
|---|
| 176 | else if (!bIsGapA && bIsGapB) |
|---|
| 177 | { |
|---|
| 178 | cType = 'D'; |
|---|
| 179 | ++uPrefixLengthA; |
|---|
| 180 | } |
|---|
| 181 | else |
|---|
| 182 | { |
|---|
| 183 | assert(bIsGapB && bIsGapA); |
|---|
| 184 | continue; |
|---|
| 185 | } |
|---|
| 186 | |
|---|
| 187 | Edge.cType = cType; |
|---|
| 188 | Edge.uPrefixLengthA = uPrefixLengthA; |
|---|
| 189 | Edge.uPrefixLengthB = uPrefixLengthB; |
|---|
| 190 | AppendEdge(Edge); |
|---|
| 191 | } |
|---|
| 192 | } |
|---|
| 193 | |
|---|
| 194 | // Very similar to HMMPath::FromFile, should consolidate. |
|---|
| 195 | void PWPath::FromFile(TextFile &File) |
|---|
| 196 | { |
|---|
| 197 | Clear(); |
|---|
| 198 | char szToken[1024]; |
|---|
| 199 | File.GetTokenX(szToken, sizeof(szToken)); |
|---|
| 200 | if (0 != strcmp(szToken, "Path")) |
|---|
| 201 | Quit("Invalid path file (Path)"); |
|---|
| 202 | |
|---|
| 203 | File.GetTokenX(szToken, sizeof(szToken)); |
|---|
| 204 | if (0 != strcmp(szToken, "edges")) |
|---|
| 205 | Quit("Invalid path file (edges)"); |
|---|
| 206 | |
|---|
| 207 | File.GetTokenX(szToken, sizeof(szToken)); |
|---|
| 208 | if (!IsValidInteger(szToken)) |
|---|
| 209 | Quit("Invalid path file (edges value)"); |
|---|
| 210 | |
|---|
| 211 | const unsigned uEdgeCount = (unsigned) atoi(szToken); |
|---|
| 212 | unsigned uEdgeIndex = 0; |
|---|
| 213 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
|---|
| 214 | { |
|---|
| 215 | // index |
|---|
| 216 | File.GetTokenX(szToken, sizeof(szToken)); |
|---|
| 217 | if (!IsValidInteger(szToken)) |
|---|
| 218 | Quit("Invalid path file, invalid index '%s'", szToken); |
|---|
| 219 | unsigned n = (unsigned) atoi(szToken); |
|---|
| 220 | if (n != uEdgeIndex) |
|---|
| 221 | Quit("Invalid path file, expecting edge %u got %u", uEdgeIndex, n); |
|---|
| 222 | |
|---|
| 223 | // type |
|---|
| 224 | File.GetTokenX(szToken, sizeof(szToken)); |
|---|
| 225 | if (1 != strlen(szToken)) |
|---|
| 226 | Quit("Invalid path file, expecting state, got '%s'", szToken); |
|---|
| 227 | const char cType = szToken[0]; |
|---|
| 228 | if ('M' != cType && 'D' != cType && cType != 'I' && 'S' != cType) |
|---|
| 229 | Quit("Invalid path file, expecting state, got '%c'", cType); |
|---|
| 230 | |
|---|
| 231 | // prefix length A |
|---|
| 232 | File.GetTokenX(szToken, sizeof(szToken)); |
|---|
| 233 | if (!IsValidInteger(szToken)) |
|---|
| 234 | Quit("Invalid path file, bad prefix length A '%s'", szToken); |
|---|
| 235 | const unsigned uPrefixLengthA = (unsigned) atoi(szToken); |
|---|
| 236 | |
|---|
| 237 | // prefix length B |
|---|
| 238 | File.GetTokenX(szToken, sizeof(szToken)); |
|---|
| 239 | if (!IsValidInteger(szToken)) |
|---|
| 240 | Quit("Invalid path file, bad prefix length B '%s'", szToken); |
|---|
| 241 | const unsigned uPrefixLengthB = (unsigned) atoi(szToken); |
|---|
| 242 | |
|---|
| 243 | PWEdge Edge; |
|---|
| 244 | Edge.cType = cType; |
|---|
| 245 | Edge.uPrefixLengthA = uPrefixLengthA; |
|---|
| 246 | Edge.uPrefixLengthB = uPrefixLengthB; |
|---|
| 247 | AppendEdge(Edge); |
|---|
| 248 | } |
|---|
| 249 | File.GetTokenX(szToken, sizeof(szToken)); |
|---|
| 250 | if (0 != strcmp(szToken, "//")) |
|---|
| 251 | Quit("Invalid path file (//)"); |
|---|
| 252 | } |
|---|
| 253 | |
|---|
| 254 | void PWPath::ToFile(TextFile &File) const |
|---|
| 255 | { |
|---|
| 256 | const unsigned uEdgeCount = GetEdgeCount(); |
|---|
| 257 | |
|---|
| 258 | File.PutString("Path\n"); |
|---|
| 259 | File.PutFormat("edges %u\n", uEdgeCount); |
|---|
| 260 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
|---|
| 261 | { |
|---|
| 262 | const PWEdge &Edge = GetEdge(uEdgeIndex); |
|---|
| 263 | File.PutFormat("%u %c %u %u\n", |
|---|
| 264 | uEdgeIndex, |
|---|
| 265 | Edge.cType, |
|---|
| 266 | Edge.uPrefixLengthA, |
|---|
| 267 | Edge.uPrefixLengthB); |
|---|
| 268 | } |
|---|
| 269 | File.PutString("//\n"); |
|---|
| 270 | } |
|---|
| 271 | |
|---|
| 272 | void PWPath::AssertEqual(const PWPath &Path) const |
|---|
| 273 | { |
|---|
| 274 | const unsigned uEdgeCount = GetEdgeCount(); |
|---|
| 275 | if (uEdgeCount != Path.GetEdgeCount()) |
|---|
| 276 | { |
|---|
| 277 | Log("PWPath::AssertEqual, this=\n"); |
|---|
| 278 | LogMe(); |
|---|
| 279 | Log("\nOther path=\n"); |
|---|
| 280 | Path.LogMe(); |
|---|
| 281 | Log("\n"); |
|---|
| 282 | Quit("PWPath::AssertEqual, Edge count different %u %u\n", |
|---|
| 283 | uEdgeCount, Path.GetEdgeCount()); |
|---|
| 284 | } |
|---|
| 285 | |
|---|
| 286 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
|---|
| 287 | { |
|---|
| 288 | const PWEdge &e1 = GetEdge(uEdgeIndex); |
|---|
| 289 | const PWEdge &e2 = Path.GetEdge(uEdgeIndex); |
|---|
| 290 | if (e1.cType != e2.cType || e1.uPrefixLengthA != e2.uPrefixLengthA || |
|---|
| 291 | e1.uPrefixLengthB != e2.uPrefixLengthB) |
|---|
| 292 | { |
|---|
| 293 | Log("PWPath::AssertEqual, this=\n"); |
|---|
| 294 | LogMe(); |
|---|
| 295 | Log("\nOther path=\n"); |
|---|
| 296 | Path.LogMe(); |
|---|
| 297 | Log("\n"); |
|---|
| 298 | Log("This edge %c%u.%u, other edge %c%u.%u\n", |
|---|
| 299 | e1.cType, e1.uPrefixLengthA, e1.uPrefixLengthB, |
|---|
| 300 | e2.cType, e2.uPrefixLengthA, e2.uPrefixLengthB); |
|---|
| 301 | Quit("PWPath::AssertEqual, edge %u different\n", uEdgeIndex); |
|---|
| 302 | } |
|---|
| 303 | } |
|---|
| 304 | } |
|---|
| 305 | |
|---|
| 306 | bool PWPath::Equal(const PWPath &Path) const |
|---|
| 307 | { |
|---|
| 308 | const unsigned uEdgeCount = GetEdgeCount(); |
|---|
| 309 | if (uEdgeCount != Path.GetEdgeCount()) |
|---|
| 310 | return false; |
|---|
| 311 | |
|---|
| 312 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
|---|
| 313 | { |
|---|
| 314 | const PWEdge &e1 = GetEdge(uEdgeIndex); |
|---|
| 315 | const PWEdge &e2 = Path.GetEdge(uEdgeIndex); |
|---|
| 316 | if (e1.cType != e2.cType || e1.uPrefixLengthA != e2.uPrefixLengthA || |
|---|
| 317 | e1.uPrefixLengthB != e2.uPrefixLengthB) |
|---|
| 318 | return false; |
|---|
| 319 | } |
|---|
| 320 | return true; |
|---|
| 321 | } |
|---|
| 322 | |
|---|
| 323 | unsigned PWPath::GetMatchCount() const |
|---|
| 324 | { |
|---|
| 325 | unsigned uMatchCount = 0; |
|---|
| 326 | const unsigned uEdgeCount = GetEdgeCount(); |
|---|
| 327 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
|---|
| 328 | { |
|---|
| 329 | const PWEdge &e = GetEdge(uEdgeIndex); |
|---|
| 330 | if ('M' == e.cType) |
|---|
| 331 | ++uMatchCount; |
|---|
| 332 | } |
|---|
| 333 | return uMatchCount; |
|---|
| 334 | } |
|---|
| 335 | |
|---|
| 336 | unsigned PWPath::GetInsertCount() const |
|---|
| 337 | { |
|---|
| 338 | unsigned uInsertCount = 0; |
|---|
| 339 | const unsigned uEdgeCount = GetEdgeCount(); |
|---|
| 340 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
|---|
| 341 | { |
|---|
| 342 | const PWEdge &e = GetEdge(uEdgeIndex); |
|---|
| 343 | if ('I' == e.cType) |
|---|
| 344 | ++uInsertCount; |
|---|
| 345 | } |
|---|
| 346 | return uInsertCount; |
|---|
| 347 | } |
|---|
| 348 | |
|---|
| 349 | unsigned PWPath::GetDeleteCount() const |
|---|
| 350 | { |
|---|
| 351 | unsigned uDeleteCount = 0; |
|---|
| 352 | const unsigned uEdgeCount = GetEdgeCount(); |
|---|
| 353 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
|---|
| 354 | { |
|---|
| 355 | const PWEdge &e = GetEdge(uEdgeIndex); |
|---|
| 356 | if ('D' == e.cType) |
|---|
| 357 | ++uDeleteCount; |
|---|
| 358 | } |
|---|
| 359 | return uDeleteCount; |
|---|
| 360 | } |
|---|
| 361 | |
|---|
| 362 | void PWPath::FromStr(const char Str[]) |
|---|
| 363 | { |
|---|
| 364 | Clear(); |
|---|
| 365 | unsigned uPrefixLengthA = 0; |
|---|
| 366 | unsigned uPrefixLengthB = 0; |
|---|
| 367 | while (char c = *Str++) |
|---|
| 368 | { |
|---|
| 369 | switch (c) |
|---|
| 370 | { |
|---|
| 371 | case 'M': |
|---|
| 372 | ++uPrefixLengthA; |
|---|
| 373 | ++uPrefixLengthB; |
|---|
| 374 | break; |
|---|
| 375 | case 'D': |
|---|
| 376 | ++uPrefixLengthA; |
|---|
| 377 | break; |
|---|
| 378 | case 'I': |
|---|
| 379 | ++uPrefixLengthB; |
|---|
| 380 | break; |
|---|
| 381 | default: |
|---|
| 382 | Quit("PWPath::FromStr, invalid state %c", c); |
|---|
| 383 | } |
|---|
| 384 | AppendEdge(c, uPrefixLengthA, uPrefixLengthB); |
|---|
| 385 | } |
|---|
| 386 | } |
|---|