| 1 | #include "muscle.h" |
|---|
| 2 | #include "msa.h" |
|---|
| 3 | #include "pwpath.h" |
|---|
| 4 | #include "profile.h" |
|---|
| 5 | |
|---|
| 6 | #define TRACE 0 |
|---|
| 7 | |
|---|
| 8 | static void AppendDelete(const MSA &msaA, unsigned &uColIndexA, |
|---|
| 9 | unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined, |
|---|
| 10 | unsigned &uColIndexCombined) |
|---|
| 11 | { |
|---|
| 12 | #if TRACE |
|---|
| 13 | Log("AppendDelete ColIxA=%u ColIxCmb=%u\n", |
|---|
| 14 | uColIndexA, uColIndexCombined); |
|---|
| 15 | #endif |
|---|
| 16 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
|---|
| 17 | { |
|---|
| 18 | char c = msaA.GetChar(uSeqIndexA, uColIndexA); |
|---|
| 19 | msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c); |
|---|
| 20 | } |
|---|
| 21 | |
|---|
| 22 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
|---|
| 23 | msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-'); |
|---|
| 24 | |
|---|
| 25 | ++uColIndexCombined; |
|---|
| 26 | ++uColIndexA; |
|---|
| 27 | } |
|---|
| 28 | |
|---|
| 29 | static void AppendInsert(const MSA &msaB, unsigned &uColIndexB, |
|---|
| 30 | unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined, |
|---|
| 31 | unsigned &uColIndexCombined) |
|---|
| 32 | { |
|---|
| 33 | #if TRACE |
|---|
| 34 | Log("AppendInsert ColIxB=%u ColIxCmb=%u\n", |
|---|
| 35 | uColIndexB, uColIndexCombined); |
|---|
| 36 | #endif |
|---|
| 37 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
|---|
| 38 | msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-'); |
|---|
| 39 | |
|---|
| 40 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
|---|
| 41 | { |
|---|
| 42 | char c = msaB.GetChar(uSeqIndexB, uColIndexB); |
|---|
| 43 | msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c); |
|---|
| 44 | } |
|---|
| 45 | |
|---|
| 46 | ++uColIndexCombined; |
|---|
| 47 | ++uColIndexB; |
|---|
| 48 | } |
|---|
| 49 | |
|---|
| 50 | static void AppendUnalignedTerminals(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA, |
|---|
| 51 | const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA, |
|---|
| 52 | unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined) |
|---|
| 53 | { |
|---|
| 54 | #if TRACE |
|---|
| 55 | Log("AppendUnalignedTerminals ColIxA=%u ColIxB=%u ColIxCmb=%u\n", |
|---|
| 56 | uColIndexA, uColIndexB, uColIndexCombined); |
|---|
| 57 | #endif |
|---|
| 58 | const unsigned uLengthA = msaA.GetColCount(); |
|---|
| 59 | const unsigned uLengthB = msaB.GetColCount(); |
|---|
| 60 | |
|---|
| 61 | unsigned uNewColCount = uColCountA; |
|---|
| 62 | if (uColCountB > uNewColCount) |
|---|
| 63 | uNewColCount = uColCountB; |
|---|
| 64 | |
|---|
| 65 | for (unsigned n = 0; n < uColCountA; ++n) |
|---|
| 66 | { |
|---|
| 67 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
|---|
| 68 | { |
|---|
| 69 | char c = msaA.GetChar(uSeqIndexA, uColIndexA + n); |
|---|
| 70 | c = UnalignChar(c); |
|---|
| 71 | msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c); |
|---|
| 72 | } |
|---|
| 73 | } |
|---|
| 74 | for (unsigned n = uColCountA; n < uNewColCount; ++n) |
|---|
| 75 | { |
|---|
| 76 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
|---|
| 77 | msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.'); |
|---|
| 78 | } |
|---|
| 79 | |
|---|
| 80 | for (unsigned n = 0; n < uColCountB; ++n) |
|---|
| 81 | { |
|---|
| 82 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
|---|
| 83 | { |
|---|
| 84 | char c = msaB.GetChar(uSeqIndexB, uColIndexB + n); |
|---|
| 85 | c = UnalignChar(c); |
|---|
| 86 | msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c); |
|---|
| 87 | } |
|---|
| 88 | } |
|---|
| 89 | for (unsigned n = uColCountB; n < uNewColCount; ++n) |
|---|
| 90 | { |
|---|
| 91 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
|---|
| 92 | msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.'); |
|---|
| 93 | } |
|---|
| 94 | |
|---|
| 95 | uColIndexCombined += uNewColCount; |
|---|
| 96 | uColIndexA += uColCountA; |
|---|
| 97 | uColIndexB += uColCountB; |
|---|
| 98 | } |
|---|
| 99 | |
|---|
| 100 | static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB, |
|---|
| 101 | unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB, |
|---|
| 102 | MSA &msaCombined, unsigned &uColIndexCombined) |
|---|
| 103 | { |
|---|
| 104 | #if TRACE |
|---|
| 105 | Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n", |
|---|
| 106 | uColIndexA, uColIndexB, uColIndexCombined); |
|---|
| 107 | #endif |
|---|
| 108 | |
|---|
| 109 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
|---|
| 110 | { |
|---|
| 111 | char c = msaA.GetChar(uSeqIndexA, uColIndexA); |
|---|
| 112 | msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c); |
|---|
| 113 | } |
|---|
| 114 | |
|---|
| 115 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
|---|
| 116 | { |
|---|
| 117 | char c = msaB.GetChar(uSeqIndexB, uColIndexB); |
|---|
| 118 | msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c); |
|---|
| 119 | } |
|---|
| 120 | |
|---|
| 121 | ++uColIndexA; |
|---|
| 122 | ++uColIndexB; |
|---|
| 123 | ++uColIndexCombined; |
|---|
| 124 | } |
|---|
| 125 | |
|---|
| 126 | void AlignTwoMSAsGivenPathSW(const PWPath &Path, const MSA &msaA, const MSA &msaB, |
|---|
| 127 | MSA &msaCombined) |
|---|
| 128 | { |
|---|
| 129 | msaCombined.Clear(); |
|---|
| 130 | |
|---|
| 131 | #if TRACE |
|---|
| 132 | Log("AlignTwoMSAsGivenPathSW\n"); |
|---|
| 133 | Log("Template A:\n"); |
|---|
| 134 | msaA.LogMe(); |
|---|
| 135 | Log("Template B:\n"); |
|---|
| 136 | msaB.LogMe(); |
|---|
| 137 | #endif |
|---|
| 138 | |
|---|
| 139 | const unsigned uColCountA = msaA.GetColCount(); |
|---|
| 140 | const unsigned uColCountB = msaB.GetColCount(); |
|---|
| 141 | |
|---|
| 142 | const unsigned uSeqCountA = msaA.GetSeqCount(); |
|---|
| 143 | const unsigned uSeqCountB = msaB.GetSeqCount(); |
|---|
| 144 | |
|---|
| 145 | msaCombined.SetSeqCount(uSeqCountA + uSeqCountB); |
|---|
| 146 | |
|---|
| 147 | // Copy sequence names into combined MSA |
|---|
| 148 | for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA) |
|---|
| 149 | { |
|---|
| 150 | msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA)); |
|---|
| 151 | msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA)); |
|---|
| 152 | } |
|---|
| 153 | |
|---|
| 154 | for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB) |
|---|
| 155 | { |
|---|
| 156 | msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB)); |
|---|
| 157 | msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB)); |
|---|
| 158 | } |
|---|
| 159 | |
|---|
| 160 | unsigned uColIndexA = 0; |
|---|
| 161 | unsigned uColIndexB = 0; |
|---|
| 162 | unsigned uColIndexCombined = 0; |
|---|
| 163 | const unsigned uEdgeCount = Path.GetEdgeCount(); |
|---|
| 164 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
|---|
| 165 | { |
|---|
| 166 | const PWEdge &Edge = Path.GetEdge(uEdgeIndex); |
|---|
| 167 | #if TRACE |
|---|
| 168 | Log("\nEdge %u %c%u.%u\n", |
|---|
| 169 | uEdgeIndex, |
|---|
| 170 | Edge.cType, |
|---|
| 171 | Edge.uPrefixLengthA, |
|---|
| 172 | Edge.uPrefixLengthB); |
|---|
| 173 | #endif |
|---|
| 174 | const char cType = Edge.cType; |
|---|
| 175 | const unsigned uPrefixLengthA = Edge.uPrefixLengthA; |
|---|
| 176 | unsigned uColCountA = 0; |
|---|
| 177 | if (uPrefixLengthA > 0) |
|---|
| 178 | { |
|---|
| 179 | const unsigned uNodeIndexA = uPrefixLengthA - 1; |
|---|
| 180 | const unsigned uTplColIndexA = uNodeIndexA; |
|---|
| 181 | if (uTplColIndexA > uColIndexA) |
|---|
| 182 | uColCountA = uTplColIndexA - uColIndexA; |
|---|
| 183 | } |
|---|
| 184 | |
|---|
| 185 | const unsigned uPrefixLengthB = Edge.uPrefixLengthB; |
|---|
| 186 | unsigned uColCountB = 0; |
|---|
| 187 | if (uPrefixLengthB > 0) |
|---|
| 188 | { |
|---|
| 189 | const unsigned uNodeIndexB = uPrefixLengthB - 1; |
|---|
| 190 | const unsigned uTplColIndexB = uNodeIndexB; |
|---|
| 191 | if (uTplColIndexB > uColIndexB) |
|---|
| 192 | uColCountB = uTplColIndexB - uColIndexB; |
|---|
| 193 | } |
|---|
| 194 | |
|---|
| 195 | AppendUnalignedTerminals(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB, |
|---|
| 196 | uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); |
|---|
| 197 | |
|---|
| 198 | switch (cType) |
|---|
| 199 | { |
|---|
| 200 | case 'M': |
|---|
| 201 | { |
|---|
| 202 | assert(uPrefixLengthA > 0); |
|---|
| 203 | assert(uPrefixLengthB > 0); |
|---|
| 204 | const unsigned uColA = uPrefixLengthA - 1; |
|---|
| 205 | const unsigned uColB = uPrefixLengthB - 1; |
|---|
| 206 | assert(uColIndexA == uColA); |
|---|
| 207 | assert(uColIndexB == uColB); |
|---|
| 208 | AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB, |
|---|
| 209 | msaCombined, uColIndexCombined); |
|---|
| 210 | break; |
|---|
| 211 | } |
|---|
| 212 | case 'D': |
|---|
| 213 | { |
|---|
| 214 | assert(uPrefixLengthA > 0); |
|---|
| 215 | const unsigned uColA = uPrefixLengthA - 1; |
|---|
| 216 | assert(uColIndexA == uColA); |
|---|
| 217 | AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); |
|---|
| 218 | break; |
|---|
| 219 | } |
|---|
| 220 | case 'I': |
|---|
| 221 | { |
|---|
| 222 | assert(uPrefixLengthB > 0); |
|---|
| 223 | const unsigned uColB = uPrefixLengthB - 1; |
|---|
| 224 | assert(uColIndexB == uColB); |
|---|
| 225 | AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); |
|---|
| 226 | break; |
|---|
| 227 | } |
|---|
| 228 | default: |
|---|
| 229 | assert(false); |
|---|
| 230 | } |
|---|
| 231 | } |
|---|
| 232 | unsigned uInsertColCountA = uColCountA - uColIndexA; |
|---|
| 233 | unsigned uInsertColCountB = uColCountB - uColIndexB; |
|---|
| 234 | |
|---|
| 235 | AppendUnalignedTerminals(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB, |
|---|
| 236 | uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined); |
|---|
| 237 | } |
|---|