1 | #include "muscle.h" |
---|
2 | #include "msa.h" |
---|
3 | #include "seqvect.h" |
---|
4 | #include "profile.h" |
---|
5 | #include "tree.h" |
---|
6 | |
---|
7 | // These global variables are a hack to allow the tree |
---|
8 | // dependent iteration code to communicate the edge |
---|
9 | // used to divide the tree. The three-way weighting |
---|
10 | // scheme needs to know this edge in order to compute |
---|
11 | // sequence weights. |
---|
12 | static const Tree *g_ptrMuscleTree = 0; |
---|
13 | unsigned g_uTreeSplitNode1 = NULL_NEIGHBOR; |
---|
14 | unsigned g_uTreeSplitNode2 = NULL_NEIGHBOR; |
---|
15 | |
---|
16 | void MSA::GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize, |
---|
17 | FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd, |
---|
18 | FCOUNT *ptrfcGapExtend, FCOUNT *ptrfOcc, |
---|
19 | FCOUNT *ptrfcLL, FCOUNT *ptrfcLG, FCOUNT *ptrfcGL, FCOUNT *ptrfcGG) const |
---|
20 | { |
---|
21 | const unsigned uSeqCount = GetSeqCount(); |
---|
22 | const unsigned uColCount = GetColCount(); |
---|
23 | |
---|
24 | memset(fcCounts, 0, g_AlphaSize*sizeof(FCOUNT)); |
---|
25 | WEIGHT wTotal = 0; |
---|
26 | FCOUNT fGap = 0; |
---|
27 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
28 | { |
---|
29 | const WEIGHT w = GetSeqWeight(uSeqIndex); |
---|
30 | if (IsGap(uSeqIndex, uColIndex)) |
---|
31 | { |
---|
32 | fGap += w; |
---|
33 | continue; |
---|
34 | } |
---|
35 | else if (IsWildcard(uSeqIndex, uColIndex)) |
---|
36 | { |
---|
37 | const unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex); |
---|
38 | switch (g_Alpha) |
---|
39 | { |
---|
40 | case ALPHA_Amino: |
---|
41 | switch (uLetter) |
---|
42 | { |
---|
43 | case AX_B: // D or N |
---|
44 | fcCounts[AX_D] += w/2; |
---|
45 | fcCounts[AX_N] += w/2; |
---|
46 | break; |
---|
47 | case AX_Z: // E or Q |
---|
48 | fcCounts[AX_E] += w/2; |
---|
49 | fcCounts[AX_Q] += w/2; |
---|
50 | break; |
---|
51 | default: // any |
---|
52 | { |
---|
53 | const FCOUNT f = w/20; |
---|
54 | for (unsigned uLetter = 0; uLetter < 20; ++uLetter) |
---|
55 | fcCounts[uLetter] += f; |
---|
56 | break; |
---|
57 | } |
---|
58 | } |
---|
59 | break; |
---|
60 | |
---|
61 | case ALPHA_DNA: |
---|
62 | case ALPHA_RNA: |
---|
63 | switch (uLetter) |
---|
64 | { |
---|
65 | case AX_R: // G or A |
---|
66 | fcCounts[NX_G] += w/2; |
---|
67 | fcCounts[NX_A] += w/2; |
---|
68 | break; |
---|
69 | case AX_Y: // C or T/U |
---|
70 | fcCounts[NX_C] += w/2; |
---|
71 | fcCounts[NX_T] += w/2; |
---|
72 | break; |
---|
73 | default: // any |
---|
74 | const FCOUNT f = w/20; |
---|
75 | for (unsigned uLetter = 0; uLetter < 4; ++uLetter) |
---|
76 | fcCounts[uLetter] += f; |
---|
77 | break; |
---|
78 | } |
---|
79 | break; |
---|
80 | |
---|
81 | default: |
---|
82 | Quit("Alphabet %d not supported", g_Alpha); |
---|
83 | } |
---|
84 | continue; |
---|
85 | } |
---|
86 | unsigned uLetter = GetLetter(uSeqIndex, uColIndex); |
---|
87 | fcCounts[uLetter] += w; |
---|
88 | wTotal += w; |
---|
89 | } |
---|
90 | *ptrfOcc = (float) (1.0 - fGap); |
---|
91 | |
---|
92 | if (bNormalize && wTotal > 0) |
---|
93 | { |
---|
94 | if (wTotal > 1.001) |
---|
95 | Quit("wTotal=%g\n", wTotal); |
---|
96 | for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter) |
---|
97 | fcCounts[uLetter] /= wTotal; |
---|
98 | // AssertNormalized(fcCounts); |
---|
99 | } |
---|
100 | |
---|
101 | FCOUNT fcStartCount = 0; |
---|
102 | if (uColIndex == 0) |
---|
103 | { |
---|
104 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
105 | if (IsGap(uSeqIndex, uColIndex)) |
---|
106 | fcStartCount += GetSeqWeight(uSeqIndex); |
---|
107 | } |
---|
108 | else |
---|
109 | { |
---|
110 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
111 | if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex - 1)) |
---|
112 | fcStartCount += GetSeqWeight(uSeqIndex); |
---|
113 | } |
---|
114 | |
---|
115 | FCOUNT fcEndCount = 0; |
---|
116 | if (uColCount - 1 == uColIndex) |
---|
117 | { |
---|
118 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
119 | if (IsGap(uSeqIndex, uColIndex)) |
---|
120 | fcEndCount += GetSeqWeight(uSeqIndex); |
---|
121 | } |
---|
122 | else |
---|
123 | { |
---|
124 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
125 | if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex + 1)) |
---|
126 | fcEndCount += GetSeqWeight(uSeqIndex); |
---|
127 | } |
---|
128 | |
---|
129 | FCOUNT LL = 0; |
---|
130 | FCOUNT LG = 0; |
---|
131 | FCOUNT GL = 0; |
---|
132 | FCOUNT GG = 0; |
---|
133 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
134 | { |
---|
135 | WEIGHT w = GetSeqWeight(uSeqIndex); |
---|
136 | bool bLetterHere = !IsGap(uSeqIndex, uColIndex); |
---|
137 | bool bLetterPrev = (uColIndex == 0 || !IsGap(uSeqIndex, uColIndex - 1)); |
---|
138 | if (bLetterHere) |
---|
139 | { |
---|
140 | if (bLetterPrev) |
---|
141 | LL += w; |
---|
142 | else |
---|
143 | GL += w; |
---|
144 | } |
---|
145 | else |
---|
146 | { |
---|
147 | if (bLetterPrev) |
---|
148 | LG += w; |
---|
149 | else |
---|
150 | GG += w; |
---|
151 | } |
---|
152 | } |
---|
153 | |
---|
154 | FCOUNT fcExtendCount = 0; |
---|
155 | if (uColIndex > 0 && uColIndex < GetColCount() - 1) |
---|
156 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
157 | if (IsGap(uSeqIndex, uColIndex) && IsGap(uSeqIndex, uColIndex - 1) && |
---|
158 | IsGap(uSeqIndex, uColIndex + 1)) |
---|
159 | fcExtendCount += GetSeqWeight(uSeqIndex); |
---|
160 | |
---|
161 | *ptrfcLL = LL; |
---|
162 | *ptrfcLG = LG; |
---|
163 | *ptrfcGL = GL; |
---|
164 | *ptrfcGG = GG; |
---|
165 | *ptrfcGapStart = fcStartCount; |
---|
166 | *ptrfcGapEnd = fcEndCount; |
---|
167 | *ptrfcGapExtend = fcExtendCount; |
---|
168 | } |
---|
169 | |
---|
170 | // Return true if the given column has no gaps and all |
---|
171 | // its residues are in the same biochemical group. |
---|
172 | bool MSAColIsConservative(const MSA &msa, unsigned uColIndex) |
---|
173 | { |
---|
174 | extern unsigned ResidueGroup[]; |
---|
175 | |
---|
176 | const unsigned uSeqCount = msa.GetColCount(); |
---|
177 | if (0 == uSeqCount) |
---|
178 | Quit("MSAColIsConservative: empty alignment"); |
---|
179 | |
---|
180 | if (msa.IsGap(0, uColIndex)) |
---|
181 | return false; |
---|
182 | |
---|
183 | unsigned uLetter = msa.GetLetterEx(0, uColIndex); |
---|
184 | const unsigned uGroup = ResidueGroup[uLetter]; |
---|
185 | |
---|
186 | for (unsigned uSeqIndex = 1; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
187 | { |
---|
188 | if (msa.IsGap(uSeqIndex, uColIndex)) |
---|
189 | return false; |
---|
190 | uLetter = msa.GetLetter(uSeqIndex, uColIndex); |
---|
191 | if (ResidueGroup[uLetter] != uGroup) |
---|
192 | return false; |
---|
193 | } |
---|
194 | return true; |
---|
195 | } |
---|
196 | |
---|
197 | void MSAFromSeqRange(const MSA &msaIn, unsigned uFromSeqIndex, unsigned uSeqCount, |
---|
198 | MSA &msaOut) |
---|
199 | { |
---|
200 | const unsigned uColCount = msaIn.GetColCount(); |
---|
201 | msaOut.SetSize(uSeqCount, uColCount); |
---|
202 | |
---|
203 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
204 | { |
---|
205 | const char *ptrName = msaIn.GetSeqName(uFromSeqIndex + uSeqIndex); |
---|
206 | msaOut.SetSeqName(uSeqIndex, ptrName); |
---|
207 | |
---|
208 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
---|
209 | { |
---|
210 | const char c = msaIn.GetChar(uFromSeqIndex + uSeqIndex, uColIndex); |
---|
211 | msaOut.SetChar(uSeqIndex, uColIndex, c); |
---|
212 | } |
---|
213 | } |
---|
214 | } |
---|
215 | |
---|
216 | void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount, |
---|
217 | MSA &msaOut) |
---|
218 | { |
---|
219 | const unsigned uSeqCount = msaIn.GetSeqCount(); |
---|
220 | const unsigned uInColCount = msaIn.GetColCount(); |
---|
221 | |
---|
222 | if (uFromColIndex + uColCount - 1 > uInColCount) |
---|
223 | Quit("MSAFromColRange, out of bounds"); |
---|
224 | |
---|
225 | msaOut.SetSize(uSeqCount, uColCount); |
---|
226 | |
---|
227 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
228 | { |
---|
229 | const char *ptrName = msaIn.GetSeqName(uSeqIndex); |
---|
230 | unsigned uId = msaIn.GetSeqId(uSeqIndex); |
---|
231 | msaOut.SetSeqName(uSeqIndex, ptrName); |
---|
232 | msaOut.SetSeqId(uSeqIndex, uId); |
---|
233 | |
---|
234 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
---|
235 | { |
---|
236 | const char c = msaIn.GetChar(uSeqIndex, uFromColIndex + uColIndex); |
---|
237 | msaOut.SetChar(uSeqIndex, uColIndex, c); |
---|
238 | } |
---|
239 | } |
---|
240 | } |
---|
241 | |
---|
242 | void SeqVectFromMSA(const MSA &msa, SeqVect &v) |
---|
243 | { |
---|
244 | v.Clear(); |
---|
245 | const unsigned uSeqCount = msa.GetSeqCount(); |
---|
246 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
247 | { |
---|
248 | Seq s; |
---|
249 | msa.GetSeq(uSeqIndex, s); |
---|
250 | |
---|
251 | s.StripGaps(); |
---|
252 | //if (0 == s.Length()) |
---|
253 | // continue; |
---|
254 | |
---|
255 | const char *ptrName = msa.GetSeqName(uSeqIndex); |
---|
256 | s.SetName(ptrName); |
---|
257 | |
---|
258 | v.AppendSeq(s); |
---|
259 | } |
---|
260 | } |
---|
261 | |
---|
262 | void DeleteGappedCols(MSA &msa) |
---|
263 | { |
---|
264 | unsigned uColIndex = 0; |
---|
265 | for (;;) |
---|
266 | { |
---|
267 | if (uColIndex >= msa.GetColCount()) |
---|
268 | break; |
---|
269 | if (msa.IsGapColumn(uColIndex)) |
---|
270 | msa.DeleteCol(uColIndex); |
---|
271 | else |
---|
272 | ++uColIndex; |
---|
273 | } |
---|
274 | } |
---|
275 | |
---|
276 | void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount, |
---|
277 | MSA &msaOut) |
---|
278 | { |
---|
279 | const unsigned uColCount = msaIn.GetColCount(); |
---|
280 | msaOut.SetSize(uSeqCount, uColCount); |
---|
281 | for (unsigned uSeqIndexOut = 0; uSeqIndexOut < uSeqCount; ++uSeqIndexOut) |
---|
282 | { |
---|
283 | unsigned uSeqIndexIn = uSeqIndexes[uSeqIndexOut]; |
---|
284 | const char *ptrName = msaIn.GetSeqName(uSeqIndexIn); |
---|
285 | unsigned uId = msaIn.GetSeqId(uSeqIndexIn); |
---|
286 | msaOut.SetSeqName(uSeqIndexOut, ptrName); |
---|
287 | msaOut.SetSeqId(uSeqIndexOut, uId); |
---|
288 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
---|
289 | { |
---|
290 | const char c = msaIn.GetChar(uSeqIndexIn, uColIndex); |
---|
291 | msaOut.SetChar(uSeqIndexOut, uColIndex, c); |
---|
292 | } |
---|
293 | } |
---|
294 | } |
---|
295 | |
---|
296 | void AssertMSAEqIgnoreCaseAndGaps(const MSA &msa1, const MSA &msa2) |
---|
297 | { |
---|
298 | const unsigned uSeqCount1 = msa1.GetSeqCount(); |
---|
299 | const unsigned uSeqCount2 = msa2.GetSeqCount(); |
---|
300 | if (uSeqCount1 != uSeqCount2) |
---|
301 | Quit("Seq count differs"); |
---|
302 | |
---|
303 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount1; ++uSeqIndex) |
---|
304 | { |
---|
305 | Seq seq1; |
---|
306 | msa1.GetSeq(uSeqIndex, seq1); |
---|
307 | |
---|
308 | unsigned uId = msa1.GetSeqId(uSeqIndex); |
---|
309 | unsigned uSeqIndex2 = msa2.GetSeqIndex(uId); |
---|
310 | |
---|
311 | Seq seq2; |
---|
312 | msa2.GetSeq(uSeqIndex2, seq2); |
---|
313 | |
---|
314 | if (!seq1.EqIgnoreCaseAndGaps(seq2)) |
---|
315 | { |
---|
316 | Log("Input:\n"); |
---|
317 | seq1.LogMe(); |
---|
318 | Log("Output:\n"); |
---|
319 | seq2.LogMe(); |
---|
320 | Quit("Seq %s differ ", msa1.GetSeqName(uSeqIndex)); |
---|
321 | } |
---|
322 | } |
---|
323 | } |
---|
324 | |
---|
325 | void AssertMSAEq(const MSA &msa1, const MSA &msa2) |
---|
326 | { |
---|
327 | const unsigned uSeqCount1 = msa1.GetSeqCount(); |
---|
328 | const unsigned uSeqCount2 = msa2.GetSeqCount(); |
---|
329 | if (uSeqCount1 != uSeqCount2) |
---|
330 | Quit("Seq count differs"); |
---|
331 | |
---|
332 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount1; ++uSeqIndex) |
---|
333 | { |
---|
334 | Seq seq1; |
---|
335 | msa1.GetSeq(uSeqIndex, seq1); |
---|
336 | |
---|
337 | unsigned uId = msa1.GetSeqId(uSeqIndex); |
---|
338 | unsigned uSeqIndex2 = msa2.GetSeqIndex(uId); |
---|
339 | |
---|
340 | Seq seq2; |
---|
341 | msa2.GetSeq(uSeqIndex2, seq2); |
---|
342 | |
---|
343 | if (!seq1.Eq(seq2)) |
---|
344 | { |
---|
345 | Log("Input:\n"); |
---|
346 | seq1.LogMe(); |
---|
347 | Log("Output:\n"); |
---|
348 | seq2.LogMe(); |
---|
349 | Quit("Seq %s differ ", msa1.GetSeqName(uSeqIndex)); |
---|
350 | } |
---|
351 | } |
---|
352 | } |
---|
353 | |
---|
354 | void SetMSAWeightsMuscle(MSA &msa) |
---|
355 | { |
---|
356 | SEQWEIGHT Method = GetSeqWeightMethod(); |
---|
357 | switch (Method) |
---|
358 | { |
---|
359 | case SEQWEIGHT_None: |
---|
360 | msa.SetUniformWeights(); |
---|
361 | return; |
---|
362 | |
---|
363 | case SEQWEIGHT_Henikoff: |
---|
364 | msa.SetHenikoffWeights(); |
---|
365 | return; |
---|
366 | |
---|
367 | case SEQWEIGHT_HenikoffPB: |
---|
368 | msa.SetHenikoffWeightsPB(); |
---|
369 | return; |
---|
370 | |
---|
371 | case SEQWEIGHT_GSC: |
---|
372 | msa.SetGSCWeights(); |
---|
373 | return; |
---|
374 | |
---|
375 | case SEQWEIGHT_ClustalW: |
---|
376 | SetClustalWWeightsMuscle(msa); |
---|
377 | return; |
---|
378 | |
---|
379 | case SEQWEIGHT_ThreeWay: |
---|
380 | SetThreeWayWeightsMuscle(msa); |
---|
381 | return; |
---|
382 | } |
---|
383 | Quit("SetMSAWeightsMuscle, Invalid method=%d", Method); |
---|
384 | } |
---|
385 | |
---|
386 | static WEIGHT *g_MuscleWeights; |
---|
387 | static unsigned g_uMuscleIdCount; |
---|
388 | |
---|
389 | WEIGHT GetMuscleSeqWeightById(unsigned uId) |
---|
390 | { |
---|
391 | if (0 == g_MuscleWeights) |
---|
392 | Quit("g_MuscleWeights = 0"); |
---|
393 | if (uId >= g_uMuscleIdCount) |
---|
394 | Quit("GetMuscleSeqWeightById(%u): count=%u", |
---|
395 | uId, g_uMuscleIdCount); |
---|
396 | |
---|
397 | return g_MuscleWeights[uId]; |
---|
398 | } |
---|
399 | |
---|
400 | void SetMuscleTree(const Tree &tree) |
---|
401 | { |
---|
402 | g_ptrMuscleTree = &tree; |
---|
403 | |
---|
404 | if (SEQWEIGHT_ClustalW != GetSeqWeightMethod()) |
---|
405 | return; |
---|
406 | |
---|
407 | delete[] g_MuscleWeights; |
---|
408 | |
---|
409 | const unsigned uLeafCount = tree.GetLeafCount(); |
---|
410 | g_uMuscleIdCount = uLeafCount; |
---|
411 | g_MuscleWeights = new WEIGHT[uLeafCount]; |
---|
412 | CalcClustalWWeights(tree, g_MuscleWeights); |
---|
413 | } |
---|
414 | |
---|
415 | void SetClustalWWeightsMuscle(MSA &msa) |
---|
416 | { |
---|
417 | if (0 == g_MuscleWeights) |
---|
418 | Quit("g_MuscleWeights = 0"); |
---|
419 | const unsigned uSeqCount = msa.GetSeqCount(); |
---|
420 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
421 | { |
---|
422 | const unsigned uId = msa.GetSeqId(uSeqIndex); |
---|
423 | if (uId >= g_uMuscleIdCount) |
---|
424 | Quit("SetClustalWWeightsMuscle: id out of range"); |
---|
425 | msa.SetSeqWeight(uSeqIndex, g_MuscleWeights[uId]); |
---|
426 | } |
---|
427 | msa.NormalizeWeights((WEIGHT) 1.0); |
---|
428 | } |
---|
429 | |
---|
430 | #define LOCAL_VERBOSE 0 |
---|
431 | |
---|
432 | void SetThreeWayWeightsMuscle(MSA &msa) |
---|
433 | { |
---|
434 | if (NULL_NEIGHBOR == g_uTreeSplitNode1 || NULL_NEIGHBOR == g_uTreeSplitNode2) |
---|
435 | { |
---|
436 | msa.SetHenikoffWeightsPB(); |
---|
437 | return; |
---|
438 | } |
---|
439 | |
---|
440 | const unsigned uMuscleSeqCount = g_ptrMuscleTree->GetLeafCount(); |
---|
441 | WEIGHT *Weights = new WEIGHT[uMuscleSeqCount]; |
---|
442 | |
---|
443 | CalcThreeWayWeights(*g_ptrMuscleTree, g_uTreeSplitNode1, g_uTreeSplitNode2, |
---|
444 | Weights); |
---|
445 | |
---|
446 | const unsigned uMSASeqCount = msa.GetSeqCount(); |
---|
447 | for (unsigned uSeqIndex = 0; uSeqIndex < uMSASeqCount; ++uSeqIndex) |
---|
448 | { |
---|
449 | const unsigned uId = msa.GetSeqId(uSeqIndex); |
---|
450 | if (uId >= uMuscleSeqCount) |
---|
451 | Quit("SetThreeWayWeightsMuscle: id out of range"); |
---|
452 | msa.SetSeqWeight(uSeqIndex, Weights[uId]); |
---|
453 | } |
---|
454 | #if LOCAL_VERBOSE |
---|
455 | { |
---|
456 | Log("SetThreeWayWeightsMuscle\n"); |
---|
457 | for (unsigned n = 0; n < uMSASeqCount; ++n) |
---|
458 | { |
---|
459 | const unsigned uId = msa.GetSeqId(n); |
---|
460 | Log("%20.20s %6.3f\n", msa.GetSeqName(n), Weights[uId]); |
---|
461 | } |
---|
462 | } |
---|
463 | #endif |
---|
464 | msa.NormalizeWeights((WEIGHT) 1.0); |
---|
465 | |
---|
466 | delete[] Weights; |
---|
467 | } |
---|
468 | |
---|
469 | // Append msa2 at the end of msa1 |
---|
470 | void MSAAppend(MSA &msa1, const MSA &msa2) |
---|
471 | { |
---|
472 | const unsigned uSeqCount = msa1.GetSeqCount(); |
---|
473 | |
---|
474 | const unsigned uColCount1 = msa1.GetColCount(); |
---|
475 | const unsigned uColCount2 = msa2.GetColCount(); |
---|
476 | const unsigned uColCountCat = uColCount1 + uColCount2; |
---|
477 | |
---|
478 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
479 | { |
---|
480 | unsigned uId = msa1.GetSeqId(uSeqIndex); |
---|
481 | unsigned uSeqIndex2 = msa2.GetSeqIndex(uId); |
---|
482 | for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex) |
---|
483 | { |
---|
484 | const char c = msa2.GetChar(uSeqIndex2, uColIndex); |
---|
485 | msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c); |
---|
486 | } |
---|
487 | } |
---|
488 | } |
---|
489 | |
---|
490 | // "Catenate" two MSAs (by bad analogy with UNIX cat command). |
---|
491 | // msa1 and msa2 must have same sequence names, but possibly |
---|
492 | // in a different order. |
---|
493 | // msaCat is the combined alignment produce by appending |
---|
494 | // sequences in msa2 to sequences in msa1. |
---|
495 | void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat) |
---|
496 | { |
---|
497 | const unsigned uSeqCount = msa1.GetSeqCount(); |
---|
498 | |
---|
499 | const unsigned uColCount1 = msa1.GetColCount(); |
---|
500 | const unsigned uColCount2 = msa2.GetColCount(); |
---|
501 | const unsigned uColCountCat = uColCount1 + uColCount2; |
---|
502 | |
---|
503 | msaCat.SetSize(uSeqCount, uColCountCat); |
---|
504 | |
---|
505 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
506 | { |
---|
507 | for (unsigned uColIndex = 0; uColIndex < uColCount1; ++uColIndex) |
---|
508 | { |
---|
509 | const char c = msa1.GetChar(uSeqIndex, uColIndex); |
---|
510 | msaCat.SetChar(uSeqIndex, uColIndex, c); |
---|
511 | } |
---|
512 | |
---|
513 | const char *ptrSeqName = msa1.GetSeqName(uSeqIndex); |
---|
514 | unsigned uSeqIndex2; |
---|
515 | msaCat.SetSeqName(uSeqIndex, ptrSeqName); |
---|
516 | bool bFound = msa2.GetSeqIndex(ptrSeqName, &uSeqIndex2); |
---|
517 | if (bFound) |
---|
518 | { |
---|
519 | for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex) |
---|
520 | { |
---|
521 | const char c = msa2.GetChar(uSeqIndex2, uColIndex); |
---|
522 | msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, c); |
---|
523 | } |
---|
524 | } |
---|
525 | else |
---|
526 | { |
---|
527 | for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex) |
---|
528 | msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, '-'); |
---|
529 | } |
---|
530 | } |
---|
531 | } |
---|