1 | #include "muscle.h" |
---|
2 | #include "profile.h" |
---|
3 | #include "msa.h" |
---|
4 | #include "pwpath.h" |
---|
5 | #include "seqvect.h" |
---|
6 | #include "clust.h" |
---|
7 | #include "tree.h" |
---|
8 | |
---|
9 | #define TRACE 0 |
---|
10 | |
---|
11 | struct Range |
---|
12 | { |
---|
13 | unsigned m_uBestColLeft; |
---|
14 | unsigned m_uBestColRight; |
---|
15 | }; |
---|
16 | |
---|
17 | static void ListVertSavings(unsigned uColCount, unsigned uAnchorColCount, |
---|
18 | const Range *Ranges, unsigned uRangeCount) |
---|
19 | { |
---|
20 | if (!g_bVerbose || !g_bAnchors) |
---|
21 | return; |
---|
22 | double dTotalArea = uColCount*uColCount; |
---|
23 | double dArea = 0.0; |
---|
24 | for (unsigned i = 0; i < uRangeCount; ++i) |
---|
25 | { |
---|
26 | unsigned uLength = Ranges[i].m_uBestColRight - Ranges[i].m_uBestColLeft; |
---|
27 | dArea += uLength*uLength; |
---|
28 | } |
---|
29 | double dPct = (dTotalArea - dArea)*100.0/dTotalArea; |
---|
30 | Log("Anchor columns found %u\n", uAnchorColCount); |
---|
31 | Log("DP area saved by anchors %-4.1f%%\n", dPct); |
---|
32 | } |
---|
33 | |
---|
34 | static void ColsToRanges(const unsigned BestCols[], unsigned uBestColCount, |
---|
35 | unsigned uColCount, Range Ranges[]) |
---|
36 | { |
---|
37 | // N best columns produces N+1 vertical blocks. |
---|
38 | const unsigned uRangeCount = uBestColCount + 1; |
---|
39 | for (unsigned uIndex = 0; uIndex < uRangeCount ; ++uIndex) |
---|
40 | { |
---|
41 | unsigned uBestColLeft = 0; |
---|
42 | if (uIndex > 0) |
---|
43 | uBestColLeft = BestCols[uIndex-1]; |
---|
44 | |
---|
45 | unsigned uBestColRight = uColCount; |
---|
46 | if (uIndex < uBestColCount) |
---|
47 | uBestColRight = BestCols[uIndex]; |
---|
48 | |
---|
49 | Ranges[uIndex].m_uBestColLeft = uBestColLeft; |
---|
50 | Ranges[uIndex].m_uBestColRight = uBestColRight; |
---|
51 | } |
---|
52 | } |
---|
53 | |
---|
54 | // Return true if any changes made |
---|
55 | bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters) |
---|
56 | { |
---|
57 | bool bAnyChanges = false; |
---|
58 | |
---|
59 | const unsigned uColCountIn = msaIn.GetColCount(); |
---|
60 | const unsigned uSeqCountIn = msaIn.GetSeqCount(); |
---|
61 | |
---|
62 | if (uColCountIn < 3 || uSeqCountIn < 3) |
---|
63 | return false; |
---|
64 | |
---|
65 | unsigned *AnchorCols = new unsigned[uColCountIn]; |
---|
66 | unsigned uAnchorColCount; |
---|
67 | SetMSAWeightsMuscle(msaIn); |
---|
68 | FindAnchorCols(msaIn, AnchorCols, &uAnchorColCount); |
---|
69 | |
---|
70 | const unsigned uRangeCount = uAnchorColCount + 1; |
---|
71 | Range *Ranges = new Range[uRangeCount]; |
---|
72 | |
---|
73 | #if TRACE |
---|
74 | Log("%u ranges\n", uRangeCount); |
---|
75 | #endif |
---|
76 | |
---|
77 | ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges); |
---|
78 | ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount); |
---|
79 | |
---|
80 | #if TRACE |
---|
81 | { |
---|
82 | Log("Anchor cols: "); |
---|
83 | for (unsigned i = 0; i < uAnchorColCount; ++i) |
---|
84 | Log(" %u", AnchorCols[i]); |
---|
85 | Log("\n"); |
---|
86 | |
---|
87 | Log("Ranges:\n"); |
---|
88 | for (unsigned i = 0; i < uRangeCount; ++i) |
---|
89 | Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight); |
---|
90 | } |
---|
91 | #endif |
---|
92 | |
---|
93 | delete[] AnchorCols; |
---|
94 | |
---|
95 | MSA msaOut; |
---|
96 | msaOut.SetSize(uSeqCountIn, 0); |
---|
97 | |
---|
98 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex) |
---|
99 | { |
---|
100 | const char *ptrName = msaIn.GetSeqName(uSeqIndex); |
---|
101 | unsigned uId = msaIn.GetSeqId(uSeqIndex); |
---|
102 | msaOut.SetSeqName(uSeqIndex, ptrName); |
---|
103 | msaOut.SetSeqId(uSeqIndex, uId); |
---|
104 | } |
---|
105 | |
---|
106 | for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex) |
---|
107 | { |
---|
108 | MSA msaRange; |
---|
109 | |
---|
110 | const Range &r = Ranges[uRangeIndex]; |
---|
111 | |
---|
112 | const unsigned uFromColIndex = r.m_uBestColLeft; |
---|
113 | const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex; |
---|
114 | |
---|
115 | if (0 == uRangeColCount) |
---|
116 | continue; |
---|
117 | else if (1 == uRangeColCount) |
---|
118 | { |
---|
119 | MSAFromColRange(msaIn, uFromColIndex, 1, msaRange); |
---|
120 | MSAAppend(msaOut, msaRange); |
---|
121 | continue; |
---|
122 | } |
---|
123 | MSAFromColRange(msaIn, uFromColIndex, uRangeColCount, msaRange); |
---|
124 | |
---|
125 | #if TRACE |
---|
126 | Log("\n-------------\n"); |
---|
127 | Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount); |
---|
128 | Log("Before:\n"); |
---|
129 | msaRange.LogMe(); |
---|
130 | #endif |
---|
131 | |
---|
132 | bool bLockLeft = (0 != uRangeIndex); |
---|
133 | bool bLockRight = (uRangeCount - 1 != uRangeIndex); |
---|
134 | bool bAnyChangesThisBlock = RefineHoriz(msaRange, tree, uIters, bLockLeft, bLockRight); |
---|
135 | bAnyChanges = (bAnyChanges || bAnyChangesThisBlock); |
---|
136 | |
---|
137 | #if TRACE |
---|
138 | Log("After:\n"); |
---|
139 | msaRange.LogMe(); |
---|
140 | #endif |
---|
141 | |
---|
142 | MSAAppend(msaOut, msaRange); |
---|
143 | |
---|
144 | #if TRACE |
---|
145 | Log("msaOut after Cat:\n"); |
---|
146 | msaOut.LogMe(); |
---|
147 | #endif |
---|
148 | } |
---|
149 | |
---|
150 | #if DEBUG |
---|
151 | // Sanity check |
---|
152 | AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut); |
---|
153 | #endif |
---|
154 | |
---|
155 | delete[] Ranges; |
---|
156 | if (bAnyChanges) |
---|
157 | msaIn.Copy(msaOut); |
---|
158 | return bAnyChanges; |
---|
159 | } |
---|