| 1 | #include "muscle.h" |
|---|
| 2 | #include "profile.h" |
|---|
| 3 | #include "msa.h" |
|---|
| 4 | #include "pwpath.h" |
|---|
| 5 | #include "seqvect.h" |
|---|
| 6 | #include "clust.h" |
|---|
| 7 | #include "tree.h" |
|---|
| 8 | |
|---|
| 9 | #define TRACE 0 |
|---|
| 10 | |
|---|
| 11 | struct Range |
|---|
| 12 | { |
|---|
| 13 | unsigned m_uBestColLeft; |
|---|
| 14 | unsigned m_uBestColRight; |
|---|
| 15 | }; |
|---|
| 16 | |
|---|
| 17 | static void ListVertSavings(unsigned uColCount, unsigned uAnchorColCount, |
|---|
| 18 | const Range *Ranges, unsigned uRangeCount) |
|---|
| 19 | { |
|---|
| 20 | if (!g_bVerbose || !g_bAnchors) |
|---|
| 21 | return; |
|---|
| 22 | double dTotalArea = uColCount*uColCount; |
|---|
| 23 | double dArea = 0.0; |
|---|
| 24 | for (unsigned i = 0; i < uRangeCount; ++i) |
|---|
| 25 | { |
|---|
| 26 | unsigned uLength = Ranges[i].m_uBestColRight - Ranges[i].m_uBestColLeft; |
|---|
| 27 | dArea += uLength*uLength; |
|---|
| 28 | } |
|---|
| 29 | double dPct = (dTotalArea - dArea)*100.0/dTotalArea; |
|---|
| 30 | Log("Anchor columns found %u\n", uAnchorColCount); |
|---|
| 31 | Log("DP area saved by anchors %-4.1f%%\n", dPct); |
|---|
| 32 | } |
|---|
| 33 | |
|---|
| 34 | static void ColsToRanges(const unsigned BestCols[], unsigned uBestColCount, |
|---|
| 35 | unsigned uColCount, Range Ranges[]) |
|---|
| 36 | { |
|---|
| 37 | // N best columns produces N+1 vertical blocks. |
|---|
| 38 | const unsigned uRangeCount = uBestColCount + 1; |
|---|
| 39 | for (unsigned uIndex = 0; uIndex < uRangeCount ; ++uIndex) |
|---|
| 40 | { |
|---|
| 41 | unsigned uBestColLeft = 0; |
|---|
| 42 | if (uIndex > 0) |
|---|
| 43 | uBestColLeft = BestCols[uIndex-1]; |
|---|
| 44 | |
|---|
| 45 | unsigned uBestColRight = uColCount; |
|---|
| 46 | if (uIndex < uBestColCount) |
|---|
| 47 | uBestColRight = BestCols[uIndex]; |
|---|
| 48 | |
|---|
| 49 | Ranges[uIndex].m_uBestColLeft = uBestColLeft; |
|---|
| 50 | Ranges[uIndex].m_uBestColRight = uBestColRight; |
|---|
| 51 | } |
|---|
| 52 | } |
|---|
| 53 | |
|---|
| 54 | // Return true if any changes made |
|---|
| 55 | bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters) |
|---|
| 56 | { |
|---|
| 57 | bool bAnyChanges = false; |
|---|
| 58 | |
|---|
| 59 | const unsigned uColCountIn = msaIn.GetColCount(); |
|---|
| 60 | const unsigned uSeqCountIn = msaIn.GetSeqCount(); |
|---|
| 61 | |
|---|
| 62 | if (uColCountIn < 3 || uSeqCountIn < 3) |
|---|
| 63 | return false; |
|---|
| 64 | |
|---|
| 65 | unsigned *AnchorCols = new unsigned[uColCountIn]; |
|---|
| 66 | unsigned uAnchorColCount; |
|---|
| 67 | SetMSAWeightsMuscle(msaIn); |
|---|
| 68 | FindAnchorCols(msaIn, AnchorCols, &uAnchorColCount); |
|---|
| 69 | |
|---|
| 70 | const unsigned uRangeCount = uAnchorColCount + 1; |
|---|
| 71 | Range *Ranges = new Range[uRangeCount]; |
|---|
| 72 | |
|---|
| 73 | #if TRACE |
|---|
| 74 | Log("%u ranges\n", uRangeCount); |
|---|
| 75 | #endif |
|---|
| 76 | |
|---|
| 77 | ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges); |
|---|
| 78 | ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount); |
|---|
| 79 | |
|---|
| 80 | #if TRACE |
|---|
| 81 | { |
|---|
| 82 | Log("Anchor cols: "); |
|---|
| 83 | for (unsigned i = 0; i < uAnchorColCount; ++i) |
|---|
| 84 | Log(" %u", AnchorCols[i]); |
|---|
| 85 | Log("\n"); |
|---|
| 86 | |
|---|
| 87 | Log("Ranges:\n"); |
|---|
| 88 | for (unsigned i = 0; i < uRangeCount; ++i) |
|---|
| 89 | Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight); |
|---|
| 90 | } |
|---|
| 91 | #endif |
|---|
| 92 | |
|---|
| 93 | delete[] AnchorCols; |
|---|
| 94 | |
|---|
| 95 | MSA msaOut; |
|---|
| 96 | msaOut.SetSize(uSeqCountIn, 0); |
|---|
| 97 | |
|---|
| 98 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex) |
|---|
| 99 | { |
|---|
| 100 | const char *ptrName = msaIn.GetSeqName(uSeqIndex); |
|---|
| 101 | unsigned uId = msaIn.GetSeqId(uSeqIndex); |
|---|
| 102 | msaOut.SetSeqName(uSeqIndex, ptrName); |
|---|
| 103 | msaOut.SetSeqId(uSeqIndex, uId); |
|---|
| 104 | } |
|---|
| 105 | |
|---|
| 106 | for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex) |
|---|
| 107 | { |
|---|
| 108 | MSA msaRange; |
|---|
| 109 | |
|---|
| 110 | const Range &r = Ranges[uRangeIndex]; |
|---|
| 111 | |
|---|
| 112 | const unsigned uFromColIndex = r.m_uBestColLeft; |
|---|
| 113 | const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex; |
|---|
| 114 | |
|---|
| 115 | if (0 == uRangeColCount) |
|---|
| 116 | continue; |
|---|
| 117 | else if (1 == uRangeColCount) |
|---|
| 118 | { |
|---|
| 119 | MSAFromColRange(msaIn, uFromColIndex, 1, msaRange); |
|---|
| 120 | MSAAppend(msaOut, msaRange); |
|---|
| 121 | continue; |
|---|
| 122 | } |
|---|
| 123 | MSAFromColRange(msaIn, uFromColIndex, uRangeColCount, msaRange); |
|---|
| 124 | |
|---|
| 125 | #if TRACE |
|---|
| 126 | Log("\n-------------\n"); |
|---|
| 127 | Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount); |
|---|
| 128 | Log("Before:\n"); |
|---|
| 129 | msaRange.LogMe(); |
|---|
| 130 | #endif |
|---|
| 131 | |
|---|
| 132 | bool bLockLeft = (0 != uRangeIndex); |
|---|
| 133 | bool bLockRight = (uRangeCount - 1 != uRangeIndex); |
|---|
| 134 | bool bAnyChangesThisBlock = RefineHoriz(msaRange, tree, uIters, bLockLeft, bLockRight); |
|---|
| 135 | bAnyChanges = (bAnyChanges || bAnyChangesThisBlock); |
|---|
| 136 | |
|---|
| 137 | #if TRACE |
|---|
| 138 | Log("After:\n"); |
|---|
| 139 | msaRange.LogMe(); |
|---|
| 140 | #endif |
|---|
| 141 | |
|---|
| 142 | MSAAppend(msaOut, msaRange); |
|---|
| 143 | |
|---|
| 144 | #if TRACE |
|---|
| 145 | Log("msaOut after Cat:\n"); |
|---|
| 146 | msaOut.LogMe(); |
|---|
| 147 | #endif |
|---|
| 148 | } |
|---|
| 149 | |
|---|
| 150 | #if DEBUG |
|---|
| 151 | // Sanity check |
|---|
| 152 | AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut); |
|---|
| 153 | #endif |
|---|
| 154 | |
|---|
| 155 | delete[] Ranges; |
|---|
| 156 | if (bAnyChanges) |
|---|
| 157 | msaIn.Copy(msaOut); |
|---|
| 158 | return bAnyChanges; |
|---|
| 159 | } |
|---|