| 1 | #include "muscle.h" |
|---|
| 2 | #include "tree.h" |
|---|
| 3 | #include "seqvect.h" |
|---|
| 4 | #include "profile.h" |
|---|
| 5 | #include "msa.h" |
|---|
| 6 | #include "pwpath.h" |
|---|
| 7 | #include "distfunc.h" |
|---|
| 8 | #include "textfile.h" |
|---|
| 9 | #include "estring.h" |
|---|
| 10 | |
|---|
| 11 | #define TRACE 0 |
|---|
| 12 | #define VALIDATE 0 |
|---|
| 13 | #define TRACE_LENGTH_DELTA 0 |
|---|
| 14 | |
|---|
| 15 | static void LogLeafNames(const Tree &tree, unsigned uNodeIndex) |
|---|
| 16 | { |
|---|
| 17 | const unsigned uNodeCount = tree.GetNodeCount(); |
|---|
| 18 | unsigned *Leaves = new unsigned[uNodeCount]; |
|---|
| 19 | unsigned uLeafCount; |
|---|
| 20 | GetLeaves(tree, uNodeIndex, Leaves, &uLeafCount); |
|---|
| 21 | for (unsigned i = 0; i < uLeafCount; ++i) |
|---|
| 22 | { |
|---|
| 23 | if (i > 0) |
|---|
| 24 | Log(","); |
|---|
| 25 | Log("%s", tree.GetLeafName(Leaves[i])); |
|---|
| 26 | } |
|---|
| 27 | delete[] Leaves; |
|---|
| 28 | } |
|---|
| 29 | |
|---|
| 30 | ProgNode *ProgressiveAlignE(const SeqVect &v, const Tree &GuideTree, MSA &a) |
|---|
| 31 | { |
|---|
| 32 | assert(GuideTree.IsRooted()); |
|---|
| 33 | |
|---|
| 34 | #if TRACE |
|---|
| 35 | Log("GuideTree:\n"); |
|---|
| 36 | GuideTree.LogMe(); |
|---|
| 37 | #endif |
|---|
| 38 | |
|---|
| 39 | const unsigned uSeqCount = v.Length(); |
|---|
| 40 | const unsigned uNodeCount = 2*uSeqCount - 1; |
|---|
| 41 | const unsigned uIterCount = uSeqCount - 1; |
|---|
| 42 | |
|---|
| 43 | WEIGHT *Weights = new WEIGHT[uSeqCount]; |
|---|
| 44 | CalcClustalWWeights(GuideTree, Weights); |
|---|
| 45 | |
|---|
| 46 | ProgNode *ProgNodes = new ProgNode[uNodeCount]; |
|---|
| 47 | |
|---|
| 48 | unsigned uJoin = 0; |
|---|
| 49 | unsigned uTreeNodeIndex = GuideTree.FirstDepthFirstNode(); |
|---|
| 50 | SetProgressDesc("Align node"); |
|---|
| 51 | do |
|---|
| 52 | { |
|---|
| 53 | if (GuideTree.IsLeaf(uTreeNodeIndex)) |
|---|
| 54 | { |
|---|
| 55 | if (uTreeNodeIndex >= uNodeCount) |
|---|
| 56 | Quit("TreeNodeIndex=%u NodeCount=%u\n", uTreeNodeIndex, uNodeCount); |
|---|
| 57 | ProgNode &Node = ProgNodes[uTreeNodeIndex]; |
|---|
| 58 | unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex); |
|---|
| 59 | if (uId >= uSeqCount) |
|---|
| 60 | Quit("Seq index out of range"); |
|---|
| 61 | const Seq &s = *(v[uId]); |
|---|
| 62 | Node.m_MSA.FromSeq(s); |
|---|
| 63 | Node.m_MSA.SetSeqId(0, uId); |
|---|
| 64 | Node.m_uLength = Node.m_MSA.GetColCount(); |
|---|
| 65 | Node.m_Weight = Weights[uId]; |
|---|
| 66 | // TODO: Term gaps settable |
|---|
| 67 | Node.m_Prof = ProfileFromMSA(Node.m_MSA); |
|---|
| 68 | Node.m_EstringL = 0; |
|---|
| 69 | Node.m_EstringR = 0; |
|---|
| 70 | #if TRACE |
|---|
| 71 | Log("Leaf id=%u\n", uId); |
|---|
| 72 | Log("MSA=\n"); |
|---|
| 73 | Node.m_MSA.LogMe(); |
|---|
| 74 | Log("Profile (from MSA)=\n"); |
|---|
| 75 | ListProfile(Node.m_Prof, Node.m_uLength, &Node.m_MSA); |
|---|
| 76 | #endif |
|---|
| 77 | } |
|---|
| 78 | else |
|---|
| 79 | { |
|---|
| 80 | Progress(uJoin, uSeqCount - 1); |
|---|
| 81 | ++uJoin; |
|---|
| 82 | |
|---|
| 83 | const unsigned uMergeNodeIndex = uTreeNodeIndex; |
|---|
| 84 | ProgNode &Parent = ProgNodes[uMergeNodeIndex]; |
|---|
| 85 | |
|---|
| 86 | const unsigned uLeft = GuideTree.GetLeft(uTreeNodeIndex); |
|---|
| 87 | const unsigned uRight = GuideTree.GetRight(uTreeNodeIndex); |
|---|
| 88 | |
|---|
| 89 | if (g_bVerbose) |
|---|
| 90 | { |
|---|
| 91 | Log("Align: ("); |
|---|
| 92 | LogLeafNames(GuideTree, uLeft); |
|---|
| 93 | Log(") ("); |
|---|
| 94 | LogLeafNames(GuideTree, uRight); |
|---|
| 95 | Log(")\n"); |
|---|
| 96 | } |
|---|
| 97 | |
|---|
| 98 | ProgNode &Node1 = ProgNodes[uLeft]; |
|---|
| 99 | ProgNode &Node2 = ProgNodes[uRight]; |
|---|
| 100 | |
|---|
| 101 | #if TRACE |
|---|
| 102 | Log("AlignTwoMSAs:\n"); |
|---|
| 103 | #endif |
|---|
| 104 | AlignTwoProfs( |
|---|
| 105 | Node1.m_Prof, Node1.m_uLength, Node1.m_Weight, |
|---|
| 106 | Node2.m_Prof, Node2.m_uLength, Node2.m_Weight, |
|---|
| 107 | Parent.m_Path, |
|---|
| 108 | &Parent.m_Prof, &Parent.m_uLength); |
|---|
| 109 | #if TRACE_LENGTH_DELTA |
|---|
| 110 | { |
|---|
| 111 | unsigned L = Node1.m_uLength; |
|---|
| 112 | unsigned R = Node2.m_uLength; |
|---|
| 113 | unsigned P = Parent.m_Path.GetEdgeCount(); |
|---|
| 114 | unsigned Max = L > R ? L : R; |
|---|
| 115 | unsigned d = P - Max; |
|---|
| 116 | Log("LD%u;%u;%u;%u\n", L, R, P, d); |
|---|
| 117 | } |
|---|
| 118 | #endif |
|---|
| 119 | PathToEstrings(Parent.m_Path, &Parent.m_EstringL, &Parent.m_EstringR); |
|---|
| 120 | |
|---|
| 121 | Parent.m_Weight = Node1.m_Weight + Node2.m_Weight; |
|---|
| 122 | |
|---|
| 123 | #if VALIDATE |
|---|
| 124 | { |
|---|
| 125 | #if TRACE |
|---|
| 126 | Log("AlignTwoMSAs:\n"); |
|---|
| 127 | #endif |
|---|
| 128 | PWPath TmpPath; |
|---|
| 129 | AlignTwoMSAs(Node1.m_MSA, Node2.m_MSA, Parent.m_MSA, TmpPath); |
|---|
| 130 | ProfPos *P1 = ProfileFromMSA(Node1.m_MSA, true); |
|---|
| 131 | ProfPos *P2 = ProfileFromMSA(Node2.m_MSA, true); |
|---|
| 132 | unsigned uLength = Parent.m_MSA.GetColCount(); |
|---|
| 133 | ProfPos *TmpProf = ProfileFromMSA(Parent.m_MSA, true); |
|---|
| 134 | |
|---|
| 135 | #if TRACE |
|---|
| 136 | Log("Node1 MSA=\n"); |
|---|
| 137 | Node1.m_MSA.LogMe(); |
|---|
| 138 | |
|---|
| 139 | Log("Node1 prof=\n"); |
|---|
| 140 | ListProfile(Node1.m_Prof, Node1.m_MSA.GetColCount(), &Node1.m_MSA); |
|---|
| 141 | Log("Node1 prof (from MSA)=\n"); |
|---|
| 142 | ListProfile(P1, Node1.m_MSA.GetColCount(), &Node1.m_MSA); |
|---|
| 143 | |
|---|
| 144 | AssertProfsEq(Node1.m_Prof, Node1.m_uLength, P1, Node1.m_MSA.GetColCount()); |
|---|
| 145 | |
|---|
| 146 | Log("Node2 prof=\n"); |
|---|
| 147 | ListProfile(Node2.m_Prof, Node2.m_MSA.GetColCount(), &Node2.m_MSA); |
|---|
| 148 | |
|---|
| 149 | Log("Node2 MSA=\n"); |
|---|
| 150 | Node2.m_MSA.LogMe(); |
|---|
| 151 | |
|---|
| 152 | Log("Node2 prof (from MSA)=\n"); |
|---|
| 153 | ListProfile(P2, Node2.m_MSA.GetColCount(), &Node2.m_MSA); |
|---|
| 154 | |
|---|
| 155 | AssertProfsEq(Node2.m_Prof, Node2.m_uLength, P2, Node2.m_MSA.GetColCount()); |
|---|
| 156 | |
|---|
| 157 | TmpPath.AssertEqual(Parent.m_Path); |
|---|
| 158 | |
|---|
| 159 | Log("Parent MSA=\n"); |
|---|
| 160 | Parent.m_MSA.LogMe(); |
|---|
| 161 | |
|---|
| 162 | Log("Parent prof=\n"); |
|---|
| 163 | ListProfile(Parent.m_Prof, Parent.m_uLength, &Parent.m_MSA); |
|---|
| 164 | |
|---|
| 165 | Log("Parent prof (from MSA)=\n"); |
|---|
| 166 | ListProfile(TmpProf, Parent.m_MSA.GetColCount(), &Parent.m_MSA); |
|---|
| 167 | |
|---|
| 168 | #endif // TRACE |
|---|
| 169 | AssertProfsEq(Parent.m_Prof, Parent.m_uLength, |
|---|
| 170 | TmpProf, Parent.m_MSA.GetColCount()); |
|---|
| 171 | delete[] P1; |
|---|
| 172 | delete[] P2; |
|---|
| 173 | delete[] TmpProf; |
|---|
| 174 | } |
|---|
| 175 | #endif // VALIDATE |
|---|
| 176 | |
|---|
| 177 | Node1.m_MSA.Clear(); |
|---|
| 178 | Node2.m_MSA.Clear(); |
|---|
| 179 | |
|---|
| 180 | // Don't delete profiles, may need them for tree refinement. |
|---|
| 181 | //delete[] Node1.m_Prof; |
|---|
| 182 | //delete[] Node2.m_Prof; |
|---|
| 183 | //Node1.m_Prof = 0; |
|---|
| 184 | //Node2.m_Prof = 0; |
|---|
| 185 | } |
|---|
| 186 | uTreeNodeIndex = GuideTree.NextDepthFirstNode(uTreeNodeIndex); |
|---|
| 187 | } |
|---|
| 188 | while (NULL_NEIGHBOR != uTreeNodeIndex); |
|---|
| 189 | ProgressStepsDone(); |
|---|
| 190 | |
|---|
| 191 | if (g_bBrenner) |
|---|
| 192 | MakeRootMSABrenner((SeqVect &) v, GuideTree, ProgNodes, a); |
|---|
| 193 | else |
|---|
| 194 | MakeRootMSA(v, GuideTree, ProgNodes, a); |
|---|
| 195 | |
|---|
| 196 | #if VALIDATE |
|---|
| 197 | { |
|---|
| 198 | unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex(); |
|---|
| 199 | const ProgNode &RootProgNode = ProgNodes[uRootNodeIndex]; |
|---|
| 200 | AssertMSAEq(a, RootProgNode.m_MSA); |
|---|
| 201 | } |
|---|
| 202 | #endif |
|---|
| 203 | |
|---|
| 204 | delete[] Weights; |
|---|
| 205 | return ProgNodes; |
|---|
| 206 | } |
|---|