1 | #include "muscle.h" |
---|
2 | #include "tree.h" |
---|
3 | #include "seqvect.h" |
---|
4 | #include "profile.h" |
---|
5 | #include "msa.h" |
---|
6 | #include "pwpath.h" |
---|
7 | #include "estring.h" |
---|
8 | |
---|
9 | #define TRACE 0 |
---|
10 | #define VALIDATE 0 |
---|
11 | |
---|
12 | static void PathSeq(const Seq &s, const PWPath &Path, bool bRight, Seq &sOut) |
---|
13 | { |
---|
14 | short *esA; |
---|
15 | short *esB; |
---|
16 | PathToEstrings(Path, &esA, &esB); |
---|
17 | |
---|
18 | const unsigned uSeqLength = s.Length(); |
---|
19 | const unsigned uEdgeCount = Path.GetEdgeCount(); |
---|
20 | |
---|
21 | sOut.Clear(); |
---|
22 | sOut.SetName(s.GetName()); |
---|
23 | unsigned uPos = 0; |
---|
24 | for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex) |
---|
25 | { |
---|
26 | const PWEdge &Edge = Path.GetEdge(uEdgeIndex); |
---|
27 | char cType = Edge.cType; |
---|
28 | if (bRight) |
---|
29 | { |
---|
30 | if (cType == 'I') |
---|
31 | cType = 'D'; |
---|
32 | else if (cType == 'D') |
---|
33 | cType = 'I'; |
---|
34 | } |
---|
35 | switch (cType) |
---|
36 | { |
---|
37 | case 'M': |
---|
38 | sOut.AppendChar(s[uPos++]); |
---|
39 | break; |
---|
40 | case 'D': |
---|
41 | sOut.AppendChar('-'); |
---|
42 | break; |
---|
43 | case 'I': |
---|
44 | sOut.AppendChar(s[uPos++]); |
---|
45 | break; |
---|
46 | default: |
---|
47 | Quit("PathSeq, invalid edge type %c", cType); |
---|
48 | } |
---|
49 | } |
---|
50 | } |
---|
51 | |
---|
52 | #if VALIDATE |
---|
53 | |
---|
54 | static void MakeRootSeq(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex, |
---|
55 | const ProgNode Nodes[], Seq &sRoot) |
---|
56 | { |
---|
57 | sRoot.Copy(s); |
---|
58 | unsigned uNodeIndex = uLeafNodeIndex; |
---|
59 | for (;;) |
---|
60 | { |
---|
61 | unsigned uParent = GuideTree.GetParent(uNodeIndex); |
---|
62 | if (NULL_NEIGHBOR == uParent) |
---|
63 | break; |
---|
64 | bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex); |
---|
65 | uNodeIndex = uParent; |
---|
66 | const PWPath &Path = Nodes[uNodeIndex].m_Path; |
---|
67 | Seq sTmp; |
---|
68 | PathSeq(sRoot, Path, bRight, sTmp); |
---|
69 | sTmp.SetId(0); |
---|
70 | sRoot.Copy(sTmp); |
---|
71 | } |
---|
72 | } |
---|
73 | |
---|
74 | #endif // VALIDATE |
---|
75 | |
---|
76 | static short *MakeRootSeqE(const Seq &s, const Tree &GuideTree, unsigned uLeafNodeIndex, |
---|
77 | const ProgNode Nodes[], Seq &sRoot, short *Estring1, short *Estring2) |
---|
78 | { |
---|
79 | short *EstringCurr = Estring1; |
---|
80 | short *EstringNext = Estring2; |
---|
81 | |
---|
82 | const unsigned uSeqLength = s.Length(); |
---|
83 | EstringCurr[0] = uSeqLength; |
---|
84 | EstringCurr[1] = 0; |
---|
85 | |
---|
86 | unsigned uNodeIndex = uLeafNodeIndex; |
---|
87 | for (;;) |
---|
88 | { |
---|
89 | unsigned uParent = GuideTree.GetParent(uNodeIndex); |
---|
90 | if (NULL_NEIGHBOR == uParent) |
---|
91 | break; |
---|
92 | bool bRight = (GuideTree.GetLeft(uParent) == uNodeIndex); |
---|
93 | uNodeIndex = uParent; |
---|
94 | const PWPath &Path = Nodes[uNodeIndex].m_Path; |
---|
95 | const short *EstringNode = bRight ? |
---|
96 | Nodes[uNodeIndex].m_EstringL : Nodes[uNodeIndex].m_EstringR; |
---|
97 | |
---|
98 | MulEstrings(EstringCurr, EstringNode, EstringNext); |
---|
99 | #if TRACE |
---|
100 | Log("\n"); |
---|
101 | Log("Curr="); |
---|
102 | LogEstring(EstringCurr); |
---|
103 | Log("\n"); |
---|
104 | Log("Node="); |
---|
105 | LogEstring(EstringNode); |
---|
106 | Log("\n"); |
---|
107 | Log("Prod="); |
---|
108 | LogEstring(EstringNext); |
---|
109 | Log("\n"); |
---|
110 | #endif |
---|
111 | short *EstringTmp = EstringNext; |
---|
112 | EstringNext = EstringCurr; |
---|
113 | EstringCurr = EstringTmp; |
---|
114 | } |
---|
115 | EstringOp(EstringCurr, s, sRoot); |
---|
116 | |
---|
117 | #if TRACE |
---|
118 | Log("Root estring="); |
---|
119 | LogEstring(EstringCurr); |
---|
120 | Log("\n"); |
---|
121 | Log("Root seq="); |
---|
122 | sRoot.LogMe(); |
---|
123 | #endif |
---|
124 | return EstringCurr; |
---|
125 | } |
---|
126 | |
---|
127 | static unsigned GetFirstNodeIndex(const Tree &tree) |
---|
128 | { |
---|
129 | if (g_bStable) |
---|
130 | return 0; |
---|
131 | return tree.FirstDepthFirstNode(); |
---|
132 | } |
---|
133 | |
---|
134 | static unsigned GetNextNodeIndex(const Tree &tree, unsigned uPrevNodeIndex) |
---|
135 | { |
---|
136 | if (g_bStable) |
---|
137 | { |
---|
138 | const unsigned uNodeCount = tree.GetNodeCount(); |
---|
139 | unsigned uNodeIndex = uPrevNodeIndex; |
---|
140 | for (;;) |
---|
141 | { |
---|
142 | ++uNodeIndex; |
---|
143 | if (uNodeIndex >= uNodeCount) |
---|
144 | return NULL_NEIGHBOR; |
---|
145 | if (tree.IsLeaf(uNodeIndex)) |
---|
146 | return uNodeIndex; |
---|
147 | } |
---|
148 | } |
---|
149 | unsigned uNodeIndex = uPrevNodeIndex; |
---|
150 | for (;;) |
---|
151 | { |
---|
152 | uNodeIndex = tree.NextDepthFirstNode(uNodeIndex); |
---|
153 | if (NULL_NEIGHBOR == uNodeIndex || tree.IsLeaf(uNodeIndex)) |
---|
154 | return uNodeIndex; |
---|
155 | } |
---|
156 | } |
---|
157 | |
---|
158 | void MakeRootMSA(const SeqVect &v, const Tree &GuideTree, ProgNode Nodes[], |
---|
159 | MSA &a) |
---|
160 | { |
---|
161 | #if TRACE |
---|
162 | Log("MakeRootMSA Tree="); |
---|
163 | GuideTree.LogMe(); |
---|
164 | #endif |
---|
165 | const unsigned uSeqCount = v.GetSeqCount(); |
---|
166 | unsigned uColCount = uInsane; |
---|
167 | unsigned uSeqIndex = 0; |
---|
168 | const unsigned uTreeNodeCount = GuideTree.GetNodeCount(); |
---|
169 | const unsigned uRootNodeIndex = GuideTree.GetRootNodeIndex(); |
---|
170 | const PWPath &RootPath = Nodes[uRootNodeIndex].m_Path; |
---|
171 | const unsigned uRootColCount = RootPath.GetEdgeCount(); |
---|
172 | const unsigned uEstringSize = uRootColCount + 1; |
---|
173 | short *Estring1 = new short[uEstringSize]; |
---|
174 | short *Estring2 = new short[uEstringSize]; |
---|
175 | SetProgressDesc("Root alignment"); |
---|
176 | |
---|
177 | unsigned uTreeNodeIndex = GetFirstNodeIndex(GuideTree); |
---|
178 | do |
---|
179 | { |
---|
180 | Progress(uSeqIndex, uSeqCount); |
---|
181 | |
---|
182 | unsigned uId = GuideTree.GetLeafId(uTreeNodeIndex); |
---|
183 | const Seq &s = *(v[uId]); |
---|
184 | |
---|
185 | Seq sRootE; |
---|
186 | short *es = MakeRootSeqE(s, GuideTree, uTreeNodeIndex, Nodes, sRootE, |
---|
187 | Estring1, Estring2); |
---|
188 | Nodes[uTreeNodeIndex].m_EstringL = EstringNewCopy(es); |
---|
189 | |
---|
190 | #if VALIDATE |
---|
191 | Seq sRoot; |
---|
192 | MakeRootSeq(s, GuideTree, uTreeNodeIndex, Nodes, sRoot); |
---|
193 | if (!sRoot.Eq(sRootE)) |
---|
194 | { |
---|
195 | Log("sRoot="); |
---|
196 | sRoot.LogMe(); |
---|
197 | Log("sRootE="); |
---|
198 | sRootE.LogMe(); |
---|
199 | Quit("Root seqs differ"); |
---|
200 | } |
---|
201 | #if TRACE |
---|
202 | Log("MakeRootSeq=\n"); |
---|
203 | sRoot.LogMe(); |
---|
204 | #endif |
---|
205 | #endif |
---|
206 | |
---|
207 | if (uInsane == uColCount) |
---|
208 | { |
---|
209 | uColCount = sRootE.Length(); |
---|
210 | a.SetSize(uSeqCount, uColCount); |
---|
211 | } |
---|
212 | else |
---|
213 | { |
---|
214 | assert(uColCount == sRootE.Length()); |
---|
215 | } |
---|
216 | a.SetSeqName(uSeqIndex, s.GetName()); |
---|
217 | a.SetSeqId(uSeqIndex, uId); |
---|
218 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
---|
219 | a.SetChar(uSeqIndex, uColIndex, sRootE[uColIndex]); |
---|
220 | ++uSeqIndex; |
---|
221 | |
---|
222 | uTreeNodeIndex = GetNextNodeIndex(GuideTree, uTreeNodeIndex); |
---|
223 | } |
---|
224 | while (NULL_NEIGHBOR != uTreeNodeIndex); |
---|
225 | |
---|
226 | delete[] Estring1; |
---|
227 | delete[] Estring2; |
---|
228 | |
---|
229 | ProgressStepsDone(); |
---|
230 | assert(uSeqIndex == uSeqCount); |
---|
231 | } |
---|