1 | #include "muscle.h" |
---|
2 | #include "clust.h" |
---|
3 | #include "clustset.h" |
---|
4 | #include <stdio.h> |
---|
5 | |
---|
6 | #define TRACE 0 |
---|
7 | |
---|
8 | Clust::Clust() |
---|
9 | { |
---|
10 | m_Nodes = 0; |
---|
11 | m_uNodeCount = 0; |
---|
12 | m_uLeafCount = 0; |
---|
13 | m_uClusterCount = 0; |
---|
14 | m_JoinStyle = JOIN_Undefined; |
---|
15 | m_dDist = 0; |
---|
16 | m_uLeafCount = 0; |
---|
17 | m_ptrSet = 0; |
---|
18 | } |
---|
19 | |
---|
20 | Clust::~Clust() |
---|
21 | { |
---|
22 | delete[] m_Nodes; |
---|
23 | delete[] m_dDist; |
---|
24 | delete[] m_ClusterIndexToNodeIndex; |
---|
25 | } |
---|
26 | |
---|
27 | void Clust::Create(ClustSet &Set, CLUSTER Method) |
---|
28 | { |
---|
29 | m_ptrSet = &Set; |
---|
30 | |
---|
31 | SetLeafCount(Set.GetLeafCount()); |
---|
32 | |
---|
33 | switch (Method) |
---|
34 | { |
---|
35 | case CLUSTER_UPGMA: |
---|
36 | m_JoinStyle = JOIN_NearestNeighbor; |
---|
37 | m_CentroidStyle = LINKAGE_Avg; |
---|
38 | break; |
---|
39 | |
---|
40 | case CLUSTER_UPGMAMax: |
---|
41 | m_JoinStyle = JOIN_NearestNeighbor; |
---|
42 | m_CentroidStyle = LINKAGE_Max; |
---|
43 | break; |
---|
44 | |
---|
45 | case CLUSTER_UPGMAMin: |
---|
46 | m_JoinStyle = JOIN_NearestNeighbor; |
---|
47 | m_CentroidStyle = LINKAGE_Min; |
---|
48 | break; |
---|
49 | |
---|
50 | case CLUSTER_UPGMB: |
---|
51 | m_JoinStyle = JOIN_NearestNeighbor; |
---|
52 | m_CentroidStyle = LINKAGE_Biased; |
---|
53 | break; |
---|
54 | |
---|
55 | case CLUSTER_NeighborJoining: |
---|
56 | m_JoinStyle = JOIN_NeighborJoining; |
---|
57 | m_CentroidStyle = LINKAGE_NeighborJoining; |
---|
58 | break; |
---|
59 | |
---|
60 | default: |
---|
61 | Quit("Clust::Create, invalid method %d", Method); |
---|
62 | } |
---|
63 | |
---|
64 | if (m_uLeafCount <= 1) |
---|
65 | Quit("Clust::Create: no leaves"); |
---|
66 | |
---|
67 | m_uNodeCount = 2*m_uLeafCount - 1; |
---|
68 | m_Nodes = new ClustNode[m_uNodeCount]; |
---|
69 | m_ClusterIndexToNodeIndex = new unsigned[m_uLeafCount]; |
---|
70 | |
---|
71 | m_ptrClusterList = 0; |
---|
72 | for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex) |
---|
73 | { |
---|
74 | ClustNode &Node = m_Nodes[uNodeIndex]; |
---|
75 | Node.m_uIndex = uNodeIndex; |
---|
76 | if (uNodeIndex < m_uLeafCount) |
---|
77 | { |
---|
78 | Node.m_uSize = 1; |
---|
79 | Node.m_uLeafIndexes = new unsigned[1]; |
---|
80 | Node.m_uLeafIndexes[0] = uNodeIndex; |
---|
81 | AddToClusterList(uNodeIndex); |
---|
82 | } |
---|
83 | else |
---|
84 | Node.m_uSize = 0; |
---|
85 | } |
---|
86 | |
---|
87 | // Compute initial distance matrix between leaves |
---|
88 | SetProgressDesc("Build dist matrix"); |
---|
89 | unsigned uPairIndex = 0; |
---|
90 | const unsigned uPairCount = (m_uLeafCount*(m_uLeafCount - 1))/2; |
---|
91 | for (unsigned i = 0; i < m_uLeafCount; ++i) |
---|
92 | for (unsigned j = 0; j < i; ++j) |
---|
93 | { |
---|
94 | const float dDist = (float) m_ptrSet->ComputeDist(*this, i, j); |
---|
95 | SetDist(i, j, dDist); |
---|
96 | if (0 == uPairIndex%10000) |
---|
97 | Progress(uPairIndex, uPairCount); |
---|
98 | ++uPairIndex; |
---|
99 | } |
---|
100 | ProgressStepsDone(); |
---|
101 | |
---|
102 | // Call CreateCluster once for each internal node in the tree |
---|
103 | SetProgressDesc("Build guide tree"); |
---|
104 | m_uClusterCount = m_uLeafCount; |
---|
105 | const unsigned uInternalNodeCount = m_uNodeCount - m_uLeafCount; |
---|
106 | for (unsigned uNodeIndex = m_uLeafCount; uNodeIndex < m_uNodeCount; ++uNodeIndex) |
---|
107 | { |
---|
108 | unsigned i = uNodeIndex + 1 - m_uLeafCount; |
---|
109 | Progress(i, uInternalNodeCount); |
---|
110 | CreateCluster(); |
---|
111 | } |
---|
112 | ProgressStepsDone(); |
---|
113 | } |
---|
114 | |
---|
115 | void Clust::CreateCluster() |
---|
116 | { |
---|
117 | unsigned uLeftNodeIndex; |
---|
118 | unsigned uRightNodeIndex; |
---|
119 | float dLeftLength; |
---|
120 | float dRightLength; |
---|
121 | ChooseJoin(&uLeftNodeIndex, &uRightNodeIndex, &dLeftLength, &dRightLength); |
---|
122 | |
---|
123 | const unsigned uNewNodeIndex = m_uNodeCount - m_uClusterCount + 1; |
---|
124 | |
---|
125 | JoinNodes(uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength, |
---|
126 | uNewNodeIndex); |
---|
127 | |
---|
128 | #if TRACE |
---|
129 | Log("Merge New=%u L=%u R=%u Ld=%7.2g Rd=%7.2g\n", |
---|
130 | uNewNodeIndex, uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength); |
---|
131 | #endif |
---|
132 | |
---|
133 | // Compute distances to other clusters |
---|
134 | --m_uClusterCount; |
---|
135 | for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane; |
---|
136 | uNodeIndex = GetNextCluster(uNodeIndex)) |
---|
137 | { |
---|
138 | if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex) |
---|
139 | continue; |
---|
140 | |
---|
141 | if (uNewNodeIndex == uNodeIndex) |
---|
142 | continue; |
---|
143 | |
---|
144 | const float dDist = ComputeDist(uNewNodeIndex, uNodeIndex); |
---|
145 | SetDist(uNewNodeIndex, uNodeIndex, dDist); |
---|
146 | } |
---|
147 | |
---|
148 | for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane; |
---|
149 | uNodeIndex = GetNextCluster(uNodeIndex)) |
---|
150 | { |
---|
151 | if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex) |
---|
152 | continue; |
---|
153 | |
---|
154 | if (uNewNodeIndex == uNodeIndex) |
---|
155 | continue; |
---|
156 | |
---|
157 | #if REDLACK |
---|
158 | const float dMetric = ComputeMetric(uNewNodeIndex, uNodeIndex); |
---|
159 | InsertMetric(uNewNodeIndex, uNodeIndex, dMetric); |
---|
160 | #endif |
---|
161 | } |
---|
162 | } |
---|
163 | |
---|
164 | void Clust::ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex, |
---|
165 | float *ptrdLeftLength, float *ptrdRightLength) |
---|
166 | { |
---|
167 | switch (m_JoinStyle) |
---|
168 | { |
---|
169 | case JOIN_NearestNeighbor: |
---|
170 | ChooseJoinNearestNeighbor(ptruLeftIndex, ptruRightIndex, ptrdLeftLength, |
---|
171 | ptrdRightLength); |
---|
172 | return; |
---|
173 | case JOIN_NeighborJoining: |
---|
174 | ChooseJoinNeighborJoining(ptruLeftIndex, ptruRightIndex, ptrdLeftLength, |
---|
175 | ptrdRightLength); |
---|
176 | return; |
---|
177 | } |
---|
178 | Quit("Clust::ChooseJoin, Invalid join style %u", m_JoinStyle); |
---|
179 | } |
---|
180 | |
---|
181 | void Clust::ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex, |
---|
182 | unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength) |
---|
183 | { |
---|
184 | const unsigned uClusterCount = GetClusterCount(); |
---|
185 | |
---|
186 | unsigned uMinLeftNodeIndex; |
---|
187 | unsigned uMinRightNodeIndex; |
---|
188 | GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex); |
---|
189 | |
---|
190 | float dMinDist = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex); |
---|
191 | |
---|
192 | const float dLeftHeight = GetHeight(uMinLeftNodeIndex); |
---|
193 | const float dRightHeight = GetHeight(uMinRightNodeIndex); |
---|
194 | |
---|
195 | *ptruLeftIndex = uMinLeftNodeIndex; |
---|
196 | *ptruRightIndex = uMinRightNodeIndex; |
---|
197 | *ptrdLeftLength = dMinDist/2 - dLeftHeight; |
---|
198 | *ptrdRightLength = dMinDist/2 - dRightHeight; |
---|
199 | } |
---|
200 | |
---|
201 | void Clust::ChooseJoinNeighborJoining(unsigned *ptruLeftIndex, |
---|
202 | unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength) |
---|
203 | { |
---|
204 | const unsigned uClusterCount = GetClusterCount(); |
---|
205 | |
---|
206 | //unsigned uMinLeftNodeIndex = uInsane; |
---|
207 | //unsigned uMinRightNodeIndex = uInsane; |
---|
208 | //float dMinD = PLUS_INFINITY; |
---|
209 | //for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i)) |
---|
210 | // { |
---|
211 | // const float ri = Calc_r(i); |
---|
212 | // for (unsigned j = GetNextCluster(i); j != uInsane; j = GetNextCluster(j)) |
---|
213 | // { |
---|
214 | // const float rj = Calc_r(j); |
---|
215 | // const float dij = GetDist(i, j); |
---|
216 | // const float Dij = dij - (ri + rj); |
---|
217 | // if (Dij < dMinD) |
---|
218 | // { |
---|
219 | // dMinD = Dij; |
---|
220 | // uMinLeftNodeIndex = i; |
---|
221 | // uMinRightNodeIndex = j; |
---|
222 | // } |
---|
223 | // } |
---|
224 | // } |
---|
225 | |
---|
226 | unsigned uMinLeftNodeIndex; |
---|
227 | unsigned uMinRightNodeIndex; |
---|
228 | GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex); |
---|
229 | |
---|
230 | const float dDistLR = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex); |
---|
231 | const float rL = Calc_r(uMinLeftNodeIndex); |
---|
232 | const float rR = Calc_r(uMinRightNodeIndex); |
---|
233 | |
---|
234 | const float dLeftLength = (dDistLR + rL - rR)/2; |
---|
235 | const float dRightLength = (dDistLR - rL + rR)/2; |
---|
236 | |
---|
237 | *ptruLeftIndex = uMinLeftNodeIndex; |
---|
238 | *ptruRightIndex = uMinRightNodeIndex; |
---|
239 | *ptrdLeftLength = dLeftLength; |
---|
240 | *ptrdRightLength = dRightLength; |
---|
241 | } |
---|
242 | |
---|
243 | void Clust::JoinNodes(unsigned uLeftIndex, unsigned uRightIndex, float dLeftLength, |
---|
244 | float dRightLength, unsigned uNodeIndex) |
---|
245 | { |
---|
246 | ClustNode &Parent = m_Nodes[uNodeIndex]; |
---|
247 | ClustNode &Left = m_Nodes[uLeftIndex]; |
---|
248 | ClustNode &Right = m_Nodes[uRightIndex]; |
---|
249 | |
---|
250 | Left.m_dLength = dLeftLength; |
---|
251 | Right.m_dLength = dRightLength; |
---|
252 | |
---|
253 | Parent.m_ptrLeft = &Left; |
---|
254 | Parent.m_ptrRight = &Right; |
---|
255 | |
---|
256 | Left.m_ptrParent = &Parent; |
---|
257 | Right.m_ptrParent = &Parent; |
---|
258 | |
---|
259 | const unsigned uLeftSize = Left.m_uSize; |
---|
260 | const unsigned uRightSize = Right.m_uSize; |
---|
261 | const unsigned uParentSize = uLeftSize + uRightSize; |
---|
262 | Parent.m_uSize = uParentSize; |
---|
263 | |
---|
264 | assert(0 == Parent.m_uLeafIndexes); |
---|
265 | Parent.m_uLeafIndexes = new unsigned[uParentSize]; |
---|
266 | |
---|
267 | const unsigned uLeftBytes = uLeftSize*sizeof(unsigned); |
---|
268 | const unsigned uRightBytes = uRightSize*sizeof(unsigned); |
---|
269 | memcpy(Parent.m_uLeafIndexes, Left.m_uLeafIndexes, uLeftBytes); |
---|
270 | memcpy(Parent.m_uLeafIndexes + uLeftSize, Right.m_uLeafIndexes, uRightBytes); |
---|
271 | |
---|
272 | DeleteFromClusterList(uLeftIndex); |
---|
273 | DeleteFromClusterList(uRightIndex); |
---|
274 | AddToClusterList(uNodeIndex); |
---|
275 | } |
---|
276 | |
---|
277 | float Clust::Calc_r(unsigned uNodeIndex) const |
---|
278 | { |
---|
279 | const unsigned uClusterCount = GetClusterCount(); |
---|
280 | if (2 == uClusterCount) |
---|
281 | return 0; |
---|
282 | |
---|
283 | float dSum = 0; |
---|
284 | for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i)) |
---|
285 | { |
---|
286 | if (i == uNodeIndex) |
---|
287 | continue; |
---|
288 | dSum += GetDist(uNodeIndex, i); |
---|
289 | } |
---|
290 | return dSum/(uClusterCount - 2); |
---|
291 | } |
---|
292 | |
---|
293 | float Clust::ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex) |
---|
294 | { |
---|
295 | switch (m_CentroidStyle) |
---|
296 | { |
---|
297 | case LINKAGE_Avg: |
---|
298 | return ComputeDistAverageLinkage(uNewNodeIndex, uNodeIndex); |
---|
299 | |
---|
300 | case LINKAGE_Min: |
---|
301 | return ComputeDistMinLinkage(uNewNodeIndex, uNodeIndex); |
---|
302 | |
---|
303 | case LINKAGE_Max: |
---|
304 | return ComputeDistMaxLinkage(uNewNodeIndex, uNodeIndex); |
---|
305 | |
---|
306 | case LINKAGE_Biased: |
---|
307 | return ComputeDistMAFFT(uNewNodeIndex, uNodeIndex); |
---|
308 | |
---|
309 | case LINKAGE_NeighborJoining: |
---|
310 | return ComputeDistNeighborJoining(uNewNodeIndex, uNodeIndex); |
---|
311 | } |
---|
312 | Quit("Clust::ComputeDist, invalid centroid style %u", m_CentroidStyle); |
---|
313 | return (float) g_dNAN; |
---|
314 | } |
---|
315 | |
---|
316 | float Clust::ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex) |
---|
317 | { |
---|
318 | const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex); |
---|
319 | const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex); |
---|
320 | const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex); |
---|
321 | const float dDistR = GetDist(uRightNodeIndex, uNodeIndex); |
---|
322 | return (dDistL < dDistR ? dDistL : dDistR); |
---|
323 | } |
---|
324 | |
---|
325 | float Clust::ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex) |
---|
326 | { |
---|
327 | const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex); |
---|
328 | const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex); |
---|
329 | const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex); |
---|
330 | const float dDistR = GetDist(uRightNodeIndex, uNodeIndex); |
---|
331 | return (dDistL > dDistR ? dDistL : dDistR); |
---|
332 | } |
---|
333 | |
---|
334 | float Clust::ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex) |
---|
335 | { |
---|
336 | const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex); |
---|
337 | const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex); |
---|
338 | const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex); |
---|
339 | const float dDistR = GetDist(uRightNodeIndex, uNodeIndex); |
---|
340 | return (dDistL + dDistR)/2; |
---|
341 | } |
---|
342 | |
---|
343 | float Clust::ComputeDistNeighborJoining(unsigned uNewNodeIndex, unsigned uNodeIndex) |
---|
344 | { |
---|
345 | const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex); |
---|
346 | const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex); |
---|
347 | const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex); |
---|
348 | const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex); |
---|
349 | const float dDistR = GetDist(uRightNodeIndex, uNodeIndex); |
---|
350 | const float dDist = (dDistL + dDistR - dDistLR)/2; |
---|
351 | return dDist; |
---|
352 | } |
---|
353 | |
---|
354 | // This is a mysterious variant of UPGMA reverse-engineered from MAFFT source. |
---|
355 | float Clust::ComputeDistMAFFT(unsigned uNewNodeIndex, unsigned uNodeIndex) |
---|
356 | { |
---|
357 | const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex); |
---|
358 | const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex); |
---|
359 | |
---|
360 | const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex); |
---|
361 | const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex); |
---|
362 | const float dDistR = GetDist(uRightNodeIndex, uNodeIndex); |
---|
363 | const float dMinDistLR = (dDistL < dDistR ? dDistL : dDistR); |
---|
364 | const float dSumDistLR = dDistL + dDistR; |
---|
365 | const float dDist = dMinDistLR*(1 - g_dSUEFF) + dSumDistLR*g_dSUEFF/2; |
---|
366 | return dDist; |
---|
367 | } |
---|
368 | |
---|
369 | unsigned Clust::GetClusterCount() const |
---|
370 | { |
---|
371 | return m_uClusterCount; |
---|
372 | } |
---|
373 | |
---|
374 | void Clust::LogMe() const |
---|
375 | { |
---|
376 | Log("Clust %u leaves, %u nodes, %u clusters.\n", |
---|
377 | m_uLeafCount, m_uNodeCount, m_uClusterCount); |
---|
378 | |
---|
379 | Log("Distance matrix\n"); |
---|
380 | const unsigned uNodeCount = GetNodeCount(); |
---|
381 | Log(" "); |
---|
382 | for (unsigned i = 0; i < uNodeCount - 1; ++i) |
---|
383 | Log(" %7u", i); |
---|
384 | Log("\n"); |
---|
385 | |
---|
386 | Log(" "); |
---|
387 | for (unsigned i = 0; i < uNodeCount - 1; ++i) |
---|
388 | Log(" ------"); |
---|
389 | Log("\n"); |
---|
390 | |
---|
391 | for (unsigned i = 0; i < uNodeCount - 1; ++i) |
---|
392 | { |
---|
393 | Log("%4u: ", i); |
---|
394 | for (unsigned j = 0; j < i; ++j) |
---|
395 | Log(" %7.2g", GetDist(i, j)); |
---|
396 | Log("\n"); |
---|
397 | } |
---|
398 | |
---|
399 | Log("\n"); |
---|
400 | Log("Node Size Prnt Left Rght Length Name\n"); |
---|
401 | Log("---- ---- ---- ---- ---- ------ ----\n"); |
---|
402 | for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex) |
---|
403 | { |
---|
404 | const ClustNode &Node = m_Nodes[uNodeIndex]; |
---|
405 | Log("%4u %4u", uNodeIndex, Node.m_uSize); |
---|
406 | if (0 != Node.m_ptrParent) |
---|
407 | Log(" %4u", Node.m_ptrParent->m_uIndex); |
---|
408 | else |
---|
409 | Log(" "); |
---|
410 | |
---|
411 | if (0 != Node.m_ptrLeft) |
---|
412 | Log(" %4u", Node.m_ptrLeft->m_uIndex); |
---|
413 | else |
---|
414 | Log(" "); |
---|
415 | |
---|
416 | if (0 != Node.m_ptrRight) |
---|
417 | Log(" %4u", Node.m_ptrRight->m_uIndex); |
---|
418 | else |
---|
419 | Log(" "); |
---|
420 | |
---|
421 | if (uNodeIndex != m_uNodeCount - 1) |
---|
422 | Log(" %7.3g", Node.m_dLength); |
---|
423 | if (IsLeaf(uNodeIndex)) |
---|
424 | { |
---|
425 | const char *ptrName = GetNodeName(uNodeIndex); |
---|
426 | if (0 != ptrName) |
---|
427 | Log(" %s", ptrName); |
---|
428 | } |
---|
429 | if (GetRootNodeIndex() == uNodeIndex) |
---|
430 | Log(" [ROOT]"); |
---|
431 | Log("\n"); |
---|
432 | } |
---|
433 | } |
---|
434 | |
---|
435 | const ClustNode &Clust::GetNode(unsigned uNodeIndex) const |
---|
436 | { |
---|
437 | if (uNodeIndex >= m_uNodeCount) |
---|
438 | Quit("ClustNode::GetNode(%u) %u", uNodeIndex, m_uNodeCount); |
---|
439 | return m_Nodes[uNodeIndex]; |
---|
440 | } |
---|
441 | |
---|
442 | bool Clust::IsLeaf(unsigned uNodeIndex) const |
---|
443 | { |
---|
444 | return uNodeIndex < m_uLeafCount; |
---|
445 | } |
---|
446 | |
---|
447 | unsigned Clust::GetClusterSize(unsigned uNodeIndex) const |
---|
448 | { |
---|
449 | const ClustNode &Node = GetNode(uNodeIndex); |
---|
450 | return Node.m_uSize; |
---|
451 | } |
---|
452 | |
---|
453 | unsigned Clust::GetLeftIndex(unsigned uNodeIndex) const |
---|
454 | { |
---|
455 | const ClustNode &Node = GetNode(uNodeIndex); |
---|
456 | if (0 == Node.m_ptrLeft) |
---|
457 | Quit("Clust::GetLeftIndex: leaf"); |
---|
458 | return Node.m_ptrLeft->m_uIndex; |
---|
459 | } |
---|
460 | |
---|
461 | unsigned Clust::GetRightIndex(unsigned uNodeIndex) const |
---|
462 | { |
---|
463 | const ClustNode &Node = GetNode(uNodeIndex); |
---|
464 | if (0 == Node.m_ptrRight) |
---|
465 | Quit("Clust::GetRightIndex: leaf"); |
---|
466 | return Node.m_ptrRight->m_uIndex; |
---|
467 | } |
---|
468 | |
---|
469 | float Clust::GetLength(unsigned uNodeIndex) const |
---|
470 | { |
---|
471 | const ClustNode &Node = GetNode(uNodeIndex); |
---|
472 | return Node.m_dLength; |
---|
473 | } |
---|
474 | |
---|
475 | void Clust::SetLeafCount(unsigned uLeafCount) |
---|
476 | { |
---|
477 | if (uLeafCount <= 1) |
---|
478 | Quit("Clust::SetLeafCount(%u)", uLeafCount); |
---|
479 | |
---|
480 | m_uLeafCount = uLeafCount; |
---|
481 | const unsigned uNodeCount = GetNodeCount(); |
---|
482 | |
---|
483 | // Triangular matrix size excluding diagonal (all zeros in our case). |
---|
484 | m_uTriangularMatrixSize = (uNodeCount*(uNodeCount - 1))/2; |
---|
485 | m_dDist = new float[m_uTriangularMatrixSize]; |
---|
486 | } |
---|
487 | |
---|
488 | unsigned Clust::GetLeafCount() const |
---|
489 | { |
---|
490 | return m_uLeafCount; |
---|
491 | } |
---|
492 | |
---|
493 | unsigned Clust::VectorIndex(unsigned uIndex1, unsigned uIndex2) const |
---|
494 | { |
---|
495 | const unsigned uNodeCount = GetNodeCount(); |
---|
496 | if (uIndex1 >= uNodeCount || uIndex2 >= uNodeCount) |
---|
497 | Quit("DistVectorIndex(%u,%u) %u", uIndex1, uIndex2, uNodeCount); |
---|
498 | unsigned v; |
---|
499 | if (uIndex1 >= uIndex2) |
---|
500 | v = uIndex2 + (uIndex1*(uIndex1 - 1))/2; |
---|
501 | else |
---|
502 | v = uIndex1 + (uIndex2*(uIndex2 - 1))/2; |
---|
503 | assert(v < m_uTriangularMatrixSize); |
---|
504 | return v; |
---|
505 | } |
---|
506 | |
---|
507 | float Clust::GetDist(unsigned uIndex1, unsigned uIndex2) const |
---|
508 | { |
---|
509 | unsigned v = VectorIndex(uIndex1, uIndex2); |
---|
510 | return m_dDist[v]; |
---|
511 | } |
---|
512 | |
---|
513 | void Clust::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist) |
---|
514 | { |
---|
515 | unsigned v = VectorIndex(uIndex1, uIndex2); |
---|
516 | m_dDist[v] = dDist; |
---|
517 | } |
---|
518 | |
---|
519 | float Clust::GetHeight(unsigned uNodeIndex) const |
---|
520 | { |
---|
521 | if (IsLeaf(uNodeIndex)) |
---|
522 | return 0; |
---|
523 | |
---|
524 | const unsigned uLeftIndex = GetLeftIndex(uNodeIndex); |
---|
525 | const unsigned uRightIndex = GetRightIndex(uNodeIndex); |
---|
526 | const float dLeftLength = GetLength(uLeftIndex); |
---|
527 | const float dRightLength = GetLength(uRightIndex); |
---|
528 | const float dLeftHeight = dLeftLength + GetHeight(uLeftIndex); |
---|
529 | const float dRightHeight = dRightLength + GetHeight(uRightIndex); |
---|
530 | return (dLeftHeight + dRightHeight)/2; |
---|
531 | } |
---|
532 | |
---|
533 | const char *Clust::GetNodeName(unsigned uNodeIndex) const |
---|
534 | { |
---|
535 | if (!IsLeaf(uNodeIndex)) |
---|
536 | Quit("Clust::GetNodeName, is not leaf"); |
---|
537 | return m_ptrSet->GetLeafName(uNodeIndex); |
---|
538 | } |
---|
539 | |
---|
540 | unsigned Clust::GetNodeId(unsigned uNodeIndex) const |
---|
541 | { |
---|
542 | if (uNodeIndex >= GetLeafCount()) |
---|
543 | return 0; |
---|
544 | return m_ptrSet->GetLeafId(uNodeIndex); |
---|
545 | } |
---|
546 | |
---|
547 | unsigned Clust::GetLeaf(unsigned uNodeIndex, unsigned uLeafIndex) const |
---|
548 | { |
---|
549 | const ClustNode &Node = GetNode(uNodeIndex); |
---|
550 | const unsigned uLeafCount = Node.m_uSize; |
---|
551 | if (uLeafIndex >= uLeafCount) |
---|
552 | Quit("Clust::GetLeaf, invalid index"); |
---|
553 | const unsigned uIndex = Node.m_uLeafIndexes[uLeafIndex]; |
---|
554 | if (uIndex >= m_uNodeCount) |
---|
555 | Quit("Clust::GetLeaf, index out of range"); |
---|
556 | return uIndex; |
---|
557 | } |
---|
558 | |
---|
559 | unsigned Clust::GetFirstCluster() const |
---|
560 | { |
---|
561 | if (0 == m_ptrClusterList) |
---|
562 | return uInsane; |
---|
563 | return m_ptrClusterList->m_uIndex; |
---|
564 | } |
---|
565 | |
---|
566 | unsigned Clust::GetNextCluster(unsigned uIndex) const |
---|
567 | { |
---|
568 | ClustNode *ptrNode = &m_Nodes[uIndex]; |
---|
569 | if (0 == ptrNode->m_ptrNextCluster) |
---|
570 | return uInsane; |
---|
571 | return ptrNode->m_ptrNextCluster->m_uIndex; |
---|
572 | } |
---|
573 | |
---|
574 | void Clust::DeleteFromClusterList(unsigned uNodeIndex) |
---|
575 | { |
---|
576 | assert(uNodeIndex < m_uNodeCount); |
---|
577 | ClustNode *ptrNode = &m_Nodes[uNodeIndex]; |
---|
578 | ClustNode *ptrPrev = ptrNode->m_ptrPrevCluster; |
---|
579 | ClustNode *ptrNext = ptrNode->m_ptrNextCluster; |
---|
580 | |
---|
581 | if (0 != ptrNext) |
---|
582 | ptrNext->m_ptrPrevCluster = ptrPrev; |
---|
583 | if (0 == ptrPrev) |
---|
584 | { |
---|
585 | assert(m_ptrClusterList == ptrNode); |
---|
586 | m_ptrClusterList = ptrNext; |
---|
587 | } |
---|
588 | else |
---|
589 | ptrPrev->m_ptrNextCluster = ptrNext; |
---|
590 | |
---|
591 | ptrNode->m_ptrNextCluster = 0; |
---|
592 | ptrNode->m_ptrPrevCluster = 0; |
---|
593 | } |
---|
594 | |
---|
595 | void Clust::AddToClusterList(unsigned uNodeIndex) |
---|
596 | { |
---|
597 | assert(uNodeIndex < m_uNodeCount); |
---|
598 | ClustNode *ptrNode = &m_Nodes[uNodeIndex]; |
---|
599 | |
---|
600 | if (0 != m_ptrClusterList) |
---|
601 | m_ptrClusterList->m_ptrPrevCluster = ptrNode; |
---|
602 | |
---|
603 | ptrNode->m_ptrNextCluster = m_ptrClusterList; |
---|
604 | ptrNode->m_ptrPrevCluster = 0; |
---|
605 | |
---|
606 | m_ptrClusterList = ptrNode; |
---|
607 | } |
---|
608 | |
---|
609 | float Clust::ComputeMetric(unsigned uIndex1, unsigned uIndex2) const |
---|
610 | { |
---|
611 | switch (m_JoinStyle) |
---|
612 | { |
---|
613 | case JOIN_NearestNeighbor: |
---|
614 | return ComputeMetricNearestNeighbor(uIndex1, uIndex2); |
---|
615 | |
---|
616 | case JOIN_NeighborJoining: |
---|
617 | return ComputeMetricNeighborJoining(uIndex1, uIndex2); |
---|
618 | } |
---|
619 | Quit("Clust::ComputeMetric"); |
---|
620 | return 0; |
---|
621 | } |
---|
622 | |
---|
623 | float Clust::ComputeMetricNeighborJoining(unsigned i, unsigned j) const |
---|
624 | { |
---|
625 | float ri = Calc_r(i); |
---|
626 | float rj = Calc_r(j); |
---|
627 | float dij = GetDist(i, j); |
---|
628 | float dMetric = dij - (ri + rj); |
---|
629 | return (float) dMetric; |
---|
630 | } |
---|
631 | |
---|
632 | float Clust::ComputeMetricNearestNeighbor(unsigned i, unsigned j) const |
---|
633 | { |
---|
634 | return (float) GetDist(i, j); |
---|
635 | } |
---|
636 | |
---|
637 | float Clust::GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const |
---|
638 | { |
---|
639 | unsigned uMinLeftNodeIndex = uInsane; |
---|
640 | unsigned uMinRightNodeIndex = uInsane; |
---|
641 | float dMinMetric = PLUS_INFINITY; |
---|
642 | for (unsigned uLeftNodeIndex = GetFirstCluster(); uLeftNodeIndex != uInsane; |
---|
643 | uLeftNodeIndex = GetNextCluster(uLeftNodeIndex)) |
---|
644 | { |
---|
645 | for (unsigned uRightNodeIndex = GetNextCluster(uLeftNodeIndex); |
---|
646 | uRightNodeIndex != uInsane; |
---|
647 | uRightNodeIndex = GetNextCluster(uRightNodeIndex)) |
---|
648 | { |
---|
649 | float dMetric = ComputeMetric(uLeftNodeIndex, uRightNodeIndex); |
---|
650 | if (dMetric < dMinMetric) |
---|
651 | { |
---|
652 | dMinMetric = dMetric; |
---|
653 | uMinLeftNodeIndex = uLeftNodeIndex; |
---|
654 | uMinRightNodeIndex = uRightNodeIndex; |
---|
655 | } |
---|
656 | } |
---|
657 | } |
---|
658 | *ptruIndex1 = uMinLeftNodeIndex; |
---|
659 | *ptruIndex2 = uMinRightNodeIndex; |
---|
660 | return dMinMetric; |
---|
661 | } |
---|
662 | |
---|
663 | float Clust::GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const |
---|
664 | { |
---|
665 | return GetMinMetricBruteForce(ptruIndex1, ptruIndex2); |
---|
666 | } |
---|