source: trunk/GDE/MUSCLE/src/henikoffweight.cpp

Last change on this file was 10390, checked in by aboeckma, 11 years ago

added muscle sourcles amd makefile

File size: 2.4 KB
Line 
1#include "muscle.h"
2#include "msa.h"
3
4/***
5Compute Henikoff weights.
6Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.
7J. Mol. Biol., 243(4):574-578.
8
9Award each different residue an equal share of the weight, and then to divide up
10that weight equally among the sequences sharing the same residue. So if in a
11position of a multiple alignment, r different residues are represented, a residue
12represented in only one sequence contributes a score of 1/r to that sequence, whereas a
13residue represented in s sequences contributes a score of 1/rs to each of the s
14sequences. For each sequence, the contributions from each position are summed to give
15a sequence weight.
16
17See also HenikoffWeightPB.
18***/
19
20void MSA::CalcHenikoffWeightsCol(unsigned uColIndex) const
21        {
22        const unsigned uSeqCount = GetSeqCount();
23
24// Compute letter counts in this column
25        unsigned uLetterCount[MAX_ALPHA];
26        memset(uLetterCount, 0, sizeof(uLetterCount));
27        unsigned uDifferentLetterCount = 0;
28        for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
29                {
30                unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
31                if (uLetter >= 20)
32                        continue;
33                unsigned uNewCount = uLetterCount[uLetter] + 1;
34                uLetterCount[uLetter] = uNewCount;
35                if (1 == uNewCount)
36                        ++uDifferentLetterCount;
37                }
38
39// Compute weight contributions
40        for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
41                {
42                unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
43                if (uLetter >= 20)
44                        continue;
45                const unsigned uCount = uLetterCount[uLetter];
46                unsigned uDenom = uCount*uDifferentLetterCount;
47                if (uDenom == 0)
48                        continue;
49                m_Weights[uSeqIndex] += (WEIGHT) (1.0/uDenom);
50                }
51        }
52
53void MSA::SetHenikoffWeights() const
54        {
55        const unsigned uColCount = GetColCount();
56        const unsigned uSeqCount = GetSeqCount();
57
58        if (0 == uSeqCount)
59                return;
60        else if (1 == uSeqCount)
61                {
62                m_Weights[0] = (WEIGHT) 1.0;
63                return;
64                }
65        else if (2 == uSeqCount)
66                {
67                m_Weights[0] = (WEIGHT) 0.5;
68                m_Weights[1] = (WEIGHT) 0.5;
69                return;
70                }
71
72        for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
73                m_Weights[uSeqIndex] = 0.0;
74
75        for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
76                CalcHenikoffWeightsCol(uColIndex);
77
78// Set all-gap seqs weight to 0
79        for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
80                if (IsGapSeq(uSeqIndex))
81                        m_Weights[uSeqIndex] = 0.0;
82
83        Normalize(m_Weights, uSeqCount);
84        }
Note: See TracBrowser for help on using the repository browser.