1 | #include "muscle.h" |
---|
2 | #include <stdio.h> |
---|
3 | #include <ctype.h> |
---|
4 | #include "msa.h" |
---|
5 | #include "textfile.h" |
---|
6 | |
---|
7 | const unsigned uCharsPerLine = 60; |
---|
8 | const int MIN_NAME = 10; |
---|
9 | const int MAX_NAME = 32; |
---|
10 | |
---|
11 | static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex); |
---|
12 | |
---|
13 | void MSA::ToAlnFile(TextFile &File) const |
---|
14 | { |
---|
15 | if (g_bClwStrict) |
---|
16 | File.PutString("CLUSTAL W (1.81) multiple sequence alignment\n"); |
---|
17 | else |
---|
18 | { |
---|
19 | File.PutString("MUSCLE (" |
---|
20 | SHORT_VERSION ")" |
---|
21 | " multiple sequence alignment\n"); |
---|
22 | File.PutString("\n"); |
---|
23 | } |
---|
24 | |
---|
25 | int iLongestNameLength = 0; |
---|
26 | for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex) |
---|
27 | { |
---|
28 | const char *ptrName = GetSeqName(uSeqIndex); |
---|
29 | const char *ptrBlank = strchr(ptrName, ' '); |
---|
30 | int iLength; |
---|
31 | if (0 != ptrBlank) |
---|
32 | iLength = (int) (ptrBlank - ptrName); |
---|
33 | else |
---|
34 | iLength = (int) strlen(ptrName); |
---|
35 | if (iLength > iLongestNameLength) |
---|
36 | iLongestNameLength = iLength; |
---|
37 | } |
---|
38 | if (iLongestNameLength > MAX_NAME) |
---|
39 | iLongestNameLength = MAX_NAME; |
---|
40 | if (iLongestNameLength < MIN_NAME) |
---|
41 | iLongestNameLength = MIN_NAME; |
---|
42 | |
---|
43 | unsigned uLineCount = (GetColCount() - 1)/uCharsPerLine + 1; |
---|
44 | for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex) |
---|
45 | { |
---|
46 | File.PutString("\n"); |
---|
47 | unsigned uStartColIndex = uLineIndex*uCharsPerLine; |
---|
48 | unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1; |
---|
49 | if (uEndColIndex >= GetColCount()) |
---|
50 | uEndColIndex = GetColCount() - 1; |
---|
51 | char Name[MAX_NAME+1]; |
---|
52 | for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex) |
---|
53 | { |
---|
54 | const char *ptrName = GetSeqName(uSeqIndex); |
---|
55 | const char *ptrBlank = strchr(ptrName, ' '); |
---|
56 | int iLength; |
---|
57 | if (0 != ptrBlank) |
---|
58 | iLength = (int) (ptrBlank - ptrName); |
---|
59 | else |
---|
60 | iLength = (int) strlen(ptrName); |
---|
61 | if (iLength > MAX_NAME) |
---|
62 | iLength = MAX_NAME; |
---|
63 | memset(Name, ' ', MAX_NAME); |
---|
64 | memcpy(Name, ptrName, iLength); |
---|
65 | Name[iLongestNameLength] = 0; |
---|
66 | |
---|
67 | File.PutFormat("%s ", Name); |
---|
68 | for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex; |
---|
69 | ++uColIndex) |
---|
70 | { |
---|
71 | const char c = GetChar(uSeqIndex, uColIndex); |
---|
72 | File.PutFormat("%c", toupper(c)); |
---|
73 | } |
---|
74 | File.PutString("\n"); |
---|
75 | } |
---|
76 | |
---|
77 | memset(Name, ' ', MAX_NAME); |
---|
78 | Name[iLongestNameLength] = 0; |
---|
79 | File.PutFormat("%s ", Name); |
---|
80 | for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex; |
---|
81 | ++uColIndex) |
---|
82 | { |
---|
83 | const char c = GetAlnConsensusChar(*this, uColIndex); |
---|
84 | File.PutChar(c); |
---|
85 | } |
---|
86 | File.PutString("\n"); |
---|
87 | } |
---|
88 | } |
---|
89 | |
---|
90 | static char GetAlnConsensusChar(const MSA &a, unsigned uColIndex) |
---|
91 | { |
---|
92 | const unsigned uSeqCount = a.GetSeqCount(); |
---|
93 | unsigned BitMap = 0; |
---|
94 | unsigned Count = 0; |
---|
95 | for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex) |
---|
96 | { |
---|
97 | unsigned uLetter = a.GetLetterEx(uSeqIndex, uColIndex); |
---|
98 | assert(uLetter < 32); |
---|
99 | unsigned Bit = (1 << uLetter); |
---|
100 | if (!(BitMap & Bit)) |
---|
101 | ++Count; |
---|
102 | BitMap |= Bit; |
---|
103 | } |
---|
104 | |
---|
105 | // '*' indicates positions which have a single, fully conserved residue |
---|
106 | if (1 == Count) |
---|
107 | return '*'; |
---|
108 | |
---|
109 | if (ALPHA_Amino != g_Alpha) |
---|
110 | return ' '; |
---|
111 | |
---|
112 | #define B(a) (1 << AX_##a) |
---|
113 | #define S2(a, b) S(B(a) | B(b)) |
---|
114 | #define S3(a, b, c) S(B(a) | B(b) | B(c)) |
---|
115 | #define S4(a, b, c, d) S(B(a) | B(b) | B(c) | B(d)) |
---|
116 | #define S(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return ':'; |
---|
117 | |
---|
118 | #define W3(a, b, c) W(B(a) | B(b) | B(c)) |
---|
119 | #define W4(a, b, c, d) W(B(a) | B(b) | B(c) | B(d)) |
---|
120 | #define W5(a, b, c, d, e) W(B(a) | B(b) | B(c) | B(d) | B(e)) |
---|
121 | #define W6(a, b, c, d, e, f) W(B(a) | B(b) | B(c) | B(d) | B(e) | B(f)) |
---|
122 | #define W(w) if (0 == (BitMap & ~(w)) && (BitMap & (w)) != 0) return '.'; |
---|
123 | |
---|
124 | // ':' indicates that one of the following 'strong' |
---|
125 | // groups is fully conserved |
---|
126 | // STA |
---|
127 | // NEQK |
---|
128 | // NHQK |
---|
129 | // NDEQ |
---|
130 | // QHRK |
---|
131 | // MILV |
---|
132 | // MILF |
---|
133 | // HY |
---|
134 | // FYW |
---|
135 | // |
---|
136 | S3(S, T, A) |
---|
137 | S4(N, E, Q, K) |
---|
138 | S4(N, H, Q, K) |
---|
139 | S4(N, D, E, Q) |
---|
140 | S4(M, I, L, V) |
---|
141 | S4(M, I, L, F) |
---|
142 | S2(H, Y) |
---|
143 | S3(F, Y, W) |
---|
144 | |
---|
145 | // '.' indicates that one of the following 'weaker' |
---|
146 | // groups is fully conserved |
---|
147 | // CSA |
---|
148 | // ATV |
---|
149 | // SAG |
---|
150 | // STNK |
---|
151 | // STPA |
---|
152 | // SGND |
---|
153 | // SNDEQK |
---|
154 | // NDEQHK |
---|
155 | // NEQHRK |
---|
156 | // FVLIM |
---|
157 | // HFY |
---|
158 | W3(C, S, A) |
---|
159 | W3(A, T, V) |
---|
160 | W3(S, A, G) |
---|
161 | W4(S, T, N, K) |
---|
162 | W4(S, T, P, A) |
---|
163 | W4(S, G, N, D) |
---|
164 | W6(S, N, D, E, Q, K) |
---|
165 | W6(N, W, Q, H, R, K) |
---|
166 | W5(F, V, L, I, M) |
---|
167 | W3(H, F, Y) |
---|
168 | |
---|
169 | return ' '; |
---|
170 | } |
---|