1 | #include "muscle.h" |
---|
2 | #include <stdio.h> |
---|
3 | #include <ctype.h> |
---|
4 | #include "msa.h" |
---|
5 | #include "textfile.h" |
---|
6 | |
---|
// Maximum number of name characters kept by GetPaddedName; its static
// buffer holds MAX_NAME characters plus a terminating NUL.
const int MAX_NAME = 63;

// Layout of the alignment body written by ToMSFFile: columns per output
// line, and columns per space-separated block within a line.
const unsigned uCharsPerLine = 50;
const unsigned uCharsPerBlock = 10;

// Truncate Name at its first blank or tab, or at MAX_NAME characters,
// whichever comes first, then pad with blanks up to PadLength.
//
// Name       NUL-terminated name; a null pointer is treated as an empty name.
// PadLength  Length of the returned string; values outside [0, MAX_NAME]
//            are clamped to that range.
//
// Returns a pointer to a function-local static buffer. The contents are
// overwritten by the next call, so the result must be consumed (or copied)
// before GetPaddedName is called again.
static const char *GetPaddedName(const char *Name, int PadLength)
{
    static char PaddedName[MAX_NAME+1];

    // Keep the terminator inside the buffer whatever the caller asked for.
    int iLength = PadLength;
    if (iLength < 0)
        iLength = 0;
    else if (iLength > MAX_NAME)
        iLength = MAX_NAME;

    memset(PaddedName, ' ', MAX_NAME);

    // Length of the leading token, capped so that the copy cannot run past
    // the end of the buffer when the name is longer than MAX_NAME.
    size_t n = (0 == Name) ? 0 : strcspn(Name, " \t");
    if (n > static_cast<size_t>(MAX_NAME))
        n = static_cast<size_t>(MAX_NAME);
    if (n > 0)
        memcpy(PaddedName, Name, n);

    PaddedName[iLength] = 0;
    return PaddedName;
}
---|
23 | |
---|
// Find the first character of s that also occurs in t.
//
// s  NUL-terminated string to search.
// t  NUL-terminated set of characters to look for.
//
// Returns a pointer to the first matching character in s (which may be
// s itself when the very first character matches), or 0 when s contains
// none of the characters in t.
static const char *strfind(const char *s, const char *t)
{
    const size_t n = strcspn(s, t);
    // strcspn returns strlen(s) when nothing matches, so s[n] is then the
    // terminating NUL; any other value of s[n] is a genuine match.
    if (0 == s[n])
        return 0;
    return s + n;
}
---|
31 | |
---|
32 | // GCG checksum code kindly provided by Eric Martel. |
---|
33 | unsigned MSA::GetGCGCheckSum(unsigned uSeqIndex) const |
---|
34 | { |
---|
35 | unsigned CheckSum = 0; |
---|
36 | const unsigned uColCount = GetColCount(); |
---|
37 | for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex) |
---|
38 | { |
---|
39 | unsigned c = (unsigned) GetChar(uSeqIndex, uColIndex); |
---|
40 | CheckSum += c*(uColIndex%57 + 1); |
---|
41 | CheckSum %= 10000; |
---|
42 | } |
---|
43 | return CheckSum; |
---|
44 | } |
---|
45 | |
---|
46 | static void MSFFixGaps(MSA &a) |
---|
47 | { |
---|
48 | const int SeqCount = a.GetSeqCount(); |
---|
49 | const int ColCount = a.GetColCount(); |
---|
50 | for (int SeqIndex = 0; SeqIndex < SeqCount; ++SeqIndex) |
---|
51 | { |
---|
52 | for (int ColIndex = 0; ColIndex < ColCount; ++ColIndex) |
---|
53 | if (a.IsGap(SeqIndex, ColIndex)) |
---|
54 | a.SetChar(SeqIndex, ColIndex, '.'); |
---|
55 | } |
---|
56 | } |
---|
57 | |
---|
58 | void MSA::ToMSFFile(TextFile &File, const char *ptrComment) const |
---|
59 | { |
---|
60 | // Cast away const, yuck |
---|
61 | SetMSAWeightsMuscle((MSA &) *this); |
---|
62 | MSFFixGaps((MSA &) *this); |
---|
63 | |
---|
64 | File.PutString("PileUp\n"); |
---|
65 | |
---|
66 | if (0 != ptrComment) |
---|
67 | File.PutFormat("Comment: %s\n", ptrComment); |
---|
68 | else |
---|
69 | File.PutString("\n"); |
---|
70 | |
---|
71 | char seqtype = (g_Alpha == ALPHA_DNA || g_Alpha == ALPHA_RNA) ? 'N' : 'A'; |
---|
72 | File.PutFormat(" MSF: %u Type: %c Check: 0000 ..\n\n", |
---|
73 | GetColCount(), seqtype); |
---|
74 | |
---|
75 | int iLongestNameLength = 0; |
---|
76 | for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex) |
---|
77 | { |
---|
78 | const char *Name = GetSeqName(uSeqIndex); |
---|
79 | const char *PaddedName = GetPaddedName(Name, MAX_NAME); |
---|
80 | int iLength = (int) strcspn(PaddedName, " \t"); |
---|
81 | if (iLength > iLongestNameLength) |
---|
82 | iLongestNameLength = iLength; |
---|
83 | } |
---|
84 | |
---|
85 | for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex) |
---|
86 | { |
---|
87 | const char *Name = GetSeqName(uSeqIndex); |
---|
88 | const char *PaddedName = GetPaddedName(Name, iLongestNameLength); |
---|
89 | File.PutFormat(" Name: %s", PaddedName); |
---|
90 | File.PutFormat(" Len: %u Check: %5u Weight: %g\n", |
---|
91 | GetColCount(), GetGCGCheckSum(uSeqIndex), GetSeqWeight(uSeqIndex)); |
---|
92 | } |
---|
93 | File.PutString("\n//\n"); |
---|
94 | if (0 == GetColCount()) |
---|
95 | return; |
---|
96 | |
---|
97 | unsigned uLineCount = (GetColCount() - 1)/uCharsPerLine + 1; |
---|
98 | for (unsigned uLineIndex = 0; uLineIndex < uLineCount; ++uLineIndex) |
---|
99 | { |
---|
100 | File.PutString("\n"); |
---|
101 | unsigned uStartColIndex = uLineIndex*uCharsPerLine; |
---|
102 | unsigned uEndColIndex = uStartColIndex + uCharsPerLine - 1; |
---|
103 | if (uEndColIndex >= GetColCount()) |
---|
104 | uEndColIndex = GetColCount() - 1; |
---|
105 | for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex) |
---|
106 | { |
---|
107 | const char *Name = GetSeqName(uSeqIndex); |
---|
108 | const char *PaddedName = GetPaddedName(Name, iLongestNameLength); |
---|
109 | File.PutFormat("%s ", PaddedName); |
---|
110 | for (unsigned uColIndex = uStartColIndex; uColIndex <= uEndColIndex; |
---|
111 | ++uColIndex) |
---|
112 | { |
---|
113 | if (0 == uColIndex%uCharsPerBlock) |
---|
114 | File.PutString(" "); |
---|
115 | char c = GetChar(uSeqIndex, uColIndex); |
---|
116 | File.PutFormat("%c", c); |
---|
117 | } |
---|
118 | File.PutString("\n"); |
---|
119 | } |
---|
120 | } |
---|
121 | } |
---|