| 1 | #include "muscle.h" |
|---|
| 2 | #include "textfile.h" |
|---|
| 3 | |
|---|
| 4 | #define TRACE 0 |
|---|
| 5 | |
|---|
| 6 | const int MAX_LINE = 4096; |
|---|
| 7 | const int MAX_HEADINGS = 32; |
|---|
| 8 | static char Heading[MAX_HEADINGS]; |
|---|
| 9 | static unsigned HeadingCount = 0; |
|---|
| 10 | static float Mx[32][32]; |
|---|
| 11 | |
|---|
| 12 | static void LogMx() |
|---|
| 13 | { |
|---|
| 14 | Log("Matrix\n"); |
|---|
| 15 | Log(" "); |
|---|
| 16 | for (int i = 0; i < 20; ++i) |
|---|
| 17 | Log(" %c", LetterToChar(i)); |
|---|
| 18 | Log("\n"); |
|---|
| 19 | |
|---|
| 20 | for (int i = 0; i < 20; ++i) |
|---|
| 21 | { |
|---|
| 22 | Log("%c ", LetterToChar(i)); |
|---|
| 23 | for (int j = 0; j < 20; ++j) |
|---|
| 24 | Log("%5.1f", Mx[i][j]); |
|---|
| 25 | Log("\n"); |
|---|
| 26 | } |
|---|
| 27 | Log("\n"); |
|---|
| 28 | } |
|---|
| 29 | |
|---|
| 30 | static unsigned MxCharToLetter(char c) |
|---|
| 31 | { |
|---|
| 32 | for (unsigned Letter = 0; Letter < HeadingCount; ++Letter) |
|---|
| 33 | if (Heading[Letter] == c) |
|---|
| 34 | return Letter; |
|---|
| 35 | Quit("Letter '%c' has no heading", c); |
|---|
| 36 | return 0; |
|---|
| 37 | } |
|---|
| 38 | |
|---|
| 39 | PTR_SCOREMATRIX ReadMx(TextFile &File) |
|---|
| 40 | { |
|---|
| 41 | // Find column headers |
|---|
| 42 | char Line[MAX_LINE]; |
|---|
| 43 | for (;;) |
|---|
| 44 | { |
|---|
| 45 | bool EndOfFile = File.GetLine(Line, sizeof(Line)); |
|---|
| 46 | if (EndOfFile) |
|---|
| 47 | Quit("Premature EOF in matrix file"); |
|---|
| 48 | |
|---|
| 49 | if (Line[0] == '#') |
|---|
| 50 | continue; |
|---|
| 51 | else if (Line[0] == ' ') |
|---|
| 52 | break; |
|---|
| 53 | else |
|---|
| 54 | Quit("Invalid line in matrix file: '%s'", Line); |
|---|
| 55 | } |
|---|
| 56 | |
|---|
| 57 | // Read column headers |
|---|
| 58 | HeadingCount = 0; |
|---|
| 59 | for (char *p = Line; *p; ++p) |
|---|
| 60 | { |
|---|
| 61 | char c = *p; |
|---|
| 62 | if (!isspace(c)) |
|---|
| 63 | Heading[HeadingCount++] = c; |
|---|
| 64 | } |
|---|
| 65 | |
|---|
| 66 | if (HeadingCount > 0 && Heading[HeadingCount-1] == '*') |
|---|
| 67 | --HeadingCount; |
|---|
| 68 | |
|---|
| 69 | if (HeadingCount < 20) |
|---|
| 70 | Quit("Error in matrix file: < 20 headers, line='%s'", Line); |
|---|
| 71 | |
|---|
| 72 | #if TRACE |
|---|
| 73 | { |
|---|
| 74 | Log("ReadMx\n"); |
|---|
| 75 | Log("%d headings: ", HeadingCount); |
|---|
| 76 | for (unsigned i = 0; i < HeadingCount; ++i) |
|---|
| 77 | Log("%c", Heading[i]); |
|---|
| 78 | Log("\n"); |
|---|
| 79 | } |
|---|
| 80 | #endif |
|---|
| 81 | |
|---|
| 82 | // Zero out matrix |
|---|
| 83 | for (int i = 0; i < MAX_ALPHA; ++i) |
|---|
| 84 | for (int j = 0; j < MAX_ALPHA; ++j) |
|---|
| 85 | Mx[i][j] = 0.0; |
|---|
| 86 | |
|---|
| 87 | // Read data lines |
|---|
| 88 | for (unsigned RowIndex = 0; RowIndex < HeadingCount; ++RowIndex) |
|---|
| 89 | { |
|---|
| 90 | bool EndOfFile = File.GetTrimLine(Line, sizeof(Line)); |
|---|
| 91 | if (EndOfFile) |
|---|
| 92 | Quit("Premature EOF in matrix file"); |
|---|
| 93 | #if TRACE |
|---|
| 94 | Log("Line=%s\n", Line); |
|---|
| 95 | #endif |
|---|
| 96 | if (Line[0] == '#') |
|---|
| 97 | continue; |
|---|
| 98 | |
|---|
| 99 | char c = Line[0]; |
|---|
| 100 | #if TRACE |
|---|
| 101 | Log("Row char=%c\n", c); |
|---|
| 102 | #endif |
|---|
| 103 | if (!IsResidueChar(c)) |
|---|
| 104 | continue; |
|---|
| 105 | unsigned RowLetter = CharToLetter(c); |
|---|
| 106 | if (RowLetter >= 20) |
|---|
| 107 | continue; |
|---|
| 108 | #if TRACE |
|---|
| 109 | Log("Row letter = %u\n", RowLetter); |
|---|
| 110 | #endif |
|---|
| 111 | |
|---|
| 112 | char *p = Line + 1; |
|---|
| 113 | char *maxp = p + strlen(Line); |
|---|
| 114 | for (unsigned Col = 0; Col < HeadingCount - 1; ++Col) |
|---|
| 115 | { |
|---|
| 116 | if (p >= maxp) |
|---|
| 117 | Quit("Too few fields in line of matrix file: '%s'", Line); |
|---|
| 118 | while (isspace(*p)) |
|---|
| 119 | ++p; |
|---|
| 120 | char *Value = p; |
|---|
| 121 | while (!isspace(*p)) |
|---|
| 122 | ++p; |
|---|
| 123 | float v = (float) atof(Value); |
|---|
| 124 | char HeaderChar = Heading[Col]; |
|---|
| 125 | if (IsResidueChar(HeaderChar)) |
|---|
| 126 | { |
|---|
| 127 | unsigned ColLetter = CharToLetter(HeaderChar); |
|---|
| 128 | if (ColLetter >= 20) |
|---|
| 129 | continue; |
|---|
| 130 | Mx[RowLetter][ColLetter] = v; |
|---|
| 131 | } |
|---|
| 132 | p += 1; |
|---|
| 133 | } |
|---|
| 134 | } |
|---|
| 135 | |
|---|
| 136 | // Sanity check for symmetry |
|---|
| 137 | for (int i = 0; i < 20; ++i) |
|---|
| 138 | for (int j = 0; j < i; ++j) |
|---|
| 139 | { |
|---|
| 140 | if (Mx[i][j] != Mx[j][i]) |
|---|
| 141 | { |
|---|
| 142 | Warning("Matrix is not symmetrical, %c->%c=%g, %c->%c=%g", |
|---|
| 143 | CharToLetter(i), |
|---|
| 144 | CharToLetter(j), |
|---|
| 145 | Mx[i][j], |
|---|
| 146 | CharToLetter(j), |
|---|
| 147 | CharToLetter(i), |
|---|
| 148 | Mx[j][i]); |
|---|
| 149 | goto ExitLoop; |
|---|
| 150 | } |
|---|
| 151 | } |
|---|
| 152 | ExitLoop:; |
|---|
| 153 | |
|---|
| 154 | if (g_bVerbose) |
|---|
| 155 | LogMx(); |
|---|
| 156 | |
|---|
| 157 | return &Mx; |
|---|
| 158 | } |
|---|