1 | #include "muscle.h" |
---|
2 | #include <stdio.h> |
---|
3 | #include <errno.h> |
---|
4 | |
---|
// Line terminators recognised while parsing FASTA input.
const int NL = '\n';
const int CR = '\r';

// Allocation step, in bytes, used by the ADD macro when it enlarges its
// buffer. Alternative setting considered: 16*1024.
const int BUFFER_BYTES = 128;
---|
9 | |
---|
// ADD(c): append one character to the growable buffer of the enclosing scope.
//
// The macro operates on three locals that must be in scope where it is used:
//   char *Buffer           array obtained from new[] (0 while nothing is allocated)
//   unsigned BufferLength  number of bytes currently allocated
//   unsigned Pos           index of the next free byte
//
// When the buffer is full it is replaced by a larger new[] array and the old
// one is released with delete[]. The capacity grows by max(BufferLength,
// BUFFER_BYTES), i.e. it at least doubles, so appending n characters copies
// O(n) bytes in total instead of O(n^2). memcpy is skipped while Buffer is
// still 0 because passing a null pointer to memcpy is undefined even when the
// length is zero.
//
// The expansion is a plain { } block, so call sites may write ADD(x) with or
// without a trailing semicolon. Do not use it as the unbraced body of an
// if/else.
#define ADD(c) \
	{ \
	if (Pos >= BufferLength) \
		{ \
		const unsigned Grow = (BufferLength > (unsigned) BUFFER_BYTES) ? \
		  BufferLength : (unsigned) BUFFER_BYTES; \
		const unsigned NewBufferLength = BufferLength + Grow; \
		char *NewBuffer = new char[NewBufferLength]; \
		if (0 != Buffer) \
			memcpy(NewBuffer, Buffer, BufferLength); \
		delete[] Buffer; \
		Buffer = NewBuffer; \
		BufferLength = NewBufferLength; \
		} \
	Buffer[Pos++] = (char) (c); \
	}
---|
23 | |
---|
24 | // Get next sequence from file. |
---|
25 | char *GetFastaSeq(FILE *f, unsigned *ptrSeqLength, char **ptrLabel, bool DeleteGaps) |
---|
26 | { |
---|
27 | unsigned BufferLength = 0; |
---|
28 | unsigned Pos = 0; |
---|
29 | char *Buffer = 0; |
---|
30 | |
---|
31 | int c = fgetc(f); |
---|
32 | if (EOF == c) |
---|
33 | return 0; |
---|
34 | if ('>' != c) |
---|
35 | Quit("Invalid file format, expected '>' to start FASTA label"); |
---|
36 | |
---|
37 | for (;;) |
---|
38 | { |
---|
39 | int c = fgetc(f); |
---|
40 | if (EOF == c) |
---|
41 | Quit("End-of-file or input error in FASTA label"); |
---|
42 | |
---|
43 | // NL or CR terminates label |
---|
44 | if (NL == c || CR == c) |
---|
45 | break; |
---|
46 | |
---|
47 | // All other characters added to label |
---|
48 | ADD(c) |
---|
49 | } |
---|
50 | |
---|
51 | // Nul-terminate label |
---|
52 | ADD(0) |
---|
53 | *ptrLabel = Buffer; |
---|
54 | |
---|
55 | BufferLength = 0; |
---|
56 | Pos = 0; |
---|
57 | Buffer = 0; |
---|
58 | int PreviousChar = NL; |
---|
59 | for (;;) |
---|
60 | { |
---|
61 | int c = fgetc(f); |
---|
62 | if (EOF == c) |
---|
63 | { |
---|
64 | if (feof(f)) |
---|
65 | break; |
---|
66 | else if (ferror(f)) |
---|
67 | Quit("Error reading FASTA file, ferror=TRUE feof=FALSE errno=%d %s", |
---|
68 | errno, strerror(errno)); |
---|
69 | else |
---|
70 | Quit("Error reading FASTA file, fgetc=EOF feof=FALSE ferror=FALSE errno=%d %s", |
---|
71 | errno, strerror(errno)); |
---|
72 | } |
---|
73 | |
---|
74 | if ('>' == c) |
---|
75 | { |
---|
76 | if (NL == PreviousChar || CR == PreviousChar) |
---|
77 | { |
---|
78 | ungetc(c, f); |
---|
79 | break; |
---|
80 | } |
---|
81 | else |
---|
82 | Quit("Unexpected '>' in FASTA sequence data"); |
---|
83 | } |
---|
84 | else if (isspace(c)) |
---|
85 | ; |
---|
86 | else if (IsGapChar(c)) |
---|
87 | { |
---|
88 | if (!DeleteGaps) |
---|
89 | ADD(c) |
---|
90 | } |
---|
91 | else if (isalpha(c)) |
---|
92 | { |
---|
93 | c = toupper(c); |
---|
94 | ADD(c) |
---|
95 | } |
---|
96 | else if (isprint(c)) |
---|
97 | { |
---|
98 | Warning("Invalid character '%c' in FASTA sequence data, ignored", c); |
---|
99 | continue; |
---|
100 | } |
---|
101 | else |
---|
102 | { |
---|
103 | Warning("Invalid byte hex %02x in FASTA sequence data, ignored", (unsigned char) c); |
---|
104 | continue; |
---|
105 | } |
---|
106 | PreviousChar = c; |
---|
107 | } |
---|
108 | |
---|
109 | if (0 == Pos) |
---|
110 | return GetFastaSeq(f, ptrSeqLength, ptrLabel, DeleteGaps); |
---|
111 | |
---|
112 | *ptrSeqLength = Pos; |
---|
113 | return Buffer; |
---|
114 | } |
---|