1 | #include "input_format.h" |
---|
2 | #include "reader.h" |
---|
3 | #include "ali.h" |
---|
4 | |
---|
5 | #include <cerrno> |
---|
6 | |
---|
7 | static void phylip_print_line(const Seq& seq, int index, Writer& write) { |
---|
8 | // Print phylip line. |
---|
9 | ca_assert(seq.get_len()>0); |
---|
10 | |
---|
11 | int length; |
---|
12 | if (index == 0) { |
---|
13 | int bnum; |
---|
14 | const char *name = seq.get_id(); |
---|
15 | int nlen = str0len(name); |
---|
16 | if (nlen > 10) { |
---|
17 | // truncate id length of sequence ID is greater than 10 |
---|
18 | for (int indi = 0; indi < 10; indi++) write.out(name[indi]); |
---|
19 | bnum = 1; |
---|
20 | } |
---|
21 | else { |
---|
22 | write.out(name); |
---|
23 | bnum = 10 - nlen + 1; |
---|
24 | } |
---|
25 | // fill in blanks to make up 10 chars for ID. |
---|
26 | for (int indi = 0; indi < bnum; indi++) write.out(' '); |
---|
27 | length = SEQLINE - 10; |
---|
28 | } |
---|
29 | else if (index >= seq.get_len()) { |
---|
30 | length = 0; |
---|
31 | } |
---|
32 | else { |
---|
33 | length = SEQLINE; |
---|
34 | } |
---|
35 | |
---|
36 | const char *sequence = seq.get_seq(); |
---|
37 | for (int indi = 0, indj = 0; indi < length; indi++) { |
---|
38 | if ((index + indi) < seq.get_len()) { |
---|
39 | char c = sequence[index + indi]; |
---|
40 | |
---|
41 | if (c == '.') |
---|
42 | c = '?'; |
---|
43 | write.out(c); |
---|
44 | indj++; |
---|
45 | if (indj == 10 && (index + indi) < (seq.get_len() - 1) && indi < (length - 1)) { |
---|
46 | write.out(' '); |
---|
47 | indj = 0; |
---|
48 | } |
---|
49 | } |
---|
50 | else |
---|
51 | break; |
---|
52 | } |
---|
53 | write.out('\n'); |
---|
54 | } |
---|
55 | |
---|
56 | static void inject_STDIN(FileWriter& write) { |
---|
57 | while (1) { |
---|
58 | int ch = fgetc(stdin); |
---|
59 | if (ch == EOF) break; |
---|
60 | write.out(ch); |
---|
61 | } |
---|
62 | } |
---|
63 | |
---|
64 | void to_phylip(const FormattedFile& in, const char *outf, bool for_fastdnaml) { |
---|
65 | // Convert from some format to PHYLIP format. |
---|
66 | // |
---|
67 | // if 'for_fastdnaml' is true, then |
---|
68 | // - read extra parameters from STDIN and merge them at end of first line. |
---|
69 | // Info on STDIN normally is generated by arb_export_rates and |
---|
70 | // is expected by arb_fastdnaml. |
---|
71 | |
---|
72 | if (!is_input_format(in.type())) { |
---|
73 | throw_conversion_not_supported(in.type(), PHYLIP); |
---|
74 | } |
---|
75 | |
---|
76 | FileWriter write(outf); |
---|
77 | |
---|
78 | if (write.get_FILE() == stdout) { |
---|
79 | ca_assert(0); // can't use stdout (because rewind is used below) |
---|
80 | throw_error(140, "Cannot write to standard output"); |
---|
81 | } |
---|
82 | |
---|
83 | Alignment ali; |
---|
84 | read_alignment(ali, in); |
---|
85 | |
---|
86 | int maxsize = ali.get_max_len(); |
---|
87 | int total_seq = ali.get_count(); |
---|
88 | int current = 0; |
---|
89 | int headersize1 = write.outf("%8d %8d", maxsize, current); |
---|
90 | |
---|
91 | if (for_fastdnaml) inject_STDIN(write); |
---|
92 | write.out('\n'); |
---|
93 | |
---|
94 | while (maxsize > current) { |
---|
95 | for (int indi = 0; indi < total_seq; indi++) { |
---|
96 | phylip_print_line(ali.get(indi), current, write); |
---|
97 | } |
---|
98 | if (current == 0) |
---|
99 | current += (SEQLINE - 10); |
---|
100 | else |
---|
101 | current += SEQLINE; |
---|
102 | if (maxsize > current) |
---|
103 | write.out('\n'); |
---|
104 | } |
---|
105 | // rewrite output header |
---|
106 | errno = 0; |
---|
107 | rewind(write.get_FILE()); |
---|
108 | ca_assert(errno == 0); |
---|
109 | if (errno) { |
---|
110 | perror("rewind error"); |
---|
111 | throw_errorf(141, "Failed to rewind file (errno=%i)", errno); |
---|
112 | } |
---|
113 | |
---|
114 | int headersize2 = write.outf("%8d %8d", total_seq, maxsize); |
---|
115 | |
---|
116 | if (headersize1 != headersize2) { |
---|
117 | ca_assert(0); |
---|
118 | throw_errorf(142, "Failed to rewrite header (headersize differs: %i != %i)", headersize1, headersize2); |
---|
119 | } |
---|
120 | |
---|
121 | write.seq_done(ali.get_count()); |
---|
122 | write.expect_written(); |
---|
123 | } |
---|
124 | |
---|