| 1 | #include "input_format.h" |
|---|
| 2 | #include "reader.h" |
|---|
| 3 | #include "ali.h" |
|---|
| 4 | |
|---|
| 5 | #include <cerrno> |
|---|
| 6 | |
|---|
| 7 | static void phylip_print_line(const Seq& seq, int index, Writer& write) { |
|---|
| 8 | // Print phylip line. |
|---|
| 9 | ca_assert(seq.get_len()>0); |
|---|
| 10 | |
|---|
| 11 | int length; |
|---|
| 12 | if (index == 0) { |
|---|
| 13 | int bnum; |
|---|
| 14 | const char *name = seq.get_id(); |
|---|
| 15 | int nlen = str0len(name); |
|---|
| 16 | if (nlen > 10) { |
|---|
| 17 | // truncate id length of sequence ID is greater than 10 |
|---|
| 18 | for (int indi = 0; indi < 10; indi++) write.out(name[indi]); |
|---|
| 19 | bnum = 1; |
|---|
| 20 | } |
|---|
| 21 | else { |
|---|
| 22 | write.out(name); |
|---|
| 23 | bnum = 10 - nlen + 1; |
|---|
| 24 | } |
|---|
| 25 | // fill in blanks to make up 10 chars for ID. |
|---|
| 26 | for (int indi = 0; indi < bnum; indi++) write.out(' '); |
|---|
| 27 | length = SEQLINE - 10; |
|---|
| 28 | } |
|---|
| 29 | else if (index >= seq.get_len()) { |
|---|
| 30 | length = 0; |
|---|
| 31 | } |
|---|
| 32 | else { |
|---|
| 33 | length = SEQLINE; |
|---|
| 34 | } |
|---|
| 35 | |
|---|
| 36 | const char *sequence = seq.get_seq(); |
|---|
| 37 | for (int indi = 0, indj = 0; indi < length; indi++) { |
|---|
| 38 | if ((index + indi) < seq.get_len()) { |
|---|
| 39 | char c = sequence[index + indi]; |
|---|
| 40 | |
|---|
| 41 | if (c == '.') |
|---|
| 42 | c = '?'; |
|---|
| 43 | write.out(c); |
|---|
| 44 | indj++; |
|---|
| 45 | if (indj == 10 && (index + indi) < (seq.get_len() - 1) && indi < (length - 1)) { |
|---|
| 46 | write.out(' '); |
|---|
| 47 | indj = 0; |
|---|
| 48 | } |
|---|
| 49 | } |
|---|
| 50 | else |
|---|
| 51 | break; |
|---|
| 52 | } |
|---|
| 53 | write.out('\n'); |
|---|
| 54 | } |
|---|
| 55 | |
|---|
| 56 | static void inject_STDIN(FileWriter& write) { |
|---|
| 57 | while (1) { |
|---|
| 58 | int ch = fgetc(stdin); |
|---|
| 59 | if (ch == EOF) break; |
|---|
| 60 | write.out(ch); |
|---|
| 61 | } |
|---|
| 62 | } |
|---|
| 63 | |
|---|
| 64 | void to_phylip(const FormattedFile& in, const char *outf, bool for_fastdnaml) { |
|---|
| 65 | // Convert from some format to PHYLIP format. |
|---|
| 66 | // |
|---|
| 67 | // if 'for_fastdnaml' is true, then |
|---|
| 68 | // - read extra parameters from STDIN and merge them at end of first line. |
|---|
| 69 | // Info on STDIN normally is generated by arb_export_rates and |
|---|
| 70 | // is expected by arb_fastdnaml. |
|---|
| 71 | |
|---|
| 72 | if (!is_input_format(in.type())) { |
|---|
| 73 | throw_conversion_not_supported(in.type(), PHYLIP); |
|---|
| 74 | } |
|---|
| 75 | |
|---|
| 76 | FileWriter write(outf); |
|---|
| 77 | |
|---|
| 78 | if (write.get_FILE() == stdout) { |
|---|
| 79 | ca_assert(0); // can't use stdout (because rewind is used below) |
|---|
| 80 | throw_error(140, "Cannot write to standard output"); |
|---|
| 81 | } |
|---|
| 82 | |
|---|
| 83 | Alignment ali; |
|---|
| 84 | read_alignment(ali, in); |
|---|
| 85 | |
|---|
| 86 | int maxsize = ali.get_max_len(); |
|---|
| 87 | int total_seq = ali.get_count(); |
|---|
| 88 | int current = 0; |
|---|
| 89 | int headersize1 = write.outf("%8d %8d", maxsize, current); |
|---|
| 90 | |
|---|
| 91 | if (for_fastdnaml) inject_STDIN(write); |
|---|
| 92 | write.out('\n'); |
|---|
| 93 | |
|---|
| 94 | while (maxsize > current) { |
|---|
| 95 | for (int indi = 0; indi < total_seq; indi++) { |
|---|
| 96 | phylip_print_line(ali.get(indi), current, write); |
|---|
| 97 | } |
|---|
| 98 | if (current == 0) |
|---|
| 99 | current += (SEQLINE - 10); |
|---|
| 100 | else |
|---|
| 101 | current += SEQLINE; |
|---|
| 102 | if (maxsize > current) |
|---|
| 103 | write.out('\n'); |
|---|
| 104 | } |
|---|
| 105 | // rewrite output header |
|---|
| 106 | errno = 0; |
|---|
| 107 | rewind(write.get_FILE()); |
|---|
| 108 | ca_assert(errno == 0); |
|---|
| 109 | if (errno) { |
|---|
| 110 | perror("rewind error"); |
|---|
| 111 | throw_errorf(141, "Failed to rewind file (errno=%i)", errno); |
|---|
| 112 | } |
|---|
| 113 | |
|---|
| 114 | int headersize2 = write.outf("%8d %8d", total_seq, maxsize); |
|---|
| 115 | |
|---|
| 116 | if (headersize1 != headersize2) { |
|---|
| 117 | ca_assert(0); |
|---|
| 118 | throw_errorf(142, "Failed to rewrite header (headersize differs: %i != %i)", headersize1, headersize2); |
|---|
| 119 | } |
|---|
| 120 | |
|---|
| 121 | write.seq_done(ali.get_count()); |
|---|
| 122 | write.expect_written(); |
|---|
| 123 | } |
|---|
| 124 | |
|---|