| 1 | #include "input_format.h" |
|---|
| 2 | #include "reader.h" |
|---|
| 3 | #include "paup.h" |
|---|
| 4 | #include "ali.h" |
|---|
| 5 | |
|---|
| 6 | static void paup_verify_name(char*& Str) { |
|---|
| 7 | // Verify short_id in NEXUS format. |
|---|
| 8 | if (strpbrk(Str, "*(){/,;_=:\\\'")) { |
|---|
| 9 | char temp[TOKENSIZE]; |
|---|
| 10 | temp[0] = '\''; |
|---|
| 11 | |
|---|
| 12 | int len = str0len(Str); |
|---|
| 13 | int indi = 0; |
|---|
| 14 | int index = 1; |
|---|
| 15 | for (; indi < len; indi++, index++) { |
|---|
| 16 | temp[index] = Str[indi]; |
|---|
| 17 | if (Str[indi] == '\'') temp[++index] = '\''; |
|---|
| 18 | } |
|---|
| 19 | temp[index++] = '\''; |
|---|
| 20 | temp[index] = '\0'; |
|---|
| 21 | |
|---|
| 22 | freedup(Str, temp); |
|---|
| 23 | } |
|---|
| 24 | } |
|---|
| 25 | |
|---|
| 26 | static void paup_print_line(const Seq& seq, int offset, int first_line, Writer& write) { |
|---|
| 27 | // print paup file. |
|---|
| 28 | int length = SEQLINE - 10; |
|---|
| 29 | write.out(" "); |
|---|
| 30 | |
|---|
| 31 | int indi; |
|---|
| 32 | |
|---|
| 33 | const char *id = seq.get_id(); |
|---|
| 34 | for (indi = 0; indi < 10 && id[indi]; indi++) // truncate id to 10 characters |
|---|
| 35 | write.out(id[indi]); |
|---|
| 36 | |
|---|
| 37 | if (offset < seq.get_len()) { |
|---|
| 38 | for (; indi < 11; indi++) write.out(' '); |
|---|
| 39 | |
|---|
| 40 | const char *sequence = seq.get_seq(); |
|---|
| 41 | |
|---|
| 42 | int indj = 0; |
|---|
| 43 | for (indi = indj = 0; indi < length; indi++) { |
|---|
| 44 | if ((offset + indi) < seq.get_len()) { |
|---|
| 45 | write.out(sequence[offset + indi]); |
|---|
| 46 | indj++; |
|---|
| 47 | if (indj == 10 && indi < (length - 1) && (indi + offset) < (seq.get_len() - 1)) { |
|---|
| 48 | write.out(' '); |
|---|
| 49 | indj = 0; |
|---|
| 50 | } |
|---|
| 51 | } |
|---|
| 52 | else |
|---|
| 53 | break; |
|---|
| 54 | } |
|---|
| 55 | } |
|---|
| 56 | |
|---|
| 57 | if (first_line) |
|---|
| 58 | write.outf(" [%d - %d]", offset + 1, (offset + indi)); |
|---|
| 59 | |
|---|
| 60 | write.out('\n'); |
|---|
| 61 | } |
|---|
| 62 | |
|---|
| 63 | static void paup_print_headerstart(Writer& write) { |
|---|
| 64 | write.out("#NEXUS\n"); |
|---|
| 65 | write.outf("[! RDP - the Ribosomal Database Project, (%s).]\n", today_date()); |
|---|
| 66 | write.out("[! To get started, send HELP to rdp@info.mcs.anl.gov ]\n"); |
|---|
| 67 | write.out("BEGIN DATA;\n DIMENSIONS\n"); |
|---|
| 68 | } |
|---|
| 69 | |
|---|
| 70 | static void paup_print_header_counters(Writer& write) { |
|---|
| 71 | write.outf(" NTAX = %6s\n NCHAR = %6s\n ;\n", "", ""); |
|---|
| 72 | } |
|---|
| 73 | static void paup_print_header_counters(Writer& write, int total_seq, int maxsize) { |
|---|
| 74 | write.outf(" NTAX = %6d\n NCHAR = %6d\n ;\n", total_seq, maxsize); |
|---|
| 75 | } |
|---|
| 76 | |
|---|
| 77 | static void paup_print_header(const Paup& paup, Writer& write) { |
|---|
| 78 | // Print out the header of each paup format. |
|---|
| 79 | paup_print_headerstart(write); |
|---|
| 80 | paup_print_header_counters(write); |
|---|
| 81 | |
|---|
| 82 | write.out(" FORMAT\n LABELPOS = LEFT\n"); |
|---|
| 83 | write.outf(" MISSING = .\n EQUATE = \"%s\"\n", paup.equate); |
|---|
| 84 | write.outf(" INTERLEAVE\n DATATYPE = RNA\n GAP = %c\n ;\n", paup.gap); |
|---|
| 85 | write.out(" OPTIONS\n GAPMODE = MISSING\n ;\n MATRIX\n"); |
|---|
| 86 | } |
|---|
| 87 | |
|---|
| 88 | void to_paup(const FormattedFile& in, const char *outf) { |
|---|
| 89 | // Convert from some format to NEXUS format. |
|---|
| 90 | if (!is_input_format(in.type())) { |
|---|
| 91 | throw_conversion_not_supported(in.type(), NEXUS); |
|---|
| 92 | } |
|---|
| 93 | |
|---|
| 94 | FileWriter write(outf); |
|---|
| 95 | Paup paup; |
|---|
| 96 | |
|---|
| 97 | paup_print_header(paup, write); |
|---|
| 98 | |
|---|
| 99 | Alignment ali; |
|---|
| 100 | read_alignment(ali, in); |
|---|
| 101 | |
|---|
| 102 | for (int i = 0; i<ali.get_count(); ++i) { |
|---|
| 103 | SeqPtr seq = ali.getSeqPtr(i); |
|---|
| 104 | char *name = ARB_strdup(seq->get_id()); |
|---|
| 105 | paup_verify_name(name); |
|---|
| 106 | seq->replace_id(name); |
|---|
| 107 | ca_assert(seq->get_id()); |
|---|
| 108 | free(name); |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | int maxsize = ali.get_max_len(); |
|---|
| 112 | int total_seq = ali.get_count(); |
|---|
| 113 | int current = 0; |
|---|
| 114 | |
|---|
| 115 | while (maxsize > current) { |
|---|
| 116 | int first_line = 0; |
|---|
| 117 | for (int indi = 0; indi < total_seq; indi++) { |
|---|
| 118 | if (current < ali.get_len(indi)) |
|---|
| 119 | first_line++; |
|---|
| 120 | paup_print_line(ali.get(indi), current, (first_line == 1), write); |
|---|
| 121 | |
|---|
| 122 | // Avoid repeating |
|---|
| 123 | if (first_line == 1) |
|---|
| 124 | first_line++; |
|---|
| 125 | } |
|---|
| 126 | current += (SEQLINE - 10); |
|---|
| 127 | if (maxsize > current) write.out('\n'); |
|---|
| 128 | } |
|---|
| 129 | |
|---|
| 130 | write.out(" ;\nENDBLOCK;\n"); |
|---|
| 131 | |
|---|
| 132 | // rewrite output header |
|---|
| 133 | rewind(write.get_FILE()); |
|---|
| 134 | paup_print_headerstart(write); |
|---|
| 135 | paup_print_header_counters(write, total_seq, maxsize); |
|---|
| 136 | |
|---|
| 137 | write.seq_done(ali.get_count()); |
|---|
| 138 | write.expect_written(); |
|---|
| 139 | } |
|---|