1 | #include "input_format.h" |
---|
2 | #include "reader.h" |
---|
3 | #include "paup.h" |
---|
4 | #include "ali.h" |
---|
5 | |
---|
/**
 * Verify a short id for use in NEXUS output, quoting it if necessary.
 *
 * If 'Str' contains any of the characters in the strpbrk() set below, the
 * name is replaced by a copy that is wrapped in single quotes and in which
 * every embedded single quote is doubled. Names without any of those
 * characters are left untouched.
 *
 * The quoted copy is built in a heap buffer sized for the worst case, so
 * names of any length (and with any number of quotes) are handled without
 * overrunning a fixed-size buffer.
 *
 * @param Str  heap-allocated, NUL-terminated name; must not be NULL.
 *             When quoting is needed, the old buffer is released with free()
 *             and 'Str' is pointed at a newly malloc'ed string that the
 *             caller has to free(). If that allocation fails, 'Str' is left
 *             unchanged.
 */
static void paup_verify_name(char*& Str) {
    if (!strpbrk(Str, "*(){/,;_=:\\\'")) return; // nothing to quote

    const size_t len = strlen(Str);

    // worst case: every character is a quote (doubled),
    // plus the two enclosing quotes, plus the terminating NUL
    char *quoted = (char*)malloc(2 * len + 3);
    if (!quoted) return;

    size_t out = 0;
    quoted[out++] = '\'';
    for (size_t in = 0; in < len; ++in) {
        quoted[out++] = Str[in];
        if (Str[in] == '\'') quoted[out++] = '\''; // escape quote by doubling it
    }
    quoted[out++] = '\'';
    quoted[out]   = '\0';

    free(Str);
    Str = quoted;
}
---|
25 | |
---|
26 | static void paup_print_line(const Seq& seq, int offset, int first_line, Writer& write) { |
---|
27 | // print paup file. |
---|
28 | int length = SEQLINE - 10; |
---|
29 | write.out(" "); |
---|
30 | |
---|
31 | int indi; |
---|
32 | |
---|
33 | const char *id = seq.get_id(); |
---|
34 | for (indi = 0; indi < 10 && id[indi]; indi++) // truncate id to 10 characters |
---|
35 | write.out(id[indi]); |
---|
36 | |
---|
37 | if (offset < seq.get_len()) { |
---|
38 | for (; indi < 11; indi++) write.out(' '); |
---|
39 | |
---|
40 | const char *sequence = seq.get_seq(); |
---|
41 | |
---|
42 | int indj = 0; |
---|
43 | for (indi = indj = 0; indi < length; indi++) { |
---|
44 | if ((offset + indi) < seq.get_len()) { |
---|
45 | write.out(sequence[offset + indi]); |
---|
46 | indj++; |
---|
47 | if (indj == 10 && indi < (length - 1) && (indi + offset) < (seq.get_len() - 1)) { |
---|
48 | write.out(' '); |
---|
49 | indj = 0; |
---|
50 | } |
---|
51 | } |
---|
52 | else |
---|
53 | break; |
---|
54 | } |
---|
55 | } |
---|
56 | |
---|
57 | if (first_line) |
---|
58 | write.outf(" [%d - %d]", offset + 1, (offset + indi)); |
---|
59 | |
---|
60 | write.out('\n'); |
---|
61 | } |
---|
62 | |
---|
63 | static void paup_print_headerstart(Writer& write) { |
---|
64 | write.out("#NEXUS\n"); |
---|
65 | write.outf("[! RDP - the Ribosomal Database Project, (%s).]\n", today_date()); |
---|
66 | write.out("[! To get started, send HELP to rdp@info.mcs.anl.gov ]\n"); |
---|
67 | write.out("BEGIN DATA;\n DIMENSIONS\n"); |
---|
68 | } |
---|
69 | |
---|
70 | static void paup_print_header_counters(Writer& write) { |
---|
71 | write.outf(" NTAX = %6s\n NCHAR = %6s\n ;\n", "", ""); |
---|
72 | } |
---|
73 | static void paup_print_header_counters(Writer& write, int total_seq, int maxsize) { |
---|
74 | write.outf(" NTAX = %6d\n NCHAR = %6d\n ;\n", total_seq, maxsize); |
---|
75 | } |
---|
76 | |
---|
77 | static void paup_print_header(const Paup& paup, Writer& write) { |
---|
78 | // Print out the header of each paup format. |
---|
79 | paup_print_headerstart(write); |
---|
80 | paup_print_header_counters(write); |
---|
81 | |
---|
82 | write.out(" FORMAT\n LABELPOS = LEFT\n"); |
---|
83 | write.outf(" MISSING = .\n EQUATE = \"%s\"\n", paup.equate); |
---|
84 | write.outf(" INTERLEAVE\n DATATYPE = RNA\n GAP = %c\n ;\n", paup.gap); |
---|
85 | write.out(" OPTIONS\n GAPMODE = MISSING\n ;\n MATRIX\n"); |
---|
86 | } |
---|
87 | |
---|
88 | void to_paup(const FormattedFile& in, const char *outf) { |
---|
89 | // Convert from some format to NEXUS format. |
---|
90 | if (!is_input_format(in.type())) { |
---|
91 | throw_conversion_not_supported(in.type(), NEXUS); |
---|
92 | } |
---|
93 | |
---|
94 | FileWriter write(outf); |
---|
95 | Paup paup; |
---|
96 | |
---|
97 | paup_print_header(paup, write); |
---|
98 | |
---|
99 | Alignment ali; |
---|
100 | read_alignment(ali, in); |
---|
101 | |
---|
102 | for (int i = 0; i<ali.get_count(); ++i) { |
---|
103 | SeqPtr seq = ali.getSeqPtr(i); |
---|
104 | char *name = ARB_strdup(seq->get_id()); |
---|
105 | paup_verify_name(name); |
---|
106 | seq->replace_id(name); |
---|
107 | ca_assert(seq->get_id()); |
---|
108 | free(name); |
---|
109 | } |
---|
110 | |
---|
111 | int maxsize = ali.get_max_len(); |
---|
112 | int total_seq = ali.get_count(); |
---|
113 | int current = 0; |
---|
114 | |
---|
115 | while (maxsize > current) { |
---|
116 | int first_line = 0; |
---|
117 | for (int indi = 0; indi < total_seq; indi++) { |
---|
118 | if (current < ali.get_len(indi)) |
---|
119 | first_line++; |
---|
120 | paup_print_line(ali.get(indi), current, (first_line == 1), write); |
---|
121 | |
---|
122 | // Avoid repeating |
---|
123 | if (first_line == 1) |
---|
124 | first_line++; |
---|
125 | } |
---|
126 | current += (SEQLINE - 10); |
---|
127 | if (maxsize > current) write.out('\n'); |
---|
128 | } |
---|
129 | |
---|
130 | write.out(" ;\nENDBLOCK;\n"); |
---|
131 | |
---|
132 | // rewrite output header |
---|
133 | rewind(write.get_FILE()); |
---|
134 | paup_print_headerstart(write); |
---|
135 | paup_print_header_counters(write, total_seq, maxsize); |
---|
136 | |
---|
137 | write.seq_done(ali.get_count()); |
---|
138 | write.expect_written(); |
---|
139 | } |
---|