1 | // ------------- File format converting subroutine ------------- |
---|
2 | |
---|
3 | #include "defs.h" |
---|
4 | #include "fun.h" |
---|
5 | #include "global.h" |
---|
6 | #include <static_assert.h> |
---|
7 | #include <unistd.h> |
---|
8 | #include <arb_diff.h> |
---|
9 | |
---|
10 | static const char *format2name(Format type) { |
---|
11 | switch (type) { |
---|
12 | case EMBL: return "EMBL"; |
---|
13 | case GCG: return "GCG"; |
---|
14 | case GENBANK: return "GENBANK"; |
---|
15 | case MACKE: return "MACKE"; |
---|
16 | case NEXUS: return "NEXUS"; |
---|
17 | case PHYLIP: return "PHYLIP"; |
---|
18 | case FASTDNAML: return "FASTDNAML"; |
---|
19 | case PRINTABLE: return "PRINTABLE"; |
---|
20 | case SWISSPROT: return "SWISSPROT"; |
---|
21 | |
---|
22 | case UNKNOWN: ca_assert(0); |
---|
23 | } |
---|
24 | return NULL; |
---|
25 | } |
---|
26 | |
---|
27 | void throw_conversion_not_supported(Format inType, Format ouType) { // __ATTR__NORETURN |
---|
28 | throw_errorf(90, "Conversion from %s to %s is not supported", |
---|
29 | format2name(inType), format2name(ouType)); |
---|
30 | } |
---|
31 | void throw_conversion_failure(Format inType, Format ouType) { // __ATTR__NORETURN |
---|
32 | throw_errorf(91, "Conversion from %s to %s fails", |
---|
33 | format2name(inType), format2name(ouType)); |
---|
34 | } |
---|
35 | void throw_conversion_not_implemented(Format inType, Format ouType) { // __ATTR__NORETURN |
---|
36 | throw_errorf(92, "Conversion from %s to %s is not implemented (but is expected to be here)", |
---|
37 | format2name(inType), format2name(ouType)); |
---|
38 | } |
---|
39 | void throw_unsupported_input_format(Format inType) { // __ATTR__NORETURN |
---|
40 | throw_errorf(93, "Unsupported input format %s", format2name(inType)); |
---|
41 | } |
---|
42 | |
---|
43 | void throw_incomplete_entry() { // __ATTR__NORETURN |
---|
44 | throw_error(84, "Reached EOF before complete entry has been read"); |
---|
45 | } |
---|
46 | |
---|
// Bookkeeping for log_processed(); the unit tests below read both values.
static int log_processed_counter = 0; // number of log_processed() calls so far
static int log_seq_counter       = 0; // sum of all 'seqCount' values passed so far

// Record that 'seqCount' sequences have been processed.
// Prints a summary line to stderr only when CALOG is defined; the two
// counters above are updated in any case.
void log_processed(int seqCount) {
#if defined(CALOG)
    fprintf(stderr, "Total %d sequences have been processed\n", seqCount);
#endif // CALOG

    log_seq_counter += seqCount;
    ++log_processed_counter;
}
---|
58 | |
---|
59 | // -------------------------------------------------------------------------------- |
---|
60 | |
---|
61 | #ifdef UNIT_TESTS |
---|
62 | #include <arbdbt.h> // before test_unit.h! |
---|
63 | #include <arb_file.h> |
---|
64 | #include <test_unit.h> |
---|
65 | |
---|
66 | |
---|
// Enables the test parts that intentionally provoke conversion errors.
// Comment out the define to temporarily disable those intentional throws.
#define TEST_THROW
---|
68 | |
---|
69 | struct FormatSpec { |
---|
70 | Format type; // GENBANK, MACKE, ... |
---|
71 | const char *name; |
---|
72 | const char *testfile; // existing testfile (or NULL) |
---|
73 | int sequence_count; // number of sequences in 'testfile' |
---|
74 | }; |
---|
75 | |
---|
76 | #define FORMATSPEC_OUT_ONLY(tag) { tag, #tag, NULL, 1 } |
---|
77 | #define FORMATSPEC_GOT______(tag,file) { tag, #tag, "impexp/" file ".eft.exported", 1 } |
---|
78 | #define FORMATSPEC_GOT_PLAIN(tag,file,seqcount) { tag, #tag, "impexp/" file, seqcount } |
---|
79 | |
---|
80 | static FormatSpec format_spec[] = { |
---|
81 | // input formats |
---|
82 | // FORMATSPEC_GOT______(GENBANK, "genbank"), |
---|
83 | FORMATSPEC_GOT_PLAIN(GENBANK, "genbank.input", 3), |
---|
84 | FORMATSPEC_GOT_PLAIN(EMBL, "embl.input", 5), |
---|
85 | FORMATSPEC_GOT_PLAIN(MACKE, "macke.input", 5), |
---|
86 | FORMATSPEC_GOT_PLAIN(SWISSPROT, "swissprot.input", 1), // SWISSPROT |
---|
87 | |
---|
88 | // output formats |
---|
89 | FORMATSPEC_OUT_ONLY(GCG), |
---|
90 | FORMATSPEC_OUT_ONLY(NEXUS), |
---|
91 | FORMATSPEC_OUT_ONLY(PHYLIP), |
---|
92 | FORMATSPEC_OUT_ONLY(PRINTABLE), |
---|
93 | }; |
---|
94 | static const int fcount = ARRAY_ELEMS(format_spec); |
---|
95 | |
---|
// Index of each format inside 'format_spec'.
// The enumerators have to be listed in exactly the same order as the rows of
// that table; FORMATNUM_COUNT is the number of enumerators before it.
enum FormatNum {
    // formats listed with an input testfile
    NUM_GENBANK,
    NUM_EMBL,
    NUM_MACKE,
    NUM_SWISSPROT,

    // formats listed as output only
    NUM_GCG,
    NUM_NEXUS,
    NUM_PHYLIP,

    NUM_PRINTABLE,

    FORMATNUM_COUNT,
};
---|
110 | |
---|
// What the tests know about one conversion (from-format -> to-format).
struct Capabilities {
    bool supported;    // conversion is expected to succeed
    bool neverReturns; // conversion is excluded from testing (counted as "would never return")

    // A fresh entry is supported and testable.
    Capabilities()
        : supported(true),
          neverReturns(false)
    {}

    // Tell whether the test shall run this conversion.
    // With TEST_THROW defined, unsupported conversions are run as well (their
    // failure is what gets tested); without it only supported ones are run.
    // Entries flagged 'neverReturns' are never run.
    // Declared const: the query does not modify the entry, so it can also be
    // used through const references.
    bool shall_be_tested() const {
#if defined(TEST_THROW)
        return !neverReturns;
#else // !defined(TEST_THROW)
        return supported && !neverReturns;
#endif
    }
};
---|
128 | |
---|
129 | static Capabilities cap[fcount][fcount]; |
---|
130 | #define CAP(from,to) (cap[NUM_##from][NUM_##to]) |
---|
131 | |
---|
132 | #define TYPE(f) format_spec[f].type |
---|
133 | #define NAME(f) format_spec[f].name |
---|
134 | #define INPUT(f) format_spec[f].testfile |
---|
135 | #define EXSEQ(f) format_spec[f].sequence_count |
---|
136 | |
---|
// -----------------------------------------
//      shall .expected files be updated?
//
// TEST_AUTO_UPDATE                      : master switch; while it is undefined no update ever happens
// UPDATE_ONLY_IF_MISSING                : additionally require that the .expected file does not exist
// UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS : additionally require that more_than_date_differs() is true

// #define TEST_AUTO_UPDATE
// #define UPDATE_ONLY_IF_MISSING
#define UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS
---|
143 | |
---|
144 | inline bool more_than_date_differs(const char *file, const char *expected) { |
---|
145 | return !ARB_textfiles_have_difflines(file, expected, 0, 1); |
---|
146 | } |
---|
147 | |
---|
#if defined(TEST_AUTO_UPDATE)
// Decide whether the reference file 'expected' shall be overwritten by 'file'.
// Starts from "yes" and applies each restriction whose UPDATE_ONLY_* switch is defined:
//   UPDATE_ONLY_IF_MISSING                : 'expected' must not be a regular file
//   UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS : more_than_date_differs(file, expected) must be true
inline bool want_auto_update(const char *file, const char *expected) {
    // Depending on the switches above, one or both parameters may be unused.
    // (void) casts silence that warning without the self-assignment that the
    // compiler would complain about in turn.
    (void)file;
    (void)expected;

    bool shall_update = true;

#if defined(UPDATE_ONLY_IF_MISSING)
    shall_update = shall_update && !GB_is_regularfile(expected);
#endif
#if defined(UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS)
    shall_update = shall_update && more_than_date_differs(file, expected);
#endif
    return shall_update;
}
#else // !TEST_AUTO_UPDATE
// Auto-update is switched off: never request an update.
inline bool want_auto_update(const char * /* file */, const char * /* expected */) {
    return false;
}
#endif
---|
168 | |
---|
169 | static void test_expected_conversion(const char *file, const char *flavor) { |
---|
170 | char *expected; |
---|
171 | if (flavor) expected = GBS_global_string_copy("%s.%s.expected", file, flavor); |
---|
172 | else expected = GBS_global_string_copy("%s.expected", file); |
---|
173 | |
---|
174 | bool shall_update = want_auto_update(file, expected); |
---|
175 | if (shall_update) { |
---|
176 | // TEST_EXPECT(0); // completely avoid real update |
---|
177 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(system(GBS_global_string("cp %s %s", file, expected))); |
---|
178 | } |
---|
179 | else { |
---|
180 | TEST_REJECT(more_than_date_differs(file, expected)); |
---|
181 | } |
---|
182 | free(expected); |
---|
183 | } |
---|
184 | |
---|
185 | static const char *test_convert(const char *inf, const char *outf, Format inType, Format ouType) { |
---|
186 | const char *error = NULL; |
---|
187 | try { |
---|
188 | convert(FormattedFile(inf ? inf : "infilename", inType), |
---|
189 | FormattedFile(outf ? outf : "outfilename", ouType)); |
---|
190 | } |
---|
191 | catch (Convaln_exception& exc) { error = GBS_global_string("%s (#%i)", exc.get_msg(), exc.get_code()); } |
---|
192 | return error; |
---|
193 | } |
---|
194 | |
---|
195 | static void test_convert_by_format_num(int from, int to) { |
---|
196 | char *toFile = GBS_global_string_copy("impexp/conv.%s_2_%s", NAME(from), NAME(to)); |
---|
197 | if (GB_is_regularfile(toFile)) TEST_EXPECT_ZERO_OR_SHOW_ERRNO(unlink(toFile)); |
---|
198 | |
---|
199 | int old_processed_counter = log_processed_counter; |
---|
200 | int old_seq_counter = log_seq_counter; |
---|
201 | |
---|
202 | const char *error = test_convert(INPUT(from), toFile, TYPE(from), TYPE(to)); |
---|
203 | |
---|
204 | int converted_seqs = log_seq_counter-old_seq_counter; |
---|
205 | int expected_seqs = EXSEQ(from); |
---|
206 | if (to == NUM_GCG) expected_seqs = 1; // we stop after first file (useless to generate numerous files) |
---|
207 | |
---|
208 | Capabilities& me = cap[from][to]; |
---|
209 | |
---|
210 | if (me.supported) { |
---|
211 | if (error) TEST_ERROR("convert() reports error: '%s' (for supported conversion)", error); |
---|
212 | TEST_EXPECT(GB_is_regularfile(toFile)); |
---|
213 | TEST_EXPECT_EQUAL(converted_seqs, expected_seqs); |
---|
214 | TEST_EXPECT_EQUAL(log_processed_counter, old_processed_counter+1); |
---|
215 | |
---|
216 | TEST_EXPECT_LESS_EQUAL(10, GB_size_of_file(toFile)); // less than 10 bytes |
---|
217 | test_expected_conversion(toFile, NULL); |
---|
218 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(unlink(toFile)); |
---|
219 | } |
---|
220 | else { |
---|
221 | if (!error) TEST_ERROR("No error for unsupported conversion '%s'", GBS_global_string("%s -> %s", NAME(from), NAME(to))); |
---|
222 | TEST_REJECT_NULL(strstr(error, "supported")); // wrong error |
---|
223 | TEST_REJECT(GB_is_regularfile(toFile)); // unsupported produced output |
---|
224 | } |
---|
225 | TEST_EXPECT_EQUAL(me.supported, !error); |
---|
226 | |
---|
227 | #if defined(TEST_THROW) |
---|
228 | { |
---|
229 | // test if conversion from empty and text file fails |
---|
230 | |
---|
231 | const char *fromFile = "general/empty.input"; |
---|
232 | |
---|
233 | error = test_convert(fromFile, toFile, TYPE(from), TYPE(to)); |
---|
234 | TEST_REJECT_NULL(error); |
---|
235 | |
---|
236 | fromFile = "general/text.input"; |
---|
237 | error = test_convert(fromFile, toFile, TYPE(from), TYPE(to)); |
---|
238 | TEST_REJECT_NULL(error); |
---|
239 | } |
---|
240 | #endif |
---|
241 | |
---|
242 | free(toFile); |
---|
243 | } |
---|
244 | |
---|
245 | inline bool isInputFormat(int num) { return is_input_format(TYPE(num)); } |
---|
246 | |
---|
247 | static void init_cap() { |
---|
248 | for (int from = 0; from<fcount; from++) { |
---|
249 | for (int to = 0; to<fcount; to++) { |
---|
250 | Capabilities& me = cap[from][to]; |
---|
251 | if (!isInputFormat(from)) me.supported = false; |
---|
252 | } |
---|
253 | } |
---|
254 | } |
---|
255 | |
---|
// Declare the conversion t1 -> t2 unsupported in the capability table, after
// checking that t1 is an input format.
// Both statements are wrapped in do { } while (0), so the macro behaves like a
// single statement (e.g. below an unbraced 'if'). Use it with a trailing ';'.
#define NOT_SUPPORTED(t1,t2) do {                       \
        TEST_EXPECT(isInputFormat(NUM_##t1));           \
        cap[NUM_##t1][NUM_##t2].supported = false;      \
    } while (0)
---|
257 | |
---|
258 | static int will_convert(int from) { |
---|
259 | int will = 0; |
---|
260 | for (int to = 0; to<fcount; to++) { |
---|
261 | Capabilities& me = cap[from][to]; |
---|
262 | if (me.supported && me.shall_be_tested()) { |
---|
263 | will++; |
---|
264 | } |
---|
265 | } |
---|
266 | return will; |
---|
267 | } |
---|
268 | |
---|
269 | void TEST_SLOW_converter() { |
---|
270 | STATIC_ASSERT(FORMATNUM_COUNT == fcount); |
---|
271 | |
---|
272 | init_cap(); |
---|
273 | |
---|
274 | NOT_SUPPORTED(GENBANK, SWISSPROT); |
---|
275 | NOT_SUPPORTED(EMBL, SWISSPROT); |
---|
276 | NOT_SUPPORTED(SWISSPROT, GENBANK); |
---|
277 | NOT_SUPPORTED(SWISSPROT, EMBL); |
---|
278 | |
---|
279 | int possible = 0; |
---|
280 | int tested = 0; |
---|
281 | int unsupported = 0; |
---|
282 | int neverReturns = 0; |
---|
283 | |
---|
284 | for (int from = 0; from<fcount; from++) { |
---|
285 | TEST_ANNOTATE(GBS_global_string("while converting from '%s'", NAME(from))); |
---|
286 | if (isInputFormat(from)) { |
---|
287 | if (will_convert(from)<1) { |
---|
288 | TEST_ERROR("Conversion from %s seems unsupported", NAME(from)); |
---|
289 | } |
---|
290 | } |
---|
291 | for (int to = 0; to<fcount; to++) { |
---|
292 | possible++; |
---|
293 | Capabilities& me = cap[from][to]; |
---|
294 | |
---|
295 | if (me.shall_be_tested()) { |
---|
296 | TEST_ANNOTATE(GBS_global_string("while converting %s -> %s", NAME(from), NAME(to))); |
---|
297 | test_convert_by_format_num(from, to); |
---|
298 | tested++; |
---|
299 | } |
---|
300 | |
---|
301 | unsupported += !me.supported; |
---|
302 | neverReturns += me.neverReturns; |
---|
303 | } |
---|
304 | } |
---|
305 | TEST_ANNOTATE(NULL); |
---|
306 | |
---|
307 | fprintf(stderr, |
---|
308 | "Conversion test summary:\n" |
---|
309 | " - formats: %3i\n" |
---|
310 | " - conversions: %3i (possible)\n" |
---|
311 | " - unsupported: %3i\n" |
---|
312 | " - tested: %3i\n" |
---|
313 | " - neverReturns: %3i (would never return - not checked)\n" |
---|
314 | " - converted: %3i\n", |
---|
315 | fcount, |
---|
316 | possible, |
---|
317 | unsupported, |
---|
318 | tested, |
---|
319 | neverReturns, |
---|
320 | tested-unsupported); |
---|
321 | |
---|
322 | int untested = possible - tested; |
---|
323 | TEST_EXPECT_EQUAL(untested, neverReturns); |
---|
324 | } |
---|
325 | |
---|
326 | #endif // UNIT_TESTS |
---|