| 1 | // ------------- File format converting subroutine ------------- |
|---|
| 2 | |
|---|
| 3 | #include "defs.h" |
|---|
| 4 | #include "fun.h" |
|---|
| 5 | #include "global.h" |
|---|
| 6 | #include <static_assert.h> |
|---|
| 7 | |
|---|
| 8 | static const char *format2name(Format type) { |
|---|
| 9 | switch (type) { |
|---|
| 10 | case EMBL: return "EMBL"; |
|---|
| 11 | case GCG: return "GCG"; |
|---|
| 12 | case GENBANK: return "GENBANK"; |
|---|
| 13 | case MACKE: return "MACKE"; |
|---|
| 14 | case NEXUS: return "NEXUS"; |
|---|
| 15 | case PHYLIP: return "PHYLIP"; |
|---|
| 16 | case FASTDNAML: return "FASTDNAML"; |
|---|
| 17 | case PRINTABLE: return "PRINTABLE"; |
|---|
| 18 | case SWISSPROT: return "SWISSPROT"; |
|---|
| 19 | |
|---|
| 20 | case UNKNOWN: ca_assert(0); |
|---|
| 21 | } |
|---|
| 22 | return NULL; |
|---|
| 23 | } |
|---|
| 24 | |
|---|
| 25 | void throw_conversion_not_supported(Format inType, Format ouType) { // __ATTR__NORETURN |
|---|
| 26 | throw_errorf(90, "Conversion from %s to %s is not supported", |
|---|
| 27 | format2name(inType), format2name(ouType)); |
|---|
| 28 | } |
|---|
| 29 | void throw_conversion_failure(Format inType, Format ouType) { // __ATTR__NORETURN |
|---|
| 30 | throw_errorf(91, "Conversion from %s to %s fails", |
|---|
| 31 | format2name(inType), format2name(ouType)); |
|---|
| 32 | } |
|---|
| 33 | void throw_conversion_not_implemented(Format inType, Format ouType) { // __ATTR__NORETURN |
|---|
| 34 | throw_errorf(92, "Conversion from %s to %s is not implemented (but is expected to be here)", |
|---|
| 35 | format2name(inType), format2name(ouType)); |
|---|
| 36 | } |
|---|
| 37 | void throw_unsupported_input_format(Format inType) { // __ATTR__NORETURN |
|---|
| 38 | throw_errorf(93, "Unsupported input format %s", format2name(inType)); |
|---|
| 39 | } |
|---|
| 40 | |
|---|
| 41 | void throw_incomplete_entry() { // __ATTR__NORETURN |
|---|
| 42 | throw_error(84, "Reached EOF before complete entry has been read"); |
|---|
| 43 | } |
|---|
| 44 | |
|---|
// Bookkeeping updated by log_processed():
// number of calls so far, and the sum of all reported sequence counts.
static int log_processed_counter = 0;
static int log_seq_counter       = 0;

// Record that 'seqCount' sequences have been processed.
// When CALOG is defined the count is additionally printed to stderr.
void log_processed(int seqCount) {
#ifdef CALOG
    fprintf(stderr, "Total %d sequences have been processed\n", seqCount);
#endif // CALOG

    log_seq_counter       += seqCount;
    log_processed_counter += 1;
}
|---|
| 56 | |
|---|
| 57 | // -------------------------------------------------------------------------------- |
|---|
| 58 | |
|---|
| 59 | #ifdef UNIT_TESTS |
|---|
| 60 | #include <arbdbt.h> // before test_unit.h! |
|---|
| 61 | #include <arb_file.h> |
|---|
| 62 | #include <test_unit.h> |
|---|
| 63 | |
|---|
| 64 | |
|---|
| 65 | #define TEST_THROW // comment out to temp. disable intentional throws |
|---|
| 66 | |
|---|
| 67 | struct FormatSpec { |
|---|
| 68 | Format type; // GENBANK, MACKE, ... |
|---|
| 69 | const char *name; |
|---|
| 70 | const char *testfile; // existing testfile (or NULL) |
|---|
| 71 | int sequence_count; // number of sequences in 'testfile' |
|---|
| 72 | }; |
|---|
| 73 | |
|---|
// Initializers for FormatSpec entries (type, name, testfile, sequence_count):
// - OUT_ONLY : format without testfile
// - GOT______: testfile is "impexp/<file>.eft.exported", holding 1 sequence
// - GOT_PLAIN: testfile is "impexp/<file>", holding 'seqcount' sequences
#define FORMATSPEC_OUT_ONLY(tag)                { tag, #tag, NULL, 1 }
#define FORMATSPEC_GOT______(tag,file)          { tag, #tag, "impexp/" file ".eft.exported", 1 }
#define FORMATSPEC_GOT_PLAIN(tag,file,seqcount) { tag, #tag, "impexp/" file, seqcount }
|---|
| 77 | |
|---|
| 78 | static FormatSpec format_spec[] = { |
|---|
| 79 | // input formats |
|---|
| 80 | // FORMATSPEC_GOT______(GENBANK, "genbank"), |
|---|
| 81 | FORMATSPEC_GOT_PLAIN(GENBANK, "genbank.input", 3), |
|---|
| 82 | FORMATSPEC_GOT_PLAIN(EMBL, "embl.input", 5), |
|---|
| 83 | FORMATSPEC_GOT_PLAIN(MACKE, "macke.input", 5), |
|---|
| 84 | FORMATSPEC_GOT_PLAIN(SWISSPROT, "swissprot.input", 1), // SWISSPROT |
|---|
| 85 | |
|---|
| 86 | // output formats |
|---|
| 87 | FORMATSPEC_OUT_ONLY(GCG), |
|---|
| 88 | FORMATSPEC_OUT_ONLY(NEXUS), |
|---|
| 89 | FORMATSPEC_OUT_ONLY(PHYLIP), |
|---|
| 90 | FORMATSPEC_OUT_ONLY(PRINTABLE), |
|---|
| 91 | }; |
|---|
| 92 | static const int fcount = ARRAY_ELEMS(format_spec); |
|---|
| 93 | |
|---|
// Index of each format inside 'format_spec' (keep both in the same order).
enum FormatNum {
    // formats having an input testfile
    NUM_GENBANK,
    NUM_EMBL,
    NUM_MACKE,
    NUM_SWISSPROT,

    // formats without testfile
    NUM_GCG,
    NUM_NEXUS,
    NUM_PHYLIP,
    NUM_PRINTABLE,

    FORMATNUM_COUNT, // number of enumerators above
};
|---|
| 108 | |
|---|
// Test expectations for a single (from, to) conversion pair.
struct Capabilities {
    bool supported;    // conversion is expected to succeed
    bool neverReturns; // conversion has to be skipped by the test

    // By default a conversion counts as supported and testable.
    Capabilities()
        : supported(true),
          neverReturns(false)
    {}

    // Whether the test shall run this conversion.
    // Pairs flagged 'neverReturns' are always skipped; unsupported pairs are
    // only run while TEST_THROW is defined.
    bool shall_be_tested() {
        if (neverReturns) return false;
#if defined(TEST_THROW)
        return true;
#else // !defined(TEST_THROW)
        return supported;
#endif
    }
};
|---|
| 126 | |
|---|
| 127 | static Capabilities cap[fcount][fcount]; |
|---|
| 128 | #define CAP(from,to) (cap[NUM_##from][NUM_##to]) |
|---|
| 129 | |
|---|
| 130 | #define TYPE(f) format_spec[f].type |
|---|
| 131 | #define NAME(f) format_spec[f].name |
|---|
| 132 | #define INPUT(f) format_spec[f].testfile |
|---|
| 133 | #define EXSEQ(f) format_spec[f].sequence_count |
|---|
| 134 | |
|---|
| 135 | // ---------------------------------- |
|---|
| 136 | // update .expected files ? |
|---|
| 137 | |
|---|
| 138 | // #define TEST_AUTO_UPDATE // never does update if undefined |
|---|
| 139 | // #define UPDATE_ONLY_IF_MISSING |
|---|
| 140 | #define UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS |
|---|
| 141 | |
|---|
| 142 | inline bool more_than_date_differs(const char *file, const char *expected) { |
|---|
| 143 | return !GB_test_textfile_difflines(file, expected, 0, 1); |
|---|
| 144 | } |
|---|
| 145 | |
|---|
#if defined(TEST_AUTO_UPDATE)
// Decides whether the file 'expected' shall be overwritten with 'file'.
// Each UPDATE_ONLY_IF_* switch that is defined adds one condition which has
// to hold; without any switch the answer is always 'true'.
inline bool want_auto_update(const char *file, const char *expected) {
    // both parameters are unused if no UPDATE_ONLY_IF_* switch is defined
    (void)file;
    (void)expected;

#if defined(UPDATE_ONLY_IF_MISSING)
    if (GB_is_regularfile(expected)) return false;
#endif
#if defined(UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS)
    if (!more_than_date_differs(file, expected)) return false;
#endif
    return true;
}
#else // !TEST_AUTO_UPDATE
// Auto-update is disabled: never request an update.
inline bool want_auto_update(const char * /* file */, const char * /* expected */) {
    return false;
}
#endif
|---|
| 166 | |
|---|
| 167 | static void test_expected_conversion(const char *file, const char *flavor) { |
|---|
| 168 | char *expected; |
|---|
| 169 | if (flavor) expected = GBS_global_string_copy("%s.%s.expected", file, flavor); |
|---|
| 170 | else expected = GBS_global_string_copy("%s.expected", file); |
|---|
| 171 | |
|---|
| 172 | bool shall_update = want_auto_update(file, expected); |
|---|
| 173 | if (shall_update) { |
|---|
| 174 | // TEST_ASSERT(0); // completely avoid real update |
|---|
| 175 | TEST_ASSERT_ZERO_OR_SHOW_ERRNO(system(GBS_global_string("cp %s %s", file, expected))); |
|---|
| 176 | } |
|---|
| 177 | else { |
|---|
| 178 | TEST_ASSERT(!more_than_date_differs(file, expected)); |
|---|
| 179 | } |
|---|
| 180 | free(expected); |
|---|
| 181 | } |
|---|
| 182 | |
|---|
| 183 | static const char *test_convert(const char *inf, const char *outf, Format inType, Format ouType) { |
|---|
| 184 | const char *error = NULL; |
|---|
| 185 | try { |
|---|
| 186 | convert(FormattedFile(inf ? inf : "infilename", inType), |
|---|
| 187 | FormattedFile(outf ? outf : "outfilename", ouType)); |
|---|
| 188 | } |
|---|
| 189 | catch (Convaln_exception& exc) { error = GBS_global_string("%s (#%i)", exc.get_msg(), exc.get_code()); } |
|---|
| 190 | return error; |
|---|
| 191 | } |
|---|
| 192 | |
|---|
| 193 | static void test_convert_by_format_num(int from, int to) { |
|---|
| 194 | char *toFile = GBS_global_string_copy("impexp/conv.%s_2_%s", NAME(from), NAME(to)); |
|---|
| 195 | if (GB_is_regularfile(toFile)) TEST_ASSERT_ZERO_OR_SHOW_ERRNO(unlink(toFile)); |
|---|
| 196 | |
|---|
| 197 | int old_processed_counter = log_processed_counter; |
|---|
| 198 | int old_seq_counter = log_seq_counter; |
|---|
| 199 | |
|---|
| 200 | const char *error = test_convert(INPUT(from), toFile, TYPE(from), TYPE(to)); |
|---|
| 201 | |
|---|
| 202 | int converted_seqs = log_seq_counter-old_seq_counter; |
|---|
| 203 | int expected_seqs = EXSEQ(from); |
|---|
| 204 | if (to == NUM_GCG) expected_seqs = 1; // we stop after first file (useless to generate numerous files) |
|---|
| 205 | |
|---|
| 206 | Capabilities& me = cap[from][to]; |
|---|
| 207 | |
|---|
| 208 | if (me.supported) { |
|---|
| 209 | if (error) TEST_ERROR("convert() reports error: '%s' (for supported conversion)", error); |
|---|
| 210 | TEST_ASSERT(GB_is_regularfile(toFile)); |
|---|
| 211 | TEST_ASSERT_EQUAL(converted_seqs, expected_seqs); |
|---|
| 212 | TEST_ASSERT_EQUAL(log_processed_counter, old_processed_counter+1); |
|---|
| 213 | |
|---|
| 214 | TEST_ASSERT_LOWER_EQUAL(10, GB_size_of_file(toFile)); // less than 10 bytes |
|---|
| 215 | test_expected_conversion(toFile, NULL); |
|---|
| 216 | TEST_ASSERT_ZERO_OR_SHOW_ERRNO(unlink(toFile)); |
|---|
| 217 | } |
|---|
| 218 | else { |
|---|
| 219 | if (!error) TEST_ERROR("No error for unsupported conversion '%s'", GBS_global_string("%s -> %s", NAME(from), NAME(to))); |
|---|
| 220 | TEST_ASSERT(strstr(error, "supported")); // wring error |
|---|
| 221 | TEST_ASSERT(!GB_is_regularfile(toFile)); // unsupported produced output |
|---|
| 222 | } |
|---|
| 223 | TEST_ASSERT(me.supported == !error); |
|---|
| 224 | |
|---|
| 225 | #if defined(TEST_THROW) |
|---|
| 226 | { |
|---|
| 227 | // test if conversion from empty and text file fails |
|---|
| 228 | |
|---|
| 229 | const char *fromFile = "general/empty.input"; |
|---|
| 230 | |
|---|
| 231 | error = test_convert(fromFile, toFile, TYPE(from), TYPE(to)); |
|---|
| 232 | TEST_ASSERT(error); |
|---|
| 233 | |
|---|
| 234 | fromFile = "general/text.input"; |
|---|
| 235 | error = test_convert(fromFile, toFile, TYPE(from), TYPE(to)); |
|---|
| 236 | TEST_ASSERT(error); |
|---|
| 237 | } |
|---|
| 238 | #endif |
|---|
| 239 | |
|---|
| 240 | free(toFile); |
|---|
| 241 | } |
|---|
| 242 | |
|---|
| 243 | inline bool isInputFormat(int num) { return is_input_format(TYPE(num)); } |
|---|
| 244 | |
|---|
| 245 | static void init_cap() { |
|---|
| 246 | for (int from = 0; from<fcount; from++) { |
|---|
| 247 | for (int to = 0; to<fcount; to++) { |
|---|
| 248 | Capabilities& me = cap[from][to]; |
|---|
| 249 | if (!isInputFormat(from)) me.supported = false; |
|---|
| 250 | } |
|---|
| 251 | } |
|---|
| 252 | } |
|---|
| 253 | |
|---|
// Flags the conversion t1 -> t2 as unsupported, after asserting that t1 is an
// input format. Wrapped in do { } while (0), so the macro expands to exactly
// one statement and stays correct inside an unbraced if/else.
#define NOT_SUPPORTED(t1,t2)                        \
    do {                                            \
        TEST_ASSERT(isInputFormat(NUM_##t1));       \
        cap[NUM_##t1][NUM_##t2].supported = false;  \
    } while (0)
|---|
| 255 | |
|---|
| 256 | static int will_convert(int from) { |
|---|
| 257 | int will = 0; |
|---|
| 258 | for (int to = 0; to<fcount; to++) { |
|---|
| 259 | Capabilities& me = cap[from][to]; |
|---|
| 260 | if (me.supported && me.shall_be_tested()) { |
|---|
| 261 | will++; |
|---|
| 262 | } |
|---|
| 263 | } |
|---|
| 264 | return will; |
|---|
| 265 | } |
|---|
| 266 | |
|---|
| 267 | void TEST_converter() { |
|---|
| 268 | COMPILE_ASSERT(FORMATNUM_COUNT == fcount); |
|---|
| 269 | |
|---|
| 270 | init_cap(); |
|---|
| 271 | |
|---|
| 272 | NOT_SUPPORTED(GENBANK, SWISSPROT); |
|---|
| 273 | NOT_SUPPORTED(EMBL, SWISSPROT); |
|---|
| 274 | NOT_SUPPORTED(SWISSPROT, GENBANK); |
|---|
| 275 | NOT_SUPPORTED(SWISSPROT, EMBL); |
|---|
| 276 | |
|---|
| 277 | int possible = 0; |
|---|
| 278 | int tested = 0; |
|---|
| 279 | int unsupported = 0; |
|---|
| 280 | int neverReturns = 0; |
|---|
| 281 | |
|---|
| 282 | for (int from = 0; from<fcount; from++) { |
|---|
| 283 | TEST_ANNOTATE_ASSERT(GBS_global_string("while converting from '%s'", NAME(from))); |
|---|
| 284 | if (isInputFormat(from)) { |
|---|
| 285 | if (will_convert(from)<1) { |
|---|
| 286 | TEST_ERROR("Conversion from %s seems unsupported", NAME(from)); |
|---|
| 287 | } |
|---|
| 288 | } |
|---|
| 289 | for (int to = 0; to<fcount; to++) { |
|---|
| 290 | possible++; |
|---|
| 291 | Capabilities& me = cap[from][to]; |
|---|
| 292 | |
|---|
| 293 | if (me.shall_be_tested()) { |
|---|
| 294 | TEST_ANNOTATE_ASSERT(GBS_global_string("while converting %s -> %s", NAME(from), NAME(to))); |
|---|
| 295 | test_convert_by_format_num(from, to); |
|---|
| 296 | tested++; |
|---|
| 297 | } |
|---|
| 298 | |
|---|
| 299 | unsupported += !me.supported; |
|---|
| 300 | neverReturns += me.neverReturns; |
|---|
| 301 | } |
|---|
| 302 | } |
|---|
| 303 | |
|---|
| 304 | fprintf(stderr, |
|---|
| 305 | "Conversion test summary:\n" |
|---|
| 306 | " - formats: %3i\n" |
|---|
| 307 | " - conversions: %3i (possible)\n" |
|---|
| 308 | " - unsupported: %3i\n" |
|---|
| 309 | " - tested: %3i\n" |
|---|
| 310 | " - neverReturns: %3i (would never return - not checked)\n" |
|---|
| 311 | " - converted: %3i\n", |
|---|
| 312 | fcount, |
|---|
| 313 | possible, |
|---|
| 314 | unsupported, |
|---|
| 315 | tested, |
|---|
| 316 | neverReturns, |
|---|
| 317 | tested-unsupported); |
|---|
| 318 | |
|---|
| 319 | int untested = possible - tested; |
|---|
| 320 | TEST_ASSERT_EQUAL(untested, neverReturns); |
|---|
| 321 | } |
|---|
| 322 | |
|---|
| 323 | #endif // UNIT_TESTS |
|---|