source: branches/stable/CONVERTALN/fconv.cxx

Last change on this file was 16861, checked in by westram, 6 years ago
  • reintegrates 'unittest' into 'trunk'
    • fixed non-deterministic behavior of add-species:
      • 2 sequences in test-DB were identical (changed 1 bp)
      • AP_tree_edge cannot be used to store insert-positions (uses pair of AP_tree_nlen instead)
      • corrected some undefined behavior (results did depend on compiler version+flags):
        • general order of inserts was undefined
        • order of initial-insert was undefined (used by complete tree reconstruction)
    • bugs fixed:
      • not all possible insert-positions were tested
      • sometimes species were added at wrong positions
      • if multiple species were inserted at the same position, the following optimization
        • did modify topology
        • now optimizes all multi-inserts globally
        • at leaf-positions: includes the leaf (Note: this does not modify the original topology)
      • insert order now is "longest sequence first" (was "shortest sequence first")
    • fixed unwanted behavior when testing for content of generated files
  • adds: log:branches/unittest@16807:16860
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 10.1 KB
Line 
1// ------------- File format converting subroutine -------------
2
3#include "defs.h"
4#include "fun.h"
5#include "global.h"
6#include <static_assert.h>
7#include <unistd.h>
8#include <arb_diff.h>
9
10static const char *format2name(Format type) {
11    switch (type) {
12        case EMBL:      return "EMBL";
13        case GCG:       return "GCG";
14        case GENBANK:   return "GENBANK";
15        case MACKE:     return "MACKE";
16        case NEXUS:     return "NEXUS";
17        case PHYLIP:    return "PHYLIP";
18        case FASTDNAML: return "FASTDNAML";
19        case PRINTABLE: return "PRINTABLE";
20        case SWISSPROT: return "SWISSPROT";
21
22        case UNKNOWN: ca_assert(0);
23    }
24    return NULp;
25}
26
27void throw_conversion_not_supported(Format inType, Format ouType) { // __ATTR__NORETURN
28    throw_errorf(90, "Conversion from %s to %s is not supported",
29                 format2name(inType), format2name(ouType));
30}
31void throw_conversion_failure(Format inType, Format ouType) { // __ATTR__NORETURN
32    throw_errorf(91, "Conversion from %s to %s fails",
33                 format2name(inType), format2name(ouType));
34}
35void throw_conversion_not_implemented(Format inType, Format ouType) { // __ATTR__NORETURN
36    throw_errorf(92, "Conversion from %s to %s is not implemented (but is expected to be here)",
37                 format2name(inType), format2name(ouType));
38}
39void throw_unsupported_input_format(Format inType) {  // __ATTR__NORETURN
40    throw_errorf(93, "Unsupported input format %s", format2name(inType));
41}
42
43void throw_incomplete_entry() { // __ATTR__NORETURN
44    throw_error(84, "Reached EOF before complete entry has been read");
45}
46
// Bookkeeping of log_processed():
// - log_processed_counter: number of calls
// - log_seq_counter:       sum of all reported sequence counts
static int log_processed_counter = 0;
static int log_seq_counter       = 0;

// Records that 'seqCount' sequences have been processed.
// If CALOG is defined, the count is also reported on stderr.
void log_processed(int seqCount) {
    log_seq_counter       = log_seq_counter + seqCount;
    log_processed_counter = log_processed_counter + 1;

#if defined(CALOG)
    fprintf(stderr, "Total %d sequences have been processed\n", seqCount);
#endif // CALOG
}
58
59// --------------------------------------------------------------------------------
60
61#ifdef UNIT_TESTS
62#include <arbdbt.h> // before test_unit.h!
63#include <arb_file.h>
64#include <test_unit.h>
65
66
// If defined,
// - conversions flagged as unsupported are run as well (see Capabilities::shall_be_tested) and
// - each tested conversion is additionally fed with an empty and a plain text file (both have to fail).
#define TEST_THROW // comment out to temp. disable intentional throws
68
// Describes one format taking part in the conversion test (one entry of format_spec[]).
struct FormatSpec {
    Format      type;           // GENBANK, MACKE, ...
    const char *name;           // name of 'type' (used in test messages and in names of generated files)
    const char *testfile;       // existing testfile (or NULp)
    int         sequence_count; // number of sequences in 'testfile'
};
75
// Initializers for FormatSpec ('tag' is the Format value; its stringified form becomes the name):
// - FORMATSPEC_OUT_ONLY:  format w/o testfile (testfile is NULp)
// - FORMATSPEC_GOT______: testfile is "impexp/<file>.eft.exported", containing 1 sequence
//                         (only referenced from a comment in this file)
// - FORMATSPEC_GOT_PLAIN: testfile is "impexp/<file>", containing 'seqcount' sequences
#define FORMATSPEC_OUT_ONLY(tag)                { tag, #tag, NULp, 1 }
#define FORMATSPEC_GOT______(tag,file)          { tag, #tag, "impexp/" file ".eft.exported", 1 }
#define FORMATSPEC_GOT_PLAIN(tag,file,seqcount) { tag, #tag, "impexp/" file, seqcount }
79
// All formats taking part in the conversion test.
// The entries have to be listed in the same order as the members of enum FormatNum,
// because FormatNum values are used as indices into this array.
static FormatSpec format_spec[] = {
    // input formats (a testfile exists)
    // FORMATSPEC_GOT______(GENBANK, "genbank"),
    FORMATSPEC_GOT_PLAIN(GENBANK, "genbank.input", 3),
    FORMATSPEC_GOT_PLAIN(EMBL, "embl.input", 5),
    FORMATSPEC_GOT_PLAIN(MACKE, "macke.input", 5),
    FORMATSPEC_GOT_PLAIN(SWISSPROT, "swissprot.input", 1), // SWISSPROT

    // output formats (no testfile)
    FORMATSPEC_OUT_ONLY(GCG),
    FORMATSPEC_OUT_ONLY(NEXUS),
    FORMATSPEC_OUT_ONLY(PHYLIP),
    FORMATSPEC_OUT_ONLY(PRINTABLE),
};
static const int fcount = ARRAY_ELEMS(format_spec); // number of formats in format_spec[]
95
// Indices of the formats in format_spec[] (also used for both dimensions of cap[][]).
// FORMATNUM_COUNT is compared against the size of format_spec[] by a STATIC_ASSERT in TEST_SLOW_converter().
enum FormatNum { // same order as above
    NUM_GENBANK,
    NUM_EMBL,
    NUM_MACKE,
    NUM_SWISSPROT,

    NUM_GCG,
    NUM_NEXUS,
    NUM_PHYLIP,

    NUM_PRINTABLE,

    FORMATNUM_COUNT, // number of formats (no format itself)
};
110
// Expected behavior of one conversion (one pair of source and target format).
struct Capabilities {
    bool supported;    // conversion is expected to succeed
    bool neverReturns; // conversion would never return => it is not checked

    // By default a conversion is supported and does return.
    Capabilities() :
        supported(true),
        neverReturns(false)
    {}

    // Returns true if the test shall run this conversion:
    // - with TEST_THROW:    every conversion that returns (unsupported ones included)
    // - without TEST_THROW: only supported conversions that return
    //
    // Pure query => const (allows calls on const objects and references).
    bool shall_be_tested() const {
#if defined(TEST_THROW)
        return !neverReturns;
#else // !defined(TEST_THROW)
        return supported && !neverReturns;
#endif
    }
};
128
// Expected capabilities of all conversions: cap[from][to] (both indices are FormatNum values).
static Capabilities cap[fcount][fcount];
#define CAP(from,to) (cap[NUM_##from][NUM_##to]) // access by format tags, e.g. CAP(EMBL,GCG)

// Accessors for the members of format_spec[f] ('f' is a FormatNum value)
#define TYPE(f)  format_spec[f].type
#define NAME(f)  format_spec[f].name
#define INPUT(f) format_spec[f].testfile
#define EXSEQ(f) format_spec[f].sequence_count
136
// ----------------------------------
//      update .expected files ?
//
// TEST_AUTO_UPDATE selects the implementation of want_auto_update();
// the UPDATE_ONLY_IF_... switches restrict when it requests an update.

// #define TEST_AUTO_UPDATE // never does update if undefined
// #define UPDATE_ONLY_IF_MISSING
#define UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS
143
144inline bool more_than_date_differs(const char *file, const char *expected) {
145    return ARB_textfiles_have_difflines(file, expected, 0, TextDiffMode(TDM_NOT_DIFF_LINECOUNT|TDM_IGNORE_TIMESTAMPS));
146}
147
// Decides whether 'expected' shall be overwritten with the freshly generated 'file'.
//
// With TEST_AUTO_UPDATE an update is requested, unless one of the enabled restrictions vetoes:
// - UPDATE_ONLY_IF_MISSING:                'expected' already exists as regular file
// - UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS: more_than_date_differs() reports false
// Without TEST_AUTO_UPDATE no update is ever requested.
#if defined(TEST_AUTO_UPDATE)
inline bool want_auto_update(const char *file, const char *expected) {
    // both parameters are unused if no restriction is enabled
    (void)file;
    (void)expected;

#if defined(UPDATE_ONLY_IF_MISSING)
    if (GB_is_regularfile(expected)) return false;
#endif
#if defined(UPDATE_ONLY_IF_MORE_THAN_DATE_DIFFERS)
    if (!more_than_date_differs(file, expected)) return false;
#endif
    return true;
}
#else // !TEST_AUTO_UPDATE
inline bool want_auto_update(const char * /* file */, const char * /* expected */) {
    return false;
}
#endif
168
169static void test_expected_conversion(const char *file, const char *flavor) {
170    char *expected;
171    if (flavor) expected = GBS_global_string_copy("%s.%s.expected", file, flavor);
172    else expected = GBS_global_string_copy("%s.expected", file);
173
174    bool shall_update = want_auto_update(file, expected);
175    if (shall_update) {
176        // TEST_EXPECT(0); // completely avoid real update
177        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(system(GBS_global_string("cp %s %s", file, expected)));
178    }
179    else {
180        TEST_REJECT(more_than_date_differs(file, expected));
181    }
182    free(expected);
183}
184
185static const char *test_convert(const char *inf, const char *outf, Format inType, Format ouType) {
186    const char *error = NULp;
187    try {
188        convert(FormattedFile(inf ? inf : "infilename", inType),
189                FormattedFile(outf ? outf : "outfilename", ouType));
190    }
191    catch (Convaln_exception& exc) { error = GBS_global_string("%s (#%i)", exc.get_msg(), exc.get_code()); }
192    return error;
193}
194
195static void test_convert_by_format_num(int from, int to) {
196    char *toFile = GBS_global_string_copy("impexp/conv.%s_2_%s", NAME(from), NAME(to));
197    if (GB_is_regularfile(toFile)) TEST_EXPECT_ZERO_OR_SHOW_ERRNO(unlink(toFile));
198
199    int old_processed_counter = log_processed_counter;
200    int old_seq_counter       = log_seq_counter;
201
202    const char *error = test_convert(INPUT(from), toFile, TYPE(from), TYPE(to));
203
204    int converted_seqs = log_seq_counter-old_seq_counter;
205    int expected_seqs  = EXSEQ(from);
206    if (to == NUM_GCG) expected_seqs = 1; // we stop after first file (useless to generate numerous files)
207
208    Capabilities& me = cap[from][to];
209
210    if (me.supported) {
211        if (error) TEST_ERROR("convert() reports error: '%s' (for supported conversion)", error);
212        TEST_EXPECT(GB_is_regularfile(toFile));
213        TEST_EXPECT_EQUAL(converted_seqs, expected_seqs);
214        TEST_EXPECT_EQUAL(log_processed_counter, old_processed_counter+1);
215
216        TEST_EXPECT_LESS_EQUAL(10, GB_size_of_file(toFile)); // less than 10 bytes
217        test_expected_conversion(toFile, NULp);
218        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(unlink(toFile));
219    }
220    else {
221        if (!error) TEST_ERROR("No error for unsupported conversion '%s'", GBS_global_string("%s -> %s", NAME(from), NAME(to)));
222        TEST_REJECT_NULL(strstr(error, "supported")); // wrong error
223        TEST_REJECT(GB_is_regularfile(toFile)); // unsupported produced output
224    }
225    TEST_EXPECT_EQUAL(me.supported, !error);
226
227#if defined(TEST_THROW)
228    {
229        // test if conversion from empty and text file fails
230
231        const char *fromFile = "general/empty.input";
232
233        error = test_convert(fromFile, toFile, TYPE(from), TYPE(to));
234        TEST_REJECT_NULL(error);
235
236        fromFile = "general/text.input";
237        error = test_convert(fromFile, toFile, TYPE(from), TYPE(to));
238        TEST_REJECT_NULL(error);
239    }
240#endif
241
242    free(toFile);
243}
244
245inline bool isInputFormat(int num) { return is_input_format(TYPE(num)); }
246
247static void init_cap() {
248    for (int from = 0; from<fcount; from++) {
249        for (int to = 0; to<fcount; to++) {
250            Capabilities& me = cap[from][to];
251            if (!isInputFormat(from)) me.supported = false;
252        }
253    }
254}
255
// Flags the conversion t1 -> t2 as unsupported (t1 and t2 are format tags, e.g. NOT_SUPPORTED(EMBL, SWISSPROT)).
// 't1' is expected to be an input format (checked via TEST_EXPECT).
// Wrapped into do/while(0), so the macro acts as ONE statement (e.g. below an 'if' without braces).
#define NOT_SUPPORTED(t1,t2) do {                       \
        TEST_EXPECT(isInputFormat(NUM_##t1));           \
        cap[NUM_##t1][NUM_##t2].supported = false;      \
    } while (0)
257
258static int will_convert(int from) {
259    int will = 0;
260    for (int to = 0; to<fcount; to++) {
261        Capabilities& me = cap[from][to];
262        if (me.supported && me.shall_be_tested()) {
263            will++;
264        }
265    }
266    return will;
267}
268
269void TEST_SLOW_converter() {
270    STATIC_ASSERT(FORMATNUM_COUNT == fcount);
271
272    init_cap();
273
274    NOT_SUPPORTED(GENBANK, SWISSPROT);
275    NOT_SUPPORTED(EMBL, SWISSPROT);
276    NOT_SUPPORTED(SWISSPROT, GENBANK);
277    NOT_SUPPORTED(SWISSPROT, EMBL);
278
279    int possible     = 0;
280    int tested       = 0;
281    int unsupported  = 0;
282    int neverReturns = 0;
283
284    for (int from = 0; from<fcount; from++) {
285        TEST_ANNOTATE(GBS_global_string("while converting from '%s'", NAME(from)));
286        if (isInputFormat(from)) {
287            if (will_convert(from)<1) {
288                TEST_ERROR("Conversion from %s seems unsupported", NAME(from));
289            }
290        }
291        for (int to = 0; to<fcount; to++) {
292            possible++;
293            Capabilities& me = cap[from][to];
294
295            if (me.shall_be_tested()) {
296                TEST_ANNOTATE(GBS_global_string("while converting %s -> %s", NAME(from), NAME(to)));
297                test_convert_by_format_num(from, to);
298                tested++;
299            }
300
301            unsupported  += !me.supported;
302            neverReturns += me.neverReturns;
303        }
304    }
305    TEST_ANNOTATE(NULp);
306
307    fprintf(stderr,
308            "Conversion test summary:\n"
309            " - formats:      %3i\n"
310            " - conversions:  %3i (possible)\n"
311            " - unsupported:  %3i\n"
312            " - tested:       %3i\n"
313            " - neverReturns: %3i (would never return - not checked)\n"
314            " - converted:    %3i\n",
315            fcount,
316            possible,
317            unsupported,
318            tested,
319            neverReturns,
320            tested-unsupported);
321
322    int untested = possible - tested;
323    TEST_EXPECT_EQUAL(untested, neverReturns);
324}
325
326#endif // UNIT_TESTS
Note: See TracBrowser for help on using the repository browser.