source: tags/ms_r18q1/TOOLS/arb_test.cxx

Last change on this file was 16919, checked in by westram, 6 years ago
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 27.7 KB
Line 
1// =============================================================== //
2//                                                                 //
3//   File      : arb_test.cxx                                      //
4//   Purpose   : unit tester for tools                             //
5//                                                                 //
6//   Coded by Ralf Westram (coder@reallysoft.de) in February 2011  //
7//   Institute of Microbiology (Technical University Munich)       //
8//   http://www.arb-home.de/                                       //
9//                                                                 //
10// =============================================================== //
11
12#include <arbdbt.h>
13#include <arb_defs.h>
14#include <arb_sleep.h>
15#include <arb_diff.h>
16#include <unistd.h>
17
// Entry point used when the binary is started directly.
// It does no work: it prints a short notice to stderr and returns EXIT_SUCCESS.
// Both arguments are ignored.
int ARB_main(int, char **) {
    fprintf(stderr, "%s", "don't call us\n");
    return EXIT_SUCCESS;
}
22
23// --------------------------------------------------------------------------------
24
25#ifdef UNIT_TESTS
26#include <arb_file.h>
27#include <test_unit.h>
28#include <test_runtool.h>
29
30// --------------------------------------------------------------------------------
31
32
33void TEST_SLOW_ascii_2_bin_2_ascii() {
34    const char *ascii_ORG  = "TEST_loadsave_ascii.arb";
35    const char *ascii      = "bin2ascii.arb";
36    const char *binary     = "ascii2bin.arb";
37    const char *binary_2ND = "ascii2bin2.arb";
38    const char *binary_3RD = "ascii2bin3.arb";
39
40    // test that errors from _each_ part of a piped command propagate correctly:
41    const char *failing_piped_cmds[] = {
42        "arb_weirdo | wc -l",      // first command fails
43        "echo hello | arb_weirdo", // second command fails
44        "arb_weirdo | arb_weirdo", // both commands fail
45
46        "arb_weirdo | wc -l | sort",       // first command fails
47        "echo hello | arb_weirdo | sort",  // second command fails
48        "echo hello | wc -l | arb_weirdo", // third command fails
49    };
50    for (unsigned c = 0; c<ARRAY_ELEMS(failing_piped_cmds); ++c) {
51        TEST_EXPECT_ERROR_CONTAINS(RUN_TOOL(failing_piped_cmds[c]), "System call failed");
52    }
53
54    TEST_RUN_TOOL("arb_2_ascii --help"); // checks proper documentation of available compression flags (in GB_get_supported_compression_flags)
55
56    // test conversion file -> file
57    TEST_RUN_TOOL(GBS_global_string("arb_2_bin   %s %s", ascii_ORG, binary));
58    TEST_RUN_TOOL(GBS_global_string("arb_2_ascii %s %s", binary, ascii));
59
60    TEST_EXPECT_TEXTFILES_EQUAL(ascii, ascii_ORG);
61
62    // test conversion (bin->ascii->bin) via stream (this tests 'arb_repair')
63    TEST_RUN_TOOL(GBS_global_string("arb_2_ascii %s - | arb_2_bin - %s", binary, binary_2ND));
64    // TEST_EXPECT_FILES_EQUAL(binary, binary_2ND); // can't compare binary files (binary_2ND differs (keys?))
65    // instead convert back to ascii and compare result with original
66    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(ascii));
67    TEST_RUN_TOOL(GBS_global_string("arb_2_ascii %s %s", binary_2ND, ascii));
68    TEST_EXPECT_FILES_EQUAL(ascii, ascii_ORG);
69
70
71    // test same using compression (gzip and bzip2)
72    TEST_RUN_TOOL(GBS_global_string("arb_2_ascii -Cz %s - | arb_2_bin -CB - %s", binary, binary_3RD));
73    // TEST_EXPECT_FILES_EQUAL(binary, binary_2ND); // can't compare binary files (binary_3RD differs (keys?))
74    // instead convert back to ascii and compare result with original
75    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(ascii));
76    TEST_RUN_TOOL(GBS_global_string("arb_2_ascii %s %s", binary_3RD, ascii));
77    TEST_EXPECT_FILES_EQUAL(ascii, ascii_ORG);
78
79    TEST_EXPECT_ERROR_CONTAINS(RUN_TOOL("arb_2_ascii -Cq -"), "System call failed"); // "Unknown compression flag 'q'"
80
81    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(ascii));
82    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(binary));
83    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(binary_2ND));
84    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(binary_3RD));
85    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink("ascii2bin.ARF"));
86    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink("ascii2bin2.ARF"));
87    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink("ascii2bin3.ARF"));
88}
89
90void TEST_arb_primer() {
91    const char *primer_db       = "TEST_nuc.arb";
92    const char *primer_stdin    = "tools/arb_primer.in";
93    const char *primer_out      = "tools/arb_primer.out";
94    const char *primer_expected = "tools/arb_primer_expected.out";
95
96    TEST_RUN_TOOL(GBS_global_string("arb_primer %s < %s", primer_db, primer_stdin));
97    TEST_EXPECT_FILES_EQUAL(primer_out, primer_expected);
98    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(primer_out));
99}
100
101static GB_ERROR removeVaryingDateFromTreeRemarks(const char *dbname) {
102    GB_ERROR  error     = NULp;
103    GB_shell  shell;
104    GBDATA   *gb_main   = GB_open(dbname, "rw");
105    if (!gb_main) error = GB_await_error();
106    else {
107        {
108            GB_transaction ta(gb_main);
109
110            GBDATA     *gb_tree_data    = GBT_get_tree_data(gb_main);
111            const char *truncate_after  = "\nunittest-tree\n";
112            size_t      truncate_offset = strlen(truncate_after);
113
114            if (!gb_tree_data) error = GB_await_error();
115            else {
116                for (GBDATA *gb_tree = GB_child(gb_tree_data);
117                     gb_tree && !error;
118                     gb_tree = GB_nextChild(gb_tree))
119                {
120                    GBDATA *gb_remark = GB_entry(gb_tree, "remark");
121                    if (!gb_remark) {
122                        error = "could not find 'remark' entry";
123                    }
124                    else {
125                        char *remark = GB_read_string(gb_remark);
126                        char *found  = strstr(remark, truncate_after);
127
128                        if (found) {
129                            strcpy(found+truncate_offset, "<date removed for testing>");
130                            error                  = GB_write_string(gb_remark, remark);
131                        }
132                        free(remark);
133                    }
134                }
135            }
136
137            ta.close(error);
138        }
139        if (!error) error = GB_save_as(gb_main, dbname, "a");
140        GB_close(gb_main);
141    }
142    return error;
143}
144
145// #define TEST_AUTO_UPDATE_TREE // uncomment to auto-update expected tree
146
147void TEST_SLOW_arb_read_tree() {
148    struct {
149        const char *basename;
150        const char *extraArgs;
151    }
152    run[] = {
153        { "newick",           "" },
154        { "newick_sq",        "-commentFromFile general/text.input" },
155        { "newick_dq",        "-scale 0.5" },
156        { "newick_group",     "-scale 10 -consense 10" },
157        { "newick_len",       "" },
158        { "newick_len_group", "" },
159    };
160
161    const char *dbin       = "min_ascii.arb";
162    const char *dbout      = "tools/read_tree_out.arb";
163    const char *dbexpected = "tools/read_tree_out_expected.arb";
164
165    for (size_t b = 0; b<ARRAY_ELEMS(run); ++b) {
166        const char *basename  = run[b].basename;
167        const char *extraArgs = run[b].extraArgs;
168        char       *treefile  = GBS_global_string_copy("tools/%s.tree", basename);
169        char       *treename  = GBS_global_string_copy("tree_%s", basename);
170
171        TEST_RUN_TOOL(GBS_global_string("arb_read_tree -db %s %s %s %s \"test %s\" %s",
172                                                   dbin, dbout, treename, treefile, basename, extraArgs));
173
174        dbin = dbout; // use out-db from previous loop ( = write all trees into one db)
175
176        free(treename);
177        free(treefile);
178    }
179
180    TEST_EXPECT_NO_ERROR(removeVaryingDateFromTreeRemarks(dbout));
181#if defined(TEST_AUTO_UPDATE_TREE)
182    TEST_COPY_FILE(dbout, dbexpected);
183#else // !defined(TEST_AUTO_UPDATE_TREE)
184    TEST_EXPECT_TEXTFILES_EQUAL(dbexpected, dbout);
185#endif
186    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(dbout));
187}
188
// Applies 'arb_replace' (called with 'args') to a temporary copy of 'infile'
// and expects the modified copy to equal the file 'expected'.
// The copy is named "<expected>.tmp" and gets removed afterwards.
#define TEST_ARB_REPLACE(infile,expected,args)                                          \
    do {                                                                                \
        char *workcopy = GBS_global_string_copy("%s.tmp", expected);                    \
                                                                                        \
        TEST_RUN_TOOL_NEVER_VALGRIND(GBS_global_string("cp %s %s", infile, workcopy));  \
        TEST_RUN_TOOL(GBS_global_string("arb_replace %s %s", args, workcopy));          \
        TEST_EXPECT_TEXTFILES_EQUAL(workcopy, expected);                                \
                                                                                        \
        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(workcopy));                            \
        free(workcopy);                                                                 \
    } while(0)
197
198void TEST_arb_replace() {
199    const char *infile = "tools/arb_replace.in";
200    const char *file1  = "tools/arb_replace_1.out";
201    const char *file2  = "tools/arb_replace_2.out";
202
203    TEST_ARB_REPLACE(infile, "tools/arb_replace_1.out", "'gene=GONE'");
204    TEST_ARB_REPLACE(file1,  infile,                    "-l 'GONE=gene'");
205    TEST_ARB_REPLACE(file1,  file2,                     "-L 'GONE=gene:\"*\"=( * )'");
206}
207
208// --------------------------------------------------------------------------------
209
210#include "command_output.h"
211
212void TEST_arb_message() {
213    TEST_STDERR_CONTAINS("arb_message \"this is the test message\"",
214                         "arb_message: this is the test message\n");
215}
216
217void TEST_SLOW_arb_probe() {
218    // test called here currently are duplicating the tests in
219    // arb_probe.cxx@TEST_SLOW_match_probe
220    // and arb_probe.cxx@TEST_SLOW_design_probe
221    //
222    // Here test of functionality is secondary.
223    // The primary goal here is to test calling the tools (i.e. arb_probe)
224   
225    TEST_SETUP_GLOBAL_ENVIRONMENT("ptserver");
226    TEST_STDOUT_EQUALS("arb_probe"
227                       " serverid=-666"
228                       " matchsequence=UAUCGGAGAGUUUGA",
229
230                       /* ---- */ "    name---- fullname mis N_mis wmis pos ecoli rev          'UAUCGGAGAGUUUGA'\1"
231                       "BcSSSS00\1" "  BcSSSS00            0     0  0.0   3     2 0   .......UU-===============-UCAAGUCGA\1"
232        );
233
234    TEST_STDOUT_EQUALS("arb_probe"
235                       " serverid=-666"
236                       " designnames=ClnCorin#CltBotul#CPPParap#ClfPerfr"
237                       " designmintargets=100",
238
239                       "Probe design parameters:\n"
240                       "Length of probe    18\n"
241                       "Temperature        [ 0.0 -400.0]\n"
242                       "GC-content         [30.0 - 80.0]\n"
243                       "E.Coli position    [any]\n"
244                       "Max. nongroup hits 0\n"
245                       "Min. group hits    100% (max. rejected coverage: 75%)\n"
246                       "Target             le apos ecol qual grps   G+C temp     Probe sequence | Decrease T by n*.3C -> probe matches n non group species\n"
247                       "CGAAAGGAAGAUUAAUAC 18 A=94   82   77    4  33.3 48.0 GUAUUAAUCUUCCUUUCG | - - - - - - - - - - - - - - - - - - - -\n"
248                       "GAAAGGAAGAUUAAUACC 18 A+ 1   83   77    4  33.3 48.0 GGUAUUAAUCUUCCUUUC | - - - - - - - - - - - - - - - - - - - -\n"
249                       "UCAAGUCGAGCGAUGAAG 18 B=18   17   61    4  50.0 54.0 CUUCAUCGCUCGACUUGA | - - - - - - - - - - - - - - - 2 2 2 2 2\n"
250                       "AUCAAGUCGAGCGAUGAA 18 B- 1   16   45    4  44.4 52.0 UUCAUCGCUCGACUUGAU | - - - - - - - - - - - 2 2 2 2 2 2 2 2 2\n"
251                       );
252}
253
254void TEST_SLOW_arb_probe_match() {
255    TEST_SETUP_GLOBAL_ENVIRONMENT("ptserver");
256
257    // this probe-match is also tested with 'arb_probe'. see arb_probe.cxx@TEST_arb_probe_match
258    TEST_STDOUT_EQUALS("arb_probe_match"
259                       " --port :../sok/pt.socket"
260                       " --n-matches 0"
261                       " --n-match-bound 4"
262                       " --mismatches 3"
263                       " --sequence GAGCGGUCAG",
264
265                       "acc     \t"     "start\t" "stop\t" "pos\t" "mis\t" "wmis\t" "nmis\t" "dt\t" "rev\t" "seq\n"
266                       "ARB_2CA9F764\t" "0\t"     "0\t"    "24\t"  "1\t"   "1.1\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-======A===-AUGGGAGCU\t" "\n"
267                       "ARB_6B04C30A\t" "10\t"    "20\t"   "24\t"  "2\t"   "2.2\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-======A=C=-ACGGGAGCU\t" "\n"
268                       "ARB_4C6C9E8C\t" "20\t"    "170\t"  "67\t"  "3\t"   "2.4\t"  "0\t"    "0\t"  "0\t"   "GGAUUUGUU-=g====CG==-CGGCGGACG\t" "\n"
269                       "ARB_948948A3\t" "0\t"     "0\t"    "81\t"  "3\t"   "2.8\t"  "0\t"    "0\t"  "0\t"   "ACGAGUGGC-=gA===C===-UUGGAAACG\t" "\n"
270                       "ARB_5BEE4C92\t" "0\t"     "0\t"    "85\t"  "3\t"   "3.2\t"  "0\t"    "0\t"  "0\t"   "CGGCGGGAC-=g==CU====-AACCUGCGG\t" "\n"
271                       "ARB_2180C521\t" "0\t"     "0\t"    "24\t"  "3\t"   "3.6\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-======Aa=C-GAUGGAAGC\t" "\n"
272                       "ARB_815E94DB\t" "0\t"     "0\t"    "94\t"  "3\t"   "3.6\t"  "0\t"    "0\t"  "0\t"   "GGACUGCCC-==Aa==A===-CUAAUACCG\t" "\n"
273                       "ARB_948948A3\t" "0\t"     "0\t"    "24\t"  "3\t"   "4\t"    "0\t"    "0\t"  "0\t"   "GAUCAAGUC-==A====a=C-AGGUCUUCG\t" "\n"
274                       "ARB_9E1D1B16\t" "0\t"     "0\t"    "28\t"  "3\t"   "4\t"    "0\t"    "0\t"  "0\t"   "GAUCAAGUC-==A====a=C-GGGAAGGGA\t" "\n"
275                       "ARB_CEB24FD3\t" "0\t"     "0\t"    "24\t"  "3\t"   "4.1\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-=====A=G=A-GUUCCUUCG\t" "\n"
276                       "ARB_4FCDD74F\t" "0\t"     "0\t"    "24\t"  "3\t"   "4.1\t"  "0\t"    "0\t"  "0\t"   ".AUCAAGUC-=====A=G=A-GCUUCUUCG\t" "\n"
277                       "ARB_CF69AC5C\t" "0\t"     "0\t"    "24\t"  "3\t"   "4.1\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-=====A=G=A-GUUCCUUCG\t" "\n"
278                       "ARB_5BEE4C92\t" "0\t"     "0\t"    "24\t"  "3\t"   "4.1\t"  "0\t"    "0\t"  "0\t"   "GAUCAAGUC-=====A=G=A-GUUUCCUUC\t" "\n"
279                       "ARB_815E94DB\t" "0\t"     "0\t"    "156\t" "3\t"   "4.1\t"  "0\t"    "0\t"  "0\t"   "GUAGCCGUU-===GAA====-CGGCUGGAU\t" "\n"
280                       "ARB_1763CF6\t"  "0\t"     "0\t"    "24\t"  "3\t"   "2.4\t"  "3\t"    "0\t"  "0\t"   "GAUCAAGUC-=======...-<more>\t" "\n"
281                       "ARB_ED8B86F\t"  "0\t"     "0\t"    "28\t"  "3\t"   "2.4\t"  "3\t"    "0\t"  "0\t"   "GAUCAAGUC-=======...-<more>\t" "\n"
282        );
283}
284
285#define IN_DB     "tools/dnarates.arb"
286#define OUT_DB    "tools/dnarates_result.arb"
287#define WANTED_DB "tools/dnarates_expected.arb"
288
289// #define TEST_AUTO_UPDATE_SAI // uncomment to auto-update expected SAI
290
291void TEST_SLOW_arb_dna_rates() {
292    TEST_STDOUT_CONTAINS("arb_dnarates tools/dnarates.inp " IN_DB " " OUT_DB, "\nWriting 'POS_VAR_BY_ML_1'\n");
293
294#if defined(TEST_AUTO_UPDATE_SAI)
295    TEST_COPY_FILE(OUT_DB, WANTED_DB);
296#else // !defined(TEST_AUTO_UPDATE_SAI)
297    TEST_EXPECT_TEXTFILES_EQUAL(WANTED_DB, OUT_DB);
298#endif
299    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(OUT_DB));
300}
301
302#define RATES_DB "tools/exportrates.arb"
303
304void TEST_arb_export_rates() {
305    // Note: just testing against regression here.
306    // Since the output is quite longish, we just test the checksums of the results.
307    //
308    // If one of the checksums changes unexpectedly and you want to see more details about the change,
309    // - go back to a revision with a correct checksum,
310    // - add passing TEST_OUTPUT_EQUALS for broken command and
311    // - move that test to broken revision.
312
313    TEST_OUTPUT_HAS_CHECKSUM("arb_export_rates -d " RATES_DB " POS_VAR_BY_PARSIMONY", 0xc75a5fad);
314    TEST_OUTPUT_HAS_CHECKSUM("arb_export_rates -d " RATES_DB " -r POS_VAR_BY_PARSIMONY", 0xd69fb01e);
315    TEST_OUTPUT_HAS_CHECKSUM("arb_export_rates -d " RATES_DB " -r \"\"", 0xad0461ce);
316}
317
318#define TREE_DB "tools/tree.arb"
319
320void TEST_arb_export_tree() {
321    TEST_STDOUT_EQUALS("arb_export_tree tree_mini " TREE_DB,
322                       "((( 'VibFurni' :0.02952, 'VibVulni' :0.01880):0.04015, 'VibChole' :0.03760):1.00000,( 'AcnPleur' :0.12011, 'PrtVulga' :0.06756):1.00000, 'HlmHalod' :1.00000);\n");
323    TEST_STDOUT_EQUALS("arb_export_tree --bifurcated tree_mini " TREE_DB,
324                       "(((( 'VibFurni' :0.02952, 'VibVulni' :0.01880):0.04015, 'VibChole' :0.03760):0.04610,( 'AcnPleur' :0.12011, 'PrtVulga' :0.06756):0.01732):0.07176, 'HlmHalod' :0.12399);\n");
325    TEST_STDOUT_EQUALS("arb_export_tree --doublequotes tree_mini " TREE_DB,
326                       "((( \"VibFurni\" :0.02952, \"VibVulni\" :0.01880):0.04015, \"VibChole\" :0.03760):1.00000,( \"AcnPleur\" :0.12011, \"PrtVulga\" :0.06756):1.00000, \"HlmHalod\" :1.00000);\n");
327
328    TEST_STDOUT_EQUALS("arb_export_tree --nobranchlens tree_mini " TREE_DB,
329                       "((( 'VibFurni'  'VibVulni' ) 'VibChole' ),( 'AcnPleur'  'PrtVulga' ), 'HlmHalod' );\n");
330    TEST_EXPECT__BROKEN(0); // the test above returns a wrong result (commas are missing)
331
332    TEST_OUTPUT_EQUALS("arb_export_tree \"\" " TREE_DB,
333                       ";\n",                                                                    // shall export an empty newick tree
334                       "");                                                                      // without error!
335    TEST_OUTPUT_EQUALS("arb_export_tree tree_nosuch " TREE_DB,
336                       ";\n",                                                                    // shall export an empty newick tree
337                       "arb_export_tree from '" TREE_DB "': ARB ERROR: Failed to read tree 'tree_nosuch' (Reason: tree not found)\n"); // with error!
338}
339TEST_PUBLISH(TEST_arb_export_tree);
340
341// --------------------------------------------------------------------------------
342
343// #define TEST_AUTO_UPDATE_EXP_SEQ // uncomment to auto-update expected sequence exports
344
// name of the file containing the expected content of 'file'
// (works for string literals only, since it relies on literal concatenation)
#define EXPECTED(file) file ".expected"

// Either overwrites the expected file with 'outfile' (if TEST_AUTO_UPDATE_EXP_SEQ
// is defined) or compares 'outfile' with the expected file.
#ifdef TEST_AUTO_UPDATE_EXP_SEQ
#define UPDATE_OR_COMPARE(outfile) TEST_COPY_FILE(outfile, EXPECTED(outfile))
#else // !TEST_AUTO_UPDATE_EXP_SEQ
#define UPDATE_OR_COMPARE(outfile) TEST_EXPECT_TEXTFILES_EQUAL(outfile, EXPECTED(outfile))
#endif

// checks (or updates) the expectation for 'outfile', then removes 'outfile'
#define TEST_OUTFILE_EXPECTED(outfile)                          \
    do {                                                        \
        UPDATE_OR_COMPARE(outfile);                             \
        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(outfile));     \
    } while(0)

// like TEST_OUTFILE_EXPECTED, but uses the __BROKEN variant of the comparison
// (never updates the expected file)
#define TEST_OUTFILE_EXPECTED__BROKEN(outfile)                                  \
    do {                                                                        \
        TEST_EXPECT_TEXTFILES_EQUAL__BROKEN(outfile, EXPECTED(outfile));        \
        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(outfile));                     \
    } while(0)
360
361#define SEQ_DB          "TEST_loadsave.arb"
362#define TEMPLATE_DB     "tools/min_template.arb"
363#define EFT             "../../lib/export/fasta_wide.eft" // ../lib/export/fasta_wide.eft
364#define EXSEQ_EFT       "tools/exseq_via_eft.fasta"
365#define EXSEQ_FASTA     "tools/exseq.fasta"
366#define EXSEQ_ARB       "tools/exseq.arb"
367#define EXSEQ_ARB_ASCII "tools/exseq_ascii.arb"
368#define EXSEQ_RESTRICT  "tools/acc.list"
369
370void TEST_arb_export_sequences() {
371    TEST_RUN_TOOL("arb_export_sequences --source " SEQ_DB " --format FASTA   --dest " EXSEQ_FASTA);
372    TEST_OUTFILE_EXPECTED(EXSEQ_FASTA);
373
374    TEST_RUN_TOOL("arb_export_sequences --source " SEQ_DB " --format " EFT " --dest " EXSEQ_EFT   " --accs " EXSEQ_RESTRICT);
375    TEST_OUTFILE_EXPECTED(EXSEQ_EFT);
376
377    TEST_RUN_TOOL("arb_export_sequences --source " SEQ_DB " --format ARB     --dest " EXSEQ_ARB   " --arb-template " TEMPLATE_DB
378                  " && "
379                  "arb_2_ascii " EXSEQ_ARB " " EXSEQ_ARB_ASCII
380        );
381    TEST_OUTFILE_EXPECTED(EXSEQ_ARB_ASCII);
382    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(EXSEQ_ARB));
383}
384
385#define FILTSEQ_DB            "TEST_prot_tiny.arb" // ../UNIT_TESTER/run/TEST_prot_tiny.arb
386#define EXFLT_DNA_UNFILTERED  "tools/exflt_DNA_unfiltered.fasta"
387#define EXFLT_AMI_UNFILTERED  "tools/exflt_AMI_unfiltered.fasta"
388#define EXFLT_DNA_PARTIAL_ALI "tools/exflt_DNA_partial.fasta"
389#define EXFLT_AMI_SKIP_SHORT  "tools/exflt_AMI_skipShort.fasta"
390#define EXFLT_DNA_PVP_FILT    "tools/exflt_DNA_pvp.fasta"
391#define EXFLT_DNA_MF_FILT     "tools/exflt_DNA_mf.fasta"
392#define EXFLT_DNA_MFPVP_FILT  "tools/exflt_DNA_mfpvp.fasta"
393#define EXFLT_AMI_MF_FILT     "tools/exflt_AMI_mf.fasta"
394#define EXFLT_EMPTY           "tools/exflt_empty.fasta"
395#define EXFLT_NOTSAVED        "tools/exflt_notSaved.fasta"
396
397void TEST_arb_filtered_sequence_export() {
398    // see also ../SL/FILTSEQEXP/FilteredExport.cxx@SAI_FILTERED_EXPORT_TESTS
399    // ./arb_export_seq_filtered.cxx@show_help
400
401#define EXPORT_FILTERED_TO "arb_export_seq_filtered --db " FILTSEQ_DB " --fasta "
402
403    // fail to export from missing database:
404    TEST_FAILURE_OUTPUT_CONTAINS("arb_export_seq_filtered --db " FILTSEQ_DB "xxx --fasta " EXFLT_NOTSAVED,
405                                 NULp,
406                                 "Database 'TEST_prot_tiny.arbxxx' not found");
407
408    // export default alignment ('ali_dna'):
409    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_UNFILTERED " --id \"readdb(name);\\\" \\\";readdb(acc)\"");
410    TEST_OUTFILE_EXPECTED(EXFLT_DNA_UNFILTERED);
411
412    // export 'ali_prot':
413    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_AMI_UNFILTERED " --ali ali_prot");
414    TEST_OUTFILE_EXPECTED(EXFLT_AMI_UNFILTERED);
415
416    // fail to export unknown alignment:
417    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --ali ali_nosuch",
418                                 NULp,
419                                 "alignment 'ali_nosuch' not found");
420
421    // fail if not all species contain data
422    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --ali ali_dna_incomplete",
423                                 NULp,
424                                 "species 'TaxOcell' has no data in 'ali_dna_incomplete'");
425
426    // fail if ACI is broken
427    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --id \"readdb(name)|invalid\"",
428                                 NULp,
429                                 "Command 'readdb(name)|invalid' failed:\nReason: Unknown command 'invalid'");
430
431    // export 'ali_dna_incomplete' (where only 1 species has data):
432    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_PARTIAL_ALI " --ali ali_dna_incomplete --accept-missing-data");
433    TEST_EXPECTATION(CommandOutput(EXPORT_FILTERED_TO EXFLT_DNA_PARTIAL_ALI " --ali ali_dna_incomplete --accept-missing-data", true).Contains(NULp, "Skipped species 'StrCoel9' (Reason: has no data)"));
434    TEST_OUTFILE_EXPECTED(EXFLT_DNA_PARTIAL_ALI);
435
436    // skip "short" species:
437    TEST_EXPECTATION(CommandOutput(EXPORT_FILTERED_TO EXFLT_AMI_SKIP_SHORT " --ali ali_prot --min-bases 62", true).Contains(NULp, "Skipped species 'BctFra12' (Reason: not enough base-characters left)")); // below 62 less species are filtered
438    TEST_OUTFILE_EXPECTED(EXFLT_AMI_SKIP_SHORT);
439    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_AMI_UNFILTERED " --ali ali_prot --min-bases 57"); // all sequences contain 57 AA
440    TEST_OUTFILE_EXPECTED(EXFLT_AMI_UNFILTERED);
441    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_EMPTY " --ali ali_prot --min-bases 94");          // no sequence contains 94 AA
442    TEST_OUTFILE_EXPECTED(EXFLT_EMPTY);
443
444    // pass custom bases to count
445    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_AMI_SKIP_SHORT " --ali ali_prot --min-bases 62 --count-bases \"AC-IN-KP-TY-V\"");
446    TEST_OUTFILE_EXPECTED(EXFLT_AMI_SKIP_SHORT);
447    TEST_EXPECTATION(CommandOutput(EXPORT_FILTERED_TO EXFLT_EMPTY " --count-bases \"z-x\" --min-bases 1", true).Contains(NULp, "warning: generated empty file")); // no data contains 'xyz' -> skips all
448    TEST_OUTFILE_EXPECTED(EXFLT_EMPTY);
449
450    // apply SAI filters:
451    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --ali ali_prot --filterby \"POS_VAR_BY_PARSIMONY\" --block \"-.=012345\"",
452                                 NULp,
453                                 "SAI 'POS_VAR_BY_PARSIMONY' has no data in alignment 'ali_prot'");
454
455    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_PVP_FILT " --filterby POS_VAR_BY_PARSIMONY --block \"-.=0123\"");
456    TEST_OUTFILE_EXPECTED(EXFLT_DNA_PVP_FILT);
457
458    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_PVP_FILT " --filterby POS_VAR_BY_PARSIMONY --pass allbut \"-.=0123\"");
459    TEST_OUTFILE_EXPECTED(EXFLT_DNA_PVP_FILT);
460
461    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_MF_FILT " --filterby MAX_FREQUENCY --pass \"789\"");
462    TEST_OUTFILE_EXPECTED(EXFLT_DNA_MF_FILT);
463
464    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_MF_FILT " --filterby MAX_FREQUENCY --pass \"7-9\""); // use char-range
465    TEST_OUTFILE_EXPECTED(EXFLT_DNA_MF_FILT);
466
467    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_AMI_MF_FILT " --ali ali_prot --filterby MAX_FREQUENCY --block allbut 9-5");
468    TEST_OUTFILE_EXPECTED(EXFLT_AMI_MF_FILT);
469
470    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_AMI_MF_FILT " --ali ali_prot --filterby MAX_FREQUENCY --pass \"5-9\"");
471    TEST_OUTFILE_EXPECTED(EXFLT_AMI_MF_FILT);
472
473    // test failure of block/pass w/o filterby:
474    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --block \"-.=012345\"",
475                                 NULp,
476                                 "--pass and --block have to be preceeded by --filterby");
477    // test failure of filterby w/o block/pass:
478    TEST_FAILURE_OUTPUT_CONTAINS(EXPORT_FILTERED_TO EXFLT_NOTSAVED " --filterby \"POS_VAR_BY_PARSIMONY\"",
479                                 NULp,
480                                 "--filterby has to be followed by --pass or --block");
481
482    // apply combined filters:
483    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_MF_FILT
484                  " --filterby MAX_FREQUENCY --pass 0-9"
485                  " --filterby MAX_FREQUENCY --block allbut 987");
486    TEST_OUTFILE_EXPECTED(EXFLT_DNA_MF_FILT);
487
488    TEST_RUN_TOOL(EXPORT_FILTERED_TO EXFLT_DNA_MFPVP_FILT
489                  " --seqpp :.=-"
490                  " --filterby MAX_FREQUENCY        --pass 7-9"
491                  " --filterby POS_VAR_BY_PARSIMONY --pass 4-7");
492    TEST_OUTFILE_EXPECTED(EXFLT_DNA_MFPVP_FILT);
493
494}
495
496// --------------------------------------------------------------------------------
497
#undef UPDATE_OR_COMPARE

// #define TEST_AUTO_UPDATE_EXP_PVP // uncomment to auto-update expected pvp data

// Redefine UPDATE_OR_COMPARE (used by TEST_OUTFILE_EXPECTED), so that updating
// the expected pvp data is controlled by a separate switch.
#if defined(TEST_AUTO_UPDATE_EXP_PVP)
#define UPDATE_OR_COMPARE(outfile) TEST_COPY_FILE(outfile, EXPECTED(outfile))
#else // !defined(TEST_AUTO_UPDATE_EXP_PVP)
#define UPDATE_OR_COMPARE(outfile) TEST_EXPECT_TEXTFILES_EQUAL(outfile, EXPECTED(outfile))
#endif

// Dumps the part 'sub' of database "pvp/calc.arb" into "pvp/<saveName>.dump"
// (using 'arb_sub2ascii') and checks the dump against its expected file.
// 'sub' and 'saveName' have to be string literals.
// Wrapped into do-while, so the macro behaves like a single statement.
#define TEST_DBPART_EQUALS(sub,saveName) do {                                      \
        TEST_RUN_TOOL("arb_sub2ascii pvp/calc.arb " sub " pvp/" saveName ".dump"); \
        TEST_OUTFILE_EXPECTED("pvp/" saveName ".dump");                            \
    } while(0)

// Runs 'arb_calc_pvp' on "TEST_nuc.arb" (tree 'treeName', alignment 'ali_16s',
// additional arguments 'addArgs'), saving to "pvp/calc.arb". Afterwards checks
// the part 'sub' of the saved database (see TEST_DBPART_EQUALS).
// All arguments have to be string literals.
#define TEST_CALC_PVP(treeName,addArgs,sub,saveName) do {                                                 \
        TEST_RUN_TOOL("arb_calc_pvp TEST_nuc.arb --savename pvp/calc.arb " treeName " ali_16s " addArgs); \
        TEST_DBPART_EQUALS(sub,saveName);                                                                 \
    } while(0)
515
516void TEST_arb_calc_pvp() {
517    // see also ../SL/PVP/pvp.cxx@TEST_pvp
518    // ./arb_calc_pvp.cxx@show_help
519
520    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp",         NULp, "argument <database> has not been provided");
521    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp db",      NULp, "argument <treename>");
522    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp db tree", NULp, "argument <aliname>");
523
524    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp db tree_bla ali_123 xtra",      NULp, "too many arguments");
525    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp db tree_bla ali_123",           NULp, "Database 'db' not found");
526    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp TEST_nuc.arb tree_bla ali_123", NULp, "alignment 'ali_123' not found");
527    TEST_FAILURE_OUTPUT_CONTAINS("arb_calc_pvp TEST_nuc.arb tree_bla ali_16s", NULp, "Failed to read tree 'tree_bla'");
528
529    // expected results in ../UNIT_TESTER/run/pvp
530
531    TEST_CALC_PVP("tree_nuc",    "",                                     "extended_data", "standard");          // use tree in DB
532    TEST_CALC_PVP("tree_import", "--tree-import trees/nuc_flipped.tree", "extended_data", "flippedTreeImported"); // load tree from file
533    TEST_DBPART_EQUALS("tree_data/tree_import/node",     "gottree");                                            // test imported tree was saved to DB (just a hacked test, otherwise problems with date in remark)
534    TEST_CALC_PVP("tree_nuc",    "--tree-import trees/nuc_flipped.tree", "extended_data", "standard");          // load + overwrite tree
535    TEST_CALC_PVP("tree_nuc",    "--tree-delete",                        "tree_data",     "notree");            // use tree in DB + delete tree
536    TEST_CALC_PVP("tree_nuc",    "--sainame custPVP",                    "extended_data", "custom");            // test alternate SAI-name
537
538    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink("pvp/calc.arb"));
539    TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink("pvp/calc.ARF"));
540}
541
542#endif // UNIT_TESTS
543
Note: See TracBrowser for help on using the repository browser.