source: trunk/NTREE/AP_consensus.cxx

Last change on this file was 19432, checked in by westram, 17 months ago
  • GBT_get_alignment_len
    • now also reports error if alignment length is zero
      • this case often was unhandled and did easily lead to allocation problems.
    • catch error in case of zero alignment length ⇒ fixes alloc-size-larger-than-warning (in NT_count_different_chars).
    • check + fix callers.
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 32.8 KB
Line 
1// ================================================================= //
2//                                                                   //
3//   File      : AP_consensus.cxx                                    //
4//   Purpose   : calculate consensus SAIs                            //
5//                                                                   //
6//   http://www.arb-home.de/                                         //
7//                                                                   //
8// ================================================================= //
9
10#include "NT_local.h"
11
12#include <aw_root.hxx>
13#include <aw_msg.hxx>
14#include <aw_awar.hxx>
15
16#include <arbdbt.h>
17
18#include <arb_strbuf.h>
19#include <arb_defs.h>
20#include <arb_progress.h>
21
22#include <awt_config_manager.hxx>
23#include <awt_misc.hxx>
24#include <awt_sel_boxes.hxx>
25
26// AISC_MKPT_PROMOTE:#ifndef AW_BASE_HXX
27// AISC_MKPT_PROMOTE:#include <aw_base.hxx>
28// AISC_MKPT_PROMOTE:#endif
29
30#define AWAR_MAX_FREQ_PREFIX      "tmp/CON_MAX_FREQ/"
31#define AWAR_CONSENSUS_PREFIX     "consensus/"
32#define AWAR_CONSENSUS_PREFIX_TMP "tmp/" AWAR_CONSENSUS_PREFIX
33
34#define AWAR_MAX_FREQ_IGNORE_GAPS AWAR_MAX_FREQ_PREFIX "no_gaps"
35#define AWAR_MAX_FREQ_SAI_NAME    AWAR_MAX_FREQ_PREFIX "sai_name"
36
37#define AWAR_CONSENSUS_MARKED_ONLY AWAR_CONSENSUS_PREFIX_TMP "marked_only"
38#define AWAR_CONSENSUS_ALIGNMENT   AWAR_CONSENSUS_PREFIX_TMP "alignment"
39#define AWAR_CONSENSUS_COUNTGAPS   AWAR_CONSENSUS_PREFIX "countgaps"
40#define AWAR_CONSENSUS_GAPBOUND    AWAR_CONSENSUS_PREFIX "gapbound"
41#define AWAR_CONSENSUS_GROUP       AWAR_CONSENSUS_PREFIX "group"
42#define AWAR_CONSENSUS_CONSIDBOUND AWAR_CONSENSUS_PREFIX "considbound"
43#define AWAR_CONSENSUS_UPPER       AWAR_CONSENSUS_PREFIX "upper"
44#define AWAR_CONSENSUS_LOWER       AWAR_CONSENSUS_PREFIX "lower"
45#define AWAR_CONSENSUS_NAME        AWAR_CONSENSUS_PREFIX_TMP "name"
46
47#define CONSENSUS_AWAR_SOURCE CAS_NTREE
48#include <consensus.h>
49#include <consensus_config.h>
50#include <chartable.h>
51
52static int CON_insertSequences(GBDATA *gb_main, const char *aliname, long IF_ASSERTION_USED(maxalignlen), bool onlymarked, BaseFrequencies& freqs) {
53    /*! read sequence data and fill into 'freqs'
54     * @param gb_main       database
55     * @param aliname       name of alignment
56     * @param maxalignlen   length of alignment
57     * @param onlymarked    true -> marked only
58     * @param freqs         sequences are inserted here (has to be empty)
59     * @return number of inserted sequences
60     */
61    long nrofspecies = onlymarked ? GBT_count_marked_species(gb_main) : GBT_get_species_count(gb_main);
62
63    arb_progress progress(nrofspecies);
64    progress.auto_subtitles("Examining sequence");
65
66    GBDATA *gb_species = onlymarked ? GBT_first_marked_species(gb_main) : GBT_first_species(gb_main);
67    while (gb_species) {
68        GBDATA *alidata = GBT_find_sequence(gb_species, aliname);
69        if (alidata) {
70            const char *data   = GB_read_char_pntr(alidata);
71            size_t      length = GB_read_string_count(alidata);
72
73            nt_assert(long(length)<=maxalignlen);
74            freqs.add(data, length);
75        }
76        gb_species = onlymarked ? GBT_next_marked_species(gb_species) : GBT_next_species(gb_species);
77        ++progress;
78    }
79
80    int inserted = freqs.added_sequences();
81    if (nrofspecies < inserted) {
82        GBT_message(gb_main, GBS_global_string("Only %i of %li %sspecies contain data in alignment '%s'",
83                                               inserted, nrofspecies, onlymarked ? "marked " : "", aliname));
84        progress.done();
85    }
86
87    return inserted;
88}
89
90static GB_ERROR CON_export(GBDATA *gb_main, const char *savename, const char *align, const char *result, bool onlymarked, long nrofspecies, const ConsensusBuildParams& BK) {
91    /*! writes consensus SAI to DB
92     * @param gb_main      database
93     * @param savename     name of SAI to save to
94     * @param align        alignment name
95     * @param result       SAI data to write
96     * @param onlymarked   true -> was calculated on marked only (used for SAI comment)
97     * @param nrofspecies  number of used sequences (used for SAI comment; if less than 20 -> add an explicit list to field '_SPECIES')
98     * @param BK           parameters used for consensus calculation (used for SAI comment)
99     * @return error if something goes wrong
100     */
101    const char *off = "off";
102    const char *on  = "on";
103
104    GBDATA   *gb_extended = GBT_find_or_create_SAI(gb_main, savename);
105    GBDATA   *gb_data     = GBT_add_data(gb_extended, align, "data", GB_STRING);
106    GB_ERROR  err         = GB_write_string(gb_data, result);
107    if (!err) {
108        GBDATA *gb_options = GBT_add_data(gb_extended, align, "_TYPE", GB_STRING);
109
110        const char *allvsmarked     = onlymarked ? "marked" : "all";
111        const char *countgapsstring = BK.countgaps ? on : off;
112        const char *simplifystring  = BK.group ? on : off;
113
114        {
115            char *buffer = ARB_alloc<char>(2000);
116            sprintf(buffer, "CON: [species: %s]  [number: %ld]  [count gaps: %s] "
117                    "[threshold for gaps: %d]  [simplify: %s] "
118                    "[threshold for group: %d]  [upper: %d]  [lower: %d]",
119                    allvsmarked, nrofspecies, countgapsstring,
120                    BK.gapbound, simplifystring,
121                    BK.considbound, BK.upper, BK.lower);
122
123            err = GB_write_string(gb_options, buffer);
124            free(buffer);
125        }
126
127        if (!err) {
128            GBDATA *gb_names  = GB_search(GB_get_father(gb_options), "_SPECIES", GB_FIND);
129            if (gb_names) err = GB_delete(gb_names); // delete old entry
130        }
131
132        if (!err && nrofspecies<20) {
133            GBS_strstruct namelist(1000);
134
135            GBDATA *gb_species =
136                onlymarked
137                ? GBT_first_marked_species(gb_main)
138                : GBT_first_species(gb_main);
139
140            while (gb_species) {
141                if (GBT_find_sequence(gb_species, align)) {
142                    GBDATA     *gb_speciesname = GB_search(gb_species, "name", GB_FIND);
143                    const char *name           = GB_read_char_pntr(gb_speciesname);
144
145                    namelist.cat(name);
146                    namelist.put( ' ');
147                }
148                if (onlymarked) gb_species = GBT_next_marked_species(gb_species);
149                else gb_species            = GBT_next_species(gb_species);
150            }
151
152            err = GBT_write_string(GB_get_father(gb_options), "_SPECIES", namelist.get_data());
153        }
154
155        // remove data relicts from "complex consensus" (no longer supported)
156        if (!err) {
157            char    buffer2[256];
158            sprintf(buffer2, "%s/FREQUENCIES", align);
159            GBDATA *gb_graph  = GB_search(gb_extended, buffer2, GB_FIND);
160            if (gb_graph) err = GB_delete(gb_graph);  // delete old entry
161        }
162    }
163
164    if (err) err = GBS_global_string("Failed to store consensus '%s' (Reason: %s)", savename, err);
165    return err;
166}
167
168static GB_ERROR CON_calculate(GBDATA *gb_main, const ConsensusBuildParams& BK, const char *aliname, bool onlymarked, const char *sainame) {
169    /*! calculates the consensus and writes it to SAI 'sainame'
170     * Description how consensus is calculated: ../HELP_SOURCE/source/consensus_def.hlp
171     * @param gb_main     database
172     * @param BK          parameters for consensus calculation
173     * @param aliname     alignment name
174     * @param onlymarked  true -> use marked sequences only
175     * @param sainame     name of destination SAI
176     * @return error if something goes wrong
177     */
178    GB_ERROR error = NULp;
179
180    GB_push_transaction(gb_main);
181
182    long maxalignlen = GBT_get_alignment_len(gb_main, aliname);
183    if (maxalignlen <= 0) {
184        error = GB_append_exportedError(GB_await_error());
185    }
186
187    if (!error) {
188        arb_progress progress("Calculating consensus");
189
190        GB_alignment_type alitype = GBT_get_alignment_type(gb_main, aliname);
191        arb_assert(alitype != GB_AT_UNKNOWN);
192        BaseFrequencies::setup("-.", alitype);
193
194        BaseFrequencies freqs(maxalignlen);
195        int nrofspecies = CON_insertSequences(gb_main, aliname, maxalignlen, onlymarked, freqs);
196
197        if (BK.lower>BK.upper) {
198            error = "fault: lower greater than upper";
199        }
200        else {
201            char *result = freqs.build_consensus_string(BK);
202            error = CON_export(gb_main, sainame, aliname, result, onlymarked, nrofspecies, BK);
203            free(result);
204        }
205    }
206
207    error = GB_end_transaction(gb_main, error);
208
209    return error;
210}
211
212static void CON_calculate_cb(AW_window *aw) {
213    AW_root *awr        = aw->get_root();
214    char    *aliname    = awr->awar(AWAR_CONSENSUS_ALIGNMENT)->read_string();
215    char    *sainame    = awr->awar(AWAR_CONSENSUS_NAME)->read_string();
216    bool     onlymarked = awr->awar(AWAR_CONSENSUS_MARKED_ONLY)->read_int();
217
218    ConsensusBuildParams BK(awr);
219
220    {
221#if defined(ASSERTION_USED)
222        GB_transaction ta(GLOBAL.gb_main);
223        LocallyModify<bool> denyAwarReads(AW_awar::deny_read, true);
224        LocallyModify<bool> denyAwarWrites(AW_awar::deny_write, true);
225#endif
226
227        GB_ERROR error = CON_calculate(GLOBAL.gb_main, BK, aliname, onlymarked, sainame);
228        aw_message_if(error);
229    }
230
231    free(sainame);
232    free(aliname);
233}
234
235static void consensus_upper_lower_changed_cb(AW_root *awr, bool upper_changed) {
236    AW_awar *awar_lower = awr->awar(AWAR_CONSENSUS_LOWER);
237    AW_awar *awar_upper = awr->awar(AWAR_CONSENSUS_UPPER);
238
239    int lower = awar_lower->read_int();
240    int upper = awar_upper->read_int();
241
242    if (upper<lower) {
243        if (upper_changed) awar_lower->write_int(upper);
244        else               awar_upper->write_int(lower);
245    }
246}
247
248void AP_create_consensus_var(AW_root *aw_root, AW_default aw_def) {
249    GB_transaction ta(GLOBAL.gb_main);
250    {
251        char *defali = GBT_get_default_alignment(GLOBAL.gb_main);
252        if (!defali) {
253            GB_clear_error();
254            defali = ARB_strdup("no-default-alignment");
255        }
256        aw_root->awar_string(AWAR_CONSENSUS_ALIGNMENT, defali, aw_def);
257        free(defali);
258    }
259    aw_root->awar_int(AWAR_CONSENSUS_MARKED_ONLY, 1,  aw_def);
260    aw_root->awar_int(AWAR_CONSENSUS_GROUP,       0,  aw_def);
261    aw_root->awar_int(AWAR_CONSENSUS_COUNTGAPS,   1,  aw_def);
262    aw_root->awar_int(AWAR_CONSENSUS_UPPER,       95, aw_def)->set_minmax(0, 100)->add_callback(makeRootCallback(consensus_upper_lower_changed_cb, true));
263    aw_root->awar_int(AWAR_CONSENSUS_LOWER,       70, aw_def)->set_minmax(0, 100)->add_callback(makeRootCallback(consensus_upper_lower_changed_cb, false));
264    aw_root->awar_int(AWAR_CONSENSUS_GAPBOUND,    60, aw_def)->set_minmax(0, 100);
265    aw_root->awar_int(AWAR_CONSENSUS_CONSIDBOUND, 30, aw_def)->set_minmax(0, 100);
266    aw_root->awar_int(AWAR_MAX_FREQ_IGNORE_GAPS,  1,  aw_def);
267
268    aw_root->awar_string(AWAR_CONSENSUS_NAME,    "CONSENSUS",     aw_def);
269    aw_root->awar_string(AWAR_MAX_FREQ_SAI_NAME, "MAX_FREQUENCY", aw_def);
270}
271
272static AWT_config_mapping_def consensus_config_mapping[] = {
273    { AWAR_CONSENSUS_COUNTGAPS,   CONSENSUS_CONFIG_COUNTGAPS },
274    { AWAR_CONSENSUS_GAPBOUND,    CONSENSUS_CONFIG_GAPBOUND },
275    { AWAR_CONSENSUS_GROUP,       CONSENSUS_CONFIG_GROUP },
276    { AWAR_CONSENSUS_CONSIDBOUND, CONSENSUS_CONFIG_CONSIDBOUND },
277    { AWAR_CONSENSUS_UPPER,       CONSENSUS_CONFIG_UPPER },
278    { AWAR_CONSENSUS_LOWER,       CONSENSUS_CONFIG_LOWER },
279
280    // make sure the keywords of the following entries
281    // DIFFER from those defined at ../TEMPLATES/consensus_config.h@CommonEntries
282
283    { AWAR_CONSENSUS_MARKED_ONLY, "marked_only" },
284    { AWAR_CONSENSUS_NAME,        "name" },
285
286    { NULp, NULp }
287};
288
289AW_window *AP_create_con_expert_window(AW_root *aw_root) {
290    // keep in sync with ../EDIT4/ED4_no_class.cxx@ED4_create_consensus_definition_window
291
292    AW_window_simple *aws = new AW_window_simple;
293    aws->init(aw_root, "CALCULATE_CONSENSUS", "CONSENSUS OF SEQUENCES");
294    aws->load_xfig("consensus/expert.fig");
295
296    aws->auto_space(5, 5);
297
298    const int SCALEDCOLUMNS = 3;
299    const int SCALERSIZE    = 150;
300
301    // top part of window:
302    aws->button_length(9);
303
304    aws->at("cancel");
305    aws->callback(AW_POPDOWN);
306    aws->create_button("CLOSE", "CLOSE", "C");
307
308    aws->at("help");
309    aws->callback(makeHelpCallback("consensus.hlp"));
310    aws->create_button("HELP", "HELP", "H");
311
312    // left part of window:
313    aws->at("which_alignment");
314    awt_create_ALI_selection_list(GLOBAL.gb_main, (AW_window *)aws, AWAR_CONSENSUS_ALIGNMENT, "*=");
315
316    aws->at("which_species");
317    aws->create_toggle_field(AWAR_CONSENSUS_MARKED_ONLY);
318    aws->insert_toggle        ("all",    "a", 0);
319    aws->insert_default_toggle("marked", "m", 1);
320    aws->update_toggle_field();
321
322    aws->at("save_box");
323    awt_create_SAI_selection_list(GLOBAL.gb_main, aws, AWAR_CONSENSUS_NAME);
324
325    aws->at("name");
326    aws->create_input_field(AWAR_CONSENSUS_NAME, 10);
327
328    // right part of window (same as in EDIT4):
329    aws->at("countgaps");
330    aws->create_toggle_field(AWAR_CONSENSUS_COUNTGAPS);
331    aws->insert_toggle        ("on",  "1", 1);
332    aws->insert_default_toggle("off", "0", 0);
333    aws->update_toggle_field();
334
335    aws->at("gapbound");
336    aws->create_input_field_with_scaler(AWAR_CONSENSUS_GAPBOUND, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
337
338    aws->at("group");
339    aws->create_toggle_field(AWAR_CONSENSUS_GROUP);
340    aws->insert_toggle        ("on",  "1", 1);
341    aws->insert_default_toggle("off", "0", 0);
342    aws->update_toggle_field();
343
344    aws->at("considbound");
345    aws->create_input_field_with_scaler(AWAR_CONSENSUS_CONSIDBOUND, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
346
347    aws->at("showgroups");
348    aws->callback(AWT_create_IUPAC_info_window);
349    aws->create_autosize_button("SHOW_IUPAC", "Show IUPAC groups", "s");
350
351    aws->at("upper");
352    aws->create_input_field_with_scaler(AWAR_CONSENSUS_UPPER, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
353
354    aws->at("lower");
355    aws->create_input_field_with_scaler(AWAR_CONSENSUS_LOWER, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
356
357    // bottom part of window:
358    aws->at("calculate");
359    aws->callback(CON_calculate_cb);
360    aws->create_button("GO", "GO", "G");
361
362    aws->at("config");
363    AWT_insert_config_manager(aws, AW_ROOT_DEFAULT, CONSENSUS_CONFIG_ID, consensus_config_mapping);
364
365    return aws;
366}
367
368static GB_ERROR CON_calc_max_freq(GBDATA *gb_main, bool ignore_gaps, const char *savename, const char *aliname) {
369    /*! calculates the maximum frequency for each column and write to SAI
370     * @param gb_main      database
371     * @param ignore_gaps  true -> ignore gaps; see ../HELP_SOURCE/source/max_freq.hlp@Gaps
372     * @param savename     name of destination SAI
373     * @param aliname      name of alignment to use
374     * @return error if something goes wrong
375     */
376    arb_assert(!GB_have_error());
377
378    GB_ERROR       error = NULp;
379    GB_transaction ta(gb_main);
380
381    long maxalignlen = GBT_get_alignment_len(gb_main, aliname);
382    if (maxalignlen<=0) {
383        error = GB_append_exportedError(GB_await_error());
384    }
385    else {
386        arb_progress progress("Calculating max. frequency");
387
388        GB_alignment_type alitype = GBT_get_alignment_type(gb_main, aliname);
389        arb_assert(alitype != GB_AT_UNKNOWN);
390        BaseFrequencies::setup("-.", alitype);
391
392        const int onlymarked  = 1;
393        BaseFrequencies freqs(maxalignlen);
394        long nrofspecies = CON_insertSequences(gb_main, aliname, maxalignlen, onlymarked, freqs);
395
396        char *result1 = new char[maxalignlen+1];
397        char *result2 = new char[maxalignlen+1];
398
399        result1[maxalignlen] = 0;
400        result2[maxalignlen] = 0;
401
402        for (int pos = 0; pos < maxalignlen; pos++) {
403            double mf  = freqs.max_frequency_at(pos, ignore_gaps);
404            int    mfi = int(mf*100.0+0.01); // frequency -> [0..100]; add 1/100 to reduce incompatibilities caused by 32/64 bit differences
405
406            if (mfi) {
407                if (mfi<10) mfi = 10; // hack: otherwise SAI will contain '0' (meaning 100% frequency)
408
409                int mfh = int(mfi/10);
410                int mfl = mfi-10*mfh;
411
412                result1[pos] = "?1234567890"[mfh];
413                result2[pos] = "0123456789"[mfl];
414            }
415            else {
416                result1[pos] = '=';
417                result2[pos] = '=';
418            }
419        }
420
421        GBDATA *gb_extended = GBT_find_or_create_SAI(gb_main, savename);
422        if (!gb_extended) {
423            error = GB_await_error();
424        }
425        else {
426            GBDATA *gb_data1 = GBT_add_data(gb_extended, aliname, "data", GB_STRING);
427            GBDATA *gb_data2 = GBT_add_data(gb_extended, aliname, "dat2", GB_STRING);
428
429            error             = GB_write_string(gb_data1, result1);
430            if (!error) error = GB_write_string(gb_data2, result2);
431
432            GBDATA *gb_options = GBT_add_data(gb_extended, aliname, "_TYPE", GB_STRING);
433
434            if (!error) {
435                const char *type = GBS_global_string("MFQ: [species: %li] [ignore gaps: %s]", nrofspecies, ignore_gaps ? "yes" : "no");
436                error            = GB_write_string(gb_options, type);
437            }
438        }
439
440        delete [] result1;
441        delete [] result2;
442    }
443
444    error = ta.close(error);
445    arb_assert(!GB_have_error());
446
447    return error;
448}
449
450static void CON_calc_max_freq_cb(AW_window *aw) {
451    char     *aliname = GBT_get_default_alignment(GLOBAL.gb_main);
452    GB_ERROR  error;
453
454    if (!aliname) {
455        error = GB_await_error();
456    }
457    else {
458        AW_root    *awr         = aw->get_root();
459        bool        ignore_gaps = awr->awar(AWAR_MAX_FREQ_IGNORE_GAPS)->read_int();
460        const char *savename    = awr->awar(AWAR_MAX_FREQ_SAI_NAME)->read_char_pntr();
461
462        error = CON_calc_max_freq(GLOBAL.gb_main, ignore_gaps, savename, aliname);
463        free(aliname);
464    }
465    aw_message_if(error);
466}
467
468AW_window *AP_create_max_freq_window(AW_root *aw_root) {
469    AW_window_simple *aws = new AW_window_simple;
470    aws->init(aw_root, "MAX_FREQUENCY", "MAX FREQUENCY");
471    aws->load_xfig("consensus/max_freq.fig");
472
473    GB_push_transaction(GLOBAL.gb_main);
474
475    aws->button_length(6);
476
477    aws->at("cancel");
478    aws->callback(AW_POPDOWN);
479    aws->create_button("CLOSE", "CLOSE", "C");
480
481    aws->at("help"); aws->callback(makeHelpCallback("max_freq.hlp"));
482    aws->create_button("HELP", "HELP", "H");
483
484    // activation of consensus calculation by button ...
485    aws->at("go");
486    aws->callback(CON_calc_max_freq_cb);
487    aws->create_button("GO", "GO", "C");
488
489    aws->at("save");
490    aws->create_input_field(AWAR_MAX_FREQ_SAI_NAME, 1);
491
492    aws->at("sai");
493    awt_create_SAI_selection_list(GLOBAL.gb_main, aws, AWAR_MAX_FREQ_SAI_NAME);
494
495    aws->at("gaps");
496    aws->create_toggle(AWAR_MAX_FREQ_IGNORE_GAPS);
497
498    GB_pop_transaction(GLOBAL.gb_main);
499
500    return aws;
501}
502
503// --------------------------------------------------------------------------------
504
505#ifdef UNIT_TESTS
506#ifndef TEST_UNIT_H
507#include <test_unit.h>
508#endif
509
510static GBDATA *create_simple_seq_db(const char *aliname, const char *alitype, const char **sequence, int sequenceCount, int sequenceLength) {
511    GBDATA *gb_main = GB_open("nosuch.arb", "wc");
512
513    {
514        GB_transaction  ta(gb_main);
515        GBDATA         *gb_species_data = GBT_get_species_data(gb_main);
516        int             specCounter     = 0;
517
518        TEST_EXPECT_RESULT__NOERROREXPORTED(GBT_create_alignment(gb_main, aliname, sequenceLength, true, 6, alitype, "by create_simple_seq_db"));
519
520        for (int s = 0; s<sequenceCount; ++s) {
521            GBDATA *gb_species = GBT_find_or_create_species_rel_species_data(gb_species_data, GBS_global_string("name%04i", ++specCounter), true);
522            GBDATA *gb_data    = GBT_add_data(gb_species, aliname, "data", GB_STRING);
523
524            TEST_EXPECT_EQUAL(strlen(sequence[s]), sequenceLength);
525            TEST_EXPECT_NO_ERROR(GB_write_string(gb_data, sequence[s]));
526        }
527    }
528
529#if 0
530    // save DB (to view data; should be inactive when committed)
531    char *dbname = GBS_global_string_copy("cons_%s.arb", alitype);
532    TEST_EXPECT_NO_ERROR(GB_save(gb_main, dbname, "a"));
533    free(dbname);
534#endif
535
536    return gb_main;
537}
538
539static void read_frequency(GBDATA *gb_main, const char *sainame, const char *aliname, const char*& data, const char*& dat2) {
540    GB_transaction ta(gb_main);
541
542    GBDATA *gb_maxFreq = GBT_find_SAI(gb_main, sainame);
543    GBDATA *gb_ali     = GB_entry(gb_maxFreq, aliname);
544    GBDATA *gb_data    = GB_entry(gb_ali, "data");
545    GBDATA *gb_dat2    = GB_entry(gb_ali, "dat2");
546
547    data = GB_read_char_pntr(gb_data);
548    dat2 = GB_read_char_pntr(gb_dat2);
549}
550
551void TEST_nucleotide_consensus_and_maxFrequency() {
552    const char *sequence[] = {
553        "-.AAAAAAAAAAcAAAAAAAAATTTTTTTTTTTTTTTTTAAAAAAAAgggggAAAAgAA----m-----yykm-mmmAAAAAAAAAmmmmmmmmmNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNKKKKKKKKKWWWWWWWWW",
554        "-.-AAAAAAAAAccAAAAAAAAggTTgTTTTgTTTTTTTcccAAAAAgggggAAAAgAA----k-----kykr-rrrAAAAAAAAmmmmmmmmmT-NNNNNNNNNANNNNNbNNNNNNNNkNNNNNNNNaNNNNNNNNbKKKKKKKKbWWWWWWWW",
555        "-.--AAAAAAAAcccAAAAAAA-ggTggTTTggTTTTTTccccAAAAgggCCtAAAgAC----m-----sykw-wvsAAAAAAAmmmmmmmmmTT--NNNNNNNNCANNNNbbNNNNNNNkkNNNNNNNaaNNNNNNNbbKKKKKKKbbWWWWWWW",
556        "-.---AAAAAAAccccAAAAAA-ggggggTTgggTTTTTcccccAAAggCCC-tAACtC----k----yyyys-smvAAAAAAmmmmmmmmmTTT---NNNNNNNGCANNNbbbNNNNNNkkkNNNNNNaaaNNNNNNbbbKKKKKKbbbWWWWWW",
557        "-.----AAAAAAcccccAAAAA----ggggTggggTTTTGGGcccAAgCCCt-ttACtG----m---nkkkky-yrmAAAAAmmmmmmmmmTTTT----NNNNNNTGCANNbbbbNNNNNkkkkNNNNNaaaaNNNNNbbbbKKKKKbbbbWWWWW",
558        "-.-----AAAAAccccccAAAA----ggggggggggTTgGGGGcccAcCCtt--tttCG----k--nnssssk-kvrAAAAmmmmmmmmmTTTTT-----NNNNN-TGCANbbbbbNNNNkkkkkNNNNaaaaaNNNNbbbbbKKKKbbbbbWWWW",
559        "-.------AAAAcccccccAAA---------ggggggTgGGGGGccccCt----tt-gT----mydddyyyy-vvmsAAAmmmmmmmmmTTTTTT------NNNN-ATGCAbbbbbbNNNkkkkkkNNNaaaaaaNNNbbbbbbKKKbbbbbbWWW",
560        "-.-------AAAccccccccAA---------ggggggggttGGGGccct------t--T-yykkkbbbkkkk-hhrvAAmmmmmmmmmTTTTTTT-------NNN-C-TGCbbbbbbbNNkkkkkkkNNaaaaaaaNNbbbbbbbKKbbbbbbbWW",
561        "-.--------AAcccccccccA----------------gttGGGGGct-------t----ymmmmnnnssss-ddvmAmmmmmmmmmTTTTTTTT--------NN-G--TGbbbbbbbbNkkkkkkkkNaaaaaaaaNbbbbbbbbKbbbbbbbbW",
562        "-.---------Acccccccccc----------------gtAGGGGGG----------------k---------bbmrmmmmmmmmmTTTTTTTTT---------N-T---Tbbbbbbbbbkkkkkkkkkaaaaaaaaabbbbbbbbbbbbbbbbbb",
563    };
564    const char *expected_frequency[] = {
565        // considering gaps:
566        "0=9876567890098765678986665444576545675336544565434475454320888277654333462439988776654434567899876543222523222333322222444433332987765443333444444333444444",
567        "0=0000000000000000000000000000000000000000000000000000000000000500000055005565050505055050000000000000025050025210098765752075207257025702568013568568013568",
568        // ignoring gaps:
569        "==000000000009876567895757865688765678533654456554536655542=552233223333222439988776654434567892222222222222222333322222444433332987765443333444444333444444",
570        "==000000000000000000000505360637520257000000000502036075025=005530983388555565050505055050000005555555555555555210098765752075207257025702568013568568013568",
571    };
572    const char *expected_consensus[] = {
573        "==----..aaaACccMMMMMaa----.....g.kkk.uKb.ssVVmmss...-.ww...=---.---..byk.-.mVAaaaaMMMMmmHH..uuu----............BBbb.....Kkkkkk...aaaa.....BkkkkkkkKB....wwww", // default settings (see ConsensusBuildParams-ctor), gapbound=60, considbound=30, lower/upper=70/95
574        "==AAAAAAAAAACccMMMMMaaKgKugKKKuggKKKuuKb.ssVVmmssssBWWWWs..=Y.......BByk...mVAaaaaMMMMmmHH..uuu................BBbb.....Kkkkkk...aaaa.....BkkkkkkkKB....wwww", // countgaps=0
575        "==AAAAAAAAAACCCMMMMMAAKGKUGKKKUGGKKKUUKBsSSVVMMSSSSBWWWWSwa=YcaaykkkBBYKaaaMVAAAAAMMMMMMHHuuuUUaaaaaaaaaaaaaaaaBBBBBBBBcKKKKKkkkkAAAaaaaaaBBKKKKKKKBBuuuwWWW", // countgaps=0,              considbound=26, lower=0, upper=75 (as described in #663)
576        "==AAAAAAAAAACCCMMMMMAAKKKKGKKKUGKKKKKUKBsSSVVMMSSSSBWWWWSwN=YHNNykkkBBYKNNNVVAAAAMMMMMMMHHHuuUUNNNNNNNNNNNNNNNNBBBBBBBBBKKKKKkkkkAAAaaaaaaBBKKKKKKKBBuuwwWWW", // countgaps=0,              considbound=25, lower=0, upper=75
577        "==---aaaaAAACCCMMMMMAA-gkugkkkuggKKKuuKBsSSVVMMSsssb-wwWswa=---a--kkbBykaaaMVAAAAAMMMMMMHHuuuUU---aaaaaaaaaaaaaBBBBBBBBcKKKKKkkkkAAAaaaaaaBBKKKKKKKBBuuuwWWW", // countgaps=1, gapbound=70, considbound=26, lower=0, upper=75
578        "==---aaaaAAACCMMMMMMMA-kkkgkkkugKKKKKuKBNSVVVVMSsssb-wwWswN=---N--nnbBBBnnNVVAAAMMMMMMMHHHHHuUU---nnnnNNNnNnNNNBBBBBBBNNKKKKKkkNNAAAaaaaNNBBBBKKKKKBBBNwwWWW", // countgaps=1, gapbound=70, considbound=20, lower=0, upper=75
579        "==---aaaaAAACMMMMMMMMM-kkkkkkkkKKKKKKKKNNVVVVVVBBbbb-wwWbnN=---N--nnbBBBnnNVVMMMMMMMMMHHHHHHHHH---nnnnNNNnNnNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNBBBBBBBBBNNNNNNNNN", // countgaps=1, gapbound=70, considbound= 1, lower=0, upper=75
580        "==---aaaaAAACMMMMMMMMM-kkkkkkkkKKKKKKKKNNVVVVVVBBbbb-wwWbnN=---N--nnbBBBnnNVVMMMMMMMMMHHHHHHHHH---nnnnNNNnNnNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNBBBBBBBBBNNNNNNNNN", // countgaps=1, gapbound=70, considbound= 0, lower=0, upper=75
581        "==AAAAAAAAAACMMMMMMMMMKKKKKKKKKKKKKKKKKNNVVVVVVBBBBBWWWWBNN=YHNNNNNNBBBBNNNVVMMMMMMMMMHHHHHHHHHNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNBBBBBBBBBNNNNNNNNN", // countgaps=0,              considbound= 0, lower=0, upper=75
582    };
583    const size_t seqlen         = strlen(sequence[0]);
584    const int    sequenceCount  = ARRAY_ELEMS(sequence);
585    const int    consensusCount = ARRAY_ELEMS(expected_consensus);
586
587    // create DB
588    GB_shell    shell;
589    const char *aliname = "ali_nuc";
590    GBDATA     *gb_main = create_simple_seq_db(aliname, "rna", sequence, sequenceCount, seqlen);
591
592    ConsensusBuildParams BK;
593    for (int c = 0; c<consensusCount; ++c) {
594        TEST_ANNOTATE(GBS_global_string("c=%i", c));
595        switch (c) {
596            case 0: break;                                                     // use default settings
597            case 1: BK.countgaps   = false; break;                             // dont count gaps
598            case 2: BK.considbound = 26; BK.lower = 0; BK.upper = 75; break;   // settings from #663
599            case 3: BK.considbound = 25; break;
600            case 4: BK.considbound = 26; BK.countgaps = true; BK.gapbound = 70; break;
601            case 5: BK.considbound = 20; break;
602            case 6: BK.considbound = 1; break;
603            case 7: BK.considbound = 0; break;
604            case 8: BK.countgaps   = false; break;
605            default: arb_assert(0); break;                                     // missing
606        }
607
608        {
609            GB_transaction  ta(gb_main);
610            const char     *sainame = "CONSENSUS";
611            TEST_EXPECT_NO_ERROR(CON_calculate(gb_main, BK, aliname, false, sainame));
612
613            GBDATA     *gb_consensus = GBT_find_SAI(gb_main, sainame);
614            GBDATA     *gb_seq       = GBT_find_sequence(gb_consensus, aliname);
615            const char *consensus    = GB_read_char_pntr(gb_seq); // @@@ NOT_ALL_SAI_HAVE_DATA
616
617            TEST_EXPECT_EQUAL(consensus, expected_consensus[c]);
618        }
619    }
620
621    // test max.frequency
622    const char *sainame = "MAXFREQ";
623    for (int ignore_gaps = 0; ignore_gaps<=1; ++ignore_gaps) {
624        TEST_ANNOTATE(GBS_global_string("ignore_gaps=%i", ignore_gaps));
625        TEST_EXPECT_NO_ERROR(CON_calc_max_freq(gb_main, ignore_gaps, sainame, aliname));
626        const char *data, *dat2;
627        read_frequency(gb_main, sainame, aliname, data, dat2);
628        TEST_EXPECT_EQUAL(data, expected_frequency[ignore_gaps*2]);
629        TEST_EXPECT_EQUAL(dat2, expected_frequency[ignore_gaps*2+1]);
630    }
631
632    GB_close(gb_main);
633}
634
635void TEST_amino_consensus_and_maxFrequency() {
636    const char *sequence[] = {
637        "-.ppppppppppQQQQQQQQQDDDDDELLLLLwwwwwwwwwwwwwwwwgggggggggggSSSe-PPP-DELp",
638        "-.-pppppppppkQQQQQQQQnDDDDELLLLLVVwwVwwwwVwwwwwwSgggggggggSSSee-QPP-DELa",
639        "-.--ppppppppkkQQQQQQQnnDDDELLLLL-VVwVVwwwVVwwwwwSSgggggggSSSeee-KQP-DEIg",
640        "-.---pppppppkkkQQQQQQnnnDDELLLLL-VVVVVVwwVVVwwwwSSSgggggSSSeee--LQQ-DQIs",
641        "-.----ppppppkkkkQQQQQnnnnDELLLLL----VVVVwVVVVwwweSSSgggSSSeee---WKQ-NQJt",
642        "-.-----pppppkkkkkQQQQnnnnnqiLLLL----VVVVVVVVVVwweeSSSggSSeee-----KQ-NQJq",
643        "-.------ppppkkkkkkQQQnnnnnqiiLLL---------VVVVVVweeeSSSgSeee------LK-NZJn",
644        "-.-------pppkkkkkkkQQnnnnnqiiiLL---------VVVVVVVeeeeSSSeee-------LK-NZJe",
645        "-.--------ppkkkkkkkkQnnnnnqiiiiL----------------eeeeeSSee--------WK-BZJd",
646        "-.---------pkkkkkkkkknnnnnqiiiii----------------eeeeeeSe---------WK-BZJb",
647        "-.ppppppppppQQQQQQQQQDDDDDELLLLLwwwwwwwwwwwwwwwwgggggggggggSSSe-PPP-DELz",
648        "-.-pppppppppkQQQQQQQQnDDDDELLLLLVVwwVwwwwVwwwwwwSgggggggggSSSee-QPP-DELh",
649        "-.--ppppppppkkQQQQQQQnnDDDELLLLL-VVwVVwwwVVwwwwwSSgggggggSSSeee-KQP-DEIk",
650        "-.---pppppppkkkQQQQQQnnnDDELLLLL-VVVVVVwwVVVwwwwSSSgggggSSSeee--LQQ-DQIr",
651        "-.----ppppppkkkkQQQQQnnnnDELLLLL----VVVVwVVVVwwweSSSgggSSSeee---WKQ-NQJl",
652        "-.-----pppppkkkkkQQQQnnnnnqiLLLL----VVVVVVVVVVwweeSSSggSSeee-----KQ-NQJi",
653        "-.------ppppkkkkkkQQQnnnnnqiiLLL---------VVVVVVweeeSSSgSeee------LK-NZJv",
654        "-.-------pppkkkkkkkQQnnnnnqiiiLL---------VVVVVVVeeeeSSSeee-------LK-NZJm",
655        "-.--------ppkkkkkkkkQnnnnnqiiiiL----------------eeeeeSSee--------WK-BZJf",
656        "-.---------pkkkkkkkkknnnnnqiiiii----------------eeeeeeSe---------WK-BZJy",
657    };
658    const char *expected_frequency[] = {
659        // considering gaps:
660        "0=9876567890987656789987655567898666544457654567654456743334567052404461",
661        "0=0000000000000000000000000000000000000000000000000000000000000000000000",
662        // ignoring gaps:
663        "==0000000000987656789987655567895757865688765678654456743345670=224=4461",
664        "==0000000000000000000000000000000505360637520257000000003720050=000=0000",
665    };
666    const char *expected_consensus[] = {
667        "==----..aaaAhhh...dddDDDDDDIIIII----.....i.....f...aaaAa.....--=.X.=DDI.", // default settings (see ConsensusBuildParams-ctor), gapbound=60, considbound=30, lower/upper=70/95
668        "==AAAAAAAAAAhhh...dddDDDDDDIIIII.i.fi...fii...ff...aaaAa.....dD=XX.=DDI.", // countgaps=0
669        "==AAAAAAAAAAHHhhdddDDDDDDDDIIIIIiIiFIiifFIIiifFFdaaaAAAaaaaadDD=XXh=DDId", // countgaps=0,              considbound= 26, lower=0, upper=75
670        "==---aaaaAAAHHhhdddDDDDDDDDIIIII-iifiiiffiiiifffdaaaAAAaaaaadd-=xXh=DDId", // countgaps=1, gapbound=70, considbound= 26, lower=0, upper=75
671        "==---aaaaAAAHHhhdddDDDDDDDDIIIII-iifiiiffiiiifffdaaaAAAaaaaadd-=aah=DDId", // countgaps=1, gapbound=70, considbound= 20, lower=0, upper=75
672        "==---aaaaAAAHHhhXddDDDDDDDDIIIII-ixfiixffiiiXfffdXaaAAAaaaaxdd-=xXX=DDIX", // countgaps=1, gapbound=70, considbound= 51, lower=0, upper=75
673        "==---aaaaAAAHXXXXXXXDDDDDDDIIIII-xxxxxxxxXXXXXXXXXXXXAAXXXxxxx-=xXX=DDIX", // countgaps=1, gapbound=70, considbound= 90, lower=0, upper=75
674        "==---aaaaAAAXXXXXXXXXDDDDDDIIIII-xxxxxxxxXXXXXXXXXXXXXAXXXxxxx-=xXX=DDIX", // countgaps=1, gapbound=70, considbound=100, lower=0, upper=75
675        "==---aaaaAAAHHhhdddDDDDDDDDIIIII-iifiiiffiiiifffdaaaAAAaaaaadd-=aah=DDId", // countgaps=1, gapbound=70, considbound=  0, lower=0, upper=75
676    };
677    const size_t seqlen         = strlen(sequence[0]);
678    const int    sequenceCount  = ARRAY_ELEMS(sequence);
679    const int    consensusCount = ARRAY_ELEMS(expected_consensus);
680
681    // create DB
682    GB_shell    shell;
683    const char *aliname = "ali_ami";
684    GBDATA     *gb_main = create_simple_seq_db(aliname, "ami", sequence, sequenceCount, seqlen);
685
686    ConsensusBuildParams BK;
687    for (int c = 0; c<consensusCount; ++c) {
688        TEST_ANNOTATE(GBS_global_string("c=%i", c));
689        switch (c) {
690            case 0: break;                                                     // use default settings
691            case 1: BK.countgaps   = false; break;                             // dont count gaps
692            case 2: BK.considbound = 26; BK.lower = 0; BK.upper = 75; break;   // settings from #663
693            case 3: BK.countgaps   = true; BK.gapbound = 70; break;
694            case 4: BK.considbound = 20; break;
695            case 5: BK.considbound = 51; break;
696            case 6: BK.considbound = 90; break;
697            case 7: BK.considbound = 100; break;
698            case 8: BK.considbound = 0; break;
699            default: arb_assert(0); break;                                     // missing
700        }
701
702        {
703            GB_transaction  ta(gb_main);
704            const char     *sainame = "CONSENSUS";
705            TEST_EXPECT_NO_ERROR(CON_calculate(gb_main, BK, aliname, false, sainame));
706
707            GBDATA     *gb_consensus = GBT_find_SAI(gb_main, sainame);
708            GBDATA     *gb_seq       = GBT_find_sequence(gb_consensus, aliname);
709            const char *consensus    = GB_read_char_pntr(gb_seq);
710
711            TEST_EXPECT_EQUAL(consensus, expected_consensus[c]);
712        }
713    }
714
715    // test max.frequency
716    const char *sainame = "MAXFREQ";
717    for (int ignore_gaps = 0; ignore_gaps<=1; ++ignore_gaps) {
718        TEST_ANNOTATE(GBS_global_string("ignore_gaps=%i", ignore_gaps));
719        TEST_EXPECT_NO_ERROR(CON_calc_max_freq(gb_main, ignore_gaps, sainame, aliname));
720        const char *data, *dat2;
721        read_frequency(gb_main, sainame, aliname, data, dat2);
722        TEST_EXPECT_EQUAL(data, expected_frequency[ignore_gaps*2]);
723        TEST_EXPECT_EQUAL(dat2, expected_frequency[ignore_gaps*2+1]);
724    }
725
726    GB_close(gb_main);
727}
728
729#endif // UNIT_TESTS
730
Note: See TracBrowser for help on using the repository browser.