source: branches/help/NTREE/AP_consensus.cxx

Last change on this file was 18781, checked in by westram, 3 years ago
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 32.5 KB
Line 
1// ================================================================= //
2//                                                                   //
3//   File      : AP_consensus.cxx                                    //
4//   Purpose   : calculate consensus SAIs                            //
5//                                                                   //
6//   http://www.arb-home.de/                                         //
7//                                                                   //
8// ================================================================= //
9
10#include "NT_local.h"
11
12#include <aw_root.hxx>
13#include <aw_msg.hxx>
14#include <aw_awar.hxx>
15
16#include <arbdbt.h>
17
18#include <arb_strbuf.h>
19#include <arb_defs.h>
20#include <arb_progress.h>
21
22#include <awt_config_manager.hxx>
23#include <awt_misc.hxx>
24#include <awt_sel_boxes.hxx>
25
26// AISC_MKPT_PROMOTE:#ifndef AW_BASE_HXX
27// AISC_MKPT_PROMOTE:#include <aw_base.hxx>
28// AISC_MKPT_PROMOTE:#endif
29
30#define AWAR_MAX_FREQ_PREFIX      "tmp/CON_MAX_FREQ/"
31#define AWAR_CONSENSUS_PREFIX     "consensus/"
32#define AWAR_CONSENSUS_PREFIX_TMP "tmp/" AWAR_CONSENSUS_PREFIX
33
34#define AWAR_MAX_FREQ_IGNORE_GAPS AWAR_MAX_FREQ_PREFIX "no_gaps"
35#define AWAR_MAX_FREQ_SAI_NAME    AWAR_MAX_FREQ_PREFIX "sai_name"
36
37#define AWAR_CONSENSUS_MARKED_ONLY AWAR_CONSENSUS_PREFIX_TMP "marked_only"
38#define AWAR_CONSENSUS_ALIGNMENT   AWAR_CONSENSUS_PREFIX_TMP "alignment"
39#define AWAR_CONSENSUS_COUNTGAPS   AWAR_CONSENSUS_PREFIX "countgaps"
40#define AWAR_CONSENSUS_GAPBOUND    AWAR_CONSENSUS_PREFIX "gapbound"
41#define AWAR_CONSENSUS_GROUP       AWAR_CONSENSUS_PREFIX "group"
42#define AWAR_CONSENSUS_CONSIDBOUND AWAR_CONSENSUS_PREFIX "considbound"
43#define AWAR_CONSENSUS_UPPER       AWAR_CONSENSUS_PREFIX "upper"
44#define AWAR_CONSENSUS_LOWER       AWAR_CONSENSUS_PREFIX "lower"
45#define AWAR_CONSENSUS_NAME        AWAR_CONSENSUS_PREFIX_TMP "name"
46
47#define CONSENSUS_AWAR_SOURCE CAS_NTREE
48#include <consensus.h>
49#include <consensus_config.h>
50#include <chartable.h>
51
52static int CON_insertSequences(GBDATA *gb_main, const char *aliname, long IF_ASSERTION_USED(maxalignlen), bool onlymarked, BaseFrequencies& freqs) {
53    /*! read sequence data and fill into 'freqs'
54     * @param gb_main       database
55     * @param aliname       name of alignment
56     * @param maxalignlen   length of alignment
57     * @param onlymarked    true -> marked only
58     * @param freqs         sequences are inserted here (has to be empty)
59     * @return number of inserted sequences
60     */
61    long nrofspecies = onlymarked ? GBT_count_marked_species(gb_main) : GBT_get_species_count(gb_main);
62
63    arb_progress progress(nrofspecies);
64    progress.auto_subtitles("Examining sequence");
65
66    GBDATA *gb_species = onlymarked ? GBT_first_marked_species(gb_main) : GBT_first_species(gb_main);
67    while (gb_species) {
68        GBDATA *alidata = GBT_find_sequence(gb_species, aliname);
69        if (alidata) {
70            const char *data   = GB_read_char_pntr(alidata);
71            size_t      length = GB_read_string_count(alidata);
72
73            nt_assert(long(length)<=maxalignlen);
74            freqs.add(data, length);
75        }
76        gb_species = onlymarked ? GBT_next_marked_species(gb_species) : GBT_next_species(gb_species);
77        ++progress;
78    }
79
80    int inserted = freqs.added_sequences();
81    if (nrofspecies < inserted) {
82        GBT_message(gb_main, GBS_global_string("Only %i of %li %sspecies contain data in alignment '%s'",
83                                               inserted, nrofspecies, onlymarked ? "marked " : "", aliname));
84        progress.done();
85    }
86
87    return inserted;
88}
89
90static GB_ERROR CON_export(GBDATA *gb_main, const char *savename, const char *align, const char *result, bool onlymarked, long nrofspecies, const ConsensusBuildParams& BK) {
91    /*! writes consensus SAI to DB
92     * @param gb_main      database
93     * @param savename     name of SAI to save to
94     * @param align        alignment name
95     * @param result       SAI data to write
96     * @param onlymarked   true -> was calculated on marked only (used for SAI comment)
97     * @param nrofspecies  number of used sequences (used for SAI comment; if less than 20 -> add an explicit list to field '_SPECIES')
98     * @param BK           parameters used for consensus calculation (used for SAI comment)
99     * @return error if something goes wrong
100     */
101    const char *off = "off";
102    const char *on  = "on";
103
104    GBDATA   *gb_extended = GBT_find_or_create_SAI(gb_main, savename);
105    GBDATA   *gb_data     = GBT_add_data(gb_extended, align, "data", GB_STRING);
106    GB_ERROR  err         = GB_write_string(gb_data, result);
107    if (!err) {
108        GBDATA *gb_options = GBT_add_data(gb_extended, align, "_TYPE", GB_STRING);
109
110        const char *allvsmarked     = onlymarked ? "marked" : "all";
111        const char *countgapsstring = BK.countgaps ? on : off;
112        const char *simplifystring  = BK.group ? on : off;
113
114        {
115            char *buffer = ARB_alloc<char>(2000);
116            sprintf(buffer, "CON: [species: %s]  [number: %ld]  [count gaps: %s] "
117                    "[threshold for gaps: %d]  [simplify: %s] "
118                    "[threshold for group: %d]  [upper: %d]  [lower: %d]",
119                    allvsmarked, nrofspecies, countgapsstring,
120                    BK.gapbound, simplifystring,
121                    BK.considbound, BK.upper, BK.lower);
122
123            err = GB_write_string(gb_options, buffer);
124            free(buffer);
125        }
126
127        if (!err) {
128            GBDATA *gb_names  = GB_search(GB_get_father(gb_options), "_SPECIES", GB_FIND);
129            if (gb_names) err = GB_delete(gb_names); // delete old entry
130        }
131
132        if (!err && nrofspecies<20) {
133            GBS_strstruct namelist(1000);
134
135            GBDATA *gb_species =
136                onlymarked
137                ? GBT_first_marked_species(gb_main)
138                : GBT_first_species(gb_main);
139
140            while (gb_species) {
141                if (GBT_find_sequence(gb_species, align)) {
142                    GBDATA     *gb_speciesname = GB_search(gb_species, "name", GB_FIND);
143                    const char *name           = GB_read_char_pntr(gb_speciesname);
144
145                    namelist.cat(name);
146                    namelist.put( ' ');
147                }
148                if (onlymarked) gb_species = GBT_next_marked_species(gb_species);
149                else gb_species            = GBT_next_species(gb_species);
150            }
151
152            err = GBT_write_string(GB_get_father(gb_options), "_SPECIES", namelist.get_data());
153        }
154
155        // remove data relicts from "complex consensus" (no longer supported)
156        if (!err) {
157            char    buffer2[256];
158            sprintf(buffer2, "%s/FREQUENCIES", align);
159            GBDATA *gb_graph  = GB_search(gb_extended, buffer2, GB_FIND);
160            if (gb_graph) err = GB_delete(gb_graph);  // delete old entry
161        }
162    }
163
164    if (err) err = GBS_global_string("Failed to store consensus '%s' (Reason: %s)", savename, err);
165    return err;
166}
167
168static GB_ERROR CON_calculate(GBDATA *gb_main, const ConsensusBuildParams& BK, const char *aliname, bool onlymarked, const char *sainame) {
169    /*! calculates the consensus and writes it to SAI 'sainame'
170     * Description how consensus is calculated: ../HELP_SOURCE/source/consensus_def.hlp
171     * @param gb_main     database
172     * @param BK          parameters for consensus calculation
173     * @param aliname     alignment name
174     * @param onlymarked  true -> use marked sequences only
175     * @param sainame     name of destination SAI
176     * @return error if something goes wrong
177     */
178    GB_ERROR error = NULp;
179
180    GB_push_transaction(gb_main);
181
182    long maxalignlen = GBT_get_alignment_len(gb_main, aliname);
183    if (maxalignlen <= 0) error = GB_export_errorf("alignment '%s' doesn't exist", aliname);
184
185    if (!error) {
186        arb_progress progress("Calculating consensus");
187
188        GB_alignment_type alitype = GBT_get_alignment_type(gb_main, aliname);
189        BaseFrequencies::setup("-.", alitype);
190
191        BaseFrequencies freqs(maxalignlen);
192        int nrofspecies = CON_insertSequences(gb_main, aliname, maxalignlen, onlymarked, freqs);
193
194        if (BK.lower>BK.upper) {
195            error = "fault: lower greater than upper";
196        }
197        else {
198            char *result = freqs.build_consensus_string(BK);
199            error = CON_export(gb_main, sainame, aliname, result, onlymarked, nrofspecies, BK);
200            free(result);
201        }
202    }
203
204    error = GB_end_transaction(gb_main, error);
205
206    return error;
207}
208
209static void CON_calculate_cb(AW_window *aw) {
210    AW_root *awr        = aw->get_root();
211    char    *aliname    = awr->awar(AWAR_CONSENSUS_ALIGNMENT)->read_string();
212    char    *sainame    = awr->awar(AWAR_CONSENSUS_NAME)->read_string();
213    bool     onlymarked = awr->awar(AWAR_CONSENSUS_MARKED_ONLY)->read_int();
214
215    ConsensusBuildParams BK(awr);
216
217    {
218#if defined(ASSERTION_USED)
219        GB_transaction ta(GLOBAL.gb_main);
220        LocallyModify<bool> denyAwarReads(AW_awar::deny_read, true);
221        LocallyModify<bool> denyAwarWrites(AW_awar::deny_write, true);
222#endif
223
224        GB_ERROR error = CON_calculate(GLOBAL.gb_main, BK, aliname, onlymarked, sainame);
225        aw_message_if(error);
226    }
227
228    free(sainame);
229    free(aliname);
230}
231
232static void consensus_upper_lower_changed_cb(AW_root *awr, bool upper_changed) {
233    AW_awar *awar_lower = awr->awar(AWAR_CONSENSUS_LOWER);
234    AW_awar *awar_upper = awr->awar(AWAR_CONSENSUS_UPPER);
235
236    int lower = awar_lower->read_int();
237    int upper = awar_upper->read_int();
238
239    if (upper<lower) {
240        if (upper_changed) awar_lower->write_int(upper);
241        else               awar_upper->write_int(lower);
242    }
243}
244
245void AP_create_consensus_var(AW_root *aw_root, AW_default aw_def) {
246    GB_transaction ta(GLOBAL.gb_main);
247    {
248        char *defali = GBT_get_default_alignment(GLOBAL.gb_main);
249        aw_root->awar_string(AWAR_CONSENSUS_ALIGNMENT, defali, aw_def);
250        free(defali);
251    }
252    aw_root->awar_int(AWAR_CONSENSUS_MARKED_ONLY, 1,  aw_def);
253    aw_root->awar_int(AWAR_CONSENSUS_GROUP,       0,  aw_def);
254    aw_root->awar_int(AWAR_CONSENSUS_COUNTGAPS,   1,  aw_def);
255    aw_root->awar_int(AWAR_CONSENSUS_UPPER,       95, aw_def)->set_minmax(0, 100)->add_callback(makeRootCallback(consensus_upper_lower_changed_cb, true));
256    aw_root->awar_int(AWAR_CONSENSUS_LOWER,       70, aw_def)->set_minmax(0, 100)->add_callback(makeRootCallback(consensus_upper_lower_changed_cb, false));
257    aw_root->awar_int(AWAR_CONSENSUS_GAPBOUND,    60, aw_def)->set_minmax(0, 100);
258    aw_root->awar_int(AWAR_CONSENSUS_CONSIDBOUND, 30, aw_def)->set_minmax(0, 100);
259    aw_root->awar_int(AWAR_MAX_FREQ_IGNORE_GAPS,  1,  aw_def);
260
261    aw_root->awar_string(AWAR_CONSENSUS_NAME,    "CONSENSUS",     aw_def);
262    aw_root->awar_string(AWAR_MAX_FREQ_SAI_NAME, "MAX_FREQUENCY", aw_def);
263}
264
265static AWT_config_mapping_def consensus_config_mapping[] = {
266    { AWAR_CONSENSUS_COUNTGAPS,   CONSENSUS_CONFIG_COUNTGAPS },
267    { AWAR_CONSENSUS_GAPBOUND,    CONSENSUS_CONFIG_GAPBOUND },
268    { AWAR_CONSENSUS_GROUP,       CONSENSUS_CONFIG_GROUP },
269    { AWAR_CONSENSUS_CONSIDBOUND, CONSENSUS_CONFIG_CONSIDBOUND },
270    { AWAR_CONSENSUS_UPPER,       CONSENSUS_CONFIG_UPPER },
271    { AWAR_CONSENSUS_LOWER,       CONSENSUS_CONFIG_LOWER },
272
273    // make sure the keywords of the following entries
274    // DIFFER from those defined at ../TEMPLATES/consensus_config.h@CommonEntries
275
276    { AWAR_CONSENSUS_MARKED_ONLY, "marked_only" },
277    { AWAR_CONSENSUS_NAME,        "name" },
278
279    { NULp, NULp }
280};
281
282AW_window *AP_create_con_expert_window(AW_root *aw_root) {
283    // keep in sync with ../EDIT4/ED4_no_class.cxx@ED4_create_consensus_definition_window
284
285    AW_window_simple *aws = new AW_window_simple;
286    aws->init(aw_root, "CALCULATE_CONSENSUS", "CONSENSUS OF SEQUENCES");
287    aws->load_xfig("consensus/expert.fig");
288
289    aws->auto_space(5, 5);
290
291    const int SCALEDCOLUMNS = 3;
292    const int SCALERSIZE    = 150;
293
294    // top part of window:
295    aws->button_length(9);
296
297    aws->at("cancel");
298    aws->callback(AW_POPDOWN);
299    aws->create_button("CLOSE", "CLOSE", "C");
300
301    aws->at("help");
302    aws->callback(makeHelpCallback("consensus.hlp"));
303    aws->create_button("HELP", "HELP", "H");
304
305    // left part of window:
306    aws->at("which_alignment");
307    awt_create_ALI_selection_list(GLOBAL.gb_main, (AW_window *)aws, AWAR_CONSENSUS_ALIGNMENT, "*=");
308
309    aws->at("which_species");
310    aws->create_toggle_field(AWAR_CONSENSUS_MARKED_ONLY);
311    aws->insert_toggle        ("all",    "a", 0);
312    aws->insert_default_toggle("marked", "m", 1);
313    aws->update_toggle_field();
314
315    aws->at("save_box");
316    awt_create_SAI_selection_list(GLOBAL.gb_main, aws, AWAR_CONSENSUS_NAME, false);
317
318    aws->at("name");
319    aws->create_input_field(AWAR_CONSENSUS_NAME, 10);
320
321    // right part of window (same as in EDIT4):
322    aws->at("countgaps");
323    aws->create_toggle_field(AWAR_CONSENSUS_COUNTGAPS, NULp, "");
324    aws->insert_toggle        ("on",  "1", 1);
325    aws->insert_default_toggle("off", "0", 0);
326    aws->update_toggle_field();
327
328    aws->at("gapbound");
329    aws->create_input_field_with_scaler(AWAR_CONSENSUS_GAPBOUND, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
330
331    aws->at("group");
332    aws->create_toggle_field(AWAR_CONSENSUS_GROUP, NULp, "");
333    aws->insert_toggle        ("on",  "1", 1);
334    aws->insert_default_toggle("off", "0", 0);
335    aws->update_toggle_field();
336
337    aws->at("considbound");
338    aws->create_input_field_with_scaler(AWAR_CONSENSUS_CONSIDBOUND, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
339
340    aws->at("showgroups");
341    aws->callback(AWT_create_IUPAC_info_window);
342    aws->create_autosize_button("SHOW_IUPAC", "Show IUPAC groups", "s");
343
344    aws->at("upper");
345    aws->create_input_field_with_scaler(AWAR_CONSENSUS_UPPER, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
346
347    aws->at("lower");
348    aws->create_input_field_with_scaler(AWAR_CONSENSUS_LOWER, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
349
350    // bottom part of window:
351    aws->at("calculate");
352    aws->callback(CON_calculate_cb);
353    aws->create_button("GO", "GO", "G");
354
355    aws->at("config");
356    AWT_insert_config_manager(aws, AW_ROOT_DEFAULT, CONSENSUS_CONFIG_ID, consensus_config_mapping);
357
358    return aws;
359}
360
361static GB_ERROR CON_calc_max_freq(GBDATA *gb_main, bool ignore_gaps, const char *savename, const char *aliname) {
362    /*! calculates the maximum frequency for each column and write to SAI
363     * @param gb_main      database
364     * @param ignore_gaps  true -> ignore gaps; see ../HELP_SOURCE/source/max_freq.hlp@Gaps
365     * @param savename     name of destination SAI
366     * @param aliname      name of alignment to use
367     * @return error if something goes wrong
368     */
369    arb_assert(!GB_have_error());
370
371    GB_ERROR       error = NULp;
372    GB_transaction ta(gb_main);
373
374    long maxalignlen = GBT_get_alignment_len(gb_main, aliname);
375    if (maxalignlen<=0) {
376        GB_clear_error();
377        error = "alignment doesn't exist!";
378    }
379    else {
380        arb_progress progress("Calculating max. frequency");
381
382        GB_alignment_type alitype = GBT_get_alignment_type(gb_main, aliname);
383        BaseFrequencies::setup("-.", alitype);
384
385        const int onlymarked  = 1;
386        BaseFrequencies freqs(maxalignlen);
387        long nrofspecies = CON_insertSequences(gb_main, aliname, maxalignlen, onlymarked, freqs);
388
389        char *result1 = new char[maxalignlen+1];
390        char *result2 = new char[maxalignlen+1];
391
392        result1[maxalignlen] = 0;
393        result2[maxalignlen] = 0;
394
395        for (int pos = 0; pos < maxalignlen; pos++) {
396            double mf  = freqs.max_frequency_at(pos, ignore_gaps);
397            int    mfi = int(mf*100.0+0.01); // frequency -> [0..100]; add 1/100 to reduce incompatibilities caused by 32/64 bit differences
398
399            if (mfi) {
400                if (mfi<10) mfi = 10; // hack: otherwise SAI will contain '0' (meaning 100% frequency)
401
402                int mfh = int(mfi/10);
403                int mfl = mfi-10*mfh;
404
405                result1[pos] = "?1234567890"[mfh];
406                result2[pos] = "0123456789"[mfl];
407            }
408            else {
409                result1[pos] = '=';
410                result2[pos] = '=';
411            }
412        }
413
414        GBDATA *gb_extended = GBT_find_or_create_SAI(gb_main, savename);
415        if (!gb_extended) {
416            error = GB_await_error();
417        }
418        else {
419            GBDATA *gb_data1 = GBT_add_data(gb_extended, aliname, "data", GB_STRING);
420            GBDATA *gb_data2 = GBT_add_data(gb_extended, aliname, "dat2", GB_STRING);
421
422            error             = GB_write_string(gb_data1, result1);
423            if (!error) error = GB_write_string(gb_data2, result2);
424
425            GBDATA *gb_options = GBT_add_data(gb_extended, aliname, "_TYPE", GB_STRING);
426
427            if (!error) {
428                const char *type = GBS_global_string("MFQ: [species: %li] [ignore gaps: %s]", nrofspecies, ignore_gaps ? "yes" : "no");
429                error            = GB_write_string(gb_options, type);
430            }
431        }
432
433        delete [] result1;
434        delete [] result2;
435    }
436
437    error = ta.close(error);
438    arb_assert(!GB_have_error());
439
440    return error;
441}
442
443static void CON_calc_max_freq_cb(AW_window *aw) {
444    AW_root    *awr         = aw->get_root();
445    bool        ignore_gaps = awr->awar(AWAR_MAX_FREQ_IGNORE_GAPS)->read_int();
446    const char *savename    = awr->awar(AWAR_MAX_FREQ_SAI_NAME)->read_char_pntr();
447    char       *aliname     = GBT_get_default_alignment(GLOBAL.gb_main);
448
449    GB_ERROR error = CON_calc_max_freq(GLOBAL.gb_main, ignore_gaps, savename, aliname);
450    if (error) aw_message(error);
451
452    free(aliname);
453}
454
455AW_window *AP_create_max_freq_window(AW_root *aw_root) {
456    AW_window_simple *aws = new AW_window_simple;
457    aws->init(aw_root, "MAX_FREQUENCY", "MAX FREQUENCY");
458    aws->load_xfig("consensus/max_freq.fig");
459
460    GB_push_transaction(GLOBAL.gb_main);
461
462    aws->button_length(6);
463
464    aws->at("cancel");
465    aws->callback(AW_POPDOWN);
466    aws->create_button("CLOSE", "CLOSE", "C");
467
468    aws->at("help"); aws->callback(makeHelpCallback("max_freq.hlp"));
469    aws->create_button("HELP", "HELP", "H");
470
471    // activation of consensus calculation by button ...
472    aws->at("go");
473    aws->callback(CON_calc_max_freq_cb);
474    aws->create_button("GO", "GO", "C");
475
476    aws->at("save");
477    aws->create_input_field(AWAR_MAX_FREQ_SAI_NAME, 1);
478
479    aws->at("sai");
480    awt_create_SAI_selection_list(GLOBAL.gb_main, aws, AWAR_MAX_FREQ_SAI_NAME, false);
481
482    aws->at("gaps");
483    aws->create_toggle(AWAR_MAX_FREQ_IGNORE_GAPS);
484
485    GB_pop_transaction(GLOBAL.gb_main);
486
487    return aws;
488}
489
490// --------------------------------------------------------------------------------
491
492#ifdef UNIT_TESTS
493#ifndef TEST_UNIT_H
494#include <test_unit.h>
495#endif
496
497static GBDATA *create_simple_seq_db(const char *aliname, const char *alitype, const char **sequence, int sequenceCount, int sequenceLength) {
498    GBDATA *gb_main = GB_open("nosuch.arb", "wc");
499
500    {
501        GB_transaction  ta(gb_main);
502        GBDATA         *gb_species_data = GBT_get_species_data(gb_main);
503        int             specCounter     = 0;
504
505        TEST_EXPECT_RESULT__NOERROREXPORTED(GBT_create_alignment(gb_main, aliname, sequenceLength, true, 6, alitype));
506
507        for (int s = 0; s<sequenceCount; ++s) {
508            GBDATA *gb_species = GBT_find_or_create_species_rel_species_data(gb_species_data, GBS_global_string("name%04i", ++specCounter), true);
509            GBDATA *gb_data    = GBT_add_data(gb_species, aliname, "data", GB_STRING);
510
511            TEST_EXPECT_EQUAL(strlen(sequence[s]), sequenceLength);
512            TEST_EXPECT_NO_ERROR(GB_write_string(gb_data, sequence[s]));
513        }
514    }
515
516#if 0
517    // save DB (to view data; should be inactive when committed)
518    char *dbname = GBS_global_string_copy("cons_%s.arb", alitype);
519    TEST_EXPECT_NO_ERROR(GB_save(gb_main, dbname, "a"));
520    free(dbname);
521#endif
522
523    return gb_main;
524}
525
526static void read_frequency(GBDATA *gb_main, const char *sainame, const char *aliname, const char*& data, const char*& dat2) {
527    GB_transaction ta(gb_main);
528
529    GBDATA *gb_maxFreq = GBT_find_SAI(gb_main, sainame);
530    GBDATA *gb_ali     = GB_entry(gb_maxFreq, aliname);
531    GBDATA *gb_data    = GB_entry(gb_ali, "data");
532    GBDATA *gb_dat2    = GB_entry(gb_ali, "dat2");
533
534    data = GB_read_char_pntr(gb_data);
535    dat2 = GB_read_char_pntr(gb_dat2);
536}
537
538void TEST_nucleotide_consensus_and_maxFrequency() {
539    const char *sequence[] = {
540        "-.AAAAAAAAAAcAAAAAAAAATTTTTTTTTTTTTTTTTAAAAAAAAgggggAAAAgAA----m-----yykm-mmmAAAAAAAAAmmmmmmmmmNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNKKKKKKKKKWWWWWWWWW",
541        "-.-AAAAAAAAAccAAAAAAAAggTTgTTTTgTTTTTTTcccAAAAAgggggAAAAgAA----k-----kykr-rrrAAAAAAAAmmmmmmmmmT-NNNNNNNNNANNNNNbNNNNNNNNkNNNNNNNNaNNNNNNNNbKKKKKKKKbWWWWWWWW",
542        "-.--AAAAAAAAcccAAAAAAA-ggTggTTTggTTTTTTccccAAAAgggCCtAAAgAC----m-----sykw-wvsAAAAAAAmmmmmmmmmTT--NNNNNNNNCANNNNbbNNNNNNNkkNNNNNNNaaNNNNNNNbbKKKKKKKbbWWWWWWW",
543        "-.---AAAAAAAccccAAAAAA-ggggggTTgggTTTTTcccccAAAggCCC-tAACtC----k----yyyys-smvAAAAAAmmmmmmmmmTTT---NNNNNNNGCANNNbbbNNNNNNkkkNNNNNNaaaNNNNNNbbbKKKKKKbbbWWWWWW",
544        "-.----AAAAAAcccccAAAAA----ggggTggggTTTTGGGcccAAgCCCt-ttACtG----m---nkkkky-yrmAAAAAmmmmmmmmmTTTT----NNNNNNTGCANNbbbbNNNNNkkkkNNNNNaaaaNNNNNbbbbKKKKKbbbbWWWWW",
545        "-.-----AAAAAccccccAAAA----ggggggggggTTgGGGGcccAcCCtt--tttCG----k--nnssssk-kvrAAAAmmmmmmmmmTTTTT-----NNNNN-TGCANbbbbbNNNNkkkkkNNNNaaaaaNNNNbbbbbKKKKbbbbbWWWW",
546        "-.------AAAAcccccccAAA---------ggggggTgGGGGGccccCt----tt-gT----mydddyyyy-vvmsAAAmmmmmmmmmTTTTTT------NNNN-ATGCAbbbbbbNNNkkkkkkNNNaaaaaaNNNbbbbbbKKKbbbbbbWWW",
547        "-.-------AAAccccccccAA---------ggggggggttGGGGccct------t--T-yykkkbbbkkkk-hhrvAAmmmmmmmmmTTTTTTT-------NNN-C-TGCbbbbbbbNNkkkkkkkNNaaaaaaaNNbbbbbbbKKbbbbbbbWW",
548        "-.--------AAcccccccccA----------------gttGGGGGct-------t----ymmmmnnnssss-ddvmAmmmmmmmmmTTTTTTTT--------NN-G--TGbbbbbbbbNkkkkkkkkNaaaaaaaaNbbbbbbbbKbbbbbbbbW",
549        "-.---------Acccccccccc----------------gtAGGGGGG----------------k---------bbmrmmmmmmmmmTTTTTTTTT---------N-T---Tbbbbbbbbbkkkkkkkkkaaaaaaaaabbbbbbbbbbbbbbbbbb",
550    };
551    const char *expected_frequency[] = {
552        // considering gaps:
553        "0=9876567890098765678986665444576545675336544565434475454320888277654333462439988776654434567899876543222523222333322222444433332987765443333444444333444444",
554        "0=0000000000000000000000000000000000000000000000000000000000000500000055005565050505055050000000000000025050025210098765752075207257025702568013568568013568",
555        // ignoring gaps:
556        "==000000000009876567895757865688765678533654456554536655542=552233223333222439988776654434567892222222222222222333322222444433332987765443333444444333444444",
557        "==000000000000000000000505360637520257000000000502036075025=005530983388555565050505055050000005555555555555555210098765752075207257025702568013568568013568",
558    };
559    const char *expected_consensus[] = {
560        "==----..aaaACccMMMMMaa----.....g.kkk.uKb.ssVVmmss...-.ww...=---.---..byk.-.mVAaaaaMMMMmmHH..uuu----............BBbb.....Kkkkkk...aaaa.....BkkkkkkkKB....wwww", // default settings (see ConsensusBuildParams-ctor), gapbound=60, considbound=30, lower/upper=70/95
561        "==AAAAAAAAAACccMMMMMaaKgKugKKKuggKKKuuKb.ssVVmmssssBWWWWs..=Y.......BByk...mVAaaaaMMMMmmHH..uuu................BBbb.....Kkkkkk...aaaa.....BkkkkkkkKB....wwww", // countgaps=0
562        "==AAAAAAAAAACCCMMMMMAAKGKUGKKKUGGKKKUUKBsSSVVMMSSSSBWWWWSwa=YcaaykkkBBYKaaaMVAAAAAMMMMMMHHuuuUUaaaaaaaaaaaaaaaaBBBBBBBBcKKKKKkkkkAAAaaaaaaBBKKKKKKKBBuuuwWWW", // countgaps=0,              considbound=26, lower=0, upper=75 (as described in #663)
563        "==AAAAAAAAAACCCMMMMMAAKKKKGKKKUGKKKKKUKBsSSVVMMSSSSBWWWWSwN=YHNNykkkBBYKNNNVVAAAAMMMMMMMHHHuuUUNNNNNNNNNNNNNNNNBBBBBBBBBKKKKKkkkkAAAaaaaaaBBKKKKKKKBBuuwwWWW", // countgaps=0,              considbound=25, lower=0, upper=75
564        "==---aaaaAAACCCMMMMMAA-gkugkkkuggKKKuuKBsSSVVMMSsssb-wwWswa=---a--kkbBykaaaMVAAAAAMMMMMMHHuuuUU---aaaaaaaaaaaaaBBBBBBBBcKKKKKkkkkAAAaaaaaaBBKKKKKKKBBuuuwWWW", // countgaps=1, gapbound=70, considbound=26, lower=0, upper=75
565        "==---aaaaAAACCMMMMMMMA-kkkgkkkugKKKKKuKBNSVVVVMSsssb-wwWswN=---N--nnbBBBnnNVVAAAMMMMMMMHHHHHuUU---nnnnNNNnNnNNNBBBBBBBNNKKKKKkkNNAAAaaaaNNBBBBKKKKKBBBNwwWWW", // countgaps=1, gapbound=70, considbound=20, lower=0, upper=75
566        "==---aaaaAAACMMMMMMMMM-kkkkkkkkKKKKKKKKNNVVVVVVBBbbb-wwWbnN=---N--nnbBBBnnNVVMMMMMMMMMHHHHHHHHH---nnnnNNNnNnNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNBBBBBBBBBNNNNNNNNN", // countgaps=1, gapbound=70, considbound= 1, lower=0, upper=75
567        "==---aaaaAAACMMMMMMMMM-kkkkkkkkKKKKKKKKNNVVVVVVBBbbb-wwWbnN=---N--nnbBBBnnNVVMMMMMMMMMHHHHHHHHH---nnnnNNNnNnNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNBBBBBBBBBNNNNNNNNN", // countgaps=1, gapbound=70, considbound= 0, lower=0, upper=75
568        "==AAAAAAAAAACMMMMMMMMMKKKKKKKKKKKKKKKKKNNVVVVVVBBBBBWWWWBNN=YHNNNNNNBBBBNNNVVMMMMMMMMMHHHHHHHHHNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNBBBBBBBBBNNNNNNNNN", // countgaps=0,              considbound= 0, lower=0, upper=75
569    };
570    const size_t seqlen         = strlen(sequence[0]);
571    const int    sequenceCount  = ARRAY_ELEMS(sequence);
572    const int    consensusCount = ARRAY_ELEMS(expected_consensus);
573
574    // create DB
575    GB_shell    shell;
576    const char *aliname = "ali_nuc";
577    GBDATA     *gb_main = create_simple_seq_db(aliname, "rna", sequence, sequenceCount, seqlen);
578
579    ConsensusBuildParams BK;
580    for (int c = 0; c<consensusCount; ++c) {
581        TEST_ANNOTATE(GBS_global_string("c=%i", c));
582        switch (c) {
583            case 0: break;                                                     // use default settings
584            case 1: BK.countgaps   = false; break;                             // dont count gaps
585            case 2: BK.considbound = 26; BK.lower = 0; BK.upper = 75; break;   // settings from #663
586            case 3: BK.considbound = 25; break;
587            case 4: BK.considbound = 26; BK.countgaps = true; BK.gapbound = 70; break;
588            case 5: BK.considbound = 20; break;
589            case 6: BK.considbound = 1; break;
590            case 7: BK.considbound = 0; break;
591            case 8: BK.countgaps   = false; break;
592            default: arb_assert(0); break;                                     // missing
593        }
594
595        {
596            GB_transaction  ta(gb_main);
597            const char     *sainame = "CONSENSUS";
598            TEST_EXPECT_NO_ERROR(CON_calculate(gb_main, BK, aliname, false, sainame));
599
600            GBDATA     *gb_consensus = GBT_find_SAI(gb_main, sainame);
601            GBDATA     *gb_seq       = GBT_find_sequence(gb_consensus, aliname);
602            const char *consensus    = GB_read_char_pntr(gb_seq); // @@@ NOT_ALL_SAI_HAVE_DATA
603
604            TEST_EXPECT_EQUAL(consensus, expected_consensus[c]);
605        }
606    }
607
608    // test max.frequency
609    const char *sainame = "MAXFREQ";
610    for (int ignore_gaps = 0; ignore_gaps<=1; ++ignore_gaps) {
611        TEST_ANNOTATE(GBS_global_string("ignore_gaps=%i", ignore_gaps));
612        TEST_EXPECT_NO_ERROR(CON_calc_max_freq(gb_main, ignore_gaps, sainame, aliname));
613        const char *data, *dat2;
614        read_frequency(gb_main, sainame, aliname, data, dat2);
615        TEST_EXPECT_EQUAL(data, expected_frequency[ignore_gaps*2]);
616        TEST_EXPECT_EQUAL(dat2, expected_frequency[ignore_gaps*2+1]);
617    }
618
619    GB_close(gb_main);
620}
621
622void TEST_amino_consensus_and_maxFrequency() {
623    const char *sequence[] = {
624        "-.ppppppppppQQQQQQQQQDDDDDELLLLLwwwwwwwwwwwwwwwwgggggggggggSSSe-PPP-DELp",
625        "-.-pppppppppkQQQQQQQQnDDDDELLLLLVVwwVwwwwVwwwwwwSgggggggggSSSee-QPP-DELa",
626        "-.--ppppppppkkQQQQQQQnnDDDELLLLL-VVwVVwwwVVwwwwwSSgggggggSSSeee-KQP-DEIg",
627        "-.---pppppppkkkQQQQQQnnnDDELLLLL-VVVVVVwwVVVwwwwSSSgggggSSSeee--LQQ-DQIs",
628        "-.----ppppppkkkkQQQQQnnnnDELLLLL----VVVVwVVVVwwweSSSgggSSSeee---WKQ-NQJt",
629        "-.-----pppppkkkkkQQQQnnnnnqiLLLL----VVVVVVVVVVwweeSSSggSSeee-----KQ-NQJq",
630        "-.------ppppkkkkkkQQQnnnnnqiiLLL---------VVVVVVweeeSSSgSeee------LK-NZJn",
631        "-.-------pppkkkkkkkQQnnnnnqiiiLL---------VVVVVVVeeeeSSSeee-------LK-NZJe",
632        "-.--------ppkkkkkkkkQnnnnnqiiiiL----------------eeeeeSSee--------WK-BZJd",
633        "-.---------pkkkkkkkkknnnnnqiiiii----------------eeeeeeSe---------WK-BZJb",
634        "-.ppppppppppQQQQQQQQQDDDDDELLLLLwwwwwwwwwwwwwwwwgggggggggggSSSe-PPP-DELz",
635        "-.-pppppppppkQQQQQQQQnDDDDELLLLLVVwwVwwwwVwwwwwwSgggggggggSSSee-QPP-DELh",
636        "-.--ppppppppkkQQQQQQQnnDDDELLLLL-VVwVVwwwVVwwwwwSSgggggggSSSeee-KQP-DEIk",
637        "-.---pppppppkkkQQQQQQnnnDDELLLLL-VVVVVVwwVVVwwwwSSSgggggSSSeee--LQQ-DQIr",
638        "-.----ppppppkkkkQQQQQnnnnDELLLLL----VVVVwVVVVwwweSSSgggSSSeee---WKQ-NQJl",
639        "-.-----pppppkkkkkQQQQnnnnnqiLLLL----VVVVVVVVVVwweeSSSggSSeee-----KQ-NQJi",
640        "-.------ppppkkkkkkQQQnnnnnqiiLLL---------VVVVVVweeeSSSgSeee------LK-NZJv",
641        "-.-------pppkkkkkkkQQnnnnnqiiiLL---------VVVVVVVeeeeSSSeee-------LK-NZJm",
642        "-.--------ppkkkkkkkkQnnnnnqiiiiL----------------eeeeeSSee--------WK-BZJf",
643        "-.---------pkkkkkkkkknnnnnqiiiii----------------eeeeeeSe---------WK-BZJy",
644    };
645    const char *expected_frequency[] = {
646        // considering gaps:
647        "0=9876567890987656789987655567898666544457654567654456743334567052404461",
648        "0=0000000000000000000000000000000000000000000000000000000000000000000000",
649        // ignoring gaps:
650        "==0000000000987656789987655567895757865688765678654456743345670=224=4461",
651        "==0000000000000000000000000000000505360637520257000000003720050=000=0000",
652    };
653    const char *expected_consensus[] = {
654        "==----..aaaAhhh...dddDDDDDDIIIII----.....i.....f...aaaAa.....--=.X.=DDI.", // default settings (see ConsensusBuildParams-ctor), gapbound=60, considbound=30, lower/upper=70/95
655        "==AAAAAAAAAAhhh...dddDDDDDDIIIII.i.fi...fii...ff...aaaAa.....dD=XX.=DDI.", // countgaps=0
656        "==AAAAAAAAAAHHhhdddDDDDDDDDIIIIIiIiFIiifFIIiifFFdaaaAAAaaaaadDD=XXh=DDId", // countgaps=0,              considbound= 26, lower=0, upper=75
657        "==---aaaaAAAHHhhdddDDDDDDDDIIIII-iifiiiffiiiifffdaaaAAAaaaaadd-=xXh=DDId", // countgaps=1, gapbound=70, considbound= 26, lower=0, upper=75
658        "==---aaaaAAAHHhhdddDDDDDDDDIIIII-iifiiiffiiiifffdaaaAAAaaaaadd-=aah=DDId", // countgaps=1, gapbound=70, considbound= 20, lower=0, upper=75
659        "==---aaaaAAAHHhhXddDDDDDDDDIIIII-ixfiixffiiiXfffdXaaAAAaaaaxdd-=xXX=DDIX", // countgaps=1, gapbound=70, considbound= 51, lower=0, upper=75
660        "==---aaaaAAAHXXXXXXXDDDDDDDIIIII-xxxxxxxxXXXXXXXXXXXXAAXXXxxxx-=xXX=DDIX", // countgaps=1, gapbound=70, considbound= 90, lower=0, upper=75
661        "==---aaaaAAAXXXXXXXXXDDDDDDIIIII-xxxxxxxxXXXXXXXXXXXXXAXXXxxxx-=xXX=DDIX", // countgaps=1, gapbound=70, considbound=100, lower=0, upper=75
662        "==---aaaaAAAHHhhdddDDDDDDDDIIIII-iifiiiffiiiifffdaaaAAAaaaaadd-=aah=DDId", // countgaps=1, gapbound=70, considbound=  0, lower=0, upper=75
663    };
664    const size_t seqlen         = strlen(sequence[0]);
665    const int    sequenceCount  = ARRAY_ELEMS(sequence);
666    const int    consensusCount = ARRAY_ELEMS(expected_consensus);
667
668    // create DB
669    GB_shell    shell;
670    const char *aliname = "ali_ami";
671    GBDATA     *gb_main = create_simple_seq_db(aliname, "ami", sequence, sequenceCount, seqlen);
672
673    ConsensusBuildParams BK;
674    for (int c = 0; c<consensusCount; ++c) {
675        TEST_ANNOTATE(GBS_global_string("c=%i", c));
676        switch (c) {
677            case 0: break;                                                     // use default settings
678            case 1: BK.countgaps   = false; break;                             // dont count gaps
679            case 2: BK.considbound = 26; BK.lower = 0; BK.upper = 75; break;   // settings from #663
680            case 3: BK.countgaps   = true; BK.gapbound = 70; break;
681            case 4: BK.considbound = 20; break;
682            case 5: BK.considbound = 51; break;
683            case 6: BK.considbound = 90; break;
684            case 7: BK.considbound = 100; break;
685            case 8: BK.considbound = 0; break;
686            default: arb_assert(0); break;                                     // missing
687        }
688
689        {
690            GB_transaction  ta(gb_main);
691            const char     *sainame = "CONSENSUS";
692            TEST_EXPECT_NO_ERROR(CON_calculate(gb_main, BK, aliname, false, sainame));
693
694            GBDATA     *gb_consensus = GBT_find_SAI(gb_main, sainame);
695            GBDATA     *gb_seq       = GBT_find_sequence(gb_consensus, aliname);
696            const char *consensus    = GB_read_char_pntr(gb_seq);
697
698            TEST_EXPECT_EQUAL(consensus, expected_consensus[c]);
699        }
700    }
701
702    // test max.frequency
703    const char *sainame = "MAXFREQ";
704    for (int ignore_gaps = 0; ignore_gaps<=1; ++ignore_gaps) {
705        TEST_ANNOTATE(GBS_global_string("ignore_gaps=%i", ignore_gaps));
706        TEST_EXPECT_NO_ERROR(CON_calc_max_freq(gb_main, ignore_gaps, sainame, aliname));
707        const char *data, *dat2;
708        read_frequency(gb_main, sainame, aliname, data, dat2);
709        TEST_EXPECT_EQUAL(data, expected_frequency[ignore_gaps*2]);
710        TEST_EXPECT_EQUAL(dat2, expected_frequency[ignore_gaps*2+1]);
711    }
712
713    GB_close(gb_main);
714}
715
716#endif // UNIT_TESTS
717
Note: See TracBrowser for help on using the repository browser.