source: tags/ms_r16q2/NTREE/AP_consensus.cxx

Last change on this file was 14454, checked in by westram, 7 years ago
  • remove other unwanted callback casts
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 32.5 KB
Line 
1// ================================================================= //
2//                                                                   //
3//   File      : AP_consensus.cxx                                    //
4//   Purpose   : calculate consensus SAIs                            //
5//                                                                   //
6//   http://www.arb-home.de/                                         //
7//                                                                   //
8// ================================================================= //
9
10#include "NT_local.h"
11
12#include <aw_root.hxx>
13#include <aw_msg.hxx>
14#include <aw_awar.hxx>
15
16#include <arbdbt.h>
17
18#include <arb_strbuf.h>
19#include <arb_defs.h>
20#include <arb_progress.h>
21
22#include <awt_config_manager.hxx>
23#include <awt_misc.hxx>
24#include <awt_sel_boxes.hxx>
25
26
// ---- awar name prefixes ----
#define AWAR_CONSENSUS_PREFIX     "consensus/"
#define AWAR_CONSENSUS_PREFIX_TMP "tmp/" AWAR_CONSENSUS_PREFIX
#define AWAR_MAX_FREQ_PREFIX      "tmp/CON_MAX_FREQ/"

// ---- awars of the max. frequency window ----
#define AWAR_MAX_FREQ_IGNORE_GAPS AWAR_MAX_FREQ_PREFIX "no_gaps"
#define AWAR_MAX_FREQ_SAI_NAME    AWAR_MAX_FREQ_PREFIX "sai_name"

// ---- consensus awars located below "tmp/" ----
#define AWAR_CONSENSUS_MARKED_ONLY AWAR_CONSENSUS_PREFIX_TMP "marked_only"
#define AWAR_CONSENSUS_ALIGNMENT   AWAR_CONSENSUS_PREFIX_TMP "alignment"
#define AWAR_CONSENSUS_NAME        AWAR_CONSENSUS_PREFIX_TMP "name"

// ---- consensus awars located directly below "consensus/" ----
#define AWAR_CONSENSUS_COUNTGAPS   AWAR_CONSENSUS_PREFIX "countgaps"
#define AWAR_CONSENSUS_GAPBOUND    AWAR_CONSENSUS_PREFIX "gapbound"
#define AWAR_CONSENSUS_GROUP       AWAR_CONSENSUS_PREFIX "group"
#define AWAR_CONSENSUS_CONSIDBOUND AWAR_CONSENSUS_PREFIX "considbound"
#define AWAR_CONSENSUS_UPPER       AWAR_CONSENSUS_PREFIX "upper"
#define AWAR_CONSENSUS_LOWER       AWAR_CONSENSUS_PREFIX "lower"
43
44#define CONSENSUS_AWAR_SOURCE CAS_NTREE
45#include <consensus.h>
46#include <consensus_config.h>
47#include <chartable.h>
48
49static int CON_insertSequences(GBDATA *gb_main, const char *aliname, long maxalignlen, bool onlymarked, BaseFrequencies& freqs) {
50    /*! read sequence data and fill into 'freqs'
51     * @param gb_main       database
52     * @param aliname       name of alignment
53     * @param maxalignlen   length of alignment
54     * @param onlymarked    true -> marked only
55     * @param freqs         sequences are inserted here (has to be empty)
56     * @return number of inserted sequences
57     */
58    int nrofspecies = onlymarked ? GBT_count_marked_species(gb_main) : GBT_get_species_count(gb_main);
59
60    arb_progress progress(nrofspecies);
61    progress.auto_subtitles("Examining sequence");
62
63    GBDATA *gb_species = onlymarked ? GBT_first_marked_species(gb_main) : GBT_first_species(gb_main);
64    while (gb_species) {
65        GBDATA *alidata = GBT_find_sequence(gb_species, aliname);
66        if (alidata) {
67            const char *data   = GB_read_char_pntr(alidata);
68            size_t      length = GB_read_string_count(alidata);
69
70            nt_assert(long(length)<=maxalignlen);
71            freqs.add(data, length);
72        }
73        gb_species = onlymarked ? GBT_next_marked_species(gb_species) : GBT_next_species(gb_species);
74        ++progress;
75    }
76
77    int inserted = freqs.added_sequences();
78    if (nrofspecies < inserted) {
79        GBT_message(gb_main, GBS_global_string("Only %i of %i %sspecies contain data in alignment '%s'",
80                                               inserted, nrofspecies, onlymarked ? "marked " : "", aliname));
81        progress.done();
82    }
83
84    return inserted;
85}
86
87static GB_ERROR CON_export(GBDATA *gb_main, const char *savename, const char *align, const char *result, bool onlymarked, long nrofspecies, const ConsensusBuildParams& BK) {
88    /*! writes consensus SAI to DB
89     * @param gb_main      database
90     * @param savename     name of SAI to save to
91     * @param align        alignment name
92     * @param result       SAI data to write
93     * @param onlymarked   true -> was calculated on marked only (used for SAI comment)
94     * @param nrofspecies  number of used sequences (used for SAI comment; if less than 20 -> add an explicit list to field '_SPECIES')
95     * @param BK           parameters used for consensus calculation (used for SAI comment)
96     * @return error if something goes wrong
97     */
98    const char *off = "off";
99    const char *on  = "on";
100
101    char *buffer = (char *)GB_calloc(2000, sizeof(char));
102
103    GBDATA   *gb_extended = GBT_find_or_create_SAI(gb_main, savename);
104    GBDATA   *gb_data     = GBT_add_data(gb_extended, align, "data", GB_STRING);
105    GB_ERROR  err         = GB_write_string(gb_data, result);
106    if (!err) {
107        GBDATA *gb_options = GBT_add_data(gb_extended, align, "_TYPE", GB_STRING);
108
109        const char *allvsmarked     = onlymarked ? "marked" : "all";
110        const char *countgapsstring = BK.countgaps ? on : off;
111        const char *simplifystring  = BK.group ? on : off;
112
113        sprintf(buffer, "CON: [species: %s]  [number: %ld]  [count gaps: %s] "
114                "[threshold for gaps: %d]  [simplify: %s] "
115                "[threshold for group: %d]  [upper: %d]  [lower: %d]",
116                allvsmarked, nrofspecies, countgapsstring,
117                BK.gapbound, simplifystring,
118                BK.considbound, BK.upper, BK.lower);
119
120        err = GB_write_string(gb_options, buffer);
121        if (!err) {
122            GBDATA *gb_names  = GB_search(GB_get_father(gb_options), "_SPECIES", GB_FIND);
123            if (gb_names) err = GB_delete(gb_names); // delete old entry
124        }
125
126        if (!err && nrofspecies<20) {
127            GBDATA        *gb_species;
128            GBS_strstruct *strstruct = GBS_stropen(1000);
129
130            if (onlymarked) gb_species = GBT_first_marked_species(gb_main);
131            else gb_species            = GBT_first_species(gb_main);
132
133            while (gb_species) {
134                if (GBT_find_sequence(gb_species, align)) {
135                    GBDATA     *gb_speciesname = GB_search(gb_species, "name", GB_FIND);
136                    const char *name           = GB_read_char_pntr(gb_speciesname);
137
138                    GBS_strcat(strstruct, name);
139                    GBS_chrcat(strstruct, ' ');
140                }
141                if (onlymarked) gb_species = GBT_next_marked_species(gb_species);
142                else gb_species            = GBT_next_species(gb_species);
143            }
144
145            char *allnames = GBS_strclose(strstruct);
146            err            = GBT_write_string(GB_get_father(gb_options), "_SPECIES", allnames);
147            free(allnames);
148        }
149
150        // remove data relicts from "complex consensus" (no longer supported)
151        if (!err) {
152            char    buffer2[256];
153            sprintf(buffer2, "%s/FREQUENCIES", align);
154            GBDATA *gb_graph  = GB_search(gb_extended, buffer2, GB_FIND);
155            if (gb_graph) err = GB_delete(gb_graph);  // delete old entry
156        }
157    }
158
159    free(buffer);
160    if (err) err = GBS_global_string("Failed to store consensus '%s' (Reason: %s)", savename, err);
161    return err;
162}
163
164static GB_ERROR CON_calculate(GBDATA *gb_main, const ConsensusBuildParams& BK, const char *aliname, bool onlymarked, const char *sainame) {
165    /*! calculates the consensus and writes it to SAI 'sainame'
166     * Description how consensus is calculated: ../HELP_SOURCE/oldhelp/consensus_def.hlp
167     * @param gb_main     database
168     * @param BK          parameters for consensus calculation
169     * @param aliname     alignment name
170     * @param onlymarked  true -> use marked sequences only
171     * @param sainame     name of destination SAI
172     * @return error if something goes wrong
173     */
174    GB_ERROR error = 0;
175
176    GB_push_transaction(gb_main);
177
178    long maxalignlen = GBT_get_alignment_len(gb_main, aliname);
179    if (maxalignlen <= 0) error = GB_export_errorf("alignment '%s' doesn't exist", aliname);
180
181    if (!error) {
182        arb_progress progress("Calculating consensus");
183
184        GB_alignment_type alitype = GBT_get_alignment_type(gb_main, aliname);
185        BaseFrequencies::setup("-.", alitype);
186
187        BaseFrequencies freqs(maxalignlen);
188        int nrofspecies = CON_insertSequences(gb_main, aliname, maxalignlen, onlymarked, freqs);
189
190        if (BK.lower>BK.upper) {
191            error = "fault: lower greater than upper";
192        }
193        else {
194            char *result = freqs.build_consensus_string(BK);
195            error = CON_export(gb_main, sainame, aliname, result, onlymarked, nrofspecies, BK);
196            free(result);
197        }
198    }
199
200    error = GB_end_transaction(gb_main, error);
201
202    return error;
203}
204
205static void CON_calculate_cb(AW_window *aw) {
206    AW_root *awr        = aw->get_root();
207    char    *aliname    = awr->awar(AWAR_CONSENSUS_ALIGNMENT)->read_string();
208    char    *sainame    = awr->awar(AWAR_CONSENSUS_NAME)->read_string();
209    bool     onlymarked = awr->awar(AWAR_CONSENSUS_MARKED_ONLY)->read_int();
210
211    ConsensusBuildParams BK(awr);
212
213    {
214#if defined(ASSERTION_USED)
215        GB_transaction ta(GLOBAL.gb_main);
216        LocallyModify<bool> denyAwarReads(AW_awar::deny_read, true);
217        LocallyModify<bool> denyAwarWrites(AW_awar::deny_write, true);
218#endif
219
220        GB_ERROR error = CON_calculate(GLOBAL.gb_main, BK, aliname, onlymarked, sainame);
221        aw_message_if(error);
222    }
223
224    free(sainame);
225    free(aliname);
226}
227
228static void consensus_upper_lower_changed_cb(AW_root *awr, bool upper_changed) {
229    AW_awar *awar_lower = awr->awar(AWAR_CONSENSUS_LOWER);
230    AW_awar *awar_upper = awr->awar(AWAR_CONSENSUS_UPPER);
231
232    int lower = awar_lower->read_int();
233    int upper = awar_upper->read_int();
234
235    if (upper<lower) {
236        if (upper_changed) awar_lower->write_int(upper);
237        else               awar_upper->write_int(lower);
238    }
239}
240
241void AP_create_consensus_var(AW_root *aw_root, AW_default aw_def) {
242    GB_transaction ta(GLOBAL.gb_main);
243    {
244        char *defali = GBT_get_default_alignment(GLOBAL.gb_main);
245        aw_root->awar_string(AWAR_CONSENSUS_ALIGNMENT, defali, aw_def);
246        free(defali);
247    }
248    aw_root->awar_int(AWAR_CONSENSUS_MARKED_ONLY, 1,  aw_def);
249    aw_root->awar_int(AWAR_CONSENSUS_GROUP,       0,  aw_def);
250    aw_root->awar_int(AWAR_CONSENSUS_COUNTGAPS,   1,  aw_def);
251    aw_root->awar_int(AWAR_CONSENSUS_UPPER,       95, aw_def)->set_minmax(0, 100)->add_callback(makeRootCallback(consensus_upper_lower_changed_cb, true));
252    aw_root->awar_int(AWAR_CONSENSUS_LOWER,       70, aw_def)->set_minmax(0, 100)->add_callback(makeRootCallback(consensus_upper_lower_changed_cb, false));
253    aw_root->awar_int(AWAR_CONSENSUS_GAPBOUND,    60, aw_def)->set_minmax(0, 100);
254    aw_root->awar_int(AWAR_CONSENSUS_CONSIDBOUND, 30, aw_def)->set_minmax(0, 100);
255    aw_root->awar_int(AWAR_MAX_FREQ_IGNORE_GAPS,  1,  aw_def);
256
257    aw_root->awar_string(AWAR_CONSENSUS_NAME,    "CONSENSUS",     aw_def);
258    aw_root->awar_string(AWAR_MAX_FREQ_SAI_NAME, "MAX_FREQUENCY", aw_def);
259}
260
261static AWT_config_mapping_def consensus_config_mapping[] = {
262    { AWAR_CONSENSUS_COUNTGAPS,   CONSENSUS_CONFIG_COUNTGAPS },
263    { AWAR_CONSENSUS_GAPBOUND,    CONSENSUS_CONFIG_GAPBOUND },
264    { AWAR_CONSENSUS_GROUP,       CONSENSUS_CONFIG_GROUP },
265    { AWAR_CONSENSUS_CONSIDBOUND, CONSENSUS_CONFIG_CONSIDBOUND },
266    { AWAR_CONSENSUS_UPPER,       CONSENSUS_CONFIG_UPPER },
267    { AWAR_CONSENSUS_LOWER,       CONSENSUS_CONFIG_LOWER },
268
269    // make sure the keywords of the following entries
270    // DIFFER from those defined at ../TEMPLATES/consensus_config.h@CommonEntries
271
272    { AWAR_CONSENSUS_MARKED_ONLY, "marked_only" },
273    { AWAR_CONSENSUS_NAME,        "name" },
274
275    { 0, 0 }
276};
277
278AW_window *AP_create_con_expert_window(AW_root *aw_root) {
279    // keep in sync with ../EDIT4/ED4_no_class.cxx@ED4_create_consensus_definition_window
280
281    AW_window_simple *aws = new AW_window_simple;
282    aws->init(aw_root, "CALCULATE_CONSENSUS", "CONSENSUS OF SEQUENCES");
283    aws->load_xfig("consensus/expert.fig");
284
285    aws->auto_space(5, 5);
286
287    const int SCALEDCOLUMNS = 3;
288    const int SCALERSIZE    = 150;
289
290    // top part of window:
291    aws->button_length(9);
292
293    aws->at("cancel");
294    aws->callback(AW_POPDOWN);
295    aws->create_button("CLOSE", "CLOSE", "C");
296
297    aws->at("help");
298    aws->callback(makeHelpCallback("consensus.hlp"));
299    aws->create_button("HELP", "HELP", "H");
300
301    // left part of window:
302    aws->at("which_alignment");
303    awt_create_ALI_selection_list(GLOBAL.gb_main, (AW_window *)aws, AWAR_CONSENSUS_ALIGNMENT, "*=");
304
305    aws->at("which_species");
306    aws->create_toggle_field(AWAR_CONSENSUS_MARKED_ONLY);
307    aws->insert_toggle        ("all",    "a", 0);
308    aws->insert_default_toggle("marked", "m", 1);
309    aws->update_toggle_field();
310
311    aws->at("save_box");
312    awt_create_SAI_selection_list(GLOBAL.gb_main, aws, AWAR_CONSENSUS_NAME, false);
313
314    aws->at("name");
315    aws->create_input_field(AWAR_CONSENSUS_NAME, 10);
316
317    // right part of window (same as in EDIT4):
318    aws->at("countgaps");
319    aws->create_toggle_field(AWAR_CONSENSUS_COUNTGAPS, NULL, "");
320    aws->insert_toggle        ("on",  "1", 1);
321    aws->insert_default_toggle("off", "0", 0);
322    aws->update_toggle_field();
323
324    aws->at("gapbound");
325    aws->create_input_field_with_scaler(AWAR_CONSENSUS_GAPBOUND, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
326
327    aws->at("group");
328    aws->create_toggle_field(AWAR_CONSENSUS_GROUP, NULL, "");
329    aws->insert_toggle        ("on",  "1", 1);
330    aws->insert_default_toggle("off", "0", 0);
331    aws->update_toggle_field();
332
333    aws->at("considbound");
334    aws->create_input_field_with_scaler(AWAR_CONSENSUS_CONSIDBOUND, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
335
336    aws->at("showgroups");
337    aws->callback(AWT_create_IUPAC_info_window);
338    aws->create_autosize_button("SHOW_IUPAC", "Show IUPAC groups", "s");
339
340    aws->at("upper");
341    aws->create_input_field_with_scaler(AWAR_CONSENSUS_UPPER, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
342
343    aws->at("lower");
344    aws->create_input_field_with_scaler(AWAR_CONSENSUS_LOWER, SCALEDCOLUMNS, SCALERSIZE, AW_SCALER_LINEAR);
345
346    // bottom part of window:
347    aws->at("calculate");
348    aws->callback(CON_calculate_cb);
349    aws->create_button("GO", "GO", "G");
350
351    aws->at("config");
352    AWT_insert_config_manager(aws, AW_ROOT_DEFAULT, CONSENSUS_CONFIG_ID, consensus_config_mapping);
353
354    return aws;
355}
356
357static GB_ERROR CON_calc_max_freq(GBDATA *gb_main, bool ignore_gaps, const char *savename, const char *aliname) {
358    /*! calculates the maximum frequency for each column and write to SAI
359     * @param gb_main      database
360     * @param ignore_gaps  true -> ignore gaps; see ../HELP_SOURCE/oldhelp/max_freq.hlp@Gaps
361     * @param savename     name of destination SAI
362     * @param aliname      name of alignment to use
363     * @return error if something goes wrong
364     */
365    arb_assert(!GB_have_error());
366
367    GB_ERROR       error = NULL;
368    GB_transaction ta(gb_main);
369
370    long maxalignlen = GBT_get_alignment_len(gb_main, aliname);
371    if (maxalignlen<=0) {
372        GB_clear_error();
373        error = "alignment doesn't exist!";
374    }
375    else {
376        arb_progress progress("Calculating max. frequency");
377
378        GB_alignment_type alitype = GBT_get_alignment_type(gb_main, aliname);
379        BaseFrequencies::setup("-.", alitype);
380
381        const int onlymarked  = 1;
382        BaseFrequencies freqs(maxalignlen);
383        long nrofspecies = CON_insertSequences(gb_main, aliname, maxalignlen, onlymarked, freqs);
384
385        char *result1 = new char[maxalignlen+1];
386        char *result2 = new char[maxalignlen+1];
387
388        result1[maxalignlen] = 0;
389        result2[maxalignlen] = 0;
390
391        for (int pos = 0; pos < maxalignlen; pos++) {
392            double mf  = freqs.max_frequency_at(pos, ignore_gaps);
393            int    mfi = int(mf*100.0+0.01); // frequency -> [0..100]; add 1/100 to reduce incompatibilities caused by 32/64 bit differences
394
395            if (mfi) {
396                if (mfi<10) mfi = 10; // hack: otherwise SAI will contain '0' (meaning 100% frequency)
397
398                int mfh = int(mfi/10);
399                int mfl = mfi-10*mfh;
400
401                result1[pos] = "?1234567890"[mfh];
402                result2[pos] = "0123456789"[mfl];
403            }
404            else {
405                result1[pos] = '=';
406                result2[pos] = '=';
407            }
408        }
409
410        GBDATA *gb_extended = GBT_find_or_create_SAI(gb_main, savename);
411        if (!gb_extended) {
412            error = GB_await_error();
413        }
414        else {
415            GBDATA *gb_data1 = GBT_add_data(gb_extended, aliname, "data", GB_STRING);
416            GBDATA *gb_data2 = GBT_add_data(gb_extended, aliname, "dat2", GB_STRING);
417
418            error             = GB_write_string(gb_data1, result1);
419            if (!error) error = GB_write_string(gb_data2, result2);
420
421            GBDATA *gb_options = GBT_add_data(gb_extended, aliname, "_TYPE", GB_STRING);
422
423            if (!error) {
424                const char *type = GBS_global_string("MFQ: [species: %li] [ignore gaps: %s]", nrofspecies, ignore_gaps ? "yes" : "no");
425                error            = GB_write_string(gb_options, type);
426            }
427        }
428
429        delete [] result1;
430        delete [] result2;
431    }
432
433    error = ta.close(error);
434    arb_assert(!GB_have_error());
435
436    return error;
437}
438
439static void CON_calc_max_freq_cb(AW_window *aw) {
440    AW_root    *awr         = aw->get_root();
441    bool        ignore_gaps = awr->awar(AWAR_MAX_FREQ_IGNORE_GAPS)->read_int();
442    const char *savename    = awr->awar(AWAR_MAX_FREQ_SAI_NAME)->read_char_pntr();
443    char       *aliname     = GBT_get_default_alignment(GLOBAL.gb_main);
444
445    GB_ERROR error = CON_calc_max_freq(GLOBAL.gb_main, ignore_gaps, savename, aliname);
446    if (error) aw_message(error);
447
448    free(aliname);
449}
450
451AW_window *AP_create_max_freq_window(AW_root *aw_root) {
452    AW_window_simple *aws = new AW_window_simple;
453    aws->init(aw_root, "MAX_FREQUENCY", "MAX FREQUENCY");
454    aws->load_xfig("consensus/max_freq.fig");
455
456    GB_push_transaction(GLOBAL.gb_main);
457
458    aws->button_length(6);
459
460    aws->at("cancel");
461    aws->callback(AW_POPDOWN);
462    aws->create_button("CLOSE", "CLOSE", "C");
463
464    aws->at("help"); aws->callback(makeHelpCallback("max_freq.hlp"));
465    aws->create_button("HELP", "HELP", "H");
466
467    // activation of consensus calculation by button ...
468    aws->at("go");
469    aws->callback(CON_calc_max_freq_cb);
470    aws->create_button("GO", "GO", "C");
471
472    aws->at("save");
473    aws->create_input_field(AWAR_MAX_FREQ_SAI_NAME, 1);
474
475    aws->at("sai");
476    awt_create_SAI_selection_list(GLOBAL.gb_main, aws, AWAR_MAX_FREQ_SAI_NAME, false);
477
478    aws->at("gaps");
479    aws->create_toggle(AWAR_MAX_FREQ_IGNORE_GAPS);
480
481    GB_pop_transaction(GLOBAL.gb_main);
482
483    return aws;
484}
485
486// --------------------------------------------------------------------------------
487
488#ifdef UNIT_TESTS
489#ifndef TEST_UNIT_H
490#include <test_unit.h>
491#endif
492
493static GBDATA *create_simple_seq_db(const char *aliname, const char *alitype, const char **sequence, int sequenceCount, int sequenceLength) {
494    GBDATA *gb_main = GB_open("nosuch.arb", "wc");
495
496    {
497        GB_transaction  ta(gb_main);
498        GBDATA         *gb_species_data = GBT_get_species_data(gb_main);
499        int             specCounter     = 0;
500
501        TEST_EXPECT_RESULT__NOERROREXPORTED(GBT_create_alignment(gb_main, aliname, sequenceLength, true, 6, alitype));
502
503        for (int s = 0; s<sequenceCount; ++s) {
504            GBDATA *gb_species = GBT_find_or_create_species_rel_species_data(gb_species_data, GBS_global_string("name%04i", ++specCounter));
505            GBDATA *gb_data    = GBT_add_data(gb_species, aliname, "data", GB_STRING);
506
507            TEST_EXPECT_EQUAL(strlen(sequence[s]), sequenceLength);
508            TEST_EXPECT_NO_ERROR(GB_write_string(gb_data, sequence[s]));
509        }
510    }
511
512#if 0
513    // save DB (to view data; should be inactive when committed)
514    char *dbname = GBS_global_string_copy("cons_%s.arb", alitype);
515    TEST_EXPECT_NO_ERROR(GB_save(gb_main, dbname, "a"));
516    free(dbname);
517#endif
518
519    return gb_main;
520}
521
522static void read_frequency(GBDATA *gb_main, const char *sainame, const char *aliname, const char*& data, const char*& dat2) {
523    GB_transaction ta(gb_main);
524
525    GBDATA *gb_maxFreq = GBT_find_SAI(gb_main, sainame);
526    GBDATA *gb_ali     = GB_entry(gb_maxFreq, aliname);
527    GBDATA *gb_data    = GB_entry(gb_ali, "data");
528    GBDATA *gb_dat2    = GB_entry(gb_ali, "dat2");
529
530    data = GB_read_char_pntr(gb_data);
531    dat2 = GB_read_char_pntr(gb_dat2);
532}
533
534void TEST_nucleotide_consensus_and_maxFrequency() {
535    const char *sequence[] = {
536        "-.AAAAAAAAAAcAAAAAAAAATTTTTTTTTTTTTTTTTAAAAAAAAgggggAAAAgAA----m-----yykm-mmmAAAAAAAAAmmmmmmmmmNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNKKKKKKKKKWWWWWWWWW",
537        "-.-AAAAAAAAAccAAAAAAAAggTTgTTTTgTTTTTTTcccAAAAAgggggAAAAgAA----k-----kykr-rrrAAAAAAAAmmmmmmmmmT-NNNNNNNNNANNNNNbNNNNNNNNkNNNNNNNNaNNNNNNNNbKKKKKKKKbWWWWWWWW",
538        "-.--AAAAAAAAcccAAAAAAA-ggTggTTTggTTTTTTccccAAAAgggCCtAAAgAC----m-----sykw-wvsAAAAAAAmmmmmmmmmTT--NNNNNNNNCANNNNbbNNNNNNNkkNNNNNNNaaNNNNNNNbbKKKKKKKbbWWWWWWW",
539        "-.---AAAAAAAccccAAAAAA-ggggggTTgggTTTTTcccccAAAggCCC-tAACtC----k----yyyys-smvAAAAAAmmmmmmmmmTTT---NNNNNNNGCANNNbbbNNNNNNkkkNNNNNNaaaNNNNNNbbbKKKKKKbbbWWWWWW",
540        "-.----AAAAAAcccccAAAAA----ggggTggggTTTTGGGcccAAgCCCt-ttACtG----m---nkkkky-yrmAAAAAmmmmmmmmmTTTT----NNNNNNTGCANNbbbbNNNNNkkkkNNNNNaaaaNNNNNbbbbKKKKKbbbbWWWWW",
541        "-.-----AAAAAccccccAAAA----ggggggggggTTgGGGGcccAcCCtt--tttCG----k--nnssssk-kvrAAAAmmmmmmmmmTTTTT-----NNNNN-TGCANbbbbbNNNNkkkkkNNNNaaaaaNNNNbbbbbKKKKbbbbbWWWW",
542        "-.------AAAAcccccccAAA---------ggggggTgGGGGGccccCt----tt-gT----mydddyyyy-vvmsAAAmmmmmmmmmTTTTTT------NNNN-ATGCAbbbbbbNNNkkkkkkNNNaaaaaaNNNbbbbbbKKKbbbbbbWWW",
543        "-.-------AAAccccccccAA---------ggggggggttGGGGccct------t--T-yykkkbbbkkkk-hhrvAAmmmmmmmmmTTTTTTT-------NNN-C-TGCbbbbbbbNNkkkkkkkNNaaaaaaaNNbbbbbbbKKbbbbbbbWW",
544        "-.--------AAcccccccccA----------------gttGGGGGct-------t----ymmmmnnnssss-ddvmAmmmmmmmmmTTTTTTTT--------NN-G--TGbbbbbbbbNkkkkkkkkNaaaaaaaaNbbbbbbbbKbbbbbbbbW",
545        "-.---------Acccccccccc----------------gtAGGGGGG----------------k---------bbmrmmmmmmmmmTTTTTTTTT---------N-T---Tbbbbbbbbbkkkkkkkkkaaaaaaaaabbbbbbbbbbbbbbbbbb",
546    };
547    const char *expected_frequency[] = {
548        // considering gaps:
549        "0=9876567890098765678986665444576545675336544565434475454320888277654333462439988776654434567899876543222523222333322222444433332987765443333444444333444444",
550        "0=0000000000000000000000000000000000000000000000000000000000000500000055005565050505055050000000000000025050025210098765752075207257025702568013568568013568",
551        // ignoring gaps:
552        "==000000000009876567895757865688765678533654456554536655542=552233223333222439988776654434567892222222222222222333322222444433332987765443333444444333444444",
553        "==000000000000000000000505360637520257000000000502036075025=005530983388555565050505055050000005555555555555555210098765752075207257025702568013568568013568",
554    };
555    const char *expected_consensus[] = {
556        "==----..aaaACccMMMMMaa----.....g.kkk.uKb.ssVVmmss...-.ww...=---.---..byk.-.mVAaaaaMMMMmmHH..uuu----............BBbb.....Kkkkkk...aaaa.....BkkkkkkkKB....wwww", // default settings (see ConsensusBuildParams-ctor), gapbound=60, considbound=30, lower/upper=70/95
557        "==AAAAAAAAAACccMMMMMaaKgKugKKKuggKKKuuKb.ssVVmmssssBWWWWs..=Y.......BByk...mVAaaaaMMMMmmHH..uuu................BBbb.....Kkkkkk...aaaa.....BkkkkkkkKB....wwww", // countgaps=0
558        "==AAAAAAAAAACCCMMMMMAAKGKUGKKKUGGKKKUUKBsSSVVMMSSSSBWWWWSwa=YcaaykkkBBYKaaaMVAAAAAMMMMMMHHuuuUUaaaaaaaaaaaaaaaaBBBBBBBBcKKKKKkkkkAAAaaaaaaBBKKKKKKKBBuuuwWWW", // countgaps=0,              considbound=26, lower=0, upper=75 (as described in #663)
559        "==AAAAAAAAAACCCMMMMMAAKKKKGKKKUGKKKKKUKBsSSVVMMSSSSBWWWWSwN=YHNNykkkBBYKNNNVVAAAAMMMMMMMHHHuuUUNNNNNNNNNNNNNNNNBBBBBBBBBKKKKKkkkkAAAaaaaaaBBKKKKKKKBBuuwwWWW", // countgaps=0,              considbound=25, lower=0, upper=75
560        "==---aaaaAAACCCMMMMMAA-gkugkkkuggKKKuuKBsSSVVMMSsssb-wwWswa=---a--kkbBykaaaMVAAAAAMMMMMMHHuuuUU---aaaaaaaaaaaaaBBBBBBBBcKKKKKkkkkAAAaaaaaaBBKKKKKKKBBuuuwWWW", // countgaps=1, gapbound=70, considbound=26, lower=0, upper=75
561        "==---aaaaAAACCMMMMMMMA-kkkgkkkugKKKKKuKBNSVVVVMSsssb-wwWswN=---N--nnbBBBnnNVVAAAMMMMMMMHHHHHuUU---nnnnNNNnNnNNNBBBBBBBNNKKKKKkkNNAAAaaaaNNBBBBKKKKKBBBNwwWWW", // countgaps=1, gapbound=70, considbound=20, lower=0, upper=75
562        "==---aaaaAAACMMMMMMMMM-kkkkkkkkKKKKKKKKNNVVVVVVBBbbb-wwWbnN=---N--nnbBBBnnNVVMMMMMMMMMHHHHHHHHH---nnnnNNNnNnNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNBBBBBBBBBNNNNNNNNN", // countgaps=1, gapbound=70, considbound= 1, lower=0, upper=75
563        "==---aaaaAAACMMMMMMMMM-kkkkkkkkKKKKKKKKNNVVVVVVBBbbb-wwWbnN=---N--nnbBBBnnNVVMMMMMMMMMHHHHHHHHH---nnnnNNNnNnNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNBBBBBBBBBNNNNNNNNN", // countgaps=1, gapbound=70, considbound= 0, lower=0, upper=75
564        "==AAAAAAAAAACMMMMMMMMMKKKKKKKKKKKKKKKKKNNVVVVVVBBBBBWWWWBNN=YHNNNNNNBBBBNNNVVMMMMMMMMMHHHHHHHHHNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNBBBBBBBBBNNNNNNNNN", // countgaps=0,              considbound= 0, lower=0, upper=75
565    };
566    const size_t seqlen         = strlen(sequence[0]);
567    const int    sequenceCount  = ARRAY_ELEMS(sequence);
568    const int    consensusCount = ARRAY_ELEMS(expected_consensus);
569
570    // create DB
571    GB_shell    shell;
572    const char *aliname = "ali_nuc";
573    GBDATA     *gb_main = create_simple_seq_db(aliname, "rna", sequence, sequenceCount, seqlen);
574
575    ConsensusBuildParams BK;
576    for (int c = 0; c<consensusCount; ++c) {
577        TEST_ANNOTATE(GBS_global_string("c=%i", c));
578        switch (c) {
579            case 0: break;                                                     // use default settings
580            case 1: BK.countgaps   = false; break;                             // dont count gaps
581            case 2: BK.considbound = 26; BK.lower = 0; BK.upper = 75; break;   // settings from #663
582            case 3: BK.considbound = 25; break;
583            case 4: BK.considbound = 26; BK.countgaps = true; BK.gapbound = 70; break;
584            case 5: BK.considbound = 20; break;
585            case 6: BK.considbound = 1; break;
586            case 7: BK.considbound = 0; break;
587            case 8: BK.countgaps   = false; break;
588            default: arb_assert(0); break;                                     // missing
589        }
590
591        {
592            GB_transaction  ta(gb_main);
593            const char     *sainame = "CONSENSUS";
594            TEST_EXPECT_NO_ERROR(CON_calculate(gb_main, BK, aliname, false, sainame));
595
596            GBDATA     *gb_consensus = GBT_find_SAI(gb_main, sainame);
597            GBDATA     *gb_seq       = GBT_find_sequence(gb_consensus, aliname);
598            const char *consensus    = GB_read_char_pntr(gb_seq);
599
600            TEST_EXPECT_EQUAL(consensus, expected_consensus[c]);
601        }
602    }
603
604    // test max.frequency
605    const char *sainame = "MAXFREQ";
606    for (int ignore_gaps = 0; ignore_gaps<=1; ++ignore_gaps) {
607        TEST_ANNOTATE(GBS_global_string("ignore_gaps=%i", ignore_gaps));
608        TEST_EXPECT_NO_ERROR(CON_calc_max_freq(gb_main, ignore_gaps, sainame, aliname));
609        const char *data, *dat2;
610        read_frequency(gb_main, sainame, aliname, data, dat2);
611        TEST_EXPECT_EQUAL(data, expected_frequency[ignore_gaps*2]);
612        TEST_EXPECT_EQUAL(dat2, expected_frequency[ignore_gaps*2+1]);
613    }
614
615    GB_close(gb_main);
616}
617
618void TEST_amino_consensus_and_maxFrequency() {
619    const char *sequence[] = {
620        "-.ppppppppppQQQQQQQQQDDDDDELLLLLwwwwwwwwwwwwwwwwgggggggggggSSSe-PPP-DELp",
621        "-.-pppppppppkQQQQQQQQnDDDDELLLLLVVwwVwwwwVwwwwwwSgggggggggSSSee-QPP-DELa",
622        "-.--ppppppppkkQQQQQQQnnDDDELLLLL-VVwVVwwwVVwwwwwSSgggggggSSSeee-KQP-DEIg",
623        "-.---pppppppkkkQQQQQQnnnDDELLLLL-VVVVVVwwVVVwwwwSSSgggggSSSeee--LQQ-DQIs",
624        "-.----ppppppkkkkQQQQQnnnnDELLLLL----VVVVwVVVVwwweSSSgggSSSeee---WKQ-NQJt",
625        "-.-----pppppkkkkkQQQQnnnnnqiLLLL----VVVVVVVVVVwweeSSSggSSeee-----KQ-NQJq",
626        "-.------ppppkkkkkkQQQnnnnnqiiLLL---------VVVVVVweeeSSSgSeee------LK-NZJn",
627        "-.-------pppkkkkkkkQQnnnnnqiiiLL---------VVVVVVVeeeeSSSeee-------LK-NZJe",
628        "-.--------ppkkkkkkkkQnnnnnqiiiiL----------------eeeeeSSee--------WK-BZJd",
629        "-.---------pkkkkkkkkknnnnnqiiiii----------------eeeeeeSe---------WK-BZJb",
630        "-.ppppppppppQQQQQQQQQDDDDDELLLLLwwwwwwwwwwwwwwwwgggggggggggSSSe-PPP-DELz",
631        "-.-pppppppppkQQQQQQQQnDDDDELLLLLVVwwVwwwwVwwwwwwSgggggggggSSSee-QPP-DELh",
632        "-.--ppppppppkkQQQQQQQnnDDDELLLLL-VVwVVwwwVVwwwwwSSgggggggSSSeee-KQP-DEIk",
633        "-.---pppppppkkkQQQQQQnnnDDELLLLL-VVVVVVwwVVVwwwwSSSgggggSSSeee--LQQ-DQIr",
634        "-.----ppppppkkkkQQQQQnnnnDELLLLL----VVVVwVVVVwwweSSSgggSSSeee---WKQ-NQJl",
635        "-.-----pppppkkkkkQQQQnnnnnqiLLLL----VVVVVVVVVVwweeSSSggSSeee-----KQ-NQJi",
636        "-.------ppppkkkkkkQQQnnnnnqiiLLL---------VVVVVVweeeSSSgSeee------LK-NZJv",
637        "-.-------pppkkkkkkkQQnnnnnqiiiLL---------VVVVVVVeeeeSSSeee-------LK-NZJm",
638        "-.--------ppkkkkkkkkQnnnnnqiiiiL----------------eeeeeSSee--------WK-BZJf",
639        "-.---------pkkkkkkkkknnnnnqiiiii----------------eeeeeeSe---------WK-BZJy",
640    };
641    const char *expected_frequency[] = {
642        // considering gaps:
643        "0=9876567890987656789987655567898666544457654567654456743334567052404451",
644        "0=0000000000000000000000000000000000000000000000000000000000000000000000",
645        // ignoring gaps:
646        "==0000000000987656789987655567895757865688765678654456743345670=224=4451",
647        "==0000000000000000000000000000000505360637520257000000003720050=000=0000",
648    };
649    const char *expected_consensus[] = {
650        "==----..aaaAhhh...dddDDDDDDIIIII----.....i.....f...aaaAa.....--=.X.=DD-.", // default settings (see ConsensusBuildParams-ctor), gapbound=60, considbound=30, lower/upper=70/95
651        "==AAAAAAAAAAhhh...dddDDDDDDIIIII.i.fi...fii...ff...aaaAa.....dD=XX.=DDI.", // countgaps=0
652        "==AAAAAAAAAAHHhhdddDDDDDDDDIIIIIiIiFIiifFIIiifFFdaaaAAAaaaaadDD=XXh=DDId", // countgaps=0,              considbound= 26, lower=0, upper=75
653        "==---aaaaAAAHHhhdddDDDDDDDDIIIII-iifiiiffiiiifffdaaaAAAaaaaadd-=xXh=DDid", // countgaps=1, gapbound=70, considbound= 26, lower=0, upper=75
654        "==---aaaaAAAHHhhdddDDDDDDDDIIIII-iifiiiffiiiifffdaaaAAAaaaaadd-=aah=DDid", // countgaps=1, gapbound=70, considbound= 20, lower=0, upper=75
655        "==---aaaaAAAHHhhXddDDDDDDDDIIIII-ixfiixffiiiXfffdXaaAAAaaaaxdd-=xXX=DDiX", // countgaps=1, gapbound=70, considbound= 51, lower=0, upper=75
656        "==---aaaaAAAHXXXXXXXDDDDDDDIIIII-xxxxxxxxXXXXXXXXXXXXAAXXXxxxx-=xXX=DDiX", // countgaps=1, gapbound=70, considbound= 90, lower=0, upper=75
657        "==---aaaaAAAXXXXXXXXXDDDDDDIIIII-xxxxxxxxXXXXXXXXXXXXXAXXXxxxx-=xXX=DDiX", // countgaps=1, gapbound=70, considbound=100, lower=0, upper=75
658        "==---aaaaAAAHHhhdddDDDDDDDDIIIII-iifiiiffiiiifffdaaaAAAaaaaadd-=aah=DDid", // countgaps=1, gapbound=70, considbound=  0, lower=0, upper=75
659    };
660    const size_t seqlen         = strlen(sequence[0]);
661    const int    sequenceCount  = ARRAY_ELEMS(sequence);
662    const int    consensusCount = ARRAY_ELEMS(expected_consensus);
663
664    // create DB
665    GB_shell    shell;
666    const char *aliname = "ali_ami";
667    GBDATA     *gb_main = create_simple_seq_db(aliname, "ami", sequence, sequenceCount, seqlen);
668
669    ConsensusBuildParams BK;
670    for (int c = 0; c<consensusCount; ++c) {
671        TEST_ANNOTATE(GBS_global_string("c=%i", c));
672        switch (c) {
673            case 0: break;                                                     // use default settings
674            case 1: BK.countgaps   = false; break;                             // dont count gaps
675            case 2: BK.considbound = 26; BK.lower = 0; BK.upper = 75; break;   // settings from #663
676            case 3: BK.countgaps   = true; BK.gapbound = 70; break;
677            case 4: BK.considbound = 20; break;
678            case 5: BK.considbound = 51; break;
679            case 6: BK.considbound = 90; break;
680            case 7: BK.considbound = 100; break;
681            case 8: BK.considbound = 0; break;
682            default: arb_assert(0); break;                                     // missing
683        }
684
685        {
686            GB_transaction  ta(gb_main);
687            const char     *sainame = "CONSENSUS";
688            TEST_EXPECT_NO_ERROR(CON_calculate(gb_main, BK, aliname, false, sainame));
689
690            GBDATA     *gb_consensus = GBT_find_SAI(gb_main, sainame);
691            GBDATA     *gb_seq       = GBT_find_sequence(gb_consensus, aliname);
692            const char *consensus    = GB_read_char_pntr(gb_seq);
693
694            TEST_EXPECT_EQUAL(consensus, expected_consensus[c]);
695        }
696    }
697
698    // test max.frequency
699    const char *sainame = "MAXFREQ";
700    for (int ignore_gaps = 0; ignore_gaps<=1; ++ignore_gaps) {
701        TEST_ANNOTATE(GBS_global_string("ignore_gaps=%i", ignore_gaps));
702        TEST_EXPECT_NO_ERROR(CON_calc_max_freq(gb_main, ignore_gaps, sainame, aliname));
703        const char *data, *dat2;
704        read_frequency(gb_main, sainame, aliname, data, dat2);
705        TEST_EXPECT_EQUAL(data, expected_frequency[ignore_gaps*2]);
706        TEST_EXPECT_EQUAL(dat2, expected_frequency[ignore_gaps*2+1]);
707    }
708
709    GB_close(gb_main);
710}
711
712#endif // UNIT_TESTS
713
Note: See TracBrowser for help on using the repository browser.