source: branches/port5/TOOLS/arb_count_chars.cxx

Last change on this file was 5823, checked in by westram, 16 years ago
  • more uniform function names
    • EXP_create_experiment_rel_experiment_data → EXP_find_or_create_experiment_rel_exp_data
    • EXP_find_experiment_rel_experiment_data → EXP_find_experiment_rel_exp_data
    • GBT_create_SAI → GBT_find_or_create_SAI
    • GBT_create_item → GBT_find_or_create_item_rel_item_data
    • GBT_create_species → GBT_find_or_create_species
    • GBT_create_species_rel_species_data → GBT_find_or_create_species_rel_species_data
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 3.3 KB
Line 
1#include <stdio.h>
2#include <stdlib.h>
3// #include <malloc.h>
4#include <string.h>
5#include <ctype.h>
6#include <arbdb.h>
7#include <arbdbt.h>
8
9/*  count different kind of nucleotides for each alignment position */
10
11#define MAXLETTER ('Z'-'A'+1)
12#define RESULTNAME "COUNTED_CHARS"
13
14int main(int argc, char **argv){
15    const char *db_name = ":";      // default name -> link to server
16    if (argc >1) db_name = argv[1];     // get the name of the database
17    int is_amino = 0;
18
19    printf("Counting the number of different chars of all marked sequences\n");
20
21    GBDATA *gb_main = GB_open(":","rw");    // open database
22    if (!gb_main){
23        GB_print_error();
24        return -1;
25    }
26
27    GB_begin_transaction(gb_main); // open transaction
28
29    char *alignment_name = GBT_get_default_alignment(gb_main);  // info about sequences
30    int alignment_len = GBT_get_alignment_len(gb_main,alignment_name);
31    is_amino = GBT_is_alignment_protein(gb_main,alignment_name);
32    char filter[256];
33    if (is_amino){
34        memset(filter,1,256);
35        filter[(unsigned char)'B'] = 0;
36        filter[(unsigned char)'J'] = 0;
37        filter[(unsigned char)'O'] = 0;
38        filter[(unsigned char)'U'] = 0;
39        filter[(unsigned char)'Z'] = 0;
40    }else{
41        memset(filter,0,256);
42        filter[(unsigned char)'A'] = 1;
43        filter[(unsigned char)'C'] = 1;
44        filter[(unsigned char)'G'] = 1;
45        filter[(unsigned char)'T'] = 1;
46        filter[(unsigned char)'U'] = 1;
47    }
48
49    // malloc and clear arrays for counting characters (only letters)
50    int *counters[MAXLETTER];
51    int i;
52    for (i=0;i<MAXLETTER;i++){
53        counters[i] = (int *)calloc(sizeof(int),alignment_len);
54    }
55
56
57    // loop over all marked species
58    int slider = 0;
59    int all_marked = GBT_count_marked_species(gb_main);
60
61    for (GBDATA *gb_species = GBT_first_marked_species(gb_main);
62         gb_species;
63         gb_species = GBT_next_marked_species(gb_species)){
64        if ((slider++)%10 == 0) printf("%i:%i\n",slider,all_marked);
65
66        GBDATA *gb_ali = GB_entry(gb_species, alignment_name); // search the sequence database entry ( ali_xxx/data )
67        if (!gb_ali) continue;
68        GBDATA *gb_data = GB_entry(gb_ali, "data");
69        if(!gb_data) continue;
70
71        const char *seq = GB_read_char_pntr(gb_data);
72        if (!seq) continue;
73
74        unsigned char c;
75        for ( i=0; (i< alignment_len) && (c = *(seq)); i++, seq++){
76            if (!isalpha(c)) continue;
77            c = toupper(c);
78            if (filter[c]){
79                counters[ c - 'A' ][i] ++;
80            }
81        }
82    }
83
84    char *result = (char *)calloc(sizeof(char), alignment_len + 1);
85    for (i=0; i<alignment_len; i++) { 
86        int sum = 0;
87        for (int l = 0; l < MAXLETTER; l++){
88            if (counters[l][i]>0) sum++;
89        }
90        result[i] = sum<10 ? '0'+sum : 'A'-10+sum;
91    }
92    // save result as SAI COUNTED_CHARS
93    {
94        GBDATA *gb_sai = GBT_find_or_create_SAI(gb_main,RESULTNAME);
95        if (!gb_sai) {
96            GB_print_error();
97            return -1;
98        }
99        GBDATA *gb_data = GBT_add_data(gb_sai, alignment_name, "data", GB_STRING);
100        GB_write_string(gb_data, result);
101    }
102
103    GB_commit_transaction(gb_main); // commit it
104    GB_close(gb_main);      // politely disconnect from server
105    return 0;
106
107}
Note: See TracBrowser for help on using the repository browser.