source: tags/arb_5.1/PROBE/probe.h

Last change on this file was 5919, checked in by westram, 16 years ago
  • PT-server
    • DEVEL_JB → ARB_64
    • save information block (containing magic and db-version)
    • fail on 32bit when trying to load big database (>4Gb) created with 64-bit version
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.7 KB
Line 
1#ifndef PROBE_H
2#define PROBE_H
3
4#ifndef __LIST__
5#include <list>
6#endif
7#ifndef __SET__
8#include <set>
9#endif
10
11#ifndef ARBDB_H
12#include <arbdb.h>
13#endif
14#ifndef PT_COM_H
15#include <PT_com.h>
16#endif
17#ifndef AISC_GEN_SERVER_INCLUDED
18#include <PT_server.h>
19#endif
20
21#ifndef ARB_ASSERT_H
22#include <arb_assert.h>
23#endif
24
/* Magic number and database version of the pt server. */
#define PT_SERVER_MAGIC   0x32108765    // pt server identifier
#define PT_SERVER_VERSION 2             // version of pt server database (no versioning till 2009/05/13)

/* pt_assert(bed): forwards the condition 'bed' unchanged to arb_assert(). */
#define pt_assert(bed) arb_assert(bed)

/* Builds that define DEBUG also get PTM_DEBUG. */
#ifdef DEBUG
#define PTM_DEBUG
#endif // DEBUG
33
/* Short aliases for the unsigned builtin types used throughout this header. */
typedef unsigned long int ULONG;    // unsigned long
typedef unsigned int      UINT;     // unsigned int
typedef unsigned char     uchar;    // unsigned char
37
38#define  min(a, b) ((a) < (b)) ? (a) : (b)
39#define  max(a, b) ((a) > (b)) ? (a) : (b)
40
/* PT_CORE: deliberately stores through a null pointer (presumably to abort
 * with a core dump - confirm against the call sites).
 * The store goes through a volatile lvalue so the compiler may not discard
 * it as undefined/dead code. The expansion still ends in ';' as before, so
 * use it as a statement of its own (not as the body of an if/else without braces). */
#define PT_CORE (*(volatile int *)0 = 0);
42
/* PT_MAX_MATCHES: match limit constant; DEVEL_KAI builds use a larger value.
 * The product is parenthesized so the macro behaves as a single value in
 * any expression (e.g. x % PT_MAX_MATCHES). */
#if defined(DEVEL_KAI)
#define PT_MAX_MATCHES     (1024*256)
#else
#define PT_MAX_MATCHES     100000
#endif // DEVEL_KAI
48
/* Fixed limits of the pt server.
 * PT_POS_TREE_HEIGHT and PT_POS_SECURITY together size the per-position
 * N-mismatch array of the global state (HEIGHT + SECURITY + 1 entries). */
// #define PT_MAX_IDENTS      10000    (disabled)
#define PT_POS_TREE_HEIGHT 20
#define PT_POS_SECURITY    10
#define MIN_PROBE_LENGTH   6
#define PT_NAME_HASH_SIZE  10000
54
/* Match type selector; enumerators listed in ascending numeric order. */
enum PT_MATCH_TYPE {
    PT_MATCH_TYPE_WEIGHTED          = -1,
    PT_MATCH_TYPE_INTEGER           = 0,
    PT_MATCH_TYPE_WEIGHTED_PLUS_POS = 1
};
60
61
62
// Private message types (result answers).
#define MATCHANSWER  50     // match result answer
#define CREATEANSWER 51     // create result answer
#define FINDANSWER   52     // find result answer
66
67extern ULONG physical_memory;
68struct Hs_struct;
69extern char *pt_error_buffer;
70
/* Value type tag; enumerators listed in ascending numeric order. */
enum type_types_type {
    t_string = 0,
    t_int    = 1,
    t_float  = 2
};
typedef enum type_types_type type_types;
76
/* Base codes. PT_QU and PT_N carry explicit values; the remaining
 * enumerators continue the sequence (PT_A = 2 ... PT_B_UNDEF = 7).
 * No comma after the last enumerator: a trailing comma is not accepted by
 * pre-C++11 / C89 compilers in pedantic mode. */
typedef enum PT_bases_enum  {
    PT_QU = 0,
    PT_N  = 1,
    PT_A,
    PT_C,
    PT_G,
    PT_T,
    PT_B_MAX,
    PT_B_UNDEF
} PT_BASES;
87
88/*  POS TREE */
/* Node type of a POS_TREE entry.
 * PT_NT_SINGLE_NODE and PT_NT_SAVED intentionally share the value 3:
 * the former is used in stage 3, the latter in stages 1 and 2. */
enum enum_PT_NODE_TYPE {
    PT_NT_LEAF        = 0,
    PT_NT_CHAIN       = 1,
    PT_NT_NODE        = 2,
    PT_NT_SINGLE_NODE = 3,      // stage 3
    PT_NT_SAVED       = 3,      // stage 1+2
    PT_NT_UNDEF       = 4
};
typedef enum enum_PT_NODE_TYPE PT_NODE_TYPE;
97
98typedef struct POS_TREE_struct {
99    uchar flags;
100    char  data;
101} POS_TREE;
102
/* Bookkeeping record for the position tree: start of the data plus
 * per-stage and mode values (their exact meaning is not visible here). */
struct PTMM_struct {
    char *data_start;   // points to start of data
    int   stage1;
    int   stage2;
    int   stage3;
    int   mode;
};
typedef struct PTMM_struct PTM2;
110
111
112
113/* Probe search */
114
115
/* Per-sequence match statistic. */
struct probe_statistic {
    int    match_count;         // counter for matches
    double rel_match_count;     // match_count / (seq_len - probe_len + 1)
};
120
121struct probe_input_data {    /* every taxas own data */
122    /******* name and sequence *********/
123    char   *data;
124    long    checksum;
125    int     size;
126    char   *name;
127    char   *fullname;
128    GBDATA *gbd;
129
130    /********* probe design ************/
131    int is_group;   /* -1:  nevermind
132                        0:  no group
133                        1:  group */
134
135    /* probe design (match) */
136    PT_probematch *match;       /* best hit for PT_new_design */
137
138    /********* find family  ************/
139    struct probe_statistic stat;
140
141    /********* free pointer  ************/
142    int next;
143};
144
/* Counters describing the chains.
 * Members shared by both builds are listed once; members that exist only in
 * ARB_64 builds are guarded individually, keeping the member order of each
 * build exactly as before. */
struct probe_statistic_struct {
    int cut_offs;               // statistic of chains
    int single_node;
    int short_node;
#ifdef ARB_64
    int int_node;
#endif
    int long_node;
    int longs;
#ifdef ARB_64
    int ints;
    int ints2;
#endif
    int shorts;
    int shorts2;
    int chars;
#ifdef ARB_64
    long maxdiff;
#endif
};
171
172class BI_ecoli_ref;
173
174extern struct probe_struct_global   {
175    GBDATA  *gb_main;           /* ARBDB interface */
176    GBDATA  *gb_species_data;
177    GBDATA  *gb_sai_data;
178    char    *alignment_name;
179    GB_HASH *namehash;          /* name to int */
180
181    struct probe_input_data *data; /* the internal database */
182
183    char         *ecoli;        /* the ecoli sequenz */
184    BI_ecoli_ref *bi_ecoli;
185
186    int  data_count;
187    int  max_size;              /* maximum sequence len */
188    long char_count;            /* number of all 'acgtuACGTU' */
189
190    int    mismatches;          /* chain handle in match */
191    double wmismatches;
192    int    N_mismatches;
193    int    w_N_mismatches[PT_POS_TREE_HEIGHT+PT_POS_SECURITY+1];
194
195    int reversed;       /* tell the matcher whether probe is reversed */
196
197    double *pos_to_weight;      /* position to weight */
198    char    complement[256];    /* complement */
199
200    int deep;                   /* for probe matching */
201    int height;
202    int length;
203    int apos;
204
205    int sort_by;
206
207    char *probe;                /* probe design + chains */
208    char *main_probe;
209
210    char             *server_name; /* name of this server */
211    aisc_com         *link;
212    T_PT_MAIN         main;
213    struct Hs_struct *com_so;   /* the communication socket */
214    POS_TREE         *pt;
215    PTM2             *ptmain;
216
217    probe_statistic_struct stat;
218
219} psg;
220
/* gene_struct: one gene known under an internal name and under its
 * arb species name + arb gene name.
 *
 * All three strings live in ONE heap buffer owned by the object, stored
 * back to back and each NUL-terminated:
 *     <internal gene name>\0<arb species name>\0<arb gene name>\0
 * Copies are deep; the getters return pointers into the object's own buffer,
 * valid until the object is assigned to or destroyed. */
class gene_struct {
    char       *gene_name;        // owned buffer; starts with the internal gene name
    const char *arb_species_name; // points into 'gene_name'
    const char *arb_gene_name;    // points into 'gene_name'

    /* Allocates a fresh buffer and copies the three strings into it.
     * Does not release a previously held buffer. The members are assigned
     * only after the allocation succeeded, so a throwing 'new' leaves the
     * object untouched. */
    void init(const char *gene_name_, const char *arb_species_name_, const char *arb_gene_name_) {
        size_t gene_name_len        = strlen(gene_name_);
        size_t arb_species_name_len = strlen(arb_species_name_);
        size_t arb_gene_name_len    = strlen(arb_gene_name_);

        size_t fulllen = gene_name_len+1+arb_species_name_len+1+arb_gene_name_len+1;
        char  *buffer  = new char[fulllen];
        char  *species = buffer+(gene_name_len+1);
        char  *gene    = species+(arb_species_name_len+1);

        // write through the non-const local pointers (no const_cast needed)
        strcpy(buffer, gene_name_);
        strcpy(species, arb_species_name_);
        strcpy(gene, arb_gene_name_);

        gene_name        = buffer;
        arb_species_name = species;
        arb_gene_name    = gene;
    }

public:
    /* Builds the object from three NUL-terminated strings (copied). */
    gene_struct(const char *gene_name_, const char *arb_species_name_, const char *arb_gene_name_) {
        init(gene_name_, arb_species_name_, arb_gene_name_);
    }
    /* Deep copy. A self-check makes no sense here: skipping init() would
     * leave every member uninitialized. */
    gene_struct(const gene_struct& other) {
        init(other.get_internal_gene_name(), other.get_arb_species_name(), other.get_arb_gene_name());
    }
    /* Deep-copy assignment. The new buffer is built first (inside 'copy');
     * only afterwards the buffers are exchanged, so *this stays valid if
     * the allocation throws. The old buffer is released by 'copy'. */
    gene_struct& operator = (const gene_struct& other) {
        if (&other != this) {
            gene_struct copy(other);

            char       *old_gene_name        = gene_name;
            const char *old_arb_species_name = arb_species_name;
            const char *old_arb_gene_name    = arb_gene_name;

            gene_name        = copy.gene_name;
            arb_species_name = copy.arb_species_name;
            arb_gene_name    = copy.arb_gene_name;

            copy.gene_name        = old_gene_name;
            copy.arb_species_name = old_arb_species_name;
            copy.arb_gene_name    = old_arb_gene_name;
        }
        return *this;
    }

    ~gene_struct() {
        delete [] gene_name;
    }

    const char *get_internal_gene_name() const { return gene_name; }
    const char *get_arb_species_name() const { return arb_species_name; }
    const char *get_arb_gene_name() const { return arb_gene_name; }
};
265
/* Server mode flag: a value of 1 means we are a gene pt server. */
extern int gene_flag;
267
268struct ltByArbName {
269    bool operator()(const gene_struct *gs1, const gene_struct *gs2) const {
270        int cmp           = strcmp(gs1->get_arb_species_name(), gs2->get_arb_species_name());
271        if (cmp == 0) { cmp = strcmp(gs1->get_arb_gene_name(), gs2->get_arb_gene_name()); }
272        return cmp<0;
273    }
274};
275struct ltByInternalName {
276    bool operator()(const gene_struct *gs1, const gene_struct *gs2) const {
277        int cmp = strcmp(gs1->get_internal_gene_name(), gs2->get_internal_gene_name());
278        return cmp<0;
279    }
280};
281
282typedef std::list<gene_struct>                          gene_struct_list;
283typedef std::set<const gene_struct *, ltByInternalName> gene_struct_index_internal;
284typedef std::set<const gene_struct *, ltByArbName>      gene_struct_index_arb;
285
286extern gene_struct_list           all_gene_structs; // stores all gene_structs
287extern gene_struct_index_arb      gene_struct_arb2internal; // sorted by arb speces+gene name
288extern gene_struct_index_internal gene_struct_internal2arb; // sorted by internal name
289
/* True when the first byte at 'str', read as unsigned char, equals 255 (0xFF). */
#define PT_base_string_counter_eof(str) (*(const unsigned char *)(str) == 0xFF)
291
292#else
293#error probe.h included twice
294#endif
Note: See TracBrowser for help on using the repository browser.