source: tags/cvs_2_svn/PROBE/probe.h

Last change on this file was 4793, checked in by westram, 17 years ago
  • forward declare BI_ecoli_ref instead of using void*
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 7.2 KB
Line 
1#ifndef PROBE_H
2#define PROBE_H
3
4#ifndef __LIST__
5#include <list>
6#endif
7#ifndef __SET__
8#include <set>
9#endif
10
11#ifndef ARBDB_H
12#include <arbdb.h>
13#endif
14#ifndef PT_COM_H
15#include <PT_com.h>
16#endif
17#ifndef AISC_GEN_SERVER_INCLUDED
18#include <PT_server.h>
19#endif
20
21#ifndef ARB_ASSERT_H
22#include <arb_assert.h>
23#endif
24
/* PROBE-local assertion: forwards its condition unchanged to arb_assert(). */
#define pt_assert(cond) arb_assert(cond)
26
/* PTM_DEBUG is defined exactly when DEBUG is defined. */
#ifdef DEBUG
#define PTM_DEBUG
#endif // DEBUG
30
/* Short names for the unsigned integer types used throughout this header. */
typedef unsigned char uchar;
typedef unsigned int  uint;
typedef unsigned long ulong;
34
35#define  min(a, b) ((a) < (b)) ? (a) : (b)
36#define  max(a, b) ((a) > (b)) ? (a) : (b)
37
/* Stores 0 through a null 'int' pointer (presumably to force a crash / core
 * dump on purpose - confirm against the call sites).
 * Note: the expansion already ends in ';', so 'PT_CORE' is a complete statement.
 */
#define PT_CORE *((int *)0) = 0;
39
/* Value of PT_MAX_MATCHES; DEVEL_KAI builds use a larger value.
 * The product is parenthesized so that the macro expands as a single value
 * inside larger expressions (e.g. 'n % PT_MAX_MATCHES').
 */
#if defined(DEVEL_KAI)
#define PT_MAX_MATCHES     (1024*256)
#else
#define PT_MAX_MATCHES     100000
#endif // DEVEL_KAI
45
/* Fixed sizes used by the PROBE code.
 * PT_POS_TREE_HEIGHT + PT_POS_SECURITY + 1 is the length of the
 * 'w_N_mismatches' array in 'probe_struct_global'.
 */
// #define PT_MAX_IDENTS      10000
#define PT_POS_TREE_HEIGHT 20
#define PT_POS_SECURITY    10
#define MIN_PROBE_LENGTH   6
#define PT_NAME_HASH_SIZE  10000
51
/* Match types; all values are explicit, listed in ascending numeric order. */
enum PT_MATCH_TYPE {
    PT_MATCH_TYPE_WEIGHTED          = -1,
    PT_MATCH_TYPE_INTEGER           = 0,
    PT_MATCH_TYPE_WEIGHTED_PLUS_POS = 1
};
57
58
59
/* private message types */
#define MATCHANSWER  50 /* match result answer */
#define CREATEANSWER 51 /* create result answer */
#define FINDANSWER   52 /* find result answer */
63
64extern ulong physical_memory;
65struct Hs_struct;
66extern char *pt_error_buffer;
67
/* Value type tags; listed in ascending numeric order. */
typedef enum type_types_type {
    t_string = 0,
    t_int    = 1,
    t_float  = 2
} type_types;
73
/* Base codes; PT_B_MAX is one past the last base code. */
typedef enum PT_bases_enum {
    PT_QU    = 0,
    PT_N     = 1,
    PT_A     = 2,
    PT_C     = 3,
    PT_G     = 4,
    PT_T     = 5,
    PT_B_MAX = 6
} PT_BASES;
83
/*  POS TREE */

/* Node types of the POS TREE.
 * Value 3 is shared on purpose: it is PT_NT_SINGLE_NODE in stage 3 and
 * PT_NT_SAVED in stages 1+2.
 */
typedef enum enum_PT_NODE_TYPE {
    PT_NT_LEAF        = 0,
    PT_NT_CHAIN       = 1,
    PT_NT_NODE        = 2,
    PT_NT_SINGLE_NODE = 3,      // stage 3
    PT_NT_SAVED       = 3,      // stage 1+2
    PT_NT_UNDEF       = 4
} PT_NODE_TYPE;
93
94typedef struct POS_TREE_struct {
95    uchar flags;
96    char  data;
97} POS_TREE;
98
/* Base pointer plus stage flags and mode; referenced as 'ptmain' from
 * 'probe_struct_global'.
 */
typedef struct PTMM_struct {
    char *base;
    int   stage1;
    int   stage2;
    int   stage3;
    int   mode;
} PTM2;
106
107
108
109/* Probe search */
110
111
/* Per-sequence match statistic (the 'stat' member of probe_input_data). */
struct probe_statistic {
    int    match_count;         // counter for matches
    double rel_match_count;     // match_count / (seq_len - probe_len + 1)
};
116
117struct probe_input_data {    /* every taxas own data */
118    /******* name and sequence *********/
119    char   *data;
120    long    checksum;
121    int     size;
122    char   *name;
123    char   *fullname;
124    GBDATA *gbd;
125
126    /********* probe design ************/
127    int is_group;   /* -1:  nevermind
128                        0:  no group
129                        1:  group */
130
131    /* probe design (match) */
132    PT_probematch *match;       /* best hit for PT_new_design */
133
134    /********* find family  ************/
135    struct probe_statistic stat;
136
137    /********* free pointer  ************/
138    int next;
139};
140
/* Plain integer counters (the 'stat' member of probe_struct_global). */
struct probe_statistic_struct {
    int cut_offs;               // statistic of chains
    int single_node;
    int short_node;
    int long_node;
    int longs;
    int shorts;
    int shorts2;
    int chars;
};
152
153class BI_ecoli_ref;
154
155extern struct probe_struct_global   {
156    GBDATA  *gb_main;           /* ARBDB interface */
157    GBDATA  *gb_species_data;
158    GBDATA  *gb_extended_data;
159    char    *alignment_name;
160    GB_HASH *namehash;          /* name to int */
161
162    struct probe_input_data *data; /* the internal database */
163
164    char         *ecoli;        /* the ecoli sequenz */
165    BI_ecoli_ref *bi_ecoli;
166
167    int  data_count;
168    int  max_size;              /* maximum sequence len */
169    long char_count;            /* number of all 'acgtuACGTU' */
170
171    int    mismatches;          /* chain handle in match */
172    double wmismatches;
173    int    N_mismatches;
174    int    w_N_mismatches[PT_POS_TREE_HEIGHT+PT_POS_SECURITY+1];
175
176    int reversed;       /* tell the matcher whether probe is reversed */
177
178    double *pos_to_weight;      /* position to weight */
179    char    complement[256];    /* complement */
180
181    int deep;                   /* for probe matching */
182    int height;
183    int length;
184    int apos;
185
186    int sort_by;
187
188    char *probe;                /* probe design + chains */
189    char *main_probe;
190
191    char             *server_name; /* name of this server */
192    aisc_com         *link;
193    T_PT_MAIN         main;
194    struct Hs_struct *com_so;   /* the communication socket */
195    POS_TREE         *pt;
196    PTM2             *ptmain;
197
198    probe_statistic_struct stat;
199
200} psg;
201
/* Holds the three names of one gene: the internal gene name plus the ARB
 * species name and ARB gene name.
 *
 * All three strings are stored back to back in ONE heap buffer owned by
 * 'gene_name':
 *
 *     <internal gene name> '\0' <arb species name> '\0' <arb gene name> '\0'
 *
 * 'arb_species_name' and 'arb_gene_name' point into that buffer, so every
 * copy needs its own buffer (deep copy) and only 'gene_name' is ever deleted.
 */
class gene_struct {
    char       *gene_name;
    const char *arb_species_name; // pointers into 'gene_name'
    const char *arb_gene_name;

    /* Allocates a fresh buffer, copies the three names into it and points
     * the members at it.
     *
     * Does NOT free a previously owned buffer - that is the caller's job.
     * The members are only modified after the allocation succeeded, so a
     * throwing 'new' leaves the object untouched.
     */
    void init(const char *gene_name_, const char *arb_species_name_, const char *arb_gene_name_) {
        // sizes include the terminating zero byte of each string
        const size_t gene_name_size        = strlen(gene_name_)+1;
        const size_t arb_species_name_size = strlen(arb_species_name_)+1;
        const size_t arb_gene_name_size    = strlen(arb_gene_name_)+1;

        char *buffer  = new char[gene_name_size+arb_species_name_size+arb_gene_name_size];
        char *species = buffer+gene_name_size;
        char *gene    = species+arb_species_name_size;

        memcpy(buffer,  gene_name_,        gene_name_size);
        memcpy(species, arb_species_name_, arb_species_name_size);
        memcpy(gene,    arb_gene_name_,    arb_gene_name_size);

        gene_name        = buffer;
        arb_species_name = species;
        arb_gene_name    = gene;
    }

public:
    /* Stores private copies of the three given zero-terminated names. */
    gene_struct(const char *gene_name_, const char *arb_species_name_, const char *arb_gene_name_) {
        init(gene_name_, arb_species_name_, arb_gene_name_);
    }
    /* Deep copy: the new object gets its own buffer. */
    gene_struct(const gene_struct& other) {
        init(other.get_internal_gene_name(), other.get_arb_species_name(), other.get_arb_gene_name());
    }
    /* Deep copy assignment; self-assignment is a no-op.
     * The new buffer is built BEFORE the old one is released, so a failing
     * allocation cannot leave 'gene_name' dangling.
     */
    gene_struct& operator = (const gene_struct& other) {
        if (&other != this) {
            char *old_buffer = gene_name;
            init(other.get_internal_gene_name(), other.get_arb_species_name(), other.get_arb_gene_name());
            delete [] old_buffer;
        }
        return *this;
    }

    ~gene_struct() {
        delete [] gene_name; // also releases the storage of both arb names
    }

    // accessors: returned pointers stay valid until the object is assigned to or destroyed
    const char *get_internal_gene_name() const { return gene_name; }
    const char *get_arb_species_name() const { return arb_species_name; }
    const char *get_arb_gene_name() const { return arb_gene_name; }
};
246
// 'gene_flag' == 1 means: we are a gene pt server
extern int gene_flag;
248
249struct ltByArbName {
250    bool operator()(const gene_struct *gs1, const gene_struct *gs2) const {
251        int cmp           = strcmp(gs1->get_arb_species_name(), gs2->get_arb_species_name());
252        if (cmp == 0) { cmp = strcmp(gs1->get_arb_gene_name(), gs2->get_arb_gene_name()); }
253        return cmp<0;
254    }
255};
256struct ltByInternalName {
257    bool operator()(const gene_struct *gs1, const gene_struct *gs2) const {
258        int cmp = strcmp(gs1->get_internal_gene_name(), gs2->get_internal_gene_name());
259        return cmp<0;
260    }
261};
262
263typedef std::list<gene_struct>                          gene_struct_list;
264typedef std::set<const gene_struct *, ltByInternalName> gene_struct_index_internal;
265typedef std::set<const gene_struct *, ltByArbName>      gene_struct_index_arb;
266
267extern gene_struct_list           all_gene_structs; // stores all gene_structs
268extern gene_struct_index_arb      gene_struct_arb2internal; // sorted by arb speces+gene name
269extern gene_struct_index_internal gene_struct_internal2arb; // sorted by internal name
270
/* True if the first byte at 'str', read as unsigned char, is 255 (0xFF). */
#define PT_base_string_counter_eof(str) (*(unsigned char *)(str) == 0xFF)
272
273#else
274#error probe.h included twice
275#endif
Note: See TracBrowser for help on using the repository browser.