1 | #include <stdio.h> |
---|
2 | #include <stdlib.h> |
---|
3 | #include <string.h> |
---|
4 | #include <malloc.h> |
---|
5 | |
---|
6 | #include <adlocal.h> |
---|
7 | #include <arbdb.h> |
---|
8 | #include <arbdbt.h> |
---|
9 | #include "adseqcompr.h" |
---|
10 | #define MAX_SEQUENCE_PER_MASTER 10 |
---|
11 | |
---|
12 | double g_b_number_of_sequences_to_compress; |
---|
13 | int g_b_counter_of_sequences_to_compress; |
---|
14 | |
---|
15 | GB_Consensus *g_b_new_Consensus(long len){ |
---|
16 | GB_Consensus *gcon = (GB_Consensus *)GB_calloc(sizeof(GB_Consensus),1); |
---|
17 | int i; |
---|
18 | unsigned char *data = (unsigned char *)GB_calloc(sizeof(char)*256,len); |
---|
19 | gcon->len = len; |
---|
20 | |
---|
21 | for (i=0;i<256;i++){ |
---|
22 | gcon->con[i] = data + len*i; |
---|
23 | } |
---|
24 | return gcon; |
---|
25 | } |
---|
26 | |
---|
27 | |
---|
28 | void g_b_delete_Consensus(GB_Consensus *gcon){ |
---|
29 | free((char *)gcon->con[0]); |
---|
30 | free((char *)gcon); |
---|
31 | } |
---|
32 | |
---|
33 | |
---|
34 | void g_b_Consensus_add(GB_Consensus *gcon, unsigned char *seq, long seq_len){ |
---|
35 | register int i; |
---|
36 | register int li; |
---|
37 | register int c; |
---|
38 | register unsigned char *s; |
---|
39 | register int last; |
---|
40 | register unsigned char *p; |
---|
41 | int eq_count; |
---|
42 | const int max_priority = 255/MAX_SEQUENCE_PER_MASTER; /* No overflow possible */ |
---|
43 | |
---|
44 | if (seq_len > gcon->len) seq_len = gcon->len; |
---|
45 | |
---|
46 | /* Search for runs */ |
---|
47 | s = seq; |
---|
48 | last = 0; |
---|
49 | eq_count = 0; |
---|
50 | for (li = i = 0; i < seq_len; i++ ){ |
---|
51 | c = *(s++); |
---|
52 | if (c == last){ |
---|
53 | continue; |
---|
54 | }else{ |
---|
55 | inc_hits: |
---|
56 | eq_count = i-li; |
---|
57 | gcon->used[c] = 1; |
---|
58 | p = gcon->con[last]; |
---|
59 | last = c; |
---|
60 | if (eq_count <= GB_RUNLENGTH_SIZE) { |
---|
61 | c = max_priority; |
---|
62 | while (li < i) p[li++] += c; |
---|
63 | }else{ |
---|
64 | c = max_priority * ( GB_RUNLENGTH_SIZE ) / eq_count; |
---|
65 | if (c){ |
---|
66 | while (li < i) p[li++] += c; |
---|
67 | }else{ |
---|
68 | while (li < i) p[li++] |= 1; |
---|
69 | } |
---|
70 | } |
---|
71 | } |
---|
72 | } |
---|
73 | if (li<seq_len){ |
---|
74 | c = last; |
---|
75 | i = seq_len; |
---|
76 | goto inc_hits; |
---|
77 | } |
---|
78 | } |
---|
79 | |
---|
80 | char *g_b_Consensus_get_sequence(GB_Consensus *gcon){ |
---|
81 | register int pos; |
---|
82 | register unsigned char *s; |
---|
83 | register unsigned char *max = (unsigned char *)GB_calloc(sizeof(char), gcon->len); |
---|
84 | int c; |
---|
85 | char *seq = (char *)GB_calloc(sizeof(char),gcon->len+1); |
---|
86 | |
---|
87 | memset(seq,'@',gcon->len); |
---|
88 | |
---|
89 | for (c = 1; c<256;c++){ /* Find maximum frequency of non run */ |
---|
90 | if (!gcon->used[c]) continue; |
---|
91 | s = gcon->con[c]; |
---|
92 | for (pos= 0;pos<gcon->len;pos++){ |
---|
93 | if (*s > max[pos]) { |
---|
94 | max[pos] = *s; |
---|
95 | seq[pos] = c; |
---|
96 | } |
---|
97 | s++; |
---|
98 | } |
---|
99 | } |
---|
100 | free((char *)max); |
---|
101 | return seq; |
---|
102 | } |
---|
103 | |
---|
104 | |
---|
105 | int g_b_count_leafs(GB_CTREE *node){ |
---|
106 | if (node->is_leaf) return 1; |
---|
107 | node->gb_node = 0; |
---|
108 | return (g_b_count_leafs(node->leftson) + g_b_count_leafs(node->rightson)); |
---|
109 | } |
---|
110 | |
---|
111 | void g_b_put_sequences_in_container(GB_CTREE *ctree,GB_Sequence *seqs,GB_Master **masters,GB_Consensus *gcon){ |
---|
112 | if (ctree->is_leaf){ |
---|
113 | char *data; |
---|
114 | long len; |
---|
115 | if (ctree->index<0) return; |
---|
116 | g_b_counter_of_sequences_to_compress++; |
---|
117 | GB_status(g_b_counter_of_sequences_to_compress / g_b_number_of_sequences_to_compress); |
---|
118 | data = GB_read_char_pntr(seqs[ctree->index].gbd); |
---|
119 | len = GB_read_string_count(seqs[ctree->index].gbd); |
---|
120 | g_b_Consensus_add(gcon,(unsigned char *)data,len); |
---|
121 | return; |
---|
122 | } |
---|
123 | if (ctree->index<0){ |
---|
124 | g_b_put_sequences_in_container(ctree->leftson,seqs,masters,gcon); |
---|
125 | g_b_put_sequences_in_container(ctree->rightson,seqs,masters,gcon); |
---|
126 | return; |
---|
127 | } |
---|
128 | { |
---|
129 | char *data = GB_read_char_pntr(masters[ctree->index]->gbd); |
---|
130 | long len = GB_read_string_count(masters[ctree->index]->gbd); |
---|
131 | g_b_Consensus_add(gcon,(unsigned char *)data,len); |
---|
132 | } |
---|
133 | } |
---|
134 | |
---|
135 | void g_b_create_master( GB_CTREE *node, |
---|
136 | GB_Sequence *seqs, |
---|
137 | GB_Master **masters, |
---|
138 | int my_master, |
---|
139 | const char *ali_name, long seq_len){ |
---|
140 | if (node->is_leaf){ |
---|
141 | GBDATA *gb_data; |
---|
142 | if (node->index < 0) return; |
---|
143 | gb_data = GBT_read_sequence(node->gb_node,(char *)ali_name); |
---|
144 | seqs[node->index].gbd = gb_data; |
---|
145 | seqs[node->index].master = my_master; |
---|
146 | return; |
---|
147 | } |
---|
148 | if (node->index>=0){ |
---|
149 | masters[node->index]->master = my_master; |
---|
150 | my_master = node->index; |
---|
151 | } |
---|
152 | g_b_create_master(node->leftson,seqs,masters,my_master,ali_name,seq_len); |
---|
153 | g_b_create_master(node->rightson,seqs,masters,my_master,ali_name,seq_len); |
---|
154 | if (node->index>=0){ /* build me */ |
---|
155 | char *data; |
---|
156 | GB_Consensus *gcon = g_b_new_Consensus(seq_len); |
---|
157 | g_b_put_sequences_in_container(node->leftson,seqs,masters,gcon); |
---|
158 | g_b_put_sequences_in_container(node->rightson,seqs,masters,gcon); |
---|
159 | data = g_b_Consensus_get_sequence(gcon); |
---|
160 | GB_write_string(masters[node->index]->gbd,data); |
---|
161 | GB_write_security_write(masters[node->index]->gbd,7); |
---|
162 | |
---|
163 | g_b_delete_Consensus(gcon); |
---|
164 | free(data); |
---|
165 | } |
---|
166 | } |
---|
167 | |
---|
168 | int g_b_set_masters_and_set_leafs(GB_CTREE *node, |
---|
169 | GB_Sequence *seqs, int *scount, |
---|
170 | GB_Master **masters, int *mcount, |
---|
171 | char *ali_name, |
---|
172 | GBDATA *master_folder ){ |
---|
173 | int sumsons; |
---|
174 | if (node->is_leaf){ |
---|
175 | if (node->gb_node ==0 || !GBT_read_sequence(node->gb_node,(char *)ali_name)){ |
---|
176 | node->index = -1; |
---|
177 | return 0; |
---|
178 | } |
---|
179 | |
---|
180 | node->index = *scount; |
---|
181 | (*scount)++; |
---|
182 | return 1; |
---|
183 | } |
---|
184 | node->index = -1; |
---|
185 | sumsons = |
---|
186 | g_b_set_masters_and_set_leafs( node->leftson, seqs,scount,masters,mcount,ali_name, master_folder) + |
---|
187 | g_b_set_masters_and_set_leafs( node->rightson, seqs,scount,masters,mcount,ali_name, master_folder); |
---|
188 | if (sumsons < MAX_SEQUENCE_PER_MASTER && node->father){ |
---|
189 | return sumsons; |
---|
190 | } |
---|
191 | /* Now we should create a master, there are enough sons */ |
---|
192 | node->index = *mcount; |
---|
193 | masters[*mcount] = (GB_Master *)GB_calloc(sizeof(GB_Master),1); |
---|
194 | masters[*mcount]->gbd = gb_create(master_folder,"@master",GB_STRING); |
---|
195 | (*mcount)++; |
---|
196 | return 1; |
---|
197 | } |
---|
198 | |
---|
199 | static GB_INLINE long g_b_read_number2(unsigned char **s) { |
---|
200 | register unsigned int c0,c1,c2,c3,c4; |
---|
201 | c0 = (*((*s)++)); |
---|
202 | if (c0 & 0x80){ |
---|
203 | c1 = (*((*s)++)); |
---|
204 | if (c0 & 0x40) { |
---|
205 | c2 = (*((*s)++)); |
---|
206 | if (c0 & 0x20) { |
---|
207 | c3 = (*((*s)++)); |
---|
208 | if (c0 &0x10) { |
---|
209 | c4 = (*((*s)++)); |
---|
210 | return c4 | (c3<<8) | (c2<<16) | (c1<<8); |
---|
211 | }else{ |
---|
212 | return (c3) | (c2<<8 ) | (c1<<16) | ((c0 & 0x0f)<<24); |
---|
213 | } |
---|
214 | }else{ |
---|
215 | return (c2) | (c1<<8) | ((c0 & 0x1f)<<16); |
---|
216 | } |
---|
217 | }else{ |
---|
218 | return (c1) | ((c0 & 0x3f)<<8); |
---|
219 | } |
---|
220 | }else{ |
---|
221 | return c0; |
---|
222 | } |
---|
223 | } |
---|
224 | |
---|
225 | static GB_INLINE void g_b_put_number2(int i, unsigned char **s) { |
---|
226 | register int j; |
---|
227 | if (i< 0x80 ){ *((*s)++)=i;return; } |
---|
228 | if (i<0x4000) { |
---|
229 | j = (i>>8) | 0x80; |
---|
230 | *((*s)++)=j; |
---|
231 | *((*s)++)=i; |
---|
232 | }else if (i<0x200000) { |
---|
233 | j = (i>>16) | 0xC0; |
---|
234 | *((*s)++)=j; |
---|
235 | j = (i>>8); |
---|
236 | *((*s)++)=j; |
---|
237 | *((*s)++)=i; |
---|
238 | } else if (i<0x10000000) { |
---|
239 | j = (i>>24) | 0xE0; |
---|
240 | *((*s)++)=j; |
---|
241 | j = (i>>16); |
---|
242 | *((*s)++)=j; |
---|
243 | j = (i>>8); |
---|
244 | *((*s)++)=j; |
---|
245 | *((*s)++)=i; |
---|
246 | } |
---|
247 | } |
---|
248 | |
---|
249 | |
---|
250 | char *gb_compress_seq_by_master(const char *master,int master_len,int master_index, |
---|
251 | GBQUARK q, const char *seq, long seq_len, |
---|
252 | long *memsize, int old_flag){ |
---|
253 | unsigned char *buffer; |
---|
254 | int rest = 0; |
---|
255 | register unsigned char *d; |
---|
256 | register int i,cs,cm; |
---|
257 | int last; |
---|
258 | long len = seq_len; |
---|
259 | d = buffer = (unsigned char *)GB_give_other_buffer(seq,seq_len); |
---|
260 | |
---|
261 | if (seq_len > master_len){ |
---|
262 | rest = seq_len - master_len; |
---|
263 | len = master_len; |
---|
264 | } |
---|
265 | |
---|
266 | last = -1000; /* Convert Sequence relative to Master */ |
---|
267 | for( i = len; i>0; i--){ |
---|
268 | cm = *(master++); |
---|
269 | cs = *(seq++); |
---|
270 | if (cm==cs && cs != last){ |
---|
271 | *(d++) = 0; |
---|
272 | last = 1000; |
---|
273 | }else{ |
---|
274 | *(d++) = cs; |
---|
275 | last = cs; |
---|
276 | } |
---|
277 | } |
---|
278 | for( i = rest; i>0; i--){ |
---|
279 | *(d++) = *(seq++); |
---|
280 | } |
---|
281 | |
---|
282 | { /* Append run length compression method */ |
---|
283 | unsigned char *buffer2; |
---|
284 | unsigned char *dest2; |
---|
285 | buffer2 = dest2 = (unsigned char *)GB_give_other_buffer((char *)buffer,seq_len+100); |
---|
286 | *(dest2++) = GB_COMPRESSION_SEQUENCE | old_flag; |
---|
287 | |
---|
288 | g_b_put_number2(master_index,&dest2); /* Tags */ |
---|
289 | g_b_put_number2(q,&dest2); |
---|
290 | |
---|
291 | gb_compress_equal_bytes_2((char *)buffer,seq_len,memsize,(char *)dest2); /* append runlength compressed sequences to tags */ |
---|
292 | |
---|
293 | *memsize = *memsize + (dest2-buffer2); |
---|
294 | return (char *)buffer2; |
---|
295 | } |
---|
296 | } |
---|
297 | |
---|
298 | char *gb_compress_sequence_by_master(GBDATA *gbd,const char *master,int master_len,int master_index, |
---|
299 | GBQUARK q, const char *seq, int seq_len, long *memsize){ |
---|
300 | char *is; |
---|
301 | char *res; |
---|
302 | long size; |
---|
303 | is = gb_compress_seq_by_master(master,master_len,master_index,q,seq,seq_len,&size,GB_COMPRESSION_LAST); |
---|
304 | res = gb_compress_data(gbd,0,is,size,memsize, ~(GB_COMPRESSION_DICTIONARY|GB_COMPRESSION_SORTBYTES|GB_COMPRESSION_RUNLENGTH),GB_TRUE); |
---|
305 | return res; |
---|
306 | } |
---|
307 | |
---|
308 | GB_ERROR GBT_compress_sequence_tree(GBDATA *gb_main, GB_CTREE *tree, const char *ali_name){ |
---|
309 | GB_ERROR error = 0; |
---|
310 | char *masterfoldername = 0; |
---|
311 | int leafcount = 0; |
---|
312 | int mastercount = 0; |
---|
313 | int seqcount = 0; |
---|
314 | int main_clock; |
---|
315 | GB_Sequence *seqs = 0; |
---|
316 | GB_Master **masters = 0; |
---|
317 | GBDATA *gb_master_ali; |
---|
318 | GBDATA *old_gb_master_ali; |
---|
319 | int q; |
---|
320 | GB_MAIN_TYPE *Main = GB_MAIN(gb_main); |
---|
321 | q = gb_key_2_quark(Main,ali_name); |
---|
322 | |
---|
323 | do { |
---|
324 | long seq_len = GBT_get_alignment_len(gb_main,(char *)ali_name); |
---|
325 | main_clock = GB_read_clock(gb_main); |
---|
326 | if (seq_len<0){ |
---|
327 | error = GB_export_error("Alignment '%s' is not a valid alignment",ali_name); |
---|
328 | return error; |
---|
329 | } |
---|
330 | leafcount = g_b_count_leafs(tree); |
---|
331 | if (!leafcount) return 0; |
---|
332 | |
---|
333 | g_b_number_of_sequences_to_compress = (1 + leafcount) * (2.0 + 1.0 / MAX_SEQUENCE_PER_MASTER); |
---|
334 | g_b_counter_of_sequences_to_compress = 0; |
---|
335 | seqs = (GB_Sequence *)GB_calloc(sizeof(GB_Sequence),leafcount); |
---|
336 | masters = (GB_Master **)GB_calloc(sizeof(void *),leafcount); |
---|
337 | masterfoldername = strdup(GBS_global_string("%s/@master_data/@%s",GB_SYSTEM_FOLDER,ali_name)); |
---|
338 | old_gb_master_ali = GB_search( gb_main, masterfoldername,GB_FIND); |
---|
339 | free(masterfoldername); |
---|
340 | { |
---|
341 | char *master_data_name = strdup(GBS_global_string("%s/@master_data",GB_SYSTEM_FOLDER)); |
---|
342 | char *master_name = strdup(GBS_global_string("@%s",ali_name)); |
---|
343 | GBDATA *gb_master_data = gb_search( gb_main, master_data_name,GB_CREATE_CONTAINER,1); |
---|
344 | gb_master_ali = gb_create_container( gb_master_data, master_name); /* create a master container always, |
---|
345 | the old is deleted as soon as all sequences are |
---|
346 | compressed by the new method*/ |
---|
347 | GB_write_security_delete(gb_master_ali,7); |
---|
348 | free(master_name); |
---|
349 | free(master_data_name); |
---|
350 | } |
---|
351 | GB_status(0); |
---|
352 | g_b_set_masters_and_set_leafs(tree,seqs,&seqcount,masters,&mastercount,(char *)ali_name,gb_master_ali); |
---|
353 | g_b_create_master(tree,seqs,masters,-1,ali_name,seq_len); |
---|
354 | } while(0); |
---|
355 | |
---|
356 | /* Now compress everything !!! */ |
---|
357 | { |
---|
358 | long sizes =0 ; |
---|
359 | long sizen = 0; |
---|
360 | char *seq; |
---|
361 | int seq_len; |
---|
362 | int si; |
---|
363 | char *ss; |
---|
364 | long sumorg = 0; |
---|
365 | long sumold = 0; |
---|
366 | long sumnew = 0; |
---|
367 | |
---|
368 | /* Compress sequences in tree */ |
---|
369 | for (si=0;si<seqcount;si++){ |
---|
370 | int mi = seqs[si].master; |
---|
371 | GB_Master *master = masters[mi]; |
---|
372 | GBDATA *gbd = seqs[si].gbd; |
---|
373 | |
---|
374 | char *seqm = GB_read_string(master->gbd); |
---|
375 | int master_len = GB_read_string_count(master->gbd); |
---|
376 | if (GB_read_clock(gbd) >= main_clock){ |
---|
377 | GB_warning("A Species seems to be more than once in the tree"); |
---|
378 | continue; /* Check for sequences which are more than once in the tree */ |
---|
379 | } |
---|
380 | |
---|
381 | seq = GB_read_string(gbd); |
---|
382 | seq_len = GB_read_string_count(gbd); |
---|
383 | sizen = GB_read_memuse(gbd); |
---|
384 | |
---|
385 | ss = gb_compress_sequence_by_master(gbd,seqm,master_len,mi,q,seq,seq_len,&sizes); |
---|
386 | gb_write_compressed_pntr(gbd,ss,sizes,seq_len); |
---|
387 | |
---|
388 | GB_status( (++g_b_counter_of_sequences_to_compress) / g_b_number_of_sequences_to_compress); |
---|
389 | |
---|
390 | sumorg += seq_len; |
---|
391 | sumold += sizen; |
---|
392 | sumnew+= sizes; |
---|
393 | GB_FREE(seqm); |
---|
394 | GB_FREE(seq); |
---|
395 | } |
---|
396 | GB_FREE(seqs); |
---|
397 | /* Compress rest of sequences */ |
---|
398 | { |
---|
399 | GBDATA *gb_species_data = GB_search(gb_main,"species_data",GB_CREATE_CONTAINER); |
---|
400 | GBDATA *gb_species; |
---|
401 | for (gb_species = GBT_first_species_rel_species_data(gb_species_data); |
---|
402 | gb_species; |
---|
403 | gb_species = GBT_next_species(gb_species)){ |
---|
404 | GBDATA *gb_data = GBT_read_sequence(gb_species,ali_name); |
---|
405 | char *data; |
---|
406 | if(!gb_data) continue; |
---|
407 | if (GB_read_clock(gb_data) >= main_clock) continue; /* Compress only those which are not compressed by masters */ |
---|
408 | data = GB_read_string(gb_data); |
---|
409 | GB_write_string(gb_data,""); |
---|
410 | GB_write_string(gb_data,data); |
---|
411 | GB_FREE(data); |
---|
412 | } |
---|
413 | } |
---|
414 | |
---|
415 | /* Compress all masters */ |
---|
416 | for (si=0;si<mastercount;si++){ |
---|
417 | int mi = masters[si]->master; |
---|
418 | GB_Master *master; |
---|
419 | GBDATA *gbd; |
---|
420 | char *seqm; |
---|
421 | int master_len; |
---|
422 | |
---|
423 | if (mi>0) { /* master available */ |
---|
424 | ad_assert(mi>si); /* we don't want a rekursion, because we cannot uncompress sequence compressed masters, Main->gb_master_data is wrong */ |
---|
425 | master = masters[mi]; |
---|
426 | gbd = masters[si]->gbd; |
---|
427 | seqm = GB_read_string(master->gbd); |
---|
428 | master_len = GB_read_string_count(master->gbd); |
---|
429 | if (gb_read_nr(gbd) != si) { /* Check database */ |
---|
430 | GB_internal_error("Sequence Compression: Master Index Conflict"); |
---|
431 | error = GB_export_error("Sequence Compression: Master Index Conflict"); |
---|
432 | break; |
---|
433 | } |
---|
434 | |
---|
435 | seq = GB_read_string(gbd); |
---|
436 | seq_len = GB_read_string_count(gbd); |
---|
437 | sizen = GB_read_memuse(gbd); |
---|
438 | ss = gb_compress_sequence_by_master(gbd,seqm,master_len,mi,q,seq,seq_len,&sizes); |
---|
439 | gb_write_compressed_pntr(gbd,ss,sizes,seq_len); |
---|
440 | |
---|
441 | g_b_counter_of_sequences_to_compress++; |
---|
442 | GB_status(g_b_counter_of_sequences_to_compress / g_b_number_of_sequences_to_compress); |
---|
443 | |
---|
444 | sumnew+= sizes; |
---|
445 | free(seqm); |
---|
446 | free(seq); |
---|
447 | } |
---|
448 | GB_FREE(masters[si]); |
---|
449 | |
---|
450 | } |
---|
451 | GB_FREE(masters); |
---|
452 | GB_warning("Alignment '%s': Sum Orig %6lik Old %5lik New %5lik",ali_name,sumorg/1024,sumold/1024,sumnew/1024); |
---|
453 | |
---|
454 | } |
---|
455 | if(!error){ |
---|
456 | if (old_gb_master_ali){ |
---|
457 | error = GB_delete(old_gb_master_ali); |
---|
458 | } |
---|
459 | Main->keys[q].gb_master_ali = gb_master_ali; |
---|
460 | } |
---|
461 | return error; |
---|
462 | } |
---|
463 | |
---|
464 | /* Compress sequences, call only outside a transaction */ |
---|
465 | GB_ERROR GBT_compress_sequence_tree2(GBDATA *gb_main, const char *tree_name, const char *ali_name){ |
---|
466 | GB_ERROR error = 0; |
---|
467 | GB_CTREE *ctree; |
---|
468 | GB_UNDO_TYPE undo_type = GB_get_requested_undo_type(gb_main); |
---|
469 | GB_MAIN_TYPE *Main = GB_MAIN(gb_main); |
---|
470 | char *to_free = 0; |
---|
471 | if (Main->transaction>0){ |
---|
472 | GB_internal_error("Internal Error: Compress Sequences called during a running transacton"); |
---|
473 | return GB_export_error("Internal Error: Compress Sequences called during a running transacton"); |
---|
474 | } |
---|
475 | GB_request_undo_type(gb_main,GB_UNDO_KILL); |
---|
476 | GB_begin_transaction(gb_main); |
---|
477 | GB_push_my_security(gb_main); |
---|
478 | |
---|
479 | if (!tree_name || !strlen(tree_name)) { |
---|
480 | to_free = (char *)(tree_name = (const char *)GBT_find_largest_tree(gb_main)); |
---|
481 | } |
---|
482 | ctree = (GB_CTREE *)GBT_read_tree(gb_main,(char *)tree_name,-sizeof(GB_CTREE)); |
---|
483 | if (!ctree) return GB_export_error("Tree %s not found in database",tree_name); |
---|
484 | error = GBT_link_tree((GBT_TREE *)ctree,gb_main,GB_FALSE); |
---|
485 | if (!error) error = GBT_compress_sequence_tree(gb_main,ctree,ali_name); |
---|
486 | GBT_delete_tree((GBT_TREE *)ctree); |
---|
487 | GB_pop_my_security(gb_main); |
---|
488 | if (error){ |
---|
489 | GB_abort_transaction(gb_main); |
---|
490 | }else{ |
---|
491 | GB_commit_transaction(gb_main); |
---|
492 | } |
---|
493 | GB_disable_quicksave(gb_main,"Database optimized"); |
---|
494 | GB_request_undo_type(gb_main,undo_type); |
---|
495 | if (to_free) free(to_free); |
---|
496 | return error; |
---|
497 | } |
---|
498 | |
---|
499 | void GBT_compression_test(void *dummy, GBDATA *gb_main){ |
---|
500 | GB_ERROR error; |
---|
501 | char *ali_name; |
---|
502 | GBUSE(dummy); |
---|
503 | GB_begin_transaction(gb_main); |
---|
504 | ali_name = GBT_get_default_alignment(gb_main); |
---|
505 | error = GBT_compress_sequence_tree2(gb_main,"tree_test",ali_name); |
---|
506 | if (error){ |
---|
507 | GB_warning("%s",error); |
---|
508 | GB_abort_transaction(gb_main); |
---|
509 | }else{ |
---|
510 | GB_commit_transaction(gb_main); |
---|
511 | } |
---|
512 | } |
---|
513 | |
---|
514 | |
---|
515 | |
---|
516 | /* ******************** Decompress Sequences ******************** */ |
---|
517 | /* ******************** Decompress Sequences ******************** */ |
---|
518 | /* ******************** Decompress Sequences ******************** */ |
---|
519 | char *g_b_uncompress_single_sequence_by_master(const char *s, const char *master, long size) |
---|
520 | { |
---|
521 | register const signed char *source = (signed char *)s; |
---|
522 | register char *dest; |
---|
523 | register const char *m = master; |
---|
524 | register unsigned int c; |
---|
525 | register int j; |
---|
526 | int i; |
---|
527 | char *buffer; |
---|
528 | |
---|
529 | dest = buffer = GB_give_other_buffer((char *)source,size); |
---|
530 | |
---|
531 | for (i=size;i;) { |
---|
532 | j = *(source++); |
---|
533 | if (j>0) { /* uncompressed data block */ |
---|
534 | if (j>i) j=i; |
---|
535 | i -= j; |
---|
536 | for (;j;j--) { |
---|
537 | c = *(source++); |
---|
538 | if (!c) c = *m; |
---|
539 | *(dest++) = c; |
---|
540 | m++; |
---|
541 | } |
---|
542 | }else{ /* equal bytes compressed */ |
---|
543 | if (!j) break; /* end symbol */ |
---|
544 | if (j== -122) { |
---|
545 | j = *(source++) & 0xff; |
---|
546 | j |= ((*(source++)) <<8) &0xff00; |
---|
547 | j = -j; |
---|
548 | } |
---|
549 | c = *(source++); |
---|
550 | i += j; |
---|
551 | if (i<0) { |
---|
552 | GB_internal_error("Internal Error: Missing end in data"); |
---|
553 | j += -i; |
---|
554 | i = 0; |
---|
555 | } |
---|
556 | if (c==0){ |
---|
557 | for (;j<-4;j+=4){ /* copy 4 bytes at a time */ |
---|
558 | int a,b; |
---|
559 | a = m[0]; |
---|
560 | b = m[1]; dest[0] = a; |
---|
561 | a = m[2]; dest[1] = b; |
---|
562 | b = m[3]; dest[2] = a; |
---|
563 | dest[3] = b; |
---|
564 | m+= 4; |
---|
565 | dest += 4; |
---|
566 | } |
---|
567 | for (;j;j++) *(dest++) = *(m++); |
---|
568 | }else{ |
---|
569 | m -= j; |
---|
570 | if (j<-16){ /* set multiple bytes */ |
---|
571 | int k; |
---|
572 | c &= 0xff; /* copy c to upper bytes */ |
---|
573 | c |= (c<<8); |
---|
574 | c |= (c<<16); |
---|
575 | j = -j; |
---|
576 | if ( ((long)dest) & 1) { |
---|
577 | *(dest++) = c; |
---|
578 | j--; |
---|
579 | } |
---|
580 | if ( ((long)dest) &2){ |
---|
581 | *((short *)dest) = c; |
---|
582 | dest += 2; |
---|
583 | j-=2; |
---|
584 | } |
---|
585 | |
---|
586 | k = j&3; |
---|
587 | j = j>>2; |
---|
588 | for (;j;j--){ |
---|
589 | *((GB_UINT4 *)dest) = c; |
---|
590 | dest += sizeof(GB_UINT4); |
---|
591 | } |
---|
592 | j = k; |
---|
593 | for (;j;j--) *(dest++) = c; |
---|
594 | }else{ |
---|
595 | for (;j;j++) *(dest++) = c; |
---|
596 | } |
---|
597 | } |
---|
598 | } |
---|
599 | } |
---|
600 | *(dest++) = 0; /* NULL of NULL terminated string */ |
---|
601 | ad_assert(dest - buffer == size); |
---|
602 | return buffer; |
---|
603 | } |
---|
604 | |
---|
605 | char *gb_uncompress_by_sequence(GBDATA *gbd, const char *s,long size, GB_ERROR *error){ |
---|
606 | register char *dest; |
---|
607 | GB_MAIN_TYPE *Main; |
---|
608 | GBDATA *gb_main; |
---|
609 | GBDATA *gb_master; |
---|
610 | char *master; |
---|
611 | GBQUARK quark; |
---|
612 | int index; |
---|
613 | long master_size; |
---|
614 | char *to_free; |
---|
615 | |
---|
616 | *error = 0; |
---|
617 | |
---|
618 | if (!GB_FATHER(gbd)) { |
---|
619 | *error = "Cannot uncompress this sequence: Sequence has no father"; |
---|
620 | return 0; |
---|
621 | } |
---|
622 | |
---|
623 | Main = GB_MAIN(gbd); |
---|
624 | gb_main = (GBDATA *)Main->data; |
---|
625 | |
---|
626 | to_free = gb_check_out_buffer(s); /* Get our own buffer, maybe load_single_key_data will destroy it */ |
---|
627 | |
---|
628 | index = g_b_read_number2((unsigned char **)&s); |
---|
629 | quark = g_b_read_number2((unsigned char **)&s); |
---|
630 | |
---|
631 | if (!Main->keys[quark].gb_master_ali){ |
---|
632 | gb_load_single_key_data(gb_main,quark); |
---|
633 | } |
---|
634 | if (!Main->keys[quark].gb_master_ali){ |
---|
635 | *error = "Cannot uncompress this sequence: Cannot find a master sequence"; |
---|
636 | return 0; |
---|
637 | } |
---|
638 | |
---|
639 | gb_master = gb_find_by_nr(Main->keys[quark].gb_master_ali,index); |
---|
640 | if (!gb_master){ |
---|
641 | return (char *)GB_get_error(); |
---|
642 | } |
---|
643 | master_size = GB_read_string_count(gb_master); |
---|
644 | master = GB_read_char_pntr(gb_master); /* make sure that this is not a buffer !!! */ |
---|
645 | dest = g_b_uncompress_single_sequence_by_master(s,master,size); |
---|
646 | if (to_free) free(to_free); |
---|
647 | return (char *)dest; |
---|
648 | } |
---|