1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : adcompr.cxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #include <climits> |
---|
12 | |
---|
13 | #include <arbdbt.h> |
---|
14 | |
---|
15 | #include "gb_key.h" |
---|
16 | #include "gb_t_prot.h" |
---|
17 | #include "gb_compress.h" |
---|
18 | #include "gb_localdata.h" |
---|
19 | |
---|
20 | #include <stdint.h> |
---|
21 | |
---|
22 | #if defined(DEBUG) |
---|
23 | // #define TEST_HUFFMAN_CODE |
---|
24 | #endif // DEBUG |
---|
25 | |
---|
26 | #define GBTUM_COMPRESS_TREE_SIZE 32 |
---|
27 | |
---|
28 | |
---|
29 | #define GB_READ_BIT(p, c, bp, result) if (!bp) { c = *(p++); bp = 8; }; result = (c>>7); result &= 1; c <<= 1; bp-- |
---|
30 | |
---|
31 | #define GB_INIT_WRITE_BITS(p, bp) *p = 0; bp = 8 |
---|
32 | |
---|
33 | #define GB_WRITE_BITS(p, bp, bitc, bits, i) \ |
---|
34 | if (bp<=0) { bp += 8; p++; *p = 0; } \ |
---|
35 | if ((i=bp-bitc)<0) { \ |
---|
36 | *p |= bits>>(-i); \ |
---|
37 | i += 8; \ |
---|
38 | bp += 8; p++; *p = 0; \ |
---|
39 | } \ |
---|
40 | *p |= bits<<i; \ |
---|
41 | bp-=bitc; |
---|
42 | |
---|
43 | #define GB_READ_BITS(p, c, bp, bitc, bits) { \ |
---|
44 | long _i; \ |
---|
45 | int _r; \ |
---|
46 | bits = 0; \ |
---|
47 | for (_i=bitc-1; _i>=0; _i--) { \ |
---|
48 | GB_READ_BIT(p, c, bp, _r); \ |
---|
49 | bits = (bits<<1) + _r; \ |
---|
50 | } } |
---|
51 | |
---|
52 | |
---|
53 | __ATTR__USERESULT static GB_ERROR gb_check_huffmann_tree(gb_compress_tree *t) { |
---|
54 | if (t->leaf) |
---|
55 | return 0; |
---|
56 | if (!t->son[0]) |
---|
57 | return GB_export_error("Database entry corrupt (zero left son)"); |
---|
58 | if (!t->son[1]) |
---|
59 | return GB_export_error("Database entry corrupt (zero right son)"); |
---|
60 | |
---|
61 | { |
---|
62 | GB_ERROR err = gb_check_huffmann_tree(t->son[0]); |
---|
63 | if (err) return err; |
---|
64 | } |
---|
65 | return gb_check_huffmann_tree(t->son[1]); |
---|
66 | } |
---|
67 | |
---|
68 | #if defined(TEST_HUFFMAN_CODE) |
---|
69 | static void gb_dump_huffmann_tree(gb_compress_tree *t, const char *prefix) { |
---|
70 | if (t->leaf) { |
---|
71 | long command = (long)t->son[1]; |
---|
72 | printf("%s", prefix); |
---|
73 | |
---|
74 | switch (command) { |
---|
75 | case GB_CS_END: printf(" [GB_CS_END]\n"); break; |
---|
76 | case GB_CS_ID: printf(" [GB_CS_ID]\n"); break; |
---|
77 | case GB_CS_OK: { |
---|
78 | long val = (long)t->son[0]; |
---|
79 | printf(": value=%li (='%c')\n", val, (char)val); |
---|
80 | break; |
---|
81 | } |
---|
82 | default: { |
---|
83 | long val = (long)t->son[0]; |
---|
84 | printf(" other command (%li) value=%li (='%c')\n", command, val, (char)val); |
---|
85 | break; |
---|
86 | } |
---|
87 | } |
---|
88 | } |
---|
89 | else { |
---|
90 | int len = strlen(prefix); |
---|
91 | char *my_prefix = ARB_alloc<char>(len+2); |
---|
92 | strcpy(my_prefix, prefix); |
---|
93 | my_prefix[len+1] = 0; |
---|
94 | my_prefix[len] = '0'; |
---|
95 | gb_dump_huffmann_tree(t->son[0], my_prefix); |
---|
96 | my_prefix[len] = '1'; |
---|
97 | gb_dump_huffmann_tree(t->son[1], my_prefix); |
---|
98 | free(my_prefix); |
---|
99 | } |
---|
100 | } |
---|
101 | static void gb_dump_huffmann_list(gb_compress_list *bc, const char *prefix) { |
---|
102 | if (bc->command == GB_CD_NODE) { |
---|
103 | int len = strlen(prefix); |
---|
104 | char *my_prefix = ARB_alloc<char>(len+2); |
---|
105 | strcpy(my_prefix, prefix); |
---|
106 | my_prefix[len+1] = 0; |
---|
107 | my_prefix[len] = '0'; |
---|
108 | gb_dump_huffmann_list(bc->son[0], my_prefix); |
---|
109 | my_prefix[len] = '1'; |
---|
110 | gb_dump_huffmann_list(bc->son[1], my_prefix); |
---|
111 | free(my_prefix); |
---|
112 | } |
---|
113 | else { |
---|
114 | printf("%s value=%i (='%c') count=%li\n", prefix, bc->value, (char)bc->value, bc->count); |
---|
115 | } |
---|
116 | } |
---|
117 | |
---|
118 | #endif // TEST_HUFFMAN_CODE |
---|
119 | |
---|
120 | gb_compress_tree *gb_build_uncompress_tree(const unsigned char *data, long short_flag, char **end) { |
---|
121 | gb_compress_tree *Main, *t; |
---|
122 | long bits, mask, i; |
---|
123 | const unsigned char *p; |
---|
124 | GB_ERROR error; |
---|
125 | |
---|
126 | Main = (gb_compress_tree *)gbm_get_mem(sizeof(gb_compress_tree), GBM_CB_INDEX); |
---|
127 | for (p=data; *p; p+=3+short_flag) { |
---|
128 | bits = p[0]; |
---|
129 | mask = 0x80; |
---|
130 | for (i=7; i; i--, mask=mask>>1) if (mask & bits) break; // find first bit |
---|
131 | if (!i) { |
---|
132 | GB_internal_error("Data corrupt"); |
---|
133 | return 0; |
---|
134 | } |
---|
135 | t = Main; |
---|
136 | for (; i; i--) { |
---|
137 | if (t->leaf) { |
---|
138 | GB_export_error("Corrupt data !!!"); |
---|
139 | return 0; |
---|
140 | } |
---|
141 | mask = mask>>1; |
---|
142 | if (mask & bits) { |
---|
143 | if (!t->son[1]) { |
---|
144 | t->son[1] = (gb_compress_tree *)gbm_get_mem(sizeof(gb_compress_tree), GBM_CB_INDEX); |
---|
145 | } |
---|
146 | t=t->son[1]; |
---|
147 | } |
---|
148 | else { |
---|
149 | if (!t->son[0]) { |
---|
150 | t->son[0] = (gb_compress_tree *)gbm_get_mem(sizeof(gb_compress_tree), GBM_CB_INDEX); |
---|
151 | } |
---|
152 | t=t->son[0]; |
---|
153 | } |
---|
154 | } |
---|
155 | if (t->leaf) { |
---|
156 | GB_export_error("Corrupt data !!!"); |
---|
157 | return 0; |
---|
158 | } |
---|
159 | t->leaf = 1; |
---|
160 | if (short_flag) { |
---|
161 | t->son[0] = (gb_compress_tree *)(long)((p[2]<<8)+p[3]); |
---|
162 | } |
---|
163 | else { |
---|
164 | t->son[0] = (gb_compress_tree *)(long)(p[2]); |
---|
165 | } |
---|
166 | t->son[1] = (gb_compress_tree *)(long)p[1]; // command |
---|
167 | } |
---|
168 | if (end) *end = ((char *)p)+1; |
---|
169 | if ((error = gb_check_huffmann_tree(Main))) { |
---|
170 | GB_internal_errorf("%s", error); |
---|
171 | gb_free_compress_tree(Main); |
---|
172 | return 0; |
---|
173 | } |
---|
174 | |
---|
175 | #if defined(TEST_HUFFMAN_CODE) |
---|
176 | printf("Huffman tree:\n"); |
---|
177 | gb_dump_huffmann_tree(Main, ""); |
---|
178 | #endif // TEST_HUFFMAN_CODE |
---|
179 | |
---|
180 | return Main; |
---|
181 | } |
---|
182 | |
---|
183 | void gb_free_compress_tree(gb_compress_tree *tree) { |
---|
184 | if (tree) { |
---|
185 | if (!tree->leaf) { |
---|
186 | if (tree->son[0]) gb_free_compress_tree(tree->son[0]); |
---|
187 | if (tree->son[1])gb_free_compress_tree(tree->son[1]); |
---|
188 | } |
---|
189 | } |
---|
190 | gbm_free_mem(tree, sizeof(gb_compress_tree), GBM_CB_INDEX); |
---|
191 | } |
---|
192 | |
---|
193 | gb_compress_list *gb_build_compress_list(const unsigned char *data, long short_flag, long *size) { |
---|
194 | int i, maxi, bitc; |
---|
195 | int val, bits, mask, value; |
---|
196 | const unsigned char *p; |
---|
197 | |
---|
198 | enum gb_compress_list_commands command = (enum gb_compress_list_commands)0; |
---|
199 | |
---|
200 | maxi = 0; |
---|
201 | val = bits = mask = value = bitc = 0; |
---|
202 | for (p=data; *p; p+=3+short_flag) { |
---|
203 | i = (p[2]); |
---|
204 | if (short_flag) { |
---|
205 | i = (i<<8)+p[3]; |
---|
206 | } |
---|
207 | if (i>maxi) maxi = i; |
---|
208 | } |
---|
209 | *size = maxi; |
---|
210 | |
---|
211 | gb_compress_list *list = ARB_calloc<gb_compress_list>(maxi+1); |
---|
212 | |
---|
213 | maxi = 0; |
---|
214 | val = -1; |
---|
215 | |
---|
216 | for (p=data; *p; p+=3+short_flag) { |
---|
217 | val = (p[2]); |
---|
218 | if (short_flag) val = (val<<8)+p[3]; |
---|
219 | |
---|
220 | for (i=maxi; i<val; i++) { |
---|
221 | list[i].command = command; |
---|
222 | list[i].mask = mask; |
---|
223 | list[i].bitcnt = bitc; |
---|
224 | list[i].bits = bits; |
---|
225 | list[i].value = value; |
---|
226 | } |
---|
227 | maxi = val; |
---|
228 | |
---|
229 | command = (enum gb_compress_list_commands)(p[1]); |
---|
230 | bits = p[0]; |
---|
231 | mask = 0x80; |
---|
232 | for (bitc=7; bitc; bitc--, mask=mask>>1) if (mask & bits) break; // find first bit |
---|
233 | mask = 0xff>>(8-bitc); |
---|
234 | bits = bits & mask; |
---|
235 | value = val; |
---|
236 | } |
---|
237 | i = val; |
---|
238 | list[i].command = command; |
---|
239 | list[i].mask = mask; |
---|
240 | list[i].bitcnt = bitc; |
---|
241 | list[i].bits = bits; |
---|
242 | list[i].value = value; |
---|
243 | return list; |
---|
244 | } |
---|
245 | |
---|
246 | // ------------------------ |
---|
247 | // bit compression |
---|
248 | |
---|
249 | |
---|
250 | char *gb_compress_bits(const char *source, long size, const unsigned char *c_0, long *msize) |
---|
251 | { |
---|
252 | const unsigned char *s = (const unsigned char *)source; |
---|
253 | char *buffer = GB_give_other_buffer(source, size); |
---|
254 | char *dest = buffer; |
---|
255 | |
---|
256 | long i, j; |
---|
257 | int bitptr, bits, bitc; |
---|
258 | int zo_flag = 0; |
---|
259 | long len; |
---|
260 | int h_i, command; |
---|
261 | int isNull[256]; |
---|
262 | |
---|
263 | for (i = 0; i<256; ++i) isNull[i] = 0; |
---|
264 | for (i = 0; c_0[i]; ++i) isNull[c_0[i]] = 1; |
---|
265 | |
---|
266 | GB_INIT_WRITE_BITS(dest, bitptr); |
---|
267 | for (len = size, i = 0; len; len--) { |
---|
268 | if (zo_flag == isNull[*(s++)]) { |
---|
269 | zo_flag = 1 - zo_flag; |
---|
270 | for (command = GB_CS_SUB; command != GB_CS_OK;) { |
---|
271 | if (i>gb_local->bc_size) j = gb_local->bc_size; else j = i; |
---|
272 | bits = gb_local->bitcompress[j].bits; |
---|
273 | bitc = gb_local->bitcompress[j].bitcnt; |
---|
274 | command = gb_local->bitcompress[j].command; |
---|
275 | i -= gb_local->bitcompress[j].value; |
---|
276 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
---|
277 | } |
---|
278 | i = 0; |
---|
279 | } |
---|
280 | i++; |
---|
281 | } |
---|
282 | for (command = GB_CS_SUB; command != GB_CS_OK;) { |
---|
283 | if (i>gb_local->bc_size) j = gb_local->bc_size; else j = i; |
---|
284 | bits = gb_local->bitcompress[j].bits; |
---|
285 | bitc = gb_local->bitcompress[j].bitcnt; |
---|
286 | command = gb_local->bitcompress[j].command; |
---|
287 | i -= gb_local->bitcompress[j].value; |
---|
288 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
---|
289 | } |
---|
290 | *msize = dest - buffer + 1; |
---|
291 | return buffer; |
---|
292 | } |
---|
293 | |
---|
294 | |
---|
295 | GB_BUFFER gb_uncompress_bits(const char *source, long size, char c_0, char c_1) { |
---|
296 | gb_compress_tree *Main = gb_local->bituncompress; |
---|
297 | |
---|
298 | uint8_t ch = 0; |
---|
299 | |
---|
300 | long bitp = 0; |
---|
301 | char outc = c_0; |
---|
302 | |
---|
303 | const uint8_t *s = (const uint8_t*)(source); |
---|
304 | |
---|
305 | char *p = GB_give_other_buffer(source, size+1); |
---|
306 | char *buffer = p; |
---|
307 | |
---|
308 | for (long pos = 0; pos<size;) { |
---|
309 | long lastpos = pos; |
---|
310 | for (long command = GB_CS_SUB; command != GB_CS_OK;) { |
---|
311 | gb_compress_tree *t; |
---|
312 | for (t = Main; !t->leaf;) { |
---|
313 | int bit; |
---|
314 | GB_READ_BIT(s, ch, bitp, bit); |
---|
315 | t = t->son[bit]; |
---|
316 | } |
---|
317 | command = (long) t->son[1]; |
---|
318 | pos += (long)t->son[0]; |
---|
319 | } |
---|
320 | for (; lastpos<pos; lastpos++) *(p++) = outc; |
---|
321 | if (outc==c_0) outc=c_1; else outc=c_0; |
---|
322 | } |
---|
323 | *p = 0; |
---|
324 | return buffer; |
---|
325 | } |
---|
326 | |
---|
327 | |
---|
328 | |
---|
329 | /* Runlength encoding produces.. |
---|
330 | * |
---|
331 | * from source: string |
---|
332 | * dest: [command]+ |
---|
333 | * |
---|
334 | * command data meaning |
---|
335 | * |
---|
336 | * -122 low high byte 'byte' was repeated 'low'+256*'high' times |
---|
337 | * -119..-1 byte 'byte' was repeated -'command' times |
---|
338 | * 0 --- end |
---|
339 | * 1..119 [byte]+ 'command' data bytes follow |
---|
340 | */ |
---|
341 | |
---|
342 | static char *g_b_write_run(char *dest, int scount, int lastbyte) { |
---|
343 | while (scount> 0xffff) { |
---|
344 | *(dest++) = 0x100-122; |
---|
345 | *(dest++) = 0xff; |
---|
346 | *(dest++) = 0xff; |
---|
347 | *(dest++) = lastbyte; |
---|
348 | scount -= 0xffff; |
---|
349 | } |
---|
350 | if (scount >250) { |
---|
351 | *(dest++) = 0x100-122; |
---|
352 | *(dest++) = scount & 0xff; |
---|
353 | *(dest++) = (scount >> 8) & 0xff; |
---|
354 | *(dest++) = lastbyte; |
---|
355 | return dest; |
---|
356 | } |
---|
357 | |
---|
358 | while (scount>120) { // 120 blocks |
---|
359 | *(dest++) = 0x100 - 120; |
---|
360 | *(dest++) = lastbyte; |
---|
361 | scount -= 120; |
---|
362 | } |
---|
363 | |
---|
364 | if (scount) { // and rest |
---|
365 | *(dest++) = -scount; |
---|
366 | *(dest++) = lastbyte; |
---|
367 | } |
---|
368 | return dest; |
---|
369 | } |
---|
370 | |
---|
371 | #define GB_COPY_NONRUN(dest, source, len) \ |
---|
372 | while (len > 120) { \ |
---|
373 | int _i = 120; \ |
---|
374 | char *_p; \ |
---|
375 | len -= _i; \ |
---|
376 | *(dest++) = _i; \ |
---|
377 | _p = dest; dest+=_i; \ |
---|
378 | while (_i--) { \ |
---|
379 | *(_p++) = *(source++); \ |
---|
380 | } \ |
---|
381 | } \ |
---|
382 | if (len >0) { \ |
---|
383 | int _i = len; \ |
---|
384 | char *_p; \ |
---|
385 | len = 0; \ |
---|
386 | *(dest++) = _i; \ |
---|
387 | _p = dest; dest+=_i; \ |
---|
388 | while (_i--) { \ |
---|
389 | *(_p++) = *(source++); \ |
---|
390 | } \ |
---|
391 | } |
---|
392 | |
---|
393 | void gb_compress_equal_bytes_2(const char *source, size_t size, size_t *msize, char *dest) { |
---|
394 | long i; // length counter |
---|
395 | int last, rbyte; // next and akt value |
---|
396 | long scount; // same count; count equal bytes |
---|
397 | char *buffer = dest; |
---|
398 | const char *sourcenequal; // begin of non equal section |
---|
399 | long hi; // to temporary store i |
---|
400 | int hsize; |
---|
401 | |
---|
402 | sourcenequal = source; |
---|
403 | rbyte = *(source ++); |
---|
404 | last = -1; |
---|
405 | for (i=size-1; i;) { |
---|
406 | if (rbyte!=last) { // Search an equal pair |
---|
407 | last = rbyte; |
---|
408 | rbyte = *(source ++); i--; |
---|
409 | continue; |
---|
410 | } // last rbyte *(source) |
---|
411 | hi = i+1; |
---|
412 | while (i) { // get equal bytes |
---|
413 | rbyte = *(source ++); i--; |
---|
414 | if (rbyte!=last) break; |
---|
415 | } |
---|
416 | scount = hi-i; // number of equal bytes ; at end e.b.-1 |
---|
417 | if (scount <= GB_RUNLENGTH_SIZE) continue; // less than 4-> no runlength compression |
---|
418 | |
---|
419 | /* compression !!! |
---|
420 | * 1. copy unequal bytes |
---|
421 | * 2. fill rest |
---|
422 | */ |
---|
423 | |
---|
424 | hsize = source - sourcenequal - scount -1; // hsize of non equal bytes |
---|
425 | source = sourcenequal; |
---|
426 | hi = i; // i=unequal length |
---|
427 | GB_COPY_NONRUN(dest, source, hsize); |
---|
428 | dest = g_b_write_run(dest, scount, last); |
---|
429 | source += scount; // now equal bytes |
---|
430 | |
---|
431 | sourcenequal = source++; // no rbyte written yet |
---|
432 | last = rbyte; |
---|
433 | i = hi; |
---|
434 | if (i) { |
---|
435 | rbyte = *(source++); i--; |
---|
436 | } |
---|
437 | } |
---|
438 | // and now rest which is not compressed |
---|
439 | hsize = source - sourcenequal; |
---|
440 | source = sourcenequal; |
---|
441 | GB_COPY_NONRUN(dest, source, hsize); |
---|
442 | |
---|
443 | *(dest++) = 0; // end of data |
---|
444 | *msize = dest-buffer; |
---|
445 | if (*msize >size*9/8) printf("ssize %d, dsize %d\n", (int)size, (int)*msize); |
---|
446 | } |
---|
447 | |
---|
448 | static GB_BUFFER gb_compress_equal_bytes(const char *source, size_t size, size_t *msize, int last_flag) { |
---|
449 | char *dest; |
---|
450 | char *buffer; |
---|
451 | |
---|
452 | dest = buffer = GB_give_other_buffer(source, size*9/8); |
---|
453 | *(dest++) = GB_COMPRESSION_RUNLENGTH | last_flag; |
---|
454 | gb_compress_equal_bytes_2(source, size, msize, dest); |
---|
455 | (*msize) ++; // Tag byte |
---|
456 | return buffer; |
---|
457 | } |
---|
458 | |
---|
459 | struct huffmann_list { |
---|
460 | huffmann_list *next; |
---|
461 | long val; |
---|
462 | gb_compress_list *element; |
---|
463 | }; |
---|
464 | |
---|
465 | static huffmann_list *huffmann_listhead = NULL; |
---|
466 | |
---|
467 | static void gb_compress_huffmann_add_to_list(long val, gb_compress_list *element) { |
---|
468 | huffmann_list *dat, *search, *searchlast; |
---|
469 | |
---|
470 | dat = (huffmann_list *)gbm_get_mem(sizeof(huffmann_list), GBM_CB_INDEX); |
---|
471 | dat->val = val; |
---|
472 | dat->element = element; |
---|
473 | |
---|
474 | searchlast = 0; |
---|
475 | for (search = huffmann_listhead; search; search = search->next) { |
---|
476 | if (val<search->val) break; |
---|
477 | searchlast = search; |
---|
478 | } |
---|
479 | if (huffmann_listhead) { |
---|
480 | if (searchlast) { |
---|
481 | dat->next = searchlast->next; |
---|
482 | searchlast->next = dat; |
---|
483 | } |
---|
484 | else { |
---|
485 | dat->next = huffmann_listhead; |
---|
486 | huffmann_listhead = dat; |
---|
487 | } |
---|
488 | } |
---|
489 | else { |
---|
490 | huffmann_listhead = dat; |
---|
491 | } |
---|
492 | } |
---|
493 | |
---|
494 | static long gb_compress_huffmann_pop(long *val, gb_compress_list **element) { |
---|
495 | huffmann_list *dat; |
---|
496 | if ((dat = huffmann_listhead)) { |
---|
497 | huffmann_listhead = dat->next; |
---|
498 | *val = dat->val; |
---|
499 | *element = dat->element; |
---|
500 | gbm_free_mem(dat, sizeof(huffmann_list), GBM_CB_INDEX); |
---|
501 | return 1; |
---|
502 | } |
---|
503 | else { |
---|
504 | GB_internal_error("huffman compression failed"); |
---|
505 | return 0; |
---|
506 | } |
---|
507 | } |
---|
508 | |
---|
509 | static char *gb_compress_huffmann_rek(gb_compress_list *bc, int bits, int bitcnt, char *dest) { |
---|
510 | if (bc->command == GB_CD_NODE) { |
---|
511 | dest = gb_compress_huffmann_rek(bc->son[0], (bits<<1), bitcnt+1, dest); |
---|
512 | dest = gb_compress_huffmann_rek(bc->son[1], (bits<<1)+1, bitcnt+1, dest); |
---|
513 | gbm_free_mem(bc, sizeof(gb_compress_list), GBM_CB_INDEX); |
---|
514 | return dest; |
---|
515 | } |
---|
516 | else { |
---|
517 | *(dest++) = bits; |
---|
518 | *(dest++) = bc->command; |
---|
519 | *(dest++) = bc->value; |
---|
520 | bc->bitcnt = bitcnt; |
---|
521 | bc->mask = 0xff>>(8-bitcnt); |
---|
522 | bc->bits = bits&bc->mask; |
---|
523 | return dest; |
---|
524 | } |
---|
525 | } |
---|
526 | |
---|
527 | static GB_BUFFER gb_compress_huffmann(GB_CBUFFER source, size_t size, size_t *msize, int last_flag) { |
---|
528 | const int COMPRESS_LIST_SIZE = 257; |
---|
529 | gb_compress_list bitcompress[COMPRESS_LIST_SIZE]; |
---|
530 | |
---|
531 | memset((char *)(&bitcompress[0]), 0, sizeof(gb_compress_list)*COMPRESS_LIST_SIZE); |
---|
532 | |
---|
533 | char *buffer = GB_give_other_buffer(source, size*2+3*GBTUM_COMPRESS_TREE_SIZE+1); |
---|
534 | char *dest = buffer; |
---|
535 | *(dest++) = GB_COMPRESSION_HUFFMANN | last_flag; |
---|
536 | |
---|
537 | long id = 0; |
---|
538 | |
---|
539 | { |
---|
540 | long level; |
---|
541 | long i; |
---|
542 | long restcount; |
---|
543 | |
---|
544 | long vali[2] = { 0, 0 }; |
---|
545 | |
---|
546 | gb_compress_list *element1 = 0; |
---|
547 | gb_compress_list *element2 = 0; |
---|
548 | gb_compress_list *bc = 0; |
---|
549 | |
---|
550 | unsigned char *s = (unsigned char *)source; |
---|
551 | for (long len = size; len; len--) { |
---|
552 | bitcompress[*(s++)].count++; |
---|
553 | } |
---|
554 | level = size/GBTUM_COMPRESS_TREE_SIZE; |
---|
555 | restcount = 0; |
---|
556 | for (i=0; i<256; i++) { |
---|
557 | bitcompress[i].value = (int)i; |
---|
558 | if (bitcompress[i].count>level) { |
---|
559 | gb_compress_huffmann_add_to_list(bitcompress[i].count, &bitcompress[i]); |
---|
560 | bitcompress[i].command = GB_CS_OK; |
---|
561 | } |
---|
562 | else { |
---|
563 | restcount += bitcompress[i].count; |
---|
564 | bitcompress[i].count = 0; |
---|
565 | id = i; |
---|
566 | bitcompress[i].command = GB_CS_ID; |
---|
567 | } |
---|
568 | } |
---|
569 | bitcompress[COMPRESS_LIST_SIZE-1].command = GB_CS_END; |
---|
570 | |
---|
571 | gb_compress_huffmann_add_to_list(restcount, &bitcompress[id]); |
---|
572 | gb_compress_huffmann_add_to_list(1, &bitcompress[COMPRESS_LIST_SIZE-1]); |
---|
573 | while (huffmann_listhead->next) { |
---|
574 | gb_compress_huffmann_pop(&(vali[0]), &element1); |
---|
575 | gb_compress_huffmann_pop(&(vali[1]), &element2); |
---|
576 | |
---|
577 | bc = (gb_compress_list *)gbm_get_mem(sizeof(gb_compress_list), GBM_CB_INDEX); |
---|
578 | bc->command = GB_CD_NODE; |
---|
579 | bc->son[0] = element1; |
---|
580 | bc->son[1] = element2; |
---|
581 | |
---|
582 | if (element1->command == GB_CD_NODE) { |
---|
583 | bc->bits = element1->bits+1; |
---|
584 | if (element2->command == GB_CD_NODE && element2->bits >= bc->bits) bc->bits = element2->bits+1; |
---|
585 | } |
---|
586 | else { |
---|
587 | if (element2->command == GB_CD_NODE) { |
---|
588 | bc->bits = element2->bits+1; |
---|
589 | } |
---|
590 | else { |
---|
591 | bc->bits = 1; |
---|
592 | } |
---|
593 | } |
---|
594 | |
---|
595 | gb_assert(bc->bits <= 7); // max. 7 bits allowed |
---|
596 | |
---|
597 | // if already 6 bits used -> put to end of list; otherwise sort in |
---|
598 | gb_compress_huffmann_add_to_list(bc->bits >= 6 ? LONG_MAX : vali[0]+vali[1], bc); |
---|
599 | } |
---|
600 | gb_compress_huffmann_pop(&(vali[0]), &element1); |
---|
601 | #if defined(TEST_HUFFMAN_CODE) |
---|
602 | printf("huffman list:\n"); |
---|
603 | gb_dump_huffmann_list(bc, ""); |
---|
604 | #endif // TEST_HUFFMAN_CODE |
---|
605 | dest = gb_compress_huffmann_rek(bc, 1, 0, dest); |
---|
606 | *(dest++) = 0; |
---|
607 | } |
---|
608 | |
---|
609 | gb_compress_list *pbid = &bitcompress[id]; |
---|
610 | unsigned char *s = (unsigned char *)source; |
---|
611 | { |
---|
612 | int bitptr, bits, bitc; |
---|
613 | |
---|
614 | GB_INIT_WRITE_BITS(dest, bitptr); |
---|
615 | int h_i; |
---|
616 | for (long len = size; len; len--) { |
---|
617 | int val = *(s++); |
---|
618 | int command = bitcompress[val].command; |
---|
619 | if (command == GB_CS_OK) { |
---|
620 | gb_compress_list *pbc = &bitcompress[val]; |
---|
621 | bits = pbc->bits; |
---|
622 | bitc = pbc->bitcnt; |
---|
623 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
---|
624 | } |
---|
625 | else { |
---|
626 | bits = pbid->bits; |
---|
627 | bitc = pbid->bitcnt; |
---|
628 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
---|
629 | |
---|
630 | bits = val; |
---|
631 | bitc = 8; |
---|
632 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
---|
633 | } |
---|
634 | } |
---|
635 | bits = bitcompress[COMPRESS_LIST_SIZE-1].bits; |
---|
636 | bitc = bitcompress[COMPRESS_LIST_SIZE-1].bitcnt; |
---|
637 | // cppcheck-suppress unreadVariable (bitptr is assigned but never used) |
---|
638 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
---|
639 | } |
---|
640 | *msize = dest - buffer + 1; |
---|
641 | #if defined(TEST_HUFFMAN_CODE) |
---|
642 | printf("huffman compression %zu -> %zu (%5.2f %%)\n", size, *msize, (double)((double)(*msize)/size*100)); |
---|
643 | #endif // TEST_HUFFMAN_CODE |
---|
644 | if (*msize >size*2) printf("ssize %d, size %d\n", (int)size, (int)*msize); |
---|
645 | return buffer; |
---|
646 | } |
---|
647 | |
---|
648 | |
---|
649 | static GB_BUFFER gb_uncompress_equal_bytes(GB_CBUFFER s, size_t size, size_t *new_size) { |
---|
650 | const signed char *source = (signed char*)s; |
---|
651 | char *dest; |
---|
652 | unsigned int c; |
---|
653 | long j; |
---|
654 | long i, k; |
---|
655 | char *buffer; |
---|
656 | |
---|
657 | dest = buffer = GB_give_other_buffer((char *)source, size); |
---|
658 | |
---|
659 | #if defined(DEBUG) && 0 |
---|
660 | printf("gb_uncompress_equal_bytes(size=%li):\n", size); |
---|
661 | #endif // DEBUG |
---|
662 | |
---|
663 | for (i=size; i;) { |
---|
664 | j = *(source++); |
---|
665 | #if defined(DEBUG) && 0 |
---|
666 | printf("size=%li (code=%i)\n", i, (int)j); |
---|
667 | #endif // DEBUG |
---|
668 | if (j>0) { // uncompressed data block |
---|
669 | if (j>i) j=i; |
---|
670 | i -= j; |
---|
671 | for (; j; j--) { |
---|
672 | *(dest++) = (char)(*(source++)); |
---|
673 | } |
---|
674 | } |
---|
675 | else { // equal bytes compressed |
---|
676 | if (!j) break; // end symbol |
---|
677 | if (j == -122) { |
---|
678 | j = *(source++) & 0xff; |
---|
679 | j |= ((*(source++)) << 8) &0xff00; |
---|
680 | j = -j; |
---|
681 | } |
---|
682 | c = *(source++); |
---|
683 | i += j; |
---|
684 | if (i<0) { |
---|
685 | j += -i; |
---|
686 | i = 0; |
---|
687 | } |
---|
688 | if (j<-30) { // set multiple bytes |
---|
689 | j = -j; |
---|
690 | if (((long)dest) & 1) { |
---|
691 | *(dest++) = c; |
---|
692 | j--; |
---|
693 | } |
---|
694 | if (((long)dest) &2) { |
---|
695 | *(dest++) = c; |
---|
696 | *(dest++) = c; |
---|
697 | j-=2; |
---|
698 | } |
---|
699 | c &= 0xff; // copy c to upper bytes |
---|
700 | c |= (c<<8); |
---|
701 | c |= (c<<16); |
---|
702 | k = j&3; |
---|
703 | j = j>>2; |
---|
704 | for (; j; j--) { |
---|
705 | *((GB_UINT4 *)dest) = c; |
---|
706 | dest += sizeof(GB_UINT4); |
---|
707 | } |
---|
708 | j = k; |
---|
709 | for (; j; j--) *(dest++) = c; |
---|
710 | } |
---|
711 | else { |
---|
712 | for (; j; j++) *(dest++) = c; |
---|
713 | } |
---|
714 | } |
---|
715 | } |
---|
716 | |
---|
717 | *new_size = dest-buffer; |
---|
718 | gb_assert(size >= *new_size); // buffer overflow |
---|
719 | |
---|
720 | return buffer; |
---|
721 | } |
---|
722 | |
---|
723 | static GB_BUFFER gb_uncompress_huffmann(GB_CBUFFER source, size_t maxsize, size_t *new_size) { |
---|
724 | gb_compress_tree *un_tree, *t; |
---|
725 | |
---|
726 | char *data[1]; |
---|
727 | char *p, *buffer; |
---|
728 | long bitp; |
---|
729 | uint8_t ch = 0, *s; |
---|
730 | long val, command; |
---|
731 | |
---|
732 | un_tree = gb_build_uncompress_tree((const unsigned char *)source, 0, data); |
---|
733 | if (!un_tree) return 0; |
---|
734 | |
---|
735 | bitp = 0; |
---|
736 | buffer = p = GB_give_other_buffer(source, maxsize); |
---|
737 | s = (uint8_t*)data[0]; |
---|
738 | while (1) { |
---|
739 | for (t = un_tree; !t->leaf;) { |
---|
740 | int bit; |
---|
741 | GB_READ_BIT(s, ch, bitp, bit); |
---|
742 | t = t->son[bit]; |
---|
743 | } |
---|
744 | command = (long) t->son[1]; |
---|
745 | if (command == GB_CS_END) break; |
---|
746 | if (command == GB_CS_ID) { |
---|
747 | GB_READ_BITS(s, ch, bitp, 8, val); |
---|
748 | } |
---|
749 | else { |
---|
750 | val = (long) t->son[0]; |
---|
751 | } |
---|
752 | *(p++) = (int)val; |
---|
753 | } |
---|
754 | gb_free_compress_tree(un_tree); |
---|
755 | |
---|
756 | *new_size = p-buffer; |
---|
757 | gb_assert(maxsize >= *new_size); // buffer overflow |
---|
758 | |
---|
759 | return buffer; |
---|
760 | } |
---|
761 | |
---|
762 | GB_BUFFER gb_uncompress_bytes(GB_CBUFFER source, size_t size, size_t *new_size) { |
---|
763 | char *data = gb_uncompress_huffmann(source, size, new_size); |
---|
764 | if (data) data = gb_uncompress_equal_bytes(data, size, new_size); |
---|
765 | gb_assert(!data || size >= *new_size); // buffer overflow |
---|
766 | return data; |
---|
767 | } |
---|
768 | |
---|
769 | // ------------------------------------------------- |
---|
770 | // Compress long and floats (4 byte values) |
---|
771 | |
---|
772 | |
---|
773 | GB_BUFFER gb_uncompress_longs_old(GB_CBUFFER source, size_t size, size_t *new_size) |
---|
774 | { // size is byte value |
---|
775 | char *res = 0; |
---|
776 | char *data = gb_uncompress_huffmann(source, (size*9)/8, new_size); |
---|
777 | if (data) { |
---|
778 | char *p, *s0, *s1, *s2, *s3; |
---|
779 | GB_UINT4 mi, i; |
---|
780 | |
---|
781 | data = gb_uncompress_equal_bytes(data, size, new_size); |
---|
782 | |
---|
783 | gb_assert(*new_size == size); |
---|
784 | res = p = GB_give_other_buffer(data, size); |
---|
785 | |
---|
786 | STATIC_ASSERT(sizeof(GB_UINT4) == 4); |
---|
787 | |
---|
788 | mi = (GB_UINT4)(size / 4); |
---|
789 | s0 = data + 0 * mi; |
---|
790 | s1 = data + 1 * mi; |
---|
791 | s2 = data + 2 * mi; |
---|
792 | s3 = data + 3 * mi; |
---|
793 | |
---|
794 | for (i = 0; i < mi; i++) { |
---|
795 | *(p++) = *(s0++); |
---|
796 | *(p++) = *(s1++); |
---|
797 | *(p++) = *(s2++); |
---|
798 | *(p++) = *(s3++); |
---|
799 | } |
---|
800 | |
---|
801 | *new_size = mi*4; |
---|
802 | } |
---|
803 | return res; |
---|
804 | } |
---|
805 | |
---|
806 | |
---|
807 | static GB_BUFFER gb_uncompress_longs(GB_CBUFFER data, size_t size, size_t *new_size) { |
---|
808 | const char *s0, *s1, *s2, *s3; |
---|
809 | char *p, *res; |
---|
810 | long mi, i; |
---|
811 | |
---|
812 | res = p = GB_give_other_buffer(data, size); |
---|
813 | |
---|
814 | STATIC_ASSERT(sizeof(GB_UINT4) == 4); |
---|
815 | |
---|
816 | mi = size / 4; |
---|
817 | s0 = data + 0 * mi; |
---|
818 | s1 = data + 1 * mi; |
---|
819 | s2 = data + 2 * mi; |
---|
820 | s3 = data + 3 * mi; |
---|
821 | |
---|
822 | for (i = 0; i < mi; i++) { |
---|
823 | *(p++) = *(s0++); |
---|
824 | *(p++) = *(s1++); |
---|
825 | *(p++) = *(s2++); |
---|
826 | *(p++) = *(s3++); |
---|
827 | } |
---|
828 | |
---|
829 | *new_size = mi*4; |
---|
830 | return res; |
---|
831 | } |
---|
832 | |
---|
833 | static GB_BUFFER gb_compress_longs(GB_CBUFFER source, long size, int last_flag) { |
---|
834 | long mi, i; |
---|
835 | const char *p; |
---|
836 | char *s0, *s1, *s2, *s3; |
---|
837 | char *dest = GB_give_other_buffer(source, size+1); |
---|
838 | |
---|
839 | mi = size/4; |
---|
840 | p = source; |
---|
841 | *(dest) = GB_COMPRESSION_SORTBYTES | last_flag; |
---|
842 | s0 = dest + 0*mi + 1; |
---|
843 | s1 = dest + 1*mi + 1; |
---|
844 | s2 = dest + 2*mi + 1; |
---|
845 | s3 = dest + 3*mi + 1; |
---|
846 | for (i=0; i<mi; i++) { |
---|
847 | *(s0++) = *(p++); |
---|
848 | *(s1++) = *(p++); |
---|
849 | *(s2++) = *(p++); |
---|
850 | *(s3++) = *(p++); |
---|
851 | } |
---|
852 | return dest; |
---|
853 | } |
---|
854 | |
---|
855 | // ----------------------------- |
---|
856 | // Dictionary Functions |
---|
857 | |
---|
858 | GB_DICTIONARY * gb_get_dictionary(GB_MAIN_TYPE *Main, GBQUARK key) { |
---|
859 | gb_Key *ks = &Main->keys[key]; |
---|
860 | if (ks->gb_key_disabled) return 0; |
---|
861 | if (!ks->gb_key) { |
---|
862 | gb_load_single_key_data(Main->gb_main(), key); |
---|
863 | if (Main->gb_key_data && !ks->gb_key) { |
---|
864 | GB_internal_error("Couldn't load gb_key"); |
---|
865 | } |
---|
866 | } |
---|
867 | return Main->keys[key].dictionary; |
---|
868 | } |
---|
869 | |
---|
870 | bool GB_is_dictionary_compressed(GBDATA *gbd) { |
---|
871 | if (gbd->is_entry()) { |
---|
872 | GBENTRY *gbe = gbd->as_entry(); |
---|
873 | const char *data = gbe->data(); |
---|
874 | |
---|
875 | if (data) { |
---|
876 | if (gbe->flags.compressed_data) { |
---|
877 | size_t size = gbe->uncompressed_size(); |
---|
878 | int last = 0; |
---|
879 | GB_ERROR error = 0; |
---|
880 | size_t new_size = -1; |
---|
881 | |
---|
882 | while (!last) { |
---|
883 | int c = *((unsigned char *)(data++)); |
---|
884 | |
---|
885 | if (c & GB_COMPRESSION_LAST) { |
---|
886 | last = 1; |
---|
887 | c &= ~GB_COMPRESSION_LAST; |
---|
888 | } |
---|
889 | |
---|
890 | if (c == GB_COMPRESSION_DICTIONARY) { |
---|
891 | return true; |
---|
892 | } |
---|
893 | |
---|
894 | if (c == GB_COMPRESSION_HUFFMANN) { |
---|
895 | data = gb_uncompress_huffmann(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size); |
---|
896 | } |
---|
897 | else if (c == GB_COMPRESSION_RUNLENGTH) { |
---|
898 | data = gb_uncompress_equal_bytes(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size); |
---|
899 | } |
---|
900 | else if (c == GB_COMPRESSION_SEQUENCE) { |
---|
901 | data = gb_uncompress_by_sequence(gbe, data, size, &error, &new_size); |
---|
902 | } |
---|
903 | else if (c == GB_COMPRESSION_SORTBYTES) { |
---|
904 | data = gb_uncompress_longs(data, size, &new_size); |
---|
905 | } |
---|
906 | else { |
---|
907 | error = GB_export_errorf("Internal Error: Cannot uncompress data of field '%s'", GB_read_key_pntr(gbe)); |
---|
908 | } |
---|
909 | |
---|
910 | if (error) { |
---|
911 | GB_internal_error(error); |
---|
912 | break; |
---|
913 | } |
---|
914 | } |
---|
915 | } |
---|
916 | } |
---|
917 | } |
---|
918 | return false; |
---|
919 | } |
---|
920 | |
---|
921 | // ----------------------------------- |
---|
922 | // Top compression algorithms |
---|
923 | |
---|
924 | GB_BUFFER gb_compress_data(GBDATA *gbd, int key, GB_CBUFFER source, size_t size, size_t *msize, GB_COMPRESSION_MASK max_compr, bool pre_compressed) { |
---|
925 | // Compress a data string. |
---|
926 | // |
---|
927 | // Returns: |
---|
928 | // - NULL if no compression makes sense |
---|
929 | // - otherwise returns compressed data and stores its size in 'msize' |
---|
930 | |
---|
931 | char *data; |
---|
932 | int last_flag = GB_COMPRESSION_LAST; |
---|
933 | |
---|
934 | if (pre_compressed) { |
---|
935 | last_flag = 0; |
---|
936 | } |
---|
937 | |
---|
938 | gb_assert(1); |
---|
939 | |
---|
940 | if (max_compr & GB_COMPRESSION_SORTBYTES) { |
---|
941 | source = gb_compress_longs(source, size, last_flag); |
---|
942 | last_flag = 0; |
---|
943 | size++; // @@@ OLI |
---|
944 | } |
---|
945 | else if (max_compr & GB_COMPRESSION_DICTIONARY) { |
---|
946 | GB_MAIN_TYPE *Main = GB_MAIN(gbd); |
---|
947 | GB_DICTIONARY *dict; |
---|
948 | if (!key) { |
---|
949 | key = GB_KEY_QUARK(gbd); |
---|
950 | } |
---|
951 | dict = gb_get_dictionary(Main, key); |
---|
952 | if (dict) { |
---|
953 | size_t real_size = size-(gbd->type()==GB_STRING); // for strings w/o trailing zero; @@@ or use is_a_string()? |
---|
954 | |
---|
955 | if (real_size) { |
---|
956 | data = gb_compress_by_dictionary(dict, source, real_size, msize, last_flag, 9999, 3); |
---|
957 | |
---|
958 | if ((*msize<=10 && size>10) || *msize < size*7/8) { // successful compression |
---|
959 | source = data; |
---|
960 | size = *msize; |
---|
961 | last_flag = 0; |
---|
962 | } |
---|
963 | } |
---|
964 | } |
---|
965 | |
---|
966 | } |
---|
967 | if (max_compr & GB_COMPRESSION_RUNLENGTH && size > GB_RUNLENGTH_MIN_SIZE) { |
---|
968 | data = gb_compress_equal_bytes(source, size, msize, last_flag); |
---|
969 | if (*msize < size-10 && *msize < size*7/8) { // successful compression |
---|
970 | source = data; |
---|
971 | size = *msize; |
---|
972 | last_flag = 0; |
---|
973 | } |
---|
974 | } |
---|
975 | |
---|
976 | if (max_compr & GB_COMPRESSION_HUFFMANN && size > GB_HUFFMAN_MIN_SIZE) { |
---|
977 | data = gb_compress_huffmann(source, size, msize, last_flag); |
---|
978 | if (*msize < size-10 && *msize < size*7/8) { // successful compression |
---|
979 | source = data; |
---|
980 | size = *msize; |
---|
981 | last_flag = 0; |
---|
982 | } |
---|
983 | } |
---|
984 | |
---|
985 | *msize = size; |
---|
986 | |
---|
987 | if (last_flag) return 0; // no compression |
---|
988 | return (char *)source; |
---|
989 | } |
---|
990 | |
---|
991 | GB_CBUFFER gb_uncompress_data(GBDATA *gbd, GB_CBUFFER source, size_t size) { |
---|
992 | int last = 0; |
---|
993 | const char *data = (char *)source; |
---|
994 | size_t new_size = -1; |
---|
995 | GB_ERROR error = 0; |
---|
996 | |
---|
997 | while (!last) { |
---|
998 | int c = *((unsigned char *)(data++)); |
---|
999 | if (c & GB_COMPRESSION_LAST) { |
---|
1000 | last = 1; |
---|
1001 | c &= ~GB_COMPRESSION_LAST; |
---|
1002 | } |
---|
1003 | if (c == GB_COMPRESSION_HUFFMANN) { |
---|
1004 | data = gb_uncompress_huffmann(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size); |
---|
1005 | } |
---|
1006 | else if (c == GB_COMPRESSION_RUNLENGTH) { |
---|
1007 | data = gb_uncompress_equal_bytes(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size); |
---|
1008 | } |
---|
1009 | else if (c == GB_COMPRESSION_DICTIONARY) { |
---|
1010 | data = gb_uncompress_by_dictionary(gbd, data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size); |
---|
1011 | } |
---|
1012 | else if (c == GB_COMPRESSION_SEQUENCE) { |
---|
1013 | data = gb_uncompress_by_sequence(gbd, data, size, &error, &new_size); |
---|
1014 | } |
---|
1015 | else if (c == GB_COMPRESSION_SORTBYTES) { |
---|
1016 | data = gb_uncompress_longs(data, size, &new_size); |
---|
1017 | } |
---|
1018 | else { |
---|
1019 | error = GBS_global_string("Internal Error: Cannot uncompress data of field '%s'", GB_read_key_pntr(gbd)); |
---|
1020 | } |
---|
1021 | |
---|
1022 | if (!data && !error) error = GB_await_error(); |
---|
1023 | if (error) last = 1; // sth went wrong, stop |
---|
1024 | } |
---|
1025 | |
---|
1026 | if (!error && new_size != size) { |
---|
1027 | error = GBS_global_string("Wrong decompressed size (expected=%zi, got=%zi)", size, new_size); |
---|
1028 | } |
---|
1029 | |
---|
1030 | if (error) { |
---|
1031 | GB_export_error(error); |
---|
1032 | data = 0; |
---|
1033 | } |
---|
1034 | |
---|
1035 | return data; |
---|
1036 | } |
---|
1037 | |
---|