| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : adcompr.cxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // =============================================================== // |
|---|
| 10 | |
|---|
| 11 | #include <climits> |
|---|
| 12 | |
|---|
| 13 | #include <arbdbt.h> |
|---|
| 14 | |
|---|
| 15 | #include "gb_key.h" |
|---|
| 16 | #include "gb_t_prot.h" |
|---|
| 17 | #include "gb_compress.h" |
|---|
| 18 | #include "gb_localdata.h" |
|---|
| 19 | |
|---|
| 20 | #include <stdint.h> |
|---|
| 21 | |
|---|
| 22 | #if defined(DEBUG) |
|---|
| 23 | // #define TEST_HUFFMAN_CODE |
|---|
| 24 | #endif // DEBUG |
|---|
| 25 | |
|---|
| 26 | #define GBTUM_COMPRESS_TREE_SIZE 32 |
|---|
| 27 | |
|---|
| 28 | |
|---|
| 29 | #define GB_READ_BIT(p, c, bp, result) if (!bp) { c = *(p++); bp = 8; }; result = (c>>7); result &= 1; c <<= 1; bp-- |
|---|
| 30 | |
|---|
| 31 | #define GB_INIT_WRITE_BITS(p, bp) *p = 0; bp = 8 |
|---|
| 32 | |
|---|
| 33 | #define GB_WRITE_BITS(p, bp, bitc, bits, i) \ |
|---|
| 34 | if (bp<=0) { bp += 8; p++; *p = 0; } \ |
|---|
| 35 | if ((i=bp-bitc)<0) { \ |
|---|
| 36 | *p |= bits>>(-i); \ |
|---|
| 37 | i += 8; \ |
|---|
| 38 | bp += 8; p++; *p = 0; \ |
|---|
| 39 | } \ |
|---|
| 40 | *p |= bits<<i; \ |
|---|
| 41 | bp-=bitc; |
|---|
| 42 | |
|---|
| 43 | #define GB_READ_BITS(p, c, bp, bitc, bits) { \ |
|---|
| 44 | long _i; \ |
|---|
| 45 | int _r; \ |
|---|
| 46 | bits = 0; \ |
|---|
| 47 | for (_i=bitc-1; _i>=0; _i--) { \ |
|---|
| 48 | GB_READ_BIT(p, c, bp, _r); \ |
|---|
| 49 | bits = (bits<<1) + _r; \ |
|---|
| 50 | } } |
|---|
| 51 | |
|---|
| 52 | |
|---|
| 53 | __ATTR__USERESULT static GB_ERROR gb_check_huffmann_tree(gb_compress_tree *t) { |
|---|
| 54 | if (t->leaf) |
|---|
| 55 | return NULp; |
|---|
| 56 | if (!t->son[0]) |
|---|
| 57 | return GB_export_error("Database entry corrupt (zero left son)"); |
|---|
| 58 | if (!t->son[1]) |
|---|
| 59 | return GB_export_error("Database entry corrupt (zero right son)"); |
|---|
| 60 | |
|---|
| 61 | { |
|---|
| 62 | GB_ERROR err = gb_check_huffmann_tree(t->son[0]); |
|---|
| 63 | if (err) return err; |
|---|
| 64 | } |
|---|
| 65 | return gb_check_huffmann_tree(t->son[1]); |
|---|
| 66 | } |
|---|
| 67 | |
|---|
| 68 | #if defined(TEST_HUFFMAN_CODE) |
|---|
| 69 | static void gb_dump_huffmann_tree(gb_compress_tree *t, const char *prefix) { |
|---|
| 70 | if (t->leaf) { |
|---|
| 71 | long command = (long)t->son[1]; |
|---|
| 72 | printf("%s", prefix); |
|---|
| 73 | |
|---|
| 74 | switch (command) { |
|---|
| 75 | case GB_CS_END: printf(" [GB_CS_END]\n"); break; |
|---|
| 76 | case GB_CS_ID: printf(" [GB_CS_ID]\n"); break; |
|---|
| 77 | case GB_CS_OK: { |
|---|
| 78 | long val = (long)t->son[0]; |
|---|
| 79 | printf(": value=%li (='%c')\n", val, (char)val); |
|---|
| 80 | break; |
|---|
| 81 | } |
|---|
| 82 | default: { |
|---|
| 83 | long val = (long)t->son[0]; |
|---|
| 84 | printf(" other command (%li) value=%li (='%c')\n", command, val, (char)val); |
|---|
| 85 | break; |
|---|
| 86 | } |
|---|
| 87 | } |
|---|
| 88 | } |
|---|
| 89 | else { |
|---|
| 90 | int len = strlen(prefix); |
|---|
| 91 | char *my_prefix = ARB_alloc<char>(len+2); |
|---|
| 92 | strcpy(my_prefix, prefix); |
|---|
| 93 | my_prefix[len+1] = 0; |
|---|
| 94 | my_prefix[len] = '0'; |
|---|
| 95 | gb_dump_huffmann_tree(t->son[0], my_prefix); |
|---|
| 96 | my_prefix[len] = '1'; |
|---|
| 97 | gb_dump_huffmann_tree(t->son[1], my_prefix); |
|---|
| 98 | free(my_prefix); |
|---|
| 99 | } |
|---|
| 100 | } |
|---|
| 101 | static void gb_dump_huffmann_list(gb_compress_list *bc, const char *prefix) { |
|---|
| 102 | if (bc->command == GB_CD_NODE) { |
|---|
| 103 | int len = strlen(prefix); |
|---|
| 104 | char *my_prefix = ARB_alloc<char>(len+2); |
|---|
| 105 | strcpy(my_prefix, prefix); |
|---|
| 106 | my_prefix[len+1] = 0; |
|---|
| 107 | my_prefix[len] = '0'; |
|---|
| 108 | gb_dump_huffmann_list(bc->son[0], my_prefix); |
|---|
| 109 | my_prefix[len] = '1'; |
|---|
| 110 | gb_dump_huffmann_list(bc->son[1], my_prefix); |
|---|
| 111 | free(my_prefix); |
|---|
| 112 | } |
|---|
| 113 | else { |
|---|
| 114 | printf("%s value=%i (='%c') count=%li\n", prefix, bc->value, (char)bc->value, bc->count); |
|---|
| 115 | } |
|---|
| 116 | } |
|---|
| 117 | |
|---|
| 118 | #endif // TEST_HUFFMAN_CODE |
|---|
| 119 | |
|---|
| 120 | gb_compress_tree *gb_build_uncompress_tree(const unsigned char *data, long short_flag, char **end) { |
|---|
| 121 | gb_compress_tree *Main, *t; |
|---|
| 122 | long bits, mask, i; |
|---|
| 123 | const unsigned char *p; |
|---|
| 124 | GB_ERROR error; |
|---|
| 125 | |
|---|
| 126 | Main = (gb_compress_tree *)gbm_get_mem(sizeof(gb_compress_tree), GBM_CB_INDEX); |
|---|
| 127 | for (p=data; *p; p+=3+short_flag) { |
|---|
| 128 | bits = p[0]; |
|---|
| 129 | mask = 0x80; |
|---|
| 130 | for (i=7; i; i--, mask=mask>>1) if (mask & bits) break; // find first bit |
|---|
| 131 | if (!i) { |
|---|
| 132 | GB_internal_error("Data corrupt"); |
|---|
| 133 | return NULp; |
|---|
| 134 | } |
|---|
| 135 | t = Main; |
|---|
| 136 | for (; i; i--) { |
|---|
| 137 | if (t->leaf) { |
|---|
| 138 | GB_export_error("Corrupt data !!!"); |
|---|
| 139 | return NULp; |
|---|
| 140 | } |
|---|
| 141 | mask = mask>>1; |
|---|
| 142 | if (mask & bits) { |
|---|
| 143 | if (!t->son[1]) { |
|---|
| 144 | t->son[1] = (gb_compress_tree *)gbm_get_mem(sizeof(gb_compress_tree), GBM_CB_INDEX); |
|---|
| 145 | } |
|---|
| 146 | t=t->son[1]; |
|---|
| 147 | } |
|---|
| 148 | else { |
|---|
| 149 | if (!t->son[0]) { |
|---|
| 150 | t->son[0] = (gb_compress_tree *)gbm_get_mem(sizeof(gb_compress_tree), GBM_CB_INDEX); |
|---|
| 151 | } |
|---|
| 152 | t=t->son[0]; |
|---|
| 153 | } |
|---|
| 154 | } |
|---|
| 155 | if (t->leaf) { |
|---|
| 156 | GB_export_error("Corrupt data !!!"); |
|---|
| 157 | return NULp; |
|---|
| 158 | } |
|---|
| 159 | t->leaf = 1; |
|---|
| 160 | if (short_flag) { |
|---|
| 161 | t->son[0] = (gb_compress_tree *)(long)((p[2]<<8)+p[3]); |
|---|
| 162 | } |
|---|
| 163 | else { |
|---|
| 164 | t->son[0] = (gb_compress_tree *)(long)(p[2]); |
|---|
| 165 | } |
|---|
| 166 | t->son[1] = (gb_compress_tree *)(long)p[1]; // command |
|---|
| 167 | } |
|---|
| 168 | if (end) *end = ((char *)p)+1; |
|---|
| 169 | if ((error = gb_check_huffmann_tree(Main))) { |
|---|
| 170 | GB_internal_errorf("%s", error); |
|---|
| 171 | gb_free_compress_tree(Main); |
|---|
| 172 | return NULp; |
|---|
| 173 | } |
|---|
| 174 | |
|---|
| 175 | #if defined(TEST_HUFFMAN_CODE) |
|---|
| 176 | printf("Huffman tree:\n"); |
|---|
| 177 | gb_dump_huffmann_tree(Main, ""); |
|---|
| 178 | #endif // TEST_HUFFMAN_CODE |
|---|
| 179 | |
|---|
| 180 | return Main; |
|---|
| 181 | } |
|---|
| 182 | |
|---|
| 183 | void gb_free_compress_tree(gb_compress_tree *tree) { |
|---|
| 184 | if (tree) { |
|---|
| 185 | if (!tree->leaf) { |
|---|
| 186 | if (tree->son[0]) gb_free_compress_tree(tree->son[0]); |
|---|
| 187 | if (tree->son[1])gb_free_compress_tree(tree->son[1]); |
|---|
| 188 | } |
|---|
| 189 | } |
|---|
| 190 | gbm_free_mem(tree, sizeof(gb_compress_tree), GBM_CB_INDEX); |
|---|
| 191 | } |
|---|
| 192 | |
|---|
| 193 | gb_compress_list *gb_build_compress_list(const unsigned char *data, long short_flag, long *size) { |
|---|
| 194 | int i, maxi, bitc; |
|---|
| 195 | int val, bits, mask, value; |
|---|
| 196 | const unsigned char *p; |
|---|
| 197 | |
|---|
| 198 | enum gb_compress_list_commands command = (enum gb_compress_list_commands)0; |
|---|
| 199 | |
|---|
| 200 | maxi = 0; |
|---|
| 201 | val = bits = mask = value = bitc = 0; |
|---|
| 202 | for (p=data; *p; p+=3+short_flag) { |
|---|
| 203 | i = (p[2]); |
|---|
| 204 | if (short_flag) { |
|---|
| 205 | i = (i<<8)+p[3]; |
|---|
| 206 | } |
|---|
| 207 | if (i>maxi) maxi = i; |
|---|
| 208 | } |
|---|
| 209 | *size = maxi; |
|---|
| 210 | |
|---|
| 211 | gb_compress_list *list = ARB_calloc<gb_compress_list>(maxi+1); |
|---|
| 212 | |
|---|
| 213 | maxi = 0; |
|---|
| 214 | val = -1; |
|---|
| 215 | |
|---|
| 216 | for (p=data; *p; p+=3+short_flag) { |
|---|
| 217 | val = (p[2]); |
|---|
| 218 | if (short_flag) val = (val<<8)+p[3]; |
|---|
| 219 | |
|---|
| 220 | for (i=maxi; i<val; i++) { // LOOP_VECTORIZED[!<910] |
|---|
| 221 | list[i].command = command; |
|---|
| 222 | list[i].mask = mask; |
|---|
| 223 | list[i].bitcnt = bitc; |
|---|
| 224 | list[i].bits = bits; |
|---|
| 225 | list[i].value = value; |
|---|
| 226 | } |
|---|
| 227 | maxi = val; |
|---|
| 228 | |
|---|
| 229 | command = (enum gb_compress_list_commands)(p[1]); |
|---|
| 230 | bits = p[0]; |
|---|
| 231 | mask = 0x80; |
|---|
| 232 | for (bitc=7; bitc; bitc--, mask=mask>>1) if (mask & bits) break; // find first bit |
|---|
| 233 | mask = 0xff>>(8-bitc); |
|---|
| 234 | bits = bits & mask; |
|---|
| 235 | value = val; |
|---|
| 236 | } |
|---|
| 237 | i = val; |
|---|
| 238 | list[i].command = command; |
|---|
| 239 | list[i].mask = mask; |
|---|
| 240 | list[i].bitcnt = bitc; |
|---|
| 241 | list[i].bits = bits; |
|---|
| 242 | list[i].value = value; |
|---|
| 243 | return list; |
|---|
| 244 | } |
|---|
| 245 | |
|---|
| 246 | // ------------------------ |
|---|
| 247 | // bit compression |
|---|
| 248 | |
|---|
| 249 | |
|---|
| 250 | char *gb_compress_bits(const char *source, long size, const unsigned char *c_0, long *msize) { |
|---|
| 251 | const unsigned char *s = (const unsigned char *)source; |
|---|
| 252 | char *buffer = GB_give_other_buffer(source, size); |
|---|
| 253 | char *dest = buffer; |
|---|
| 254 | |
|---|
| 255 | long i, j; |
|---|
| 256 | int bitptr, bits, bitc; |
|---|
| 257 | int zo_flag = 0; |
|---|
| 258 | long len; |
|---|
| 259 | int h_i, command; |
|---|
| 260 | int isNull[256]; |
|---|
| 261 | |
|---|
| 262 | for (i = 0; i<256; ++i) isNull[i] = 0; |
|---|
| 263 | for (i = 0; c_0[i]; ++i) isNull[c_0[i]] = 1; |
|---|
| 264 | |
|---|
| 265 | GB_INIT_WRITE_BITS(dest, bitptr); |
|---|
| 266 | for (len = size, i = 0; len; len--) { |
|---|
| 267 | if (zo_flag == isNull[*(s++)]) { |
|---|
| 268 | zo_flag = 1 - zo_flag; |
|---|
| 269 | for (command = GB_CS_SUB; command != GB_CS_OK;) { |
|---|
| 270 | if (i>gb_local->bc_size) j = gb_local->bc_size; else j = i; |
|---|
| 271 | bits = gb_local->bitcompress[j].bits; |
|---|
| 272 | bitc = gb_local->bitcompress[j].bitcnt; |
|---|
| 273 | command = gb_local->bitcompress[j].command; |
|---|
| 274 | i -= gb_local->bitcompress[j].value; |
|---|
| 275 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
|---|
| 276 | } |
|---|
| 277 | i = 0; |
|---|
| 278 | } |
|---|
| 279 | i++; |
|---|
| 280 | } |
|---|
| 281 | for (command = GB_CS_SUB; command != GB_CS_OK;) { |
|---|
| 282 | if (i>gb_local->bc_size) j = gb_local->bc_size; else j = i; |
|---|
| 283 | bits = gb_local->bitcompress[j].bits; |
|---|
| 284 | bitc = gb_local->bitcompress[j].bitcnt; |
|---|
| 285 | command = gb_local->bitcompress[j].command; |
|---|
| 286 | i -= gb_local->bitcompress[j].value; |
|---|
| 287 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
|---|
| 288 | } |
|---|
| 289 | *msize = dest - buffer + 1; |
|---|
| 290 | return buffer; |
|---|
| 291 | } |
|---|
| 292 | |
|---|
| 293 | |
|---|
| 294 | GB_BUFFER gb_uncompress_bits(const char *source, long size, char c_0, char c_1) { |
|---|
| 295 | gb_compress_tree *Main = gb_local->bituncompress; |
|---|
| 296 | |
|---|
| 297 | uint8_t ch = 0; |
|---|
| 298 | |
|---|
| 299 | long bitp = 0; |
|---|
| 300 | char outc = c_0; |
|---|
| 301 | |
|---|
| 302 | const uint8_t *s = (const uint8_t*)(source); |
|---|
| 303 | |
|---|
| 304 | char *p = GB_give_other_buffer(source, size+1); |
|---|
| 305 | char *buffer = p; |
|---|
| 306 | |
|---|
| 307 | for (long pos = 0; pos<size;) { |
|---|
| 308 | long lastpos = pos; |
|---|
| 309 | for (long command = GB_CS_SUB; command != GB_CS_OK;) { |
|---|
| 310 | gb_compress_tree *t; |
|---|
| 311 | for (t = Main; !t->leaf;) { |
|---|
| 312 | int bit; |
|---|
| 313 | GB_READ_BIT(s, ch, bitp, bit); |
|---|
| 314 | t = t->son[bit]; |
|---|
| 315 | } |
|---|
| 316 | command = (long) t->son[1]; |
|---|
| 317 | pos += (long)t->son[0]; |
|---|
| 318 | } |
|---|
| 319 | for (; lastpos<pos; lastpos++) *(p++) = outc; |
|---|
| 320 | if (outc==c_0) outc=c_1; else outc=c_0; |
|---|
| 321 | } |
|---|
| 322 | *p = 0; |
|---|
| 323 | return buffer; |
|---|
| 324 | } |
|---|
| 325 | |
|---|
| 326 | |
|---|
| 327 | |
|---|
| 328 | /* Runlength encoding produces.. |
|---|
| 329 | * |
|---|
| 330 | * from source: string |
|---|
| 331 | * dest: [command]+ |
|---|
| 332 | * |
|---|
| 333 | * command data meaning |
|---|
| 334 | * |
|---|
| 335 | * -122 low high byte 'byte' was repeated 'low'+256*'high' times |
|---|
| 336 | * -119..-1 byte 'byte' was repeated -'command' times |
|---|
| 337 | * 0 --- end |
|---|
| 338 | * 1..119 [byte]+ 'command' data bytes follow |
|---|
| 339 | */ |
|---|
| 340 | |
|---|
| 341 | static char *g_b_write_run(char *dest, int scount, int lastbyte) { |
|---|
| 342 | while (scount> 0xffff) { |
|---|
| 343 | *(dest++) = 0x100-122; |
|---|
| 344 | *(dest++) = 0xff; |
|---|
| 345 | *(dest++) = 0xff; |
|---|
| 346 | *(dest++) = lastbyte; |
|---|
| 347 | scount -= 0xffff; |
|---|
| 348 | } |
|---|
| 349 | if (scount >250) { |
|---|
| 350 | *(dest++) = 0x100-122; |
|---|
| 351 | *(dest++) = scount & 0xff; |
|---|
| 352 | *(dest++) = (scount >> 8) & 0xff; |
|---|
| 353 | *(dest++) = lastbyte; |
|---|
| 354 | return dest; |
|---|
| 355 | } |
|---|
| 356 | |
|---|
| 357 | while (scount>120) { // 120 blocks |
|---|
| 358 | *(dest++) = 0x100 - 120; |
|---|
| 359 | *(dest++) = lastbyte; |
|---|
| 360 | scount -= 120; |
|---|
| 361 | } |
|---|
| 362 | |
|---|
| 363 | if (scount) { // and rest |
|---|
| 364 | *(dest++) = -scount; |
|---|
| 365 | *(dest++) = lastbyte; |
|---|
| 366 | } |
|---|
| 367 | return dest; |
|---|
| 368 | } |
|---|
| 369 | |
|---|
| 370 | #define GB_COPY_NONRUN(dest, source, len) \ |
|---|
| 371 | while (len > 120) { \ |
|---|
| 372 | int _i = 120; \ |
|---|
| 373 | char *_p; \ |
|---|
| 374 | len -= _i; \ |
|---|
| 375 | *(dest++) = _i; \ |
|---|
| 376 | _p = dest; dest+=_i; \ |
|---|
| 377 | while (_i--) { \ |
|---|
| 378 | *(_p++) = *(source++); \ |
|---|
| 379 | } \ |
|---|
| 380 | } \ |
|---|
| 381 | if (len >0) { \ |
|---|
| 382 | int _i = len; \ |
|---|
| 383 | char *_p; \ |
|---|
| 384 | len = 0; \ |
|---|
| 385 | *(dest++) = _i; \ |
|---|
| 386 | _p = dest; dest+=_i; \ |
|---|
| 387 | while (_i--) { \ |
|---|
| 388 | *(_p++) = *(source++); \ |
|---|
| 389 | } \ |
|---|
| 390 | } |
|---|
| 391 | |
|---|
| 392 | void gb_compress_equal_bytes_2(const char *source, size_t size, size_t *msize, char *dest) { |
|---|
| 393 | long i; // length counter |
|---|
| 394 | int last, rbyte; // next and akt value |
|---|
| 395 | long scount; // same count; count equal bytes |
|---|
| 396 | char *buffer = dest; |
|---|
| 397 | const char *sourcenequal; // begin of non equal section |
|---|
| 398 | long hi; // to temporary store i |
|---|
| 399 | int hsize; |
|---|
| 400 | |
|---|
| 401 | sourcenequal = source; |
|---|
| 402 | rbyte = *(source ++); |
|---|
| 403 | last = -1; |
|---|
| 404 | for (i=size-1; i;) { |
|---|
| 405 | if (rbyte!=last) { // Search an equal pair |
|---|
| 406 | last = rbyte; |
|---|
| 407 | rbyte = *(source ++); i--; |
|---|
| 408 | continue; |
|---|
| 409 | } // last rbyte *(source) |
|---|
| 410 | hi = i+1; |
|---|
| 411 | while (i) { // get equal bytes |
|---|
| 412 | rbyte = *(source ++); i--; |
|---|
| 413 | if (rbyte!=last) break; |
|---|
| 414 | } |
|---|
| 415 | scount = hi-i; // number of equal bytes ; at end e.b.-1 |
|---|
| 416 | if (scount <= GB_RUNLENGTH_SIZE) continue; // less than 4-> no runlength compression |
|---|
| 417 | |
|---|
| 418 | /* compression !!! |
|---|
| 419 | * 1. copy unequal bytes |
|---|
| 420 | * 2. fill rest |
|---|
| 421 | */ |
|---|
| 422 | |
|---|
| 423 | hsize = source - sourcenequal - scount -1; // hsize of non equal bytes |
|---|
| 424 | source = sourcenequal; |
|---|
| 425 | hi = i; // i=unequal length |
|---|
| 426 | GB_COPY_NONRUN(dest, source, hsize); // LOOP_VECTORIZED=2 |
|---|
| 427 | dest = g_b_write_run(dest, scount, last); |
|---|
| 428 | source += scount; // now equal bytes |
|---|
| 429 | |
|---|
| 430 | sourcenequal = source++; // no rbyte written yet |
|---|
| 431 | last = rbyte; |
|---|
| 432 | i = hi; |
|---|
| 433 | if (i) { |
|---|
| 434 | rbyte = *(source++); i--; |
|---|
| 435 | } |
|---|
| 436 | } |
|---|
| 437 | // and now rest which is not compressed |
|---|
| 438 | hsize = source - sourcenequal; |
|---|
| 439 | source = sourcenequal; |
|---|
| 440 | GB_COPY_NONRUN(dest, source, hsize); // LOOP_VECTORIZED=2 |
|---|
| 441 | |
|---|
| 442 | *(dest++) = 0; // end of data |
|---|
| 443 | *msize = dest-buffer; |
|---|
| 444 | if (*msize >size*9/8) printf("ssize %d, dsize %d\n", (int)size, (int)*msize); |
|---|
| 445 | } |
|---|
| 446 | |
|---|
| 447 | static GB_BUFFER gb_compress_equal_bytes(const char *source, size_t size, size_t *msize, int last_flag) { |
|---|
| 448 | char *dest; |
|---|
| 449 | char *buffer; |
|---|
| 450 | |
|---|
| 451 | dest = buffer = GB_give_other_buffer(source, size*9/8); |
|---|
| 452 | *(dest++) = GB_COMPRESSION_RUNLENGTH | last_flag; |
|---|
| 453 | gb_compress_equal_bytes_2(source, size, msize, dest); |
|---|
| 454 | (*msize) ++; // Tag byte |
|---|
| 455 | return buffer; |
|---|
| 456 | } |
|---|
| 457 | |
|---|
| 458 | struct huffmann_list { |
|---|
| 459 | huffmann_list *next; |
|---|
| 460 | long val; |
|---|
| 461 | gb_compress_list *element; |
|---|
| 462 | }; |
|---|
| 463 | |
|---|
| 464 | static huffmann_list *huffmann_listhead = NULp; |
|---|
| 465 | |
|---|
| 466 | static void gb_compress_huffmann_add_to_list(long val, gb_compress_list *element) { |
|---|
| 467 | huffmann_list *dat, *search, *searchlast; |
|---|
| 468 | |
|---|
| 469 | dat = (huffmann_list *)gbm_get_mem(sizeof(huffmann_list), GBM_CB_INDEX); |
|---|
| 470 | dat->val = val; |
|---|
| 471 | dat->element = element; |
|---|
| 472 | |
|---|
| 473 | searchlast = NULp; |
|---|
| 474 | for (search = huffmann_listhead; search; search = search->next) { |
|---|
| 475 | if (val<search->val) break; |
|---|
| 476 | searchlast = search; |
|---|
| 477 | } |
|---|
| 478 | if (huffmann_listhead) { |
|---|
| 479 | if (searchlast) { |
|---|
| 480 | dat->next = searchlast->next; |
|---|
| 481 | searchlast->next = dat; |
|---|
| 482 | } |
|---|
| 483 | else { |
|---|
| 484 | dat->next = huffmann_listhead; |
|---|
| 485 | huffmann_listhead = dat; |
|---|
| 486 | } |
|---|
| 487 | } |
|---|
| 488 | else { |
|---|
| 489 | huffmann_listhead = dat; |
|---|
| 490 | } |
|---|
| 491 | } |
|---|
| 492 | |
|---|
| 493 | static long gb_compress_huffmann_pop(long *val, gb_compress_list **element) { |
|---|
| 494 | huffmann_list *dat; |
|---|
| 495 | if ((dat = huffmann_listhead)) { |
|---|
| 496 | huffmann_listhead = dat->next; |
|---|
| 497 | *val = dat->val; |
|---|
| 498 | *element = dat->element; |
|---|
| 499 | gbm_free_mem(dat, sizeof(huffmann_list), GBM_CB_INDEX); |
|---|
| 500 | return 1; |
|---|
| 501 | } |
|---|
| 502 | else { |
|---|
| 503 | GB_internal_error("huffman compression failed"); |
|---|
| 504 | return 0; |
|---|
| 505 | } |
|---|
| 506 | } |
|---|
| 507 | |
|---|
| 508 | static char *gb_compress_huffmann_rek(gb_compress_list *bc, int bits, int bitcnt, char *dest) { |
|---|
| 509 | if (bc->command == GB_CD_NODE) { |
|---|
| 510 | dest = gb_compress_huffmann_rek(bc->son[0], (bits<<1), bitcnt+1, dest); |
|---|
| 511 | dest = gb_compress_huffmann_rek(bc->son[1], (bits<<1)+1, bitcnt+1, dest); |
|---|
| 512 | gbm_free_mem(bc, sizeof(gb_compress_list), GBM_CB_INDEX); |
|---|
| 513 | return dest; |
|---|
| 514 | } |
|---|
| 515 | else { |
|---|
| 516 | *(dest++) = bits; |
|---|
| 517 | *(dest++) = bc->command; |
|---|
| 518 | *(dest++) = bc->value; |
|---|
| 519 | bc->bitcnt = bitcnt; |
|---|
| 520 | bc->mask = 0xff>>(8-bitcnt); |
|---|
| 521 | bc->bits = bits&bc->mask; |
|---|
| 522 | return dest; |
|---|
| 523 | } |
|---|
| 524 | } |
|---|
| 525 | |
|---|
| 526 | static GB_BUFFER gb_compress_huffmann(GB_CBUFFER source, size_t size, size_t *msize, int last_flag) { |
|---|
| 527 | const int COMPRESS_LIST_SIZE = 257; |
|---|
| 528 | gb_compress_list bitcompress[COMPRESS_LIST_SIZE]; |
|---|
| 529 | |
|---|
| 530 | memset((char *)(&bitcompress[0]), 0, sizeof(gb_compress_list)*COMPRESS_LIST_SIZE); |
|---|
| 531 | |
|---|
| 532 | char *buffer = GB_give_other_buffer(source, size*2+3*GBTUM_COMPRESS_TREE_SIZE+1); |
|---|
| 533 | char *dest = buffer; |
|---|
| 534 | *(dest++) = GB_COMPRESSION_HUFFMANN | last_flag; |
|---|
| 535 | |
|---|
| 536 | long id = 0; |
|---|
| 537 | |
|---|
| 538 | { |
|---|
| 539 | long level; |
|---|
| 540 | long i; |
|---|
| 541 | long restcount; |
|---|
| 542 | |
|---|
| 543 | long vali[2] = { 0, 0 }; |
|---|
| 544 | |
|---|
| 545 | gb_compress_list *element1 = NULp; |
|---|
| 546 | gb_compress_list *element2 = NULp; |
|---|
| 547 | gb_compress_list *bc = NULp; |
|---|
| 548 | |
|---|
| 549 | unsigned char *s = (unsigned char *)source; |
|---|
| 550 | for (long len = size; len; len--) { |
|---|
| 551 | bitcompress[*(s++)].count++; |
|---|
| 552 | } |
|---|
| 553 | level = size/GBTUM_COMPRESS_TREE_SIZE; |
|---|
| 554 | restcount = 0; |
|---|
| 555 | for (i=0; i<256; i++) { |
|---|
| 556 | bitcompress[i].value = (int)i; |
|---|
| 557 | if (bitcompress[i].count>level) { |
|---|
| 558 | gb_compress_huffmann_add_to_list(bitcompress[i].count, &bitcompress[i]); |
|---|
| 559 | bitcompress[i].command = GB_CS_OK; |
|---|
| 560 | } |
|---|
| 561 | else { |
|---|
| 562 | restcount += bitcompress[i].count; |
|---|
| 563 | bitcompress[i].count = 0; |
|---|
| 564 | id = i; |
|---|
| 565 | bitcompress[i].command = GB_CS_ID; |
|---|
| 566 | } |
|---|
| 567 | } |
|---|
| 568 | bitcompress[COMPRESS_LIST_SIZE-1].command = GB_CS_END; |
|---|
| 569 | |
|---|
| 570 | gb_compress_huffmann_add_to_list(restcount, &bitcompress[id]); |
|---|
| 571 | gb_compress_huffmann_add_to_list(1, &bitcompress[COMPRESS_LIST_SIZE-1]); |
|---|
| 572 | while (huffmann_listhead->next) { |
|---|
| 573 | gb_compress_huffmann_pop(&(vali[0]), &element1); |
|---|
| 574 | gb_compress_huffmann_pop(&(vali[1]), &element2); |
|---|
| 575 | |
|---|
| 576 | bc = (gb_compress_list *)gbm_get_mem(sizeof(gb_compress_list), GBM_CB_INDEX); |
|---|
| 577 | bc->command = GB_CD_NODE; |
|---|
| 578 | bc->son[0] = element1; |
|---|
| 579 | bc->son[1] = element2; |
|---|
| 580 | |
|---|
| 581 | if (element1->command == GB_CD_NODE) { |
|---|
| 582 | bc->bits = element1->bits+1; |
|---|
| 583 | if (element2->command == GB_CD_NODE && element2->bits >= bc->bits) bc->bits = element2->bits+1; |
|---|
| 584 | } |
|---|
| 585 | else { |
|---|
| 586 | if (element2->command == GB_CD_NODE) { |
|---|
| 587 | bc->bits = element2->bits+1; |
|---|
| 588 | } |
|---|
| 589 | else { |
|---|
| 590 | bc->bits = 1; |
|---|
| 591 | } |
|---|
| 592 | } |
|---|
| 593 | |
|---|
| 594 | gb_assert(bc->bits <= 7); // max. 7 bits allowed |
|---|
| 595 | |
|---|
| 596 | // if already 6 bits used -> put to end of list; otherwise sort in |
|---|
| 597 | gb_compress_huffmann_add_to_list(bc->bits >= 6 ? LONG_MAX : vali[0]+vali[1], bc); |
|---|
| 598 | } |
|---|
| 599 | gb_compress_huffmann_pop(&(vali[0]), &element1); |
|---|
| 600 | #if defined(TEST_HUFFMAN_CODE) |
|---|
| 601 | printf("huffman list:\n"); |
|---|
| 602 | gb_dump_huffmann_list(bc, ""); |
|---|
| 603 | #endif // TEST_HUFFMAN_CODE |
|---|
| 604 | dest = gb_compress_huffmann_rek(bc, 1, 0, dest); |
|---|
| 605 | *(dest++) = 0; |
|---|
| 606 | } |
|---|
| 607 | |
|---|
| 608 | gb_compress_list *pbid = &bitcompress[id]; |
|---|
| 609 | unsigned char *s = (unsigned char *)source; |
|---|
| 610 | { |
|---|
| 611 | int bitptr, bits, bitc; |
|---|
| 612 | |
|---|
| 613 | GB_INIT_WRITE_BITS(dest, bitptr); |
|---|
| 614 | int h_i; |
|---|
| 615 | for (long len = size; len; len--) { |
|---|
| 616 | int val = *(s++); |
|---|
| 617 | int command = bitcompress[val].command; |
|---|
| 618 | if (command == GB_CS_OK) { |
|---|
| 619 | gb_compress_list *pbc = &bitcompress[val]; |
|---|
| 620 | bits = pbc->bits; |
|---|
| 621 | bitc = pbc->bitcnt; |
|---|
| 622 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
|---|
| 623 | } |
|---|
| 624 | else { |
|---|
| 625 | bits = pbid->bits; |
|---|
| 626 | bitc = pbid->bitcnt; |
|---|
| 627 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
|---|
| 628 | |
|---|
| 629 | bits = val; |
|---|
| 630 | bitc = 8; |
|---|
| 631 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
|---|
| 632 | } |
|---|
| 633 | } |
|---|
| 634 | bits = bitcompress[COMPRESS_LIST_SIZE-1].bits; |
|---|
| 635 | bitc = bitcompress[COMPRESS_LIST_SIZE-1].bitcnt; |
|---|
| 636 | // cppcheck-suppress unreadVariable (bitptr is assigned but never used) |
|---|
| 637 | GB_WRITE_BITS(dest, bitptr, bitc, bits, h_i); |
|---|
| 638 | } |
|---|
| 639 | *msize = dest - buffer + 1; |
|---|
| 640 | #if defined(TEST_HUFFMAN_CODE) |
|---|
| 641 | printf("huffman compression %zu -> %zu (%5.2f %%)\n", size, *msize, (double)((double)(*msize)/size*100)); |
|---|
| 642 | #endif // TEST_HUFFMAN_CODE |
|---|
| 643 | if (*msize >size*2) printf("ssize %d, size %d\n", (int)size, (int)*msize); |
|---|
| 644 | return buffer; |
|---|
| 645 | } |
|---|
| 646 | |
|---|
| 647 | |
|---|
| 648 | static GB_BUFFER gb_uncompress_equal_bytes(GB_CBUFFER s, size_t size, size_t *new_size) { |
|---|
| 649 | const signed char *source = (signed char*)s; |
|---|
| 650 | char *dest; |
|---|
| 651 | unsigned int c; |
|---|
| 652 | long j; |
|---|
| 653 | long i, k; |
|---|
| 654 | char *buffer; |
|---|
| 655 | |
|---|
| 656 | dest = buffer = GB_give_other_buffer((char *)source, size); |
|---|
| 657 | |
|---|
| 658 | #if defined(DEBUG) && 0 |
|---|
| 659 | printf("gb_uncompress_equal_bytes(size=%li):\n", size); |
|---|
| 660 | #endif // DEBUG |
|---|
| 661 | |
|---|
| 662 | for (i=size; i;) { |
|---|
| 663 | j = *(source++); |
|---|
| 664 | #if defined(DEBUG) && 0 |
|---|
| 665 | printf("size=%li (code=%i)\n", i, (int)j); |
|---|
| 666 | #endif // DEBUG |
|---|
| 667 | if (j>0) { // uncompressed data block |
|---|
| 668 | if (j>i) j=i; |
|---|
| 669 | i -= j; |
|---|
| 670 | for (; j; j--) { // LOOP_VECTORIZED |
|---|
| 671 | *(dest++) = (char)(*(source++)); |
|---|
| 672 | } |
|---|
| 673 | } |
|---|
| 674 | else { // equal bytes compressed |
|---|
| 675 | if (!j) break; // end symbol |
|---|
| 676 | if (j == -122) { |
|---|
| 677 | j = *(source++) & 0xff; |
|---|
| 678 | j |= (uint16_t(*(source++)) << 8) & 0xff00; // cast to unsigned (left shifting negative values is undefined behavior) |
|---|
| 679 | j = -j; |
|---|
| 680 | } |
|---|
| 681 | c = *(source++); |
|---|
| 682 | i += j; |
|---|
| 683 | if (i<0) { |
|---|
| 684 | j += -i; |
|---|
| 685 | i = 0; |
|---|
| 686 | } |
|---|
| 687 | if (j<-30) { // set multiple bytes |
|---|
| 688 | j = -j; |
|---|
| 689 | if (((long)dest) & 1) { |
|---|
| 690 | *(dest++) = c; |
|---|
| 691 | j--; |
|---|
| 692 | } |
|---|
| 693 | if (((long)dest) &2) { |
|---|
| 694 | *(dest++) = c; |
|---|
| 695 | *(dest++) = c; |
|---|
| 696 | j-=2; |
|---|
| 697 | } |
|---|
| 698 | c &= 0xff; // copy c to upper bytes |
|---|
| 699 | c |= (c<<8); |
|---|
| 700 | c |= (c<<16); |
|---|
| 701 | k = j&3; |
|---|
| 702 | j = j>>2; |
|---|
| 703 | for (; j; j--) { // LOOP_VECTORIZED |
|---|
| 704 | *((GB_UINT4 *)dest) = c; |
|---|
| 705 | dest += sizeof(GB_UINT4); |
|---|
| 706 | } |
|---|
| 707 | j = k; |
|---|
| 708 | for (; j; j--) *(dest++) = c; |
|---|
| 709 | } |
|---|
| 710 | else { |
|---|
| 711 | for (; j; j++) *(dest++) = c; |
|---|
| 712 | } |
|---|
| 713 | } |
|---|
| 714 | } |
|---|
| 715 | |
|---|
| 716 | *new_size = dest-buffer; |
|---|
| 717 | gb_assert(size >= *new_size); // buffer overflow |
|---|
| 718 | |
|---|
| 719 | return buffer; |
|---|
| 720 | } |
|---|
| 721 | |
|---|
| 722 | static GB_BUFFER gb_uncompress_huffmann(GB_CBUFFER source, size_t maxsize, size_t *new_size) { |
|---|
| 723 | gb_compress_tree *un_tree, *t; |
|---|
| 724 | |
|---|
| 725 | char *data[1]; |
|---|
| 726 | char *p, *buffer; |
|---|
| 727 | long bitp; |
|---|
| 728 | uint8_t ch = 0, *s; |
|---|
| 729 | long val, command; |
|---|
| 730 | |
|---|
| 731 | un_tree = gb_build_uncompress_tree((const unsigned char *)source, 0, data); |
|---|
| 732 | if (!un_tree) return NULp; |
|---|
| 733 | |
|---|
| 734 | bitp = 0; |
|---|
| 735 | buffer = p = GB_give_other_buffer(source, maxsize); |
|---|
| 736 | s = (uint8_t*)data[0]; |
|---|
| 737 | while (1) { |
|---|
| 738 | for (t = un_tree; !t->leaf;) { |
|---|
| 739 | int bit; |
|---|
| 740 | GB_READ_BIT(s, ch, bitp, bit); |
|---|
| 741 | t = t->son[bit]; |
|---|
| 742 | } |
|---|
| 743 | command = (long) t->son[1]; |
|---|
| 744 | if (command == GB_CS_END) break; |
|---|
| 745 | if (command == GB_CS_ID) { |
|---|
| 746 | GB_READ_BITS(s, ch, bitp, 8, val); |
|---|
| 747 | } |
|---|
| 748 | else { |
|---|
| 749 | val = (long) t->son[0]; |
|---|
| 750 | } |
|---|
| 751 | *(p++) = (int)val; |
|---|
| 752 | } |
|---|
| 753 | gb_free_compress_tree(un_tree); |
|---|
| 754 | |
|---|
| 755 | *new_size = p-buffer; |
|---|
| 756 | gb_assert(maxsize >= *new_size); // buffer overflow |
|---|
| 757 | |
|---|
| 758 | return buffer; |
|---|
| 759 | } |
|---|
| 760 | |
|---|
| 761 | GB_BUFFER gb_uncompress_bytes(GB_CBUFFER source, size_t size, size_t *new_size) { |
|---|
| 762 | char *data = gb_uncompress_huffmann(source, size, new_size); |
|---|
| 763 | if (data) data = gb_uncompress_equal_bytes(data, size, new_size); |
|---|
| 764 | gb_assert(!data || size >= *new_size); // buffer overflow |
|---|
| 765 | return data; |
|---|
| 766 | } |
|---|
| 767 | |
|---|
| 768 | // ------------------------------------------------- |
|---|
| 769 | // Compress long and floats (4 byte values) |
|---|
| 770 | |
|---|
| 771 | |
|---|
| 772 | GB_BUFFER gb_uncompress_longs_old(GB_CBUFFER source, size_t size, size_t *new_size) { |
|---|
| 773 | // size is byte value |
|---|
| 774 | char *res = NULp; |
|---|
| 775 | char *data = gb_uncompress_huffmann(source, (size*9)/8, new_size); |
|---|
| 776 | if (data) { |
|---|
| 777 | char *p, *s0, *s1, *s2, *s3; |
|---|
| 778 | GB_UINT4 mi, i; |
|---|
| 779 | |
|---|
| 780 | data = gb_uncompress_equal_bytes(data, size, new_size); |
|---|
| 781 | |
|---|
| 782 | gb_assert(*new_size == size); |
|---|
| 783 | res = p = GB_give_other_buffer(data, size); |
|---|
| 784 | |
|---|
| 785 | STATIC_ASSERT(sizeof(GB_UINT4) == 4); |
|---|
| 786 | |
|---|
| 787 | mi = (GB_UINT4)(size / 4); |
|---|
| 788 | s0 = data + 0 * mi; |
|---|
| 789 | s1 = data + 1 * mi; |
|---|
| 790 | s2 = data + 2 * mi; |
|---|
| 791 | s3 = data + 3 * mi; |
|---|
| 792 | |
|---|
| 793 | for (i = 0; i < mi; i++) { // LOOP_VECTORIZED |
|---|
| 794 | *(p++) = *(s0++); |
|---|
| 795 | *(p++) = *(s1++); |
|---|
| 796 | *(p++) = *(s2++); |
|---|
| 797 | *(p++) = *(s3++); |
|---|
| 798 | } |
|---|
| 799 | |
|---|
| 800 | *new_size = mi*4; |
|---|
| 801 | } |
|---|
| 802 | return res; |
|---|
| 803 | } |
|---|
| 804 | |
|---|
| 805 | |
|---|
| 806 | static GB_BUFFER gb_uncompress_longs(GB_CBUFFER data, size_t size, size_t *new_size) { |
|---|
| 807 | const char *s0, *s1, *s2, *s3; |
|---|
| 808 | char *p, *res; |
|---|
| 809 | long mi, i; |
|---|
| 810 | |
|---|
| 811 | res = p = GB_give_other_buffer(data, size); |
|---|
| 812 | |
|---|
| 813 | STATIC_ASSERT(sizeof(GB_UINT4) == 4); |
|---|
| 814 | |
|---|
| 815 | mi = size / 4; |
|---|
| 816 | s0 = data + 0 * mi; |
|---|
| 817 | s1 = data + 1 * mi; |
|---|
| 818 | s2 = data + 2 * mi; |
|---|
| 819 | s3 = data + 3 * mi; |
|---|
| 820 | |
|---|
| 821 | for (i = 0; i < mi; i++) { // LOOP_VECTORIZED =2[<5][>=54] =1 // 1x with 5.x; 2x with 4.9.x + 5.4 + 5.5 + 9.5 + 10.x |
|---|
| 822 | *(p++) = *(s0++); |
|---|
| 823 | *(p++) = *(s1++); |
|---|
| 824 | *(p++) = *(s2++); |
|---|
| 825 | *(p++) = *(s3++); |
|---|
| 826 | } |
|---|
| 827 | |
|---|
| 828 | *new_size = mi*4; |
|---|
| 829 | return res; |
|---|
| 830 | } |
|---|
| 831 | |
|---|
| 832 | static GB_BUFFER gb_compress_longs(GB_CBUFFER source, long size, int last_flag) { |
|---|
| 833 | long mi, i; |
|---|
| 834 | const char *p; |
|---|
| 835 | char *s0, *s1, *s2, *s3; |
|---|
| 836 | char *dest = GB_give_other_buffer(source, size+1); |
|---|
| 837 | |
|---|
| 838 | mi = size/4; |
|---|
| 839 | p = source; |
|---|
| 840 | *(dest) = GB_COMPRESSION_SORTBYTES | last_flag; |
|---|
| 841 | s0 = dest + 0*mi + 1; |
|---|
| 842 | s1 = dest + 1*mi + 1; |
|---|
| 843 | s2 = dest + 2*mi + 1; |
|---|
| 844 | s3 = dest + 3*mi + 1; |
|---|
| 845 | for (i=0; i<mi; i++) { // LOOP_VECTORIZED |
|---|
| 846 | *(s0++) = *(p++); |
|---|
| 847 | *(s1++) = *(p++); |
|---|
| 848 | *(s2++) = *(p++); |
|---|
| 849 | *(s3++) = *(p++); |
|---|
| 850 | } |
|---|
| 851 | return dest; |
|---|
| 852 | } |
|---|
| 853 | |
|---|
| 854 | // ----------------------------- |
|---|
| 855 | // Dictionary Functions |
|---|
| 856 | |
|---|
| 857 | GB_DICTIONARY * gb_get_dictionary(GB_MAIN_TYPE *Main, GBQUARK key) { |
|---|
| 858 | gb_Key *ks = &Main->keys[key]; |
|---|
| 859 | if (ks->gb_key_disabled) return NULp; |
|---|
| 860 | if (!ks->gb_key) { |
|---|
| 861 | gb_load_single_key_data(Main->gb_main(), key); |
|---|
| 862 | if (Main->gb_key_data && !ks->gb_key) { |
|---|
| 863 | GB_internal_error("Couldn't load gb_key"); |
|---|
| 864 | } |
|---|
| 865 | } |
|---|
| 866 | return Main->keys[key].dictionary; |
|---|
| 867 | } |
|---|
| 868 | |
|---|
| 869 | bool GB_is_dictionary_compressed(GBDATA *gbd) { |
|---|
| 870 | if (gbd->is_entry()) { |
|---|
| 871 | GBENTRY *gbe = gbd->as_entry(); |
|---|
| 872 | const char *data = gbe->data(); |
|---|
| 873 | |
|---|
| 874 | if (data) { |
|---|
| 875 | if (gbe->flags.compressed_data) { |
|---|
| 876 | size_t size = gbe->uncompressed_size(); |
|---|
| 877 | int last = 0; |
|---|
| 878 | GB_ERROR error = NULp; |
|---|
| 879 | size_t new_size = -1; |
|---|
| 880 | |
|---|
| 881 | while (!last) { |
|---|
| 882 | int c = *((unsigned char *)(data++)); |
|---|
| 883 | |
|---|
| 884 | if (c & GB_COMPRESSION_LAST) { |
|---|
| 885 | last = 1; |
|---|
| 886 | c &= ~GB_COMPRESSION_LAST; |
|---|
| 887 | } |
|---|
| 888 | |
|---|
| 889 | if (c == GB_COMPRESSION_DICTIONARY) { |
|---|
| 890 | return true; |
|---|
| 891 | } |
|---|
| 892 | |
|---|
| 893 | if (c == GB_COMPRESSION_HUFFMANN) { |
|---|
| 894 | data = gb_uncompress_huffmann(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size); |
|---|
| 895 | } |
|---|
| 896 | else if (c == GB_COMPRESSION_RUNLENGTH) { |
|---|
| 897 | data = gb_uncompress_equal_bytes(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size); |
|---|
| 898 | } |
|---|
| 899 | else if (c == GB_COMPRESSION_SEQUENCE) { |
|---|
| 900 | data = gb_uncompress_by_sequence(gbe, data, size, &error, &new_size); |
|---|
| 901 | } |
|---|
| 902 | else if (c == GB_COMPRESSION_SORTBYTES) { |
|---|
| 903 | data = gb_uncompress_longs(data, size, &new_size); |
|---|
| 904 | } |
|---|
| 905 | else { |
|---|
| 906 | error = GB_export_errorf("Internal Error: Cannot uncompress data of field '%s'", GB_read_key_pntr(gbe)); |
|---|
| 907 | } |
|---|
| 908 | |
|---|
| 909 | if (error) { |
|---|
| 910 | GB_internal_error(error); |
|---|
| 911 | break; |
|---|
| 912 | } |
|---|
| 913 | } |
|---|
| 914 | } |
|---|
| 915 | } |
|---|
| 916 | } |
|---|
| 917 | return false; |
|---|
| 918 | } |
|---|
| 919 | |
|---|
| 920 | // ----------------------------------- |
|---|
| 921 | // Top compression algorithms |
|---|
| 922 | |
|---|
| 923 | GB_BUFFER gb_compress_data(GBDATA *gbd, int key, GB_CBUFFER source, size_t size, size_t *msize, GB_COMPRESSION_MASK max_compr, bool pre_compressed) { |
|---|
| 924 | // Compress a data string. |
|---|
| 925 | // |
|---|
| 926 | // Returns: |
|---|
| 927 | // - NULp if no compression makes sense |
|---|
| 928 | // - otherwise returns compressed data and stores its size in 'msize' |
|---|
| 929 | |
|---|
| 930 | char *data; |
|---|
| 931 | int last_flag = GB_COMPRESSION_LAST; |
|---|
| 932 | |
|---|
| 933 | if (pre_compressed) { |
|---|
| 934 | last_flag = 0; |
|---|
| 935 | } |
|---|
| 936 | |
|---|
| 937 | gb_assert(1); |
|---|
| 938 | |
|---|
| 939 | if (max_compr & GB_COMPRESSION_SORTBYTES) { |
|---|
| 940 | source = gb_compress_longs(source, size, last_flag); |
|---|
| 941 | last_flag = 0; |
|---|
| 942 | size++; // @@@ OLI |
|---|
| 943 | } |
|---|
| 944 | else if (max_compr & GB_COMPRESSION_DICTIONARY) { |
|---|
| 945 | GB_MAIN_TYPE *Main = GB_MAIN(gbd); |
|---|
| 946 | GB_DICTIONARY *dict; |
|---|
| 947 | if (!key) { |
|---|
| 948 | key = GB_KEY_QUARK(gbd); |
|---|
| 949 | } |
|---|
| 950 | dict = gb_get_dictionary(Main, key); |
|---|
| 951 | if (dict) { |
|---|
| 952 | size_t real_size = size-(gbd->type()==GB_STRING); // for strings w/o trailing zero; @@@ or use is_a_string()? |
|---|
| 953 | |
|---|
| 954 | if (real_size) { |
|---|
| 955 | data = gb_compress_by_dictionary(dict, source, real_size, msize, last_flag, 9999, 3); |
|---|
| 956 | |
|---|
| 957 | if ((*msize<=10 && size>10) || *msize < size*7/8) { // successful compression |
|---|
| 958 | source = data; |
|---|
| 959 | size = *msize; |
|---|
| 960 | last_flag = 0; |
|---|
| 961 | } |
|---|
| 962 | } |
|---|
| 963 | } |
|---|
| 964 | |
|---|
| 965 | } |
|---|
| 966 | if (max_compr & GB_COMPRESSION_RUNLENGTH && size > GB_RUNLENGTH_MIN_SIZE) { |
|---|
| 967 | data = gb_compress_equal_bytes(source, size, msize, last_flag); |
|---|
| 968 | if (*msize < size-10 && *msize < size*7/8) { // successful compression |
|---|
| 969 | source = data; |
|---|
| 970 | size = *msize; |
|---|
| 971 | last_flag = 0; |
|---|
| 972 | } |
|---|
| 973 | } |
|---|
| 974 | |
|---|
| 975 | if (max_compr & GB_COMPRESSION_HUFFMANN && size > GB_HUFFMAN_MIN_SIZE) { |
|---|
| 976 | data = gb_compress_huffmann(source, size, msize, last_flag); |
|---|
| 977 | if (*msize < size-10 && *msize < size*7/8) { // successful compression |
|---|
| 978 | source = data; |
|---|
| 979 | size = *msize; |
|---|
| 980 | last_flag = 0; |
|---|
| 981 | } |
|---|
| 982 | } |
|---|
| 983 | |
|---|
| 984 | *msize = size; |
|---|
| 985 | |
|---|
| 986 | if (last_flag) return NULp; // no compression |
|---|
| 987 | return (char *)source; |
|---|
| 988 | } |
|---|
| 989 | |
|---|
| 990 | GB_CBUFFER gb_uncompress_data(GBDATA *gbd, GB_CBUFFER source, size_t size) { |
|---|
| 991 | int last = 0; |
|---|
| 992 | const char *data = (char *)source; |
|---|
| 993 | size_t new_size = -1; |
|---|
| 994 | GB_ERROR error = NULp; |
|---|
| 995 | |
|---|
| 996 | while (!last) { |
|---|
| 997 | int c = *((unsigned char *)(data++)); |
|---|
| 998 | if (c & GB_COMPRESSION_LAST) { |
|---|
| 999 | last = 1; |
|---|
| 1000 | c &= ~GB_COMPRESSION_LAST; |
|---|
| 1001 | } |
|---|
| 1002 | if (c == GB_COMPRESSION_HUFFMANN) { |
|---|
| 1003 | data = gb_uncompress_huffmann(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size); |
|---|
| 1004 | } |
|---|
| 1005 | else if (c == GB_COMPRESSION_RUNLENGTH) { |
|---|
| 1006 | data = gb_uncompress_equal_bytes(data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size); |
|---|
| 1007 | } |
|---|
| 1008 | else if (c == GB_COMPRESSION_DICTIONARY) { |
|---|
| 1009 | data = gb_uncompress_by_dictionary(gbd, data, size + GB_COMPRESSION_TAGS_SIZE_MAX, &new_size); |
|---|
| 1010 | } |
|---|
| 1011 | else if (c == GB_COMPRESSION_SEQUENCE) { |
|---|
| 1012 | data = gb_uncompress_by_sequence(gbd, data, size, &error, &new_size); |
|---|
| 1013 | } |
|---|
| 1014 | else if (c == GB_COMPRESSION_SORTBYTES) { |
|---|
| 1015 | data = gb_uncompress_longs(data, size, &new_size); |
|---|
| 1016 | } |
|---|
| 1017 | else { |
|---|
| 1018 | error = GBS_global_string("Internal Error: Cannot uncompress data of field '%s'", GB_read_key_pntr(gbd)); |
|---|
| 1019 | } |
|---|
| 1020 | |
|---|
| 1021 | if (!data && !error) error = GB_await_error(); |
|---|
| 1022 | if (error) last = 1; // sth went wrong, stop |
|---|
| 1023 | } |
|---|
| 1024 | |
|---|
| 1025 | if (!error && new_size != size) { |
|---|
| 1026 | error = GBS_global_string("Wrong decompressed size (expected=%zi, got=%zi)", size, new_size); |
|---|
| 1027 | } |
|---|
| 1028 | |
|---|
| 1029 | if (error) { |
|---|
| 1030 | GB_export_error(error); |
|---|
| 1031 | data = NULp; |
|---|
| 1032 | } |
|---|
| 1033 | |
|---|
| 1034 | return data; |
|---|
| 1035 | } |
|---|
| 1036 | |
|---|