| 1 | #include <stdio.h> |
|---|
| 2 | #include <stdlib.h> |
|---|
| 3 | #include <sys/time.h> |
|---|
| 4 | // #include <malloc.h> |
|---|
| 5 | #include <memory.h> |
|---|
| 6 | #include <string.h> |
|---|
| 7 | #include <math.h> |
|---|
| 8 | #include <PT_server.h> |
|---|
| 9 | #include "ptpan.h" |
|---|
| 10 | #include "pt_prototypes.h" |
|---|
| 11 | #include <arbdbt.h> |
|---|
| 12 | #include <BI_helix.hxx> |
|---|
| 13 | |
|---|
| 14 | #ifdef BENCHMARK |
|---|
| 15 | /* /// "BenchTimePassed()" */ |
|---|
| 16 | ULONG BenchTimePassed(struct PTPanGlobal *pg) |
|---|
| 17 | { |
|---|
| 18 | ULONG ms; |
|---|
| 19 | |
|---|
| 20 | gettimeofday(&pg->pg_Bench.ts_Now, NULL); |
|---|
| 21 | ms = (pg->pg_Bench.ts_Now.tv_sec - pg->pg_Bench.ts_Last.tv_sec) * 1000; |
|---|
| 22 | if(pg->pg_Bench.ts_Now.tv_usec < pg->pg_Bench.ts_Last.tv_usec) |
|---|
| 23 | { |
|---|
| 24 | ms -= 1000 - (pg->pg_Bench.ts_Last.tv_usec - pg->pg_Bench.ts_Now.tv_usec) / 1000; |
|---|
| 25 | } else { |
|---|
| 26 | ms += (pg->pg_Bench.ts_Now.tv_usec - pg->pg_Bench.ts_Last.tv_usec) / 1000; |
|---|
| 27 | } |
|---|
| 28 | pg->pg_Bench.ts_Last = pg->pg_Bench.ts_Now; |
|---|
| 29 | return(ms); |
|---|
| 30 | } |
|---|
| 31 | /* \\\ */ |
|---|
| 32 | |
|---|
| 33 | /* /// "BenchOutput()" */ |
|---|
| 34 | void BenchOutput(struct PTPanGlobal *pg) |
|---|
| 35 | { |
|---|
| 36 | struct PTPanPartition *pp; |
|---|
| 37 | ULONG diskidxspace = 0; |
|---|
| 38 | ULONG disknodecount = 0; |
|---|
| 39 | ULONG disknodespace = 0; |
|---|
| 40 | ULONG diskleafcount = 0; |
|---|
| 41 | ULONG diskleafspace = 0; |
|---|
| 42 | ULONG diskouterleaves = 0; |
|---|
| 43 | ULONG memused; |
|---|
| 44 | ULONG memusedmax = 0; |
|---|
| 45 | |
|---|
| 46 | pg->pg_Bench.ts_Last = pg->pg_Bench.ts_Init; |
|---|
| 47 | pg->pg_Bench.ts_TotalBuild = BenchTimePassed(pg); |
|---|
| 48 | printf("pDAT: (id np fsize tsize bufmem used 2e 5e depth pdepth plen edges ledges dictsize node# nodespc leaf# leafcnt outl)\n"); |
|---|
| 49 | pp = (struct PTPanPartition *) pg->pg_Partitions.lh_Head; |
|---|
| 50 | while(pp->pp_Node.ln_Succ) |
|---|
| 51 | { |
|---|
| 52 | diskidxspace += pp->pp_DiskIdxSpace; |
|---|
| 53 | disknodecount += pp->pp_DiskNodeCount; |
|---|
| 54 | disknodespace += pp->pp_DiskNodeSpace; |
|---|
| 55 | diskleafcount += pp->pp_DiskLeafCount; |
|---|
| 56 | diskleafspace += pp->pp_DiskLeafSpace; |
|---|
| 57 | diskouterleaves += pp->pp_DiskOuterLeaves; |
|---|
| 58 | memused = pp->pp_SfxMemorySize - (pp->pp_Sfx2EdgeOffset - pp->pp_SfxNEdgeOffset); |
|---|
| 59 | if(memused > memusedmax) |
|---|
| 60 | { |
|---|
| 61 | memusedmax = memused; |
|---|
| 62 | } |
|---|
| 63 | printf("%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %s PDAT\n", |
|---|
| 64 | pp->pp_ID, |
|---|
| 65 | pp->pp_Size, |
|---|
| 66 | pp->pp_DiskIdxSpace, |
|---|
| 67 | pp->pp_DiskTreeSize, |
|---|
| 68 | pp->pp_SfxMemorySize, |
|---|
| 69 | memused, |
|---|
| 70 | pp->pp_NumSmallNodes, |
|---|
| 71 | pp->pp_NumBigNodes, |
|---|
| 72 | pp->pp_MaxTreeDepth, |
|---|
| 73 | pp->pp_TreePruneDepth, |
|---|
| 74 | pp->pp_TreePruneLength, |
|---|
| 75 | pp->pp_EdgeCount, |
|---|
| 76 | pp->pp_LongEdgeCount, |
|---|
| 77 | pp->pp_LongDictRawSize, |
|---|
| 78 | pp->pp_DiskNodeCount, |
|---|
| 79 | pp->pp_DiskNodeSpace, |
|---|
| 80 | pp->pp_DiskLeafCount, |
|---|
| 81 | pp->pp_DiskLeafSpace, |
|---|
| 82 | pp->pp_DiskOuterLeaves, |
|---|
| 83 | pg->pg_DBName); |
|---|
| 84 | |
|---|
| 85 | pp = (struct PTPanPartition *) pp->pp_Node.ln_Succ; |
|---|
| 86 | } |
|---|
| 87 | |
|---|
| 88 | printf("gDAT: (n s lb np t idxsize memusedmax node# nodespc leaf# leafcnt outl Total CollDB MergeDB PScan MemTree Stats LDPre LDBuild Reloc Disk)\n" |
|---|
| 89 | "%lld %ld %ld %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %s GDAT\n", |
|---|
| 90 | pg->pg_TotalRawSize, |
|---|
| 91 | pg->pg_NumSpecies, |
|---|
| 92 | pg->pg_MaxBaseLength, |
|---|
| 93 | pg->pg_NumPartitions, |
|---|
| 94 | pg->pg_MaxPartitionSize, |
|---|
| 95 | diskidxspace, |
|---|
| 96 | memusedmax, |
|---|
| 97 | disknodecount, |
|---|
| 98 | disknodespace, |
|---|
| 99 | diskleafcount, |
|---|
| 100 | diskleafspace, |
|---|
| 101 | diskouterleaves, |
|---|
| 102 | pg->pg_Bench.ts_TotalBuild, |
|---|
| 103 | pg->pg_Bench.ts_CollectDB, |
|---|
| 104 | pg->pg_Bench.ts_MergeDB, |
|---|
| 105 | pg->pg_Bench.ts_PrefixScan, |
|---|
| 106 | pg->pg_Bench.ts_MemTree, |
|---|
| 107 | pg->pg_Bench.ts_TreeStats, |
|---|
| 108 | pg->pg_Bench.ts_LongDictPre, |
|---|
| 109 | pg->pg_Bench.ts_LongDictBuild, |
|---|
| 110 | pg->pg_Bench.ts_Reloc, |
|---|
| 111 | pg->pg_Bench.ts_Writing, |
|---|
| 112 | pg->pg_DBName); |
|---|
| 113 | }; |
|---|
| 114 | /* \\\ */ |
|---|
| 115 | #endif |
|---|
| 116 | |
|---|
| 117 | /* /// "GetSequenceRelPos()" */ |
|---|
| 118 | /* |
|---|
| 119 | ULONG GetSequenceRelPos(struct PTPanGlobal *pg, STRPTR srcseq, ULONG abspos) |
|---|
| 120 | { |
|---|
| 121 | ULONG relpos = 0; |
|---|
| 122 | // given an absolute sequence position, search for the relative one, |
|---|
| 123 | // e.g. abspos 2 on "-----UU-C-C" will yield 8 |
|---|
| 124 | while(*srcseq) |
|---|
| 125 | { |
|---|
| 126 | if(pg->pg_SeqCodeValidTable[*srcseq++]) |
|---|
| 127 | { |
|---|
| 128 | if(!(abspos--)) |
|---|
| 129 | { |
|---|
| 130 | break; // position found |
|---|
| 131 | } |
|---|
| 132 | } |
|---|
| 133 | relpos++; |
|---|
| 134 | } |
|---|
| 135 | return(relpos); |
|---|
| 136 | } |
|---|
| 137 | */ |
|---|
| 138 | /* \\\ */ |
|---|
| 139 | |
|---|
| 140 | /* /// "GetSequenceAbsPos()" */ |
|---|
| 141 | /* |
|---|
| 142 | ULONG GetSequenceAbsPos(struct PTPanGlobal *pg, STRPTR srcseq, ULONG relpos) |
|---|
| 143 | { |
|---|
| 144 | ULONG abspos = 0; |
|---|
| 145 | // given an absolute sequence position, search for the relative one, |
|---|
| 146 | // e.g. relpos 8 on "-----UU-C-C" will yield 3 |
|---|
| 147 | while(*srcseq && relpos--) |
|---|
| 148 | { |
|---|
| 149 | if(pg->pg_SeqCodeValidTable[*srcseq++]) |
|---|
| 150 | { |
|---|
| 151 | abspos++; |
|---|
| 152 | } |
|---|
| 153 | } |
|---|
| 154 | return(abspos); |
|---|
| 155 | } |
|---|
| 156 | */ |
|---|
| 157 | /* \\\ */ |
|---|
| 158 | |
|---|
| 159 | /* /// "CalcLengthForFilteredSequence()" */ |
|---|
| 160 | ULONG CalcLengthForFilteredSequence(struct PTPanGlobal *pg, STRPTR srcseq) |
|---|
| 161 | { |
|---|
| 162 | ULONG len = 0; |
|---|
| 163 | /* calculate size of compressed sequence */ |
|---|
| 164 | while(*srcseq) |
|---|
| 165 | { |
|---|
| 166 | len += pg->pg_SeqCodeValidTable[*srcseq++]; |
|---|
| 167 | } |
|---|
| 168 | return(len); |
|---|
| 169 | } |
|---|
| 170 | /* \\\ */ |
|---|
| 171 | |
|---|
| 172 | /* /// "FilterSequenceTo()" */ |
|---|
| 173 | ULONG FilterSequenceTo(struct PTPanGlobal *pg, STRPTR srcstr, STRPTR filtptr) |
|---|
| 174 | { |
|---|
| 175 | ULONG len = 0; |
|---|
| 176 | UBYTE code; |
|---|
| 177 | |
|---|
| 178 | /* now actually filter the sequence */ |
|---|
| 179 | while((code = *srcstr++)) |
|---|
| 180 | { |
|---|
| 181 | if(pg->pg_SeqCodeValidTable[code]) |
|---|
| 182 | { |
|---|
| 183 | /* add sequence code */ |
|---|
| 184 | *filtptr++ = pg->pg_DecompressTable[pg->pg_CompressTable[code]]; |
|---|
| 185 | len++; |
|---|
| 186 | } |
|---|
| 187 | } |
|---|
| 188 | *filtptr = 0; |
|---|
| 189 | return(len); |
|---|
| 190 | } |
|---|
| 191 | /* \\\ */ |
|---|
| 192 | |
|---|
| 193 | /* /// "FilterSequence()" */ |
|---|
| 194 | STRPTR FilterSequence(struct PTPanGlobal *pg, STRPTR srcseq) |
|---|
| 195 | { |
|---|
| 196 | ULONG len; |
|---|
| 197 | STRPTR filtseq; |
|---|
| 198 | |
|---|
| 199 | len = CalcLengthForFilteredSequence(pg, srcseq); |
|---|
| 200 | filtseq = (STRPTR) malloc(len + 1); |
|---|
| 201 | if(!filtseq) |
|---|
| 202 | { |
|---|
| 203 | return(NULL); /* out of memory */ |
|---|
| 204 | } |
|---|
| 205 | /* now actually compress the sequence */ |
|---|
| 206 | len = FilterSequenceTo(pg, srcseq, filtseq); |
|---|
| 207 | //printf("%ld bytes used.\n", len); |
|---|
| 208 | |
|---|
| 209 | return(filtseq); |
|---|
| 210 | } |
|---|
| 211 | /* \\\ */ |
|---|
| 212 | |
|---|
| 213 | /* /// "CompressSequenceTo()" */ |
|---|
| 214 | ULONG CompressSequenceTo(struct PTPanGlobal *pg, STRPTR srcseq, ULONG *seqptr) |
|---|
| 215 | { |
|---|
| 216 | ULONG len; |
|---|
| 217 | ULONG seqcode; |
|---|
| 218 | UWORD cnt; |
|---|
| 219 | ULONG pval; |
|---|
| 220 | UBYTE code; |
|---|
| 221 | |
|---|
| 222 | /* now actually compress the sequence */ |
|---|
| 223 | len = 4; |
|---|
| 224 | cnt = 0; |
|---|
| 225 | pval = 0; |
|---|
| 226 | while((code = *srcseq++)) |
|---|
| 227 | { |
|---|
| 228 | if(pg->pg_SeqCodeValidTable[code]) |
|---|
| 229 | { |
|---|
| 230 | /* add sequence code */ |
|---|
| 231 | seqcode = pg->pg_CompressTable[code]; |
|---|
| 232 | pval *= pg->pg_AlphaSize; |
|---|
| 233 | pval += seqcode; |
|---|
| 234 | /* check, if storage capacity was reached? */ |
|---|
| 235 | if(++cnt == MAXCODEFITLONG) |
|---|
| 236 | { |
|---|
| 237 | /* write out compressed longword (with eof bit) */ |
|---|
| 238 | //printf("[%08lx]", pval | pg->pg_BitsMaskTable[cnt]); |
|---|
| 239 | *seqptr++ = (pval << pg->pg_BitsShiftTable[MAXCODEFITLONG]) | pg->pg_BitsMaskTable[MAXCODEFITLONG]; |
|---|
| 240 | len += 4; |
|---|
| 241 | cnt = 0; |
|---|
| 242 | pval = 0; |
|---|
| 243 | } |
|---|
| 244 | } |
|---|
| 245 | } |
|---|
| 246 | |
|---|
| 247 | /* write pending bits (with eof bit) */ |
|---|
| 248 | *seqptr = (pval << pg->pg_BitsShiftTable[cnt]) | pg->pg_BitsMaskTable[cnt]; |
|---|
| 249 | //printf("[%08lx]\n", *seqptr); |
|---|
| 250 | return(len); |
|---|
| 251 | } |
|---|
| 252 | /* \\\ */ |
|---|
| 253 | |
|---|
| 254 | /* /// "CompressSequence()" */ |
|---|
| 255 | ULONG * CompressSequence(struct PTPanGlobal *pg, STRPTR srcseq) |
|---|
| 256 | { |
|---|
| 257 | ULONG len; |
|---|
| 258 | ULONG *compseq; |
|---|
| 259 | |
|---|
| 260 | len = CalcLengthForFilteredSequence(pg, srcseq); |
|---|
| 261 | //printf("compressing %s (%ld/%ld)...", srcseq, len, (ULONG) strlen(srcseq)); |
|---|
| 262 | |
|---|
| 263 | /* that's all we need: ceil(len/MAXCODEFITLONG) longwords */ |
|---|
| 264 | compseq = (ULONG *) malloc(((len / MAXCODEFITLONG) + 1) * sizeof(ULONG)); |
|---|
| 265 | if(!compseq) |
|---|
| 266 | { |
|---|
| 267 | return(NULL); /* out of memory */ |
|---|
| 268 | } |
|---|
| 269 | /* now actually compress the sequence */ |
|---|
| 270 | len = CompressSequenceTo(pg, srcseq, compseq); |
|---|
| 271 | //printf("%ld bytes used.\n", len); |
|---|
| 272 | |
|---|
| 273 | return(compseq); |
|---|
| 274 | } |
|---|
| 275 | /* \\\ */ |
|---|
| 276 | |
|---|
| 277 | /* /// "GetLengthOfCompressedSequence() */ |
|---|
| 278 | ULONG GetLengthOfCompressedSequence(struct PTPanGlobal *pg, ULONG *seqptr) |
|---|
| 279 | { |
|---|
| 280 | ULONG len = 0; |
|---|
| 281 | UWORD cnt; |
|---|
| 282 | ULONG mask = pg->pg_BitsMaskTable[MAXCODEFITLONG]; |
|---|
| 283 | do |
|---|
| 284 | { |
|---|
| 285 | if(*seqptr++ & mask) /* check, if lowest bit is set */ |
|---|
| 286 | { |
|---|
| 287 | len += MAXCODEFITLONG; |
|---|
| 288 | } else { |
|---|
| 289 | /* okay, we seem to be at the end of the compressed sequence, |
|---|
| 290 | and we need to find out the actual size */ |
|---|
| 291 | --seqptr; |
|---|
| 292 | cnt = MAXCODEFITLONG; |
|---|
| 293 | while(--cnt) |
|---|
| 294 | { |
|---|
| 295 | if(*seqptr & pg->pg_BitsMaskTable[cnt]) /* seems like we found it */ |
|---|
| 296 | { |
|---|
| 297 | len += cnt; |
|---|
| 298 | break; |
|---|
| 299 | } |
|---|
| 300 | } |
|---|
| 301 | break; |
|---|
| 302 | } |
|---|
| 303 | } while(TRUE); |
|---|
| 304 | return(len); |
|---|
| 305 | } |
|---|
| 306 | /* \\\ */ |
|---|
| 307 | |
|---|
| 308 | /* /// "GetCompressedLongSize()" */ |
|---|
| 309 | UWORD GetCompressedLongSize(struct PTPanGlobal *pg, ULONG pval) |
|---|
| 310 | { |
|---|
| 311 | UWORD cnt = MAXCODEFITLONG; |
|---|
| 312 | while(!(pval & pg->pg_BitsMaskTable[cnt])) /* check, if termination bit is set */ |
|---|
| 313 | { |
|---|
| 314 | cnt--; |
|---|
| 315 | } |
|---|
| 316 | return(cnt); |
|---|
| 317 | } |
|---|
| 318 | /* \\\ */ |
|---|
| 319 | |
|---|
| 320 | /* /// "DecompressSequenceTo() */ |
|---|
| 321 | ULONG DecompressSequenceTo(struct PTPanGlobal *pg, ULONG *seqptr, STRPTR tarseq) |
|---|
| 322 | { |
|---|
| 323 | ULONG len = 0; |
|---|
| 324 | BOOL lastlong; |
|---|
| 325 | UWORD cnt; |
|---|
| 326 | ULONG pval; |
|---|
| 327 | do |
|---|
| 328 | { |
|---|
| 329 | /* get next longword */ |
|---|
| 330 | pval = *seqptr++; |
|---|
| 331 | cnt = GetCompressedLongSize(pg, pval); |
|---|
| 332 | pval >>= pg->pg_BitsShiftTable[cnt]; |
|---|
| 333 | lastlong = (cnt < MAXCODEFITLONG); /* last longword reached? */ |
|---|
| 334 | |
|---|
| 335 | /* unpack compressed longword */ |
|---|
| 336 | if(cnt) |
|---|
| 337 | { |
|---|
| 338 | do |
|---|
| 339 | { |
|---|
| 340 | *tarseq++ = pg->pg_DecompressTable[(pval / pg->pg_PowerTable[--cnt]) |
|---|
| 341 | % pg->pg_AlphaSize]; |
|---|
| 342 | len++; |
|---|
| 343 | } while(cnt); |
|---|
| 344 | } |
|---|
| 345 | } while(!lastlong); |
|---|
| 346 | *tarseq = 0; /* null terminate string */ |
|---|
| 347 | |
|---|
| 348 | return(len); |
|---|
| 349 | } |
|---|
| 350 | /* \\\ */ |
|---|
| 351 | |
|---|
| 352 | /* /// "DecompressCompressedLongTo() */ |
|---|
| 353 | ULONG DecompressCompressedLongTo(struct PTPanGlobal *pg, ULONG pval, STRPTR tarseq) |
|---|
| 354 | { |
|---|
| 355 | ULONG len; |
|---|
| 356 | UWORD cnt; |
|---|
| 357 | |
|---|
| 358 | len = cnt = GetCompressedLongSize(pg, pval); |
|---|
| 359 | pval >>= pg->pg_BitsShiftTable[cnt]; |
|---|
| 360 | /* unpack compressed longword */ |
|---|
| 361 | do |
|---|
| 362 | { |
|---|
| 363 | *tarseq++ = pg->pg_DecompressTable[(pval / pg->pg_PowerTable[--cnt]) |
|---|
| 364 | % pg->pg_AlphaSize]; |
|---|
| 365 | } while(cnt); |
|---|
| 366 | *tarseq = 0; /* null terminate string */ |
|---|
| 367 | return(len); |
|---|
| 368 | } |
|---|
| 369 | /* \\\ */ |
|---|
| 370 | |
|---|
| 371 | /* /// "DecompressSequence()" */ |
|---|
| 372 | STRPTR DecompressSequence(struct PTPanGlobal *pg, ULONG *seqptr) |
|---|
| 373 | { |
|---|
| 374 | ULONG len; |
|---|
| 375 | STRPTR tarseq; |
|---|
| 376 | /* first get length */ |
|---|
| 377 | len = GetLengthOfCompressedSequence(pg, seqptr); |
|---|
| 378 | |
|---|
| 379 | /* allocate memory for uncompressed sequence */ |
|---|
| 380 | tarseq = (STRPTR) malloc(len + 1); |
|---|
| 381 | if(!tarseq) |
|---|
| 382 | { |
|---|
| 383 | return(NULL); /* out of memory */ |
|---|
| 384 | } |
|---|
| 385 | |
|---|
| 386 | /* decompress sequence */ |
|---|
| 387 | DecompressSequenceTo(pg, seqptr, tarseq); |
|---|
| 388 | //printf("Decompressed sequence '%s'\n", tarseq); |
|---|
| 389 | return(tarseq); |
|---|
| 390 | } |
|---|
| 391 | /* \\\ */ |
|---|
| 392 | |
|---|
| 393 | /* /// "DecompressSequencePartTo()" */ |
|---|
| 394 | LONG DecompressSequencePartTo(struct PTPanGlobal *pg, |
|---|
| 395 | ULONG *seqptr, ULONG seqpos, ULONG length, |
|---|
| 396 | STRPTR tarseq) |
|---|
| 397 | { |
|---|
| 398 | ULONG off = seqpos / MAXCODEFITLONG; |
|---|
| 399 | UWORD codeoff = seqpos % MAXCODEFITLONG; |
|---|
| 400 | UWORD cnt; |
|---|
| 401 | ULONG len = 0; |
|---|
| 402 | ULONG pval; |
|---|
| 403 | BOOL lastlong; |
|---|
| 404 | BOOL first; |
|---|
| 405 | |
|---|
| 406 | if(!length) /* empty sequence requested? */ |
|---|
| 407 | { |
|---|
| 408 | *tarseq = 0; |
|---|
| 409 | return(0); |
|---|
| 410 | } |
|---|
| 411 | |
|---|
| 412 | /* decompress sequence */ |
|---|
| 413 | first = TRUE; |
|---|
| 414 | seqptr += off; |
|---|
| 415 | do |
|---|
| 416 | { |
|---|
| 417 | /* get next longword */ |
|---|
| 418 | pval = *seqptr++; |
|---|
| 419 | cnt = GetCompressedLongSize(pg, pval); |
|---|
| 420 | pval >>= pg->pg_BitsShiftTable[cnt]; |
|---|
| 421 | lastlong = (cnt < MAXCODEFITLONG); /* last longword reached? */ |
|---|
| 422 | |
|---|
| 423 | if(first) /* do we need to start at a certain offset? */ |
|---|
| 424 | { |
|---|
| 425 | if(codeoff > cnt) /* past end of sequence? */ |
|---|
| 426 | { |
|---|
| 427 | break; |
|---|
| 428 | } |
|---|
| 429 | cnt -= codeoff; |
|---|
| 430 | first = FALSE; |
|---|
| 431 | } |
|---|
| 432 | /* unpack compressed longword */ |
|---|
| 433 | do |
|---|
| 434 | { |
|---|
| 435 | *tarseq++ = pg->pg_DecompressTable[(pval / pg->pg_PowerTable[--cnt]) |
|---|
| 436 | % pg->pg_AlphaSize]; |
|---|
| 437 | len++; |
|---|
| 438 | length--; |
|---|
| 439 | } while(cnt && length); |
|---|
| 440 | } while(length && !lastlong); |
|---|
| 441 | *tarseq = 0; /* null terminate string */ |
|---|
| 442 | |
|---|
| 443 | return(len); |
|---|
| 444 | } |
|---|
| 445 | /* \\\ */ |
|---|
| 446 | |
|---|
| 447 | |
|---|
| 448 | /* /// "GetNextCharacter()" */ |
|---|
| 449 | UBYTE GetNextCharacter(struct PTPanGlobal *pg, UBYTE* buffer, ULONG &bitpos, ULONG &count) |
|---|
| 450 | { |
|---|
| 451 | UBYTE character = 0xff; // return the next character of |
|---|
| 452 | UBYTE code; // sequence or 0xff if end flag found |
|---|
| 453 | // increase bitpos by consumed bits |
|---|
| 454 | code = ReadBits(buffer, bitpos, 3); // set count to the number of |
|---|
| 455 | bitpos += 3; // found characters |
|---|
| 456 | |
|---|
| 457 | if (code == 0x07) // end flag |
|---|
| 458 | { |
|---|
| 459 | return 0xff; |
|---|
| 460 | } else if (code <= SEQCODE_T) // valid character |
|---|
| 461 | { |
|---|
| 462 | character = pg->pg_DecompressTable[code]; |
|---|
| 463 | count = 1; |
|---|
| 464 | } else if ((code == SEQCODE_DOT) || (code == SEQCODE_HYPHEN)) // '.' or '-' |
|---|
| 465 | { // skip ... chars |
|---|
| 466 | if (code == SEQCODE_DOT) character = '.'; |
|---|
| 467 | if (code == SEQCODE_HYPHEN) character = '-'; |
|---|
| 468 | |
|---|
| 469 | code = ReadBits(buffer, bitpos, 4); |
|---|
| 470 | if ((code >> 3) == 0x01) // 1xxx skip one |
|---|
| 471 | { |
|---|
| 472 | count = 1; |
|---|
| 473 | ++bitpos; |
|---|
| 474 | } else if ((code >> 2) == 0x01) // 01xx skip two |
|---|
| 475 | { |
|---|
| 476 | count = 2; |
|---|
| 477 | bitpos += 2; |
|---|
| 478 | } else if ((code >> 1) == 0x01) // 001x skip up to 63 |
|---|
| 479 | { |
|---|
| 480 | bitpos += 3; |
|---|
| 481 | count = ReadBits(buffer, bitpos, 6); |
|---|
| 482 | bitpos += 6; |
|---|
| 483 | } else if ((code) == 0x01) // 0001 skip up to 1023 |
|---|
| 484 | { |
|---|
| 485 | bitpos += 4; |
|---|
| 486 | count = ReadBits(buffer, bitpos, 10); |
|---|
| 487 | bitpos += 10; |
|---|
| 488 | } else if ((code) == 0x00) // 0000 skip up to 8191 |
|---|
| 489 | { |
|---|
| 490 | bitpos += 4; |
|---|
| 491 | count = ReadBits(buffer, bitpos, 13); |
|---|
| 492 | bitpos += 13; |
|---|
| 493 | |
|---|
| 494 | ULONG tmpbitpos = bitpos; // test if next char is also the same. |
|---|
| 495 | ULONG tmpcount = count; |
|---|
| 496 | UBYTE tmpcode = GetNextCharacter(pg, buffer, tmpbitpos, tmpcount); |
|---|
| 497 | if (character == tmpcode) // it is -> the number of same characters |
|---|
| 498 | { // was splitted (i.e. >8191) |
|---|
| 499 | arb_assert(count == 8191); |
|---|
| 500 | bitpos = tmpbitpos; // consume bits... |
|---|
| 501 | count += tmpcount; // ...and add count |
|---|
| 502 | } |
|---|
| 503 | } else // |
|---|
| 504 | { |
|---|
| 505 | arb_assert(false); // shouldn't be possible to get to this line |
|---|
| 506 | } |
|---|
| 507 | } else // neither end-flag nor valid char |
|---|
| 508 | { // nor '.' nor '-' => something went wrong |
|---|
| 509 | arb_assert(false); |
|---|
| 510 | } |
|---|
| 511 | return character; |
|---|
| 512 | } |
|---|
| 513 | /* \\\ */ |
|---|
| 514 | |
|---|
| 515 | |
|---|
| 516 | ULONG WriteManyChars(UBYTE* buffer, ULONG bitpos, BYTE c, ULONG i) |
|---|
| 517 | { |
|---|
| 518 | arb_assert((c == SEQCODE_DOT) || (c == SEQCODE_HYPHEN)); // only '.' and '-' are allowed |
|---|
| 519 | while (i > 0) |
|---|
| 520 | { |
|---|
| 521 | bitpos = WriteBits(buffer, bitpos, c, 3); // code for character |
|---|
| 522 | if (i == 1) |
|---|
| 523 | { |
|---|
| 524 | bitpos = WriteBits(buffer, bitpos, 0x01, 1); // 1 |
|---|
| 525 | return bitpos; |
|---|
| 526 | } |
|---|
| 527 | if (i == 2) |
|---|
| 528 | { |
|---|
| 529 | bitpos = WriteBits(buffer, bitpos, 0x01, 2); // 01 |
|---|
| 530 | return bitpos; |
|---|
| 531 | } |
|---|
| 532 | if (i <= 63) |
|---|
| 533 | { |
|---|
| 534 | bitpos = WriteBits(buffer, bitpos, 0x01, 3); // 001 |
|---|
| 535 | bitpos = WriteBits(buffer, bitpos, (i & 0x3f), 6); // 6 bit payload (up to 63) |
|---|
| 536 | return bitpos; |
|---|
| 537 | } |
|---|
| 538 | if (i <= 1023) |
|---|
| 539 | { |
|---|
| 540 | bitpos = WriteBits(buffer, bitpos, 0x01, 4); // 0001 |
|---|
| 541 | bitpos = WriteBits(buffer, bitpos, (i & 0x3ff), 10); // 10 bit payload (up to 1023) |
|---|
| 542 | return bitpos; |
|---|
| 543 | } |
|---|
| 544 | if (i <= 8191) |
|---|
| 545 | { |
|---|
| 546 | bitpos = WriteBits(buffer, bitpos, 0x00, 4); // 0000 |
|---|
| 547 | bitpos = WriteBits(buffer, bitpos, (i & 0x1fff), 13); // 13 bit payload (up to 8191) |
|---|
| 548 | return bitpos; |
|---|
| 549 | } |
|---|
| 550 | bitpos = WriteBits(buffer, bitpos, 0x00, 4); // 0000 |
|---|
| 551 | bitpos = WriteBits(buffer, bitpos, 8191, 13); // 13 bit payload (exactly 8191) |
|---|
| 552 | i -= 8191; |
|---|
| 553 | } |
|---|
| 554 | return bitpos; |
|---|
| 555 | } |
|---|
| 556 | |
|---|
| 557 | |
|---|
| 558 | /* /// "CompressSequenceWithDotsAndHyphens()" */ |
|---|
| 559 | ULONG CompressSequenceWithDotsAndHyphens(struct PTPanGlobal *pg, struct PTPanSpecies *ps) |
|---|
| 560 | { |
|---|
| 561 | ULONG len = 0; // len is the count of characters inserted into ps_RawData |
|---|
| 562 | ULONG bitpos = 0; // ...ps_RawDataSize will be set to len later |
|---|
| 563 | UBYTE* ptr = (UBYTE*) ps->ps_SeqData; |
|---|
| 564 | UBYTE* buffer = (UBYTE*) malloc((ps->ps_SeqDataSize * 3 / 8) + 1); // TODO: look over it and find a good |
|---|
| 565 | if (buffer == NULL) // estimation of needed size |
|---|
| 566 | { // TODO: what is faster, precount or |
|---|
| 567 | // estimate and realloc? |
|---|
| 568 | printf("Error: Could not get enough memory to compress sequences with dots and hyphens\n"); |
|---|
| 569 | return FALSE; |
|---|
| 570 | } |
|---|
| 571 | while (*ptr) |
|---|
| 572 | { |
|---|
| 573 | arb_assert(((bitpos >> 3) + 1 < ps->ps_SeqDataSize)); |
|---|
| 574 | if (*ptr == '.') |
|---|
| 575 | { // found a '.' |
|---|
| 576 | ULONG count; |
|---|
| 577 | for (count = 0; *ptr == '.'; ++count, ++ptr) { } // count all '.' |
|---|
| 578 | #ifdef ALLOWDOTSINMATCH |
|---|
| 579 | if (count <= MAXDOTSINMATCH) |
|---|
| 580 | { |
|---|
| 581 | len += count; |
|---|
| 582 | while (count-- > 0) // write 'count' |
|---|
| 583 | { // times one '.' |
|---|
| 584 | bitpos = WriteManyChars(buffer, bitpos, SEQCODE_DOT, 1); |
|---|
| 585 | } |
|---|
| 586 | } else bitpos = WriteManyChars(buffer, bitpos, SEQCODE_DOT, count); // write all '.' |
|---|
| 587 | #else |
|---|
| 588 | bitpos = WriteManyChars(buffer, bitpos, SEQCODE_DOT, count); // write all '.' |
|---|
| 589 | #endif |
|---|
| 590 | } else if (*ptr == '-') |
|---|
| 591 | { // found a '-' |
|---|
| 592 | ULONG count; |
|---|
| 593 | for (count = 0; *ptr == '-'; ++count, ++ptr) { } // count all '-' |
|---|
| 594 | bitpos = WriteManyChars(buffer, bitpos, SEQCODE_HYPHEN, count); // write all '-' |
|---|
| 595 | } else if (pg->pg_SeqCodeValidTable[*ptr]) |
|---|
| 596 | { // found a valid character |
|---|
| 597 | UBYTE seqcode = pg->pg_CompressTable[*ptr]; |
|---|
| 598 | arb_assert(seqcode <= SEQCODE_T); |
|---|
| 599 | bitpos = WriteBits(buffer, bitpos, seqcode, 3); // write valid char |
|---|
| 600 | ++ptr; |
|---|
| 601 | ++len; |
|---|
| 602 | } else |
|---|
| 603 | { // found an unknown char |
|---|
| 604 | // printf("Found an unknown char in Species Sequence - ignoring\n"); |
|---|
| 605 | bitpos = WriteManyChars(buffer, bitpos, SEQCODE_HYPHEN, 1); // write one '-' |
|---|
| 606 | ++ptr; |
|---|
| 607 | } |
|---|
| 608 | } |
|---|
| 609 | bitpos = WriteBits(buffer, bitpos, 0x07, 3); // write end flag (111) |
|---|
| 610 | |
|---|
| 611 | ps->ps_SeqDataCompressedSize = bitpos; |
|---|
| 612 | ps->ps_SeqDataCompressed = (UBYTE*) realloc(buffer, (bitpos >> 3) + 1); |
|---|
| 613 | if (ps->ps_SeqDataCompressed == NULL) |
|---|
| 614 | { |
|---|
| 615 | printf("Error: Could not get enough memory to compress sequences with dots and hyphens\n"); |
|---|
| 616 | return -1; |
|---|
| 617 | } |
|---|
| 618 | if (ReadBits(buffer, bitpos - 3, 3) != 0x07) |
|---|
| 619 | { |
|---|
| 620 | printf("Error Compressing SeqData (with '.' and '-')\tSpecies: %s\n", ps->ps_Name); |
|---|
| 621 | return -1; |
|---|
| 622 | } |
|---|
| 623 | |
|---|
| 624 | pg->pg_TotalSeqCompressedSize += ((ps->ps_SeqDataCompressedSize >> 3) + 1); // convert from bit to byte |
|---|
| 625 | return len; |
|---|
| 626 | } |
|---|
| 627 | |
|---|
| 628 | |
|---|
| 629 | /* /// "ComplementSequence()" */ |
|---|
| 630 | void ComplementSequence(struct PTPanGlobal *pg, STRPTR seqstr) |
|---|
| 631 | { |
|---|
| 632 | UBYTE code; |
|---|
| 633 | /* flip A<->T and C<->G */ |
|---|
| 634 | while((code = *seqstr)) |
|---|
| 635 | { |
|---|
| 636 | *seqstr++ = pg->pg_DecompressTable[pg->pg_ComplementTable[pg->pg_CompressTable[code]]]; |
|---|
| 637 | } |
|---|
| 638 | } |
|---|
| 639 | /* \\\ */ |
|---|
| 640 | |
|---|
| 641 | /* /// "ReverseSequence()" */ |
|---|
| 642 | void ReverseSequence(struct PTPanGlobal *, STRPTR seqstr) |
|---|
| 643 | { |
|---|
| 644 | char code; |
|---|
| 645 | STRPTR leftptr = seqstr; |
|---|
| 646 | STRPTR rightptr = &seqstr[strlen(seqstr)]; |
|---|
| 647 | |
|---|
| 648 | /* reverse the sequence string */ |
|---|
| 649 | while(leftptr < rightptr) |
|---|
| 650 | { |
|---|
| 651 | code = *leftptr; |
|---|
| 652 | *leftptr++ = *--rightptr; |
|---|
| 653 | *rightptr = code; |
|---|
| 654 | } |
|---|
| 655 | } |
|---|
| 656 | /* \\\ */ |
|---|
| 657 | |
|---|
| 658 | /* /// "OpenDataBase()" |
|---|
| 659 | initially open the database and read the species data. |
|---|
| 660 | */ |
|---|
| 661 | BOOL OpenDataBase(struct PTPanGlobal *pg) |
|---|
| 662 | { |
|---|
| 663 | GB_set_verbose(); |
|---|
| 664 | /* open the database */ |
|---|
| 665 | if(!(pg->pg_MainDB = GB_open(pg->pg_DBName, "r"))) |
|---|
| 666 | { |
|---|
| 667 | printf("Error reading file %s\n", pg->pg_DBName); |
|---|
| 668 | return(FALSE); |
|---|
| 669 | } |
|---|
| 670 | GB_begin_transaction(pg->pg_MainDB); |
|---|
| 671 | /* open the species data */ |
|---|
| 672 | if(!(pg->pg_SpeciesData = GB_find(pg->pg_MainDB, "species_data", down_level))) |
|---|
| 673 | { |
|---|
| 674 | printf("Database %s is empty\n", pg->pg_DBName); |
|---|
| 675 | return(FALSE); |
|---|
| 676 | } |
|---|
| 677 | /* add the extended data container */ |
|---|
| 678 | pg->pg_SaiData = GBT_get_SAI_data(pg->pg_MainDB); |
|---|
| 679 | pg->pg_AlignmentName = GBT_get_default_alignment(pg->pg_MainDB); |
|---|
| 680 | |
|---|
| 681 | printf("Building PT-Server for alignment '%s'...\n", pg->pg_AlignmentName); |
|---|
| 682 | |
|---|
| 683 | GB_commit_transaction(pg->pg_MainDB); |
|---|
| 684 | |
|---|
| 685 | return(TRUE); |
|---|
| 686 | } |
|---|
| 687 | /* \\\ */ |
|---|
| 688 | |
|---|
| 689 | /* /// "LoadEcoliSequence()" */ |
|---|
| 690 | BOOL LoadEcoliSequence(struct PTPanGlobal *pg) |
|---|
| 691 | { |
|---|
| 692 | GBDATA *gb_extdata; |
|---|
| 693 | STRPTR defaultref = GBT_get_default_ref(pg->pg_MainDB); |
|---|
| 694 | |
|---|
| 695 | gb_extdata = GBT_find_SAI_rel_SAI_data(pg->pg_SaiData, defaultref); |
|---|
| 696 | free(defaultref); |
|---|
| 697 | |
|---|
| 698 | /* free memory if previously allocated */ |
|---|
| 699 | freeset(pg->pg_EcoliSeq, NULL); |
|---|
| 700 | freeset(pg->pg_EcoliBaseTable, NULL); |
|---|
| 701 | |
|---|
| 702 | /* prepare ecoli sequence */ |
|---|
| 703 | if(gb_extdata) |
|---|
| 704 | { |
|---|
| 705 | GBDATA *gb_data; |
|---|
| 706 | gb_data = GBT_read_sequence(gb_extdata, pg->pg_AlignmentName); |
|---|
| 707 | if(gb_data) |
|---|
| 708 | { |
|---|
| 709 | ULONG abspos = 0; |
|---|
| 710 | STRPTR srcseq; |
|---|
| 711 | ULONG *posptr; |
|---|
| 712 | |
|---|
| 713 | /* load sequence */ |
|---|
| 714 | pg->pg_EcoliSeqSize = GB_read_string_count(gb_data); |
|---|
| 715 | pg->pg_EcoliSeq = GB_read_string(gb_data); |
|---|
| 716 | |
|---|
| 717 | /* calculate look up table to speed up ecoli position calculation */ |
|---|
| 718 | pg->pg_EcoliBaseTable = (ULONG *) calloc(pg->pg_EcoliSeqSize + 1, sizeof(ULONG)); |
|---|
| 719 | if(pg->pg_EcoliBaseTable) |
|---|
| 720 | { |
|---|
| 721 | srcseq = pg->pg_EcoliSeq; |
|---|
| 722 | posptr = pg->pg_EcoliBaseTable; // TODO: check if this works well |
|---|
| 723 | while(*srcseq) // with ALLOWDOTSINMATCH |
|---|
| 724 | { |
|---|
| 725 | *posptr++ = abspos; |
|---|
| 726 | if(pg->pg_SeqCodeValidTable[*srcseq++]) |
|---|
| 727 | { |
|---|
| 728 | abspos++; |
|---|
| 729 | } |
|---|
| 730 | } |
|---|
| 731 | *posptr = abspos; |
|---|
| 732 | return(TRUE); |
|---|
| 733 | } else { |
|---|
| 734 | printf("Out of memory for ecoli position table!\n"); |
|---|
| 735 | } |
|---|
| 736 | } |
|---|
| 737 | } |
|---|
| 738 | return(FALSE); |
|---|
| 739 | } |
|---|
| 740 | /* \\\ */ |
|---|
| 741 | |
|---|
| 742 | /* /// "FreeAllSpecies()" */ |
|---|
| 743 | void FreeAllSpecies(struct PTPanGlobal *pg) |
|---|
| 744 | { |
|---|
| 745 | struct PTPanSpecies *ps; |
|---|
| 746 | FlushCache(pg->pg_SpeciesCache); |
|---|
| 747 | ps = (struct PTPanSpecies *) pg->pg_Species.lh_Head; |
|---|
| 748 | while(ps->ps_Node.ln_Succ) |
|---|
| 749 | { |
|---|
| 750 | FreeCacheNode(pg->pg_SpeciesCache, ps->ps_CacheNode); |
|---|
| 751 | Remove(&ps->ps_Node); |
|---|
| 752 | free(ps->ps_Name); |
|---|
| 753 | free(ps->ps_FullName); |
|---|
| 754 | freeset(ps, (struct PTPanSpecies *) pg->pg_Species.lh_Head); |
|---|
| 755 | } |
|---|
| 756 | FreeBinTree(pg->pg_SpeciesBinTree); |
|---|
| 757 | pg->pg_SpeciesBinTree = NULL; |
|---|
| 758 | pg->pg_NumSpecies = 0; |
|---|
| 759 | pg->pg_TotalSeqSize = 0; |
|---|
| 760 | pg->pg_TotalSeqCompressedSize = 0; |
|---|
| 761 | pg->pg_TotalRawSize = 0; |
|---|
| 762 | pg->pg_TotalRawBits = 0; |
|---|
| 763 | } |
|---|
| 764 | /* \\\ */ |
|---|
| 765 | |
|---|
| 766 | /* /// "CacheSpeciesLoad()" */ |
|---|
| 767 | BOOL CacheSpeciesLoad(struct CacheHandler *, struct PTPanSpecies *ps) |
|---|
| 768 | { |
|---|
| 769 | //struct PTPanGlobal *pg = (struct PTPanGlobal *pg) ch->ch_UserData; |
|---|
| 770 | |
|---|
| 771 | if(!ps->ps_SeqData) |
|---|
| 772 | { |
|---|
| 773 | /* load alignment data */ |
|---|
| 774 | ps->ps_SeqData = GB_read_string(ps->ps_SeqDataDB); |
|---|
| 775 | return(TRUE); |
|---|
| 776 | } |
|---|
| 777 | return(FALSE); |
|---|
| 778 | } |
|---|
| 779 | /* \\\ */ |
|---|
| 780 | |
|---|
| 781 | /* /// "CacheSpeciesUnload()" */ |
|---|
| 782 | BOOL CacheSpeciesUnload(struct CacheHandler *, struct PTPanSpecies *ps) |
|---|
| 783 | { |
|---|
| 784 | //struct PTPanGlobal *pg = (struct PTPanGlobal *pg) ch->ch_UserData; |
|---|
| 785 | |
|---|
| 786 | if(ps->ps_SeqData) |
|---|
| 787 | { |
|---|
| 788 | /* load alignment data */ |
|---|
| 789 | freeset(ps->ps_SeqData, NULL); |
|---|
| 790 | return(TRUE); |
|---|
| 791 | } |
|---|
| 792 | return(FALSE); |
|---|
| 793 | } |
|---|
| 794 | /* \\\ */ |
|---|
| 795 | |
|---|
| 796 | /* /// "CacheSpeciesSize()" */ |
|---|
| 797 | ULONG CacheSpeciesSize(struct CacheHandler *, struct PTPanSpecies *ps) |
|---|
| 798 | { |
|---|
| 799 | //struct PTPanGlobal *pg = (struct PTPanGlobal *pg) ch->ch_UserData; |
|---|
| 800 | return(ps->ps_SeqDataSize); |
|---|
| 801 | } |
|---|
| 802 | /* \\\ */ |
|---|
| 803 | |
|---|
| 804 | /* /// "LoadSpecies()" */ |
|---|
| 805 | BOOL LoadSpecies(struct PTPanGlobal *pg) |
|---|
| 806 | { |
|---|
| 807 | GBDATA *gb_species; |
|---|
| 808 | struct PTPanSpecies *ps; |
|---|
| 809 | ULONG ignorecount; |
|---|
| 810 | |
|---|
| 811 | ULONG longestali = 0; |
|---|
| 812 | |
|---|
| 813 | /* NOTE: This database scan should avoided. We should store all the |
|---|
| 814 | data that's built up here in a secondary file. That way we would |
|---|
| 815 | get rid of the loading and scanning of the sequence data in low |
|---|
| 816 | memory mode */ |
|---|
| 817 | |
|---|
| 818 | /* open data base */ |
|---|
| 819 | if(!(OpenDataBase(pg))) |
|---|
| 820 | { |
|---|
| 821 | printf("Failed to open database %s!\n", pg->pg_DBName); |
|---|
| 822 | exit(1); |
|---|
| 823 | } |
|---|
| 824 | |
|---|
| 825 | GB_begin_transaction(pg->pg_MainDB); |
|---|
| 826 | |
|---|
| 827 | /* get the ecoli reference sequence */ |
|---|
| 828 | LoadEcoliSequence(pg); |
|---|
| 829 | |
|---|
| 830 | if(pg->pg_TotalRawSize) /* seems like we've already have the list */ |
|---|
| 831 | { |
|---|
| 832 | /* only load in alignment data */ |
|---|
| 833 | if(pg->pg_LowMemoryMode) |
|---|
| 834 | { |
|---|
| 835 | GB_commit_transaction(pg->pg_MainDB); |
|---|
| 836 | return(TRUE); |
|---|
| 837 | } |
|---|
| 838 | printf("Reloading alignment data...\n"); |
|---|
| 839 | ps = (struct PTPanSpecies *) pg->pg_Species.lh_Head; |
|---|
| 840 | while(ps->ps_Node.ln_Succ) |
|---|
| 841 | { |
|---|
| 842 | ps->ps_CacheNode = CacheLoadData(pg->pg_SpeciesCache, ps->ps_CacheNode, ps); |
|---|
| 843 | ps = (struct PTPanSpecies *) ps->ps_Node.ln_Succ; |
|---|
| 844 | } |
|---|
| 845 | GB_commit_transaction(pg->pg_MainDB); |
|---|
| 846 | return(TRUE); |
|---|
| 847 | } |
|---|
| 848 | |
|---|
| 849 | FreeAllSpecies(pg); |
|---|
| 850 | |
|---|
| 851 | /* add the species to the list */ |
|---|
| 852 | pg->pg_MaxBaseLength = 0; |
|---|
| 853 | pg->pg_TotalSeqSize = 0; |
|---|
| 854 | pg->pg_TotalSeqCompressedSize = 0; |
|---|
| 855 | pg->pg_TotalRawSize = 0; |
|---|
| 856 | pg->pg_NumSpecies = 0; |
|---|
| 857 | ignorecount = 0; |
|---|
| 858 | for(gb_species = GBT_first_species_rel_species_data(pg->pg_SpeciesData); |
|---|
| 859 | gb_species; |
|---|
| 860 | gb_species = GBT_next_species(gb_species)) |
|---|
| 861 | { |
|---|
| 862 | GBDATA *gb_name; |
|---|
| 863 | GBDATA *gb_ali; |
|---|
| 864 | GBDATA *gb_data; |
|---|
| 865 | STRPTR spname; |
|---|
| 866 | |
|---|
| 867 | /* get name */ |
|---|
| 868 | gb_name = GB_find(gb_species, "name", down_level); |
|---|
| 869 | if(!gb_name) |
|---|
| 870 | { |
|---|
| 871 | ignorecount++; |
|---|
| 872 | continue; /* huh? couldn't find the name of the species? */ |
|---|
| 873 | } |
|---|
| 874 | spname = GB_read_string(gb_name); |
|---|
| 875 | |
|---|
| 876 | /* get alignments */ |
|---|
| 877 | gb_ali = GB_find(gb_species, pg->pg_AlignmentName, down_level); |
|---|
| 878 | if(!gb_ali) |
|---|
| 879 | { |
|---|
| 880 | ignorecount++; |
|---|
| 881 | free(spname); |
|---|
| 882 | continue; /* too bad, no alignment information found */ |
|---|
| 883 | } |
|---|
| 884 | gb_data = GB_find(gb_ali, "data", down_level); |
|---|
| 885 | if(!gb_data) |
|---|
| 886 | { |
|---|
| 887 | ignorecount++; |
|---|
| 888 | fprintf(stderr, "Species '%s' has no data in '%s'\n", |
|---|
| 889 | spname, pg->pg_AlignmentName); |
|---|
| 890 | free(spname); |
|---|
| 891 | continue; |
|---|
| 892 | } |
|---|
| 893 | |
|---|
| 894 | /* okay, cannot fail now anymore, allocate a PTPanSpecies structure */ |
|---|
| 895 | ps = (struct PTPanSpecies *) calloc(1, sizeof(struct PTPanSpecies)); |
|---|
| 896 | |
|---|
| 897 | /* write name and long name into the structure */ |
|---|
| 898 | ps->ps_SpeciesDB = gb_species; |
|---|
| 899 | ps->ps_SeqDataDB = gb_data; |
|---|
| 900 | ps->ps_IsGroup = TRUE; |
|---|
| 901 | ps->ps_Name = spname; |
|---|
| 902 | gb_name = GB_find(gb_species, "full_name", down_level); |
|---|
| 903 | if(gb_name) |
|---|
| 904 | { |
|---|
| 905 | ps->ps_FullName = GB_read_string(gb_name); |
|---|
| 906 | } else { |
|---|
| 907 | ps->ps_FullName = strdup(ps->ps_Name); |
|---|
| 908 | } |
|---|
| 909 | |
|---|
| 910 | /* (temporarily) load in the alignment and compress it */ |
|---|
| 911 | ps->ps_SeqDataSize = GB_read_string_count(ps->ps_SeqDataDB); |
|---|
| 912 | ps->ps_SeqData = GB_read_string(ps->ps_SeqDataDB); |
|---|
| 913 | |
|---|
| 914 | if(strlen(ps->ps_SeqData) != ps->ps_SeqDataSize) |
|---|
| 915 | { |
|---|
| 916 | printf("%s is corrupt, ignoring!\n", ps->ps_Name); |
|---|
| 917 | ignorecount++; |
|---|
| 918 | FreeCacheNode(pg->pg_SpeciesCache, ps->ps_CacheNode); |
|---|
| 919 | free(ps->ps_SeqData); |
|---|
| 920 | free(ps->ps_Name); |
|---|
| 921 | free(ps->ps_FullName); |
|---|
| 922 | free(ps); |
|---|
| 923 | continue; /* too bad, alignment was somehow corrupt */ |
|---|
| 924 | } |
|---|
| 925 | |
|---|
| 926 | #if 0 /* not required anymore */ |
|---|
| 927 | if(pg->pg_LowMemoryMode) /* free memory in low memory case */ |
|---|
| 928 | { |
|---|
| 929 | CacheUnloadData(pg->pg_SpeciesCache, ps->ps_CacheNode); |
|---|
| 930 | } |
|---|
| 931 | #endif |
|---|
| 932 | |
|---|
| 933 | ps->ps_RawDataSize = CompressSequenceWithDotsAndHyphens(pg, ps); |
|---|
| 934 | freeset(ps->ps_SeqData, NULL); |
|---|
| 935 | if (ps->ps_RawDataSize < 0) // TODO: problem, ps_RawDataSize is unsigned... |
|---|
| 936 | { |
|---|
| 937 | printf("%s is corrupt, ignoring!\n", ps->ps_Name); |
|---|
| 938 | ignorecount++; |
|---|
| 939 | FreeCacheNode(pg->pg_SpeciesCache, ps->ps_CacheNode); |
|---|
| 940 | free(ps->ps_Name); |
|---|
| 941 | free(ps->ps_FullName); |
|---|
| 942 | free(ps); |
|---|
| 943 | continue; |
|---|
| 944 | } |
|---|
| 945 | |
|---|
| 946 | /* enter global absolute offset in index */ |
|---|
| 947 | ps->ps_AbsOffset = pg->pg_TotalRawSize; |
|---|
| 948 | ps->ps_Node.ln_Pri = ps->ps_AbsOffset; |
|---|
| 949 | pg->pg_TotalSeqSize += ps->ps_SeqDataSize; |
|---|
| 950 | pg->pg_TotalRawSize += ps->ps_RawDataSize; |
|---|
| 951 | if(ps->ps_RawDataSize > pg->pg_MaxBaseLength) |
|---|
| 952 | { |
|---|
| 953 | pg->pg_MaxBaseLength = ps->ps_RawDataSize; |
|---|
| 954 | } |
|---|
| 955 | if(ps->ps_SeqDataSize > longestali) |
|---|
| 956 | { |
|---|
| 957 | longestali = ps->ps_SeqDataSize; |
|---|
| 958 | } |
|---|
| 959 | /* Init complete, now add it to the list */ |
|---|
| 960 | //printf("Added %s ('%s')...\n", ps->ps_Name, ps->ps_FullName); |
|---|
| 961 | AddTail(&pg->pg_Species, &ps->ps_Node); |
|---|
| 962 | pg->pg_NumSpecies++; |
|---|
| 963 | |
|---|
| 964 | /* visual feedback */ |
|---|
| 965 | if((pg->pg_NumSpecies % 10) == 0) |
|---|
| 966 | { |
|---|
| 967 | if(pg->pg_NumSpecies % 500) |
|---|
| 968 | { |
|---|
| 969 | printf("."); |
|---|
| 970 | fflush(stdout); |
|---|
| 971 | } else { |
|---|
| 972 | printf(".%6ld (%6lld KB)\n", pg->pg_NumSpecies, (ps->ps_AbsOffset >> 10)); |
|---|
| 973 | } |
|---|
| 974 | } |
|---|
| 975 | } |
|---|
| 976 | |
|---|
| 977 | /* calculate bits usage */ |
|---|
| 978 | pg->pg_TotalRawBits = 8; |
|---|
| 979 | while((1UL << pg->pg_TotalRawBits) < pg->pg_TotalRawSize) |
|---|
| 980 | { |
|---|
| 981 | pg->pg_TotalRawBits++; |
|---|
| 982 | } |
|---|
| 983 | |
|---|
| 984 | /* build tree to find species quicker by raw position */ |
|---|
| 985 | pg->pg_SpeciesBinTree = BuildBinTree(&pg->pg_Species); |
|---|
| 986 | |
|---|
| 987 | printf("\nLongest sequence was %ld bases (alignment size %ld).\n\n", |
|---|
| 988 | pg->pg_MaxBaseLength, longestali); |
|---|
| 989 | printf("Database contains %ld valid species (%ld ignored).\n" |
|---|
| 990 | "%lld bytes alignment data (%lld bases).\n", |
|---|
| 991 | pg->pg_NumSpecies, ignorecount, pg->pg_TotalSeqSize, pg->pg_TotalRawSize); |
|---|
| 992 | |
|---|
| 993 | printf("Compressed sequence data (with dots and hyphens): %llu byte (%llu kb, %llu mb)\n", |
|---|
| 994 | pg->pg_TotalSeqCompressedSize, pg->pg_TotalSeqCompressedSize >> 10, pg->pg_TotalSeqCompressedSize >> 20); |
|---|
| 995 | |
|---|
| 996 | |
|---|
| 997 | pg->pg_Bench.ts_CollectDB = BenchTimePassed(pg); |
|---|
| 998 | |
|---|
| 999 | /* done! */ |
|---|
| 1000 | GB_commit_transaction(pg->pg_MainDB); |
|---|
| 1001 | return(TRUE); |
|---|
| 1002 | } |
|---|
| 1003 | /* \\\ */ |
|---|
| 1004 | |
|---|
| 1005 | /* /// "LoadIndexHeader()" */ |
|---|
| 1006 | BOOL LoadIndexHeader(struct PTPanGlobal *pg) |
|---|
| 1007 | { |
|---|
| 1008 | FILE *fh; |
|---|
| 1009 | struct PTPanSpecies *ps; |
|---|
| 1010 | struct PTPanPartition *pp; |
|---|
| 1011 | ULONG numspec; |
|---|
| 1012 | ULONG ignorecount; |
|---|
| 1013 | ULONG endian = 0; |
|---|
| 1014 | UWORD version = 0; |
|---|
| 1015 | UWORD cnt; |
|---|
| 1016 | char idstr[16]; |
|---|
| 1017 | |
|---|
| 1018 | FreeAllSpecies(pg); |
|---|
| 1019 | FreeAllPartitions(pg); |
|---|
| 1020 | |
|---|
| 1021 | /* Does similar things as LoadSpecies() */ |
|---|
| 1022 | if(!(fh = fopen(pg->pg_IndexName, "r"))) |
|---|
| 1023 | { |
|---|
| 1024 | printf("ERROR: Couldn't open index %s!\n", pg->pg_IndexName); |
|---|
| 1025 | return(FALSE); |
|---|
| 1026 | } |
|---|
| 1027 | |
|---|
| 1028 | /* read id string */ |
|---|
| 1029 | fread(idstr, 16, 1, fh); |
|---|
| 1030 | if(strncmp("TUM PeTerPAN IDX", idstr, 16)) |
|---|
| 1031 | { |
|---|
| 1032 | printf("ERROR: This is no index file!\n"); |
|---|
| 1033 | fclose(fh); |
|---|
| 1034 | return(FALSE); |
|---|
| 1035 | } |
|---|
| 1036 | |
|---|
| 1037 | /* check endianness */ |
|---|
| 1038 | fread(&endian, sizeof(endian), 1, fh); |
|---|
| 1039 | if(endian != 0x01020304) |
|---|
| 1040 | { |
|---|
| 1041 | printf("ERROR: Index was created on a different endian machine (%08lx)!\n", endian); |
|---|
| 1042 | fclose(fh); |
|---|
| 1043 | return(FALSE); |
|---|
| 1044 | } |
|---|
| 1045 | |
|---|
| 1046 | /* check file structure version */ |
|---|
| 1047 | fread(&version, sizeof(version), 1, fh); |
|---|
| 1048 | if(version != FILESTRUCTVERSION) |
|---|
| 1049 | { |
|---|
| 1050 | printf("ERROR: Index (V%d.%d) does not match current file structure version (V%d.%d)!\n", |
|---|
| 1051 | version >> 8, version & 0xff, |
|---|
| 1052 | FILESTRUCTVERSION >> 8, FILESTRUCTVERSION & 0xff); |
|---|
| 1053 | fclose(fh); |
|---|
| 1054 | return(FALSE); |
|---|
| 1055 | } |
|---|
| 1056 | |
|---|
| 1057 | /* read the rest of the important data */ |
|---|
| 1058 | fread(&pg->pg_UseStdSfxTree, sizeof(pg->pg_UseStdSfxTree), 1, fh); |
|---|
| 1059 | fread(&pg->pg_AlphaSize , sizeof(pg->pg_AlphaSize) , 1, fh); |
|---|
| 1060 | fread(&pg->pg_TotalSeqSize , sizeof(pg->pg_TotalSeqSize) , 1, fh); |
|---|
| 1061 | fread(&pg->pg_TotalSeqCompressedSize, sizeof(pg->pg_TotalSeqCompressedSize) , 1, fh); |
|---|
| 1062 | fread(&pg->pg_TotalRawSize , sizeof(pg->pg_TotalRawSize) , 1, fh); |
|---|
| 1063 | fread(&pg->pg_TotalRawBits , sizeof(pg->pg_TotalRawBits) , 1, fh); |
|---|
| 1064 | fread(&pg->pg_AllHashSum , sizeof(pg->pg_AllHashSum) , 1, fh); |
|---|
| 1065 | fread(&pg->pg_NumSpecies , sizeof(pg->pg_NumSpecies) , 1, fh); |
|---|
| 1066 | fread(&pg->pg_NumPartitions, sizeof(pg->pg_NumPartitions), 1, fh); |
|---|
| 1067 | fread(&pg->pg_MaxPrefixLen , sizeof(pg->pg_MaxPrefixLen) , 1, fh); |
|---|
| 1068 | |
|---|
| 1069 | // read Ecoli Sequence |
|---|
| 1070 | /* free memory if previously allocated */ |
|---|
| 1071 | freeset(pg->pg_EcoliSeq, NULL); |
|---|
| 1072 | freeset(pg->pg_EcoliBaseTable, NULL); |
|---|
| 1073 | |
|---|
| 1074 | fread(&pg->pg_EcoliSeqSize, sizeof(pg->pg_EcoliSeqSize), 1, fh); |
|---|
| 1075 | if (pg->pg_EcoliSeqSize > 0) |
|---|
| 1076 | { // only read EcoliSeq and |
|---|
| 1077 | pg->pg_EcoliSeq = (char*) malloc(pg->pg_EcoliSeqSize + 1); // EcoliBaseTable if we |
|---|
| 1078 | if(!pg->pg_EcoliSeq) // fonud them earlier in |
|---|
| 1079 | { // the build process... |
|---|
| 1080 | printf("Out of memory allocating buffer for pg->pg_EcoliSeq!\n"); // aka if pg_EcoliSeqSize |
|---|
| 1081 | return(FALSE); // is greater than zero |
|---|
| 1082 | } |
|---|
| 1083 | fread(pg->pg_EcoliSeq, 1, pg->pg_EcoliSeqSize + 1, fh); |
|---|
| 1084 | |
|---|
| 1085 | pg->pg_EcoliBaseTable = (ULONG *) calloc(pg->pg_EcoliSeqSize + 1, sizeof(ULONG)); |
|---|
| 1086 | if(!pg->pg_EcoliBaseTable) |
|---|
| 1087 | { |
|---|
| 1088 | printf("Out of memory allocating buffer for pg->pg_EcoliBaseTable!\n"); |
|---|
| 1089 | return(FALSE); |
|---|
| 1090 | } |
|---|
| 1091 | fread(pg->pg_EcoliBaseTable, sizeof(ULONG), pg->pg_EcoliSeqSize + 1, fh); |
|---|
| 1092 | } |
|---|
| 1093 | |
|---|
| 1094 | /* fix partition loading routine for standard suffix tree */ |
|---|
| 1095 | if(pg->pg_UseStdSfxTree) |
|---|
| 1096 | { |
|---|
| 1097 | pg->pg_PartitionCache->ch_LoadFunc = (BOOL (*)(struct CacheHandler *, APTR)) CacheStdSuffixPartitionLoad; |
|---|
| 1098 | pg->pg_PartitionCache->ch_UnloadFunc = (BOOL (*)(struct CacheHandler *, APTR)) CacheStdSuffixPartitionUnload; |
|---|
| 1099 | } |
|---|
| 1100 | |
|---|
| 1101 | /* add the species to the list */ |
|---|
| 1102 | pg->pg_SpeciesMap = (struct PTPanSpecies **) calloc(sizeof(struct PTPanSpecies *), |
|---|
| 1103 | pg->pg_NumSpecies); |
|---|
| 1104 | ignorecount = 0; |
|---|
| 1105 | numspec = 0; |
|---|
| 1106 | while(numspec < pg->pg_NumSpecies) |
|---|
| 1107 | { |
|---|
| 1108 | STRPTR spname; |
|---|
| 1109 | STRPTR filespname; |
|---|
| 1110 | STRPTR fullname; |
|---|
| 1111 | UWORD len; |
|---|
| 1112 | BOOL obsolete; |
|---|
| 1113 | |
|---|
| 1114 | obsolete = FALSE; |
|---|
| 1115 | fullname = NULL; |
|---|
| 1116 | |
|---|
| 1117 | /* get name of species on disk */ |
|---|
| 1118 | fread(&len, sizeof(len), 1, fh); |
|---|
| 1119 | filespname = (STRPTR) calloc(len+1, 1); |
|---|
| 1120 | fread(filespname, len, 1, fh); |
|---|
| 1121 | |
|---|
| 1122 | fread(&len, sizeof(len), 1, fh); |
|---|
| 1123 | fullname = (STRPTR) calloc(len+1, 1); |
|---|
| 1124 | fread(fullname, len, 1, fh); |
|---|
| 1125 | |
|---|
| 1126 | /* okay, cannot fail now anymore, allocate a PTPanSpecies structure */ |
|---|
| 1127 | ps = (struct PTPanSpecies *) calloc(1, sizeof(struct PTPanSpecies)); |
|---|
| 1128 | pg->pg_SpeciesMap[numspec] = ps; |
|---|
| 1129 | ps->ps_Num = numspec + ignorecount; |
|---|
| 1130 | |
|---|
| 1131 | /* write name and long name into the structure */ |
|---|
| 1132 | ps->ps_SpeciesDB = NULL; |
|---|
| 1133 | ps->ps_SeqDataDB = NULL; |
|---|
| 1134 | ps->ps_IsGroup = FALSE; |
|---|
| 1135 | ps->ps_Obsolete = obsolete; |
|---|
| 1136 | ps->ps_Name = filespname; |
|---|
| 1137 | ps->ps_FullName = fullname; |
|---|
| 1138 | |
|---|
| 1139 | /* load in the alignment information */ |
|---|
| 1140 | fread(&ps->ps_SeqDataSize, sizeof(ps->ps_SeqDataSize), 1, fh); |
|---|
| 1141 | fread(&ps->ps_RawDataSize, sizeof(ps->ps_RawDataSize), 1, fh); |
|---|
| 1142 | fread(&ps->ps_AbsOffset, sizeof(ps->ps_AbsOffset), 1, fh); |
|---|
| 1143 | fread(&ps->ps_SeqHash, sizeof(ps->ps_SeqHash), 1, fh); |
|---|
| 1144 | fread(&ps->ps_SeqDataCompressedSize, sizeof(ps->ps_SeqDataCompressedSize), 1, fh); |
|---|
| 1145 | ps->ps_SeqDataCompressed = (UBYTE*) malloc((ps->ps_SeqDataCompressedSize >> 3) + 1); |
|---|
| 1146 | if(!ps->ps_SeqDataCompressed) |
|---|
| 1147 | { |
|---|
| 1148 | printf("Out of memory allocating buffer for compressed SeqData (with '.' and '-')!\n"); |
|---|
| 1149 | return(FALSE); |
|---|
| 1150 | } |
|---|
| 1151 | fread(ps->ps_SeqDataCompressed, 1, ((ps->ps_SeqDataCompressedSize >> 3) + 1), fh); |
|---|
| 1152 | ps->ps_Node.ln_Pri = ps->ps_AbsOffset; |
|---|
| 1153 | |
|---|
| 1154 | /* Init complete, now add it to the list */ |
|---|
| 1155 | //printf("Added %s ('%s')...\n", ps->ps_Name, ps->ps_FullName); |
|---|
| 1156 | AddTail(&pg->pg_Species, &ps->ps_Node); |
|---|
| 1157 | numspec++; |
|---|
| 1158 | |
|---|
| 1159 | /* visual feedback */ |
|---|
| 1160 | if((numspec % 20) == 0) |
|---|
| 1161 | { |
|---|
| 1162 | if(numspec % 1000) |
|---|
| 1163 | { |
|---|
| 1164 | printf("."); |
|---|
| 1165 | fflush(stdout); |
|---|
| 1166 | } else { |
|---|
| 1167 | printf(".%6ld (%6lld KB)\n", numspec, (ps->ps_AbsOffset>>10)); |
|---|
| 1168 | } |
|---|
| 1169 | } |
|---|
| 1170 | } |
|---|
| 1171 | |
|---|
| 1172 | if(numspec != pg->pg_NumSpecies) |
|---|
| 1173 | { |
|---|
| 1174 | printf("ERROR: Number of species has changed!\n"); |
|---|
| 1175 | fclose(fh); |
|---|
| 1176 | return(FALSE); |
|---|
| 1177 | } |
|---|
| 1178 | |
|---|
| 1179 | /* build tree to find species quicker by raw position */ |
|---|
| 1180 | pg->pg_SpeciesBinTree = BuildBinTree(&pg->pg_Species); |
|---|
| 1181 | |
|---|
| 1182 | /* build a species name hash to mark species in groups */ |
|---|
| 1183 | pg->pg_SpeciesNameHash = GBS_create_hash(SPECIESNAMEHASHSIZE, GB_IGNORE_CASE); // TODO: JB: check if GB_IGNORE_CASE |
|---|
| 1184 | // is right (was 0) |
|---|
| 1185 | ps = (struct PTPanSpecies *) pg->pg_Species.lh_Head; |
|---|
| 1186 | while(ps->ps_Node.ln_Succ) |
|---|
| 1187 | { |
|---|
| 1188 | GBS_write_hash(pg->pg_SpeciesNameHash, ps->ps_Name, ps->ps_Num + 1); |
|---|
| 1189 | ps = (struct PTPanSpecies *) ps->ps_Node.ln_Succ; |
|---|
| 1190 | } |
|---|
| 1191 | |
|---|
| 1192 | printf("\n\nDatabase contains %ld valid species (%ld ignored).\n" |
|---|
| 1193 | "%lld bytes alignment data (%lld bases).\n", |
|---|
| 1194 | pg->pg_NumSpecies, ignorecount, pg->pg_TotalSeqSize, pg->pg_TotalRawSize); |
|---|
| 1195 | printf("Compressed sequence data (with dots and hyphens): %llu byte (%llu kb, %llu mb)\n", |
|---|
| 1196 | pg->pg_TotalSeqCompressedSize, pg->pg_TotalSeqCompressedSize >> 10, pg->pg_TotalSeqCompressedSize >> 20); |
|---|
| 1197 | |
|---|
| 1198 | printf("Number of partitions: %d\n", pg->pg_NumPartitions); |
|---|
| 1199 | |
|---|
| 1200 | for(cnt = 0; cnt < pg->pg_NumPartitions; cnt++) |
|---|
| 1201 | { |
|---|
| 1202 | ULONG pcnt; |
|---|
| 1203 | |
|---|
| 1204 | pp = (struct PTPanPartition *) calloc(1, sizeof(struct PTPanPartition)); |
|---|
| 1205 | if(!pp) |
|---|
| 1206 | { |
|---|
| 1207 | fclose(fh); |
|---|
| 1208 | return(FALSE); /* out of memory */ |
|---|
| 1209 | } |
|---|
| 1210 | pp->pp_PTPanGlobal = pg; |
|---|
| 1211 | fread(&pp->pp_ID, sizeof(pp->pp_ID), 1, fh); |
|---|
| 1212 | fread(&pp->pp_Prefix, sizeof(pp->pp_Prefix), 1, fh); |
|---|
| 1213 | fread(&pp->pp_PrefixLen, sizeof(pp->pp_PrefixLen), 1, fh); |
|---|
| 1214 | fread(&pp->pp_Size, sizeof(pp->pp_Size), 1, fh); |
|---|
| 1215 | fread(&pp->pp_RawOffset, sizeof(pp->pp_RawOffset), 1, fh); |
|---|
| 1216 | |
|---|
| 1217 | pp->pp_PartitionName = (STRPTR) calloc(strlen(pg->pg_IndexName) + 5, 1); |
|---|
| 1218 | if(pg->pg_UseStdSfxTree) |
|---|
| 1219 | { |
|---|
| 1220 | strncpy(pp->pp_PartitionName, pg->pg_IndexName, strlen(pg->pg_IndexName) - 3); |
|---|
| 1221 | sprintf(&pp->pp_PartitionName[strlen(pg->pg_IndexName) - 3], "sfx", |
|---|
| 1222 | pp->pp_ID); |
|---|
| 1223 | } else { |
|---|
| 1224 | strncpy(pp->pp_PartitionName, pg->pg_IndexName, strlen(pg->pg_IndexName) - 2); |
|---|
| 1225 | sprintf(&pp->pp_PartitionName[strlen(pg->pg_IndexName) - 2], "t%03ld", |
|---|
| 1226 | pp->pp_ID); |
|---|
| 1227 | } |
|---|
| 1228 | /* generate partition string */ |
|---|
| 1229 | pcnt = pp->pp_PrefixLen; |
|---|
| 1230 | pp->pp_PrefixSeq = (STRPTR) malloc(pcnt + 1); |
|---|
| 1231 | while(pcnt) |
|---|
| 1232 | { |
|---|
| 1233 | pp->pp_PrefixSeq[pp->pp_PrefixLen - pcnt] = pg->pg_DecompressTable[(pp->pp_Prefix / |
|---|
| 1234 | pg->pg_PowerTable[pcnt - 1]) % pg->pg_AlphaSize]; |
|---|
| 1235 | pcnt--; |
|---|
| 1236 | } |
|---|
| 1237 | pp->pp_PrefixSeq[pp->pp_PrefixLen] = 0; |
|---|
| 1238 | //printf("Part %ld (%ld = '%s')\n", pp->pp_ID, pp->pp_Prefix, pp->pp_PrefixSeq); |
|---|
| 1239 | /* partition ready, add it */ |
|---|
| 1240 | AddTail(&pg->pg_Partitions, &pp->pp_Node); |
|---|
| 1241 | } |
|---|
| 1242 | fclose(fh); |
|---|
| 1243 | |
|---|
| 1244 | /* done! */ |
|---|
| 1245 | return(TRUE); |
|---|
| 1246 | } |
|---|
| 1247 | /* \\\ */ |
|---|
| 1248 | |
|---|
| 1249 | /* /// "LoadAllPartitions()" */ |
|---|
| 1250 | BOOL LoadAllPartitions(struct PTPanGlobal *pg) |
|---|
| 1251 | { |
|---|
| 1252 | struct PTPanPartition *pp; |
|---|
| 1253 | |
|---|
| 1254 | /* load in each partition */ |
|---|
| 1255 | pp = (struct PTPanPartition *) pg->pg_Partitions.lh_Head; |
|---|
| 1256 | while(pp->pp_Node.ln_Succ) |
|---|
| 1257 | { |
|---|
| 1258 | if(!(pp->pp_CacheNode = CacheLoadData(pg->pg_PartitionCache, pp->pp_CacheNode, pp))) |
|---|
| 1259 | { |
|---|
| 1260 | return(FALSE); |
|---|
| 1261 | } |
|---|
| 1262 | pp = (struct PTPanPartition *) pp->pp_Node.ln_Succ; |
|---|
| 1263 | } |
|---|
| 1264 | return(TRUE); |
|---|
| 1265 | } |
|---|
| 1266 | /* \\\ */ |
|---|
| 1267 | |
|---|
| 1268 | /* /// "FreeAllPartitions()" */ |
|---|
| 1269 | void FreeAllPartitions(struct PTPanGlobal *pg) |
|---|
| 1270 | { |
|---|
| 1271 | struct PTPanPartition *pp; |
|---|
| 1272 | FlushCache(pg->pg_PartitionCache); |
|---|
| 1273 | pp = (struct PTPanPartition *) pg->pg_Partitions.lh_Head; |
|---|
| 1274 | while(pp->pp_Node.ln_Succ) |
|---|
| 1275 | { |
|---|
| 1276 | FreeCacheNode(pg->pg_PartitionCache, pp->pp_CacheNode); |
|---|
| 1277 | Remove(&pp->pp_Node); |
|---|
| 1278 | free(pp->pp_PrefixSeq); |
|---|
| 1279 | FreeHuffmanTree(pp->pp_BranchTree); |
|---|
| 1280 | FreeHuffmanTree(pp->pp_ShortEdgeTree); |
|---|
| 1281 | FreeHuffmanTree(pp->pp_LongEdgeLenTree); |
|---|
| 1282 | freeset(pp, (struct PTPanPartition *) pg->pg_Partitions.lh_Head); |
|---|
| 1283 | } |
|---|
| 1284 | pg->pg_NumPartitions = 0; |
|---|
| 1285 | } |
|---|
| 1286 | /* \\\ */ |
|---|
| 1287 | |
|---|