source: branches/port5/ptpan/PTP_io.cxx

Last change on this file was 5908, checked in by westram, 16 years ago
  • source files with identical names are really a pain when using valgrind
File size: 38.3 KB
Line 
1#include <stdio.h>
2#include <stdlib.h>
3#include <sys/time.h>
4// #include <malloc.h>
5#include <memory.h>
6#include <string.h>
7#include <math.h>
8#include <PT_server.h>
9#include "ptpan.h"
10#include "pt_prototypes.h"
11#include <arbdbt.h>
12#include <BI_helix.hxx>
13
14#ifdef BENCHMARK
15/* /// "BenchTimePassed()" */
16ULONG BenchTimePassed(struct PTPanGlobal *pg)
17{
18  ULONG ms;
19
20  gettimeofday(&pg->pg_Bench.ts_Now, NULL);
21  ms = (pg->pg_Bench.ts_Now.tv_sec - pg->pg_Bench.ts_Last.tv_sec) * 1000;
22  if(pg->pg_Bench.ts_Now.tv_usec < pg->pg_Bench.ts_Last.tv_usec)
23  {
24    ms -= 1000 - (pg->pg_Bench.ts_Last.tv_usec - pg->pg_Bench.ts_Now.tv_usec) / 1000;
25  } else {
26    ms += (pg->pg_Bench.ts_Now.tv_usec - pg->pg_Bench.ts_Last.tv_usec) / 1000;
27  }
28  pg->pg_Bench.ts_Last = pg->pg_Bench.ts_Now;
29  return(ms);
30}
31/* \\\ */
32
33/* /// "BenchOutput()" */
34void BenchOutput(struct  PTPanGlobal *pg)
35{
36  struct PTPanPartition *pp;
37  ULONG diskidxspace = 0;
38  ULONG disknodecount = 0;
39  ULONG disknodespace = 0;
40  ULONG diskleafcount = 0;
41  ULONG diskleafspace = 0;
42  ULONG diskouterleaves = 0;
43  ULONG memused;
44  ULONG memusedmax = 0;
45
46  pg->pg_Bench.ts_Last = pg->pg_Bench.ts_Init;
47  pg->pg_Bench.ts_TotalBuild = BenchTimePassed(pg);
48  printf("pDAT: (id np fsize tsize bufmem used 2e 5e depth pdepth plen edges ledges dictsize node# nodespc leaf# leafcnt outl)\n");
49  pp = (struct PTPanPartition *) pg->pg_Partitions.lh_Head;
50  while(pp->pp_Node.ln_Succ)
51  {
52    diskidxspace += pp->pp_DiskIdxSpace;
53    disknodecount += pp->pp_DiskNodeCount;
54    disknodespace += pp->pp_DiskNodeSpace;
55    diskleafcount += pp->pp_DiskLeafCount;
56    diskleafspace += pp->pp_DiskLeafSpace;
57    diskouterleaves += pp->pp_DiskOuterLeaves;
58    memused = pp->pp_SfxMemorySize - (pp->pp_Sfx2EdgeOffset - pp->pp_SfxNEdgeOffset);
59    if(memused > memusedmax)
60    {
61      memusedmax = memused;
62    }
63    printf("%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %s PDAT\n",
64    pp->pp_ID,
65    pp->pp_Size,
66    pp->pp_DiskIdxSpace,
67    pp->pp_DiskTreeSize,
68    pp->pp_SfxMemorySize,
69    memused,
70    pp->pp_NumSmallNodes,
71    pp->pp_NumBigNodes,
72    pp->pp_MaxTreeDepth,
73    pp->pp_TreePruneDepth,
74    pp->pp_TreePruneLength,
75    pp->pp_EdgeCount,
76    pp->pp_LongEdgeCount,
77    pp->pp_LongDictRawSize,
78    pp->pp_DiskNodeCount,
79    pp->pp_DiskNodeSpace,
80    pp->pp_DiskLeafCount,
81    pp->pp_DiskLeafSpace,
82    pp->pp_DiskOuterLeaves,
83    pg->pg_DBName);
84
85    pp = (struct PTPanPartition *) pp->pp_Node.ln_Succ;
86  }
87
88  printf("gDAT: (n s lb np t idxsize memusedmax node# nodespc leaf# leafcnt outl Total CollDB MergeDB PScan MemTree Stats LDPre LDBuild Reloc Disk)\n"
89    "%lld %ld %ld %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %s GDAT\n",
90    pg->pg_TotalRawSize,
91    pg->pg_NumSpecies,
92    pg->pg_MaxBaseLength,
93    pg->pg_NumPartitions,
94    pg->pg_MaxPartitionSize,
95    diskidxspace,
96    memusedmax,
97    disknodecount,
98    disknodespace,
99    diskleafcount,
100    diskleafspace,
101    diskouterleaves,
102    pg->pg_Bench.ts_TotalBuild,
103    pg->pg_Bench.ts_CollectDB,
104    pg->pg_Bench.ts_MergeDB,
105    pg->pg_Bench.ts_PrefixScan,
106    pg->pg_Bench.ts_MemTree,
107    pg->pg_Bench.ts_TreeStats,
108    pg->pg_Bench.ts_LongDictPre,
109    pg->pg_Bench.ts_LongDictBuild,
110    pg->pg_Bench.ts_Reloc,
111    pg->pg_Bench.ts_Writing,
112    pg->pg_DBName);
113};
114/* \\\ */
115#endif
116
117/* /// "GetSequenceRelPos()" */
118/*
119ULONG GetSequenceRelPos(struct PTPanGlobal *pg, STRPTR srcseq, ULONG abspos)
120{
121  ULONG relpos = 0;
122  // given an absolute sequence position, search for the relative one,
123  //   e.g. abspos 2 on "-----UU-C-C" will yield 8
124  while(*srcseq)
125  {
126    if(pg->pg_SeqCodeValidTable[*srcseq++])
127    {
128      if(!(abspos--))
129      {
130    break; // position found
131      }
132    }
133    relpos++;
134  }
135  return(relpos);
136}
137*/
138/* \\\ */
139
140/* /// "GetSequenceAbsPos()" */
141/*
142ULONG GetSequenceAbsPos(struct PTPanGlobal *pg, STRPTR srcseq, ULONG relpos)
143{
144  ULONG abspos = 0;
145  // given a relative sequence position, search for the absolute one,
146  //   e.g. relpos 8 on "-----UU-C-C" will yield 3
147  while(*srcseq && relpos--)
148  {
149    if(pg->pg_SeqCodeValidTable[*srcseq++])
150    {
151      abspos++;
152    }
153  }
154  return(abspos);
155}
156*/
157/* \\\ */
158
159/* /// "CalcLengthForFilteredSequence()" */
160ULONG CalcLengthForFilteredSequence(struct PTPanGlobal *pg, STRPTR srcseq)
161{
162  ULONG len = 0;
163  /* calculate size of compressed sequence */
164  while(*srcseq)
165  {
166    len += pg->pg_SeqCodeValidTable[*srcseq++];
167  }
168  return(len);
169}
170/* \\\ */
171
172/* /// "FilterSequenceTo()" */
173ULONG FilterSequenceTo(struct PTPanGlobal *pg, STRPTR srcstr, STRPTR filtptr)
174{
175  ULONG len = 0;
176  UBYTE code;
177
178  /* now actually filter the sequence */
179  while((code = *srcstr++))
180  {
181    if(pg->pg_SeqCodeValidTable[code])
182    {
183      /* add sequence code */
184      *filtptr++ = pg->pg_DecompressTable[pg->pg_CompressTable[code]];
185      len++;
186    }
187  }
188  *filtptr = 0;
189  return(len);
190}
191/* \\\ */
192
193/* /// "FilterSequence()" */
194STRPTR FilterSequence(struct PTPanGlobal *pg, STRPTR srcseq)
195{
196  ULONG len;
197  STRPTR filtseq;
198
199  len = CalcLengthForFilteredSequence(pg, srcseq);
200  filtseq = (STRPTR) malloc(len + 1);
201  if(!filtseq)
202  {
203    return(NULL); /* out of memory */
204  }
205  /* now actually compress the sequence */
206  len = FilterSequenceTo(pg, srcseq, filtseq);
207  //printf("%ld bytes used.\n", len);
208
209  return(filtseq);
210}
211/* \\\ */
212
213/* /// "CompressSequenceTo()" */
214ULONG CompressSequenceTo(struct PTPanGlobal *pg, STRPTR srcseq, ULONG *seqptr)
215{
216  ULONG len;
217  ULONG seqcode;
218  UWORD cnt;
219  ULONG pval;
220  UBYTE code;
221
222  /* now actually compress the sequence */
223  len = 4;
224  cnt = 0;
225  pval = 0;
226  while((code = *srcseq++))
227  {
228    if(pg->pg_SeqCodeValidTable[code])
229    {
230      /* add sequence code */
231      seqcode = pg->pg_CompressTable[code];
232      pval *= pg->pg_AlphaSize;
233      pval += seqcode;
234      /* check, if storage capacity was reached? */
235      if(++cnt == MAXCODEFITLONG)
236      {
237    /* write out compressed longword (with eof bit) */
238    //printf("[%08lx]", pval | pg->pg_BitsMaskTable[cnt]);
239    *seqptr++ = (pval << pg->pg_BitsShiftTable[MAXCODEFITLONG]) | pg->pg_BitsMaskTable[MAXCODEFITLONG];
240    len += 4;
241    cnt = 0;
242    pval = 0;
243      }
244    }
245  }
246
247  /* write pending bits (with eof bit) */
248  *seqptr = (pval << pg->pg_BitsShiftTable[cnt]) | pg->pg_BitsMaskTable[cnt];
249  //printf("[%08lx]\n", *seqptr);
250  return(len);
251}
252/* \\\ */
253
254/* /// "CompressSequence()" */
255ULONG * CompressSequence(struct PTPanGlobal *pg, STRPTR srcseq)
256{
257  ULONG len;
258  ULONG *compseq;
259
260  len = CalcLengthForFilteredSequence(pg, srcseq);
261  //printf("compressing %s (%ld/%ld)...", srcseq, len, (ULONG) strlen(srcseq));
262
263  /* that's all we need: ceil(len/MAXCODEFITLONG) longwords */
264  compseq = (ULONG *) malloc(((len / MAXCODEFITLONG) + 1) * sizeof(ULONG));
265  if(!compseq)
266  {
267    return(NULL); /* out of memory */
268  }
269  /* now actually compress the sequence */
270  len = CompressSequenceTo(pg, srcseq, compseq);
271  //printf("%ld bytes used.\n", len);
272
273  return(compseq);
274}
275/* \\\ */
276
277/* /// "GetLengthOfCompressedSequence() */
278ULONG GetLengthOfCompressedSequence(struct PTPanGlobal *pg, ULONG *seqptr)
279{
280  ULONG len = 0;
281  UWORD cnt;
282  ULONG mask = pg->pg_BitsMaskTable[MAXCODEFITLONG];
283  do
284  {
285    if(*seqptr++ & mask) /* check, if lowest bit is set */
286    {
287      len += MAXCODEFITLONG;
288    } else {
289      /* okay, we seem to be at the end of the compressed sequence,
290         and we need to find out the actual size */
291      --seqptr;
292      cnt = MAXCODEFITLONG;
293      while(--cnt)
294      {
295    if(*seqptr & pg->pg_BitsMaskTable[cnt]) /* seems like we found it */
296    {
297    len += cnt;
298    break;
299    }
300      }
301      break;
302    }
303  } while(TRUE);
304  return(len);
305}
306/* \\\ */
307
308/* /// "GetCompressedLongSize()" */
309UWORD GetCompressedLongSize(struct PTPanGlobal *pg, ULONG pval)
310{
311  UWORD cnt = MAXCODEFITLONG;
312  while(!(pval & pg->pg_BitsMaskTable[cnt])) /* check, if termination bit is set */
313  {
314    cnt--;
315  }
316  return(cnt);
317}
318/* \\\ */
319
320/* /// "DecompressSequenceTo() */
321ULONG DecompressSequenceTo(struct PTPanGlobal *pg, ULONG *seqptr, STRPTR tarseq)
322{
323  ULONG len = 0;
324  BOOL lastlong;
325  UWORD cnt;
326  ULONG pval;
327  do
328  {
329    /* get next longword */
330    pval = *seqptr++;
331    cnt = GetCompressedLongSize(pg, pval);
332    pval >>= pg->pg_BitsShiftTable[cnt];
333    lastlong = (cnt < MAXCODEFITLONG); /* last longword reached? */
334
335    /* unpack compressed longword */
336    if(cnt)
337    {
338      do
339      {
340    *tarseq++ = pg->pg_DecompressTable[(pval / pg->pg_PowerTable[--cnt])
341                % pg->pg_AlphaSize];
342    len++;
343      } while(cnt);
344    }
345  } while(!lastlong);
346  *tarseq = 0; /* null terminate string */
347
348  return(len);
349}
350/* \\\ */
351
352/* /// "DecompressCompressedLongTo() */
353ULONG DecompressCompressedLongTo(struct PTPanGlobal *pg, ULONG pval, STRPTR tarseq)
354{
355  ULONG len;
356  UWORD cnt;
357
358  len = cnt = GetCompressedLongSize(pg, pval);
359  pval >>= pg->pg_BitsShiftTable[cnt];
360  /* unpack compressed longword */
361  do
362  {
363    *tarseq++ = pg->pg_DecompressTable[(pval / pg->pg_PowerTable[--cnt])
364                           % pg->pg_AlphaSize];
365  } while(cnt);
366  *tarseq = 0; /* null terminate string */
367  return(len);
368}
369/* \\\ */
370
371/* /// "DecompressSequence()" */
372STRPTR DecompressSequence(struct PTPanGlobal *pg, ULONG *seqptr)
373{
374  ULONG len;
375  STRPTR tarseq;
376  /* first get length */
377  len = GetLengthOfCompressedSequence(pg, seqptr);
378
379  /* allocate memory for uncompressed sequence */
380  tarseq = (STRPTR) malloc(len + 1);
381  if(!tarseq)
382  {
383    return(NULL); /* out of memory */
384  }
385
386  /* decompress sequence */
387  DecompressSequenceTo(pg, seqptr, tarseq);
388  //printf("Decompressed sequence '%s'\n", tarseq);
389  return(tarseq);
390}
391/* \\\ */
392
393/* /// "DecompressSequencePartTo()" */
394LONG DecompressSequencePartTo(struct PTPanGlobal *pg,
395                ULONG *seqptr, ULONG seqpos, ULONG length,
396                STRPTR tarseq)
397{
398  ULONG off = seqpos / MAXCODEFITLONG;
399  UWORD codeoff = seqpos % MAXCODEFITLONG;
400  UWORD cnt;
401  ULONG len = 0;
402  ULONG pval;
403  BOOL lastlong;
404  BOOL first;
405
406  if(!length) /* empty sequence requested? */
407  {
408    *tarseq = 0;
409    return(0);
410  }
411
412  /* decompress sequence */
413  first = TRUE;
414  seqptr += off;
415  do
416  {
417    /* get next longword */
418    pval = *seqptr++;
419    cnt = GetCompressedLongSize(pg, pval);
420    pval >>= pg->pg_BitsShiftTable[cnt];
421    lastlong = (cnt < MAXCODEFITLONG); /* last longword reached? */
422
423    if(first) /* do we need to start at a certain offset? */
424    {
425      if(codeoff > cnt) /* past end of sequence? */
426      {
427        break;
428      }
429      cnt -= codeoff;
430      first = FALSE;
431    }
432    /* unpack compressed longword */
433    do
434    {
435      *tarseq++ = pg->pg_DecompressTable[(pval / pg->pg_PowerTable[--cnt])
436                    % pg->pg_AlphaSize];
437      len++;
438      length--;
439    } while(cnt && length);
440  } while(length && !lastlong);
441  *tarseq = 0; /* null terminate string */
442
443  return(len);
444}
445/* \\\ */
446
447
448/* /// "GetNextCharacter()" */
449UBYTE GetNextCharacter(struct PTPanGlobal *pg, UBYTE* buffer, ULONG &bitpos, ULONG &count)
450{
451    UBYTE character = 0xff;                                                 // return the next character of
452    UBYTE code;                                                             // sequence or 0xff if end flag found
453                                                                            // increase bitpos by consumed bits
454    code = ReadBits(buffer, bitpos, 3);                                     // set count to the number of
455    bitpos += 3;                                                            // found characters
456
457    if (code == 0x07)                                                       // end flag
458    {
459        return 0xff;
460    } else if (code <= SEQCODE_T)                                           // valid character
461    {
462        character = pg->pg_DecompressTable[code];
463        count     = 1;
464    } else if ((code == SEQCODE_DOT) || (code == SEQCODE_HYPHEN))           // '.' or '-'
465    {                                                                       // skip ... chars
466        if (code == SEQCODE_DOT)    character = '.';
467        if (code == SEQCODE_HYPHEN) character = '-';
468       
469        code = ReadBits(buffer, bitpos, 4);
470        if ((code >> 3) == 0x01)            // 1xxx     skip one
471        {
472            count = 1;
473            ++bitpos;
474        } else if ((code >> 2) == 0x01)     // 01xx     skip two
475        {
476            count = 2;
477            bitpos += 2;
478        } else if ((code >> 1) == 0x01)     // 001x     skip up to 63
479        {
480            bitpos += 3;
481            count   = ReadBits(buffer, bitpos, 6);   
482            bitpos += 6;
483        } else if ((code) == 0x01)          // 0001     skip up to 1023       
484        {
485            bitpos += 4;
486            count   = ReadBits(buffer, bitpos, 10);   
487            bitpos += 10;
488        } else if ((code) == 0x00)          // 0000     skip up to 8191
489        {
490            bitpos += 4;
491            count   = ReadBits(buffer, bitpos, 13);   
492            bitpos += 13;
493
494            ULONG tmpbitpos = bitpos;       // test if next char is also the same.
495            ULONG tmpcount  = count;
496            UBYTE tmpcode   = GetNextCharacter(pg, buffer, tmpbitpos, tmpcount);
497            if (character == tmpcode)       // it is -> the number of same characters
498            {                               //          was splitted (i.e. >8191)
499                arb_assert(count == 8191);
500                bitpos  = tmpbitpos;        // consume bits...
501                count  += tmpcount;         // ...and add count
502            }
503        } else                              //
504        {
505            arb_assert(false);              // shouldn't be possible to get to this line
506        }
507    } else                                  // neither end-flag nor valid char
508    {                                       // nor '.' nor '-' => something went wrong
509        arb_assert(false);
510    }
511    return character;
512}
513/* \\\ */
514
515
516ULONG WriteManyChars(UBYTE* buffer, ULONG bitpos, BYTE c, ULONG i)
517{
518    arb_assert((c == SEQCODE_DOT) || (c == SEQCODE_HYPHEN));        // only '.' and '-' are allowed
519    while (i > 0)
520    {
521        bitpos = WriteBits(buffer, bitpos, c, 3);                   // code for character
522        if (i == 1)
523        {
524            bitpos = WriteBits(buffer, bitpos, 0x01, 1);            // 1
525            return bitpos; 
526        }
527        if (i == 2)
528        {
529            bitpos = WriteBits(buffer, bitpos, 0x01, 2);            // 01
530            return bitpos;
531        }
532        if (i <= 63)
533        {
534            bitpos = WriteBits(buffer, bitpos, 0x01, 3);            // 001
535            bitpos = WriteBits(buffer, bitpos, (i & 0x3f), 6);      // 6 bit payload (up to 63)
536            return bitpos;
537        }
538        if (i <= 1023)
539        {
540            bitpos = WriteBits(buffer, bitpos, 0x01, 4);            // 0001
541            bitpos = WriteBits(buffer, bitpos, (i & 0x3ff), 10);    // 10 bit payload (up to 1023)
542            return bitpos;
543        }
544        if (i <= 8191)
545        {
546            bitpos = WriteBits(buffer, bitpos, 0x00, 4);            // 0000
547            bitpos = WriteBits(buffer, bitpos, (i & 0x1fff), 13);   // 13 bit payload (up to 8191)
548            return bitpos;
549        }
550        bitpos = WriteBits(buffer, bitpos, 0x00, 4);                // 0000
551        bitpos = WriteBits(buffer, bitpos, 8191, 13);               // 13 bit payload (exactly 8191)
552        i -= 8191;
553    }
554    return bitpos;
555}
556
557
558/* /// "CompressSequenceWithDotsAndHyphens()" */
559ULONG CompressSequenceWithDotsAndHyphens(struct PTPanGlobal *pg, struct PTPanSpecies *ps)
560{
561    ULONG len     = 0;          // len is the count of characters inserted into ps_RawData
562    ULONG bitpos  = 0;          // ...ps_RawDataSize will be set to len later
563    UBYTE* ptr    = (UBYTE*) ps->ps_SeqData;
564    UBYTE* buffer = (UBYTE*) malloc((ps->ps_SeqDataSize * 3 / 8) + 1);  // TODO: look over it and find a good
565    if (buffer == NULL)                                                 //       estimation of needed size
566    {                                                                   // TODO: what is faster, precount or
567                                                                        //       estimate and realloc?
568        printf("Error: Could not get enough memory to compress sequences with dots and hyphens\n");
569        return FALSE;
570    }
571    while (*ptr)
572    {
573        arb_assert(((bitpos >> 3) + 1 < ps->ps_SeqDataSize));
574        if (*ptr == '.')
575        {                                                                       // found a '.'
576            ULONG count;
577            for (count = 0; *ptr == '.'; ++count, ++ptr) { }                    // count all '.'
578#ifdef ALLOWDOTSINMATCH
579            if (count <= MAXDOTSINMATCH) 
580            {
581                len += count;
582                while (count-- > 0)                                             // write 'count'
583                {                                                               // times one '.'
584                    bitpos = WriteManyChars(buffer, bitpos, SEQCODE_DOT, 1);
585                }
586            } else bitpos = WriteManyChars(buffer, bitpos, SEQCODE_DOT, count); // write all '.'
587#else 
588            bitpos = WriteManyChars(buffer, bitpos, SEQCODE_DOT, count);        // write all '.'
589#endif           
590        } else if (*ptr == '-')
591        {                                                                       // found a '-'
592            ULONG count;
593            for (count = 0; *ptr == '-'; ++count, ++ptr) { }                    // count all '-'
594            bitpos = WriteManyChars(buffer, bitpos, SEQCODE_HYPHEN, count);     // write all '-'
595        } else if (pg->pg_SeqCodeValidTable[*ptr])
596        {                                                                       // found a valid character
597            UBYTE seqcode = pg->pg_CompressTable[*ptr];
598            arb_assert(seqcode <= SEQCODE_T);
599            bitpos = WriteBits(buffer, bitpos, seqcode, 3);                     // write valid char
600            ++ptr;
601            ++len;
602        } else
603        {                                                                       // found an unknown char
604//          printf("Found an unknown char in Species Sequence - ignoring\n");
605            bitpos = WriteManyChars(buffer, bitpos, SEQCODE_HYPHEN, 1);         // write one '-'
606            ++ptr;
607        }
608    }
609    bitpos = WriteBits(buffer, bitpos, 0x07, 3);                // write end flag (111)
610   
611    ps->ps_SeqDataCompressedSize = bitpos;
612    ps->ps_SeqDataCompressed     = (UBYTE*) realloc(buffer, (bitpos >> 3) + 1);
613    if (ps->ps_SeqDataCompressed == NULL)
614    {
615        printf("Error: Could not get enough memory to compress sequences with dots and hyphens\n");
616        return -1;
617    }
618    if (ReadBits(buffer, bitpos - 3, 3) != 0x07)
619    {
620        printf("Error Compressing SeqData (with '.' and '-')\tSpecies: %s\n", ps->ps_Name);
621        return -1;
622    }
623   
624    pg->pg_TotalSeqCompressedSize += ((ps->ps_SeqDataCompressedSize >> 3) + 1); // convert from bit to byte
625    return len;
626}
627
628
629/* /// "ComplementSequence()" */
630void ComplementSequence(struct PTPanGlobal *pg, STRPTR seqstr)
631{
632  UBYTE code;
633  /* flip A<->T and C<->G */
634  while((code = *seqstr))
635  {
636    *seqstr++ = pg->pg_DecompressTable[pg->pg_ComplementTable[pg->pg_CompressTable[code]]];
637  }
638}
639/* \\\ */
640
641/* /// "ReverseSequence()" */
642void ReverseSequence(struct PTPanGlobal *, STRPTR seqstr)
643{
644  char code;
645  STRPTR leftptr = seqstr;
646  STRPTR rightptr = &seqstr[strlen(seqstr)];
647
648  /* reverse the sequence string */
649  while(leftptr < rightptr)
650  {
651    code = *leftptr;
652    *leftptr++ = *--rightptr;
653    *rightptr = code;
654  }
655}
656/* \\\ */
657
658/* /// "OpenDataBase()"
659   initially open the database and read the species data.
660 */
661BOOL OpenDataBase(struct PTPanGlobal *pg)
662{
663  GB_set_verbose();
664  /* open the database */
665  if(!(pg->pg_MainDB = GB_open(pg->pg_DBName, "r")))
666  {
667    printf("Error reading file %s\n", pg->pg_DBName);
668    return(FALSE);
669  }
670  GB_begin_transaction(pg->pg_MainDB);
671  /* open the species data */
672  if(!(pg->pg_SpeciesData = GB_find(pg->pg_MainDB, "species_data", down_level)))
673  {
674    printf("Database %s is empty\n", pg->pg_DBName);
675    return(FALSE);
676  }
677  /* add the extended data container */
678  pg->pg_SaiData = GBT_get_SAI_data(pg->pg_MainDB);
679  pg->pg_AlignmentName = GBT_get_default_alignment(pg->pg_MainDB);
680
681  printf("Building PT-Server for alignment '%s'...\n", pg->pg_AlignmentName);
682
683  GB_commit_transaction(pg->pg_MainDB);
684
685  return(TRUE);
686}
687/* \\\ */
688
689/* /// "LoadEcoliSequence()" */
690BOOL LoadEcoliSequence(struct PTPanGlobal *pg)
691{
692  GBDATA *gb_extdata;
693  STRPTR defaultref = GBT_get_default_ref(pg->pg_MainDB);
694
695  gb_extdata = GBT_find_SAI_rel_SAI_data(pg->pg_SaiData, defaultref);
696  free(defaultref);
697
698  /* free memory if previously allocated */
699  freeset(pg->pg_EcoliSeq, NULL);
700  freeset(pg->pg_EcoliBaseTable, NULL);
701
702  /* prepare ecoli sequence */
703  if(gb_extdata)
704  {
705    GBDATA *gb_data;
706    gb_data = GBT_read_sequence(gb_extdata, pg->pg_AlignmentName);
707    if(gb_data)
708    {
709      ULONG abspos = 0;
710      STRPTR srcseq;
711      ULONG *posptr;
712
713      /* load sequence */
714      pg->pg_EcoliSeqSize = GB_read_string_count(gb_data);
715      pg->pg_EcoliSeq = GB_read_string(gb_data);
716
717      /* calculate look up table to speed up ecoli position calculation */
718      pg->pg_EcoliBaseTable = (ULONG *) calloc(pg->pg_EcoliSeqSize + 1, sizeof(ULONG));
719      if(pg->pg_EcoliBaseTable)
720      {
721        srcseq = pg->pg_EcoliSeq;
722        posptr = pg->pg_EcoliBaseTable;                     // TODO: check if this works well
723        while(*srcseq)                                      //       with ALLOWDOTSINMATCH
724        {
725          *posptr++ = abspos;
726          if(pg->pg_SeqCodeValidTable[*srcseq++])
727          {
728            abspos++;
729          }
730        }
731        *posptr = abspos;
732        return(TRUE);
733      } else {
734        printf("Out of memory for ecoli position table!\n");
735      }
736    }
737  }
738  return(FALSE);
739}
740/* \\\ */
741
742/* /// "FreeAllSpecies()" */
743void FreeAllSpecies(struct PTPanGlobal *pg)
744{
745  struct PTPanSpecies *ps;
746  FlushCache(pg->pg_SpeciesCache);
747  ps = (struct PTPanSpecies *) pg->pg_Species.lh_Head;
748  while(ps->ps_Node.ln_Succ)
749  {
750    FreeCacheNode(pg->pg_SpeciesCache, ps->ps_CacheNode);
751    Remove(&ps->ps_Node);
752    free(ps->ps_Name);
753    free(ps->ps_FullName);
754    freeset(ps, (struct PTPanSpecies *) pg->pg_Species.lh_Head);
755  }
756  FreeBinTree(pg->pg_SpeciesBinTree);
757  pg->pg_SpeciesBinTree = NULL;
758  pg->pg_NumSpecies = 0;
759  pg->pg_TotalSeqSize = 0;
760  pg->pg_TotalSeqCompressedSize = 0;
761  pg->pg_TotalRawSize = 0;
762  pg->pg_TotalRawBits = 0;
763}
764/* \\\ */
765
766/* /// "CacheSpeciesLoad()" */
767BOOL CacheSpeciesLoad(struct CacheHandler *, struct PTPanSpecies *ps)
768{
769  //struct PTPanGlobal *pg = (struct PTPanGlobal *pg) ch->ch_UserData;
770
771  if(!ps->ps_SeqData)
772  {
773    /* load alignment data */
774    ps->ps_SeqData = GB_read_string(ps->ps_SeqDataDB);
775    return(TRUE);
776  }
777  return(FALSE);
778}
779/* \\\ */
780
781/* /// "CacheSpeciesUnload()" */
782BOOL CacheSpeciesUnload(struct CacheHandler *, struct PTPanSpecies *ps)
783{
784  //struct PTPanGlobal *pg = (struct PTPanGlobal *pg) ch->ch_UserData;
785
786  if(ps->ps_SeqData)
787  {
788    /* load alignment data */
789    freeset(ps->ps_SeqData, NULL);
790    return(TRUE);
791  }
792  return(FALSE);
793}
794/* \\\ */
795
796/* /// "CacheSpeciesSize()" */
797ULONG CacheSpeciesSize(struct CacheHandler *, struct PTPanSpecies *ps)
798{
799  //struct PTPanGlobal *pg = (struct PTPanGlobal *pg) ch->ch_UserData;
800  return(ps->ps_SeqDataSize);
801}
802/* \\\ */
803
804/* /// "LoadSpecies()" */
805BOOL LoadSpecies(struct PTPanGlobal *pg)
806{
807  GBDATA *gb_species;
808  struct PTPanSpecies *ps;
809  ULONG ignorecount;
810
811  ULONG longestali = 0;
812
813  /* NOTE: This database scan should avoided. We should store all the
814     data that's built up here in a secondary file. That way we would
815     get rid of the loading and scanning of the sequence data in low
816     memory mode */
817 
818  /* open data base */
819  if(!(OpenDataBase(pg)))
820  {
821    printf("Failed to open database %s!\n", pg->pg_DBName);
822    exit(1);
823  }
824
825  GB_begin_transaction(pg->pg_MainDB);
826
827  /* get the ecoli reference sequence */
828  LoadEcoliSequence(pg);
829
830  if(pg->pg_TotalRawSize) /* seems like we've already have the list */
831  {
832    /* only load in alignment data */
833    if(pg->pg_LowMemoryMode)
834    {
835      GB_commit_transaction(pg->pg_MainDB);
836      return(TRUE);
837    }
838    printf("Reloading alignment data...\n");
839    ps = (struct PTPanSpecies *) pg->pg_Species.lh_Head;
840    while(ps->ps_Node.ln_Succ)
841    {
842      ps->ps_CacheNode = CacheLoadData(pg->pg_SpeciesCache, ps->ps_CacheNode, ps);
843      ps = (struct PTPanSpecies *) ps->ps_Node.ln_Succ;
844    }
845    GB_commit_transaction(pg->pg_MainDB);
846    return(TRUE);
847  }
848
849  FreeAllSpecies(pg);
850
851  /* add the species to the list */
852  pg->pg_MaxBaseLength = 0;
853  pg->pg_TotalSeqSize = 0;
854  pg->pg_TotalSeqCompressedSize = 0;
855  pg->pg_TotalRawSize = 0;
856  pg->pg_NumSpecies = 0;
857  ignorecount = 0;
858  for(gb_species = GBT_first_species_rel_species_data(pg->pg_SpeciesData);
859      gb_species;
860      gb_species = GBT_next_species(gb_species))
861  {
862    GBDATA *gb_name;
863    GBDATA *gb_ali;
864    GBDATA *gb_data;
865    STRPTR spname;
866
867    /* get name */
868    gb_name = GB_find(gb_species, "name", down_level);
869    if(!gb_name)
870    {
871      ignorecount++;
872      continue; /* huh? couldn't find the name of the species? */
873    }
874    spname = GB_read_string(gb_name);
875
876    /* get alignments */
877    gb_ali = GB_find(gb_species, pg->pg_AlignmentName, down_level);
878    if(!gb_ali)
879    {
880      ignorecount++;
881      free(spname);
882      continue; /* too bad, no alignment information found */
883    }
884    gb_data = GB_find(gb_ali, "data", down_level);
885    if(!gb_data)
886    {
887      ignorecount++;
888      fprintf(stderr, "Species '%s' has no data in '%s'\n",
889      spname, pg->pg_AlignmentName);
890      free(spname);
891      continue;
892    }
893
894    /* okay, cannot fail now anymore, allocate a PTPanSpecies structure */
895    ps = (struct PTPanSpecies *) calloc(1, sizeof(struct PTPanSpecies));
896
897    /* write name and long name into the structure */
898    ps->ps_SpeciesDB = gb_species;
899    ps->ps_SeqDataDB = gb_data;
900    ps->ps_IsGroup = TRUE;
901    ps->ps_Name = spname;
902    gb_name = GB_find(gb_species, "full_name", down_level);
903    if(gb_name)
904    {
905      ps->ps_FullName = GB_read_string(gb_name);
906    } else {
907      ps->ps_FullName = strdup(ps->ps_Name);
908    }
909
910    /* (temporarily) load in the alignment and compress it */
911    ps->ps_SeqDataSize = GB_read_string_count(ps->ps_SeqDataDB);
912    ps->ps_SeqData = GB_read_string(ps->ps_SeqDataDB);
913
914    if(strlen(ps->ps_SeqData) != ps->ps_SeqDataSize)
915    {
916      printf("%s is corrupt, ignoring!\n", ps->ps_Name);
917      ignorecount++;
918      FreeCacheNode(pg->pg_SpeciesCache, ps->ps_CacheNode);
919      free(ps->ps_SeqData);
920      free(ps->ps_Name);
921      free(ps->ps_FullName);
922      free(ps);
923      continue; /* too bad, alignment was somehow corrupt */
924    }
925
926#if 0 /* not required anymore */
927    if(pg->pg_LowMemoryMode) /* free memory in low memory case */
928    {
929      CacheUnloadData(pg->pg_SpeciesCache, ps->ps_CacheNode);
930    }
931#endif
932
933    ps->ps_RawDataSize = CompressSequenceWithDotsAndHyphens(pg, ps);
934    freeset(ps->ps_SeqData, NULL);
935    if (ps->ps_RawDataSize < 0)                                 // TODO: problem, ps_RawDataSize is unsigned...
936    {
937        printf("%s is corrupt, ignoring!\n", ps->ps_Name);
938        ignorecount++;
939        FreeCacheNode(pg->pg_SpeciesCache, ps->ps_CacheNode);
940        free(ps->ps_Name);
941        free(ps->ps_FullName);
942        free(ps);
943        continue;
944    }
945
946    /* enter global absolute offset in index */
947    ps->ps_AbsOffset = pg->pg_TotalRawSize;
948    ps->ps_Node.ln_Pri = ps->ps_AbsOffset;
949    pg->pg_TotalSeqSize += ps->ps_SeqDataSize;
950    pg->pg_TotalRawSize += ps->ps_RawDataSize;
951    if(ps->ps_RawDataSize > pg->pg_MaxBaseLength)
952    {
953      pg->pg_MaxBaseLength = ps->ps_RawDataSize;
954    }
955    if(ps->ps_SeqDataSize > longestali)
956    {
957      longestali = ps->ps_SeqDataSize;
958    }
959    /* Init complete, now add it to the list */
960    //printf("Added %s ('%s')...\n", ps->ps_Name, ps->ps_FullName);
961    AddTail(&pg->pg_Species, &ps->ps_Node);
962    pg->pg_NumSpecies++;
963
964    /* visual feedback */
965    if((pg->pg_NumSpecies % 10) == 0)
966    {
967      if(pg->pg_NumSpecies % 500)
968      {
969    printf(".");
970    fflush(stdout);
971      } else {
972    printf(".%6ld (%6lld KB)\n", pg->pg_NumSpecies, (ps->ps_AbsOffset >> 10));
973      }
974    }
975  }
976
977  /* calculate bits usage */
978  pg->pg_TotalRawBits = 8;
979  while((1UL << pg->pg_TotalRawBits) < pg->pg_TotalRawSize)
980  {
981    pg->pg_TotalRawBits++;
982  }
983
984  /* build tree to find species quicker by raw position */
985  pg->pg_SpeciesBinTree = BuildBinTree(&pg->pg_Species);
986
987  printf("\nLongest sequence was %ld bases (alignment size %ld).\n\n",
988    pg->pg_MaxBaseLength, longestali);
989  printf("Database contains %ld valid species (%ld ignored).\n"
990    "%lld bytes alignment data (%lld bases).\n",
991    pg->pg_NumSpecies, ignorecount, pg->pg_TotalSeqSize, pg->pg_TotalRawSize);
992
993  printf("Compressed sequence data (with dots and hyphens): %llu byte (%llu kb, %llu mb)\n",
994    pg->pg_TotalSeqCompressedSize, pg->pg_TotalSeqCompressedSize >> 10, pg->pg_TotalSeqCompressedSize >> 20);
995
996
997  pg->pg_Bench.ts_CollectDB = BenchTimePassed(pg);
998
999  /* done! */
1000  GB_commit_transaction(pg->pg_MainDB);
1001  return(TRUE);
1002}
1003/* \\\ */
1004
1005/* /// "LoadIndexHeader()" */
1006BOOL LoadIndexHeader(struct PTPanGlobal *pg)
1007{
1008  FILE *fh;
1009  struct PTPanSpecies *ps;
1010  struct PTPanPartition *pp;
1011  ULONG numspec;
1012  ULONG ignorecount;
1013  ULONG endian = 0;
1014  UWORD version = 0;
1015  UWORD cnt;
1016  char idstr[16];
1017
1018  FreeAllSpecies(pg);
1019  FreeAllPartitions(pg);
1020
1021  /* Does similar things as LoadSpecies() */
1022  if(!(fh = fopen(pg->pg_IndexName, "r")))
1023  {
1024    printf("ERROR: Couldn't open index %s!\n", pg->pg_IndexName);
1025    return(FALSE);
1026  }
1027
1028  /* read id string */
1029  fread(idstr, 16, 1, fh);
1030  if(strncmp("TUM PeTerPAN IDX", idstr, 16))
1031  {
1032    printf("ERROR: This is no index file!\n");
1033    fclose(fh);
1034    return(FALSE);
1035  }
1036
1037  /* check endianness */
1038  fread(&endian, sizeof(endian), 1, fh);
1039  if(endian != 0x01020304)
1040  {
1041    printf("ERROR: Index was created on a different endian machine (%08lx)!\n", endian);
1042    fclose(fh);
1043    return(FALSE);
1044  }
1045
1046  /* check file structure version */
1047  fread(&version, sizeof(version), 1, fh);
1048  if(version != FILESTRUCTVERSION)
1049  {
1050    printf("ERROR: Index (V%d.%d) does not match current file structure version (V%d.%d)!\n",
1051    version >> 8, version & 0xff,
1052    FILESTRUCTVERSION >> 8, FILESTRUCTVERSION & 0xff);
1053    fclose(fh);
1054    return(FALSE);
1055  }
1056
1057  /* read the rest of the important data */
1058  fread(&pg->pg_UseStdSfxTree, sizeof(pg->pg_UseStdSfxTree), 1, fh);
1059  fread(&pg->pg_AlphaSize    , sizeof(pg->pg_AlphaSize)    , 1, fh);
1060  fread(&pg->pg_TotalSeqSize , sizeof(pg->pg_TotalSeqSize) , 1, fh);
1061  fread(&pg->pg_TotalSeqCompressedSize, sizeof(pg->pg_TotalSeqCompressedSize) , 1, fh);
1062  fread(&pg->pg_TotalRawSize , sizeof(pg->pg_TotalRawSize) , 1, fh);
1063  fread(&pg->pg_TotalRawBits , sizeof(pg->pg_TotalRawBits) , 1, fh);
1064  fread(&pg->pg_AllHashSum   , sizeof(pg->pg_AllHashSum)   , 1, fh);
1065  fread(&pg->pg_NumSpecies   , sizeof(pg->pg_NumSpecies)   , 1, fh);
1066  fread(&pg->pg_NumPartitions, sizeof(pg->pg_NumPartitions), 1, fh);
1067  fread(&pg->pg_MaxPrefixLen , sizeof(pg->pg_MaxPrefixLen) , 1, fh);
1068
1069  // read Ecoli Sequence
1070  /* free memory if previously allocated */
1071  freeset(pg->pg_EcoliSeq, NULL);
1072  freeset(pg->pg_EcoliBaseTable, NULL);
1073
1074  fread(&pg->pg_EcoliSeqSize, sizeof(pg->pg_EcoliSeqSize), 1, fh);
1075  if (pg->pg_EcoliSeqSize > 0)
1076  {                                                                                 // only read EcoliSeq and
1077      pg->pg_EcoliSeq = (char*) malloc(pg->pg_EcoliSeqSize + 1);                    // EcoliBaseTable if we
1078      if(!pg->pg_EcoliSeq)                                                          // fonud them earlier in
1079      {                                                                             // the build process...
1080        printf("Out of memory allocating buffer for pg->pg_EcoliSeq!\n");           // aka if pg_EcoliSeqSize
1081        return(FALSE);                                                              // is greater than zero
1082      }
1083      fread(pg->pg_EcoliSeq, 1, pg->pg_EcoliSeqSize + 1, fh);
1084
1085      pg->pg_EcoliBaseTable = (ULONG *) calloc(pg->pg_EcoliSeqSize + 1, sizeof(ULONG));
1086      if(!pg->pg_EcoliBaseTable)
1087      {
1088        printf("Out of memory allocating buffer for pg->pg_EcoliBaseTable!\n");
1089        return(FALSE);
1090      }
1091      fread(pg->pg_EcoliBaseTable, sizeof(ULONG), pg->pg_EcoliSeqSize + 1, fh);
1092  }
1093
1094  /* fix partition loading routine for standard suffix tree */
1095  if(pg->pg_UseStdSfxTree)
1096  {
1097    pg->pg_PartitionCache->ch_LoadFunc = (BOOL (*)(struct CacheHandler *, APTR)) CacheStdSuffixPartitionLoad;
1098    pg->pg_PartitionCache->ch_UnloadFunc = (BOOL (*)(struct CacheHandler *, APTR)) CacheStdSuffixPartitionUnload;
1099  }
1100
1101  /* add the species to the list */
1102  pg->pg_SpeciesMap = (struct PTPanSpecies **) calloc(sizeof(struct PTPanSpecies *),
1103                        pg->pg_NumSpecies);
1104  ignorecount = 0;
1105  numspec = 0;
1106  while(numspec < pg->pg_NumSpecies)
1107  {
1108    STRPTR spname;
1109    STRPTR filespname;
1110    STRPTR fullname;
1111    UWORD len;
1112    BOOL obsolete;
1113
1114    obsolete = FALSE;
1115    fullname = NULL;
1116
1117    /* get name of species on disk */
1118    fread(&len, sizeof(len), 1, fh);
1119    filespname = (STRPTR) calloc(len+1, 1);
1120    fread(filespname, len, 1, fh);
1121   
1122    fread(&len, sizeof(len), 1, fh);
1123    fullname = (STRPTR) calloc(len+1, 1);
1124    fread(fullname, len, 1, fh);
1125
1126    /* okay, cannot fail now anymore, allocate a PTPanSpecies structure */
1127    ps = (struct PTPanSpecies *) calloc(1, sizeof(struct PTPanSpecies));
1128    pg->pg_SpeciesMap[numspec] = ps;
1129    ps->ps_Num = numspec + ignorecount;
1130
1131    /* write name and long name into the structure */
1132    ps->ps_SpeciesDB = NULL;
1133    ps->ps_SeqDataDB = NULL;
1134    ps->ps_IsGroup = FALSE;
1135    ps->ps_Obsolete = obsolete;
1136    ps->ps_Name = filespname;
1137    ps->ps_FullName = fullname;
1138   
1139    /* load in the alignment information */
1140    fread(&ps->ps_SeqDataSize, sizeof(ps->ps_SeqDataSize), 1, fh);
1141    fread(&ps->ps_RawDataSize, sizeof(ps->ps_RawDataSize), 1, fh);
1142    fread(&ps->ps_AbsOffset, sizeof(ps->ps_AbsOffset), 1, fh);
1143    fread(&ps->ps_SeqHash, sizeof(ps->ps_SeqHash), 1, fh);
1144    fread(&ps->ps_SeqDataCompressedSize, sizeof(ps->ps_SeqDataCompressedSize), 1, fh);
1145    ps->ps_SeqDataCompressed = (UBYTE*) malloc((ps->ps_SeqDataCompressedSize >> 3) + 1);
1146    if(!ps->ps_SeqDataCompressed)
1147    {
1148      printf("Out of memory allocating buffer for compressed SeqData (with '.' and '-')!\n");
1149      return(FALSE);
1150    }
1151    fread(ps->ps_SeqDataCompressed, 1, ((ps->ps_SeqDataCompressedSize >> 3) + 1), fh);
1152    ps->ps_Node.ln_Pri = ps->ps_AbsOffset;
1153
1154    /* Init complete, now add it to the list */
1155    //printf("Added %s ('%s')...\n", ps->ps_Name, ps->ps_FullName);
1156    AddTail(&pg->pg_Species, &ps->ps_Node);
1157    numspec++;
1158
1159    /* visual feedback */
1160    if((numspec % 20) == 0)
1161    {
1162      if(numspec % 1000)
1163      {
1164    printf(".");
1165    fflush(stdout);
1166      } else {
1167    printf(".%6ld (%6lld KB)\n", numspec, (ps->ps_AbsOffset>>10));
1168      }
1169    }
1170  }
1171
1172  if(numspec != pg->pg_NumSpecies)
1173  {
1174    printf("ERROR: Number of species has changed!\n");
1175    fclose(fh);
1176    return(FALSE);
1177  }
1178
1179  /* build tree to find species quicker by raw position */
1180  pg->pg_SpeciesBinTree = BuildBinTree(&pg->pg_Species);
1181
1182  /* build a species name hash to mark species in groups */
1183  pg->pg_SpeciesNameHash = GBS_create_hash(SPECIESNAMEHASHSIZE, GB_IGNORE_CASE);    // TODO: JB: check if GB_IGNORE_CASE
1184                                                                                    //           is right (was 0)
1185  ps = (struct PTPanSpecies *) pg->pg_Species.lh_Head;
1186  while(ps->ps_Node.ln_Succ)
1187  {
1188    GBS_write_hash(pg->pg_SpeciesNameHash, ps->ps_Name, ps->ps_Num + 1);
1189    ps = (struct PTPanSpecies *) ps->ps_Node.ln_Succ;
1190  }
1191
1192  printf("\n\nDatabase contains %ld valid species (%ld ignored).\n"
1193    "%lld bytes alignment data (%lld bases).\n",
1194    pg->pg_NumSpecies, ignorecount, pg->pg_TotalSeqSize, pg->pg_TotalRawSize);
1195  printf("Compressed sequence data (with dots and hyphens): %llu byte (%llu kb, %llu mb)\n",
1196    pg->pg_TotalSeqCompressedSize, pg->pg_TotalSeqCompressedSize >> 10, pg->pg_TotalSeqCompressedSize >> 20);
1197
1198  printf("Number of partitions: %d\n", pg->pg_NumPartitions);
1199
1200  for(cnt = 0; cnt < pg->pg_NumPartitions; cnt++)
1201  {
1202    ULONG pcnt;
1203
1204    pp = (struct PTPanPartition *) calloc(1, sizeof(struct PTPanPartition));
1205    if(!pp)
1206    {
1207      fclose(fh);
1208      return(FALSE); /* out of memory */
1209    }
1210    pp->pp_PTPanGlobal = pg;
1211    fread(&pp->pp_ID, sizeof(pp->pp_ID), 1, fh);
1212    fread(&pp->pp_Prefix, sizeof(pp->pp_Prefix), 1, fh);
1213    fread(&pp->pp_PrefixLen, sizeof(pp->pp_PrefixLen), 1, fh);
1214    fread(&pp->pp_Size, sizeof(pp->pp_Size), 1, fh);
1215    fread(&pp->pp_RawOffset, sizeof(pp->pp_RawOffset), 1, fh);
1216
1217    pp->pp_PartitionName = (STRPTR) calloc(strlen(pg->pg_IndexName) + 5, 1);
1218    if(pg->pg_UseStdSfxTree)
1219    {
1220      strncpy(pp->pp_PartitionName, pg->pg_IndexName, strlen(pg->pg_IndexName) - 3);
1221      sprintf(&pp->pp_PartitionName[strlen(pg->pg_IndexName) - 3], "sfx",
1222      pp->pp_ID);
1223    } else {
1224      strncpy(pp->pp_PartitionName, pg->pg_IndexName, strlen(pg->pg_IndexName) - 2);
1225      sprintf(&pp->pp_PartitionName[strlen(pg->pg_IndexName) - 2], "t%03ld",
1226      pp->pp_ID);
1227    }
1228    /* generate partition string */
1229    pcnt = pp->pp_PrefixLen;
1230    pp->pp_PrefixSeq = (STRPTR) malloc(pcnt + 1);
1231    while(pcnt)
1232    {
1233      pp->pp_PrefixSeq[pp->pp_PrefixLen - pcnt] = pg->pg_DecompressTable[(pp->pp_Prefix /
1234                    pg->pg_PowerTable[pcnt - 1]) % pg->pg_AlphaSize];
1235      pcnt--;
1236    }
1237    pp->pp_PrefixSeq[pp->pp_PrefixLen] = 0;
1238    //printf("Part %ld (%ld = '%s')\n", pp->pp_ID, pp->pp_Prefix, pp->pp_PrefixSeq);
1239    /* partition ready, add it */
1240    AddTail(&pg->pg_Partitions, &pp->pp_Node);
1241  }
1242  fclose(fh);
1243
1244  /* done! */
1245  return(TRUE);
1246}
1247/* \\\ */
1248
1249/* /// "LoadAllPartitions()" */
1250BOOL LoadAllPartitions(struct PTPanGlobal *pg)
1251{
1252  struct PTPanPartition *pp;
1253
1254  /* load in each partition */
1255  pp = (struct PTPanPartition *) pg->pg_Partitions.lh_Head;
1256  while(pp->pp_Node.ln_Succ)
1257  {
1258    if(!(pp->pp_CacheNode = CacheLoadData(pg->pg_PartitionCache, pp->pp_CacheNode, pp)))
1259    {
1260      return(FALSE);
1261    }
1262    pp = (struct PTPanPartition *) pp->pp_Node.ln_Succ;
1263  }
1264  return(TRUE);
1265}
1266/* \\\ */
1267
1268/* /// "FreeAllPartitions()" */
1269void FreeAllPartitions(struct PTPanGlobal *pg)
1270{
1271  struct PTPanPartition *pp;
1272  FlushCache(pg->pg_PartitionCache);
1273  pp = (struct PTPanPartition *) pg->pg_Partitions.lh_Head;
1274  while(pp->pp_Node.ln_Succ)
1275  {
1276    FreeCacheNode(pg->pg_PartitionCache, pp->pp_CacheNode);
1277    Remove(&pp->pp_Node);
1278    free(pp->pp_PrefixSeq);
1279    FreeHuffmanTree(pp->pp_BranchTree);
1280    FreeHuffmanTree(pp->pp_ShortEdgeTree);
1281    FreeHuffmanTree(pp->pp_LongEdgeLenTree);
1282    freeset(pp, (struct PTPanPartition *) pg->pg_Partitions.lh_Head);
1283  }
1284  pg->pg_NumPartitions = 0;
1285}
1286/* \\\ */
1287
Note: See TracBrowser for help on using the repository browser.