source: tags/arb_5.5/ptpan/PT_treepack.cxx

Last change on this file was 5908, checked in by westram, 15 years ago
  • source files with identical names are really a pain when using valgrind
File size: 88.9 KB
Line 
1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4#include <unistd.h>
5#include <sys/stat.h>
6#include <PT_server.h>
7#include <PT_server_prototypes.h>
8#include "ptpan.h"
9#include "pt_prototypes.h"
10#include <sys/mman.h>
11#include <math.h>
12
13/* /// "WriteIndexHeader()" */
14BOOL WriteIndexHeader(struct PTPanGlobal *pg)
15{
16  struct PTPanSpecies *ps;
17  struct PTPanPartition *pp;
18  FILE *fh = pg->pg_IndexFile;
19  ULONG endian = 0x01020304;
20  UWORD version = FILESTRUCTVERSION;
21
22  /* write 16 bytes of ID */
23  fputs("TUM PeTerPAN IDX", fh);
24  /* write a specific code word to allow detection of correct endianess */
25  fwrite(&endian, sizeof(endian), 1, fh);
26  /* write file version */
27  fwrite(&version, sizeof(version), 1, fh);
28  /* write some global data */
29  fwrite(&pg->pg_UseStdSfxTree, sizeof(pg->pg_UseStdSfxTree), 1, fh);
30  fwrite(&pg->pg_AlphaSize    , sizeof(pg->pg_AlphaSize)    , 1, fh);
31  fwrite(&pg->pg_TotalSeqSize , sizeof(pg->pg_TotalSeqSize) , 1, fh);
32  fwrite(&pg->pg_TotalSeqCompressedSize, sizeof(pg->pg_TotalSeqCompressedSize) , 1, fh);
33  fwrite(&pg->pg_TotalRawSize , sizeof(pg->pg_TotalRawSize) , 1, fh);
34  fwrite(&pg->pg_TotalRawBits , sizeof(pg->pg_TotalRawBits) , 1, fh);
35  fwrite(&pg->pg_AllHashSum   , sizeof(pg->pg_AllHashSum)   , 1, fh);
36  fwrite(&pg->pg_NumSpecies   , sizeof(pg->pg_NumSpecies)   , 1, fh);
37  fwrite(&pg->pg_NumPartitions, sizeof(pg->pg_NumPartitions), 1, fh);
38  fwrite(&pg->pg_MaxPrefixLen , sizeof(pg->pg_MaxPrefixLen) , 1, fh);
39
40  // write Ecoli Sequence
41  fwrite(&pg->pg_EcoliSeqSize  , sizeof(pg->pg_EcoliSeqSize) , 1        , fh);
42  if (pg->pg_EcoliSeqSize > 0)
43  {                                                                                 // only write EcoliSeq and
44    fwrite(pg->pg_EcoliSeq       , 1            , pg->pg_EcoliSeqSize + 1 , fh);    // EcoliBaseTable if we
45    fwrite(pg->pg_EcoliBaseTable , sizeof(ULONG), pg->pg_EcoliSeqSize + 1 , fh);    // found them earlier...
46  } 
47
48  /* write species info */
49  ps = (struct PTPanSpecies *) pg->pg_Species.lh_Head;
50  while(ps->ps_Node.ln_Succ)
51  {
52    /* write names */
53    UWORD len;
54    len = strlen(ps->ps_Name);
55    fwrite(&len, sizeof(len), 1, fh);
56    fputs(ps->ps_Name, fh);
57
58    len = strlen(ps->ps_FullName);
59    fwrite(&len, sizeof(len), 1, fh);
60    fputs(ps->ps_FullName, fh);
61
62    /* write some more relevant data */
63    fwrite(&ps->ps_SeqDataSize, sizeof(ps->ps_SeqDataSize), 1, fh);
64    fwrite(&ps->ps_RawDataSize, sizeof(ps->ps_RawDataSize), 1, fh);
65    fwrite(&ps->ps_AbsOffset, sizeof(ps->ps_AbsOffset), 1, fh);
66    fwrite(&ps->ps_SeqHash, sizeof(ps->ps_SeqHash), 1, fh);
67    fwrite(&ps->ps_SeqDataCompressedSize, sizeof(ps->ps_SeqDataCompressedSize), 1, fh);     // save compressed Seq Data
68    fwrite(ps->ps_SeqDataCompressed, 1, ((ps->ps_SeqDataCompressedSize >> 3) + 1), fh);     // .
69    free(ps->ps_SeqDataCompressed);                                                         // and free the memory
70    ps = (struct PTPanSpecies *) ps->ps_Node.ln_Succ;
71  }
72
73  /* write partition info */
74  pp = (struct PTPanPartition *) pg->pg_Partitions.lh_Head;
75  while(pp->pp_Node.ln_Succ)
76  {
77    fwrite(&pp->pp_ID, sizeof(pp->pp_ID), 1, fh);
78    fwrite(&pp->pp_Prefix, sizeof(pp->pp_Prefix), 1, fh);
79    fwrite(&pp->pp_PrefixLen, sizeof(pp->pp_PrefixLen), 1, fh);
80    fwrite(&pp->pp_Size, sizeof(pp->pp_Size), 1, fh);
81    fwrite(&pp->pp_RawOffset, sizeof(pp->pp_RawOffset), 1, fh);
82    pp = (struct PTPanPartition *) pp->pp_Node.ln_Succ;
83  }
84  return(TRUE);
85}
86/* \\\ */
87
88/* /// "WriteTreeHeader()" */
89BOOL WriteTreeHeader(struct PTPanPartition *pp)
90{
91  struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
92  FILE *fh = pp->pp_PartitionFile;
93
94  /* write 16 bytes of ID */
95  fputs("TUM PeTerPAN P3I", fh);
96  /* checksum to verify */
97  fwrite(&pg->pg_AllHashSum     , sizeof(pg->pg_AllHashSum)     , 1, fh);
98  /* write partition data */
99  fwrite(&pp->pp_ID             , sizeof(pp->pp_ID)             , 1, fh);
100  fwrite(&pp->pp_Prefix         , sizeof(pp->pp_Prefix)         , 1, fh);
101  fwrite(&pp->pp_PrefixLen      , sizeof(pp->pp_PrefixLen)      , 1, fh);
102  fwrite(&pp->pp_Size           , sizeof(pp->pp_Size)           , 1, fh);
103  fwrite(&pp->pp_RawOffset      , sizeof(pp->pp_RawOffset)      , 1, fh);
104  fwrite(&pp->pp_TreePruneDepth , sizeof(pp->pp_TreePruneDepth) , 1, fh);
105  fwrite(&pp->pp_TreePruneLength, sizeof(pp->pp_TreePruneLength), 1, fh);
106  fwrite(&pp->pp_LongDictSize   , sizeof(pp->pp_LongDictSize)   , 1, fh);
107  fwrite(&pp->pp_LongRelPtrBits , sizeof(pp->pp_LongRelPtrBits) , 1, fh);
108
109  /* branch code */
110  WriteHuffmanTree(pp->pp_BranchCode, 1UL << pg->pg_AlphaSize, fh);
111
112  /* short edge code */
113  WriteHuffmanTree(pp->pp_ShortEdgeCode, 1UL << (pg->pg_BitsUseTable[SHORTEDGEMAX]+1), fh);
114
115  /* long edge len code */
116  WriteHuffmanTree(pp->pp_LongEdgeLenCode, pp->pp_LongEdgeLenSize, fh);
117
118  /* long dictionary */
119  pp->pp_LongDictRawSize = ((pp->pp_LongDictSize / MAXCODEFITLONG) + 1) * sizeof(ULONG);
120  fwrite(&pp->pp_LongDictRawSize, sizeof(pp->pp_LongDictRawSize), 1, fh);
121  fwrite(pp->pp_LongDictRaw, pp->pp_LongDictRawSize, 1, fh);
122
123  /* write tree length */
124  fwrite(&pp->pp_DiskTreeSize, sizeof(pp->pp_DiskTreeSize)  , 1, fh);
125
126  return(TRUE);
127}
128/* \\\ */
129
130/* /// "CachePartitionLoad()" */
131BOOL CachePartitionLoad(struct CacheHandler *, struct PTPanPartition *pp)
132{
133  struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
134  FILE *fh;
135  char idstr[16];
136  ULONG hashsum;
137  ULONG len;
138
139  if(!(fh = fopen(pp->pp_PartitionName, "r")))
140  {
141    printf("Couldn't open partition file %s\n", pp->pp_PartitionName);
142    return(FALSE);
143  }
144
145  printf("Loading partition file %s... ", pp->pp_PartitionName);
146
147  /* read id string */
148  fread(idstr, 16, 1, fh);
149  if(strncmp("TUM PeTerPAN P3I", idstr, 16))
150  {
151    printf("ERROR: This is no partition file!\n");
152    fclose(fh);
153    return(FALSE);
154  }
155
156  fread(&hashsum, sizeof(pg->pg_AllHashSum), 1, fh);
157  if(hashsum != pg->pg_AllHashSum)
158  {
159    printf("ERROR: Partition file does not match index file!\n");
160    fclose(fh);
161    return(FALSE);
162  }
163
164  /* read partition data */
165  fread(&pp->pp_ID             , sizeof(pp->pp_ID)             , 1, fh);
166  fread(&pp->pp_Prefix         , sizeof(pp->pp_Prefix)         , 1, fh);
167  fread(&pp->pp_PrefixLen      , sizeof(pp->pp_PrefixLen)      , 1, fh);
168  fread(&pp->pp_Size           , sizeof(pp->pp_Size)           , 1, fh);
169  fread(&pp->pp_RawOffset      , sizeof(pp->pp_RawOffset)      , 1, fh);
170  fread(&pp->pp_TreePruneDepth , sizeof(pp->pp_TreePruneDepth) , 1, fh);
171  fread(&pp->pp_TreePruneLength, sizeof(pp->pp_TreePruneLength), 1, fh);
172  fread(&pp->pp_LongDictSize   , sizeof(pp->pp_LongDictSize)   , 1, fh);
173  fread(&pp->pp_LongRelPtrBits , sizeof(pp->pp_LongRelPtrBits) , 1, fh);
174
175  /* read huffman tables */
176  pp->pp_BranchTree = ReadHuffmanTree(fh);
177  pp->pp_ShortEdgeTree = ReadHuffmanTree(fh);
178  pp->pp_LongEdgeLenTree = ReadHuffmanTree(fh);
179
180  /* read compressed dictionary */
181  fread(&len, sizeof(len), 1, fh);
182  pp->pp_LongDictRaw = (ULONG *) malloc(len);
183  if(pp->pp_LongDictRaw)
184  {
185    fread(pp->pp_LongDictRaw, len, 1, fh);
186
187    /* read compressed tree */
188    fread(&pp->pp_DiskTreeSize, sizeof(pp->pp_DiskTreeSize), 1, fh);
189
190    /* if we're low on memory, use virtual memory instead */
191    if(pg->pg_LowMemoryMode)
192    {
193      LONG pos;
194      pos = ftell(fh);
195      /* map file to virtual memory */
196      pp->pp_MapFileSize = pos + pp->pp_DiskTreeSize;
197      pp->pp_MapFileBuffer = (UBYTE *) mmap(0, pp->pp_MapFileSize,
198          PROT_READ, MAP_SHARED, fileno(fh), 0);
199      if(pp->pp_MapFileBuffer)
200      {
201        fclose(fh);
202           /* calculate start of buffer inside file */
203        pp->pp_DiskTree = &pp->pp_MapFileBuffer[pos];
204        printf("VMEM!\n");
205        return(TRUE);
206      } else {
207        printf("Unable to map file to ram!\n");
208      }
209    } else {
210      pp->pp_MapFileBuffer = NULL;
211      pp->pp_DiskTree = (UBYTE *) malloc(pp->pp_DiskTreeSize);
212      if(pp->pp_DiskTree)
213      {
214        fread(pp->pp_DiskTree, pp->pp_DiskTreeSize, 1, fh);
215        fclose(fh);
216        printf("DONE!\n");
217        return(TRUE);
218      } else {
219        printf("Out of memory while loading tree (%ld)!\n", pp->pp_DiskTreeSize);
220      }
221    }
222    freeset(pp->pp_LongDictRaw, NULL);
223  } else {
224    printf("Out of memory while loading long dictionary (%ld)!\n", len);
225  }
226  fclose(fh);
227  FreeHuffmanTree(pp->pp_BranchTree);
228  FreeHuffmanTree(pp->pp_ShortEdgeTree);
229  FreeHuffmanTree(pp->pp_LongEdgeLenTree);
230  pp->pp_BranchTree = NULL;
231  pp->pp_ShortEdgeTree = NULL;
232  pp->pp_LongEdgeLenTree = NULL;
233  return(FALSE);
234}
235/* \\\ */
236
237/* /// "CachePartitionUnload()" */
238void CachePartitionUnload(struct CacheHandler *, struct PTPanPartition *pp)
239{
240  /* free partition memory */
241  printf("Unloading %s\n", pp->pp_PartitionName);
242  FreeHuffmanTree(pp->pp_BranchTree);
243  FreeHuffmanTree(pp->pp_ShortEdgeTree);
244  FreeHuffmanTree(pp->pp_LongEdgeLenTree);
245  free(pp->pp_LongDictRaw);
246  if(pp->pp_MapFileBuffer)
247  {
248    munmap(pp->pp_MapFileBuffer, pp->pp_MapFileSize);
249    pp->pp_MapFileBuffer = NULL;
250    pp->pp_DiskTree = NULL;
251  } else {
252    freeset(pp->pp_DiskTree, NULL);
253  }
254  pp->pp_BranchTree = NULL;
255  pp->pp_ShortEdgeTree = NULL;
256  pp->pp_LongEdgeLenTree = NULL;
257  pp->pp_LongDictRaw = NULL;
258}
259/* \\\ */
260
261/* /// "CachePartitionSize()" */
262ULONG CachePartitionSize(struct CacheHandler *, struct PTPanPartition *pp)
263{
264  //struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
265  struct stat ppstat;
266
267  /* determine filesize */
268  ppstat.st_size = 0;
269  stat(pp->pp_PartitionName, &ppstat);
270
271  /* and return it */
272  return((ULONG) ppstat.st_size);
273}
274/* \\\ */
275
276/* /// "WriteStdSuffixTreeHeader()" */
277BOOL WriteStdSuffixTreeHeader(struct PTPanPartition *pp)
278{
279  struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
280  FILE *fh = pp->pp_PartitionFile;
281
282  /* write 16 bytes of ID */
283  fputs("TUM StdSfxTree I", fh);
284  /* checksum to verify */
285  fwrite(&pg->pg_AllHashSum     , sizeof(pg->pg_AllHashSum)     , 1, fh);
286  /* write partition data */
287  fwrite(&pp->pp_ID             , sizeof(pp->pp_ID)             , 1, fh);
288  fwrite(&pp->pp_Prefix         , sizeof(pp->pp_Prefix)         , 1, fh);
289  fwrite(&pp->pp_PrefixLen      , sizeof(pp->pp_PrefixLen)      , 1, fh);
290  fwrite(&pp->pp_Size           , sizeof(pp->pp_Size)           , 1, fh);
291  fwrite(&pp->pp_RawOffset      , sizeof(pp->pp_RawOffset)      , 1, fh);
292
293  /* write tree length */
294  fwrite(&pp->pp_DiskTreeSize   , sizeof(pp->pp_DiskTreeSize)   , 1, fh);
295
296  return(TRUE);
297}
298/* \\\ */
299
300/* /// "CacheStdSuffixPartitionLoad()" */
301BOOL CacheStdSuffixPartitionLoad(struct CacheHandler *, struct PTPanPartition *pp)
302{
303  struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
304  FILE *fh;
305  char idstr[16];
306  ULONG hashsum;
307  ULONG len;
308
309  if(!(fh = fopen(pp->pp_PartitionName, "r")))
310  {
311    printf("Couldn't open suffix tree file %s\n", pp->pp_PartitionName);
312    return(FALSE);
313  }
314
315  printf("Loading suffix tree file %s... ", pp->pp_PartitionName);
316
317  /* read id string */
318  fread(idstr, 16, 1, fh);
319  if(strncmp("TUM StdSfxTree I", idstr, 16))
320  {
321    printf("ERROR: This is no partition file!\n");
322    fclose(fh);
323    return(FALSE);
324  }
325
326  fread(&hashsum, sizeof(pg->pg_AllHashSum), 1, fh);
327  if(hashsum != pg->pg_AllHashSum)
328  {
329    printf("ERROR: Partition file does not match index file!\n");
330    fclose(fh);
331    return(FALSE);
332  }
333
334  /* read partition data */
335  fread(&pp->pp_ID             , sizeof(pp->pp_ID)             , 1, fh);
336  fread(&pp->pp_Prefix         , sizeof(pp->pp_Prefix)         , 1, fh);
337  fread(&pp->pp_PrefixLen      , sizeof(pp->pp_PrefixLen)      , 1, fh);
338  fread(&pp->pp_Size           , sizeof(pp->pp_Size)           , 1, fh);
339  fread(&pp->pp_RawOffset      , sizeof(pp->pp_RawOffset)      , 1, fh);
340  fread(&pp->pp_DiskTreeSize   , sizeof(pp->pp_DiskTreeSize)   , 1, fh);
341  if(pg->pg_LowMemoryMode)
342  {
343    LONG pos;
344    pos = ftell(fh);
345    /* map file to virtual memory */
346    pp->pp_MapFileSize = pos + pp->pp_DiskTreeSize;
347    pp->pp_MapFileBuffer = (UBYTE *) mmap(0, pp->pp_MapFileSize,
348                                     PROT_READ, MAP_SHARED, fileno(fh), 0);
349    if(pp->pp_MapFileBuffer)
350    {
351      fclose(fh);
352      /* calculate start of buffer inside file */
353      pp->pp_DiskTree = &pp->pp_MapFileBuffer[pos];
354      printf("VMEM!\n");
355      return(TRUE);
356    } else {
357      printf("Unable to map file to ram!\n");
358    }
359  } else {
360    pp->pp_MapFileBuffer = NULL;
361    pp->pp_DiskTree = (UBYTE *) malloc(pp->pp_DiskTreeSize);
362    if(pp->pp_DiskTree)
363    {
364      fread(pp->pp_DiskTree, pp->pp_DiskTreeSize, 1, fh);
365      fclose(fh);
366      printf("DONE!\n");
367      return(TRUE);
368    } else {
369      printf("Out of memory while loading tree (%ld)!\n", pp->pp_DiskTreeSize);
370    }
371  }
372  fclose(fh);
373  return(FALSE);
374}
375/* \\\ */
376
377/* /// "CacheStdSuffixPartitionUnload()" */
378void CacheStdSuffixPartitionUnload(struct CacheHandler *, struct PTPanPartition *pp)
379{
380  /* free partition memory */
381  printf("Unloading %s\n", pp->pp_PartitionName);
382  if(pp->pp_MapFileBuffer)
383  {
384    munmap(pp->pp_MapFileBuffer, pp->pp_MapFileSize);
385    pp->pp_MapFileBuffer = NULL;
386    pp->pp_DiskTree = NULL;
387  } else {
388    freeset(pp->pp_DiskTree, NULL);
389  }
390}
391/* \\\ */
392
393/* /// "FixRelativePointersRec()" */
394ULONG FixRelativePointersRec(struct PTPanPartition *pp, ULONG pos, ULONG level, ULONG elen)
395{
396  struct SfxNode *sfxnode;
397  ULONG childptr;
398  UWORD childidx;
399  ULONG nodesize = 0;
400
401  /* traverse children */
402  sfxnode = (struct SfxNode *) &pp->pp_SfxNodes[pos];
403  childidx = sfxnode->sn_Parent >> RELOFFSETBITS;
404
405  level++;
406  elen += sfxnode->sn_EdgeLen;
407  while(childidx--)
408  {
409    childptr = sfxnode->sn_Children[childidx];
410    if(!(childptr >> LEAFBIT))
411    {
412      /* this is a normal node pointer, recurse */
413      /* enter absolute position */
414      childptr &= RELOFFSETMASK;
415      childptr <<= 2;
416
417      /* check for maximum depth reached */
418      if((level >= pp->pp_TreePruneDepth) || (elen >= pp->pp_TreePruneLength))
419      {
420        struct SfxNode *childnode = (struct SfxNode *) &pp->pp_SfxNodes[childptr];
421//printf("L%08lx ", childptr);
422nodesize = CalcPackedLeafSize(pp, childptr);
423/* add leaf node to traversal path */
424               childnode->sn_Parent &= ~RELOFFSETMASK;
425               childnode->sn_Parent |= pp->pp_TraverseTreeRoot;
426               pp->pp_TraverseTreeRoot = childptr >> 2;
427               childnode->sn_AlphaMask = 0; /* indicate stop-leaf */
428      } else {
429        nodesize = FixRelativePointersRec(pp, childptr, level, elen);
430      }
431      pp->pp_DiskTreeSize += nodesize;
432      sfxnode->sn_Children[childidx] = (sfxnode->sn_Children[childidx] & ~RELOFFSETMASK) |
433              pp->pp_DiskTreeSize;
434    }
435  }
436  /* now convert absolute pointers to relative ones */
437  childidx = sfxnode->sn_Parent >> RELOFFSETBITS;
438  while(childidx--)
439  {
440    childptr = sfxnode->sn_Children[childidx];
441    if(!(childptr >> LEAFBIT))
442    {
443      /* fix relative pointer */
444      childptr &= RELOFFSETMASK;
445      childptr = pp->pp_DiskTreeSize - childptr;
446      sfxnode->sn_Children[childidx] = (sfxnode->sn_Children[childidx] & ~RELOFFSETMASK) | childptr;
447    }
448  }
449  nodesize = CalcPackedNodeSize(pp, pos);
450
451  //printf("N%08lx ", pos);
452  /* fix travel route, as we lost our children pointers */
453  sfxnode->sn_Parent &= ~RELOFFSETMASK;
454  sfxnode->sn_Parent |= pp->pp_TraverseTreeRoot;
455  pp->pp_TraverseTreeRoot = pos >> 2;
456  return(nodesize);
457}
458/* \\\ */
459
460/* /// "ULONGCompare()" */
461/* compare function for offset sorting */
462LONG ULONGCompare(const ULONG *node1, const ULONG *node2)
463{
464  return((LONG) *node1 - (LONG) *node2);
465}
466/* \\\ */
467
468/* /// "CalcPackedNodeSize()" */
469ULONG CalcPackedNodeSize(struct PTPanPartition *pp, ULONG pos)
470{
471  struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
472  struct SfxNode *sfxnode;
473  ULONG childptr;
474  UWORD childidx;
475  ULONG bitscnt = 0;
476  ULONG val;
477  ULONG cnt;
478  ULONG newbase;
479
480#if 0 // debugging
481  bitscnt = 8;
482#endif
483
484  sfxnode = (struct SfxNode *) &pp->pp_SfxNodes[pos];
485
486  /* huffman code for short / long edge */
487  if(sfxnode->sn_StartPos & (1UL << 31))
488  {
489    /* long edge */
490    /*printf("LE[%02d+%02d+%02d] ",
491          pp->pp_ShortEdgeCode[0].hc_CodeLength,
492          pp->pp_LongEdgeLenCode[sfxnode->sn_EdgeLen].hc_CodeLength,
493          pp->pp_LongRelPtrBits);*/
494#if 0 // debug
495    if(sfxnode->sn_EdgeLen >= pp->pp_LongEdgeLenSize)
496    {
497      printf("LongEdgeLen: code %d out of range!\n", sfxnode->sn_EdgeLen);
498    }
499    if(!pp->pp_LongEdgeLenCode[sfxnode->sn_EdgeLen].hc_CodeLength)
500    {
501      printf("LongEdgeLen: no code for %d!\n", sfxnode->sn_EdgeLen);
502    }
503#endif
504    bitscnt += pp->pp_ShortEdgeCode[0].hc_CodeLength;
505    bitscnt += pp->pp_LongEdgeLenCode[sfxnode->sn_EdgeLen].hc_CodeLength;
506    bitscnt += pp->pp_LongRelPtrBits;
507  }
508  else if(sfxnode->sn_StartPos & (1UL << 30))
509  {
510    /* short edge */
511    /*printf("SE[      %02d] ",
512      pp->pp_ShortEdgeCode[sfxnode->sn_StartPos & RELOFFSETMASK].hc_CodeLength);*/
513#if 0 // debug
514    if((sfxnode->sn_StartPos & RELOFFSETMASK) >= (1UL << (pg->pg_BitsUseTable[SHORTEDGEMAX]+1)))
515    {
516      printf("ShortEdge: code %ld out of range!\n", sfxnode->sn_StartPos & RELOFFSETMASK);
517    }
518    if(!pp->pp_ShortEdgeCode[sfxnode->sn_StartPos & RELOFFSETMASK].hc_CodeLength)
519    {
520      printf("ShortEdge: no code for %ld!\n", sfxnode->sn_StartPos & RELOFFSETMASK);
521    }
522#endif
523    bitscnt += pp->pp_ShortEdgeCode[sfxnode->sn_StartPos & RELOFFSETMASK].hc_CodeLength;
524  } else {
525    if(pos == 0)
526    {
527      /* special root handling */
528      bitscnt += pp->pp_ShortEdgeCode[1].hc_CodeLength;
529    } else {
530      printf("Arrrrgghhh?!?\n");
531      exit(1);
532    }
533  }
534
535  /* branch code */
536  //printf("BC[%02d] ", pp->pp_BranchCode[sfxnode->sn_AlphaMask].hc_CodeLength);
537#if 0 // debug
538  if(sfxnode->sn_AlphaMask >= (1UL << pg->pg_AlphaSize))
539  {
540    printf("Branch: code %d out of range!\n", sfxnode->sn_AlphaMask);
541  }
542  if(!pp->pp_BranchCode[sfxnode->sn_AlphaMask].hc_CodeLength)
543  {
544    printf("Branch: no code for %d!\n", sfxnode->sn_AlphaMask);
545  }
546#endif
547  bitscnt += pp->pp_BranchCode[sfxnode->sn_AlphaMask].hc_CodeLength;
548
549  /* child pointers */
550  /* msb = {0}   -> rel is next
551     msb = {10}  ->  6 bit rel node ptr
552     msb = {1100} -> 11 bit rel node ptr
553     msb = {1101} -> 27 bit rel node ptr (large enough for a jump over 128 MB)
554     msb = {111} -> nn bit abs leaf ptr (nn = pg->pg_TotalRawBits)
555  */
556  cnt = sfxnode->sn_Parent >> RELOFFSETBITS;
557#if 0 /* debug */
558  if(cnt > pg->pg_AlphaSize)
559  {
560    printf("too many children %ld!\n", cnt);
561  }
562#endif
563  childidx = 0;
564  newbase = 0;
565  while(childidx < cnt)
566  {
567    childptr = sfxnode->sn_Children[childidx];
568    if(childptr >> LEAFBIT)
569    {
570      /* this is a leaf pointer and doesn't contain a seqcode */
571      //printf("LF[%02d] ", 2+pg->pg_TotalRawBits);
572      bitscnt += 3+pg->pg_TotalRawBits;
573    } else {
574      /* this is a normal node pointer, recurse */
575      val = childptr & RELOFFSETMASK;
576      val -= newbase;
577      newbase += val;
578      //printf("Dist: %6ld\n", val);
579      if(val == 0)
580      {
581        /* next */
582        bitscnt++;
583      } else {
584        if(val < (1UL << 6))
585        {
586          //printf("NP[8] ");
587          bitscnt += 2+6;
588        } else {
589          if(val < (1UL << 11))
590          {
591            //printf("NP[15] ");
592            bitscnt += 4+11;
593          } else {
594            //printf("NP[31] ");
595            bitscnt += 4+27;
596          }
597        }
598      }
599    }
600    childidx++;
601  }
602  //printf("P[%08lx] %ld\n", pos, bitscnt);
603  return((bitscnt + 7) >> 3);
604}
605/* \\\ */
606
607/* /// "CalcPackedLeafSize()" */
608ULONG CalcPackedLeafSize(struct PTPanPartition *pp, ULONG pos)
609{
610  struct SfxNode *sfxnode;
611  ULONG leafcnt;
612  ULONG cnt;
613  LONG oldval;
614  LONG val;
615  ULONG leafsize = 1;
616  ULONG sdleafsize = 1;
617
618#if 0
619  static BOOL example = TRUE;
620#endif
621
622  sfxnode = (struct SfxNode *) &pp->pp_SfxNodes[pos];
623
624  /* how many leaves do we have? */
625  leafcnt = GetTreeStatsLeafCountRec(pp, pos);
626  pp->pp_DiskOuterLeaves += leafcnt;
627  //printf("%ld leaves\n", leafcnt);
628  /* enlarge memory, if too small */
629  if(pp->pp_LeafBufferSize < leafcnt)
630  {
631    pp->pp_LeafBuffer = (LONG *) realloc(pp->pp_LeafBuffer, leafcnt * sizeof(LONG));
632    pp->pp_LeafBufferSize = leafcnt;
633  }
634  pp->pp_LeafBufferPtr = pp->pp_LeafBuffer;
635  /* collect leafs */
636  GetTreeStatsLeafCollectRec(pp, pos);
637
638  if(leafcnt > 1)
639  {
640    qsort(pp->pp_LeafBuffer, leafcnt, sizeof(ULONG),
641          (int (*)(const void *, const void *)) ULONGCompare);
642  }
643
644#if 0 /* debug */
645  if((leafcnt > 20) && example)
646  {
647    printf("Original:\n");
648    for(cnt = 0; cnt < leafcnt; cnt++)
649    {
650      printf("%ld %ld\n", cnt, pp->pp_LeafBuffer[cnt]);
651    }
652  }
653#endif
654
655  /* do delta compression */
656  oldval = pp->pp_LeafBuffer[0];
657  for(cnt = 1; cnt < leafcnt; cnt++)
658  {
659    pp->pp_LeafBuffer[cnt] -= oldval;
660    oldval += pp->pp_LeafBuffer[cnt];
661    //printf("%ld\n", pp->pp_LeafBuffer[cnt]);
662  }
663#if 0 /* debug */
664  if((leafcnt > 20) && example)
665  {
666    printf("Delta 1:\n");
667    for(cnt = 0; cnt < leafcnt; cnt++)
668    {
669      printf("%ld %ld\n", cnt, pp->pp_LeafBuffer[cnt]);
670    }
671  }
672#endif
673
674#ifdef DOUBLEDELTALEAVES
675#ifdef DOUBLEDELTAOPTIONAL
676  for(cnt = 0; cnt < leafcnt; cnt++)
677  {
678    val = pp->pp_LeafBuffer[cnt];
679    if((val < -63) || (val > 63))
680    {
681      if((val < -8192) || (val > 8191))
682      {
683        if((val < -(1L << 20)) || (val > ((1L << 20) - 1)))
684        {
685          if((val < -(1L << 27)) || (val > ((1L << 27) - 1)))
686          {
687            /* five bytes */
688            sdleafsize += 5;
689          } else {
690            /* four bytes */
691            sdleafsize += 4;
692          }
693        } else {
694          /* three bytes */
695          sdleafsize += 3;
696        }
697      } else {
698        /* two bytes */
699        sdleafsize += 2;
700      }
701    } else {
702      /* one byte */
703      sdleafsize++;
704    }
705  }
706  *pp->pp_LeafBuffer = -(*pp->pp_LeafBuffer);
707  (*pp->pp_LeafBuffer)--;
708#endif
709  /* do delta compression a second time */
710  if(leafcnt > 2)
711  {
712    oldval = pp->pp_LeafBuffer[1];
713    for(cnt = 2; cnt < leafcnt; cnt++)
714    {
715      pp->pp_LeafBuffer[cnt] -= oldval;
716      oldval += pp->pp_LeafBuffer[cnt];
717      //printf("%ld\n", pp->pp_LeafBuffer[cnt]);
718    }
719  }
720#if 0 /* debug */
721  if((leafcnt > 20) && example)
722  {
723    printf("Delta 2:\n");
724    for(cnt = 0; cnt < leafcnt; cnt++)
725    {
726      printf("%ld %ld\n", cnt, pp->pp_LeafBuffer[cnt]);
727    }
728    example = FALSE;
729  }
730#endif
731
732#endif
733  /* compression:
734       msb == {1}   -> 1 byte offset (-   63 -     63)
735       msb == {01}  -> 2 byte offset (- 8192 -   8191)
736       msb == {001} -> 3 byte offset (- 2^20 - 2^20-1)
737       msb == {000} -> 4 byte offset (- 2^28 - 2^28-1)
738     special opcodes:
739       0xff      -> end of array
740     This means the upper limit is currently 1 GB for raw sequence data
741     (this can be changed though: Just introduce another opcode)
742  */
743
744  for(cnt = 0; cnt < leafcnt; cnt++)
745  {
746    val = pp->pp_LeafBuffer[cnt];
747    if((val < -63) || (val > 63))
748    {
749      if((val < -8192) || (val > 8191))
750      {
751        if((val < -(1L << 20)) || (val > ((1L << 20) - 1)))
752        {
753          if((val < -(1L << 27)) || (val > ((1L << 27) - 1)))
754          {
755            /* five bytes */
756            leafsize += 5;
757          } else {
758            /* four bytes */
759            leafsize += 4;
760          }
761        } else {
762          /* three bytes */
763          leafsize += 3;
764        }
765      } else {
766        /* two bytes */
767        leafsize += 2;
768      }
769    } else {
770      /* one byte */
771      leafsize++;
772    }
773  }
774#ifdef DOUBLEDELTAOPTIONAL
775  return((leafsize < sdleafsize) ? leafsize : sdleafsize);
776#else
777  return(leafsize);
778#endif
779}
780/* \\\ */
781
782/* /// "DebugTreeNode()" */
783void DebugTreeNode(struct TreeNode *tn)
784{
785  struct PTPanPartition *pp = tn->tn_PTPanPartition;
786  ULONG cnt;
787
788  printf("*** NODE 0x%08lx\n"
789         "Pos: %ld [%ld bytes] (Parent: %ld [0x%08lx])\n"
790         "Level: %d (%d bases into tree)\n"
791         "Edge: %d (%s)\n"
792         "Children: ",
793         (ULONG) tn,
794         tn->tn_Pos, tn->tn_Size, tn->tn_ParentPos, (ULONG) tn->tn_Parent,
795         tn->tn_Level, tn->tn_TreeOffset,
796         tn->tn_EdgeLen, tn->tn_Edge);
797  for(cnt = 0; cnt < pp->pp_PTPanGlobal->pg_AlphaSize; cnt++)
798  {
799    printf("[0x%08lx] ", tn->tn_Children[cnt]);
800  }
801  printf("\nLeaves: %ld\n   ", tn->tn_NumLeaves);
802  for(cnt = 0; cnt < tn->tn_NumLeaves; cnt++)
803  {
804    printf("<%ld> ", tn->tn_Leaves[cnt]);
805  }
806  printf("\n");
807}
808/* \\\ */
809
810/* /// "GetTreePath()" */
811void GetTreePath(struct TreeNode *tn, STRPTR strptr, ULONG len)
812{
813  strptr[len] = 0; /* NULL termination of the string */
814  if(!len)
815  {
816    return;
817  }
818  /* if the edge is too short, fill with question marks */
819  while(len > tn->tn_TreeOffset)
820  {
821    len--;
822    strptr[len] = '?';
823  }
824  while(len)
825  {
826    while(len + tn->tn_EdgeLen > tn->tn_TreeOffset)
827    {
828      len--;
829      strptr[len] = tn->tn_Edge[len + tn->tn_EdgeLen - tn->tn_TreeOffset];
830    }
831    tn = tn->tn_Parent;
832  }
833}
834/* \\\ */
835
836/* /// "GoDownStdSuffixNodeChild()" */
837struct TreeNode * GoDownStdSuffixNodeChild(struct TreeNode *oldtn, UWORD seqcode)
838{
839  struct PTPanPartition *pp = oldtn->tn_PTPanPartition;
840  struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
841  struct TreeNode *tn;
842  struct StdSfxNodeOnDisk *ssn;
843  ULONG childoff;
844
845  if(!oldtn->tn_Children[seqcode])
846  {
847    return(NULL);
848  }
849  ssn = &((struct StdSfxNodeOnDisk *) pp->pp_DiskTree)[oldtn->tn_Children[seqcode]];
850  if(!ssn->ssn_FirstChild)
851  {
852    /* create a single leaf node */
853    tn = (struct TreeNode *) calloc(sizeof(struct TreeNode), 1);
854    /* fill in data */
855    tn->tn_PTPanPartition = pp;
856    tn->tn_NumLeaves = 1;
857    tn->tn_Edge = (STRPTR) &pp->pp_StdSfxMapBuffer[ssn->ssn_StartPos];
858    tn->tn_EdgeLen = ssn->ssn_EdgeLen;
859    tn->tn_Leaves[0] = ssn->ssn_StartPos;
860  } else {
861    /* create a single leaf node */
862    tn = (struct TreeNode *) calloc(sizeof(struct TreeNode), 1);
863    tn->tn_PTPanPartition = pp;
864    tn->tn_NumLeaves = 0;
865    tn->tn_Edge = (STRPTR) &pp->pp_StdSfxMapBuffer[ssn->ssn_StartPos];
866    tn->tn_EdgeLen = ssn->ssn_EdgeLen;
867    tn->tn_Leaves[0] = ssn->ssn_StartPos;
868    /* determine children */
869    childoff = ssn->ssn_FirstChild;
870    do
871    {
872      ssn = &((struct StdSfxNodeOnDisk *) pp->pp_DiskTree)[childoff];
873      tn->tn_Children[pp->pp_StdSfxMapBuffer[ssn->ssn_StartPos]] = childoff;
874      if(!ssn->ssn_NextSibling)
875      {
876       break;
877      }
878    }
879    while((childoff = ssn->ssn_NextSibling));
880  }
881  /* fill in downlink information */
882  tn->tn_TreeOffset = oldtn->tn_TreeOffset + tn->tn_EdgeLen;
883  tn->tn_Level = oldtn->tn_Level + 1;
884  tn->tn_ParentSeq = seqcode;
885  tn->tn_ParentPos = oldtn->tn_Pos;
886  tn->tn_Parent = oldtn;
887  return(tn);
888}
889/* \\\ */
890
891/* /// "GoDownNodeChild()" */
892struct TreeNode * GoDownNodeChild(struct TreeNode *oldtn, UWORD seqcode)
893{
894  struct PTPanPartition *pp = oldtn->tn_PTPanPartition;
895  struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
896  struct TreeNode *tn;
897
898  if(!oldtn->tn_Children[seqcode])
899  {
900    return(NULL);
901  }
902
903  if(oldtn->tn_Children[seqcode] & LEAFMASK)
904  {
905    /* create a single leaf node */
906    tn = (struct TreeNode *) calloc(sizeof(struct TreeNode) +
907                     sizeof(ULONG) + 1 + 1, 1);
908    /* fill in data */
909    tn->tn_PTPanPartition = pp;
910    tn->tn_NumLeaves = 1;
911    tn->tn_Edge = (STRPTR) &tn->tn_Leaves[1];
912    tn->tn_EdgeLen = 1;
913    tn->tn_Leaves[0] = oldtn->tn_Children[seqcode] & ~LEAFMASK;
914  } else {
915    if(((ULONG) oldtn->tn_Level + 1 < pp->pp_TreePruneDepth) &&
916       ((ULONG) oldtn->tn_TreeOffset + 1 <= pp->pp_TreePruneLength))
917    {
918      /* just go down normally */
919      tn = ReadPackedNode(pp, oldtn->tn_Children[seqcode]);
920    } else {
921      /* leaf reached */
922      tn = ReadPackedLeaf(pp, oldtn->tn_Children[seqcode]);
923    }
924  }
925#if 0 /* debug */
926  if(!tn)
927  {
928    printf("ERROR: Couldn't decrunch %d... going up!\n", seqcode);
929    tn = oldtn;
930    do
931    {
932      printf("[%s] ", tn->tn_Edge);
933      tn = tn->tn_Parent;
934    } while(tn);
935    printf("\n");
936    return(NULL);
937  }
938#endif
939  /* fill in downlink information */
940  tn->tn_TreeOffset = oldtn->tn_TreeOffset + tn->tn_EdgeLen;
941  if(tn->tn_TreeOffset > pp->pp_TreePruneLength)
942  {
943    tn->tn_EdgeLen = pp->pp_TreePruneLength - oldtn->tn_TreeOffset;
944  }
945  *tn->tn_Edge = pg->pg_DecompressTable[seqcode];
946  tn->tn_Level = oldtn->tn_Level + 1;
947  tn->tn_ParentSeq = seqcode;
948  tn->tn_ParentPos = oldtn->tn_Pos;
949  tn->tn_Parent = oldtn;
950  return(tn);
951}
952/* \\\ */
953
954/* /// "GoDownNodeChildNoEdge()" */
955struct TreeNode * GoDownNodeChildNoEdge(struct TreeNode *oldtn, UWORD seqcode)
956{
957  struct PTPanPartition *pp = oldtn->tn_PTPanPartition;
958  //struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
959  struct TreeNode *tn;
960
961  if(!oldtn->tn_Children[seqcode])
962  {
963    return(NULL);
964  }
965
966  if(oldtn->tn_Children[seqcode] & LEAFMASK)
967  {
968    /* create a single leaf node */
969    tn = (struct TreeNode *) calloc(sizeof(struct TreeNode), 1);
970    /* fill in data */
971    tn->tn_PTPanPartition = pp;
972    tn->tn_NumLeaves = 1;
973    tn->tn_EdgeLen = 1;
974    tn->tn_Leaves[0] = oldtn->tn_Children[seqcode] & ~LEAFMASK;
975  } else {
976    if(((ULONG) oldtn->tn_Level + 1 < pp->pp_TreePruneDepth) &&
977       ((ULONG) oldtn->tn_TreeOffset + 1 <= pp->pp_TreePruneLength))
978    {
979      /* just go down normally */
980      tn = ReadPackedNodeNoEdge(pp, oldtn->tn_Children[seqcode]);
981    } else {
982      /* leaf reached */
983      tn = ReadPackedLeaf(pp, oldtn->tn_Children[seqcode]);
984    }
985  }
986  /* fill in downlink information */
987  tn->tn_TreeOffset = oldtn->tn_TreeOffset + tn->tn_EdgeLen;
988  if(tn->tn_TreeOffset > pp->pp_TreePruneLength)
989  {
990    tn->tn_EdgeLen = pp->pp_TreePruneLength - oldtn->tn_TreeOffset;
991  }
992  tn->tn_Level = oldtn->tn_Level + 1;
993  tn->tn_ParentSeq = seqcode;
994  tn->tn_ParentPos = oldtn->tn_Pos;
995  tn->tn_Parent = oldtn;
996  return(tn);
997}
998/* \\\ */
999
1000/* /// "ReadPackedNode()" */
1001struct TreeNode * ReadPackedNode(struct PTPanPartition *pp, ULONG pos)
1002{
1003  struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
1004  struct TreeNode *tn;
1005  struct HuffTree *ht;
1006  ULONG bitpos = 0;
1007  UBYTE *treeptr = &pp->pp_DiskTree[pos];
1008  ULONG edgelen;
1009  ULONG edgepos;
1010  ULONG cnt;
1011  ULONG mask;
1012  ULONG pval;
1013  ULONG alphamask;
1014  ULONG newbase;
1015  BOOL shortedge;
1016
1017#if 0 // debugging
1018  bitpos = 8;
1019  if(*treeptr != 0xf2)
1020  {
1021    printf("Pos %08lx: Magic %02x. We're in trouble.\n", pos, *treeptr);
1022    return(NULL);
1023  } else {
1024    //printf("Pos %08lx: GOOD\n", pos);
1025  }
1026#endif
1027  //printf("Pos %ld ", pos);
1028  /* read short edge tree */
1029  ht = FindHuffTreeID(pp->pp_ShortEdgeTree, treeptr, bitpos);
1030  bitpos += ht->ht_CodeLength;
1031  pval = ht->ht_ID;
1032  if(pval) /* short edge */
1033  {
1034    /* id refers to a short edge code */
1035    /* find out length of edge */
1036    //printf("SHORT EDGE ID %ld (bitlen %d)\n", pval, ht->ht_CodeLength);
1037    if(pval > 1)
1038    {
1039      edgelen = SHORTEDGEMAX;
1040      while(!(pval & (1UL << pg->pg_BitsUseTable[edgelen])))
1041      {
1042        edgelen--;
1043      }
1044      pval -= 1UL << pg->pg_BitsUseTable[edgelen];
1045      edgelen++;
1046    } else {
1047      edgelen = 1;
1048    }
1049    shortedge = TRUE;
1050  } else {
1051    /* long edge */
1052    ht = FindHuffTreeID(pp->pp_LongEdgeLenTree, treeptr, bitpos);
1053    bitpos += ht->ht_CodeLength;
1054    edgelen = ht->ht_ID;
1055    //printf("LONG EDGE ID %ld (bitlen %d) ", edgelen, ht->ht_CodeLength);
1056    edgepos = ReadBits(treeptr, bitpos, pp->pp_LongRelPtrBits);
1057    bitpos += pp->pp_LongRelPtrBits;
1058    //printf("RELPTR %ld (bitlen %d)\n", edgepos, pp->pp_LongRelPtrBits);
1059    shortedge = FALSE;
1060  }
1061
1062  tn = (struct TreeNode *) calloc(sizeof(struct TreeNode) +
1063                                  edgelen + 1, 1);
1064  /* fill in data */
1065  tn->tn_PTPanPartition = pp;
1066  tn->tn_Pos = pos;
1067  tn->tn_NumLeaves = 0;
1068  tn->tn_EdgeLen = edgelen;
1069  tn->tn_Edge = (STRPTR) &tn->tn_Leaves;
1070  *tn->tn_Edge = 'X';
1071  if(shortedge) /* decompress edge */
1072  {
1073    //printf("Short %ld ", pval);
1074    cnt = edgelen;
1075    while(--cnt)
1076    {
1077      tn->tn_Edge[cnt] = pg->pg_DecompressTable[pval % pg->pg_AlphaSize];
1078      pval /= pg->pg_AlphaSize;
1079    }
1080  } else {
1081    //printf("Long");
1082    DecompressSequencePartTo(pg, pp->pp_LongDictRaw, edgepos, edgelen - 1,
1083                                   &tn->tn_Edge[1]);
1084  }
1085
1086  /* decode branch array */
1087  ht = FindHuffTreeID(pp->pp_BranchTree, treeptr, bitpos);
1088  bitpos += ht->ht_CodeLength;
1089  alphamask = ht->ht_ID;
1090
1091  //printf("AlphaMask %lx (bitlen %d)\n", alphamask, ht->ht_CodeLength);
1092  for(cnt = 0; cnt < pg->pg_AlphaSize; cnt++)
1093  {
1094    if(alphamask & (1UL << cnt)) /* bit set? */
1095    {
1096      mask = ReadBits(treeptr, bitpos, 4);
1097      //printf("Mask %lx\n", mask);
1098      if(mask >> 3) /* {1} */
1099        {
1100          /* first bit is 1 (no "rel is next" pointer) */
1101          if((mask >> 2) == 0x2) /* {10} */
1102            {
1103              /* this is a 6 bit relative node pointer */
1104              bitpos += 2;
1105              tn->tn_Children[cnt] = ReadBits(treeptr, bitpos, 6);
1106              bitpos += 6;
1107            }
1108          else if((mask >> 1) == 0x7) /* {111} */
1109         {
1110          /* absolute leaf pointer */
1111         bitpos += 3;
1112         tn->tn_Children[cnt] = LEAFMASK | ReadBits(treeptr, bitpos, pg->pg_TotalRawBits);
1113         bitpos += pg->pg_TotalRawBits;
1114         }
1115         else if(mask == 0xc) /* {1100} */
1116         {
1117           /* this is a 11 bit relative node pointer */
1118           bitpos += 4;
1119           tn->tn_Children[cnt] = ReadBits(treeptr, bitpos, 11);
1120           bitpos += 11;
1121         } else { /* {1101} */
1122           /* this is a 27 bit relative node pointer */
1123           bitpos += 4;
1124           tn->tn_Children[cnt] = ReadBits(treeptr, bitpos, 27);
1125           bitpos += 27;
1126         }
1127         } else { /* {0} */
1128           /* rel is next pointer */
1129           bitpos++;
1130           tn->tn_Children[cnt] = 0;
1131         }
1132           //printf("Child %08lx\n", children[cnt]);
1133         }
1134         }
1135         tn->tn_Size = (bitpos + 7) >> 3;
1136
1137  /* add size and pos to relative pointers */
1138  newbase = pos;
1139  for(cnt = 0; cnt < pg->pg_AlphaSize; cnt++)
1140  {
1141    if(alphamask & (1UL << cnt)) /* bit set? */
1142    {
1143      if(!(tn->tn_Children[cnt] >> LEAFBIT))
1144      {
1145        //printf("N[%08lx] ", children[cnt]);
1146        newbase += tn->tn_Children[cnt];
1147        tn->tn_Children[cnt] = newbase;
1148        tn->tn_Children[cnt] += tn->tn_Size;
1149#if 0
1150        /* debug */
1151        if((tn->tn_Children[cnt] > pp->pp_DiskTreeSize) ||
1152           (pp->pp_DiskTree[tn->tn_Children[cnt]] != 0xf2))
1153        {
1154          ULONG bc;
1155          printf("ARGH: [%08lx]: Child Pos %08lx for %ld out of range (%ld)!\n",
1156                 pos, tn->tn_Children[cnt], cnt, pp->pp_DiskTreeSize);
1157          for(bc = 0; bc < 32; bc++)
1158          {
1159            printf("[%02x] ", treeptr[bc]);
1160          }
1161          printf("\n Size=%ld, Edge(%s %ld) = %s\n",
1162          tn->tn_Size, shortedge ? "Short" : "Long", pval, tn->tn_Edge);
1163          return(NULL);
1164         }
1165#endif
1166      }
1167    }
1168  }
1169  return(tn);
1170}
1171/* \\\ */
1172
1173/* /// "ReadPackedNodeNoEdge()" */
1174struct TreeNode * ReadPackedNodeNoEdge(struct PTPanPartition *pp, ULONG pos)
1175{
1176  struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
1177  struct TreeNode *tn;
1178  struct HuffTree *ht;
1179  ULONG bitpos = 0;
1180  UBYTE *treeptr = &pp->pp_DiskTree[pos];
1181  ULONG edgelen;
1182  ULONG cnt;
1183  ULONG mask;
1184  ULONG pval;
1185  ULONG alphamask;
1186  ULONG newbase;
1187
1188  /* read short edge tree */
1189  ht = FindHuffTreeID(pp->pp_ShortEdgeTree, treeptr, bitpos);
1190  bitpos += ht->ht_CodeLength;
1191  pval = ht->ht_ID;
1192  /* this is the quick version -- no need to fully decode the edge */
1193  if(pval) /* short edge */
1194  {
1195    /* id refers to a short edge code */
1196    /* find out length of edge */
1197    if(pval > 1)
1198    {
1199      edgelen = SHORTEDGEMAX;
1200      while(!(pval & (1UL << pg->pg_BitsUseTable[edgelen])))
1201      {
1202        edgelen--;
1203      }
1204      edgelen++;
1205    } else {
1206      edgelen = 1;
1207    }
1208  } else {
1209    /* long edge */
1210    ht = FindHuffTreeID(pp->pp_LongEdgeLenTree, treeptr, bitpos);
1211    bitpos += ht->ht_CodeLength;
1212    edgelen = ht->ht_ID;
1213    bitpos += pp->pp_LongRelPtrBits;
1214  }
1215
1216  tn = (struct TreeNode *) calloc(sizeof(struct TreeNode), 1);
1217  /* fill in data */
1218  tn->tn_PTPanPartition = pp;
1219  tn->tn_Pos = pos;
1220  tn->tn_NumLeaves = 0;
1221  tn->tn_EdgeLen = edgelen;
1222
1223  /* decode branch array */
1224  ht = FindHuffTreeID(pp->pp_BranchTree, treeptr, bitpos);
1225  bitpos += ht->ht_CodeLength;
1226  alphamask = ht->ht_ID;
1227
1228  for(cnt = 0; cnt < pg->pg_AlphaSize; cnt++)
1229  {
1230    if(alphamask & (1UL << cnt)) /* bit set? */
1231    {
1232      mask = ReadBits(treeptr, bitpos, 4);
1233      if(mask >> 3) /* {1} */
1234      {
1235       /* first bit is 1 (no "rel is next" pointer) */
1236        if((mask >> 2) == 0x2) /* {10} */
1237        {
1238/* this is a 6 bit relative node pointer */
1239bitpos += 2;
1240tn->tn_Children[cnt] = ReadBits(treeptr, bitpos, 6);
1241bitpos += 6;
1242}
1243else if((mask >> 1) == 0x7) /* {111} */
1244{
1245/* absolute leaf pointer */
1246bitpos += 3;
1247tn->tn_Children[cnt] = LEAFMASK | ReadBits(treeptr, bitpos, pg->pg_TotalRawBits);
1248bitpos += pg->pg_TotalRawBits;
1249}
1250else if(mask == 0xc) /* {1100} */
1251        {
1252        /* this is a 11 bit relative node pointer */
1253        bitpos += 4;
1254        tn->tn_Children[cnt] = ReadBits(treeptr, bitpos, 11);
1255        bitpos += 11;
1256        } else { /* {1101} */
1257        /* this is a 27 bit relative node pointer */
1258        bitpos += 4;
1259        tn->tn_Children[cnt] = ReadBits(treeptr, bitpos, 27);
1260        bitpos += 27;
1261        }
1262      } else { /* {0} */
1263        /* rel is next pointer */
1264        bitpos++;
1265        tn->tn_Children[cnt] = 0;
1266      }
1267    }
1268  }
1269  tn->tn_Size = (bitpos + 7) >> 3;
1270
1271  /* add size and pos to relative pointers */
1272  newbase = pos;
1273  for(cnt = 0; cnt < pg->pg_AlphaSize; cnt++)
1274  {
1275    if(alphamask & (1UL << cnt)) /* bit set? */
1276    {
1277      if(!(tn->tn_Children[cnt] >> LEAFBIT))
1278      {
1279        newbase += tn->tn_Children[cnt];
1280        tn->tn_Children[cnt] = newbase;
1281        tn->tn_Children[cnt] += tn->tn_Size;
1282      }
1283    }
1284  }
1285  return(tn);
1286}
1287/* \\\ */
1288
1289/* /// "ReadPackedLeaf()" */
1290struct TreeNode * ReadPackedLeaf(struct PTPanPartition *pp, ULONG pos)
1291{
1292  //struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
1293  struct TreeNode *tn;
1294  UBYTE *treeptr = &pp->pp_DiskTree[pos];
1295  ULONG leaves = 0;
1296  UBYTE code;
1297  ULONG cnt;
1298  ULONG *lptr;
1299  LONG val;
1300  /* compression:
1301       msb == {1}    -> 1 byte offset (-   63 -     63)
1302       msb == {01}   -> 2 byte offset (- 8192 -   8191)
1303       msb == {001}  -> 3 byte offset (- 2^20 - 2^20-1)
1304       msb == {0001} -> 4 byte offset (- 2^27 - 2^27-1)
1305       msb == {0000} -> 5 byte offset (- 2^35 - 2^35-1)
1306  */
1307
1308  /* get number of leaves first */
1309  while((code = *treeptr) != 0xff)
1310  {
1311    leaves++;
1312    if(code >> 7) /* {1} */
1313    {
1314      /* 1 byte */
1315      treeptr++;
1316    }
1317    else if(code >> 6) /* {01} */
1318    {
1319      /* 2 bytes */
1320      treeptr += 2;
1321    }
1322    else if(code >> 5) /* {001} */
1323    {
1324      /* 3 bytes */
1325      treeptr += 3;
1326    }
1327    else if(code >> 4) /* {0001} */
1328    {
1329      /* 4 bytes */
1330      treeptr += 4;
1331    } else {           /* {0000} */
1332      /* 5 bytes */
1333      treeptr += 5;
1334    }
1335  }
1336  treeptr++;
1337  tn = (struct TreeNode *) calloc(sizeof(struct TreeNode) +
1338                                  sizeof(ULONG) * leaves +
1339                                   1 + 1, 1);
1340  /* fill in data */
1341  tn->tn_PTPanPartition = pp;
1342  tn->tn_Pos = pos;
1343  tn->tn_Size = (ULONG) (treeptr - &pp->pp_DiskTree[pos]);
1344  tn->tn_NumLeaves = leaves;
1345  tn->tn_Edge = (STRPTR) &tn->tn_Leaves[leaves];
1346  *tn->tn_Edge = 'X';
1347  tn->tn_EdgeLen = 1;
1348  treeptr = &pp->pp_DiskTree[pos];
1349  lptr = tn->tn_Leaves;
1350  while((code = *treeptr++) != 0xff)
1351  {
1352    if(code >> 7) /* {1} */
1353    {
1354      /* 1 byte */
1355      *lptr++ = (code & 0x7f) - 63;
1356    }
1357    else if(code >> 6) /* {01} */
1358    {
1359      /* 2 bytes */
1360      val = (code & 0x3f) << 8;
1361      val |= *treeptr++;
1362      *lptr++ = val - 8192;
1363    }
1364    else if(code >> 5) /* {001} */
1365    {
1366      /* 3 bytes */
1367      val = (code & 0x1f) << 8;
1368      val |= *treeptr++;
1369      val <<= 8;
1370      val |= *treeptr++;
1371      *lptr++ = val - (1L << 20);
1372    }
1373    else if(code >> 4) /* {0001} */
1374    {
1375      /* 4 bytes */
1376      val = (code & 0x0f) << 8;
1377      val |= *treeptr++;
1378      val <<= 8;
1379      val |= *treeptr++;
1380      val <<= 8;
1381      val |= *treeptr++;
1382      *lptr++ = val - (1L << 27);
1383    } else {           /* {0000} */
1384      /* 5 bytes */
1385      val = (code & 0x0f) << 8;
1386      val |= *treeptr++;
1387      val <<= 8;
1388      val |= *treeptr++;
1389      val <<= 8;
1390      val |= *treeptr++;
1391      val <<= 8;
1392      val |= *treeptr++;
1393      *lptr++ = val - (1L << 35);
1394    }
1395  }
1396  /* double delta decode */
1397#ifdef DOUBLEDELTAOPTIONAL
1398  if(((LONG) *tn->tn_Leaves) < 0)
1399  {
1400    *tn->tn_Leaves = -(*tn->tn_Leaves);
1401    (*tn->tn_Leaves)++;
1402#else
1403  {
1404#endif
1405
1406#ifdef DOUBLEDELTALEAVES
1407    for(cnt = 2; cnt < leaves; cnt++)
1408    {
1409      tn->tn_Leaves[cnt] += tn->tn_Leaves[cnt-1];
1410    }
1411#endif
1412#ifdef DOUBLEDELTAOPTIONAL
1413  }
1414#else
1415  }
1416#endif
1417  for(cnt = 1; cnt < leaves; cnt++)
1418  {
1419    tn->tn_Leaves[cnt] += tn->tn_Leaves[cnt-1];
1420    arb_assert(tn->tn_Leaves[cnt] < pp->pp_PTPanGlobal->pg_TotalRawSize);
1421  }
1422  return(tn);
1423}
1424/* \\\ */
1425
1426/* /// "WritePackedNode()" */
1427ULONG WritePackedNode(struct PTPanPartition *pp, ULONG pos, UBYTE *buf)
1428{
1429  struct PTPanGlobal *pg = pp->pp_PTPanGlobal;
1430  struct SfxNode *sfxnode;
1431  ULONG childptr;
1432  UWORD childidx;
1433  ULONG val;
1434  ULONG bpos = 0;
1435  ULONG cnt;
1436  ULONG newbase;
1437
1438  //*buf = 0xF2; bpos = 8; /* debugging */
1439  sfxnode = (struct SfxNode *) &pp->pp_SfxNodes[pos];
1440
1441  /* huffman code for short / long edge */
1442  if(sfxnode->sn_StartPos & (1UL << 31))
1443  {
1444    /* long edge */
1445    /*printf("LE[%02d+%02d+%02d] ",
1446           pp->pp_ShortEdgeCode[0].hc_CodeLength,
1447           pp->pp_LongEdgeLenCode[sfxnode->sn_EdgeLen].hc_CodeLength,
1448           pp->pp_LongRelPtrBits);*/
1449    bpos = WriteBits(buf, bpos,
1450                     pp->pp_ShortEdgeCode[0].hc_Codec,
1451                     pp->pp_ShortEdgeCode[0].hc_CodeLength);
1452    bpos = WriteBits(buf, bpos,
1453                     pp->pp_LongEdgeLenCode[sfxnode->sn_EdgeLen].hc_Codec,
1454                     pp->pp_LongEdgeLenCode[sfxnode->sn_EdgeLen].hc_CodeLength);
1455    bpos = WriteBits(buf, bpos,
1456                     sfxnode->sn_StartPos & RELOFFSETMASK,
1457                     pp->pp_LongRelPtrBits);
1458  }
1459  else if(sfxnode->sn_StartPos & (1UL << 30))
1460  {
1461    /* short edge */
1462    /*printf("SE[%04ld= %02d] ", sfxnode->sn_StartPos & RELOFFSETMASK,
1463      pp->pp_ShortEdgeCode[sfxnode->sn_StartPos & RELOFFSETMASK].hc_CodeLength);*/
1464    bpos = WriteBits(buf, bpos,
1465                     pp->pp_ShortEdgeCode[sfxnode->sn_StartPos & RELOFFSETMASK].hc_Codec,
1466                     pp->pp_ShortEdgeCode[sfxnode->sn_StartPos & RELOFFSETMASK].hc_CodeLength);
1467
1468  } else {
1469    if(pos == 0)
1470    {
1471      /* special root handling */
1472      bpos = WriteBits(buf, bpos,
1473                       pp->pp_ShortEdgeCode[1].hc_Codec,
1474                       pp->pp_ShortEdgeCode[1].hc_CodeLength);
1475    } else {
1476      printf("Internal error: Tree corrupt at %ld (%08lx)\n", pos, sfxnode->sn_StartPos);
1477    }
1478  }
1479
1480  /* branch code */
1481  //printf("BC[%02d] ", pp->pp_BranchCode[sfxnode->sn_AlphaMask].hc_CodeLength);
1482  bpos = WriteBits(buf, bpos,
1483                   pp->pp_BranchCode[sfxnode->sn_AlphaMask].hc_Codec,
1484                   pp->pp_BranchCode[sfxnode->sn_AlphaMask].hc_CodeLength);
1485
1486  /* child pointers */
1487  /* msb = {0}   -> rel is next
1488     msb = {10}  ->  6 bit rel node ptr
1489     msb = {1100} -> 11 bit rel node ptr
1490     msb = {1101} -> 27 bit rel node ptr (large enough for a jump over 128 MB)
1491     msb = {111} -> nn bit abs leaf ptr (nn = pg->pg_TotalRawBits)
1492  */
1493  cnt = sfxnode->sn_Parent >> RELOFFSETBITS;
1494  childidx = 0;
1495  newbase = 0;
1496  while(childidx < cnt)
1497  {
1498    childptr = sfxnode->sn_Children[childidx];
1499    if(childptr >> LEAFBIT)
1500    {
1501      /* this is a leaf pointer */
1502      //printf("LF[%08lx]\n", childptr);
1503      bpos = WriteBits(buf, bpos, 0x7, 3);
1504      bpos = WriteBits(buf, bpos,
1505                       childptr & RELOFFSETMASK,
1506                       pg->pg_TotalRawBits);
1507    } else {
1508      /* this is a normal node pointer */
1509      val = childptr & RELOFFSETMASK;
1510      val -= newbase;
1511      newbase += val;
1512      //printf("Dist: %6ld\n", val);
1513      if(val == 0)
1514      {
1515        /* next */
1516        //printf("NP[1] ");
1517        bpos = WriteBits(buf, bpos, 0x0, 1);
1518      } else {
1519        if(val < (1UL << 6))
1520        {
1521          //printf("NP[8] ");
1522          bpos = WriteBits(buf, bpos, 0x2, 2);
1523          bpos = WriteBits(buf, bpos, val, 6);
1524        } else {
1525          if(val < (1UL << 11))
1526          {
1527            //printf("NP[15] ");
1528            bpos = WriteBits(buf, bpos, 0xc, 4);
1529            bpos = WriteBits(buf, bpos, val, 11);
1530          } else {
1531            //printf("NP[31] ");
1532            bpos = WriteBits(buf, bpos, 0xd, 4);
1533            bpos = WriteBits(buf, bpos, val, 27);
1534          }
1535        }
1536      }
1537    }
1538    childidx++;
1539  }
1540  //printf("P[%08lx] %ld\n", pos, bpos);
1541  return((bpos + 7) >> 3);
1542}
1543/* \\\ */
1544
1545/* /// "WritePackedLeaf()" */
1546ULONG WritePackedLeaf(struct PTPanPartition *pp, ULONG pos, UBYTE *buf)
1547{
1548  struct SfxNode *sfxnode;
1549  ULONG leafcnt;
1550  ULONG cnt;
1551  LONG oldval;
1552  LONG val;
1553  ULONG leafsize = 1;
1554  ULONG sdleafsize = 1;
1555  UBYTE *origbuf = buf;
1556
1557  sfxnode = (struct SfxNode *) &pp->pp_SfxNodes[pos];
1558
1559  /* how many leaves do we have? */
1560  leafcnt = GetTreeStatsLeafCountRec(pp, pos);
1561  //printf("%ld leaves\n", leafcnt);
1562  /* enlarge memory, if too small */
1563  if(pp->pp_LeafBufferSize < leafcnt)
1564  {
1565    pp->pp_LeafBuffer = (LONG *) realloc(pp->pp_LeafBuffer, leafcnt * sizeof(LONG));
1566    pp->pp_LeafBufferSize = leafcnt;
1567  }
1568  pp->pp_LeafBufferPtr = pp->pp_LeafBuffer;
1569  /* collect leafs */
1570  GetTreeStatsLeafCollectRec(pp, pos);
1571  if(leafcnt > 1)
1572  {
1573    qsort(pp->pp_LeafBuffer, leafcnt, sizeof(ULONG),
1574          (int (*)(const void *, const void *)) ULONGCompare);
1575  }
1576  /* do delta compression */
1577  oldval = pp->pp_LeafBuffer[0];
1578  arb_assert(pp->pp_LeafBuffer[0] < pp->pp_PTPanGlobal->pg_TotalRawSize);
1579  for(cnt = 1; cnt < leafcnt; cnt++)
1580  {
1581    arb_assert(pp->pp_LeafBuffer[cnt] < pp->pp_PTPanGlobal->pg_TotalRawSize);
1582    pp->pp_LeafBuffer[cnt] -= oldval;
1583    oldval += pp->pp_LeafBuffer[cnt];
1584    //printf("%ld\n", pp->pp_LeafBuffer[cnt]);
1585  }
1586
1587#ifdef DOUBLEDELTALEAVES
1588#ifdef DOUBLEDELTAOPTIONAL
1589  for(cnt = 0; cnt < leafcnt; cnt++)
1590  {
1591    val = pp->pp_LeafBuffer[cnt];
1592    if((val < -63) || (val > 63))
1593    {
1594      if((val < -8192) || (val > 8191))
1595      {
1596        if((val < -(1L << 20)) || (val > ((1L << 20) - 1)))
1597        {
1598          if((val < -(1L << 27)) || (val > ((1L << 27) - 1)))
1599          {
1600            /* five bytes */
1601            sdleafsize += 5;
1602          } else {
1603            /* four bytes */
1604            sdleafsize += 4;
1605          }
1606        } else {
1607          /* three bytes */
1608          sdleafsize += 3;
1609        }
1610      } else {
1611        /* two bytes */
1612        sdleafsize += 2;
1613      }
1614    } else {
1615      /* one byte */
1616      sdleafsize++;
1617    }
1618  }
1619  *pp->pp_LeafBuffer = -(*pp->pp_LeafBuffer);
1620  (*pp->pp_LeafBuffer)--;
1621#endif
1622  /* do delta compression a second time */
1623  if(leafcnt > 2)
1624  {
1625    oldval = pp->pp_LeafBuffer[1];
1626    for(cnt = 2; cnt < leafcnt; cnt++)
1627    {
1628      pp->pp_LeafBuffer[cnt] -= oldval;
1629      oldval += pp->pp_LeafBuffer[cnt];
1630      //printf("%ld\n", pp->pp_LeafBuffer[cnt]);
1631    }
1632  }
1633#endif
1634
1635  /* compression:
1636       msb == {1}    -> 1 byte offset (-   63 -     63)
1637       msb == {01}   -> 2 byte offset (- 8192 -   8191)
1638       msb == {001}  -> 3 byte offset (- 2^20 - 2^20-1)
1639       msb == {0001} -> 4 byte offset (- 2^27 - 2^27-1)
1640       msb == {0000} -> 5 byte offset (- 2^35 - 2^35-1)
1641     special opcodes:
1642       0xff      -> end of array
1643     This means the upper limit is currently 512 MB for raw sequence data
1644     (this can be changed though: Just introduce another opcode)
1645  */
1646
1647  for(cnt = 0; cnt < leafcnt; cnt++)
1648  {
1649    val = pp->pp_LeafBuffer[cnt];
1650    if((val < -63) || (val > 63))
1651    {
1652      if((val < -8192) || (val > 8191))
1653      {
1654        if((val < -(1L << 20)) || (val > ((1L << 20) - 1)))
1655        {
1656          if((val < -(1L << 27)) || (val > ((1L << 27) - 1)))
1657          {
1658            /* five bytes */
1659            if((val < -(1L << 35)) || (val > ((1L << 35) - 1)))
1660            {
1661              printf("ERROR: %ld: %ld, %ld out of range\n", pos, cnt, val);
1662            }
1663            val += 1L << 35;
1664            *buf++ = (val >> 32) & 0x0f;
1665            *buf++ = (val >> 24);
1666            *buf++ = (val >> 16);
1667            *buf++ = (val >> 8);
1668            *buf++ = val;
1669            leafsize += 5;
1670
1671          } else {
1672            /* four bytes */
1673            val += 1L << 27;
1674            *buf++ = ((val >> 24) & 0x0f) | 0x10;
1675            *buf++ = (val >> 16);
1676            *buf++ = (val >> 8);
1677            *buf++ = val;
1678            leafsize += 4;
1679          }
1680        } else {
1681          /* three bytes */
1682          val += 1L << 20;
1683          *buf++ = ((val >> 16) & 0x1f) | 0x20;
1684          *buf++ = (val >> 8);
1685          *buf++ = val;
1686          leafsize += 3;
1687        }
1688      } else {
1689        /* two bytes */
1690        val += 8192;
1691        *buf++ = ((val >> 8) & 0x3f) | 0x40;
1692        *buf++ = val;
1693        leafsize += 2;
1694      }
1695    } else {
1696      /* one byte */
1697      val += 63;
1698      *buf++ = (val & 0x7f) | 0x80;
1699      leafsize++;
1700    }
1701  }
1702#ifdef DOUBLEDELTAOPTIONAL
1703  if(sdleafsize < leafsize)
1704  {
1705    /* undo double delta */
1706    for(cnt = 2; cnt < leafcnt; cnt++)
1707    {
1708      pp->pp_LeafBuffer[cnt] += pp->pp_LeafBuffer[cnt-1];
1709    }
1710    buf = origbuf;
1711    leafsize = sdleafsize;
1712    for(cnt = 0; cnt < leafcnt; cnt++)
1713    {
1714      val = pp->pp_LeafBuffer[cnt];
1715      if((val < -63) || (val > 63))
1716      {
1717        if((val < -8192) || (val > 8191))
1718        {
1719          if((val < -(1L << 20)) || (val > ((1L << 20) - 1)))
1720          {
1721              if((val < -(1L << 27)) || (val > ((1L << 27) - 1)))
1722              {
1723                /* five bytes */
1724                val += 1L << 35;
1725                *buf++ = (val >> 32) & 0x0f;
1726                *buf++ = (val >> 24);
1727                *buf++ = (val >> 16);
1728                *buf++ = (val >> 8);
1729                *buf++ = val;
1730              } else {
1731                /* four bytes */
1732                val += 1L << 27;
1733                *buf++ = ((val >> 24) & 0x0f) | 0x10;
1734                *buf++ = (val >> 16);
1735                *buf++ = (val >> 8);
1736                *buf++ = val;
1737              }
1738          } else {
1739            /* three bytes */
1740            val += 1L << 20;
1741            *buf++ = ((val >> 16) & 0x1f) | 0x20;
1742            *buf++ = (val >> 8);
1743            *buf++ = val;
1744          }
1745        } else {
1746          /* two bytes */
1747          val += 8192;
1748          *buf++ = ((val >> 8) & 0x3f) | 0x40;
1749          *buf++ = val;
1750      }
1751      } else {
1752        /* one byte */
1753        val += 63;
1754        *buf++ = (val & 0x7f) | 0x80;
1755      }
1756    }
1757  }
1758#endif
1759  *buf = 0xff;
1760  return(leafsize);
1761}
1762/* \\\ */
1763
1764/* /// "CloneSearchQuery()" */
1765struct SearchQuery * CloneSearchQuery(struct SearchQuery *oldsq)
1766{
1767  struct SearchQuery *sq;
1768
1769  sq = (struct SearchQuery *) calloc(sizeof(struct SearchQuery), 1);
1770  if(!sq)
1771  {
1772    return(NULL);
1773  }
1774  /* copy the whole information */
1775  *sq = *oldsq;
1776  /* fix not cloneable stuff */
1777  NewList(&sq->sq_Hits);
1778  sq->sq_HitsHash = NULL;
1779  sq->sq_PosWeight = new double[sq->sq_QueryLen + 1];
1780  memcpy(sq->sq_PosWeight, oldsq->sq_PosWeight, (sq->sq_QueryLen + 1) * sizeof(double));
1781  return(sq);
1782}
1783/* \\\ */
1784
1785/* /// "AllocSearchQuery()" */
1786struct SearchQuery * AllocSearchQuery(struct PTPanGlobal *pg)
1787{
1788  struct SearchQuery *sq;
1789
1790  sq = (struct SearchQuery *) calloc(sizeof(struct SearchQuery), 1);
1791  if(!sq)
1792  {
1793    return(NULL);
1794  }
1795
1796  NewList(&sq->sq_Hits);
1797  sq->sq_PTPanGlobal = pg;
1798  sq->sq_MismatchWeights = &pg->pg_NoWeights;
1799  sq->sq_MaxErrors = 0.0;
1800  sq->sq_Reversed = FALSE;
1801  sq->sq_AllowReplace = TRUE;
1802  sq->sq_AllowInsert = FALSE;
1803  sq->sq_AllowDelete = FALSE;
1804  sq->sq_KillNSeqsAt = 0x80000000; /* maximum */
1805  sq->sq_MinorMisThres = 0.5;
1806  sq->sq_SortMode = SORT_HITS_NOWEIGHT;
1807  sq->sq_HitsHashSize = QUERYHITSHASHSIZE;
1808  sq->sq_HitsHash  = NULL;
1809  sq->sq_PosWeight = NULL;
1810  return(sq);
1811}
1812/* \\\ */
1813
1814/* /// "FreeSearchQuery()" */
1815void FreeSearchQuery(struct SearchQuery *sq)
1816{
1817  struct QueryHit *qh;
1818
1819  /* free hits */
1820  qh = (struct QueryHit *) sq->sq_Hits.lh_Head;
1821  while(qh->qh_Node.ln_Succ)
1822  {
1823    RemQueryHit(qh);
1824    qh = (struct QueryHit *) sq->sq_Hits.lh_Head;
1825  }
1826  /* free hash table */
1827  FreeHashArray(sq->sq_HitsHash);
1828 
1829  if (sq->sq_PosWeight) 
1830  {
1831    delete[] sq->sq_PosWeight;
1832    sq->sq_PosWeight = NULL;
1833  }
1834 
1835  /* free structure itself */
1836  free(sq);
1837}
1838/* \\\ */
1839
1840/* /// "SearchTree()" */
1841void SearchTree(struct SearchQuery *sq)
1842{
1843  if (PTPanGlobalPtr->pg_verbose >0)
1844    printf(">> SearchTree: Searching for %s\n", sq->sq_Query);
1845
1846  /* init */
1847  FreeHashArray(sq->sq_HitsHash);
1848  sq->sq_HitsHash = AllocHashArray(sq->sq_HitsHashSize);
1849  sq->sq_QueryLen = strlen(sq->sq_Query);
1850  sq->sq_NumHits = 0;
1851  NewList(&sq->sq_Hits);
1852  sq->sq_State.sqs_TreeNode = ReadPackedNode(sq->sq_PTPanPartition, 0);
1853  sq->sq_State.sqs_QueryPos = 0;
1854  sq->sq_State.sqs_ErrorCount = 0.0;
1855  sq->sq_State.sqs_ReplaceCount = 0;
1856  sq->sq_State.sqs_InsertCount = 0;
1857  sq->sq_State.sqs_DeleteCount = 0;
1858  sq->sq_State.sqs_NCount = 0;
1859  SearchTreeRec(sq);
1860  free(sq->sq_State.sqs_TreeNode);
1861
1862  if (PTPanGlobalPtr->pg_verbose >0)
1863    printf("<< SearchTree\n");
1864}
1865/* \\\ */
1866
1867/* /// "PostFilterQueryHits()" */
1868void PostFilterQueryHits(struct SearchQuery *sq)
1869{
1870  struct PTPanGlobal *pg = sq->sq_PTPanGlobal;
1871  struct PTPanSpecies *ps;
1872  struct QueryHit *qh;
1873  struct QueryHit *nextqh;
1874
1875  if (PTPanGlobalPtr->pg_verbose >1) {
1876    printf(">> PostFilterQueryHits: Hits %ld\n", sq->sq_NumHits);
1877  }
1878
1879  if(!sq->sq_NumHits) /* do we have hits at all? */
1880  {
1881    return;
1882  }
1883  /* do we need to sort the list? */
1884  if(sq->sq_NumHits > 1)
1885  {
1886    /* enter priority and sort */
1887    qh = (struct QueryHit *) sq->sq_Hits.lh_Head;
1888    while(qh->qh_Node.ln_Succ)
1889    {
1890      qh->qh_Node.ln_Pri = qh->qh_AbsPos;
1891      qh = (struct QueryHit *) qh->qh_Node.ln_Succ;
1892    }
1893    SortList(&sq->sq_Hits);
1894  }
1895
1896  /* get species, delete duplicates, delete alignment crossing hits */
1897  qh = (struct QueryHit *) sq->sq_Hits.lh_Head;
1898  ps = (struct PTPanSpecies *) pg->pg_Species.lh_Head;
1899  while((nextqh = (struct QueryHit *) qh->qh_Node.ln_Succ))
1900  {
1901    /* check if current species is still valid */
1902    if((qh->qh_AbsPos < ps->ps_AbsOffset) ||
1903       (qh->qh_AbsPos >= (ps->ps_AbsOffset + (ULLONG) ps->ps_RawDataSize)))
1904    {
1905      /* go to next node by chance */
1906      ps = (struct PTPanSpecies *) ps->ps_Node.ln_Succ;
1907      if(ps->ps_Node.ln_Succ)
1908      {
1909        if((qh->qh_AbsPos < ps->ps_AbsOffset) ||
1910          (qh->qh_AbsPos >= (ps->ps_AbsOffset + (ULLONG) ps->ps_RawDataSize)))
1911        {
1912          /* still didn't match, so find it using the hard way */
1913          ps = (struct PTPanSpecies *) FindBinTreeLowerKey(pg->pg_SpeciesBinTree,
1914                                                       qh->qh_AbsPos);
1915        }
1916      } else {
1917        ps = (struct PTPanSpecies *) FindBinTreeLowerKey(pg->pg_SpeciesBinTree,
1918                                                        qh->qh_AbsPos);
1919      }
1920    }
1921    if((qh->qh_AbsPos < ps->ps_AbsOffset) ||
1922       (qh->qh_AbsPos >= (ps->ps_AbsOffset + (ULLONG) ps->ps_RawDataSize)))
1923    {
1924      printf("Mist gebaut (%s(%ld) Pos: %ld, Len %ld HitPos %ld)!\n",
1925            ps->ps_Name, ps->ps_Num, ps->ps_AbsOffset, ps->ps_RawDataSize, qh->qh_AbsPos);
1926    }
1927
1928    if (PTPanGlobalPtr->pg_verbose >1) {
1929      printf(" Hit %s (%ld) Pos: %ld, Len %ld HitPos %ld\n",
1930        ps->ps_Name, ps->ps_Num, ps->ps_AbsOffset, ps->ps_RawDataSize, qh->qh_AbsPos);
1931    }
1932
1933    /* enter species */
1934    qh->qh_Species = ps;
1935    /* filter alignment crossing hits */
1936    if(qh->qh_AbsPos - ps->ps_AbsOffset > ps->ps_RawDataSize - sq->sq_QueryLen)
1937    {
1938      if (PTPanGlobalPtr->pg_verbose >1) {
1939        printf(" Border crossed [%ld/%ld]\n",
1940        qh->qh_AbsPos - ps->ps_AbsOffset, ps->ps_RawDataSize);
1941      }
1942      pg->pg_Bench.ts_CrossBoundKilled++;
1943      RemQueryHit(qh);
1944    }
1945    qh = nextqh;
1946  }
1947}
1948/* \\\ */
1949
1950/* /// "AddQueryHit()" */
1951BOOL AddQueryHit(struct SearchQuery *sq, ULONG hitpos)
1952{
1953  arb_assert(hitpos < sq->sq_PTPanGlobal->pg_TotalRawSize);
1954
1955  struct QueryHit *qh;
1956  struct HashEntry *hash;
1957
1958  if (PTPanGlobalPtr->pg_verbose >1) {
1959    struct TreeNode *tn;
1960    printf(">> AddQueryHit: ");
1961    tn = sq->sq_State.sqs_TreeNode;
1962    do
1963    {
1964      printf("%s", tn->tn_Edge);
1965      tn = tn->tn_Parent;
1966      if (tn) printf("-");
1967    } while(tn);
1968    printf(" (%f/%d/%d/%d) QryPos %ld/%ld\n",
1969           sq->sq_State.sqs_ErrorCount,
1970           sq->sq_State.sqs_ReplaceCount,
1971           sq->sq_State.sqs_InsertCount,
1972           sq->sq_State.sqs_DeleteCount,
1973           sq->sq_State.sqs_QueryPos,
1974           sq->sq_QueryLen);
1975  }
1976
1977  /* try eliminating duplicates even at this stage */
1978  if((hash = GetHashEntry(sq->sq_HitsHash, hitpos)))
1979  {
1980    qh = (struct QueryHit *) hash->he_Data;
1981    if((qh->qh_AbsPos == hitpos + sq->sq_PTPanPartition->pp_RawOffset))
1982    {
1983      /* check, if the new hit was better */
1984      if(qh->qh_ErrorCount > sq->sq_State.sqs_ErrorCount)
1985      {
1986        qh->qh_ErrorCount = sq->sq_State.sqs_ErrorCount;
1987        qh->qh_ReplaceCount = sq->sq_State.sqs_ReplaceCount;
1988        qh->qh_InsertCount = sq->sq_State.sqs_InsertCount;
1989        qh->qh_DeleteCount = sq->sq_State.sqs_DeleteCount;
1990        if(sq->sq_Reversed) /* copy reversed flag */
1991        {
1992        qh->qh_Flags |= QHF_REVERSED;
1993        } else {
1994        qh->qh_Flags &= ~QHF_REVERSED;
1995        }
1996      }
1997      /* if the hit was safe now, we can clear the unsafe bit */
1998      if((sq->sq_State.sqs_QueryPos >= sq->sq_QueryLen))
1999      {
2000       qh->qh_Flags &= ~QHF_UNSAFE; /* clear unsafe bit */
2001      }
2002      return(TRUE);
2003    }
2004  }
2005#if 0 // old stuff
2006  qh = (struct QueryHit *) sq->sq_Hits.lh_TailPred;
2007  if(qh->qh_Node.ln_Pred && (qh->qh_AbsPos == hitpos + sq->sq_PTPanPartition->pp_RawOffset))
2008  {
2009    //printf("Duplicate!\n");
2010    /* check, if the new hit was better */
2011    if(qh->qh_ErrorCount > sq->sq_State.sqs_ErrorCount)
2012    {
2013      qh->qh_ErrorCount = sq->sq_State.sqs_ErrorCount;
2014      qh->qh_ReplaceCount = sq->sq_State.sqs_ReplaceCount;
2015      qh->qh_InsertCount = sq->sq_State.sqs_InsertCount;
2016      qh->qh_DeleteCount = sq->sq_State.sqs_DeleteCount;
2017      if(sq->sq_Reversed) /* copy reversed flag */
2018      {
2019        qh->qh_Flags |= QHF_REVERSED;
2020      } else {
2021        qh->qh_Flags &= ~QHF_REVERSED;
2022      }
2023    }
2024    /* if the hit was safe now, we can clear the unsafe bit */
2025    if((sq->sq_State.sqs_QueryPos >= sq->sq_QueryLen))
2026    {
2027      qh->qh_Flags &= ~QHF_UNSAFE; /* clear unsafe bit */
2028    }
2029    return(TRUE);
2030  }
2031#endif
2032  qh = (struct QueryHit *) malloc(sizeof(struct QueryHit));
2033  if(!qh)
2034  {
2035    return(FALSE); /* out of memory */
2036  }
2037  sq->sq_NumHits++;
2038
2039  /* add hit to array */
2040  qh->qh_AbsPos = hitpos;
2041  qh->qh_AbsPos += sq->sq_PTPanPartition->pp_RawOffset; /* allow more than 2 GB */
2042  qh->qh_ErrorCount = sq->sq_State.sqs_ErrorCount;
2043  qh->qh_ReplaceCount = sq->sq_State.sqs_ReplaceCount;
2044  qh->qh_InsertCount = sq->sq_State.sqs_InsertCount;
2045  qh->qh_DeleteCount = sq->sq_State.sqs_DeleteCount;
2046  qh->qh_Species = NULL;
2047  qh->qh_Flags = QHF_ISVALID;
2048  if(sq->sq_Reversed) /* set reversed flag */
2049  {
2050    qh->qh_Flags |= QHF_REVERSED;
2051  }
2052  if(sq->sq_State.sqs_QueryPos < sq->sq_QueryLen)
2053  {
2054    qh->qh_Flags |= QHF_UNSAFE;
2055  }
2056  AddTail(&sq->sq_Hits, &qh->qh_Node);
2057  InsertHashEntry(sq->sq_HitsHash, hitpos, (ULONG) qh);
2058
2059  if (PTPanGlobalPtr->pg_verbose >1) {
2060    struct TreeNode *tn;
2061    printf("<< AddQueryHit: %ld [%08lx] <%s>: ",
2062        qh->qh_AbsPos, qh->qh_AbsPos, sq->sq_PTPanPartition->pp_PrefixSeq);
2063    tn = sq->sq_State.sqs_TreeNode;
2064    do
2065    {
2066      printf("%s", tn->tn_Edge);
2067      tn = tn->tn_Parent;
2068      if (tn) printf("-");
2069    } while(tn);
2070
2071    printf(" (%f/%d/%d/%d)\n",
2072        qh->qh_ErrorCount,
2073        qh->qh_ReplaceCount,
2074        qh->qh_InsertCount,
2075        qh->qh_DeleteCount);
2076  }
2077
2078  return(TRUE);
2079}
2080/* \\\ */
2081
2082/* /// "RemQueryHit()" */
2083void RemQueryHit(struct QueryHit *qh)
2084{
2085  /* unlink and free node */
2086  Remove(&qh->qh_Node);
2087  free(qh);
2088}
2089/* \\\ */
2090
2091/* /// "MergeQueryHits()" */
2092void MergeQueryHits(struct SearchQuery *tarsq, struct SearchQuery *srcsq)
2093{
2094  struct QueryHit *srcqh = (struct QueryHit *) srcsq->sq_Hits.lh_Head;
2095  struct QueryHit *tarqh = (struct QueryHit *) tarsq->sq_Hits.lh_Head;
2096  struct QueryHit *lasttarqh = NULL; //(struct QueryHit *) tarqh->qh_Node.ln_Pred;
2097
2098  /* iterate over source list */
2099  while(srcqh->qh_Node.ln_Succ)
2100  {
2101    while(tarqh->qh_Node.ln_Succ) /* are we at the end of the list? */
2102    {
2103      if(tarqh->qh_AbsPos > srcqh->qh_AbsPos) /* find the appropriate position */
2104      {
2105        break;
2106      }
2107      lasttarqh = tarqh; /* this one is before srcqh */
2108      tarqh = (struct QueryHit *) tarqh->qh_Node.ln_Succ;
2109    }
2110    //printf("Src: %ld\n", srcqh->qh_AbsPos);
2111    Remove(&srcqh->qh_Node);
2112    tarsq->sq_NumHits++;
2113    if(lasttarqh) /* is there a previous node to compare? */
2114    {
2115      /* check for duplicate */
2116      if(lasttarqh->qh_AbsPos == srcqh->qh_AbsPos)
2117      {
2118        //printf("Duplicate %ld!\n", lasttarqh->qh_AbsPos);
2119        tarsq->sq_PTPanGlobal->pg_Bench.ts_DupsKilled++;
2120        /* okay, this is a double -- which one do we keep? */
2121        if(lasttarqh->qh_ErrorCount < srcqh->qh_ErrorCount)
2122        {
2123        /* the old target was a better hit, so eliminate the old one */
2124          srcqh->qh_ErrorCount = lasttarqh->qh_ErrorCount;
2125          srcqh->qh_ReplaceCount = lasttarqh->qh_ReplaceCount;
2126          srcqh->qh_InsertCount = lasttarqh->qh_InsertCount;
2127          srcqh->qh_DeleteCount = lasttarqh->qh_DeleteCount;
2128        }
2129        /* remove the query hit */
2130        RemQueryHit(lasttarqh);
2131        tarsq->sq_NumHits--;
2132        lasttarqh = (struct QueryHit *) tarqh->qh_Node.ln_Pred;
2133        if(!lasttarqh->qh_Node.ln_Pred)
2134        {
2135        /* we killed the very first entry */
2136        //printf("Killed First!\n");
2137        AddHead(&tarsq->sq_Hits, &srcqh->qh_Node);
2138        } else {
2139        //printf("Insert\n");
2140        /* attach between lasttarqh and tarqh */
2141        lasttarqh->qh_Node.ln_Succ = tarqh->qh_Node.ln_Pred = &srcqh->qh_Node;
2142        srcqh->qh_Node.ln_Pred = &lasttarqh->qh_Node;
2143        srcqh->qh_Node.ln_Succ = &tarqh->qh_Node;
2144        }
2145      } else {
2146        /* normal case: insert it between lasttarqh and tarqh */
2147        lasttarqh->qh_Node.ln_Succ = tarqh->qh_Node.ln_Pred = &srcqh->qh_Node;
2148        srcqh->qh_Node.ln_Pred = &lasttarqh->qh_Node;
2149        srcqh->qh_Node.ln_Succ = &tarqh->qh_Node;
2150      }
2151    } else {
2152      /* we were at the start of the list */
2153      //printf("AddHead %ld\n", srcqh->qh_AbsPos);
2154      AddHead(&tarsq->sq_Hits, &srcqh->qh_Node);
2155    }
2156    tarqh = lasttarqh = srcqh;
2157    srcqh = (struct QueryHit *) srcsq->sq_Hits.lh_Head;
2158  }
2159  tarsq->sq_PTPanGlobal->pg_Bench.ts_Hits += tarsq->sq_NumHits;
2160  //printf("%ld Hits\n", tarsq->sq_NumHits);
2161}
2162/* \\\ */
2163
2164void PrintSearchQueryState(const char* s1, const char* s2, struct SearchQuery *sq)
2165{
2166  static char seq[100];
2167  int i = 0;
2168  struct TreeNode *tn;
2169
2170  tn = sq->sq_State.sqs_TreeNode;
2171  do {
2172    i += sprintf(seq+i, "%s", tn->tn_Edge);
2173    tn = tn->tn_Parent;
2174    if (tn) i += sprintf(seq+i,"-");
2175  } while(tn);
2176  seq[i] = 0;
2177
2178#if 0
2179  if (strcmp(seq, "G-A-A-G-U-A-A-A-G-A-C-X")==0) {
2180    printf("STOP!!!\n");
2181  }
2182#endif
2183
2184  if (PTPanGlobalPtr->pg_verbose >2) {
2185    printf("%s%s Src/Qry %ld/%ld: ", s1, s2,
2186           sq->sq_State.sqs_SourcePos, sq->sq_State.sqs_QueryPos);
2187    printf("%s (%f/%d/%d/%d)\n", seq,
2188           sq->sq_State.sqs_ErrorCount,
2189           sq->sq_State.sqs_ReplaceCount,
2190           sq->sq_State.sqs_InsertCount,
2191           sq->sq_State.sqs_DeleteCount);
2192  }
2193
2194}
2195
2196
2197/* /// "SearchTreeRec()" */
2198void SearchTreeRec(struct SearchQuery *sq)
2199{
2200  struct PTPanGlobal *pg = sq->sq_PTPanGlobal;
2201  struct TreeNode *oldtn = sq->sq_State.sqs_TreeNode;
2202  struct TreeNode *tn;
2203  UWORD seqcode;
2204  UWORD seqcode2;
2205  UWORD seqcodetree;
2206  BOOL ignore;
2207  BOOL seqcompare;
2208  struct SearchQueryState oldstate;
2209  ULONG cnt;
2210  ULONG *leafptr;
2211  ULONG leaf;
2212  BOOL leafadded;
2213
2214  const char* indent;
2215  if (PTPanGlobalPtr->pg_verbose >1) {
2216    const char* spaces = "                                            ";
2217    indent = spaces + strlen(spaces) - sq->sq_State.sqs_QueryPos;
2218  }
2219
2220  seqcompare = (sq->sq_State.sqs_QueryPos < sq->sq_QueryLen);
2221  /* collect leaves on our way down */
2222  if((cnt = oldtn->tn_NumLeaves))
2223  {
2224    /* collect hits */
2225    leafptr = oldtn->tn_Leaves;
2226    do
2227    {
2228      AddQueryHit(sq, *leafptr++);
2229    } while(--cnt);
2230    /* if we got leaves, we're at the end of the tree */
2231    return;
2232  }
2233
2234  if (PTPanGlobalPtr->pg_verbose >1)
2235    PrintSearchQueryState(indent, "=> SearchTreeRec: ", sq);
2236
2237#if 1
2238  /* Maximum tree level reached?
2239   * This is possible even without <oldtn> being a LEAF node,
2240   * when we reach the node via a long edge. (JW, 13.3.05)
2241   */
2242  if(((ULONG) oldtn->tn_Level >= oldtn->tn_PTPanPartition->pp_TreePruneDepth) ||
2243     ((ULONG) oldtn->tn_TreeOffset >= oldtn->tn_PTPanPartition->pp_TreePruneLength))
2244  {
2245    AddQueryHit(sq, oldtn->tn_Children[0] & ~LEAFMASK);
2246    return;
2247  }
2248#endif
2249
2250
2251  oldstate = sq->sq_State;
2252  /* recurse on children */
2253  for(seqcode = 0; seqcode < pg->pg_AlphaSize; seqcode++)
2254  {
2255    /* check, if we have a child */
2256    if((leaf = oldtn->tn_Children[seqcode]))
2257    {
2258      tn = NULL;
2259      leafadded = FALSE;
2260
2261      if (PTPanGlobalPtr->pg_verbose >2) {
2262        printf("%s>> SearchChild [%d] ", indent, seqcode);
2263        if(leaf & LEAFMASK)
2264            printf("LEAF %ld\n", leaf & ~LEAFMASK);
2265        else
2266            printf("%ld\n", leaf);
2267      }
2268
2269      /* *** phase one: check 1:1 replacement; this considers the hamming distance */
2270      ignore = FALSE;
2271      if(seqcompare)
2272      {
2273    /* check, if first seqcode of edge matches (N (seqcode 0) matches always) */
2274    seqcode2 = pg->pg_CompressTable[sq->sq_Query[sq->sq_State.sqs_QueryPos]];
2275    if(seqcode2 != seqcode)
2276    {
2277        if(sq->sq_AllowReplace)
2278        {
2279        /* increase error level by replace operation */
2280        sq->sq_State.sqs_ReplaceCount++;
2281        sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Replace[(seqcode2 * ALPHASIZE) + seqcode] 
2282                                       * sq->sq_PosWeight[sq->sq_State.sqs_QueryPos];
2283        if(!seqcode)
2284        {
2285            if(++sq->sq_State.sqs_NCount > sq->sq_KillNSeqsAt)
2286            {
2287            ignore = TRUE;
2288            }
2289        }
2290            } else {
2291                ignore = TRUE;
2292            }
2293    }
2294/* check, if more errors are tolerable. */
2295if(sq->sq_State.sqs_ErrorCount > sq->sq_MaxErrors)
2296{
2297/* too many errors, do not recurse */
2298ignore = TRUE;
2299}
2300    sq->sq_State.sqs_QueryPos++;
2301      }
2302
2303      /* should we take a deeper look? */
2304      if(!ignore)
2305      {
2306    /* did we have a leaf? */
2307    if(leaf & LEAFMASK)
2308    {
2309    AddQueryHit(sq, leaf & ~LEAFMASK);
2310    leafadded = TRUE;
2311    } else {
2312    /* get the child node */
2313    tn = GoDownNodeChild(oldtn, seqcode);
2314
2315    if (PTPanGlobalPtr->pg_verbose >2) {
2316        // debug
2317        if(!tn)
2318        {
2319         printf("%s  Couldn't go down on %d!\n", indent, seqcode);
2320         break;
2321        }
2322
2323        if(sq->sq_State.sqs_QueryPos == 1)
2324        {
2325        printf("%s  Down: %s\n", indent, tn->tn_Edge);
2326        }
2327    }
2328
2329    /* do we reach the end of the query with the first seqcode in the edge? */
2330    if(sq->sq_State.sqs_QueryPos < sq->sq_QueryLen)
2331    {
2332        /* oooh, we have a longer edge, so check its contents before we follow it to the end */
2333        if(tn->tn_EdgeLen > 1)
2334        {
2335        STRPTR tarptr = &tn->tn_Edge[1];
2336        UBYTE tcode;
2337        while((tcode = *tarptr++))
2338            {
2339        /* check, if codes are the same (or N is found) */
2340        seqcodetree = pg->pg_CompressTable[tcode];
2341        seqcode2 = pg->pg_CompressTable[sq->sq_Query[sq->sq_State.sqs_QueryPos]];
2342        if(seqcode2 != seqcodetree)
2343        {
2344            sq->sq_State.sqs_ReplaceCount++;
2345            sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Replace[seqcode2 * ALPHASIZE + seqcodetree]
2346                                           * sq->sq_PosWeight[sq->sq_State.sqs_QueryPos];
2347            /* check, if more errors are tolerable. */
2348            if(sq->sq_State.sqs_ErrorCount > sq->sq_MaxErrors)
2349            {
2350                /* too many errors, do not recurse */
2351                ignore = TRUE;
2352                break;
2353            }
2354            if(!seqcodetree)
2355                {
2356                if(++sq->sq_State.sqs_NCount > sq->sq_KillNSeqsAt)
2357                    {
2358                    ignore = TRUE;
2359                    break;
2360                    }
2361                }
2362        }
2363        /* check if the end of the query string was reached */
2364        if(++sq->sq_State.sqs_QueryPos >= sq->sq_QueryLen)
2365        {
2366            break;
2367        }
2368        }
2369        }
2370    }
2371    /* are we allowed to go any further down the tree? */
2372        if(!ignore)
2373        {
2374            /* recurse */
2375            sq->sq_State.sqs_TreeNode = tn;
2376            if(sq->sq_State.sqs_QueryPos < sq->sq_QueryLen)
2377            {
2378            SearchTreeRec(sq);
2379            } else {
2380            CollectTreeRec(sq);
2381            }
2382        } else {
2383            if (PTPanGlobalPtr->pg_verbose >2)
2384            printf("%s(%f >= %f)\n", indent, sq->sq_State.sqs_ErrorCount, sq->sq_MaxErrors);
2385        }
2386        }
2387      }
2388
2389
2390      /* *** phase two: check for adding a character in the query string  */
2391      /* this will be only done, if the inserting operation is allowed and
2392         we are not at the very beginning nor the end of the query (because
2393    then it will found without insert operation anyway) */
2394      if(oldstate.sqs_QueryPos &&
2395    (oldstate.sqs_QueryPos + 1 < sq->sq_QueryLen) &&
2396    sq->sq_AllowInsert)
2397    {
2398    ignore = FALSE;
2399    sq->sq_State = oldstate;
2400    /* increase error level by insert operation */
2401    sq->sq_State.sqs_InsertCount++;
2402    sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Insert[seqcode];
2403    if(!seqcode)
2404    {
2405    if(++sq->sq_State.sqs_NCount > sq->sq_KillNSeqsAt)
2406    {
2407        sq->sq_State.sqs_ErrorCount = sq->sq_MaxErrors + 1.0;
2408    }
2409    }
2410    if(sq->sq_State.sqs_ErrorCount <= sq->sq_MaxErrors)
2411    {
2412    /* did we have a leaf? */
2413    if(leaf & LEAFMASK)
2414    {
2415        if(!leafadded) /* don't add the same leaf twice! */
2416        {
2417        AddQueryHit(sq, leaf & ~LEAFMASK);
2418        leafadded = TRUE;
2419        }
2420    } else {
2421        /* get the child node */
2422        if(!tn)
2423        {
2424        tn = GoDownNodeChild(oldtn, seqcode);
2425
2426        if (PTPanGlobalPtr->pg_verbose >2) {
2427        // debug
2428        if(!tn)
2429        {
2430            printf("%s  Couldn't go down on %d!\n", indent, seqcode);
2431            break;
2432        }
2433        }
2434        }
2435        /* do we reach the end of the query with the first seqcode in the edge? */
2436        if(sq->sq_State.sqs_QueryPos < sq->sq_QueryLen)
2437        {
2438        /* oooh, we have a longer edge, so check its contents before we follow it to the end */
2439        if(tn->tn_EdgeLen > 1)
2440        {
2441    STRPTR tarptr = &tn->tn_Edge[1];
2442    UBYTE tcode;
2443    while((tcode = *tarptr++))
2444    {
2445    /* check, if codes are the same (or N is found) */
2446    seqcodetree = pg->pg_CompressTable[tcode];
2447    seqcode2 = pg->pg_CompressTable[sq->sq_Query[sq->sq_State.sqs_QueryPos]];
2448    if(seqcode2 != seqcodetree)
2449    {
2450        sq->sq_State.sqs_ReplaceCount++;
2451        sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Replace[seqcode2 * ALPHASIZE + seqcodetree]
2452                                       * sq->sq_PosWeight[sq->sq_State.sqs_QueryPos];
2453        /* check, if more errors are tolerable. */
2454        if(sq->sq_State.sqs_ErrorCount > sq->sq_MaxErrors)
2455        {
2456        /* too many errors, do not recurse */
2457        ignore = TRUE;
2458        break;
2459        }
2460        if(!seqcodetree)
2461        {
2462        if(++sq->sq_State.sqs_NCount > sq->sq_KillNSeqsAt)
2463        {
2464        ignore = TRUE;
2465        break;
2466        }
2467        }
2468    }
2469    /* check if the end of the query string was reached */
2470    if(++sq->sq_State.sqs_QueryPos >= sq->sq_QueryLen)
2471    {
2472        break;
2473    }
2474    }
2475        }
2476        }
2477        /* are we allowed to go any further down the tree? */
2478        if(!ignore)
2479        {
2480        /* recurse */
2481        sq->sq_State.sqs_TreeNode = tn;
2482        if(sq->sq_State.sqs_QueryPos < sq->sq_QueryLen)
2483        {
2484        SearchTreeRec(sq);
2485        } else {
2486        CollectTreeRec(sq);
2487        }
2488        }
2489    }
2490    }
2491      }
2492
2493      /* *** phase three: check for deleting a character in the query string  */
2494      /* this will be only done, if we're not at the end of the string and
2495    the delete operation is allowed */
2496      if(oldstate.sqs_QueryPos &&
2497    (oldstate.sqs_QueryPos + 1 < sq->sq_QueryLen) &&
2498    sq->sq_AllowDelete)
2499      {
2500    ignore = FALSE;
2501    sq->sq_State = oldstate;
2502    /* increase error level by delete operation */
2503    sq->sq_State.sqs_DeleteCount++;
2504    sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Delete[pg->pg_CompressTable[sq->sq_Query[sq->sq_State.sqs_QueryPos]]];
2505    sq->sq_State.sqs_QueryPos++;
2506    if(sq->sq_State.sqs_ErrorCount <= sq->sq_MaxErrors)
2507    {
2508    /* check, if first seqcode of edge matches (N (seqcode 0) matches always) */
2509    seqcode2 = pg->pg_CompressTable[sq->sq_Query[sq->sq_State.sqs_QueryPos]];
2510    if(seqcode2 != seqcode)
2511    {
2512        sq->sq_State.sqs_ReplaceCount++;
2513        sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Replace[(seqcode2 * ALPHASIZE) + seqcode]
2514                                       * sq->sq_PosWeight[sq->sq_State.sqs_QueryPos];
2515        /* check, if more errors are tolerable. */
2516        if(sq->sq_State.sqs_ErrorCount > sq->sq_MaxErrors)
2517        {
2518        /* too many errors, do not recurse */
2519        ignore = TRUE;
2520        }
2521    }
2522    sq->sq_State.sqs_QueryPos++;
2523    } else {
2524    ignore = TRUE;
2525    }
2526    /* should we take a deeper look? */
2527    if(!ignore)
2528    {
2529    /* did we have a leaf? */
2530    if(leaf & LEAFMASK)
2531    {
2532        if(!leafadded) /* don't add the same leaf twice! */
2533        {
2534        AddQueryHit(sq, leaf & ~LEAFMASK);
2535        }
2536    } else {
2537        /* get the child node */
2538        if(!tn)
2539        {
2540        tn = GoDownNodeChild(oldtn, seqcode);
2541#if 0 // debug
2542        if(!tn)
2543        {
2544    printf("%s  Couldn't go down on %d!\n", indent, seqcode);
2545    break;
2546      }
2547#endif
2548    }
2549    /* do we reach the end of the query with the first seqcode in the edge? */
2550    if(sq->sq_State.sqs_QueryPos < sq->sq_QueryLen)
2551    {
2552      /* oooh, we have a longer edge, so check its contents before we follow it to the end */
2553      if(tn->tn_EdgeLen > 1)
2554      {
2555        STRPTR tarptr = &tn->tn_Edge[1];
2556        UBYTE tcode;
2557        while((tcode = *tarptr++))
2558        {
2559        /* check, if codes are the same (or N is found) */
2560        seqcodetree = pg->pg_CompressTable[tcode];
2561        seqcode2 = pg->pg_CompressTable[sq->sq_Query[sq->sq_State.sqs_QueryPos]];
2562        if(seqcode2 != seqcodetree)
2563        {
2564            sq->sq_State.sqs_ReplaceCount++;
2565            sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Replace[seqcode2 * ALPHASIZE + seqcodetree]
2566                                           * sq->sq_PosWeight[sq->sq_State.sqs_QueryPos];
2567            /* check, if more errors are tolerable. */
2568            if(sq->sq_State.sqs_ErrorCount > sq->sq_MaxErrors)
2569            {
2570            /* too many errors, do not recurse */
2571            ignore = TRUE;
2572            break;
2573            }
2574            if(!seqcodetree)
2575            {
2576            if(++sq->sq_State.sqs_NCount > sq->sq_KillNSeqsAt)
2577            {
2578            ignore = TRUE;
2579            break;
2580            }
2581            }
2582        }
2583        /* check if the end of the query string was reached */
2584        if(++sq->sq_State.sqs_QueryPos >= sq->sq_QueryLen)
2585        {
2586            break;
2587        }
2588        }
2589        }
2590        }
2591        /* are we allowed to go any further down the tree? */
2592        if(!ignore)
2593        {
2594        /* recurse */
2595        sq->sq_State.sqs_TreeNode = tn;
2596        if(sq->sq_State.sqs_QueryPos < sq->sq_QueryLen)
2597        {
2598        SearchTreeRec(sq);
2599        } else {
2600        CollectTreeRec(sq);
2601        }
2602        }
2603    }
2604    }
2605      }
2606      if(tn)
2607      {
2608    /* clean up */
2609    free(tn);
2610      }
2611    }
2612    /* restore possible altered data */
2613    sq->sq_State = oldstate;
2614  }
2615}
2616/* \\\ */
2617
2618/* /// "CollectTreeRec()" */
2619void CollectTreeRec(struct SearchQuery *sq)
2620{
2621  struct PTPanGlobal *pg = sq->sq_PTPanGlobal;
2622  struct TreeNode *oldtn = sq->sq_State.sqs_TreeNode;
2623  struct TreeNode *tn;
2624  UWORD seqcode;
2625  ULONG cnt;
2626  ULONG *leafptr;
2627  ULONG leaf;
2628
2629  const char* indent;
2630  if (PTPanGlobalPtr->pg_verbose >2) {
2631    const char* spaces = "                                            ";
2632    indent = spaces + strlen(spaces) - sq->sq_State.sqs_QueryPos;
2633  }
2634
2635  /* collect leaves on our way down */
2636  if((cnt = oldtn->tn_NumLeaves))
2637  {
2638    /* collect hits */
2639    leafptr = oldtn->tn_Leaves;
2640    do
2641    {
2642      AddQueryHit(sq, *leafptr++);
2643    } while(--cnt);
2644    /* if we got leaves, we're at the end of the tree */
2645    return;
2646  }
2647
2648  if (PTPanGlobalPtr->pg_verbose >2) {
2649    struct TreeNode *tn;
2650    printf("%s=> CollectTreeRec: Src/Qry %ld/%ld: ",
2651    indent,
2652    sq->sq_State.sqs_SourcePos, sq->sq_State.sqs_QueryPos);
2653    tn = sq->sq_State.sqs_TreeNode;
2654    do
2655    {
2656      printf("%s", tn->tn_Edge);
2657      tn = tn->tn_Parent;
2658      if (tn) printf("-");
2659    } while(tn);
2660    printf(" (%f/%d/%d/%d)\n",
2661    sq->sq_State.sqs_ErrorCount,
2662    sq->sq_State.sqs_ReplaceCount,
2663    sq->sq_State.sqs_InsertCount,
2664    sq->sq_State.sqs_DeleteCount);
2665  }
2666
2667  /* recurse on children */
2668  for(seqcode = 0; seqcode < pg->pg_AlphaSize; seqcode++)
2669  {
2670    /* check, if we have a child */
2671    if((leaf = oldtn->tn_Children[seqcode]))
2672    {
2673      if (PTPanGlobalPtr->pg_verbose >2) {
2674    printf("%s>> CollectChild [%d] ", indent, seqcode);
2675    if(leaf & LEAFMASK)
2676    printf("LEAF %ld\n", leaf & ~LEAFMASK);
2677    else
2678    printf("%ld\n", leaf);
2679      }
2680
2681      /* did we have a leaf? */
2682      if(leaf & LEAFMASK)
2683      {
2684    AddQueryHit(sq, leaf & ~LEAFMASK);
2685      } else {
2686    /* get the child node */
2687    //tn = GoDownNodeChild(oldtn, seqcode);
2688    tn = GoDownNodeChildNoEdge(oldtn, seqcode);
2689    /* recurse */
2690    sq->sq_State.sqs_TreeNode = tn;
2691    CollectTreeRec(sq);
2692    /* clean up */
2693    free(tn);
2694      }
2695    }
2696  }
2697  sq->sq_State.sqs_TreeNode = oldtn;
2698}
2699/* \\\ */
2700
2701/* /// "MatchSequence()" */
2702BOOL MatchSequence(struct SearchQuery *sq)
2703{
2704  BOOL res;
2705
2706  if (PTPanGlobalPtr->pg_verbose >0)
2707    printf(">> MatchSequence: Matching %s in %s\n", sq->sq_Query, sq->sq_SourceSeq);
2708
2709  /* init */
2710  sq->sq_QueryLen = strlen(sq->sq_Query);
2711  sq->sq_State.sqs_SourcePos = 0;
2712  sq->sq_State.sqs_QueryPos = 0;
2713  sq->sq_State.sqs_ErrorCount = 0.0;
2714  sq->sq_State.sqs_ReplaceCount = 0;
2715  sq->sq_State.sqs_InsertCount = 0;
2716  sq->sq_State.sqs_DeleteCount = 0;
2717  res = MatchSequenceRec(sq);
2718
2719  if (PTPanGlobalPtr->pg_verbose >0) {
2720
2721    if (res) {
2722      printf("<< MatchSequence: yes (%f/%d/%d/%d)\n",
2723        sq->sq_State.sqs_ErrorCount,
2724        sq->sq_State.sqs_ReplaceCount,
2725        sq->sq_State.sqs_InsertCount,
2726        sq->sq_State.sqs_DeleteCount);
2727    } else {
2728      printf("<< MatchSequence:no\n");
2729    }
2730  }
2731
2732  return(res);
2733}
2734/* \\\ */
2735
2736/* /// "MatchSequenceRec()" */
2737BOOL MatchSequenceRec(struct SearchQuery *sq)
2738{
2739  struct PTPanGlobal *pg = sq->sq_PTPanGlobal;
2740  UWORD seqcode;
2741  UWORD seqcode2;
2742  BOOL ignore;
2743  struct SearchQueryState oldstate;
2744
2745  if(!(sq->sq_SourceSeq[sq->sq_State.sqs_SourcePos]))
2746  {
2747    return(TRUE); /* end of source sequence reached */
2748  }
2749
2750  oldstate = sq->sq_State;
2751  seqcode  = pg->pg_CompressTable[sq->sq_SourceSeq[sq->sq_State.sqs_SourcePos]];
2752  seqcode2 = pg->pg_CompressTable[sq->sq_Query[sq->sq_State.sqs_QueryPos]];
2753
2754  /* *** phase one: check 1:1 replacement; this considers the hamming distance */
2755  ignore = FALSE;
2756  /* check, if first seqcode of edge matches (N (seqcode 0) matches always) */
2757  if(seqcode2 != seqcode)
2758  {
2759    if(sq->sq_AllowReplace)
2760    {
2761      sq->sq_State.sqs_ReplaceCount++;
2762      sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Replace[(seqcode2 * ALPHASIZE) + seqcode]
2763                                     * sq->sq_PosWeight[sq->sq_State.sqs_QueryPos];
2764    } else {
2765      ignore = TRUE;
2766    }
2767  }
2768  /* check, if more errors are tolerable. */
2769  if(sq->sq_State.sqs_ErrorCount > sq->sq_MaxErrors)
2770  {
2771    /* too many errors, do not recurse */
2772    ignore = TRUE;
2773  }
2774  /* should we take a deeper look? */
2775  if(!ignore)
2776  {
2777    /* do we reach the end of the query? */
2778    if(++sq->sq_State.sqs_QueryPos < sq->sq_QueryLen)
2779    {
2780      /* recurse (actually, iterate) */
2781      ++sq->sq_State.sqs_SourcePos;
2782      if(MatchSequenceRec(sq))
2783      {
2784    return(TRUE);
2785      }
2786    } else {
2787      return(TRUE);
2788    }
2789  }
2790
2791  /* *** phase two: check for adding a character in the query string  */
2792  /* this will be only done, if the inserting operation is allowed and
2793     we are not at the very beginning of the query (because then it will
2794     found without insert operation anyway) */
2795  if(oldstate.sqs_QueryPos && (oldstate.sqs_QueryPos + 1 < sq->sq_QueryLen) && sq->sq_AllowInsert)
2796  {
2797    sq->sq_State = oldstate;
2798    sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Insert[seqcode];
2799    sq->sq_State.sqs_InsertCount++;
2800    if(sq->sq_State.sqs_ErrorCount <= sq->sq_MaxErrors)
2801    {
2802      /* recurse */
2803      ++sq->sq_State.sqs_SourcePos;
2804      if(MatchSequenceRec(sq))
2805      {
2806    return(TRUE);
2807      }
2808    }
2809  }
2810
2811  /* *** phase three: check for deleting a character in the query string */
2812  /* this will be only done, if we're neither at the beginning of the string
2813     nor the end of it and the delete operation is allowed */
2814  if(oldstate.sqs_QueryPos && (oldstate.sqs_QueryPos + 1 < sq->sq_QueryLen) && sq->sq_AllowDelete)
2815  {
2816    sq->sq_State = oldstate;
2817    sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Delete[seqcode2];
2818    sq->sq_State.sqs_DeleteCount++;
2819    if(sq->sq_State.sqs_ErrorCount <= sq->sq_MaxErrors)
2820    {
2821      ++sq->sq_State.sqs_QueryPos;
2822      /* recurse */
2823      if(MatchSequenceRec(sq))
2824      {
2825    return(TRUE);
2826      }
2827    }
2828  }
2829  /* restore possible altered data */
2830  sq->sq_State = oldstate;
2831  return(FALSE);
2832}
2833/* \\\ */
2834
2835/* /// "FindSequenceMatch()" */
2836BOOL FindSequenceMatch(struct SearchQuery *sq, struct QueryHit *qh, STRPTR tarstr)
2837{
2838  BOOL res;
2839
2840  if (PTPanGlobalPtr->pg_verbose >0) {
2841    printf(">> FindSequenceMatch: Finding %s in %s [%ld] %s %f [%d/%d/%d] %s\n",
2842    sq->sq_Query,
2843    qh->qh_Species->ps_Name,
2844    qh->qh_AbsPos,
2845    sq->sq_SourceSeq,
2846    qh->qh_ErrorCount,
2847    qh->qh_ReplaceCount,
2848    qh->qh_InsertCount,
2849    qh->qh_DeleteCount,
2850    qh->qh_Flags & QHF_UNSAFE ? "(u)" : "(s)");
2851  }
2852
2853  /* init */
2854  sq->sq_QueryLen = strlen(sq->sq_Query);
2855  sq->sq_State.sqs_SourcePos = 0;
2856  sq->sq_State.sqs_QueryPos = 0;
2857  sq->sq_State.sqs_ErrorCount = 0.0;
2858  sq->sq_State.sqs_ReplaceCount = 0;
2859  sq->sq_State.sqs_InsertCount = 0;
2860  sq->sq_State.sqs_DeleteCount = 0;
2861  res = FindSequenceMatchRec(sq, qh, tarstr);
2862
2863  if (PTPanGlobalPtr->pg_verbose >0) {
2864    printf("<< FindSequenceMatch: ");
2865
2866    if(res)
2867      {
2868    printf("yes %f [%d/%d/%d]\n",
2869        sq->sq_State.sqs_ErrorCount,
2870        sq->sq_State.sqs_ReplaceCount,
2871        sq->sq_State.sqs_InsertCount,
2872        sq->sq_State.sqs_DeleteCount);
2873      } else {
2874    printf("no\n");
2875      }
2876  }
2877
2878  return(res);
2879}
2880/* \\\ */
2881
2882/* /// "FindSequenceMatchRec()" */
2883BOOL FindSequenceMatchRec(struct SearchQuery *sq, struct QueryHit *qh, STRPTR tarptr)
2884{
2885  struct PTPanGlobal *pg = sq->sq_PTPanGlobal;
2886  UWORD seqcode;
2887  UWORD seqcode2;
2888  BOOL ignore;
2889  struct SearchQueryState oldstate;
2890  STRPTR oldtarptr = tarptr;
2891  BOOL check;
2892  float misweight, maxweight;
2893
2894  if(!(sq->sq_SourceSeq[sq->sq_State.sqs_SourcePos]))
2895  {
2896    return(TRUE); /* end of source sequence reached */
2897  }
2898
2899  oldstate = sq->sq_State;
2900  seqcode = pg->pg_CompressTable[sq->sq_SourceSeq[sq->sq_State.sqs_SourcePos]];
2901  seqcode2 = pg->pg_CompressTable[sq->sq_Query[sq->sq_State.sqs_QueryPos]];
2902
2903  /* *** phase one: check 1:1 replacement; this considers the hamming distance */
2904  ignore = FALSE;
2905  /* check, if first seqcode of edge matches (N (seqcode 0) matches always) */
2906  if(seqcode2 != seqcode)
2907  {
2908#ifdef ALLOWDOTSINMATCH
2909    if (sq->sq_SourceSeq[sq->sq_State.sqs_SourcePos] == '.')
2910    {
2911        sq->sq_State.sqs_ReplaceCount++;
2912        misweight = sq->sq_MismatchWeights->mw_Replace[(seqcode2 * ALPHASIZE) + SEQCODE_N]  // '.' in match
2913                    * sq->sq_PosWeight[sq->sq_State.sqs_QueryPos];                          // will be treated
2914        sq->sq_State.sqs_ErrorCount += misweight;                                           // as 'N'
2915        if(tarptr)
2916        {
2917          *tarptr++ = '.';
2918          *tarptr = 0;
2919        }
2920    } else 
2921#endif
2922    if(sq->sq_AllowReplace && (sq->sq_State.sqs_ReplaceCount < qh->qh_ReplaceCount))
2923    {
2924      sq->sq_State.sqs_ReplaceCount++;
2925      misweight = sq->sq_MismatchWeights->mw_Replace[(seqcode2 * ALPHASIZE) + seqcode]
2926                  * sq->sq_PosWeight[sq->sq_State.sqs_QueryPos];
2927      sq->sq_State.sqs_ErrorCount += misweight;
2928      if(tarptr) /* write mismatching char */
2929      {
2930    if(seqcode) /* only for A, C, G, T codes, not for N */
2931    {
2932    maxweight = sq->sq_MismatchWeights->mw_Replace[(seqcode2 * ALPHASIZE) + pg->pg_ComplementTable[seqcode2]]
2933                * sq->sq_PosWeight[sq->sq_State.sqs_QueryPos];
2934    /* is it a great mismatch? */
2935    if(misweight > sq->sq_MinorMisThres)
2936    {
2937        /* output upper case letter */
2938        *tarptr++ = pg->pg_DecompressTable[seqcode];
2939    } else {
2940        /* output lower case letter */
2941        *tarptr++ = pg->pg_DecompressTable[seqcode]|0x20;
2942    }
2943    } else {
2944    *tarptr++ = pg->pg_DecompressTable[seqcode];
2945    }
2946    *tarptr = 0;
2947      }
2948    } else {
2949      ignore = TRUE;
2950    }
2951  } else {
2952    if(tarptr) /* write a '=' for a matching char */
2953    {
2954      *tarptr++ = '=';
2955      *tarptr = 0;
2956    }
2957  }
2958  /* check, if more errors are tolerable. */
2959  if ((sq->sq_State.sqs_ErrorCount - EPSILON) > qh->qh_ErrorCount) // don't compare floats directly!
2960  {
2961    /* too many errors, do not recurse */
2962    ignore = TRUE;
2963  }
2964  /* should we take a deeper look? */
2965  if(!ignore)
2966  {
2967    /* do we reach the end of the query? */
2968    if(++sq->sq_State.sqs_QueryPos < sq->sq_QueryLen)
2969    {
2970      /* recurse (actually, iterate) */
2971      ++sq->sq_State.sqs_SourcePos;
2972      check = FindSequenceMatchRec(sq, qh, tarptr);
2973    } else {
2974      check = TRUE;
2975    }
2976    if(check)
2977    {
2978      /* check error count */
2979      if((fabs(sq->sq_State.sqs_ErrorCount - qh->qh_ErrorCount) < EPSILON) &&  // don't compare floats directly!
2980    (sq->sq_State.sqs_ReplaceCount == qh->qh_ReplaceCount) &&
2981    (sq->sq_State.sqs_InsertCount == qh->qh_InsertCount) &&
2982    (sq->sq_State.sqs_DeleteCount == qh->qh_DeleteCount))
2983      {
2984    if (PTPanGlobalPtr->pg_verbose >1)
2985    printf("-- FindSequenceMatchRec: Found!\n");
2986
2987    return(TRUE);
2988      }
2989    }
2990  }
2991
2992  if (PTPanGlobalPtr->pg_verbose >1) {
2993    int i;
2994    for(i=0;i<50;i++) if (*(tarptr-i)=='-') {
2995      printf("  After replacements: %s %f [%d/%d/%d]\n",
2996        tarptr-i,
2997        sq->sq_State.sqs_ErrorCount,
2998        sq->sq_State.sqs_ReplaceCount,
2999        sq->sq_State.sqs_InsertCount,
3000        sq->sq_State.sqs_DeleteCount);
3001      break;
3002    }
3003    if (i==50)
3004      printf("-- FindSequenceMatchRec: ERROR: Start not found ?!\n");
3005  }
3006
3007  /* *** phase two: check for adding a character in the query string  */
3008  /* this will be only done, if the inserting operation is allowed and
3009     we are not at the very beginning of the query (because then it will
3010     found without insert operation anyway) */
3011  if(oldstate.sqs_QueryPos && (oldstate.sqs_QueryPos + 1 < sq->sq_QueryLen) &&
3012     sq->sq_AllowInsert &&
3013     (sq->sq_State.sqs_InsertCount < qh->qh_InsertCount))
3014  {
3015    tarptr = oldtarptr;
3016    sq->sq_State = oldstate;
3017    sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Insert[seqcode];
3018    sq->sq_State.sqs_InsertCount++;
3019    if(sq->sq_State.sqs_ErrorCount <= qh->qh_ErrorCount)
3020    {
3021      /* recurse */
3022      ++sq->sq_State.sqs_SourcePos;
3023      if(tarptr) /* write mismatching char */
3024      {
3025        tarptr[-1] = '*';
3026        *tarptr = 0;
3027      }
3028      check = FindSequenceMatchRec(sq, qh, tarptr);
3029    } else {
3030      check = TRUE;
3031    }
3032    if(check)
3033    {
3034      /* check error count */
3035      if((sq->sq_State.sqs_ErrorCount == qh->qh_ErrorCount) &&
3036    (sq->sq_State.sqs_ReplaceCount == qh->qh_ReplaceCount) &&
3037    (sq->sq_State.sqs_InsertCount == qh->qh_InsertCount) &&
3038    (sq->sq_State.sqs_DeleteCount == qh->qh_DeleteCount))
3039      {
3040    if (PTPanGlobalPtr->pg_verbose >1)
3041    printf("-- FindSequenceMatchRec: Found!\n");
3042
3043    return(TRUE);
3044      }
3045    }
3046  }
3047
3048  if (PTPanGlobalPtr->pg_verbose >1) {
3049    int i;
3050    for(i=0;i<50;i++) if (*(tarptr-i)=='-') {
3051      printf("  After insertions:   %s %f [%d/%d/%d]\n",
3052        tarptr-i,
3053        sq->sq_State.sqs_ErrorCount,
3054        sq->sq_State.sqs_ReplaceCount,
3055        sq->sq_State.sqs_InsertCount,
3056        sq->sq_State.sqs_DeleteCount);
3057      break;
3058    }
3059    if (i==50)
3060      printf("-- FindSequenceMatchRec: ERROR: Start not found ?!\n");
3061  }
3062
3063  /* *** phase three: check for deleting a character in the query string */
3064  /* this will be only done, if we're neither at the beginning of the string
3065     nor the end of it and the delete operation is allowed */
3066  if(oldstate.sqs_QueryPos && (oldstate.sqs_QueryPos + 1 < sq->sq_QueryLen) &&
3067     sq->sq_AllowDelete &&
3068     (sq->sq_State.sqs_DeleteCount < qh->qh_DeleteCount))
3069  {
3070    tarptr = oldtarptr;
3071    sq->sq_State = oldstate;
3072    sq->sq_State.sqs_ErrorCount += sq->sq_MismatchWeights->mw_Delete[seqcode2];
3073    sq->sq_State.sqs_DeleteCount++;
3074    if(sq->sq_State.sqs_ErrorCount <= qh->qh_ErrorCount)
3075    {
3076      ++sq->sq_State.sqs_QueryPos;
3077      /* recurse */
3078      if(tarptr) /* write char omission */
3079      {
3080        *tarptr++ = '_';
3081        *tarptr = 0;
3082      }
3083      check = FindSequenceMatchRec(sq, qh, tarptr);
3084    } else {
3085      check = TRUE;
3086    }
3087    if(check)
3088    {
3089      /* check error count */
3090      if((sq->sq_State.sqs_ErrorCount == qh->qh_ErrorCount) &&
3091         (sq->sq_State.sqs_ReplaceCount == qh->qh_ReplaceCount) &&
3092         (sq->sq_State.sqs_InsertCount == qh->qh_InsertCount) &&
3093         (sq->sq_State.sqs_DeleteCount == qh->qh_DeleteCount))
3094      {
3095        if (PTPanGlobalPtr->pg_verbose >1)
3096        printf("-- FindSequenceMatchRec: Found!\n");
3097
3098        return(TRUE);
3099      }
3100    }
3101  }
3102  /* restore possible altered data */
3103  sq->sq_State = oldstate;
3104
3105  if (PTPanGlobalPtr->pg_verbose >1) {
3106    int i;
3107    for(i=0;i<50;i++) if (*(tarptr-i)=='-') {
3108      printf("  After deletions:    %s %f [%d/%d/%d]\n",
3109        tarptr-i,
3110        sq->sq_State.sqs_ErrorCount,
3111        sq->sq_State.sqs_ReplaceCount,
3112        sq->sq_State.sqs_InsertCount,
3113        sq->sq_State.sqs_DeleteCount);
3114      break;
3115    }
3116    if (i==50)
3117      printf("-- FindSequenceMatchRec: ERROR: Start not found ?!\n");
3118  }
3119
3120  return(FALSE);
3121}
3122/* \\\ */
Note: See TracBrowser for help on using the repository browser.