source: branches/port5/ptpan/PTP_main.cxx

Last change on this file was 6419, checked in by westram, 15 years ago
File size: 16.2 KB
Line 
1#include <stdio.h>
2#include <stdlib.h>
3#include <unistd.h>
4#include <string.h>
5#include <sys/stat.h>
6#include <sys/time.h>
7#include <PT_server.h>
8#include <PT_server_prototypes.h>
9#include "ptpan.h"
10#include "pt_prototypes.h"
11#include <arbdbt.h>
12#include <servercntrl.h>
13#include <server.h>
14#include <client.h>
15#include <struct_man.h>
16#define MAX_TRY 3
17#define TIME_OUT 1000*60*60*24
18
19/*****************************************************************************
20        NEW STUFF
21*******************************************************************************/
22
23/* /// "AllocPTPanGlobal()" */
24struct PTPanGlobal * AllocPTPanGlobal(void)
25{
26  struct PTPanGlobal *pg;
27  ULONG cnt;
28  ULONG pval;
29  ULONG numbits;
30  struct MismatchWeights *mw;
31
32  /**** init server main struct ****/
33  printf("Init internal structs...\n");
34
35  pg = (struct PTPanGlobal *) calloc(1, sizeof(struct PTPanGlobal));
36  if(!pg)
37  {
38    printf("Error allocating PTPanGlobal!\n");
39    return(FALSE);
40  }
41  /* init some stuff and precalc tables */
42  NewList(&pg->pg_Species);
43  NewList(&pg->pg_Partitions);
44  pg->pg_AlphaSize = ALPHASIZE;
45
46  /* calculate table of powers of alphabet size */
47  pval = 1;
48  for(cnt = 0, pval = 1; cnt <= MAXCODEFITLONG; cnt++)
49  {
50    pg->pg_PowerTable[cnt] = pval;
51    pval *= pg->pg_AlphaSize;
52  }
53
54  /* calculate table of bits used for code of size n */
55  pval = pg->pg_AlphaSize;
56  numbits = 0;
57  pg->pg_BitsUseTable[0] = 0;
58  pg->pg_BitsMaskTable[0] = (1UL << (31-numbits));
59  for(cnt = 1; cnt <= MAXCODEFITLONG; cnt++)
60  {
61    while(pval > (1UL << numbits))
62    {
63      numbits++;
64    }
65    pg->pg_BitsUseTable[cnt] = numbits; /* bits required for codesize */
66    pg->pg_BitsShiftTable[cnt] = 32 - numbits; /* how many bits to shift left */
67    pg->pg_BitsMaskTable[cnt] = (1UL << (31-numbits));
68    pval *= pg->pg_AlphaSize;
69  }
70
71  /* bits count table */
72  for(cnt = 0; cnt < 256; cnt++)
73  {
74    numbits = 0;
75    pval = cnt;
76    while(pval)
77    {
78      numbits += (pval & 1);
79      pval >>= 1;
80    }
81    pg->pg_BitsCountTable[cnt] = numbits;
82  }
83
84  /* sequence compression tables */
85  /* due to calloc, this table is already set to SEQCODE_N for all other codes */
86  pg->pg_CompressTable[(UBYTE) 'a'] = SEQCODE_A;
87  pg->pg_CompressTable[(UBYTE) 'A'] = SEQCODE_A;
88  pg->pg_CompressTable[(UBYTE) 'c'] = SEQCODE_C;
89  pg->pg_CompressTable[(UBYTE) 'C'] = SEQCODE_C;
90  pg->pg_CompressTable[(UBYTE) 'g'] = SEQCODE_G;
91  pg->pg_CompressTable[(UBYTE) 'G'] = SEQCODE_G;
92  pg->pg_CompressTable[(UBYTE) 't'] = SEQCODE_T;
93  pg->pg_CompressTable[(UBYTE) 'T'] = SEQCODE_T;
94  pg->pg_CompressTable[(UBYTE) 'u'] = SEQCODE_T;
95  pg->pg_CompressTable[(UBYTE) 'U'] = SEQCODE_T;
96  pg->pg_CompressTable[(UBYTE) '-'] = SEQCODE_IGNORE; /* these chars don't go */
97  pg->pg_CompressTable[(UBYTE) '.'] = SEQCODE_IGNORE; /* into the tree */
98  pg->pg_CompressTable[0] = SEQCODE_IGNORE; /* terminal char, to optimize certain routines */
99
100  /* inverse table for decompressing */
101  pg->pg_DecompressTable[SEQCODE_N] = 'N';
102  pg->pg_DecompressTable[SEQCODE_A] = 'A';
103  pg->pg_DecompressTable[SEQCODE_C] = 'C';
104  pg->pg_DecompressTable[SEQCODE_G] = 'G';
105  pg->pg_DecompressTable[SEQCODE_T] = 'U';
106
107  /* table for creating the complement sequence */
108  pg->pg_ComplementTable[SEQCODE_N] = SEQCODE_N;
109  pg->pg_ComplementTable[SEQCODE_A] = SEQCODE_T;
110  pg->pg_ComplementTable[SEQCODE_C] = SEQCODE_G;
111  pg->pg_ComplementTable[SEQCODE_G] = SEQCODE_C;
112  pg->pg_ComplementTable[SEQCODE_T] = SEQCODE_A;
113
114  /* counting table to avoid branches */
115  for(cnt = 0; cnt < 256; cnt++)
116  {
117    pg->pg_SeqCodeValidTable[cnt] = (pg->pg_CompressTable[cnt] == SEQCODE_IGNORE) ? 0 : 1;
118  }
119
120  pg->pg_FreeMem = GB_get_physical_memory();
121#ifdef DEBUG
122  printf("physical_memory=%lu k (%lu Mb)\n", pg->pg_FreeMem, pg->pg_FreeMem >> 10);
123#endif // DEBUG
124  pg->pg_FreeMem <<= 10;
125
126  /* initialize species cache handler */
127  pg->pg_SpeciesCache = AllocCacheHandler();
128  if(!pg->pg_SpeciesCache)
129  {
130    printf("Couldn't allocate species cache handler!\n");
131    free(pg);
132    return(FALSE);
133  }
134  pg->pg_SpeciesCache->ch_UserData = pg;
135  /* reserve about 10% of the memory for the species cache */
136  pg->pg_SpeciesCache->ch_MaxCapacity = (pg->pg_FreeMem / 10) * 1;
137  pg->pg_SpeciesCache->ch_LoadFunc = (BOOL (*)(struct CacheHandler *, APTR)) CacheSpeciesLoad;
138  pg->pg_SpeciesCache->ch_UnloadFunc = (BOOL (*)(struct CacheHandler *, APTR)) CacheSpeciesUnload;
139  pg->pg_SpeciesCache->ch_SizeFunc = (ULONG (*)(struct CacheHandler *, APTR)) CacheSpeciesSize;
140
141  /* initialize partitions cache handler */
142  pg->pg_PartitionCache = AllocCacheHandler();
143  if(!pg->pg_PartitionCache)
144  {
145    printf("Couldn't allocate partitions cache handler!\n");
146    FreeCacheHandler(pg->pg_SpeciesCache);
147    free(pg);
148    return(FALSE);
149  }
150  pg->pg_PartitionCache->ch_UserData = pg;
151  /* reserve about 90% of the memory for the partition cache */
152  pg->pg_PartitionCache->ch_MaxCapacity = (pg->pg_FreeMem / 10) * 9;
153  pg->pg_PartitionCache->ch_LoadFunc = (BOOL (*)(struct CacheHandler *, APTR)) CachePartitionLoad;
154  pg->pg_PartitionCache->ch_UnloadFunc = (BOOL (*)(struct CacheHandler *, APTR)) CachePartitionUnload;
155  pg->pg_PartitionCache->ch_SizeFunc = (ULONG (*)(struct CacheHandler *, APTR)) CachePartitionSize;
156
157  /* default mismatch weights */
158  mw = &pg->pg_MismatchWeights;
159  /* format: replace code1 (query) by code2 (database) adds x to the error value */
160  /*     N   A   C   G   T (-> query)
161      N 0.0 0.1 0.1 0.1 0.1
162      A  *  0.0 1.0 1.0 1.0
163      C  *  1.0 0.0 1.0 1.0
164      G  *  1.0 1.0 0.0 1.0
165      T  *  1.0 1.0 1.0 0.0
166    ins 2.0 2.0 2.0 2.0 2.0
167    del  *  2.0 2.0 2.0 2.0
168  */
169  /* fill diagonal first (no mismatch) */
170  for(cnt = 0; cnt < ALPHASIZE; cnt++)
171  {
172    mw->mw_Replace[cnt * ALPHASIZE + cnt] = 0.0;
173  }
174
175  /* N is a joker, but setting it to slightly higher values might be sensible */
176  mw->mw_Replace[SEQCODE_A * ALPHASIZE + SEQCODE_N] = 0.1;
177  mw->mw_Replace[SEQCODE_C * ALPHASIZE + SEQCODE_N] = 0.1;
178  mw->mw_Replace[SEQCODE_G * ALPHASIZE + SEQCODE_N] = 0.1;
179  mw->mw_Replace[SEQCODE_T * ALPHASIZE + SEQCODE_N] = 0.1;
180
181  /* replacing N by A, C, G, T will not occur (search string may not contain N) */
182  mw->mw_Replace[SEQCODE_N * ALPHASIZE + SEQCODE_A] = 99999.0;
183  mw->mw_Replace[SEQCODE_N * ALPHASIZE + SEQCODE_C] = 99999.0;
184  mw->mw_Replace[SEQCODE_N * ALPHASIZE + SEQCODE_G] = 99999.0;
185  mw->mw_Replace[SEQCODE_N * ALPHASIZE + SEQCODE_T] = 99999.0;
186
187  /* other parts of the matrix (should be symmetrical, but doesn't need to) */
188  mw->mw_Replace[SEQCODE_A * ALPHASIZE + SEQCODE_C] = 1.1;
189  mw->mw_Replace[SEQCODE_C * ALPHASIZE + SEQCODE_A] = 1.0;
190
191  mw->mw_Replace[SEQCODE_A * ALPHASIZE + SEQCODE_G] = 0.2;
192  mw->mw_Replace[SEQCODE_G * ALPHASIZE + SEQCODE_A] = 1.5;
193
194  mw->mw_Replace[SEQCODE_A * ALPHASIZE + SEQCODE_T] = 1.1;
195  mw->mw_Replace[SEQCODE_T * ALPHASIZE + SEQCODE_A] = 1.1;
196
197  mw->mw_Replace[SEQCODE_C * ALPHASIZE + SEQCODE_G] = 1.1;
198  mw->mw_Replace[SEQCODE_G * ALPHASIZE + SEQCODE_C] = 1.5;
199
200  mw->mw_Replace[SEQCODE_C * ALPHASIZE + SEQCODE_T] = 0.6;
201  mw->mw_Replace[SEQCODE_T * ALPHASIZE + SEQCODE_C] = 1.1;
202
203  mw->mw_Replace[SEQCODE_G * ALPHASIZE + SEQCODE_T] = 1.5;
204  mw->mw_Replace[SEQCODE_T * ALPHASIZE + SEQCODE_G] = 0.6;
205
206  /* insert operations (to query string) */
207  mw->mw_Insert[SEQCODE_N] = 2.0;
208  mw->mw_Insert[SEQCODE_A] = 2.0;
209  mw->mw_Insert[SEQCODE_C] = 2.0;
210  mw->mw_Insert[SEQCODE_G] = 2.0;
211  mw->mw_Insert[SEQCODE_T] = 2.0;
212
213  /* delete operations (from query string) */
214  mw->mw_Delete[SEQCODE_N] = 99999.0; /* should never happen */
215  mw->mw_Delete[SEQCODE_A] = 2.0;
216  mw->mw_Delete[SEQCODE_C] = 2.0;
217  mw->mw_Delete[SEQCODE_G] = 2.0;
218  mw->mw_Delete[SEQCODE_T] = 2.0;
219
220  /* init matrix for non-weighted stuff */
221  mw = &pg->pg_NoWeights;
222  /* fill standard 1.0 values */
223  for(cnt = 0; cnt < (ALPHASIZE * ALPHASIZE); cnt++)
224  {
225    mw->mw_Replace[cnt] = 1.0;
226  }
227  /* fill diagonal first (no mismatch) and insert / delete */
228  for(cnt = 0; cnt < ALPHASIZE; cnt++)
229  {
230    mw->mw_Replace[cnt * ALPHASIZE] = 0.1; // N (joker) replacement
231    mw->mw_Replace[cnt * ALPHASIZE + cnt] = 0.0; // diagonal
232    mw->mw_Insert[cnt] = 2.0;
233    mw->mw_Delete[cnt] = 2.0;
234  }
235
236  /* calculate maximum partition size (estimate 24 bytes per node) */
237  {
238    ULONG partmem = pg->pg_FreeMem;
239    /* tree building implementation is limited to max. 1 GB per partition */
240    if(partmem > (1UL<<30))
241    {
242      partmem = 1UL<<30;
243    }
244
245    pg->pg_MaxPartitionSize = partmem / (((sizeof(struct SfxNode2Edges) * SMALLNODESPERCENT) +
246          (sizeof(struct SfxNodeNEdges) * BIGNODESPERCENT)) / 100);
247  }
248  /* enable low memory mode */
249  pg->pg_LowMemoryMode = TRUE;
250
251  gettimeofday(&pg->pg_Bench.ts_Init, NULL);
252  pg->pg_Bench.ts_Last = pg->pg_Bench.ts_Init;
253
254  /* init command line flags */
255  pg->pg_verbose = 0;
256
257  return(pg);
258}
259/* \\\ */
260
261/* /// "FreePTPanGlobal()" */
262void FreePTPanGlobal(struct PTPanGlobal *pg)
263{
264  FlushCache(pg->pg_SpeciesCache);
265  FreeCacheHandler(pg->pg_SpeciesCache);
266  FlushCache(pg->pg_PartitionCache);
267  FreeCacheHandler(pg->pg_PartitionCache);
268  free(pg);
269}
270/* \\\ */
271
272/*****************************************************************************
273        END OF NEW STUFF
274*******************************************************************************/
275
276// *** FIXME *** see if we can get rid of these global structures
// PTPanGlobalPtr is set in main() and read by the AISC callbacks
// server_shutdown() and broadcast().
277
278struct PTPanGlobal *PTPanGlobalPtr = NULL;
279
280/*****************************************************************************
281        Communication
282*******************************************************************************/
283
284PT_main *aisc_main; /* must be named exactly like this; assigned in main() */
285
286extern "C" int server_shutdown(PT_main *, aisc_string passwd)
287{
288  struct PTPanGlobal *pg = PTPanGlobalPtr;
289
290  printf("EXTERN: server_shutdown\n");
291  /** passwdcheck **/
292  if(strcmp(passwd, "47@#34543df43%&3667gh"))
293  {
294    return 1;
295  }
296  printf("\nI got the shutdown message.\n");
297  /** shoot clients **/
298  aisc_broadcast(pg->pg_ComSocket, 0,
299        "SERVER UPDATE BY ADMINISTRATOR!\n"
300        "You'll get the latest version. Your on-screen\n"
301        "information will be lost, sorry!");
302  /** shutdown **/
303  aisc_server_shutdown_and_exit(pg->pg_ComSocket, 0);
304  return(0);
305}
306
307extern "C" int broadcast(PT_main *main, int)
308{
309  struct PTPanGlobal *pg = PTPanGlobalPtr;
310
311  printf("EXTERN: broadcast\n");
312  aisc_broadcast(pg->pg_ComSocket, main->m_type, main->m_text);
313  return(0);
314}
315
316extern int aisc_core_on_error;
317
318int main(int argc, char *argv[])
319{
320  struct PTPanGlobal *pg;
321  STRPTR commandflag;
322
323  printf("\nTUM PeTer PAN SERVER (Chris Hodges) V0.12 18-Aug-04 (C) 2003-2004\n"
324    "Complete rewrite of the original code by Oliver Strunk from 1993\n\n");
325
326  /* allocate the PTPanGlobal structure */
327  if(!(pg = AllocPTPanGlobal()))
328  {
329    exit(1);
330  }
331
332  /* argh! global variable! would be nice, if we could get rid of this --
333     it is only used by the AISC functions */
334  PTPanGlobalPtr = pg;
335
336  /* aisc init */
337  GB_install_pid(0); /* not arb_clean able */
338  aisc_core_on_error = 0;
339  pg->pg_AISC = create_PT_main();
340
341  GB_init_gb(); // nedded for PT_new_design
342
343  /* set global variable -- sigh */
344  aisc_main = pg->pg_AISC;
345
346  /* first get the parameters */
347  pg->pg_ArbParams = arb_trace_argv(&argc, argv);
348
349  /* try to open com with any other pb server */
350  /* check command line syntax */
351  if((argc > 2) ||
352     ((argc < 2) && !pg->pg_ArbParams->db_server) ||
353     (argc >= 2 && strcmp(argv[1], "--help") == 0))
354  {
355    printf("Syntax: %s [-look/-build/-kill/-QUERY] -Dfile.arb -TSocketid\n", argv[0]);
356    exit(-1);
357  }
358  /* add default command flag */
359  if(argc == 2)
360  {
361    commandflag = argv[1];
362  } else {
363    commandflag = (STRPTR) "-boot";
364  }
365
366  /* get server host name */
367  if(!(pg->pg_ServerName = pg->pg_ArbParams->tcp))
368  {
369    if(!(pg->pg_ServerName = (STRPTR) GBS_read_arb_tcp("ARB_PT_SERVER0")))
370    {
371      GB_print_error(); /* no host name found */
372      exit(-1);
373    }
374  }
375
376  /* generate tree filename */
377  pg->pg_DBName    = pg->pg_ArbParams->db_server;
378  pg->pg_IndexName = GBS_global_string_copy("%s.pan", pg->pg_DBName);
379
380  /* check for other active servers */
381  {
382    aisc_com *ptlink;
383    T_PT_MAIN ptmain;
384    ptlink = (aisc_com *) aisc_open(pg->pg_DBName, &ptmain, AISC_MAGIC_NUMBER);
385    if(ptlink)
386    {
387      if(!strcasecmp(commandflag, "-look"))
388      {
389        exit(0); /* already another serther */
390      }
391      printf("There is another active server. I'll try to terminate it violently...\n");
392      aisc_nput(ptlink, PT_MAIN, ptmain, MAIN_SHUTDOWN, "47@#34543df43%&3667gh", NULL);
393      aisc_close(ptlink);
394    }
395  }
396  if(!strcmp(commandflag, "-kill"))
397  {
398    exit(0);
399  }
400
401  if(!strncasecmp(commandflag, "-build", 6)) /* build command */
402  {
403    ULONG val = atoi(&commandflag[6]);
404    if(val) /* extra option */
405    {
406      if(val > 100000) /* read out threshold */
407      {
408        pg->pg_MaxPartitionSize = atoi(&commandflag[6]);
409        printf("Forcing MaxPartitionSize = %ld.\n", pg->pg_MaxPartitionSize);
410      } else {
411        pg->pg_PruneLength = atoi(&commandflag[6]);
412        printf("Forcing PruneLength = %d.\n", pg->pg_PruneLength);
413      }
414    }
415    LoadSpecies(pg);
416    if(!strncmp(commandflag, "-bUiLd", 6))
417    {
418      pg->pg_UseStdSfxTree = TRUE;
419      if(BuildStdSuffixTree(pg))
420      {
421        printf("Suffix Tree index for database '%s' has been created.\n", pg->pg_DBName);
422        BenchOutput(pg);
423        exit(0);
424      } else {
425        printf("Unable to create Suffix Tree index for database '%s'!\n", pg->pg_DBName);
426        exit(1);
427      }
428    } else {
429      if(BuildPTPanIndex(pg))
430      {
431        printf("PT_PAN index for database '%s' has been created.\n", pg->pg_DBName);
432        BenchOutput(pg);
433        exit(0);
434      } else {
435        printf("Unable to create PT_PAN index for database '%s'!\n", pg->pg_DBName);
436        exit(1);
437      }
438    }
439  }
440
441  if(!strcasecmp(commandflag, "-QUERY"))
442  {
443    //enter_stage_3_load_tree(aisc_main, tname); /* now stage 3 */
444    exit(0);
445  }
446
447  /* Check if index is up2date */
448  {
449    struct stat dbstat, idxstat;
450    BOOL forcebuild = FALSE;
451    if(stat(pg->pg_DBName, &dbstat))
452    {
453      printf("PT_PAN: error while stat source %s\n", pg->pg_DBName);
454      aisc_server_shutdown_and_exit(pg->pg_ComSocket, -1);
455    }
456
457    if(stat(pg->pg_IndexName, &idxstat))
458    {
459      forcebuild = TRUE; /* there is no index at all! */
460    } else {
461      if((dbstat.st_mtime > idxstat.st_mtime) || (idxstat.st_size == 0))
462      {
463        /* so the index file was older or of zero size */
464        printf("PT_PAN: Database %s has been modified\n"
465            "more recently than index %s.\n"
466            "Forcing rebuilding of index...\n",
467            pg->pg_DBName, pg->pg_IndexName);
468        forcebuild = TRUE;
469      }
470      if(!LoadIndexHeader(pg))
471      {
472        forcebuild = TRUE; /* an error occured while loading the index header */
473      }
474    }
475    if(forcebuild)
476    {
477      LoadSpecies(pg);
478      if(BuildPTPanIndex(pg))
479      {
480        printf("PT_PAN index for database '%s' has been created.\n", pg->pg_DBName);
481      } else {
482        printf("Unable to create PT_PAN index for database '%s'!\n", pg->pg_DBName);
483        exit(1);
484      }
485      if(!LoadIndexHeader(pg))
486      {
487        printf("Fatal error, couldn't load index even after creation attempt!\n");
488        exit(1);
489      }
490    }
491  }
492
493  /*if(!LoadAllPartitions(pg))
494  {
495    printf("ERROR: Failed to load partitions into memory!\n");
496    exit(1);
497    }*/
498
499  /* so much for the the init, now let's do some real work */
500
501  if(!strcasecmp(commandflag, "-v")) pg->pg_verbose = 1;
502  if(!strcasecmp(commandflag, "-vv")) pg->pg_verbose = 2;
503  if(!strcasecmp(commandflag, "-vvv")) pg->pg_verbose = 3;
504
505#if 0
506  {
507    PT_exProb pep;
508    pep.result = NULL;
509    pep.restart = 1;
510    pep.plength = 21;
511    pep.numget = 100;
512    PT_find_exProb(&pep);
513    printf("%s\n", pep.result);
514  }
515#endif
516
517  /* open the socket connection */
518  printf("Opening connection...\n");
519  //sleep(1);
520  {
521    UWORD i;
522    for(i = 0; i < MAX_TRY; i++)
523    {
524      if((pg->pg_ComSocket = open_aisc_server(pg->pg_ServerName, TIME_OUT, 0)))
525      {
526        break;
527      } else {
528        sleep(10);
529      }
530    }
531    if(!pg->pg_ComSocket)
532    {
533      printf("PT_PAN: Gave up on opening the communication socket!\n");
534      exit(0);
535    }
536  }
537  /****** all ok: main loop ********/
538
539  printf("ok, server is running.\n"); // do NOT change or remove! others depend on it
540  fflush(stdout);
541
542  aisc_accept_calls(pg->pg_ComSocket);
543  aisc_server_shutdown_and_exit(pg->pg_ComSocket, 0);
544
545  return(0);
546}
547
548
Note: See TracBrowser for help on using the repository browser.