| 1 | #include <stdio.h> |
|---|
| 2 | #include <string.h> |
|---|
| 3 | #include <memory.h> |
|---|
| 4 | // #include <malloc.h> |
|---|
| 5 | #include <arbdb.h> |
|---|
| 6 | #include <arbdbt.h> |
|---|
| 7 | #include "tc.hxx" |
|---|
| 8 | #include <BI_helix.hxx> |
|---|
| 9 | |
|---|
| 10 | void pt_align(FILE *out, int &pos, int mod) { |
|---|
| 11 | int m = pos % mod; |
|---|
| 12 | if (!m) return; |
|---|
| 13 | while (m<mod) { |
|---|
| 14 | if (out) putc(0,out); |
|---|
| 15 | pos++; |
|---|
| 16 | m++; |
|---|
| 17 | } |
|---|
| 18 | } |
|---|
| 19 | |
|---|
| 20 | char *pt_getstring(FILE *in){ |
|---|
| 21 | char *p,*buffer; |
|---|
| 22 | int c; |
|---|
| 23 | void *strstruct = GBS_stropen(1024); |
|---|
| 24 | GBS_strcat(strstruct,0); |
|---|
| 25 | for (c = 1; c; ) { |
|---|
| 26 | c = getc(in); |
|---|
| 27 | if (c== EOF) return 0; |
|---|
| 28 | GBS_chrcat(strstruct,c); |
|---|
| 29 | } |
|---|
| 30 | p = GBS_strclose(strstruct); |
|---|
| 31 | |
|---|
| 32 | if (!p[0]){ |
|---|
| 33 | delete p; |
|---|
| 34 | return 0; |
|---|
| 35 | } |
|---|
| 36 | if (p[0] == 1) { |
|---|
| 37 | buffer = strdup(p+1); |
|---|
| 38 | }else{ |
|---|
| 39 | buffer = strdup(p); |
|---|
| 40 | } |
|---|
| 41 | delete p; |
|---|
| 42 | return buffer; |
|---|
| 43 | } |
|---|
| 44 | |
|---|
| 45 | int pt_putstring(char *s,FILE *out){ |
|---|
| 46 | if (!s) { |
|---|
| 47 | putc(0,out); |
|---|
| 48 | return 0; |
|---|
| 49 | } |
|---|
| 50 | int len = strlen(s)+1; |
|---|
| 51 | putc(1,out); |
|---|
| 52 | len = fwrite(s,len,1,out); |
|---|
| 53 | if (1 != len){ |
|---|
| 54 | return -1; |
|---|
| 55 | } |
|---|
| 56 | return 0; |
|---|
| 57 | } |
|---|
| 58 | |
|---|
| 59 | |
|---|
| 60 | master_gap_compress::master_gap_compress(void) { |
|---|
| 61 | memset((char *)this,0,sizeof(master_gap_compress)); |
|---|
| 62 | } |
|---|
| 63 | client_gap_compress::client_gap_compress(master_gap_compress *mgc) { |
|---|
| 64 | memset((char *)this,0,sizeof(client_gap_compress)); |
|---|
| 65 | master = mgc; |
|---|
| 66 | memsize = MEM_ALLOC_INIT_CLIENT; |
|---|
| 67 | l = (client_gap_compress_data *)calloc(sizeof(client_gap_compress_data),memsize); |
|---|
| 68 | } |
|---|
| 69 | |
|---|
| 70 | master_gap_compress::~master_gap_compress(void) { |
|---|
| 71 | delete rel_2_abs; |
|---|
| 72 | delete rel_2_abss; |
|---|
| 73 | } |
|---|
| 74 | |
|---|
| 75 | client_gap_compress::~client_gap_compress(void) { |
|---|
| 76 | delete l; |
|---|
| 77 | } |
|---|
| 78 | |
|---|
| 79 | void client_gap_compress::clean(master_gap_compress *mgc){ |
|---|
| 80 | if (memsize < MEM_ALLOC_INIT_CLIENT) { |
|---|
| 81 | delete l; |
|---|
| 82 | memset((char *)this,0,sizeof(client_gap_compress)); |
|---|
| 83 | memsize = MEM_ALLOC_INIT_CLIENT; |
|---|
| 84 | l = (client_gap_compress_data *)calloc(sizeof(client_gap_compress_data),memsize); |
|---|
| 85 | }else{ |
|---|
| 86 | client_gap_compress_data *oldl = l; |
|---|
| 87 | int old_memsize = memsize; |
|---|
| 88 | memset((char *)this,0,sizeof(client_gap_compress)); |
|---|
| 89 | memsize = old_memsize; |
|---|
| 90 | l = oldl; |
|---|
| 91 | } |
|---|
| 92 | master = mgc; |
|---|
| 93 | } |
|---|
| 94 | |
|---|
| 95 | void master_gap_compress::optimize(void) { |
|---|
| 96 | if (!rel_2_abs) return; |
|---|
| 97 | if (rel_2_abs[len-1] > 0x7fff) { |
|---|
| 98 | rel_2_abs = (long *)realloc((char *)rel_2_abs,len*sizeof(long)); |
|---|
| 99 | }else{ |
|---|
| 100 | rel_2_abss = (short *)malloc(sizeof(short) * len); |
|---|
| 101 | int i; |
|---|
| 102 | for (i=0;i<len;i++) { |
|---|
| 103 | rel_2_abss[i] = (short) rel_2_abs[i]; |
|---|
| 104 | } |
|---|
| 105 | delete rel_2_abs; rel_2_abs = 0; |
|---|
| 106 | } |
|---|
| 107 | |
|---|
| 108 | memsize = len; |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | void master_gap_compress::write(long rel, long abs){ |
|---|
| 112 | if (rel_2_abss) GB_CORE; // no write after optimize |
|---|
| 113 | |
|---|
| 114 | if (!rel_2_abs) { |
|---|
| 115 | memsize = MEM_ALLOC_INIT_MASTER; |
|---|
| 116 | rel_2_abs = (long *)calloc(sizeof(long),memsize); |
|---|
| 117 | } |
|---|
| 118 | |
|---|
| 119 | if (rel >= memsize) { |
|---|
| 120 | memsize = memsize * MEM_ALLOC_FACTOR; |
|---|
| 121 | rel_2_abs = (long *)realloc((char *)rel_2_abs,sizeof(long)*memsize); |
|---|
| 122 | } |
|---|
| 123 | for( ; len<rel; len++) rel_2_abs[len] = 0; |
|---|
| 124 | |
|---|
| 125 | rel_2_abs[rel] = abs; |
|---|
| 126 | if (len <= rel) len = (int)rel+1; |
|---|
| 127 | } |
|---|
| 128 | |
|---|
| 129 | long master_gap_compress::read(long rel, long def_abs){ |
|---|
| 130 | if (rel>=len) { |
|---|
| 131 | this->write(rel,def_abs); |
|---|
| 132 | return def_abs; |
|---|
| 133 | } |
|---|
| 134 | if (rel_2_abss) return rel_2_abss[rel]; |
|---|
| 135 | return rel_2_abs[rel]; |
|---|
| 136 | } |
|---|
| 137 | |
|---|
| 138 | void client_gap_compress::basic_write(long rel, long x, long y){ |
|---|
| 139 | |
|---|
| 140 | if (rel >= memsize) { |
|---|
| 141 | memsize = (int)rel * MEM_ALLOC_FACTOR; |
|---|
| 142 | l = (client_gap_compress_data *)realloc( |
|---|
| 143 | (char *)l,sizeof(client_gap_compress_data)*memsize); |
|---|
| 144 | } |
|---|
| 145 | l[rel].rel = rel; |
|---|
| 146 | l[rel].x = (short)x; |
|---|
| 147 | l[rel].y = y; |
|---|
| 148 | if (len <= rel) len = (int)rel+1; |
|---|
| 149 | } |
|---|
| 150 | |
|---|
| 151 | void client_gap_compress::write(long rel, long abs) { |
|---|
| 152 | if (rel >= master->len) { |
|---|
| 153 | master->write(rel,abs); |
|---|
| 154 | this->basic_write(rel,0,0); |
|---|
| 155 | return; |
|---|
| 156 | } |
|---|
| 157 | long m,r; |
|---|
| 158 | long *ml = master->rel_2_abs; |
|---|
| 159 | old_rel++; |
|---|
| 160 | old_m++; |
|---|
| 161 | // search ?: rel_2_abs[?] <= abs && rel_2_abs[?+1] > abs |
|---|
| 162 | |
|---|
| 163 | if ( rel == old_rel && old_m < master->len && (r=ml[old_m] ) >= abs ) { |
|---|
| 164 | if (r == abs) { |
|---|
| 165 | m = old_m; |
|---|
| 166 | }else{ |
|---|
| 167 | if (ml[ old_m -1 ] > abs) goto cgc_search_by_divide; |
|---|
| 168 | old_m--; |
|---|
| 169 | m = old_m; |
|---|
| 170 | } |
|---|
| 171 | }else{ |
|---|
| 172 | cgc_search_by_divide: |
|---|
| 173 | long l; |
|---|
| 174 | l = 0; r = master->len-1; |
|---|
| 175 | while (l<r-1) { // search the masters rel_2_abs[?] == abs |
|---|
| 176 | m = (l+r)>>1; |
|---|
| 177 | if ( ml[m] <= abs ) { |
|---|
| 178 | l = m; |
|---|
| 179 | continue; |
|---|
| 180 | } |
|---|
| 181 | r = m; |
|---|
| 182 | } |
|---|
| 183 | if ( ml[r] <= abs) m = r; else m = l; |
|---|
| 184 | old_rel = rel; |
|---|
| 185 | old_m = m; |
|---|
| 186 | } |
|---|
| 187 | this->basic_write(rel,m - rel,abs - ml[m] ); |
|---|
| 188 | } |
|---|
| 189 | |
|---|
| 190 | // remove (eval. ) duplicated entries in client compress |
|---|
| 191 | // eg. (4,20,5) (5,21,5) -> delete second |
|---|
| 192 | |
|---|
| 193 | void client_gap_compress::optimize(int realloc_flag) { |
|---|
| 194 | if (!len) return; |
|---|
| 195 | int i; |
|---|
| 196 | int newlen = 0; |
|---|
| 197 | long old_x = -9999; |
|---|
| 198 | long old_y = -9999; |
|---|
| 199 | long old_dx = 0; |
|---|
| 200 | |
|---|
| 201 | for (i=0; i<len; i++) { |
|---|
| 202 | if ( l[i].x != old_x || l[i].y != old_y ) { |
|---|
| 203 | if (i<len+1) { |
|---|
| 204 | old_dx = l[i+1].x - l[i].x; |
|---|
| 205 | if (old_dx < -1 ) old_dx = 0; |
|---|
| 206 | if (old_dx > 0 ) old_dx = 0; |
|---|
| 207 | } |
|---|
| 208 | old_x = l[i].x; |
|---|
| 209 | old_y = l[i].y; |
|---|
| 210 | l[i].dx = (short)old_dx; |
|---|
| 211 | l[newlen++] = l[i]; |
|---|
| 212 | } |
|---|
| 213 | old_x += old_dx; |
|---|
| 214 | old_y -= old_dx; |
|---|
| 215 | } |
|---|
| 216 | if (realloc_flag) { |
|---|
| 217 | l = (struct client_gap_compress_data *)realloc((char *)l, |
|---|
| 218 | sizeof(struct client_gap_compress_data) * newlen); |
|---|
| 219 | } |
|---|
| 220 | len = memsize = newlen; |
|---|
| 221 | } |
|---|
| 222 | |
|---|
| 223 | |
|---|
| 224 | long client_gap_compress::rel_2_abs(long rel){ |
|---|
| 225 | int left,right; |
|---|
| 226 | int m; |
|---|
| 227 | long lrel; |
|---|
| 228 | right = len-1; |
|---|
| 229 | left = 0; |
|---|
| 230 | if (left>=right) { // client is master |
|---|
| 231 | return master->read(rel,0); |
|---|
| 232 | } |
|---|
| 233 | while (left<right-1) { // search the clients l[??].rel <= rel |
|---|
| 234 | // && l[??+1] > rel |
|---|
| 235 | m = (left+right)>>1; |
|---|
| 236 | lrel = l[m].rel; |
|---|
| 237 | if ( lrel <= rel ) { |
|---|
| 238 | left = m; |
|---|
| 239 | continue; |
|---|
| 240 | } |
|---|
| 241 | right = m; |
|---|
| 242 | } |
|---|
| 243 | if ( l[right].rel <= rel) m = right; else m = left; |
|---|
| 244 | long master_pos = l[m].rel; |
|---|
| 245 | long pos; |
|---|
| 246 | if (!l[m].dx) { // substitution or deletion |
|---|
| 247 | master_pos += l[m].x; // masterpos offset |
|---|
| 248 | master_pos += (rel - l[m].rel); // compressed data offset |
|---|
| 249 | pos = master->read(master_pos ,0) // read master at rel+offset |
|---|
| 250 | + l[m].y; |
|---|
| 251 | }else{ // insertion |
|---|
| 252 | master_pos += l[m].x; |
|---|
| 253 | pos = master->read(master_pos ,0) // read master at rel+offset |
|---|
| 254 | + (rel - l[m].rel) // + offset (missing data in compressed ) |
|---|
| 255 | + l[m].y; |
|---|
| 256 | } |
|---|
| 257 | return pos; |
|---|
| 258 | } |
|---|
| 259 | |
|---|
| 260 | gap_compress::gap_compress(void) { |
|---|
| 261 | memset((char *)this,0,sizeof(gap_compress)); |
|---|
| 262 | memclients = MEM_ALLOC_INIT_CLIENTS; |
|---|
| 263 | clients = (client_gap_compress **)calloc(sizeof(void *),memclients); |
|---|
| 264 | memmasters = MEM_ALLOC_INIT_MASTERS; |
|---|
| 265 | masters = (master_gap_compress **)calloc(sizeof(void *),memmasters); |
|---|
| 266 | } |
|---|
| 267 | |
|---|
| 268 | gap_compress::~gap_compress(void) { |
|---|
| 269 | int i; |
|---|
| 270 | for (i=0;i<nmasters;i++) delete masters[i]; |
|---|
| 271 | delete masters; |
|---|
| 272 | for (i=0;i<nclients;i++) delete clients[i]; |
|---|
| 273 | delete clients; |
|---|
| 274 | } |
|---|
| 275 | |
|---|
| 276 | void client_gap_compress::basic_write_sequence(char *sequence, long seq_len, int gap1,int gap2){ |
|---|
| 277 | long rel = 0; |
|---|
| 278 | long abs = 0; |
|---|
| 279 | rel = 0; |
|---|
| 280 | for (abs = 0; abs < seq_len; abs++) { |
|---|
| 281 | if ( sequence[abs] == gap1 ) continue; |
|---|
| 282 | if ( sequence[abs] == gap2 ) continue; |
|---|
| 283 | this->write(rel,abs); |
|---|
| 284 | rel++; |
|---|
| 285 | } |
|---|
| 286 | this->abs_seq_len = seq_len; |
|---|
| 287 | this->rel_seq_len = rel; |
|---|
| 288 | } |
|---|
| 289 | // create a new master and client sequence |
|---|
| 290 | void client_gap_compress::basic_write_master_sequence(char *sequence, long seq_len, int gap1,int gap2){ |
|---|
| 291 | long rel = 0; |
|---|
| 292 | long abs = 0; |
|---|
| 293 | |
|---|
| 294 | for (abs = 0; abs < seq_len; abs++) { // write sequence |
|---|
| 295 | if ( sequence[abs] == gap1 ) continue; |
|---|
| 296 | if ( sequence[abs] == gap2 ) continue; |
|---|
| 297 | master->write(rel,abs); |
|---|
| 298 | rel++; |
|---|
| 299 | } |
|---|
| 300 | this->basic_write(0,0,0); |
|---|
| 301 | |
|---|
| 302 | this->abs_seq_len = seq_len; |
|---|
| 303 | this->rel_seq_len = rel; |
|---|
| 304 | } |
|---|
| 305 | |
|---|
| 306 | master_gap_compress *gap_compress::search_best_master(client_gap_compress *client, |
|---|
| 307 | master_gap_compress *exclude,long max_diff, |
|---|
| 308 | char *sequence, long seq_len, int gap1,int gap2, int &best_alt) { |
|---|
| 309 | |
|---|
| 310 | long best = max_diff; |
|---|
| 311 | best_alt = -1; |
|---|
| 312 | master_gap_compress *best_master = 0; |
|---|
| 313 | int best_try; |
|---|
| 314 | master_gap_compress *master; |
|---|
| 315 | int _try = 0; |
|---|
| 316 | |
|---|
| 317 | if (!nmasters) best_alt = (int)max_diff; |
|---|
| 318 | |
|---|
| 319 | for (_try = 0;_try<nmasters;_try++) { |
|---|
| 320 | master = masters[_try]; |
|---|
| 321 | if (master == exclude) continue; |
|---|
| 322 | client->clean(master); |
|---|
| 323 | client->basic_write_sequence(sequence,seq_len,gap1,gap2); |
|---|
| 324 | client->optimize(0); |
|---|
| 325 | if (best_alt < 0 || client->len < best_alt) best_alt = client->len; |
|---|
| 326 | if (best <0 || client->len < best) { |
|---|
| 327 | best = client->len; |
|---|
| 328 | best_master = master; |
|---|
| 329 | best_try = _try; |
|---|
| 330 | if (best <=3 ) break; // very good choice, stop search |
|---|
| 331 | } |
|---|
| 332 | } |
|---|
| 333 | if (!best_master) return 0; |
|---|
| 334 | |
|---|
| 335 | int i; |
|---|
| 336 | for (i=best_try; i>0; i--) masters[i] = masters[i-1]; |
|---|
| 337 | masters[0] = best_master; // bring found client to top of list |
|---|
| 338 | |
|---|
| 339 | client->clean(best_master); |
|---|
| 340 | client->basic_write_sequence(sequence,seq_len,gap1,gap2); |
|---|
| 341 | client->optimize(1); |
|---|
| 342 | return best_master; |
|---|
| 343 | } |
|---|
| 344 | |
|---|
| 345 | long sort_masters(void *m1, void *m2, char *cd ){ |
|---|
| 346 | cd = cd; |
|---|
| 347 | master_gap_compress *master1 = (master_gap_compress *)m1; |
|---|
| 348 | master_gap_compress *master2 = (master_gap_compress *)m2; |
|---|
| 349 | if( master1->ref_cnt * master1->alt_master > |
|---|
| 350 | master2->ref_cnt * master2->alt_master ) |
|---|
| 351 | return -1; |
|---|
| 352 | return 0; |
|---|
| 353 | } |
|---|
| 354 | |
|---|
| 355 | gap_index gap_compress::write_sequence(char *sequence, long seq_len, int gap1,int gap2){ |
|---|
| 356 | client_gap_compress *client = 0; |
|---|
| 357 | client = new client_gap_compress(0); |
|---|
| 358 | master_gap_compress *master; |
|---|
| 359 | int i; |
|---|
| 360 | long max_diff = REL_DIFF_CLIENT_MASTER(seq_len); |
|---|
| 361 | int alt; // alternate master !!!! |
|---|
| 362 | |
|---|
| 363 | |
|---|
| 364 | master = search_best_master(client,0,max_diff, sequence, seq_len, gap1,gap2, alt); |
|---|
| 365 | |
|---|
| 366 | if (!master){ // create a new master !! |
|---|
| 367 | master = new master_gap_compress(); |
|---|
| 368 | master->alt_master = alt; |
|---|
| 369 | client->clean(master); |
|---|
| 370 | master->main_child = client; |
|---|
| 371 | client->basic_write_sequence(sequence,seq_len,gap1,gap2); |
|---|
| 372 | client->optimize(1); |
|---|
| 373 | |
|---|
| 374 | if (nmasters >= memmasters) { |
|---|
| 375 | memmasters = memmasters * MEM_ALLOC_FACTOR; |
|---|
| 376 | masters = (master_gap_compress **)realloc( |
|---|
| 377 | (char *)masters,sizeof(void *)*memmasters); |
|---|
| 378 | } |
|---|
| 379 | for (i=nmasters; i>0; i--) masters[i] = masters[i-1]; |
|---|
| 380 | nmasters++; |
|---|
| 381 | masters[0] = master; |
|---|
| 382 | } |
|---|
| 383 | master->ref_cnt++; |
|---|
| 384 | |
|---|
| 385 | if (nmasters >= MAX_MASTERS) { // maximum masters exceeded delete |
|---|
| 386 | int j; // search single refed masters and search new master |
|---|
| 387 | // for the only single child |
|---|
| 388 | for (j=0;j<MAX_MASTERS_REMOVE_N;j++){ |
|---|
| 389 | GB_mergesort((void **)masters,0,nmasters, sort_masters,0); |
|---|
| 390 | i = nmasters-1; |
|---|
| 391 | int cli; |
|---|
| 392 | for (cli = 0; cli < nclients; cli++) { |
|---|
| 393 | client_gap_compress *cl2 = clients[cli]; |
|---|
| 394 | if ( cl2->master != masters[i] ) continue; |
|---|
| 395 | char *seq2 = this->read_sequence(cli); |
|---|
| 396 | long seq_len2 = cl2->abs_seq_len; |
|---|
| 397 | master = search_best_master(clients[cli], |
|---|
| 398 | masters[i],-1, |
|---|
| 399 | seq2, seq_len2, '-','-',alt); |
|---|
| 400 | delete seq2; |
|---|
| 401 | master->ref_cnt++; |
|---|
| 402 | } |
|---|
| 403 | nmasters--; |
|---|
| 404 | delete masters[i]; |
|---|
| 405 | } |
|---|
| 406 | } |
|---|
| 407 | |
|---|
| 408 | if (nclients >= memclients) { |
|---|
| 409 | memclients = memclients * MEM_ALLOC_FACTOR; |
|---|
| 410 | clients = (client_gap_compress **)realloc( |
|---|
| 411 | (char *)clients,sizeof(void *)*memclients); |
|---|
| 412 | } |
|---|
| 413 | clients[nclients++] = client; |
|---|
| 414 | return nclients-1; |
|---|
| 415 | } |
|---|
| 416 | |
|---|
| 417 | void gap_compress::optimize(void) { |
|---|
| 418 | int i; |
|---|
| 419 | for (i=0;i<nmasters;i++) masters[i]->optimize(); |
|---|
| 420 | masters = (master_gap_compress **)realloc( |
|---|
| 421 | (char *)masters,sizeof(master_gap_compress) * nmasters); |
|---|
| 422 | memmasters = nmasters; |
|---|
| 423 | clients = (client_gap_compress **)realloc( |
|---|
| 424 | (char *)clients,sizeof(client_gap_compress) * nclients); |
|---|
| 425 | memclients = nclients; |
|---|
| 426 | } |
|---|
| 427 | |
|---|
| 428 | long gap_compress::rel_2_abs(gap_index index, long rel){ |
|---|
| 429 | if (index >= nclients) GB_CORE; |
|---|
| 430 | return clients[index]->rel_2_abs(rel); |
|---|
| 431 | } |
|---|
| 432 | |
|---|
| 433 | char *gap_compress::read_sequence(gap_index index){ |
|---|
| 434 | if (index >= nclients) GB_CORE; |
|---|
| 435 | client_gap_compress *client = clients[index]; |
|---|
| 436 | int i; |
|---|
| 437 | long abs; |
|---|
| 438 | char *res = (char *)malloc((int)client->abs_seq_len+1); |
|---|
| 439 | memset(res,'-',(int)client->abs_seq_len); |
|---|
| 440 | res[client->abs_seq_len] = 0; |
|---|
| 441 | for (i=0;i<client->rel_seq_len; i++) { |
|---|
| 442 | abs = client->rel_2_abs(i); |
|---|
| 443 | res[abs] = '+'; |
|---|
| 444 | } |
|---|
| 445 | return res; |
|---|
| 446 | } |
|---|
| 447 | |
|---|
| 448 | void gap_compress::statistic(int full){ |
|---|
| 449 | |
|---|
| 450 | int i; |
|---|
| 451 | printf("nmasters %i\n",nmasters); |
|---|
| 452 | printf("nclients %i\n",nclients); |
|---|
| 453 | if (full) { |
|---|
| 454 | long sum = 0; |
|---|
| 455 | for (i=0;i<nmasters;i++) { |
|---|
| 456 | printf(" %i: ref %i alt %i\n", |
|---|
| 457 | i,masters[i]->ref_cnt,masters[i]->alt_master); |
|---|
| 458 | } |
|---|
| 459 | for (i=0;i<nclients;i++) { |
|---|
| 460 | printf("%i ",clients[i]->len); |
|---|
| 461 | sum += clients[i]->len; |
|---|
| 462 | } |
|---|
| 463 | printf("\nsum len clients %i avg %f\n",sum,(double)sum/nclients); |
|---|
| 464 | #if 0 |
|---|
| 465 | client_gap_compress *client = clients[nclients-1]; |
|---|
| 466 | for (i=0 ; i < client->len; i++){ |
|---|
| 467 | printf(" %i rel %i x %i dx %i y %i\n", |
|---|
| 468 | i,client->l[i].rel,client->l[i].x,client->l[i].dx,client->l[i].y); |
|---|
| 469 | } |
|---|
| 470 | #endif |
|---|
| 471 | } |
|---|
| 472 | } |
|---|
| 473 | |
|---|
| 474 | int master_gap_compress::save(FILE *out, int index_write, int &pos){ |
|---|
| 475 | if (index_write) { |
|---|
| 476 | putw(len,out); |
|---|
| 477 | if (rel_2_abs) { |
|---|
| 478 | pt_align(0,pos,sizeof(long)); |
|---|
| 479 | putc(1,out); |
|---|
| 480 | }else{ |
|---|
| 481 | pt_align(0,pos,sizeof(short)); |
|---|
| 482 | putc(0,out); |
|---|
| 483 | } |
|---|
| 484 | putw(pos,out); |
|---|
| 485 | }else{ |
|---|
| 486 | if (rel_2_abs) { |
|---|
| 487 | pt_align(out,pos,sizeof(long)); |
|---|
| 488 | if (len != fwrite((char *)rel_2_abs,sizeof(long), len, out)){ |
|---|
| 489 | return -1; |
|---|
| 490 | } |
|---|
| 491 | }else{ |
|---|
| 492 | pt_align(out,pos,sizeof(short)); |
|---|
| 493 | if (len != fwrite((char *)rel_2_abss,sizeof(short), len, out)){ |
|---|
| 494 | return -1; |
|---|
| 495 | } |
|---|
| 496 | } |
|---|
| 497 | } |
|---|
| 498 | if (rel_2_abs) { |
|---|
| 499 | pos += len * sizeof(long); |
|---|
| 500 | }else{ |
|---|
| 501 | pos += len * sizeof(short); |
|---|
| 502 | } |
|---|
| 503 | return 0; |
|---|
| 504 | } |
|---|
| 505 | |
|---|
| 506 | int master_gap_compress::load(FILE *in, char *baseaddr){ |
|---|
| 507 | len = getw(in); |
|---|
| 508 | if (getc(in) != 0) { |
|---|
| 509 | rel_2_abs = (long *)(getw(in) + baseaddr); |
|---|
| 510 | }else{ |
|---|
| 511 | rel_2_abss = (short *)(getw(in) + baseaddr); |
|---|
| 512 | } |
|---|
| 513 | return 0; |
|---|
| 514 | } |
|---|
| 515 | |
|---|
| 516 | int client_gap_compress::save(FILE *out, int index_write, int &pos){ |
|---|
| 517 | if (index_write) { |
|---|
| 518 | putw(master->index,out); |
|---|
| 519 | putw(len,out); |
|---|
| 520 | pt_align(0,pos,sizeof(long)); |
|---|
| 521 | putw(pos,out); |
|---|
| 522 | putw((int)abs_seq_len,out); |
|---|
| 523 | putw((int)rel_seq_len,out); |
|---|
| 524 | }else{ |
|---|
| 525 | pt_align(out,pos,sizeof(long)); |
|---|
| 526 | if (len != fwrite((char *)l,sizeof(client_gap_compress_data), len, out)){ |
|---|
| 527 | return -1; |
|---|
| 528 | } |
|---|
| 529 | } |
|---|
| 530 | pos += sizeof(client_gap_compress_data) * len; |
|---|
| 531 | return 0; |
|---|
| 532 | } |
|---|
| 533 | |
|---|
| 534 | int client_gap_compress::load(FILE *in, char *baseaddr,master_gap_compress **masters){ |
|---|
| 535 | int master_index = getw(in); |
|---|
| 536 | master = masters[master_index]; |
|---|
| 537 | len = getw(in); |
|---|
| 538 | l = (client_gap_compress_data *)(getw(in) + baseaddr); |
|---|
| 539 | abs_seq_len = getw(in); |
|---|
| 540 | rel_seq_len = getw(in); |
|---|
| 541 | return 0; |
|---|
| 542 | } |
|---|
| 543 | |
|---|
| 544 | int gap_compress::save(FILE *out, int index_write, int &pos){ |
|---|
| 545 | if (index_write) { |
|---|
| 546 | putw(nmasters,out); |
|---|
| 547 | putw(nclients,out); |
|---|
| 548 | } |
|---|
| 549 | long i; |
|---|
| 550 | for (i=0;i<nmasters;i++){ |
|---|
| 551 | if(masters[i]->save(out,index_write,pos)){ |
|---|
| 552 | return -1; |
|---|
| 553 | } |
|---|
| 554 | masters[i]->index = (int)i; |
|---|
| 555 | } |
|---|
| 556 | for (i=0;i<nclients;i++){ |
|---|
| 557 | if (clients[i]->save(out,index_write,pos)){ |
|---|
| 558 | return -1; |
|---|
| 559 | } |
|---|
| 560 | } |
|---|
| 561 | return 0; |
|---|
| 562 | } |
|---|
| 563 | |
|---|
| 564 | int gap_compress::load(FILE *in, char *baseaddr){ |
|---|
| 565 | delete masters; |
|---|
| 566 | delete clients; |
|---|
| 567 | int i; |
|---|
| 568 | memmasters = nmasters = getw(in); |
|---|
| 569 | memclients = nclients = getw(in); |
|---|
| 570 | clients = (client_gap_compress **)calloc(sizeof(void *),memclients); |
|---|
| 571 | masters = (master_gap_compress **)calloc(sizeof(void *),memmasters); |
|---|
| 572 | |
|---|
| 573 | for (i=0;i<nmasters;i++){ |
|---|
| 574 | masters[i] = (master_gap_compress *)calloc(sizeof(master_gap_compress),1); |
|---|
| 575 | masters[i]->load(in,baseaddr); |
|---|
| 576 | } |
|---|
| 577 | for (i=0;i<nclients;i++){ |
|---|
| 578 | clients[i] = (client_gap_compress *)calloc(sizeof(client_gap_compress),1); |
|---|
| 579 | clients[i]->load(in,baseaddr,masters); |
|---|
| 580 | } |
|---|
| 581 | return 0; |
|---|
| 582 | } |
|---|
| 583 | |
|---|
| 584 | // pt_species_class::set destroys the sequence !!!!!! |
|---|
| 585 | // name and fullname must be a new copy |
|---|
| 586 | void pt_species_class::set(char *sequence, long seq_len, char *name, char *fullname){ |
|---|
| 587 | delete this->fullname; |
|---|
| 588 | this->fullname = fullname; |
|---|
| 589 | delete this->name; |
|---|
| 590 | this->name = name; |
|---|
| 591 | |
|---|
| 592 | delete this->sequence; |
|---|
| 593 | delete this->abs_sequence; |
|---|
| 594 | |
|---|
| 595 | this->abs_sequence = sequence; |
|---|
| 596 | this->sequence = (char *)calloc(sizeof(char), (int)seq_len+1); |
|---|
| 597 | |
|---|
| 598 | |
|---|
| 599 | char c; |
|---|
| 600 | char *src, |
|---|
| 601 | *dest, |
|---|
| 602 | *abs; |
|---|
| 603 | src = sequence; |
|---|
| 604 | dest = this->sequence; |
|---|
| 605 | abs = this->abs_sequence; |
|---|
| 606 | char *first_n = dest; // maximum number of consequtive n's |
|---|
| 607 | char *first_abs_n = abs; |
|---|
| 608 | |
|---|
| 609 | #define SET_NN first_n = dest; first_abs_n = abs |
|---|
| 610 | |
|---|
| 611 | while(c=*(src++)){ |
|---|
| 612 | switch (c) { |
|---|
| 613 | case 'A': |
|---|
| 614 | case 'a': *(dest++) = PT_A;*(abs++) = '+';SET_NN; break; |
|---|
| 615 | case 'C': |
|---|
| 616 | case 'c': *(dest++) = PT_C;*(abs++) = '+';SET_NN; break; |
|---|
| 617 | case 'G': |
|---|
| 618 | case 'g': *(dest++) = PT_G;*(abs++) = '+';SET_NN; break; |
|---|
| 619 | case 'U': |
|---|
| 620 | case 'u': |
|---|
| 621 | case 'T': |
|---|
| 622 | case 't': *(dest++) = PT_T;*(abs++) = '+';SET_NN; break; |
|---|
| 623 | case '.': *(dest)++ = PT_QU;*(abs++) = '+'; |
|---|
| 624 | while (*src =='.') { src++; *(abs++) = '-'; } |
|---|
| 625 | SET_NN; |
|---|
| 626 | break; |
|---|
| 627 | case '-': *(abs++) = '-'; break; |
|---|
| 628 | default: |
|---|
| 629 | if (dest-first_n < PT_MAX_NN) { |
|---|
| 630 | *(dest++) = PT_N;*(abs++) = '+';break; |
|---|
| 631 | }else{ |
|---|
| 632 | while (first_abs_n < abs) *(first_abs_n++) = '-'; |
|---|
| 633 | dest = first_n; |
|---|
| 634 | *(dest++) = PT_QU; |
|---|
| 635 | *(abs++) = '+'; |
|---|
| 636 | } |
|---|
| 637 | break; |
|---|
| 638 | } |
|---|
| 639 | |
|---|
| 640 | } |
|---|
| 641 | *dest = PT_QU; |
|---|
| 642 | *abs = 0; |
|---|
| 643 | if ( abs - this->abs_sequence != seq_len) GB_CORE; |
|---|
| 644 | this->abs_seq_len = seq_len; |
|---|
| 645 | this->seq_len = dest-this->sequence; |
|---|
| 646 | } |
|---|
| 647 | pt_species_class::pt_species_class(void){ |
|---|
| 648 | memset((char *)this,0,sizeof(pt_species_class)); |
|---|
| 649 | } |
|---|
| 650 | pt_species_class::~pt_species_class(void){ |
|---|
| 651 | delete sequence; |
|---|
| 652 | delete abs_sequence; |
|---|
| 653 | delete name; |
|---|
| 654 | delete fullname; |
|---|
| 655 | } |
|---|
| 656 | |
|---|
| 657 | void pt_species_class::calc_gap_index(gap_compress *gc){ |
|---|
| 658 | index = gc->write_sequence(abs_sequence, abs_seq_len, '-', '-'); |
|---|
| 659 | } |
|---|
| 660 | |
|---|
| 661 | int pt_species_class::save(FILE *out, int index_write, int &pos){ |
|---|
| 662 | if (index_write) { |
|---|
| 663 | putw(pos,out); |
|---|
| 664 | putw((int)abs_seq_len,out); |
|---|
| 665 | putw((int)seq_len,out); |
|---|
| 666 | if (pt_putstring(name,out) ){ |
|---|
| 667 | return -1; |
|---|
| 668 | } |
|---|
| 669 | if (pt_putstring(fullname,out) ){ |
|---|
| 670 | return -1; |
|---|
| 671 | } |
|---|
| 672 | putw((int)index,out); |
|---|
| 673 | }else{ |
|---|
| 674 | if (seq_len+1 != fwrite(sequence,sizeof(char), (int)seq_len+1, out)) |
|---|
| 675 | return -1; |
|---|
| 676 | } |
|---|
| 677 | pos += (int)seq_len+1; |
|---|
| 678 | return 0; |
|---|
| 679 | } |
|---|
| 680 | |
|---|
| 681 | |
|---|
| 682 | int pt_species_class::load(FILE *in, char *baseaddr){ |
|---|
| 683 | sequence = getw(in) + baseaddr; |
|---|
| 684 | abs_seq_len = getw(in); |
|---|
| 685 | seq_len = getw(in); |
|---|
| 686 | name = pt_getstring(in); |
|---|
| 687 | fullname = pt_getstring(in); |
|---|
| 688 | index = getw(in); |
|---|
| 689 | return 0; |
|---|
| 690 | } |
|---|
| 691 | |
|---|
| 692 | long pt_species_class::rel_2_abs(long rel){ |
|---|
| 693 | return cgc->rel_2_abs(rel); |
|---|
| 694 | } |
|---|
| 695 | |
|---|
| 696 | void pt_species_class::test_print(void){ |
|---|
| 697 | char *buffer = (char *)calloc(sizeof(char), (int)abs_seq_len + 1); |
|---|
| 698 | memset(buffer,'-',(int)abs_seq_len); |
|---|
| 699 | int i; |
|---|
| 700 | long abs; |
|---|
| 701 | for (i=0; i <seq_len; i++) { |
|---|
| 702 | abs = rel_2_abs(i); |
|---|
| 703 | buffer[abs] = PT_BASE_2_ASCII[this->get(i)]; |
|---|
| 704 | } |
|---|
| 705 | for (i=0;i< abs_seq_len; i+= 80) { |
|---|
| 706 | printf("%i ",i); |
|---|
| 707 | fwrite(buffer+i,1,80,stdout); |
|---|
| 708 | printf("\n"); |
|---|
| 709 | } |
|---|
| 710 | delete buffer; |
|---|
| 711 | } |
|---|
| 712 | |
|---|
| 713 | |
|---|
| 714 | pt_main_class::pt_main_class(void) { |
|---|
| 715 | memset((char *)this,0,sizeof(pt_main_class)); |
|---|
| 716 | } |
|---|
| 717 | |
|---|
| 718 | pt_main_class::~pt_main_class(void) { |
|---|
| 719 | GB_CORE; |
|---|
| 720 | } |
|---|
| 721 | |
|---|
| 722 | void pt_main_class::calc_name_hash(void){ |
|---|
| 723 | long i; |
|---|
| 724 | namehash = GBS_create_hash(PT_NAME_HASH_SIZE); |
|---|
| 725 | for (i=0;i<data_count;i++){ |
|---|
| 726 | GBS_write_hash(namehash, species[i].name,i+1); |
|---|
| 727 | } |
|---|
| 728 | |
|---|
| 729 | } |
|---|
| 730 | |
|---|
| 731 | void pt_main_class::calc_max_size_char_count(void){ |
|---|
| 732 | max_size = 0; |
|---|
| 733 | long i; |
|---|
| 734 | for (i = 0; i < data_count; i++){ /* get max sequence len */ |
|---|
| 735 | max_size = (int)MAX(max_size, species[i].seq_len); |
|---|
| 736 | char_count += species[i].seq_len; |
|---|
| 737 | } |
|---|
| 738 | } |
|---|
| 739 | |
|---|
| 740 | void pt_main_class::calc_bi_ecoli(void){ |
|---|
| 741 | if ( ecoli && strlen(ecoli) ){ |
|---|
| 742 | BI_ecoli_ref *ref = new BI_ecoli_ref; |
|---|
| 743 | char *error = ref->init(ecoli,strlen(ecoli)); |
|---|
| 744 | if (error) { |
|---|
| 745 | delete ecoli; |
|---|
| 746 | ecoli = 0; |
|---|
| 747 | }else{ |
|---|
| 748 | bi_ecoli = (void *)ref; |
|---|
| 749 | } |
|---|
| 750 | } |
|---|
| 751 | } |
|---|
| 752 | void pt_main_class::init(GBDATA *gb_main){ |
|---|
| 753 | GB_transaction dummy(gb_main); |
|---|
| 754 | GBDATA *gb_extended_data = GB_search(gb_main, "extended_data", GB_CREATE_CONTAINER); |
|---|
| 755 | GBDATA *gb_species_data = GB_search(gb_main, "species_data", GB_CREATE_CONTAINER); |
|---|
| 756 | GBDATA *gb_extended; |
|---|
| 757 | GBDATA *gb_species; |
|---|
| 758 | GBDATA *gb_data; |
|---|
| 759 | GBDATA *gb_name; |
|---|
| 760 | alignment_name = GBT_get_default_alignment(gb_main); |
|---|
| 761 | |
|---|
| 762 | gb_extended = GBT_find_extended(gb_extended_data, GBT_get_default_ref(gb_main)); |
|---|
| 763 | delete ecoli; |
|---|
| 764 | ecoli = 0; |
|---|
| 765 | if (gb_extended) { |
|---|
| 766 | gb_data = GBT_read_sequence(gb_extended,alignment_name); |
|---|
| 767 | if (gb_data) { |
|---|
| 768 | ecoli = GB_read_string(gb_data); |
|---|
| 769 | } |
|---|
| 770 | } |
|---|
| 771 | data_count = 0; |
|---|
| 772 | |
|---|
| 773 | for ( gb_species = GBT_first_species_rel_species_data(gb_species_data); |
|---|
| 774 | gb_species; |
|---|
| 775 | gb_species = GBT_next_species(gb_species) ){ |
|---|
| 776 | gb_data = GBT_read_sequence(gb_extended,alignment_name); |
|---|
| 777 | if (!gb_data) continue; |
|---|
| 778 | gb_name = GB_search(gb_species,"name",GB_FIND); |
|---|
| 779 | if (!gb_name) continue; |
|---|
| 780 | data_count ++; |
|---|
| 781 | } |
|---|
| 782 | } |
|---|
| 783 | // pt_main_class::save does more than save: all the conversation is done |
|---|
| 784 | // by this procedure |
|---|
| 785 | |
|---|
| 786 | int pt_main_class::save(GBDATA *gb_main, FILE *out, int index_write, int &pos){ |
|---|
| 787 | |
|---|
| 788 | GB_transaction dummy(gb_main); |
|---|
| 789 | GBDATA *gb_species_data = GB_search(gb_main, "species_data", GB_CREATE_CONTAINER); |
|---|
| 790 | GBDATA *gb_species; |
|---|
| 791 | GBDATA *gb_name; |
|---|
| 792 | GBDATA *gb_fullname; |
|---|
| 793 | GBDATA *gb_data; |
|---|
| 794 | char *name; |
|---|
| 795 | char *fullname; |
|---|
| 796 | pt_species_class *species = new pt_species_class; |
|---|
| 797 | if (index_write){ |
|---|
| 798 | this->init(gb_main); // calculate the number of species |
|---|
| 799 | putw(data_count,out); |
|---|
| 800 | pt_putstring(ecoli,out); |
|---|
| 801 | gc = new gap_compress; |
|---|
| 802 | } |
|---|
| 803 | int count = 0; |
|---|
| 804 | for ( gb_species = GBT_first_species_rel_species_data(gb_species_data); |
|---|
| 805 | gb_species; |
|---|
| 806 | gb_species = GBT_next_species(gb_species) ){ |
|---|
| 807 | gb_data = GBT_read_sequence(gb_species,alignment_name); |
|---|
| 808 | if (!gb_data) continue; |
|---|
| 809 | gb_name = GB_search(gb_species,"name",GB_FIND); |
|---|
| 810 | if (!gb_name) continue; |
|---|
| 811 | name = GB_read_string(gb_name); |
|---|
| 812 | gb_fullname = GB_search(gb_species,"full_name",GB_FIND); |
|---|
| 813 | if (gb_fullname) fullname = GB_read_string(gb_fullname); |
|---|
| 814 | else fullname = strdup(""); |
|---|
| 815 | char *seq = GB_read_string(gb_data);; |
|---|
| 816 | long seq_len = GB_read_string_count(gb_data); |
|---|
| 817 | species->set(seq,seq_len, name,fullname); |
|---|
| 818 | if (index_write) { |
|---|
| 819 | species->calc_gap_index(gc); |
|---|
| 820 | if (count != species->index) GB_CORE; |
|---|
| 821 | } |
|---|
| 822 | if (species->save(out,index_write, pos)){ |
|---|
| 823 | return -1; |
|---|
| 824 | } |
|---|
| 825 | |
|---|
| 826 | count ++; |
|---|
| 827 | if (count % 100 == 0) { |
|---|
| 828 | printf("%i ...\n",count); |
|---|
| 829 | } |
|---|
| 830 | } |
|---|
| 831 | delete species; |
|---|
| 832 | if (index_write) { |
|---|
| 833 | gc->optimize(); |
|---|
| 834 | gc->statistic(); |
|---|
| 835 | } |
|---|
| 836 | if (gc->save(out,index_write,pos)){ |
|---|
| 837 | return -1; |
|---|
| 838 | } |
|---|
| 839 | return 0; |
|---|
| 840 | } |
|---|
| 841 | |
|---|
| 842 | int pt_main_class::load(FILE *in, char *baseaddr){ |
|---|
| 843 | data_count = getw(in); |
|---|
| 844 | ecoli = pt_getstring(in); |
|---|
| 845 | species = (pt_species_class *)calloc(sizeof(pt_species_class),data_count); |
|---|
| 846 | int i; |
|---|
| 847 | for (i=0;i<data_count;i++) { |
|---|
| 848 | species[i].load(in,baseaddr); |
|---|
| 849 | } |
|---|
| 850 | |
|---|
| 851 | gc = new gap_compress; |
|---|
| 852 | if (gc->load(in,baseaddr)){ |
|---|
| 853 | return -1; |
|---|
| 854 | } |
|---|
| 855 | |
|---|
| 856 | for (i=0;i<data_count;i++) { |
|---|
| 857 | species[i].cgc = gc->clients[i]; |
|---|
| 858 | } |
|---|
| 859 | |
|---|
| 860 | calc_name_hash(); |
|---|
| 861 | calc_max_size_char_count(); |
|---|
| 862 | calc_bi_ecoli(); |
|---|
| 863 | |
|---|
| 864 | return 0; |
|---|
| 865 | } |
|---|
| 866 | |
|---|
| 867 | long pt_main_class::abs_2_ecoli(long pos) |
|---|
| 868 | { |
|---|
| 869 | if (!bi_ecoli) return pos; |
|---|
| 870 | long res,dummy; |
|---|
| 871 | ((BI_ecoli_ref *)bi_ecoli)->abs_2_rel(pos,res,dummy); |
|---|
| 872 | return res; |
|---|
| 873 | } |
|---|
| 874 | |
|---|
| 875 | int pt_main_class::main_convert(GBDATA *gb_main, char *basename){ |
|---|
| 876 | FILE *index_file; |
|---|
| 877 | FILE *sequence_file; |
|---|
| 878 | char *index_name = GBS_string_eval(basename,"*=*1.ind",0); |
|---|
| 879 | char *sequence_name = GBS_string_eval(basename,"*=*1.seq",0); |
|---|
| 880 | |
|---|
| 881 | index_file = fopen(index_name,"w"); |
|---|
| 882 | if (!index_file) { |
|---|
| 883 | GB_ERROR("Error Cannot create and write to file %s\n",index_name); |
|---|
| 884 | return -1; |
|---|
| 885 | } |
|---|
| 886 | sequence_file = fopen(sequence_name,"w"); |
|---|
| 887 | if (!sequence_file) { |
|---|
| 888 | GB_ERROR("Error Cannot create and write to file %s\n",sequence_name); |
|---|
| 889 | return -1; |
|---|
| 890 | } |
|---|
| 891 | GB_transaction dummy(gb_main); |
|---|
| 892 | |
|---|
| 893 | int pos = 0; |
|---|
| 894 | if (this->save(gb_main, index_file,1,pos)) { |
|---|
| 895 | GB_ERROR("Index Write Error\n"); return(-1); |
|---|
| 896 | }; |
|---|
| 897 | fclose(index_file); |
|---|
| 898 | pos = 0; |
|---|
| 899 | if (this->save(gb_main, sequence_file,0,pos)) { |
|---|
| 900 | GB_ERROR("Index Write Error\n"); return (-1); |
|---|
| 901 | }; |
|---|
| 902 | fclose(sequence_file); |
|---|
| 903 | printf("Sequence File length: %i\n",pos); |
|---|
| 904 | return 0; |
|---|
| 905 | } |
|---|
| 906 | |
|---|
| 907 | int pt_main_class::import_all(char *basename){ |
|---|
| 908 | FILE *index_file; |
|---|
| 909 | char *index_name = GBS_string_eval(basename,"*=*1.ind",0); |
|---|
| 910 | char *sequence_name = GBS_string_eval(basename,"*=*1.seq",0); |
|---|
| 911 | index_file = fopen(index_name,"r"); |
|---|
| 912 | if (!index_file) { |
|---|
| 913 | fprintf(stderr,"Error Cannot create and write to file %s\n",index_name); |
|---|
| 914 | return -1; |
|---|
| 915 | } |
|---|
| 916 | char *baseaddr = GB_map_file(sequence_name); |
|---|
| 917 | if (!baseaddr) { |
|---|
| 918 | GB_print_error(); |
|---|
| 919 | return -1; |
|---|
| 920 | } |
|---|
| 921 | if (load(index_file, baseaddr) ) { |
|---|
| 922 | return -1; |
|---|
| 923 | } |
|---|
| 924 | return 0; |
|---|
| 925 | } |
|---|
| 926 | |
|---|
| 927 | int |
|---|
| 928 | main(int argc, char **argv) |
|---|
| 929 | { |
|---|
| 930 | char *path; |
|---|
| 931 | GBDATA *gb_main; |
|---|
| 932 | pt_main_class *mc = new pt_main_class; |
|---|
| 933 | char *filename = "out.arb"; |
|---|
| 934 | if (argc < 2){ |
|---|
| 935 | if (mc->import_all(filename)) { |
|---|
| 936 | fprintf(stderr,"Load Error\n"); |
|---|
| 937 | return -1; |
|---|
| 938 | } |
|---|
| 939 | int i; |
|---|
| 940 | for (i=0; i < mc->data_count ; i++ ) { |
|---|
| 941 | mc->species[i].test_print(); |
|---|
| 942 | } |
|---|
| 943 | return 0; |
|---|
| 944 | }else{ |
|---|
| 945 | path = argv[1]; |
|---|
| 946 | } |
|---|
| 947 | |
|---|
| 948 | if (!(gb_main = GB_open(path, "r"))) |
|---|
| 949 | return (-1); |
|---|
| 950 | if (mc->main_convert(gb_main,filename)) { |
|---|
| 951 | fprintf(stderr,"Error\n"); |
|---|
| 952 | return -1; |
|---|
| 953 | } |
|---|
| 954 | GB_close(gb_main); |
|---|
| 955 | return 0; |
|---|
| 956 | } |
|---|
| 957 | |
|---|
| 958 | #if 0 |
|---|
| 959 | char *cmp; |
|---|
| 960 | int i; |
|---|
| 961 | cmp = gc.read_sequence(index); |
|---|
| 962 | for (i=0;i<seq_len;i++) { |
|---|
| 963 | if ( cmp[i] == '-' && ( |
|---|
| 964 | seq[i] == '-' || seq[i] == '.')) continue; |
|---|
| 965 | if ( cmp[i] != '-' && ( |
|---|
| 966 | seq[i] != '-' && seq[i] != '.')) continue; |
|---|
| 967 | break; |
|---|
| 968 | } |
|---|
| 969 | if (i<seq_len) { |
|---|
| 970 | printf("sequences differ %i\n%s\n%s\n",count, seq,cmp); |
|---|
| 971 | } |
|---|
| 972 | |
|---|
| 973 | delete cmp; |
|---|
| 974 | #endif |
|---|