1 | #include <stdio.h> |
---|
2 | #include <stdlib.h> |
---|
3 | // #include <malloc.h> |
---|
4 | #include <string.h> |
---|
5 | |
---|
6 | #include <arbdb.h> |
---|
7 | #include <arbdbt.h> |
---|
8 | #include <aw_root.hxx> |
---|
9 | #include <aw_device.hxx> |
---|
10 | #include <aw_window.hxx> |
---|
11 | #include <awt.hxx> |
---|
12 | |
---|
13 | #include "gde.hxx" |
---|
14 | #include "GDE_def.h" |
---|
15 | #include "GDE_menu.h" |
---|
16 | #include "GDE_extglob.h" |
---|
17 | |
---|
/*
 * Return the larger of the two arguments; when they compare equal the
 * second one is returned (which is the same value).
 */
int MAX(int a,int b)
{
    return (a > b) ? a : b;
}
---|
23 | |
---|
/*
 * Return the smaller of the two arguments; when they compare equal the
 * second one is returned (which is the same value).
 */
int MIN(int a,int b)
{
    return (a < b) ? a : b;
}
---|
29 | |
---|
30 | void Regroup(NA_Alignment *alignment) |
---|
31 | { |
---|
32 | size_t j; |
---|
33 | size_t group; |
---|
34 | int last; |
---|
35 | |
---|
36 | for(j=0;j<alignment->numelements;j++) |
---|
37 | { |
---|
38 | alignment->element[j].groupf = NULL; |
---|
39 | alignment->element[j].groupb = NULL; |
---|
40 | } |
---|
41 | |
---|
42 | for (group = 1;group <= alignment->numgroups;group++) |
---|
43 | { |
---|
44 | last = -1; |
---|
45 | for(j=0;j<alignment->numelements;j++) |
---|
46 | if(alignment->element[j].groupid == group) |
---|
47 | { |
---|
48 | if(last != -1) |
---|
49 | { |
---|
50 | alignment->element[j].groupb = |
---|
51 | &(alignment->element[last]); |
---|
52 | alignment->element[last].groupf = |
---|
53 | &(alignment->element[j]); |
---|
54 | } |
---|
55 | last = j; |
---|
56 | } |
---|
57 | } |
---|
58 | return; |
---|
59 | } |
---|
60 | |
---|
61 | |
---|
/*
 * Assertion-style fatal error check.
 *
 * code   - condition result; zero means "failed".
 * string - message printed after "Error:" when the check fails.
 *
 * A non-zero code returns immediately.  A zero code prints the message to
 * stderr and terminates the process with exit status 1.
 */
void ErrorOut5(int code,const char *string)
{
    if (code != 0)
        return;

    fprintf(stderr,"Error:%s\n",string);
    exit(1);
}
---|
74 | |
---|
75 | |
---|
76 | /* |
---|
77 | * More robust memory management routines |
---|
78 | */ |
---|
/*
 * Allocate a zero-filled block of count*size bytes.
 *
 * count - number of items.
 * size  - size of one item in bytes.
 *
 * Returns the new block; never returns NULL.  If the memory cannot be
 * obtained (or an argument is negative) the program is terminated through
 * ErrorOut5().  The block is released with Cfree()/free().
 *
 * calloc() is used so that the count*size product is overflow-checked by
 * the library instead of being computed in `int`, and so that the block is
 * zero-filled without a separate memset().
 */
char *Calloc(int count,int size)
{
    char   *temp;
    size_t  items;
    size_t  item_size;
#ifdef SeeAlloc
    extern int TotalCalloc;
    /* size is still the per-item size here, so count*size is the request. */
    TotalCalloc += count*size;
    fprintf(stderr,"Calloc %d %d\n",count*size,TotalCalloc);
#endif

    /* A negative request can never be satisfied. */
    if (count < 0 || size < 0)
        ErrorOut5(0,"Cannot allocate memory");

    items     = (size_t)count;
    item_size = (size_t)size;

    /*
     * A zero-byte request may legally yield NULL, which would be reported
     * as an out-of-memory condition; hand out one zeroed byte instead.
     */
    if (items == 0 || item_size == 0) {
        items     = 1;
        item_size = 1;
    }

    temp = (char *)calloc(items,item_size);
    ErrorOut5(0 != temp,"Cannot allocate memory");
    return(temp);
}
---|
93 | |
---|
/*
 * Resize a heap block to `size` bytes.
 *
 * block - block to resize (handed to realloc() unchanged).
 * size  - requested size in bytes.
 *
 * Returns the (possibly moved) block.  When realloc() yields NULL the
 * program is terminated through ErrorOut5().
 */
char *Realloc(char *block,int size)
{
    char *resized;
#ifdef SeeAlloc
    extern int TotalRealloc;
    TotalRealloc += size;
    fprintf(stderr,"Realloc %d\n",TotalRealloc);
#endif

    resized = (char *)realloc(block,size);
    ErrorOut5(resized != 0,"Cannot change memory size");
    return resized;
}
---|
107 | |
---|
/*
 * Release a block obtained from Calloc()/Realloc().
 *
 * A NULL block is not freed; it is reported through Warning() instead.
 */
void Cfree(char *block)
{
    if (!block) {
        Warning("Error in Cfree, NULL block");
        return;
    }

    free(block);
}
---|
120 | |
---|
121 | |
---|
122 | static void ReadNA_Flat(char *filename,char *dataset,int type) |
---|
123 | { |
---|
124 | size_t j; |
---|
125 | int i, jj, c, curelem=0,offset; |
---|
126 | char buffer[GBUFSIZ]; |
---|
127 | char in_line[GBUFSIZ]; |
---|
128 | char curname[GBUFSIZ]; |
---|
129 | i=0;c=0;type=0; |
---|
130 | |
---|
131 | NA_Sequence *this_elem; |
---|
132 | NA_Alignment *data; |
---|
133 | |
---|
134 | FILE *file; |
---|
135 | |
---|
136 | curname[0] = '\0'; |
---|
137 | data = (NA_Alignment*)dataset; |
---|
138 | |
---|
139 | file = fopen(filename,"r"); |
---|
140 | if(file == NULL) |
---|
141 | { |
---|
142 | fprintf(stderr,"Cannot open %s.\n",filename); |
---|
143 | return; |
---|
144 | } |
---|
145 | for(;fgets(in_line,GBUFSIZ,file) !=0;) |
---|
146 | { |
---|
147 | if (in_line[0] == '#' || |
---|
148 | in_line[0] == '%' || |
---|
149 | in_line[0] == '"' || |
---|
150 | in_line[0] == '@') |
---|
151 | { |
---|
152 | offset = 0; |
---|
153 | for(j=0;j<strlen(in_line);j++) |
---|
154 | { |
---|
155 | if(in_line[j] == '(') |
---|
156 | { |
---|
157 | sscanf((char*) |
---|
158 | &(in_line[j+1]),"%d",&offset); |
---|
159 | in_line[j] = '\0'; |
---|
160 | } |
---|
161 | } |
---|
162 | |
---|
163 | curelem = data->numelements++; |
---|
164 | if( curelem == 0 ) |
---|
165 | { |
---|
166 | data->element=(NA_Sequence*) |
---|
167 | Calloc(5,sizeof(NA_Sequence)); |
---|
168 | data->maxnumelements = 5; |
---|
169 | } |
---|
170 | else if (curelem==data->maxnumelements) |
---|
171 | { |
---|
172 | (data->maxnumelements) *= 2; |
---|
173 | data->element= |
---|
174 | (NA_Sequence*)Realloc((char*)data->element |
---|
175 | ,data->maxnumelements*sizeof(NA_Sequence)); |
---|
176 | } |
---|
177 | |
---|
178 | InitNASeq(&(data->element[curelem]), |
---|
179 | in_line[0] == '#'?DNA: |
---|
180 | in_line[0] == '%'?PROTEIN: |
---|
181 | in_line[0] == '"'?TEXT: |
---|
182 | in_line[0] == '@'?MASK:TEXT); |
---|
183 | this_elem= &(data->element[curelem]); |
---|
184 | if(in_line[strlen(in_line)-1] == '\n') |
---|
185 | in_line[strlen(in_line)-1] = '\0'; |
---|
186 | strncpy(this_elem->short_name,(char*)&(in_line[1]),31); |
---|
187 | this_elem->offset = offset; |
---|
188 | } |
---|
189 | else if(in_line[0] != '\n') |
---|
190 | { |
---|
191 | size_t strl = strlen(in_line); |
---|
192 | for(j=0,jj=0;j<strl;j++) |
---|
193 | if(in_line[j] != ' ' && in_line[j] != '\n' && |
---|
194 | in_line[j] != '\t') |
---|
195 | buffer[jj++] = in_line[j]; |
---|
196 | |
---|
197 | if(data->element[curelem].rmatrix) |
---|
198 | Ascii2NA(buffer,jj,data->element[curelem].rmatrix); |
---|
199 | AppendNA((NA_Base*)buffer,jj,&(data->element[curelem])); |
---|
200 | } |
---|
201 | } |
---|
202 | |
---|
203 | for(j=0;j<data->numelements;j++) |
---|
204 | data->maxlen = MAX(data->maxlen,data->element[j].seqlen + |
---|
205 | data->element[j].offset); |
---|
206 | |
---|
207 | for(j=0;j<data->numelements;j++) |
---|
208 | if(data->element[j].seqlen==0) |
---|
209 | data->element[j].protect = |
---|
210 | PROT_BASE_CHANGES+ PROT_GREY_SPACE+ |
---|
211 | PROT_WHITE_SPACE+ PROT_TRANSLATION; |
---|
212 | |
---|
213 | NormalizeOffset(data); |
---|
214 | Regroup(data); |
---|
215 | return; |
---|
216 | } |
---|
217 | |
---|
218 | /* |
---|
219 | LoadFile(): |
---|
220 | Load the given filename into the given dataset. Handle any |
---|
221 | type conversion needed to get the data into the specified data type. |
---|
222 | This routine is used in situations where the format and datatype is known. |
---|
223 | |
---|
224 | Copyright (c) 1989-1990, University of Illinois board of trustees. All |
---|
225 | rights reserved. Written by Steven Smith at the Center for Prokaryote Genome |
---|
226 | Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. |
---|
227 | Carl Woese. |
---|
228 | |
---|
229 | Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. |
---|
230 | All rights reserved. |
---|
231 | */ |
---|
232 | |
---|
233 | static void LoadFile(char *filename,NA_Alignment *dataset,int type,int format) |
---|
234 | { |
---|
235 | |
---|
236 | if (DataType != type) |
---|
237 | fprintf(stderr,"Warning, datatypes do not match.\n"); |
---|
238 | /* |
---|
239 | Handle the overwrite/create/merge dialog here. |
---|
240 | */ |
---|
241 | switch(format) |
---|
242 | { |
---|
243 | case NA_FLAT: |
---|
244 | ReadNA_Flat(filename,(char*)dataset,type); |
---|
245 | ((NA_Alignment*)dataset)->format = GDE; |
---|
246 | break; |
---|
247 | |
---|
248 | case GENBANK: |
---|
249 | ReadGen(filename,dataset,type); |
---|
250 | ((NA_Alignment*)dataset)->format = GENBANK; |
---|
251 | break; |
---|
252 | |
---|
253 | case ARBDB: |
---|
254 | ReadArbdb_plain(filename,dataset,type); |
---|
255 | ((NA_Alignment*)dataset)->format = ARBDB; |
---|
256 | break; |
---|
257 | |
---|
258 | case GDE: |
---|
259 | ReadGDE(filename,dataset,type); |
---|
260 | ((NA_Alignment*)dataset)->format = GDE; |
---|
261 | break; |
---|
262 | case COLORMASK: |
---|
263 | ReadCMask(filename); |
---|
264 | |
---|
265 | default: |
---|
266 | break; |
---|
267 | } |
---|
268 | return; |
---|
269 | } |
---|
270 | |
---|
271 | static int FindType(char *name,int *dtype,int *ftype) |
---|
272 | { |
---|
273 | FILE *file; |
---|
274 | char in_line[GBUFSIZ]; |
---|
275 | |
---|
276 | file = fopen(name,"r"); |
---|
277 | *dtype=0; |
---|
278 | *ftype=0; |
---|
279 | |
---|
280 | if (file == NULL) |
---|
281 | return(1); |
---|
282 | |
---|
283 | /* |
---|
284 | * Is this a flat file? |
---|
285 | * Get the first non blank line, see if a type marker shows up. |
---|
286 | */ |
---|
287 | if (fgets(in_line,GBUFSIZ,file) == 0) { |
---|
288 | return 1; |
---|
289 | } |
---|
290 | for(;strlen(in_line)<2 && fgets(in_line,GBUFSIZ,file) != NULL;) ; |
---|
291 | |
---|
292 | if (in_line[0] == '#' || in_line[0] == '%' || |
---|
293 | in_line[0] == '"' || in_line[0] == '@' ) |
---|
294 | { |
---|
295 | *dtype=NASEQ_ALIGN; |
---|
296 | *ftype=NA_FLAT; |
---|
297 | } |
---|
298 | |
---|
299 | /* |
---|
300 | * Else, try genbank |
---|
301 | */ |
---|
302 | else |
---|
303 | { |
---|
304 | fclose(file); |
---|
305 | file = fopen(name,"r"); |
---|
306 | *dtype=0; |
---|
307 | *ftype=0; |
---|
308 | |
---|
309 | if (file == NULL) |
---|
310 | return(1); |
---|
311 | |
---|
312 | for(;fgets(in_line,GBUFSIZ,file) != NULL;) |
---|
313 | if(Find(in_line,"LOCUS")) |
---|
314 | { |
---|
315 | *dtype=NASEQ_ALIGN; |
---|
316 | *ftype=GENBANK; |
---|
317 | fclose(file); |
---|
318 | return(0); |
---|
319 | } |
---|
320 | /* |
---|
321 | * and last, try GDE |
---|
322 | */ |
---|
323 | else if(Find(in_line,"sequence")) |
---|
324 | { |
---|
325 | *dtype = NASEQ_ALIGN; |
---|
326 | *ftype = GDE; |
---|
327 | fclose(file); |
---|
328 | return(0); |
---|
329 | } |
---|
330 | else if(Find(in_line,"start:")) |
---|
331 | { |
---|
332 | *dtype = NASEQ_ALIGN; |
---|
333 | *ftype = COLORMASK; |
---|
334 | fclose(file); |
---|
335 | return(0); |
---|
336 | } |
---|
337 | } |
---|
338 | |
---|
339 | fclose(file); |
---|
340 | return(0); |
---|
341 | } |
---|
342 | |
---|
343 | /* |
---|
344 | LoadData(): |
---|
345 | Load a data set from the command line argument. |
---|
346 | |
---|
347 | Copyright (c) 1989, University of Illinois board of trustees. All rights |
---|
348 | reserved. Written by Steven Smith at the Center for Prokaryote Genome |
---|
349 | Analysis. Design and implementation guidance by Dr. Gary Olsen and Dr. |
---|
350 | Carl Woese. |
---|
351 | |
---|
352 | Copyright (c) 1990,1991,1992 Steven Smith at the Harvard Genome Laboratory. |
---|
353 | All rights reserved. |
---|
354 | |
---|
355 | */ |
---|
356 | |
---|
357 | void LoadData(char *filen) |
---|
358 | { |
---|
359 | |
---|
360 | FILE *file; |
---|
361 | NA_Alignment *DataNaAln; |
---|
362 | char temp[1024]; |
---|
363 | /* |
---|
364 | * Get file name, determine the file type, and away we go.. |
---|
365 | */ |
---|
366 | if(Find2(filen,"gde")!=0) |
---|
367 | strcpy(FileName,filen); |
---|
368 | |
---|
369 | if (strstr(filen,".arb") || strchr(filen, ':')) { /* ARBDB TYPE */ |
---|
370 | if (DataSet == NULL) { |
---|
371 | DataSet = (NA_Alignment *) Calloc(1, |
---|
372 | sizeof(NA_Alignment)); |
---|
373 | DataNaAln = (NA_Alignment *) DataSet; |
---|
374 | DataSet->rel_offset = 0; |
---|
375 | } else{ |
---|
376 | DataNaAln = (NA_Alignment *) DataSet; |
---|
377 | } |
---|
378 | DataType = NASEQ_ALIGN; |
---|
379 | FileFormat = ARBDB; |
---|
380 | LoadFile(filen, DataNaAln, |
---|
381 | DataType, FileFormat); |
---|
382 | |
---|
383 | sprintf(temp,"Remote ARBDB access (%s)",filen); |
---|
384 | return; |
---|
385 | } |
---|
386 | |
---|
387 | |
---|
388 | if( (file=fopen(filen,"r"))!=0 ) |
---|
389 | { |
---|
390 | FindType(filen,&DataType,&FileFormat); |
---|
391 | switch(DataType) |
---|
392 | { |
---|
393 | case NASEQ_ALIGN: |
---|
394 | if(DataSet == NULL) |
---|
395 | { |
---|
396 | DataSet = (NA_Alignment*)Calloc(1, |
---|
397 | sizeof(NA_Alignment)); |
---|
398 | DataNaAln =(NA_Alignment*)DataSet; |
---|
399 | DataSet->rel_offset = 0; |
---|
400 | }else{ |
---|
401 | DataNaAln = (NA_Alignment*)DataSet; |
---|
402 | } |
---|
403 | |
---|
404 | LoadFile(filen,DataNaAln, |
---|
405 | DataType,FileFormat); |
---|
406 | |
---|
407 | break; |
---|
408 | default: |
---|
409 | aw_message(GBS_global_string("Internal error: unknown file type of file %s",filen)); |
---|
410 | break; |
---|
411 | } |
---|
412 | fclose(file); |
---|
413 | } |
---|
414 | sprintf(temp,"Genetic Data Environment 2.2 (%s)",FileName); |
---|
415 | return; |
---|
416 | } |
---|
417 | |
---|
418 | |
---|
419 | void AppendNA(NA_Base *buffer,int len,NA_Sequence *seq) |
---|
420 | { |
---|
421 | int curlen=0,j; |
---|
422 | NA_Base *temp; |
---|
423 | temp=0; |
---|
424 | if(seq->seqlen+len >= seq->seqmaxlen) |
---|
425 | { |
---|
426 | if(seq->seqlen>0) |
---|
427 | seq->sequence = (NA_Base*)Realloc((char*)seq->sequence, |
---|
428 | (seq->seqlen + len+GBUFSIZ) * sizeof(NA_Base)); |
---|
429 | else |
---|
430 | seq->sequence = (NA_Base*)Calloc(1,(seq->seqlen + |
---|
431 | len+GBUFSIZ) * sizeof(NA_Base)); |
---|
432 | seq->seqmaxlen = seq->seqlen + len+GBUFSIZ; |
---|
433 | } |
---|
434 | /* |
---|
435 | * seqlen is the length, and the index of the next free |
---|
436 | * base |
---|
437 | */ |
---|
438 | curlen = seq->seqlen + seq->offset; |
---|
439 | for(j=0;j<len;j++) |
---|
440 | putelem(seq,j+curlen,buffer[j]); |
---|
441 | |
---|
442 | seq->seqlen += len; |
---|
443 | return; |
---|
444 | } |
---|
445 | |
---|
/*
 * Translate a buffer in place through a lookup table.
 *
 * buffer - characters to translate; the first `len` entries are replaced.
 * len    - number of characters to translate.
 * matrix - table indexed by the character value (taken as unsigned char);
 *          when NULL the buffer is left untouched.
 */
void Ascii2NA(char *buffer,int len,int matrix[16])
{
    int pos = 0;

    if (matrix == NULL)
        return;

    while (pos < len) {
        unsigned char code = (unsigned char)buffer[pos];
        buffer[pos] = matrix[code];
        pos++;
    }
}
---|
460 | |
---|
461 | int WriteNA_Flat(NA_Alignment *aln,char *filename,int method,int maskable) |
---|
462 | { |
---|
463 | size_t j; |
---|
464 | int kk,mask = -1,k,offset; |
---|
465 | char offset_str[100],buf[100]; |
---|
466 | NA_Sequence *seqs; |
---|
467 | FILE *file; |
---|
468 | if(aln == NULL) |
---|
469 | return(1); |
---|
470 | if(aln->numelements == 0) |
---|
471 | return(1); |
---|
472 | seqs = aln->element; |
---|
473 | |
---|
474 | file = fopen(filename,"w"); |
---|
475 | if(file == NULL) |
---|
476 | { |
---|
477 | Warning("Cannot open file for output"); |
---|
478 | return(1); |
---|
479 | } |
---|
480 | if(maskable && (method != SELECT_REGION)) |
---|
481 | { |
---|
482 | for(j=0;j<aln->numelements;j++) |
---|
483 | if(seqs[j].elementtype == MASK && |
---|
484 | seqs[j].selected) |
---|
485 | mask = j; |
---|
486 | } |
---|
487 | /* Removed by OLIVER |
---|
488 | for(j=0;j<aln->numelements;j++) |
---|
489 | { |
---|
490 | SeqNorm(&(seqs[j])); |
---|
491 | } |
---|
492 | */ |
---|
493 | |
---|
494 | for(j=0;j<aln->numelements;j++) |
---|
495 | { |
---|
496 | if (method != SELECT_REGION) { |
---|
497 | offset = seqs[j].offset; |
---|
498 | } |
---|
499 | else { |
---|
500 | for(offset=seqs[j].offset; aln->selection_mask[offset] == '0'; offset++) ; |
---|
501 | } |
---|
502 | |
---|
503 | if(offset+aln->rel_offset != 0) |
---|
504 | sprintf(offset_str,"(%d)",offset+aln->rel_offset); |
---|
505 | else |
---|
506 | offset_str[0] = '\0'; |
---|
507 | |
---|
508 | if((((int)j!=mask) && (seqs[j].selected) && method != SELECT_REGION) |
---|
509 | || (method == SELECT_REGION && seqs[j].subselected) |
---|
510 | || method == ALL) |
---|
511 | { |
---|
512 | fprintf(file,"%c%s%s\n", |
---|
513 | seqs[j].elementtype == DNA?'#': |
---|
514 | seqs[j].elementtype == RNA?'#': |
---|
515 | seqs[j].elementtype == PROTEIN?'%': |
---|
516 | seqs[j].elementtype == TEXT?'"': |
---|
517 | seqs[j].elementtype == MASK?'@':'"', |
---|
518 | seqs[j].short_name, |
---|
519 | (offset+aln->rel_offset == 0)? "":offset_str); |
---|
520 | if(seqs[j].tmatrix) |
---|
521 | { |
---|
522 | if(mask == -1) |
---|
523 | for(k=0,kk=0;kk<seqs[j].seqlen;kk++) |
---|
524 | { |
---|
525 | if((k)%60 == 0 && k>0) |
---|
526 | { |
---|
527 | buf[60] = '\0'; |
---|
528 | fputs(buf,file); |
---|
529 | putc('\n',file); |
---|
530 | } |
---|
531 | if(method == SELECT_REGION) |
---|
532 | { |
---|
533 | if(aln->selection_mask[kk+offset]=='1') |
---|
534 | { |
---|
535 | buf[k%60] =((char)seqs[j].tmatrix[ |
---|
536 | (int)getelem( &(seqs[j]),kk+offset) ]); |
---|
537 | k++; |
---|
538 | } |
---|
539 | } |
---|
540 | else |
---|
541 | { |
---|
542 | buf[k%60] =((char)seqs[j].tmatrix[ |
---|
543 | (int)getelem( &(seqs[j]),kk+offset) ]); |
---|
544 | k++; |
---|
545 | } |
---|
546 | } |
---|
547 | else |
---|
548 | for(k=0,kk=0;kk<seqs[j].seqlen;kk++) |
---|
549 | { |
---|
550 | if(getelem(&(seqs[mask]),kk+seqs[mask].offset) != '0' |
---|
551 | && (getelem(&(seqs[mask]),kk+seqs[mask].offset) |
---|
552 | != '-')) |
---|
553 | { |
---|
554 | if((k++)%60 == 0 && k>1) |
---|
555 | { |
---|
556 | buf[60] = '\0'; |
---|
557 | fputs(buf,file); |
---|
558 | putc('\n',file); |
---|
559 | } |
---|
560 | buf[k%60] = ((char)seqs[j].tmatrix |
---|
561 | [getelem(&(seqs[j]),kk+offset)]); |
---|
562 | } |
---|
563 | } |
---|
564 | } |
---|
565 | else |
---|
566 | { |
---|
567 | if(mask == -1) |
---|
568 | for(k=0,kk=0;kk<seqs[j].seqlen;kk++) |
---|
569 | { |
---|
570 | if((k)%60 == 0 && k>0) |
---|
571 | { |
---|
572 | buf[60] = '\0'; |
---|
573 | fputs(buf,file); |
---|
574 | putc('\n',file); |
---|
575 | } |
---|
576 | if(method == SELECT_REGION) |
---|
577 | { |
---|
578 | if(aln->selection_mask[kk+offset]=='1') |
---|
579 | { |
---|
580 | buf[k%60] =(getelem( &(seqs[j]),kk+offset)); |
---|
581 | k++; |
---|
582 | } |
---|
583 | } |
---|
584 | else |
---|
585 | { |
---|
586 | buf[k%60] =( getelem( &(seqs[j]),kk+offset) ); |
---|
587 | k++; |
---|
588 | } |
---|
589 | } |
---|
590 | else |
---|
591 | for(k=0,kk=0;kk<seqs[j].seqlen;kk++) |
---|
592 | { |
---|
593 | if(getelem(&(seqs[mask]),kk+offset) == '1') |
---|
594 | { |
---|
595 | if((k++)%60 == 0 && k>1) |
---|
596 | { |
---|
597 | buf[60] = '\0'; |
---|
598 | fputs(buf,file); |
---|
599 | putc('\n',file); |
---|
600 | } |
---|
601 | buf[k%60] =((char)getelem(&(seqs[j]), |
---|
602 | kk+offset)); |
---|
603 | } |
---|
604 | } |
---|
605 | } |
---|
606 | buf[(k%60)>0 ? (k%60):60] = '\0'; |
---|
607 | fputs(buf,file); |
---|
608 | putc('\n',file); |
---|
609 | } |
---|
610 | } |
---|
611 | fclose(file); |
---|
612 | return(0); |
---|
613 | } |
---|
614 | |
---|
615 | |
---|
/*
 * Report a warning to the user.
 *
 * s - message text; it is passed on unchanged to aw_message().
 */
void Warning(const char *s)
{
    aw_message(s);
}
---|
625 | |
---|
626 | |
---|
627 | void InitNASeq(NA_Sequence *seq,int type) |
---|
628 | { |
---|
629 | |
---|
630 | |
---|
631 | SetTime(&(seq->t_stamp.origin)); |
---|
632 | SetTime(&(seq->t_stamp.modify)); |
---|
633 | strncpy(seq->id,uniqueID(),79); |
---|
634 | seq->seq_name[0] = '\0'; |
---|
635 | seq->barcode[0] = '\0'; |
---|
636 | seq->contig[0] = '\0'; |
---|
637 | seq->membrane[0] = '\0'; |
---|
638 | seq->authority[0] = '\0'; |
---|
639 | seq->short_name[0] = '\0'; |
---|
640 | seq->sequence = NULL; |
---|
641 | seq->offset = 0; |
---|
642 | seq->baggage = NULL; |
---|
643 | seq->baggage_len = 0; |
---|
644 | seq->baggage_maxlen = 0; |
---|
645 | seq->comments = NULL; |
---|
646 | seq->comments_len = 0; |
---|
647 | seq->comments_maxlen = 0; |
---|
648 | seq->description[0] = '\0'; |
---|
649 | seq->mask = NULL; |
---|
650 | seq->seqlen = 0; |
---|
651 | seq->seqmaxlen = 0; |
---|
652 | seq->protect = PROT_WHITE_SPACE + PROT_TRANSLATION; |
---|
653 | #ifdef HGL |
---|
654 | seq->attr = 0; |
---|
655 | #else |
---|
656 | seq->attr = IS_5_TO_3 + IS_PRIMARY; |
---|
657 | #endif |
---|
658 | seq->elementtype = type; |
---|
659 | seq->groupid = 0; |
---|
660 | seq->groupb = NULL; |
---|
661 | seq->groupf = NULL; |
---|
662 | seq->cmask = NULL; |
---|
663 | seq->selected = 0; |
---|
664 | seq->subselected = 0; |
---|
665 | |
---|
666 | switch (type) |
---|
667 | { |
---|
668 | case DNA: |
---|
669 | seq->tmatrix = Default_DNA_Trans; |
---|
670 | seq->rmatrix = Default_NA_RTrans; |
---|
671 | seq->col_lut = Default_NAColor_LKUP; |
---|
672 | break; |
---|
673 | case RNA: |
---|
674 | seq->tmatrix = Default_RNA_Trans; |
---|
675 | seq->rmatrix = Default_NA_RTrans; |
---|
676 | seq->col_lut = Default_NAColor_LKUP; |
---|
677 | break; |
---|
678 | case PROTEIN: |
---|
679 | seq->tmatrix = NULL; |
---|
680 | seq->rmatrix = NULL; |
---|
681 | seq->col_lut = Default_PROColor_LKUP; |
---|
682 | break; |
---|
683 | case MASK: |
---|
684 | case TEXT: |
---|
685 | default: |
---|
686 | seq->tmatrix = NULL; |
---|
687 | seq->rmatrix = NULL; |
---|
688 | seq->col_lut = NULL; |
---|
689 | break; |
---|
690 | } |
---|
691 | return; |
---|
692 | } |
---|
693 | |
---|
694 | |
---|
695 | void ReadCMask(const char *filename) |
---|
696 | { |
---|
697 | |
---|
698 | char in_line[GBUFSIZ]; |
---|
699 | char head[GBUFSIZ]; |
---|
700 | char curname[GBUFSIZ]; |
---|
701 | char temp[GBUFSIZ]; |
---|
702 | bool IGNORE_DASH = false; |
---|
703 | int offset; |
---|
704 | |
---|
705 | /*NA_DisplayData *NAdd;*/ |
---|
706 | NA_Alignment *aln; |
---|
707 | |
---|
708 | size_t j; |
---|
709 | size_t curlen = 0; |
---|
710 | int *colors = 0,orig_ctype,jj,indx = 0; |
---|
711 | FILE *file; |
---|
712 | |
---|
713 | if(DataSet == NULL) return; |
---|
714 | |
---|
715 | /*NAdd = (NA_DisplayData*)((NA_Alignment*)DataSet)->na_ddata; |
---|
716 | |
---|
717 | if(NAdd == NULL) |
---|
718 | return; |
---|
719 | */ |
---|
720 | aln = (NA_Alignment*)DataSet; |
---|
721 | |
---|
722 | curname[0] = '\0'; |
---|
723 | orig_ctype = COLOR_MONO; |
---|
724 | file = fopen(filename,"r"); |
---|
725 | if(file == NULL) |
---|
726 | { |
---|
727 | Warning("File not found"); |
---|
728 | Warning(filename); |
---|
729 | return; |
---|
730 | } |
---|
731 | |
---|
732 | /*NAdd->color_type = COLOR_ALN_MASK;*/ |
---|
733 | for(;fgets(in_line,GBUFSIZ,file) !=0;) |
---|
734 | { |
---|
735 | if(Find(in_line,"offset:")) |
---|
736 | { |
---|
737 | crop(in_line,head,temp); |
---|
738 | sscanf(temp,"%d",&(aln->cmask_offset)); |
---|
739 | } |
---|
740 | else if(Find(in_line,"nodash:")) |
---|
741 | IGNORE_DASH = true; |
---|
742 | else if(Find(in_line,"dash:")) |
---|
743 | IGNORE_DASH = true; |
---|
744 | else if(Find(in_line,"name:")) |
---|
745 | { |
---|
746 | crop(in_line,head,curname); |
---|
747 | curname[strlen(curname)-1] = '\0'; |
---|
748 | for(j=0;j<strlen(curname);j++) |
---|
749 | if(curname[j] == '(') |
---|
750 | curname[j] = '\0'; |
---|
751 | } |
---|
752 | else if(Find(in_line,"length:")) |
---|
753 | { |
---|
754 | crop(in_line,head,temp); |
---|
755 | sscanf(temp,"%zu",&curlen); |
---|
756 | } |
---|
757 | else if(Find(in_line,"start:")) |
---|
758 | { |
---|
759 | indx = -1; |
---|
760 | if(curlen == 0) |
---|
761 | { |
---|
762 | Warning("illegal format in colormask"); |
---|
763 | /*NAdd->color_type = orig_ctype;*/ |
---|
764 | return; |
---|
765 | } |
---|
766 | if(strlen(curname) != 0) |
---|
767 | { |
---|
768 | indx = -1; |
---|
769 | for(j=0;j<aln->numelements;j++) |
---|
770 | if(Find(aln->element[j].short_name,curname) |
---|
771 | || Find(aln->element[j].id,curname)) |
---|
772 | { |
---|
773 | if(aln->element[j].cmask != NULL) |
---|
774 | Cfree((char*)aln -> element[j].cmask); |
---|
775 | colors=(int*)Calloc(aln->element[j] |
---|
776 | .seqmaxlen+1+aln->element[j].offset |
---|
777 | ,sizeof(int)); |
---|
778 | aln->element[j].cmask = colors; |
---|
779 | /*NAdd->color_type = COLOR_SEQ_MASK;*/ |
---|
780 | indx = j; |
---|
781 | j = aln->numelements; |
---|
782 | } |
---|
783 | if(indx == -1) |
---|
784 | colors=NULL; |
---|
785 | } |
---|
786 | else |
---|
787 | { |
---|
788 | if(aln->cmask != NULL) Cfree((char*)aln->cmask); |
---|
789 | colors=(int*)Calloc(curlen,sizeof(int)); |
---|
790 | aln->cmask = colors; |
---|
791 | aln->cmask_len = curlen; |
---|
792 | /*NAdd->color_type = COLOR_ALN_MASK;*/ |
---|
793 | for(j=0;j<curlen;j++) |
---|
794 | colors[j] = 12; |
---|
795 | } |
---|
796 | |
---|
797 | if(IGNORE_DASH && (indx != -1)) |
---|
798 | { |
---|
799 | for(jj=0,j=0;(j<curlen) && |
---|
800 | (jj<aln->element[indx].seqlen);j++,jj++) |
---|
801 | { |
---|
802 | offset = aln->element[indx].offset; |
---|
803 | if(fgets(in_line,GBUFSIZ,file)==NULL) |
---|
804 | { |
---|
805 | Warning |
---|
806 | ("illegal format in colormask"); |
---|
807 | /*NAdd->color_type = orig_ctype;*/ |
---|
808 | return; |
---|
809 | } |
---|
810 | /* |
---|
811 | * Fixed so that the keyword nodash causes the colormask to be mapped |
---|
812 | * to the sequence, not the alignment. |
---|
813 | * |
---|
814 | * The allocated space is equal the seqlen of the matched sequence. |
---|
815 | * |
---|
816 | */ |
---|
817 | if(aln->element[indx].tmatrix) |
---|
818 | for(;(getelem(&(aln->element[indx]),jj |
---|
819 | +offset) |
---|
820 | ==(aln->element[indx].tmatrix['-']) |
---|
821 | || (getelem(&(aln->element[indx]),jj |
---|
822 | +offset) |
---|
823 | ==aln->element[indx].tmatrix['~'])) |
---|
824 | && jj < aln->element[indx].seqlen;) |
---|
825 | colors[jj++] = 12; |
---|
826 | else |
---|
827 | for(;getelem(&(aln->element[indx]),jj |
---|
828 | +offset) |
---|
829 | =='-' && jj < aln->element[indx].seqlen;) |
---|
830 | colors[jj++] = 12; |
---|
831 | |
---|
832 | sscanf(in_line,"%d",&(colors[jj])); |
---|
833 | } |
---|
834 | } |
---|
835 | else if((indx == -1) && (strlen(curname) != 0)) |
---|
836 | for(j=0;j<curlen;j++) |
---|
837 | fgets(in_line,GBUFSIZ,file); |
---|
838 | else |
---|
839 | for(j=0;j<curlen;j++) |
---|
840 | { |
---|
841 | if(fgets(in_line,GBUFSIZ,file)==NULL) |
---|
842 | { |
---|
843 | Warning |
---|
844 | ("illegal format in colormask"); |
---|
845 | /*NAdd->color_type = orig_ctype;*/ |
---|
846 | return; |
---|
847 | } |
---|
848 | sscanf(in_line,"%d",&(colors[j])); |
---|
849 | } |
---|
850 | IGNORE_DASH = false; |
---|
851 | curname[0] = '\0'; |
---|
852 | } |
---|
853 | |
---|
854 | } |
---|
855 | /*RepaintAll(true);*/ |
---|
856 | return; |
---|
857 | } |
---|
858 | |
---|
859 | |
---|
860 | int WriteStatus(NA_Alignment *aln,char *filename,int method) |
---|
861 | { |
---|
862 | // extern int EditMode; |
---|
863 | // NA_DisplayData *NAdd; |
---|
864 | NA_Sequence *this_seq; |
---|
865 | int j; |
---|
866 | FILE *file; |
---|
867 | method=0;filename=0; |
---|
868 | |
---|
869 | if(DataSet == NULL) |
---|
870 | return(1); |
---|
871 | |
---|
872 | /* |
---|
873 | NAdd = (NA_DisplayData*)((NA_Alignment*)DataSet)->na_ddata; |
---|
874 | if(NAdd == NULL) |
---|
875 | return(1); |
---|
876 | */ |
---|
877 | |
---|
878 | file = fopen(filename,"w"); |
---|
879 | if (file == NULL) |
---|
880 | { |
---|
881 | Warning("Cannot open status file."); |
---|
882 | return(1); |
---|
883 | } |
---|
884 | fprintf(file,"File_format: %s\n",FileFormat==GENBANK?"genbank":"flat"); |
---|
885 | /* |
---|
886 | fprintf(file,"EditMode: %s\n",EditMode==INSERT?"insert": |
---|
887 | "check"); |
---|
888 | */ |
---|
889 | |
---|
890 | this_seq = &(aln->element[1]); /* Nadd->cursor !? */ |
---|
891 | if(this_seq->id != NULL) |
---|
892 | fprintf(file,"sequence-ID %s\n",this_seq->id); |
---|
893 | fprintf(file,"Column: %d\nPos:%d\n",1,1);/*NAdd->cursor_x,NAdd->position*/ |
---|
894 | switch(this_seq->elementtype) |
---|
895 | { |
---|
896 | case DNA: |
---|
897 | case RNA: |
---|
898 | fprintf(file,"#%s\n", |
---|
899 | this_seq->short_name); |
---|
900 | break; |
---|
901 | case PROTEIN: |
---|
902 | fprintf(file,"%%%s\n", |
---|
903 | this_seq->short_name); |
---|
904 | break; |
---|
905 | case MASK: |
---|
906 | fprintf(file,"@%s\n", |
---|
907 | this_seq->short_name); |
---|
908 | break; |
---|
909 | case TEXT: |
---|
910 | fprintf(file,"%c%s\n",'"', |
---|
911 | this_seq->short_name); |
---|
912 | break; |
---|
913 | default: |
---|
914 | break; |
---|
915 | } |
---|
916 | if(this_seq->tmatrix) |
---|
917 | for(j=0;j<this_seq->seqlen;j++) |
---|
918 | putc(this_seq->tmatrix[getelem(this_seq,j)],file); |
---|
919 | else |
---|
920 | for(j=0;j<this_seq->seqlen;j++) |
---|
921 | putc(getelem(this_seq,j),file); |
---|
922 | |
---|
923 | fclose(file); |
---|
924 | return(0); |
---|
925 | } |
---|
926 | |
---|
/*
 * Placeholder for reading a status file back in.
 *
 * filename - ignored; the function currently does nothing.
 */
void ReadStatus(char *filename)
{
    (void)filename;
}
---|
958 | |
---|
959 | |
---|
960 | void NormalizeOffset(NA_Alignment *aln) |
---|
961 | { |
---|
962 | int i; |
---|
963 | size_t j; |
---|
964 | int offset = 99999999; |
---|
965 | i=0; |
---|
966 | |
---|
967 | for(j=0;j<aln->numelements;j++) |
---|
968 | offset = MIN(offset,aln->element[j].offset); |
---|
969 | |
---|
970 | for(j=0;j<aln->numelements;j++) |
---|
971 | aln->element[j].offset -= offset; |
---|
972 | |
---|
973 | aln->maxlen = -999999999; |
---|
974 | for(j=0;j<aln->numelements;j++) |
---|
975 | aln->maxlen = MAX(aln->element[j].seqlen+aln->element[j].offset, |
---|
976 | aln->maxlen); |
---|
977 | |
---|
978 | aln->rel_offset += offset; |
---|
979 | |
---|
980 | if(aln->numelements == 0) |
---|
981 | aln->rel_offset = 0; |
---|
982 | |
---|
983 | return; |
---|
984 | } |
---|
985 | |
---|
986 | int WriteCMask(NA_Alignment *aln,char *filename,int method,int maskable) |
---|
987 | { |
---|
988 | size_t j; |
---|
989 | int kk,mask = -1,k,offset; |
---|
990 | char offset_str[100]; |
---|
991 | int *buf; |
---|
992 | NA_Sequence *seqs; |
---|
993 | FILE *file; |
---|
994 | if(aln == NULL) |
---|
995 | return(1); |
---|
996 | |
---|
997 | if(aln->numelements == 0) |
---|
998 | return(1); |
---|
999 | seqs = aln->element; |
---|
1000 | |
---|
1001 | file = fopen(filename,"w"); |
---|
1002 | if(file == NULL) |
---|
1003 | { |
---|
1004 | Warning("Cannot open file for output"); |
---|
1005 | return(1); |
---|
1006 | } |
---|
1007 | if(maskable && (method != SELECT_REGION)) |
---|
1008 | { |
---|
1009 | for(j=0;j<aln->numelements;j++) |
---|
1010 | if(seqs[j].elementtype == MASK && |
---|
1011 | seqs[j].selected) |
---|
1012 | mask = j; |
---|
1013 | } |
---|
1014 | for(j=0;j<aln->numelements;j++) |
---|
1015 | { |
---|
1016 | SeqNorm(&(seqs[j])); |
---|
1017 | } |
---|
1018 | |
---|
1019 | for(j=0;j<aln->numelements;j++) |
---|
1020 | { |
---|
1021 | if(method != SELECT_REGION) { |
---|
1022 | offset = seqs[j].offset; |
---|
1023 | } |
---|
1024 | else { |
---|
1025 | for(offset=seqs[j].offset; aln->selection_mask[offset] == '0'; offset++) ; |
---|
1026 | } |
---|
1027 | |
---|
1028 | if(offset+aln->rel_offset != 0) { |
---|
1029 | sprintf(offset_str,"(%d)",offset+aln->rel_offset); |
---|
1030 | } |
---|
1031 | else { |
---|
1032 | offset_str[0] = '\0'; |
---|
1033 | } |
---|
1034 | |
---|
1035 | if((((int)j!=mask) && (seqs[j].selected) && method != SELECT_REGION) |
---|
1036 | || (method == SELECT_REGION && seqs[j].subselected) |
---|
1037 | || method == ALL) |
---|
1038 | { |
---|
1039 | fprintf(file,"%c%s%s\n", |
---|
1040 | seqs[j].elementtype == DNA?'#': |
---|
1041 | seqs[j].elementtype == RNA?'#': |
---|
1042 | seqs[j].elementtype == PROTEIN?'%': |
---|
1043 | seqs[j].elementtype == TEXT?'"': |
---|
1044 | seqs[j].elementtype == MASK?'@':'"', |
---|
1045 | seqs[j].short_name, |
---|
1046 | (offset+aln->rel_offset == 0)? "":offset_str); |
---|
1047 | |
---|
1048 | if(seqs[j].cmask != NULL) |
---|
1049 | { |
---|
1050 | |
---|
1051 | buf =(int*) Calloc(seqs[j].seqlen,sizeof(int) ); |
---|
1052 | |
---|
1053 | if(mask == -1) |
---|
1054 | { |
---|
1055 | for(k=0,kk=0;kk<seqs[j].seqlen;kk++) |
---|
1056 | { |
---|
1057 | if(method == SELECT_REGION) |
---|
1058 | { |
---|
1059 | if(aln->selection_mask[kk+offset]=='1') |
---|
1060 | buf[k++] = (getcmask( &(seqs[j]),kk+offset)); |
---|
1061 | } |
---|
1062 | |
---|
1063 | else |
---|
1064 | buf[k++] =( getcmask( &(seqs[j]),kk+offset) ); |
---|
1065 | } |
---|
1066 | } |
---|
1067 | else |
---|
1068 | { |
---|
1069 | for(k=0,kk=0;kk<seqs[j].seqlen;kk++) |
---|
1070 | if(getelem(&(seqs[mask]),kk+offset) == '1') |
---|
1071 | buf[k++] =(getcmask(&(seqs[j]), kk+offset)); |
---|
1072 | /* |
---|
1073 | * Looks like k might be one behind? |
---|
1074 | */ |
---|
1075 | } |
---|
1076 | fprintf(file,"name:%s\noffset:%d\nlength:%d\nstart:\n", |
---|
1077 | seqs[j].short_name,seqs[j].offset,k); |
---|
1078 | |
---|
1079 | for(kk = 0; kk < k; kk++) |
---|
1080 | fprintf(file, "%d\n", buf[kk]); |
---|
1081 | |
---|
1082 | Cfree((char*)buf); |
---|
1083 | } |
---|
1084 | } |
---|
1085 | } |
---|
1086 | fclose(file); |
---|
1087 | return(0); |
---|
1088 | } |
---|