1 | /* -------------- genbank related subroutines ----------------- */ |
---|
2 | |
---|
3 | #include <stdio.h> |
---|
4 | #include <ctype.h> |
---|
5 | #include "convert.h" |
---|
6 | #include "global.h" |
---|
7 | #include <assert.h> |
---|
8 | |
---|
9 | #define NOPERIOD 0 |
---|
10 | #define PERIOD 1 |
---|
11 | |
---|
12 | extern int warning_out; |
---|
13 | /* ------------------------------------------------------------ |
---|
14 | * Function init_genbank(). |
---|
15 | * Initialize genbank entry. |
---|
16 | */ |
---|
17 | void init_genbank() { |
---|
18 | |
---|
19 | /* void Freespace(); */ |
---|
20 | /* char *Dupstr(); */ |
---|
21 | int indi; |
---|
22 | |
---|
23 | /* initialize genbank format */ |
---|
24 | |
---|
25 | Freespace(&(data.gbk.locus)); |
---|
26 | Freespace(&(data.gbk.definition)); |
---|
27 | Freespace(&(data.gbk.accession)); |
---|
28 | Freespace(&(data.gbk.keywords)); |
---|
29 | Freespace(&(data.gbk.source)); |
---|
30 | Freespace(&(data.gbk.organism)); |
---|
31 | for(indi=0; indi<data.gbk.numofref; indi++) { |
---|
32 | Freespace(&(data.gbk.reference[indi].ref)); |
---|
33 | Freespace(&(data.gbk.reference[indi].author)); |
---|
34 | Freespace(&(data.gbk.reference[indi].title)); |
---|
35 | Freespace(&(data.gbk.reference[indi].journal)); |
---|
36 | Freespace(&(data.gbk.reference[indi].standard)); |
---|
37 | } |
---|
38 | Freespace((char**)&(data.gbk.reference)); |
---|
39 | Freespace(&(data.gbk.comments.orginf.source)); |
---|
40 | Freespace(&(data.gbk.comments.orginf.cc)); |
---|
41 | Freespace(&(data.gbk.comments.orginf.formname)); |
---|
42 | Freespace(&(data.gbk.comments.orginf.nickname)); |
---|
43 | Freespace(&(data.gbk.comments.orginf.commname)); |
---|
44 | Freespace(&(data.gbk.comments.orginf.hostorg)); |
---|
45 | Freespace(&(data.gbk.comments.seqinf.RDPid)); |
---|
46 | Freespace(&(data.gbk.comments.seqinf.gbkentry)); |
---|
47 | Freespace(&(data.gbk.comments.seqinf.methods)); |
---|
48 | Freespace(&(data.gbk.comments.others)); |
---|
49 | data.gbk.locus=Dupstr("\n"); |
---|
50 | data.gbk.definition=Dupstr("\n"); |
---|
51 | data.gbk.accession=Dupstr("\n"); |
---|
52 | data.gbk.keywords=Dupstr("\n"); |
---|
53 | data.gbk.source=Dupstr("\n"); |
---|
54 | data.gbk.organism=Dupstr("\n"); |
---|
55 | data.gbk.numofref=0; |
---|
56 | data.gbk.comments.orginf.exist=0; |
---|
57 | data.gbk.comments.orginf.source=Dupstr("\n"); |
---|
58 | data.gbk.comments.orginf.cc=Dupstr("\n"); |
---|
59 | data.gbk.comments.orginf.formname=Dupstr("\n"); |
---|
60 | data.gbk.comments.orginf.nickname=Dupstr("\n"); |
---|
61 | data.gbk.comments.orginf.commname=Dupstr("\n"); |
---|
62 | data.gbk.comments.orginf.hostorg=Dupstr("\n"); |
---|
63 | data.gbk.comments.seqinf.exist=0; |
---|
64 | data.gbk.comments.seqinf.RDPid=Dupstr("\n"); |
---|
65 | data.gbk.comments.seqinf.gbkentry=Dupstr("\n"); |
---|
66 | data.gbk.comments.seqinf.methods=Dupstr("\n"); |
---|
67 | data.gbk.comments.others=NULL; |
---|
68 | data.gbk.comments.seqinf.comp5=' '; |
---|
69 | data.gbk.comments.seqinf.comp3=' '; |
---|
70 | } |
---|
71 | /* ----------------------------------------------------------- |
---|
72 | * Function genbank_in(). |
---|
73 | * Read in one genbank entry. |
---|
74 | */ |
---|
75 | char genbank_in(fp) |
---|
76 | FILE_BUFFER fp; |
---|
77 | { |
---|
78 | char line[LINENUM], key[TOKENNUM] /*, temp[LINENUM]*/; |
---|
79 | const char *eof; |
---|
80 | char eoen; |
---|
81 | /* char *Fgetline(); */ |
---|
82 | /* char *genbank_one_entry_in(); */ |
---|
83 | /* char *genbank_source(), *genbank_reference(); */ |
---|
84 | /* char *genbank_comments(), *genbank_origin(); */ |
---|
85 | /* char *genbank_skip_unidentified(); */ |
---|
86 | /* void genbank_key_word(), warning(), error(); */ |
---|
87 | /* void Append_char(); */ |
---|
88 | /* void genbank_verify_accession(), genbank_verify_keywords(); */ |
---|
89 | /* int Lenstr(); */ |
---|
90 | /* static int count = 0; */ |
---|
91 | |
---|
92 | eoen=' '; |
---|
93 | /* end-of-entry, set to be 'y' after '//' is read */ |
---|
94 | for(eof=Fgetline(line, LINENUM, fp);eof!=NULL&&eoen!='y'; ) { |
---|
95 | |
---|
96 | if(Lenstr(line)<=1) { |
---|
97 | eof=Fgetline(line, LINENUM, fp); |
---|
98 | continue; /* empty line, skip */ |
---|
99 | } |
---|
100 | |
---|
101 | genbank_key_word(line, 0, key, TOKENNUM); |
---|
102 | |
---|
103 | eoen='n'; |
---|
104 | |
---|
105 | if((Cmpstr(key, "LOCUS"))==EQ) { |
---|
106 | |
---|
107 | eof = genbank_one_entry_in(&data.gbk.locus, line, fp); |
---|
108 | if(Lenstr(data.gbk.locus)<61) warning(14, "LOCUS data might be incomplete"); |
---|
109 | |
---|
110 | } else if((Cmpstr(key, "DEFINITION"))==EQ) { |
---|
111 | |
---|
112 | eof = genbank_one_entry_in(&data.gbk.definition, line, fp); |
---|
113 | |
---|
114 | /* correct missing '.' at the end */ |
---|
115 | Append_char(&(data.gbk.definition), '.'); |
---|
116 | |
---|
117 | } else if((Cmpstr(key, "ACCESSION"))==EQ) { |
---|
118 | |
---|
119 | eof = genbank_one_entry_in(&data.gbk.accession, line, fp); |
---|
120 | |
---|
121 | genbank_verify_accession(); |
---|
122 | |
---|
123 | } else if((Cmpstr(key, "KEYWORDS"))==EQ) { |
---|
124 | |
---|
125 | eof = genbank_one_entry_in(&data.gbk.keywords, line, fp); |
---|
126 | genbank_verify_keywords(); |
---|
127 | |
---|
128 | } else if((Cmpstr(key, "SOURCE"))==EQ) { |
---|
129 | eof = genbank_source(line, fp); |
---|
130 | /* correct missing '.' at the end */ |
---|
131 | Append_char(&(data.gbk.source), '.'); |
---|
132 | Append_char(&(data.gbk.organism), '.'); |
---|
133 | } else if((Cmpstr(key, "REFERENCE"))==EQ) { |
---|
134 | eof = genbank_reference(line, fp); |
---|
135 | } else if((Cmpstr(key, "COMMENTS"))==EQ) { |
---|
136 | eof = genbank_comments(line, fp); |
---|
137 | } else if((Cmpstr(key, "COMMENT"))==EQ) { |
---|
138 | eof = genbank_comments(line, fp); |
---|
139 | } else if((Cmpstr(key, "ORIGIN"))==EQ) { |
---|
140 | eof = genbank_origin(line, fp); |
---|
141 | eoen = 'y'; |
---|
142 | } else { /* unidentified key word */ |
---|
143 | eof = genbank_skip_unidentified(line, fp, 2); |
---|
144 | } |
---|
145 | /* except "ORIGIN", at the end of all the other cases, |
---|
146 | * a new line has already read in, so no further read |
---|
147 | * is necessary */ |
---|
148 | } /* for loop to read an entry line by line */ |
---|
149 | |
---|
150 | if(eoen=='n') { |
---|
151 | warning(86, "Reach EOF before sequence data is read."); |
---|
152 | return(EOF); |
---|
153 | } |
---|
154 | if(eof==NULL) return(EOF); |
---|
155 | else return(EOF+1); |
---|
156 | |
---|
157 | } |
---|
/* -------------------------------------------------------------
 *  Function genbank_key_word().
 *      Copy the keyword that starts at line[index] into key.
 *      The keyword ends at the first blank, tab, newline or NUL,
 *      and is never read beyond column 12 of the line.
 *
 *      line    input line (may be NULL -> key becomes "")
 *      index   offset in line where the keyword starts
 *      key     output buffer, always NUL-terminated on return
 *      length  size of key in bytes; at most length-1 characters
 *              are stored
 */
void genbank_key_word(line, index, key, length)
char    *line;
int     index;
char    *key;
int     length;
{
    int indi, indj;

    if(key==NULL || length<=0) return;
    key[0] = '\0';
    if(line==NULL || index<0) return;

    /* the line may end before `index`; never step over its terminator */
    for(indi=0; indi<index; indi++)
        if(line[indi]=='\0') return;

    /* indj is bounded by the buffer size (the old test `index-indi < length`
     * was never false, so `length` had no effect) */
    for(indi=index, indj=0;
        indj<length-1 && indi<12
        && line[indi]!=' ' && line[indi]!='\t'
        && line[indi]!='\n' && line[indi]!='\0';
        indi++, indj++)
        key[indj] = line[indi];

    key[indj] = '\0';
}
/* -------------------------------------------------------------
 *  Function genbank_comment_subkey_word().
 *      Copy the sub-keyword of a comment line, starting at
 *      line[index], into key.  Scanning stops at ':', '(', tab,
 *      newline or NUL; a terminating ':' is kept as part of the
 *      key (e.g. "RDP ID:").
 *
 *      line    input line (may be NULL -> key becomes "")
 *      index   offset in line where scanning starts
 *      key     output buffer, always NUL-terminated on return
 *      length  size of key in bytes; longer text is truncated to
 *              length-1 characters instead of overflowing key
 *
 *      Returns the offset just behind the character that stopped
 *      the scan (index unchanged when line is NULL).
 */
int genbank_comment_subkey_word(line, index, key, length)
char    *line;
int     index;
char    *key;
int     length;
{
    int indi, indj;

    if(key==NULL || length<=0) return(index);
    if(line==NULL) { key[0]='\0'; return(index); }

    /* free-text comment lines can be far longer than key, so the copy
     * is limited by the buffer size (the old test `index-indi < length`
     * was never false) */
    for(indi=index, indj=0;
        indj<length-1 && line[indi]!=':' && line[indi]!='\t'
        && line[indi]!='\n' && line[indi]!='\0' && line[indi]!='(';
        indi++, indj++)
    {
        key[indj] = line[indi];
    }

    /* keep the colon only if there is still room for it and the NUL */
    if(line[indi]==':' && indj<length-1) key[indj++] = ':';
    key[indj] = '\0';

    return(indi+1);
}
/* ------------------------------------------------------------
 *  Function genbank_check_blanks().
 *      Check whether line starts with at least (numb) columns
 *      of white space.  A blank counts as one column, a tab
 *      advances to the next multiple of 8.
 *
 *      Returns 1 if the first numb columns are blank (always 1
 *      for numb <= 0), otherwise 0.  Any other character,
 *      including newline and the terminating NUL, ends the
 *      check with 0.
 */
int genbank_check_blanks(line, numb)
char    *line;
int     numb;
{
    /* The display column and the array index are tracked separately.
     * Storing the column in the index after a tab made later reads
     * skip characters and could step past the end of a short line. */
    int col, idx;

    if(numb<=0) return(1);
    if(line==NULL) return(0);

    for(col=0, idx=0; col<numb; idx++) {
        if(line[idx]==' ')
            col++;
        else if(line[idx]=='\t')
            col = (col/8+1)*8;
        else
            return(0);
    }

    return(1);
}
227 | /* ---------------------------------------------------------------- |
---|
228 | * Function genbank_continue_line(). |
---|
229 | * if there are (numb) of blanks at the beginning |
---|
230 | * of line, it is a continue line of the |
---|
231 | * current command. |
---|
232 | */ |
---|
233 | char *genbank_continue_line(string, line, numb, fp) |
---|
234 | char **string, *line; |
---|
235 | int numb; /* number of blanks needed to define a continue line */ |
---|
236 | FILE_BUFFER fp; |
---|
237 | { |
---|
238 | int ind; |
---|
239 | /* int Lenstr(); */ |
---|
240 | /* int genbank_check_blanks(), Skip_white_space(); */ |
---|
241 | char *eof, temp[LINENUM]; |
---|
242 | /* char *Fgetline(); */ |
---|
243 | /* void Cpystr(), Append_rp_eoln(); */ |
---|
244 | |
---|
245 | /* check continue lines */ |
---|
246 | for(eof=Fgetline(line, LINENUM, fp); |
---|
247 | eof!=NULL&&(genbank_check_blanks(line, numb) |
---|
248 | ||line[0]=='\n'); eof=Fgetline(line, LINENUM, fp)) { |
---|
249 | |
---|
250 | if(line[0]=='\n') continue; /* empty line is allowed */ |
---|
251 | /* remove end-of-line, if there is any */ |
---|
252 | ind=Skip_white_space(line, 0); |
---|
253 | Cpystr(temp, (line+ind)); |
---|
254 | Append_rp_eoln(string, temp); |
---|
255 | |
---|
256 | } /* end of continue line checking */ |
---|
257 | |
---|
258 | return(eof); |
---|
259 | } |
---|
260 | /* ------------------------------------------------------------ |
---|
261 | * Function genbank_one_entry_in(). |
---|
262 | * Read in genbank one entry lines. |
---|
263 | */ |
---|
264 | char *genbank_one_entry_in(datastring, line, fp) |
---|
265 | char **datastring, *line; |
---|
266 | FILE_BUFFER fp; |
---|
267 | { |
---|
268 | int index; |
---|
269 | /* int Skip_white_space(), Lenstr(); */ |
---|
270 | char *eof; |
---|
271 | /* char *genbank_continue_line(), *Dupstr(); */ |
---|
272 | /* void error(), Freespace(); */ |
---|
273 | |
---|
274 | index = Skip_white_space(line, 12); |
---|
275 | Freespace(datastring); |
---|
276 | *datastring = Dupstr(line+index); |
---|
277 | eof = (char*)genbank_continue_line(datastring, line, 12, fp); |
---|
278 | |
---|
279 | return(eof); |
---|
280 | } |
---|
281 | /* ------------------------------------------------------------ |
---|
282 | * Function genbank_one_comment_entry(). |
---|
283 | * Read in one genbank sub-entry in comments lines. |
---|
284 | */ |
---|
285 | char *genbank_one_comment_entry(datastring, line, start_index, fp) |
---|
286 | char **datastring, *line; |
---|
287 | int start_index; |
---|
288 | FILE_BUFFER fp; |
---|
289 | { |
---|
290 | int index; |
---|
291 | /* int Skip_white_space(), Lenstr(); */ |
---|
292 | char *eof; |
---|
293 | /* char *genbank_continue_line(), *Dupstr(); */ |
---|
294 | /* void error(), Freespace(); */ |
---|
295 | |
---|
296 | index = Skip_white_space(line, start_index); |
---|
297 | Freespace(datastring); |
---|
298 | *datastring = Dupstr(line+index); |
---|
299 | eof = (char*)genbank_continue_line (datastring, line, 20, fp); |
---|
300 | return(eof); |
---|
301 | } |
---|
302 | /* -------------------------------------------------------------- |
---|
303 | * Function genbank_source() |
---|
304 | * Read in genbank SOURCE lines and also ORGANISM |
---|
305 | * lines. |
---|
306 | */ |
---|
307 | char *genbank_source(line, fp) |
---|
308 | char *line; |
---|
309 | FILE_BUFFER fp; |
---|
310 | { |
---|
311 | int index; |
---|
312 | /* int Skip_white_space(); */ |
---|
313 | char *eof; |
---|
314 | /* char *genbank_continue_line(), *Dupstr(); */ |
---|
315 | /* char *genbank_one_entry_in(); */ |
---|
316 | char *dummy, key[TOKENNUM]; |
---|
317 | /* void Freespace(), genbank_key_word(); */ |
---|
318 | |
---|
319 | eof = genbank_one_entry_in(&(data.gbk.source), line, fp); |
---|
320 | genbank_key_word(line, 2, key, TOKENNUM); |
---|
321 | if(Cmpstr(key, "ORGANISM")==EQ) { |
---|
322 | index = Skip_white_space(line, 12); |
---|
323 | data.gbk.organism = Dupstr(line+index); |
---|
324 | dummy = (char*)Dupstr("\n"); |
---|
325 | eof = (char*)genbank_continue_line(&(dummy), line, 12, fp); |
---|
326 | Freespace(&dummy); |
---|
327 | } |
---|
328 | return(eof); |
---|
329 | } |
---|
330 | /* -------------------------------------------------------------- |
---|
331 | * Function genbank_reference(). |
---|
332 | * Read in genbank REFERENCE lines. |
---|
333 | */ |
---|
334 | char *genbank_reference(line, fp) |
---|
335 | char *line; |
---|
336 | FILE_BUFFER fp; |
---|
337 | { |
---|
338 | #define AUTH 0 |
---|
339 | #define TIT 1 |
---|
340 | #define JOUR 2 |
---|
341 | /* void genbank_key_word(), error(), warning(); */ |
---|
342 | /* void Freespace(), init_reference(); */ |
---|
343 | /* void Append_char(); */ |
---|
344 | char *eof, key[TOKENNUM]; |
---|
345 | /* char *Dupstr(), *genbank_skip_unidentified(); */ |
---|
346 | char temp[LINENUM]; |
---|
347 | /* char *Reallocspace(), *genbank_one_entry_in(); */ |
---|
348 | int /*index, indi,*/ refnum; |
---|
349 | /* int Cmpstr(), Skip_white_space(); */ |
---|
350 | int acount=0, tcount=0, jcount=0, scount=0; |
---|
351 | |
---|
352 | sscanf(line+12, "%d", &refnum); |
---|
353 | if(refnum <= data.gbk.numofref) { |
---|
354 | sprintf(temp, |
---|
355 | "Might redefine reference %d", refnum); |
---|
356 | warning(17, temp); |
---|
357 | eof = genbank_skip_unidentified(line, fp, 12); |
---|
358 | } else { |
---|
359 | data.gbk.numofref = refnum; |
---|
360 | data.gbk.reference = (Reference*)Reallocspace ((char*)data.gbk.reference, (unsigned) (sizeof(Reference)*(data.gbk.numofref))); |
---|
361 | /* initialize the buffer */ |
---|
362 | init_reference(&(data.gbk.reference[refnum-1]), ALL); |
---|
363 | eof = genbank_one_entry_in (&(data.gbk.reference[refnum-1].ref),line, fp); |
---|
364 | } |
---|
365 | /* find the reference listings */ |
---|
366 | for( ;eof!=NULL&&line[0]==' '&&line[1]==' '; ) |
---|
367 | { |
---|
368 | /* find the key word */ |
---|
369 | genbank_key_word(line, 2, key, TOKENNUM); |
---|
370 | /* skip white space */ |
---|
371 | if((Cmpstr(key, "AUTHORS"))==EQ) { |
---|
372 | eof = genbank_one_entry_in( |
---|
373 | &(data.gbk.reference[refnum-1].author), |
---|
374 | line, fp); |
---|
375 | |
---|
376 | /* add '.' if missing at the end */ |
---|
377 | Append_char |
---|
378 | (&(data.gbk.reference[refnum-1].author),'.'); |
---|
379 | |
---|
380 | if(acount==0) acount=1; |
---|
381 | else { |
---|
382 | |
---|
383 | sprintf(temp, "AUTHORS of REFERENCE %d is redefined" |
---|
384 | , refnum); |
---|
385 | warning(10, temp); |
---|
386 | } |
---|
387 | |
---|
388 | } else if((Cmpstr(key, "TITLE"))==EQ) { |
---|
389 | eof = genbank_one_entry_in( |
---|
390 | &(data.gbk.reference[refnum-1].title), |
---|
391 | line, fp); |
---|
392 | if(tcount==0) tcount=1; |
---|
393 | else { |
---|
394 | |
---|
395 | sprintf(temp, "TITLE of REFERENCE %d is redefined" |
---|
396 | , refnum); |
---|
397 | |
---|
398 | warning(11, temp); |
---|
399 | } |
---|
400 | } else if((Cmpstr(key, "JOURNAL"))==EQ) { |
---|
401 | |
---|
402 | eof = genbank_one_entry_in( |
---|
403 | &(data.gbk.reference[refnum-1].journal), |
---|
404 | line, fp); |
---|
405 | |
---|
406 | if(jcount==0) jcount=1; |
---|
407 | else { |
---|
408 | |
---|
409 | sprintf(temp, |
---|
410 | "JOURNAL of REFERENCE %d is redefined", refnum); |
---|
411 | |
---|
412 | warning(12, temp); |
---|
413 | } |
---|
414 | } else if((Cmpstr(key, "STANDARD"))==EQ) { |
---|
415 | |
---|
416 | eof = genbank_one_entry_in( |
---|
417 | &(data.gbk.reference[refnum-1].standard), |
---|
418 | line, fp); |
---|
419 | |
---|
420 | if(scount==0) scount=1; |
---|
421 | else { |
---|
422 | |
---|
423 | sprintf(temp, |
---|
424 | "STANDARD of REFERENCE %d is redefined", refnum); |
---|
425 | |
---|
426 | warning(13, temp); |
---|
427 | } |
---|
428 | } else { |
---|
429 | |
---|
430 | sprintf(temp, |
---|
431 | "Unidentified REFERENCE subkeyword: %s#", key); |
---|
432 | |
---|
433 | warning(18,temp); |
---|
434 | eof = genbank_skip_unidentified(line, fp, 12); |
---|
435 | } |
---|
436 | } /* for loop */ |
---|
437 | return(eof); |
---|
438 | } |
---|
439 | /* -------------------------------------------------------------- |
---|
440 | * Function genbank_comments(). |
---|
441 | * Read in genbank COMMENTS lines. |
---|
442 | */ |
---|
443 | const char *genbank_comments(line, fp) |
---|
444 | char *line; |
---|
445 | FILE_BUFFER fp; |
---|
446 | { |
---|
447 | int index, indi, ptr/*, genbank_check_blanks()*/; |
---|
448 | /* int Lenstr(), Skip_white_space(); */ |
---|
449 | /* int Cmpstr(), genbank_comment_subkey_word(); */ |
---|
450 | const char *eof; |
---|
451 | /* char *Fgetline(), *Dupstr(); */ |
---|
452 | char key[TOKENNUM]; |
---|
453 | /* char *genbank_one_comment_entry(); */ |
---|
454 | /* void Freespace(), Append(); */ |
---|
455 | |
---|
456 | if(Lenstr(line)<=12) { |
---|
457 | if((eof = Fgetline(line, LINENUM, fp))==NULL) |
---|
458 | return(eof); |
---|
459 | } |
---|
460 | /* make up data to match the logic reasoning for next statment */ |
---|
461 | for(indi=0; indi<12; line[indi++]=' ') ; eof = "NONNULL"; |
---|
462 | |
---|
463 | for( ;eof!=NULL&&(genbank_check_blanks(line, 12) ||line[0]=='\n'); ) { |
---|
464 | if(line[0]=='\n') { /* skip empty line */ |
---|
465 | eof=Fgetline(line, LINENUM, fp); |
---|
466 | continue; |
---|
467 | } |
---|
468 | |
---|
469 | ptr = index = 12; |
---|
470 | |
---|
471 | index = Skip_white_space(line, index); |
---|
472 | #if defined(DEBUG) |
---|
473 | if (index >= TOKENNUM) { |
---|
474 | printf("big index %i after Skip_white_space\n", index); |
---|
475 | } |
---|
476 | #endif /* DEBUG */ |
---|
477 | index = genbank_comment_subkey_word (line, index, key, TOKENNUM); |
---|
478 | #if defined(DEBUG) |
---|
479 | if (index >= TOKENNUM) { |
---|
480 | printf("big index %i after genbank_comment_subkey_word\n", index); |
---|
481 | } |
---|
482 | #endif /* DEBUG */ |
---|
483 | |
---|
484 | if(Cmpstr(key, "Source of strain:")==EQ) { |
---|
485 | eof = genbank_one_comment_entry (&(data.gbk.comments.orginf.source), line, index, fp); |
---|
486 | |
---|
487 | } else if(Cmpstr(key, "Culture collection:")==EQ) { |
---|
488 | |
---|
489 | eof = genbank_one_comment_entry (&(data.gbk.comments.orginf.cc), line, index, fp); |
---|
490 | |
---|
491 | } else if(Cmpstr(key, "Former name:")==EQ) { |
---|
492 | |
---|
493 | eof = genbank_one_comment_entry (&(data.gbk.comments.orginf.formname), line, index, fp); |
---|
494 | |
---|
495 | } else if(Cmpstr(key, "Alternate name:")==EQ) { |
---|
496 | |
---|
497 | eof = genbank_one_comment_entry (&(data.gbk.comments.orginf.nickname), line, index, fp); |
---|
498 | |
---|
499 | } else if(Cmpstr(key, "Common name:")==EQ) { |
---|
500 | |
---|
501 | eof = genbank_one_comment_entry (&(data.gbk.comments.orginf.commname), line, index, fp); |
---|
502 | |
---|
503 | } else if(Cmpstr(key, "Host organism:")==EQ) { |
---|
504 | |
---|
505 | eof = genbank_one_comment_entry (&(data.gbk.comments.orginf.hostorg), line, index, fp); |
---|
506 | |
---|
507 | } else if(Cmpstr(key, "RDP ID:")==EQ) { |
---|
508 | eof = genbank_one_comment_entry (&(data.gbk.comments.seqinf.RDPid), line, index, fp); |
---|
509 | |
---|
510 | } else if(Cmpstr(key, |
---|
511 | "Corresponding GenBank entry:")==EQ) { |
---|
512 | |
---|
513 | eof = genbank_one_comment_entry (&(data.gbk.comments.seqinf.gbkentry), line, index, fp); |
---|
514 | |
---|
515 | } else if(Cmpstr(key, "Sequencing methods:")==EQ) { |
---|
516 | |
---|
517 | eof = genbank_one_comment_entry (&(data.gbk.comments.seqinf.methods), line, index, fp); |
---|
518 | |
---|
519 | } else if(Cmpstr(key, "5' end complete:")==EQ) { |
---|
520 | sscanf(line+index, "%s", key); |
---|
521 | if(key[0]=='Y') data.gbk.comments.seqinf.comp5 = 'y'; |
---|
522 | else data.gbk.comments.seqinf.comp5 = 'n'; |
---|
523 | eof=Fgetline(line, LINENUM, fp); |
---|
524 | } else if(Cmpstr(key, "3' end complete:")==EQ) { |
---|
525 | sscanf(line+index, "%s", key); |
---|
526 | if(key[0]=='Y') data.gbk.comments.seqinf.comp3 = 'y'; |
---|
527 | else data.gbk.comments.seqinf.comp3 = 'n'; |
---|
528 | eof=Fgetline(line, LINENUM, fp); |
---|
529 | } else if(Cmpstr(key, |
---|
530 | "Sequence information ")==EQ) { |
---|
531 | /* do nothing */ |
---|
532 | data.gbk.comments.seqinf.exist = 1; |
---|
533 | eof=Fgetline(line, LINENUM, fp); |
---|
534 | } else if(Cmpstr(key, "Organism information")==EQ) { |
---|
535 | /* do nothing */ |
---|
536 | data.gbk.comments.orginf.exist = 1; |
---|
537 | eof=Fgetline(line, LINENUM, fp); |
---|
538 | } else { /* other comments */ |
---|
539 | |
---|
540 | assert(ptr == 12); |
---|
541 | if(data.gbk.comments.others == NULL) { |
---|
542 | data.gbk.comments.others =(char*)Dupstr(line+ptr); |
---|
543 | } |
---|
544 | else { |
---|
545 | Append(&(data.gbk.comments.others), line+ptr); |
---|
546 | } |
---|
547 | |
---|
548 | eof=Fgetline(line, LINENUM, fp); |
---|
549 | } |
---|
550 | } /* for loop */ |
---|
551 | |
---|
552 | return(eof); |
---|
553 | } |
---|
554 | /* -------------------------------------------------------------- |
---|
555 | * Function genbank_origin(). |
---|
556 | * Read in genbank sequence data. |
---|
557 | */ |
---|
558 | char |
---|
559 | *genbank_origin(line, fp) |
---|
560 | char *line; |
---|
561 | FILE_BUFFER fp; |
---|
562 | { |
---|
563 | char *eof; |
---|
564 | int index; |
---|
565 | /* void warning(); */ |
---|
566 | |
---|
567 | data.seq_length = 0; |
---|
568 | data.sequence[data.seq_length] = '\0'; // needed if sequence data is empty |
---|
569 | |
---|
570 | /* read in whole sequence data */ |
---|
571 | for(eof=Fgetline(line, LINENUM, fp); |
---|
572 | eof != NULL&&line[0]!='/'&&line[1]!='/'; |
---|
573 | eof = Fgetline(line, LINENUM, fp)) |
---|
574 | { |
---|
575 | /* empty line, skip */ |
---|
576 | if(Lenstr(line)<=1) continue; |
---|
577 | for(index=9; line[index]!='\n'&&line[index]!='\0'; |
---|
578 | index++) |
---|
579 | { |
---|
580 | if(line[index]!=' ' && data.seq_length>=data.max) { |
---|
581 | data.max += 100; |
---|
582 | |
---|
583 | data.sequence = (char*)Reallocspace(data.sequence, |
---|
584 | (unsigned)(sizeof(char)*data.max)); |
---|
585 | } |
---|
586 | if(line[index]!=' ') data.sequence[data.seq_length++] = line[index]; |
---|
587 | } |
---|
588 | if(data.seq_length>=data.max) { |
---|
589 | data.max += 100; |
---|
590 | |
---|
591 | data.sequence = (char*)Reallocspace(data.sequence, |
---|
592 | (unsigned)(sizeof(char)*data.max)); |
---|
593 | } |
---|
594 | data.sequence[data.seq_length] = '\0'; |
---|
595 | } |
---|
596 | |
---|
597 | return(eof); |
---|
598 | } |
---|
599 | /* --------------------------------------------------------------- |
---|
600 | * Function genbank_skip_unidentified(). |
---|
601 | * Skip the lines of unidentified keyword. |
---|
602 | */ |
---|
603 | char |
---|
604 | *genbank_skip_unidentified(line, fp, blank_num) |
---|
605 | char *line; |
---|
606 | FILE_BUFFER fp; |
---|
607 | int blank_num; |
---|
608 | { |
---|
609 | char *eof; |
---|
610 | /* int genbank_check_blanks(); */ |
---|
611 | |
---|
612 | for(eof=Fgetline(line, LINENUM, fp); |
---|
613 | eof!=NULL&&genbank_check_blanks(line, blank_num); |
---|
614 | eof=Fgetline(line, LINENUM, fp)) ; |
---|
615 | |
---|
616 | return(eof); |
---|
617 | } |
---|
618 | /* --------------------------------------------------------------- |
---|
619 | * Function genbank_verify_accession(). |
---|
620 | * Verify accession information. |
---|
621 | */ |
---|
622 | void |
---|
623 | genbank_verify_accession() |
---|
624 | { |
---|
625 | int indi, /*index,*/ len, count, remainder; |
---|
626 | char temp[LONGTEXT]; |
---|
627 | /* char *Reallocspace(); */ |
---|
628 | /* void warning(); */ |
---|
629 | |
---|
630 | if(Cmpstr(data.gbk.accession, "No information\n")==EQ) return; |
---|
631 | len=Lenstr(data.gbk.accession); |
---|
632 | if((len % 7)!=0) { |
---|
633 | if(warning_out) |
---|
634 | fprintf(stderr, |
---|
635 | "\nACCESSION: %s", data.gbk.accession); |
---|
636 | warning(136, |
---|
637 | "Each accession number should be a six-character identifier."); |
---|
638 | } |
---|
639 | for(indi=count=0; indi<len-1; indi++) { |
---|
640 | remainder=indi % 7; |
---|
641 | switch(remainder) { |
---|
642 | case 0: |
---|
643 | count++; |
---|
644 | if(count>9){ |
---|
645 | if(warning_out) fprintf(stderr, |
---|
646 | "\nACCESSION: %s", data.gbk.accession); |
---|
647 | warning(137, |
---|
648 | "No more than 9 accession numbers are allowed in ACCESSION line."); |
---|
649 | data.gbk.accession[indi-1]='\n'; |
---|
650 | data.gbk.accession[indi]='\0'; |
---|
651 | data.gbk.accession = (char*)Reallocspace |
---|
652 | (data.gbk.accession, |
---|
653 | (unsigned)(sizeof(char)*indi)); |
---|
654 | return; |
---|
655 | } |
---|
656 | if(!isalpha(data.gbk.accession[indi])) { |
---|
657 | sprintf(temp, |
---|
658 | "The %d(th) accession number must start with a letter.", |
---|
659 | count); |
---|
660 | warning(138, temp); |
---|
661 | } |
---|
662 | break; |
---|
663 | case 1: |
---|
664 | case 2: |
---|
665 | case 3: |
---|
666 | case 4: |
---|
667 | case 5: |
---|
668 | if(!isdigit(data.gbk.accession[indi])) { |
---|
669 | sprintf(temp, |
---|
670 | "The last 5 characters of the %d(th) accession number should be all digits.", |
---|
671 | count); |
---|
672 | warning(140, temp); |
---|
673 | } |
---|
674 | break; |
---|
675 | case 6: |
---|
676 | if((indi!=(len-1)&&data.gbk.accession[indi]!=' ') |
---|
677 | ||(indi==(len-1)&&data.gbk.accession[indi]!='\n')) |
---|
678 | { |
---|
679 | if(warning_out) fprintf(stderr, |
---|
680 | "\nACCESSION: %s", data.gbk.accession); |
---|
681 | warning(139, |
---|
682 | "Accession numbers should be separated by a space."); |
---|
683 | data.gbk.accession[indi]=' '; |
---|
684 | } |
---|
685 | break; |
---|
686 | default: ; |
---|
687 | } |
---|
688 | } /* check every char of ACCESSION line. */ |
---|
689 | } |
---|
690 | /* ------------------------------------------------------------------ |
---|
691 | * Function genbank_verify_keywords(). |
---|
692 | * Verify keywords. |
---|
693 | */ |
---|
694 | void |
---|
695 | genbank_verify_keywords() { |
---|
696 | |
---|
697 | int indi, count, len; |
---|
698 | /* void Append_char(), warning(); */ |
---|
699 | |
---|
700 | /* correct missing '.' at the end */ |
---|
701 | Append_char(&(data.gbk.keywords), '.'); |
---|
702 | |
---|
703 | for(indi=count=0, len=Lenstr(data.gbk.keywords); indi<len; indi++) |
---|
704 | if(data.gbk.keywords[indi]=='.') count++; |
---|
705 | |
---|
706 | if(count!=1) { |
---|
707 | if(warning_out) fprintf(stderr, |
---|
708 | "\nKEYWORDS: %s", data.gbk.keywords); |
---|
709 | warning(141, |
---|
710 | "No more than one period is allowed in KEYWORDS line."); |
---|
711 | } |
---|
712 | } |
---|
713 | /* --------------------------------------------------------------- |
---|
714 | * Function genbank_in_locus(). |
---|
715 | * Read in next genbank locus and sequence only. |
---|
716 | * For use of converting to simple format(read in only simple |
---|
717 | * information instead of whole records). |
---|
718 | */ |
---|
719 | char |
---|
720 | genbank_in_locus(fp) |
---|
721 | FILE_BUFFER fp; |
---|
722 | { |
---|
723 | char line[LINENUM], key[TOKENNUM]; |
---|
724 | char *eof, eoen; |
---|
725 | /* char *genbank_one_entry_in(), *genbank_origin(); */ |
---|
726 | /* void genbank_key_word(), warning(), error(); */ |
---|
727 | |
---|
728 | eoen=' '; /* end-of-entry, set to be 'y' after '//' is read */ |
---|
729 | for(eof=Fgetline(line, LINENUM, fp); eof!=NULL&&eoen!='y'; ) { |
---|
730 | genbank_key_word(line, 0, key, TOKENNUM); |
---|
731 | if((Cmpstr(key, "ORIGIN"))==EQ) { |
---|
732 | eof = genbank_origin(line, fp); |
---|
733 | eoen = 'y'; |
---|
734 | } else if((Cmpstr(key, "LOCUS"))==EQ) { |
---|
735 | eof = genbank_one_entry_in(&data.gbk.locus, |
---|
736 | line, fp); |
---|
737 | } else eof=Fgetline(line, LINENUM, fp); |
---|
738 | } /* for loop to read an entry line by line */ |
---|
739 | |
---|
740 | if(eoen=='n') |
---|
741 | error(9, "Reach EOF before one entry is read, Exit"); |
---|
742 | |
---|
743 | if(eof==NULL) return(EOF); |
---|
744 | else return(EOF+1); |
---|
745 | |
---|
746 | } |
---|
747 | /* --------------------------------------------------------------- |
---|
748 | * Function genbank_out(). |
---|
749 | * Output in a genbank format. |
---|
750 | */ |
---|
751 | void |
---|
752 | genbank_out(fp) |
---|
753 | FILE *fp; |
---|
754 | { |
---|
755 | /* void genbank_print_lines(), genbank_out_origin(); */ |
---|
756 | /* void genbank_print_comment(), count_base(); */ |
---|
757 | /* char temp[LONGTEXT]; */ |
---|
758 | /* void genbank_out_one_entry(), genbank_out_one_comment(); */ |
---|
759 | /* int Lenstr(), deterninator(); */ |
---|
760 | int indi, /*indj, indk,*/ length; |
---|
761 | int base_a, base_t, base_g, base_c, base_other; |
---|
762 | |
---|
763 | /* Assume the last char of each field is '\n' */ |
---|
764 | genbank_out_one_entry(fp, data.gbk.locus, "LOCUS ", SEPNODEFINED, "", NOPERIOD); |
---|
765 | genbank_out_one_entry(fp, data.gbk.definition, "DEFINITION ", SEPNODEFINED, "", PERIOD); |
---|
766 | genbank_out_one_entry(fp, data.gbk.accession, "ACCESSION ", SEPNODEFINED, "", NOPERIOD); |
---|
767 | genbank_out_one_entry(fp, data.gbk.keywords, "KEYWORDS ", SEPDEFINED, ";", PERIOD); |
---|
768 | |
---|
769 | if(Lenstr(data.gbk.source)>1) { |
---|
770 | fprintf(fp, "SOURCE "); |
---|
771 | genbank_print_lines(fp, data.gbk.source, SEPNODEFINED, ""); |
---|
772 | if(Lenstr(data.gbk.organism)>1) { |
---|
773 | fprintf(fp, " ORGANISM "); |
---|
774 | genbank_print_lines(fp, data.gbk.organism, SEPNODEFINED, ""); |
---|
775 | } else fprintf(fp, " ORGANISM No information.\n"); |
---|
776 | } else if(Lenstr(data.gbk.organism)>1) { |
---|
777 | |
---|
778 | fprintf(fp, "SOURCE No information.\n ORGANISM "); |
---|
779 | genbank_print_lines(fp, data.gbk.organism, |
---|
780 | SEPNODEFINED, ""); |
---|
781 | } else fprintf(fp, |
---|
782 | "SOURCE No information.\n ORGANISM No information.\n"); |
---|
783 | |
---|
784 | if(data.gbk.numofref>0) { |
---|
785 | for(indi=0; indi<data.gbk.numofref; indi++) { |
---|
786 | |
---|
787 | if(Lenstr(data.gbk.reference[indi].ref)>1) { |
---|
788 | fprintf(fp, "REFERENCE "); |
---|
789 | genbank_print_lines(fp, |
---|
790 | data.gbk.reference[indi].ref, |
---|
791 | SEPNODEFINED, ""); |
---|
792 | } else fprintf(fp, |
---|
793 | "REFERENCE %d\n", indi+1); |
---|
794 | |
---|
795 | genbank_out_one_entry(fp, |
---|
796 | data.gbk.reference[indi].author, |
---|
797 | " AUTHORS ", SEPDEFINED, " ", |
---|
798 | NOPERIOD); |
---|
799 | |
---|
800 | if(Lenstr(data.gbk.reference[indi].title)>1) |
---|
801 | { |
---|
802 | fprintf(fp, " TITLE "); |
---|
803 | genbank_print_lines(fp, |
---|
804 | data.gbk.reference[indi].title, |
---|
805 | SEPNODEFINED, ""); |
---|
806 | } |
---|
807 | |
---|
808 | genbank_out_one_entry(fp, |
---|
809 | data.gbk.reference[indi].journal, |
---|
810 | " JOURNAL ", SEPNODEFINED, "", |
---|
811 | NOPERIOD); |
---|
812 | |
---|
813 | genbank_out_one_entry(fp, |
---|
814 | data.gbk.reference[indi].standard, |
---|
815 | " STANDARD ", SEPNODEFINED, "", |
---|
816 | NOPERIOD); |
---|
817 | |
---|
818 | } /* subkey loop */ |
---|
819 | } else { |
---|
820 | fprintf(fp, "REFERENCE 1\n"); |
---|
821 | fprintf(fp, " AUTHORS No information\n"); |
---|
822 | fprintf(fp, " JOURNAL No information\n"); |
---|
823 | fprintf(fp, " TITLE No information\n"); |
---|
824 | fprintf(fp, " STANDARD No information\n"); |
---|
825 | } |
---|
826 | |
---|
827 | if(data.gbk.comments.orginf.exist==1|| |
---|
828 | data.gbk.comments.seqinf.exist == 1 || |
---|
829 | Lenstr(data.gbk.comments.others)>0) |
---|
830 | { |
---|
831 | fprintf(fp, "COMMENTS "); |
---|
832 | |
---|
833 | if(data.gbk.comments.orginf.exist==1) { |
---|
834 | fprintf(fp, "Organism information\n"); |
---|
835 | |
---|
836 | genbank_out_one_comment(fp, data.gbk.comments.orginf.source, |
---|
837 | "Source of strain: ", |
---|
838 | COMMSKINDENT, COMMCNINDENT); |
---|
839 | |
---|
840 | genbank_out_one_comment(fp, |
---|
841 | data.gbk.comments.orginf.cc, |
---|
842 | "Culture collection: ", |
---|
843 | COMMSKINDENT, COMMCNINDENT); |
---|
844 | |
---|
845 | genbank_out_one_comment(fp, |
---|
846 | data.gbk.comments.orginf.formname, |
---|
847 | "Former name: ", |
---|
848 | COMMSKINDENT, COMMCNINDENT); |
---|
849 | |
---|
850 | genbank_out_one_comment(fp, |
---|
851 | data.gbk.comments.orginf.nickname, |
---|
852 | "Alternate name: ", |
---|
853 | COMMSKINDENT, COMMCNINDENT); |
---|
854 | |
---|
855 | genbank_out_one_comment(fp, |
---|
856 | data.gbk.comments.orginf.commname, |
---|
857 | "Common name: ", |
---|
858 | COMMSKINDENT, COMMCNINDENT); |
---|
859 | |
---|
860 | genbank_out_one_comment(fp, |
---|
861 | data.gbk.comments.orginf.hostorg, |
---|
862 | "Host organism: ", |
---|
863 | COMMSKINDENT, COMMCNINDENT); |
---|
864 | |
---|
865 | if(data.gbk.comments.seqinf.exist == 1 || |
---|
866 | Lenstr(data.gbk.comments.others)>0) |
---|
867 | fprintf(fp, " "); |
---|
868 | } /* organism information */ |
---|
869 | |
---|
870 | if(data.gbk.comments.seqinf.exist==1) { |
---|
871 | |
---|
872 | fprintf(fp, |
---|
873 | "Sequence information (bases 1 to %d)\n", |
---|
874 | data.seq_length); |
---|
875 | } |
---|
876 | |
---|
877 | genbank_out_one_comment(fp, |
---|
878 | data.gbk.comments.seqinf.RDPid, |
---|
879 | "RDP ID: ", |
---|
880 | COMMSKINDENT, COMMCNINDENT); |
---|
881 | |
---|
882 | genbank_out_one_comment(fp, |
---|
883 | data.gbk.comments.seqinf.gbkentry, |
---|
884 | "Corresponding GenBank entry: ", |
---|
885 | COMMSKINDENT, COMMCNINDENT); |
---|
886 | |
---|
887 | genbank_out_one_comment(fp, |
---|
888 | data.gbk.comments.seqinf.methods, |
---|
889 | "Sequencing methods: ", |
---|
890 | COMMSKINDENT, COMMCNINDENT); |
---|
891 | |
---|
892 | if(data.gbk.comments.seqinf.comp5=='n') |
---|
893 | fprintf(fp, |
---|
894 | " 5' end complete: No\n"); |
---|
895 | |
---|
896 | else if(data.gbk.comments.seqinf.comp5=='y') |
---|
897 | fprintf(fp, |
---|
898 | " 5' end complete: Yes\n"); |
---|
899 | |
---|
900 | if(data.gbk.comments.seqinf.comp3=='n') |
---|
901 | fprintf(fp, |
---|
902 | " 3' end complete: No\n"); |
---|
903 | |
---|
904 | else if(data.gbk.comments.seqinf.comp3=='y') |
---|
905 | fprintf(fp, |
---|
906 | " 3' end complete: Yes\n"); |
---|
907 | |
---|
908 | /* print 12 spaces of the first line */ |
---|
909 | if(Lenstr(data.gbk.comments.others)>0) |
---|
910 | fprintf(fp, " "); |
---|
911 | |
---|
912 | if(Lenstr(data.gbk.comments.others)>0) { |
---|
913 | length = Lenstr(data.gbk.comments.others); |
---|
914 | for(indi=0; indi<length; indi++) |
---|
915 | { |
---|
916 | fprintf(fp, "%c", |
---|
917 | data.gbk.comments.others[indi]); |
---|
918 | |
---|
919 | /* if another line, print 12 spaces first */ |
---|
920 | if(data.gbk.comments.others[indi]=='\n' |
---|
921 | &&data.gbk.comments.others[indi+1]!='\0') |
---|
922 | |
---|
923 | fprintf(fp, " "); |
---|
924 | |
---|
925 | } |
---|
926 | } /* other comments */ |
---|
927 | } /* comment */ |
---|
928 | |
---|
929 | count_base(&base_a, &base_t, &base_g, &base_c, &base_other); |
---|
930 | |
---|
931 | /* don't write 0 others in this base line */ |
---|
932 | if(base_other>0) |
---|
933 | fprintf(fp, |
---|
934 | "BASE COUNT %6d a %6d c %6d g %6d t %6d others\n", |
---|
935 | base_a, base_c, base_g, base_t, base_other); |
---|
936 | else fprintf(fp, "BASE COUNT %6d a %6d c %6d g %6d t\n", |
---|
937 | base_a, base_c, base_g, base_t); |
---|
938 | |
---|
939 | genbank_out_origin(fp); |
---|
940 | } |
---|
/* ------------------------------------------------------------
 *  Function genbank_out_one_entry().
 *      Write one keyword entry to fp.
 *
 *      When Lenstr(string) is greater than 1 the key is written,
 *      followed by the text as laid out by genbank_print_lines()
 *      (flag and patterns are handed through unchanged).
 *      Otherwise the key is written followed by "No information",
 *      with a closing period only when period is non-zero.
 */
void genbank_out_one_entry(fp, string, key, flag, patterns, period)
FILE *fp;
char *string;
const char *key;
int flag;
const char *patterns;
int period;
{
    if (Lenstr(string) <= 1) {
        /* no usable text: emit the placeholder line */
        fprintf(fp, "%sNo information%s\n", key, period ? "." : "");
        return;
    }

    fprintf(fp, "%s", key);
    genbank_print_lines(fp, string, flag, patterns);
}
/* -------------------------------------------------------------
 *  Function genbank_out_one_comment().
 *      Write one comment sub-keyword through
 *      genbank_print_comment(), but only when Lenstr(string) is
 *      greater than 1; otherwise nothing is written.
 *
 *      skindent    sub-keyword indent.
 *      cnindent    continuation line indent.
 */
void genbank_out_one_comment(fp, string, key, skindent, cnindent)
FILE *fp;
char *string;
const char *key;
int skindent, cnindent;
{
    if (Lenstr(string) <= 1)
        return;     /* nothing to report for this sub-keyword */

    genbank_print_comment(fp, key, string, skindent, cnindent);
}
981 | /* -------------------------------------------------------------- |
---|
982 | * Fucntion genbank_print_lines(). |
---|
983 | * Print one grnbank line, wrap around if over |
---|
984 | * column 80. |
---|
985 | */ |
---|
986 | void genbank_print_lines(fp, string, flag, separators) |
---|
987 | FILE *fp; |
---|
988 | char *string; |
---|
989 | int flag; |
---|
990 | const char *separators; |
---|
991 | { |
---|
992 | int first_time=1, indi, indj, indk /*,indl*/; |
---|
993 | int ibuf, len; |
---|
994 | |
---|
995 | len = Lenstr(string)-1; |
---|
996 | /* indi: first char of the line */ |
---|
997 | /* num of char, excluding the first char, of the line */ |
---|
998 | for(indi=0; indi<len; indi+=(indj+1)) { |
---|
999 | indj=GBMAXCHAR; |
---|
1000 | if((Lenstr(string+indi))>GBMAXCHAR) { |
---|
1001 | |
---|
1002 | /* search for proper termination of a line */ |
---|
1003 | |
---|
1004 | ibuf = indj; |
---|
1005 | |
---|
1006 | for(;indj>0 |
---|
1007 | &&((!flag&&!last_word(string[indj+indi])) |
---|
1008 | ||(flag&&!is_separator(string[indj+indi], separators))); |
---|
1009 | indj--); |
---|
1010 | |
---|
1011 | if(indj==0) indj=ibuf; |
---|
1012 | else if(string[indi+indj+1]==' ') indj++; |
---|
1013 | |
---|
1014 | /* print left margine */ |
---|
1015 | if(!first_time) |
---|
1016 | fprintf(fp, " "); |
---|
1017 | else first_time = 0; |
---|
1018 | |
---|
1019 | for(indk=0; indk<indj; indk++) |
---|
1020 | fprintf(fp, "%c", string[indi+indk]); |
---|
1021 | |
---|
1022 | /* leave out the last space, if there is any */ |
---|
1023 | if(string[indi+indj]!=' '&&string[indi+indj]!='\n') |
---|
1024 | fprintf(fp, "%c", string[indi+indj]); |
---|
1025 | fprintf(fp, "\n"); |
---|
1026 | |
---|
1027 | } else if(first_time) |
---|
1028 | fprintf(fp, "%s", string+indi); |
---|
1029 | else fprintf(fp, |
---|
1030 | " %s", string+indi); |
---|
1031 | } |
---|
1032 | } |
---|
1033 | /* -------------------------------------------------------------- |
---|
1034 | * Fucntion genbank_print_comment(). |
---|
1035 | * Print one grnbank line, wrap around if over |
---|
1036 | * column 80. |
---|
1037 | */ |
---|
1038 | void genbank_print_comment(fp, key, string, offset, indent) |
---|
1039 | FILE *fp; |
---|
1040 | char *string; |
---|
1041 | const char *key; |
---|
1042 | int offset, indent; |
---|
1043 | { |
---|
1044 | int first_time=1, indi, indj, indk, indl; |
---|
1045 | int len; |
---|
1046 | |
---|
1047 | len = Lenstr(string)-1; |
---|
1048 | for(indi=0; indi<len; indi+=(indj+1)) { |
---|
1049 | |
---|
1050 | if(first_time) |
---|
1051 | indj=GBMAXCHAR-offset-Lenstr(key)-1; |
---|
1052 | else indj=GBMAXCHAR-offset-indent-1; |
---|
1053 | |
---|
1054 | fprintf(fp, " "); |
---|
1055 | |
---|
1056 | if(!first_time) { |
---|
1057 | for(indl=0; indl<(offset+indent); indl++) |
---|
1058 | fprintf(fp, " "); |
---|
1059 | } else { |
---|
1060 | for(indl=0; indl<offset; indl++) |
---|
1061 | fprintf(fp, " "); |
---|
1062 | fprintf(fp, "%s", key); |
---|
1063 | first_time = 0; |
---|
1064 | } |
---|
1065 | if(Lenstr(string+indi)>indj) { |
---|
1066 | |
---|
1067 | /* search for proper termination of a line */ |
---|
1068 | for(;indj>=0&&!last_word(string[indj+indi]); |
---|
1069 | indj--) ; |
---|
1070 | |
---|
1071 | /* print left margine */ |
---|
1072 | if(string[indi]==' ') indk = 1; |
---|
1073 | else indk = 0; |
---|
1074 | |
---|
1075 | for(; indk<indj; indk++) |
---|
1076 | fprintf(fp, "%c", string[indi+indk]); |
---|
1077 | |
---|
1078 | /* leave out the last space, if there is any */ |
---|
1079 | if(string[indi+indj]!=' ') |
---|
1080 | fprintf(fp, "%c", string[indi+indj]); |
---|
1081 | fprintf(fp, "\n"); |
---|
1082 | |
---|
1083 | } else fprintf(fp, "%s", string+indi); |
---|
1084 | |
---|
1085 | } /* for each char */ |
---|
1086 | } |
---|
1087 | /* --------------------------------------------------------------- |
---|
1088 | * Fcuntion genbank_out_origin(). |
---|
1089 | * Output sequence data in genbank format. |
---|
1090 | */ |
---|
1091 | void |
---|
1092 | genbank_out_origin(fp) |
---|
1093 | FILE *fp; |
---|
1094 | { |
---|
1095 | |
---|
1096 | int indi, indj, indk; |
---|
1097 | |
---|
1098 | fprintf(fp, "ORIGIN\n"); |
---|
1099 | |
---|
1100 | for(indi=0, indj=0, indk=1; indi<data.seq_length; indi++) |
---|
1101 | { |
---|
1102 | if((indk % 60)==1) fprintf(fp, " %6d ", indk); |
---|
1103 | fprintf(fp, "%c", data.sequence[indi]); |
---|
1104 | indj++; |
---|
1105 | |
---|
1106 | /* blank space follows every 10 bases, but not before '\n' */ |
---|
1107 | if((indk % 60)==0) { fprintf(fp, "\n"); indj=0; } |
---|
1108 | else if(indj==10&&indi!=(data.seq_length-1)) |
---|
1109 | { fprintf(fp, " "); indj=0; } |
---|
1110 | indk++; |
---|
1111 | } |
---|
1112 | |
---|
1113 | if((indk % 60)!=1) fprintf(fp, "\n"); |
---|
1114 | fprintf(fp, "//\n"); |
---|
1115 | } |
---|
1116 | /* ----------------------------------------------------------- |
---|
1117 | * Function genbank_to_genbank(). |
---|
1118 | * Convert from genbank to genbank. |
---|
1119 | */ |
---|
1120 | void |
---|
1121 | genbank_to_genbank(inf, outf) |
---|
1122 | char *inf, *outf; |
---|
1123 | { |
---|
1124 | FILE *IFP, *ofp; |
---|
1125 | FILE_BUFFER ifp; |
---|
1126 | char temp[TOKENNUM]; |
---|
1127 | |
---|
1128 | if((IFP=fopen(inf, "r"))==NULL) { |
---|
1129 | sprintf(temp, |
---|
1130 | "Cannot open input file %s, exit\n", inf); |
---|
1131 | error(35, temp); |
---|
1132 | } |
---|
1133 | ifp = create_FILE_BUFFER(inf, IFP); |
---|
1134 | if((ofp=fopen(outf, "w"))==NULL) { |
---|
1135 | sprintf(temp, |
---|
1136 | "Cannot open output file %s, exit\n", outf); |
---|
1137 | error(36, temp); |
---|
1138 | } |
---|
1139 | init(); |
---|
1140 | init_genbank(); |
---|
1141 | /* rewind(ifp); */ |
---|
1142 | while(genbank_in(ifp)!=EOF) { |
---|
1143 | data.numofseq++; |
---|
1144 | genbank_out(ofp); |
---|
1145 | init_genbank(); |
---|
1146 | } |
---|
1147 | |
---|
1148 | #ifdef log |
---|
1149 | fprintf(stderr, |
---|
1150 | "Total %d sequences have been processed\n", |
---|
1151 | data.numofseq); |
---|
1152 | #endif |
---|
1153 | |
---|
1154 | destroy_FILE_BUFFER(ifp); fclose(ofp); |
---|
1155 | } |
---|
1156 | /* ----------------------------------------------------------- |
---|
1157 | * Function init_reference(). |
---|
1158 | * Init. new reference record(init. value is "\n"). |
---|
1159 | */ |
---|
1160 | void |
---|
1161 | init_reference(ref, flag) |
---|
1162 | Reference *ref; |
---|
1163 | int flag; |
---|
1164 | { |
---|
1165 | /* char *Dupstr(); */ |
---|
1166 | |
---|
1167 | if(flag==REF) ref->ref = Dupstr("\n"); |
---|
1168 | if(flag!=AUTHOR) ref->author = Dupstr("\n"); |
---|
1169 | if(flag!=JOURNAL) ref->journal = Dupstr("\n"); |
---|
1170 | if(flag!=TITLE) ref->title = Dupstr("\n"); |
---|
1171 | if(flag!=STANDARD) ref->standard = Dupstr("\n"); |
---|
1172 | } |
---|