1 | /* -------------- genbank related subroutines ----------------- */ |
---|
2 | |
---|
3 | #include <stdio.h> |
---|
4 | #include <ctype.h> |
---|
5 | #include "convert.h" |
---|
6 | #include "global.h" |
---|
7 | |
---|
8 | #define NOPERIOD 0 |
---|
9 | #define PERIOD 1 |
---|
10 | |
---|
11 | extern int warning_out; |
---|
12 | /* ------------------------------------------------------------ */ |
---|
13 | /* Function init_genbank(). |
---|
14 | /* Initialize genbank entry. |
---|
15 | */ |
---|
16 | void |
---|
17 | init_genbank() { |
---|
18 | |
---|
19 | void Freespace(); |
---|
20 | char *Dupstr(); |
---|
21 | int indi; |
---|
22 | |
---|
23 | /* initialize genbank format */ |
---|
24 | |
---|
25 | Freespace(&(data.gbk.locus)); |
---|
26 | Freespace(&(data.gbk.definition)); |
---|
27 | Freespace(&(data.gbk.accession)); |
---|
28 | Freespace(&(data.gbk.keywords)); |
---|
29 | Freespace(&(data.gbk.source)); |
---|
30 | Freespace(&(data.gbk.organism)); |
---|
31 | for(indi=0; indi<data.gbk.numofref; indi++) { |
---|
32 | Freespace(&(data.gbk.reference[indi].ref)); |
---|
33 | Freespace(&(data.gbk.reference[indi].author)); |
---|
34 | Freespace(&(data.gbk.reference[indi].title)); |
---|
35 | Freespace(&(data.gbk.reference[indi].journal)); |
---|
36 | Freespace(&(data.gbk.reference[indi].standard)); |
---|
37 | } |
---|
38 | Freespace(&(data.gbk.reference)); |
---|
39 | Freespace(&(data.gbk.comments.orginf.source)); |
---|
40 | Freespace(&(data.gbk.comments.orginf.cc)); |
---|
41 | Freespace(&(data.gbk.comments.orginf.formname)); |
---|
42 | Freespace(&(data.gbk.comments.orginf.nickname)); |
---|
43 | Freespace(&(data.gbk.comments.orginf.commname)); |
---|
44 | Freespace(&(data.gbk.comments.orginf.hostorg)); |
---|
45 | Freespace(&(data.gbk.comments.seqinf.RDPid)); |
---|
46 | Freespace(&(data.gbk.comments.seqinf.gbkentry)); |
---|
47 | Freespace(&(data.gbk.comments.seqinf.methods)); |
---|
48 | Freespace(&(data.gbk.comments.others)); |
---|
49 | data.gbk.locus=Dupstr("\n"); |
---|
50 | data.gbk.definition=Dupstr("\n"); |
---|
51 | data.gbk.accession=Dupstr("\n"); |
---|
52 | data.gbk.keywords=Dupstr("\n"); |
---|
53 | data.gbk.source=Dupstr("\n"); |
---|
54 | data.gbk.organism=Dupstr("\n"); |
---|
55 | data.gbk.numofref=0; |
---|
56 | data.gbk.comments.orginf.exist=0; |
---|
57 | data.gbk.comments.orginf.source=Dupstr("\n"); |
---|
58 | data.gbk.comments.orginf.cc=Dupstr("\n"); |
---|
59 | data.gbk.comments.orginf.formname=Dupstr("\n"); |
---|
60 | data.gbk.comments.orginf.nickname=Dupstr("\n"); |
---|
61 | data.gbk.comments.orginf.commname=Dupstr("\n"); |
---|
62 | data.gbk.comments.orginf.hostorg=Dupstr("\n"); |
---|
63 | data.gbk.comments.seqinf.exist=0; |
---|
64 | data.gbk.comments.seqinf.RDPid=Dupstr("\n"); |
---|
65 | data.gbk.comments.seqinf.gbkentry=Dupstr("\n"); |
---|
66 | data.gbk.comments.seqinf.methods=Dupstr("\n"); |
---|
67 | data.gbk.comments.others=NULL; |
---|
68 | data.gbk.comments.seqinf.comp5=' '; |
---|
69 | data.gbk.comments.seqinf.comp3=' '; |
---|
70 | } |
---|
71 | /* ----------------------------------------------------------- */ |
---|
72 | /* Function genbank_in(). |
---|
73 | /* Read in one genbank entry. |
---|
74 | */ |
---|
75 | char |
---|
76 | genbank_in(fp) |
---|
77 | FILE *fp; |
---|
78 | { |
---|
79 | char line[LINENUM], key[TOKENNUM], temp[LINENUM]; |
---|
80 | char *Fgetline(),*eof, eoen; |
---|
81 | char *genbank_one_entry_in(); |
---|
82 | char *genbank_source(), *genbank_reference(); |
---|
83 | char *genbank_comments(), *genbank_origin(); |
---|
84 | char *genbank_skip_unidentified(); |
---|
85 | void genbank_key_word(), warning(), error(); |
---|
86 | void Append_char(); |
---|
87 | void genbank_verify_accession(), genbank_verify_keywords(); |
---|
88 | int Lenstr(); |
---|
89 | static int count = 0; |
---|
90 | |
---|
91 | eoen=' '; |
---|
92 | /* end-of-entry, set to be 'y' after '//' is read */ |
---|
93 | for(eof=Fgetline(line, LINENUM, fp);eof!=NULL&&eoen!='y'; ) { |
---|
94 | |
---|
95 | if(Lenstr(line)<=1) { |
---|
96 | eof=Fgetline(line, LINENUM, fp); |
---|
97 | continue; /* empty line, skip */ |
---|
98 | } |
---|
99 | |
---|
100 | genbank_key_word(line, 0, key, TOKENNUM); |
---|
101 | |
---|
102 | eoen='n'; |
---|
103 | |
---|
104 | if((Cmpstr(key, "LOCUS"))==EQ) { |
---|
105 | |
---|
106 | eof = genbank_one_entry_in(&data.gbk.locus, line, fp); |
---|
107 | if(Lenstr(data.gbk.locus)<61) warning(14, "LOCUS data might be incomplete"); |
---|
108 | |
---|
109 | } else if((Cmpstr(key, "DEFINITION"))==EQ) { |
---|
110 | |
---|
111 | eof = genbank_one_entry_in(&data.gbk.definition, line, fp); |
---|
112 | |
---|
113 | /* correct missing '.' at the end */ |
---|
114 | Append_char(&(data.gbk.definition), '.'); |
---|
115 | |
---|
116 | } else if((Cmpstr(key, "ACCESSION"))==EQ) { |
---|
117 | |
---|
118 | eof = genbank_one_entry_in(&data.gbk.accession, line, fp); |
---|
119 | |
---|
120 | genbank_verify_accession(); |
---|
121 | |
---|
122 | } else if((Cmpstr(key, "KEYWORDS"))==EQ) { |
---|
123 | |
---|
124 | eof = genbank_one_entry_in(&data.gbk.keywords, line, fp); |
---|
125 | genbank_verify_keywords(); |
---|
126 | |
---|
127 | } else if((Cmpstr(key, "SOURCE"))==EQ) { |
---|
128 | eof = genbank_source(line, fp); |
---|
129 | /* correct missing '.' at the end */ |
---|
130 | Append_char(&(data.gbk.source), '.'); |
---|
131 | Append_char(&(data.gbk.organism), '.'); |
---|
132 | } else if((Cmpstr(key, "REFERENCE"))==EQ) { |
---|
133 | eof = genbank_reference(line, fp); |
---|
134 | } else if((Cmpstr(key, "COMMENTS"))==EQ) { |
---|
135 | eof = genbank_comments(line, fp); |
---|
136 | } else if((Cmpstr(key, "COMMENT"))==EQ) { |
---|
137 | eof = genbank_comments(line, fp); |
---|
138 | } else if((Cmpstr(key, "ORIGIN"))==EQ) { |
---|
139 | eof = genbank_origin(line, fp); |
---|
140 | eoen = 'y'; |
---|
141 | } else { /* unidentified key word */ |
---|
142 | eof = genbank_skip_unidentified(line, fp, 2); |
---|
143 | } |
---|
144 | /* except "ORIGIN", at the end of all the other cases, |
---|
145 | /* a new line has already read in, so no further read |
---|
146 | /* is necessary*/ |
---|
147 | } /* for loop to read an entry line by line */ |
---|
148 | |
---|
149 | if(eoen=='n') { |
---|
150 | warning(86, "Reach EOF before sequence data is read."); |
---|
151 | return(EOF); |
---|
152 | } |
---|
153 | if(eof==NULL) return(EOF); |
---|
154 | else return(EOF+1); |
---|
155 | |
---|
156 | } |
---|
157 | /* ------------------------------------------------------------- */ |
---|
/* Function genbank_key_word().
 *   Copy the keyword that starts at line[index] into key.
 *   The keyword ends at the first blank, tab, newline or NUL, and
 *   never extends past column 12 of the line.
 *
 *   line    input line; NULL yields an empty key.
 *   index   position in line where the keyword starts.
 *   key     destination buffer.
 *   length  size of key in bytes; at most length-1 characters are
 *           stored so the result is always NUL-terminated inside the
 *           buffer.  (The original bound was written as
 *           (index-indi)<length, which is never false, so the size
 *           was not actually enforced.)
 */
void
genbank_key_word(line, index, key, length)
char    *line;
int     index;
char    *key;
int     length;
{
    int     src, dst;

    if(key == NULL || length <= 0) return;

    key[0] = '\0';
    if(line == NULL) return;

    for(src = index, dst = 0;
        dst < length-1 && src < 12
        && line[src] != ' ' && line[src] != '\t'
        && line[src] != '\n' && line[src] != '\0';
        src++, dst++)
        key[dst] = line[src];

    key[dst] = '\0';
}
---|
180 | /* ------------------------------------------------------------- */ |
---|
/* Function genbank_comment_subkey_word().
 *   Copy the sub-keyword of a comment line, starting at line[index],
 *   into key.  The scan stops at ':', '(', tab, newline or NUL; a
 *   terminating ':' is kept as part of the key.
 *
 *   line    input line; NULL yields an empty key and returns index.
 *   index   position in line where the sub-keyword starts.
 *   key     destination buffer.
 *   length  size of key in bytes (must be at least 1).  At most
 *           length-1 characters are stored, so a long comment line
 *           without any delimiter can no longer overrun key.  (The
 *           original bound (index-indi)<length was always true.)
 *
 *   Returns the index just after the character the scan stopped on.
 *   Note that when the scan stops on the NUL terminator this is one
 *   past the end of the string, as in the original code; callers
 *   only use the value after a key ending in ':'.
 */
int
genbank_comment_subkey_word(line, index, key, length)
char    *line;
int     index;
char    *key;
int     length;
{
    int     src, dst;

    if(line == NULL) { key[0] = '\0'; return(index); }

    for(src = index, dst = 0;
        dst < length-1
        && line[src] != ':' && line[src] != '\t' && line[src] != '\n'
        && line[src] != '\0' && line[src] != '(';
        src++, dst++)
        key[dst] = line[src];

    /* keep the ':' only when there is still room for it and the NUL */
    if(line[src] == ':' && dst < length-1) key[dst++] = ':';

    key[dst] = '\0';

    return(src+1);
}
---|
207 | /* ------------------------------------------------------------ */ |
---|
/* Function genbank_check_blanks().
 *   Check whether line begins with at least numb columns of white
 *   space.  A blank counts as one column; a tab advances to the next
 *   multiple of 8 columns.
 *
 *   The original code stored the tab-expanded column back into the
 *   string index, so after a tab it skipped characters and could
 *   read beyond the end of the string.  The column count and the
 *   string position are now tracked separately.
 *
 *   Returns 1 if the first numb columns are blank, 0 otherwise
 *   (including when the line ends first).  numb <= 0 always gives 1.
 */
int
genbank_check_blanks(line, numb)
char    *line;
int     numb;
{
    int     pos, col;

    if(line == NULL) return(numb <= 0);

    for(pos = 0, col = 0; col < numb; pos++) {
        if(line[pos] == ' ')
            col++;
        else if(line[pos] == '\t')
            col = (col/8 + 1)*8;      /* next tab stop */
        else
            return(0);                /* text, newline or NUL */
    }

    return(1);
}
---|
228 | /* ---------------------------------------------------------------- */ |
---|
229 | /* Function genbank_continue_line(). |
---|
230 | /* if there are (numb) of blanks at the beginning |
---|
231 | /* of line, it is a continue line of the |
---|
232 | /* current command. |
---|
233 | */ |
---|
234 | char |
---|
235 | *genbank_continue_line(string, line, numb, fp) |
---|
236 | char **string, *line; |
---|
237 | int numb; /* number of blanks needed to define a continue line */ |
---|
238 | FILE *fp; |
---|
239 | { |
---|
240 | int Lenstr(), ind; |
---|
241 | int genbank_check_blanks(), Skip_white_space(); |
---|
242 | char *Fgetline(), *eof, temp[LINENUM]; |
---|
243 | void Cpystr(), Append_rp_eoln(); |
---|
244 | |
---|
245 | /* check continue lines */ |
---|
246 | for(eof=Fgetline(line, LINENUM, fp); |
---|
247 | eof!=NULL&&(genbank_check_blanks(line, numb) |
---|
248 | ||line[0]=='\n'); eof=Fgetline(line, LINENUM, fp)) { |
---|
249 | |
---|
250 | if(line[0]=='\n') continue; /* empty line is allowed */ |
---|
251 | /* remove end-of-line, if there is any */ |
---|
252 | ind=Skip_white_space(line, 0); |
---|
253 | Cpystr(temp, (line+ind)); |
---|
254 | Append_rp_eoln(string, temp); |
---|
255 | |
---|
256 | } /* end of continue line checking */ |
---|
257 | |
---|
258 | return(eof); |
---|
259 | } |
---|
260 | /* ------------------------------------------------------------ */ |
---|
/* Function genbank_one_entry_in().
 *   Read one keyword field: the text of the keyword line from
 *   column 12 on (leading white space skipped), followed by all of
 *   its continuation lines (lines starting with 12 blanks).
 *
 *   datastring  field to fill; its previous contents are released.
 *   line        the keyword line on entry, the first line after the
 *               field on return.
 *   fp          input stream.
 *
 *   A keyword line shorter than 12 characters has no data part.  The
 *   original code still read from line+12 in that case, i.e. beyond
 *   the terminator, picking up whatever an earlier, longer line had
 *   left in the buffer.  Such a line now yields the empty field "\n".
 *
 *   Returns the result of the last line read (NULL at end of input).
 */
char
*genbank_one_entry_in(datastring, line, fp)
char    **datastring, *line;
FILE    *fp;
{
    int     index, Skip_white_space(), Lenstr();
    char    *eof, *genbank_continue_line(), *Dupstr();
    void    Freespace();

    Freespace(datastring);

    if(Lenstr(line) < 12) {
        /* nothing after the keyword on this line */
        *datastring = Dupstr("\n");
    } else {
        index = Skip_white_space(line, 12);
        *datastring = Dupstr(line+index);
    }

    eof = (char*)genbank_continue_line
        (datastring, line, 12, fp);

    return(eof);
}
---|
281 | /* ------------------------------------------------------------ */ |
---|
/* Function genbank_one_comment_entry().
 *   Read one sub-entry of the comment section: the text that follows
 *   start_index on the current line (leading white space skipped)
 *   plus every following line that starts with 20 blanks.
 *   The previous contents of *datastring are released first.
 *   Returns the result of the last line read (NULL at end of input).
 */
char
*genbank_one_comment_entry(datastring, line, start_index, fp)
char    **datastring, *line;
int     start_index;
FILE    *fp;
{
    int     text_at, Skip_white_space();
    char    *genbank_continue_line(), *Dupstr();
    void    Freespace();

    text_at = Skip_white_space(line, start_index);

    Freespace(datastring);
    *datastring = Dupstr(line+text_at);

    return((char*)genbank_continue_line(datastring, line, 20, fp));
}
---|
302 | /* -------------------------------------------------------------- */ |
---|
303 | /* Function genbank_source() |
---|
304 | /* Read in genbank SOURCE lines and also ORGANISM |
---|
305 | /* lines. |
---|
306 | */ |
---|
307 | char |
---|
308 | *genbank_source(line, fp) |
---|
309 | char *line; |
---|
310 | FILE *fp; |
---|
311 | { |
---|
312 | int index, Skip_white_space(); |
---|
313 | char *eof, *genbank_continue_line(), *Dupstr(); |
---|
314 | char *genbank_one_entry_in(); |
---|
315 | char *dummy, key[TOKENNUM]; |
---|
316 | void Freespace(), genbank_key_word(); |
---|
317 | |
---|
318 | eof = genbank_one_entry_in(&(data.gbk.source), line, fp); |
---|
319 | genbank_key_word(line, 2, key, TOKENNUM); |
---|
320 | if(Cmpstr(key, "ORGANISM")==EQ) { |
---|
321 | index = Skip_white_space(line, 12); |
---|
322 | data.gbk.organism = Dupstr(line+index); |
---|
323 | dummy = (char*)Dupstr("\n"); |
---|
324 | eof = (char*)genbank_continue_line(&(dummy), line, 12, fp); |
---|
325 | Freespace(&dummy); |
---|
326 | } |
---|
327 | return(eof); |
---|
328 | } |
---|
329 | /* -------------------------------------------------------------- */ |
---|
330 | /* Function genbank_reference(). |
---|
331 | /* Read in genbank REFERENCE lines. |
---|
332 | */ |
---|
333 | char |
---|
334 | *genbank_reference(line, fp) |
---|
335 | char *line; |
---|
336 | FILE *fp; |
---|
337 | { |
---|
338 | #define AUTH 0 |
---|
339 | #define TIT 1 |
---|
340 | #define JOUR 2 |
---|
341 | void genbank_key_word(), error(), warning(); |
---|
342 | void Freespace(), init_reference(); |
---|
343 | void Append_char(); |
---|
344 | char *eof, key[TOKENNUM]; |
---|
345 | char *Dupstr(), *genbank_skip_unidentified(); |
---|
346 | char temp[LINENUM]; |
---|
347 | char *Reallocspace(), *genbank_one_entry_in(); |
---|
348 | int index, indi, refnum; |
---|
349 | int Cmpstr(), Skip_white_space(); |
---|
350 | int acount=0, tcount=0, jcount=0, scount=0; |
---|
351 | |
---|
352 | sscanf(line+12, "%d", &refnum); |
---|
353 | if(refnum <= data.gbk.numofref) { |
---|
354 | sprintf(temp, |
---|
355 | "Might redefine reference %d", refnum); |
---|
356 | warning(17, temp); |
---|
357 | eof = genbank_skip_unidentified(line, fp, 12); |
---|
358 | } else { |
---|
359 | data.gbk.numofref = refnum; |
---|
360 | data.gbk.reference = (Reference*)Reallocspace |
---|
361 | (data.gbk.reference, (unsigned) |
---|
362 | (sizeof(Reference)*(data.gbk.numofref))); |
---|
363 | /* initialize the buffer */ |
---|
364 | init_reference(&(data.gbk.reference[refnum-1]), ALL); |
---|
365 | eof = genbank_one_entry_in |
---|
366 | (&(data.gbk.reference[refnum-1].ref),line, fp); |
---|
367 | } |
---|
368 | /* find the reference listings */ |
---|
369 | for( ;eof!=NULL&&line[0]==' '&&line[1]==' '; ) |
---|
370 | { |
---|
371 | /* find the key word */ |
---|
372 | genbank_key_word(line, 2, key, TOKENNUM); |
---|
373 | /* skip white space */ |
---|
374 | if((Cmpstr(key, "AUTHORS"))==EQ) { |
---|
375 | eof = genbank_one_entry_in( |
---|
376 | &(data.gbk.reference[refnum-1].author), |
---|
377 | line, fp); |
---|
378 | |
---|
379 | /* add '.' if missing at the end */ |
---|
380 | Append_char |
---|
381 | (&(data.gbk.reference[refnum-1].author),'.'); |
---|
382 | |
---|
383 | if(acount==0) acount=1; |
---|
384 | else { |
---|
385 | |
---|
386 | sprintf(temp, "AUTHORS of REFERENCE %d is redefined" |
---|
387 | , refnum); |
---|
388 | warning(10, temp); |
---|
389 | } |
---|
390 | |
---|
391 | } else if((Cmpstr(key, "TITLE"))==EQ) { |
---|
392 | eof = genbank_one_entry_in( |
---|
393 | &(data.gbk.reference[refnum-1].title), |
---|
394 | line, fp); |
---|
395 | if(tcount==0) tcount=1; |
---|
396 | else { |
---|
397 | |
---|
398 | sprintf(temp, "TITLE of REFERENCE %d is redefined" |
---|
399 | , refnum); |
---|
400 | |
---|
401 | warning(11, temp); |
---|
402 | } |
---|
403 | } else if((Cmpstr(key, "JOURNAL"))==EQ) { |
---|
404 | |
---|
405 | eof = genbank_one_entry_in( |
---|
406 | &(data.gbk.reference[refnum-1].journal), |
---|
407 | line, fp); |
---|
408 | |
---|
409 | if(jcount==0) jcount=1; |
---|
410 | else { |
---|
411 | |
---|
412 | sprintf(temp, |
---|
413 | "JOURNAL of REFERENCE %d is redefined", refnum); |
---|
414 | |
---|
415 | warning(12, temp); |
---|
416 | } |
---|
417 | } else if((Cmpstr(key, "STANDARD"))==EQ) { |
---|
418 | |
---|
419 | eof = genbank_one_entry_in( |
---|
420 | &(data.gbk.reference[refnum-1].standard), |
---|
421 | line, fp); |
---|
422 | |
---|
423 | if(scount==0) scount=1; |
---|
424 | else { |
---|
425 | |
---|
426 | sprintf(temp, |
---|
427 | "STANDARD of REFERENCE %d is redefined", refnum); |
---|
428 | |
---|
429 | warning(13, temp); |
---|
430 | } |
---|
431 | } else { |
---|
432 | |
---|
433 | sprintf(temp, |
---|
434 | "Unidentified REFERENCE subkeyword: %s#", key); |
---|
435 | |
---|
436 | warning(18,temp); |
---|
437 | eof = genbank_skip_unidentified(line, fp, 12); |
---|
438 | } |
---|
439 | } /* for loop */ |
---|
440 | return(eof); |
---|
441 | } |
---|
442 | /* -------------------------------------------------------------- */ |
---|
443 | /* Function genbank_comments(). |
---|
444 | /* Read in genbank COMMENTS lines. |
---|
445 | */ |
---|
446 | char |
---|
447 | *genbank_comments(line, fp) |
---|
448 | char *line; |
---|
449 | FILE *fp; |
---|
450 | { |
---|
451 | int index, indi, ptr, genbank_check_blanks(); |
---|
452 | int Lenstr(), Skip_white_space(); |
---|
453 | int Cmpstr(), genbank_comment_subkey_word(); |
---|
454 | char *Fgetline(), *Dupstr(), *eof; |
---|
455 | char key[TOKENNUM]; |
---|
456 | char *genbank_one_comment_entry(); |
---|
457 | void Freespace(), Append(); |
---|
458 | |
---|
459 | if(Lenstr(line)<=12) { |
---|
460 | if((eof = Fgetline(line, LINENUM, fp))==NULL) |
---|
461 | return(eof); |
---|
462 | } |
---|
463 | /* make up data to match the logic reasoning for next statment */ |
---|
464 | for(indi=0; indi<12; line[indi++]=' ') ; eof = "NONNULL"; |
---|
465 | |
---|
466 | for( ;eof!=NULL&&(genbank_check_blanks(line, 12) |
---|
467 | ||line[0]=='\n'); ) { |
---|
468 | |
---|
469 | if(line[0]=='\n') { /* skip empty line */ |
---|
470 | eof=Fgetline(line, LINENUM, fp); |
---|
471 | continue; |
---|
472 | } |
---|
473 | |
---|
474 | ptr = index = 12; |
---|
475 | |
---|
476 | index = Skip_white_space(line, index); |
---|
477 | index = genbank_comment_subkey_word |
---|
478 | (line, index, key, TOKENNUM); |
---|
479 | |
---|
480 | if(Cmpstr(key, "Source of strain:")==EQ) { |
---|
481 | eof = genbank_one_comment_entry |
---|
482 | (&(data.gbk.comments.orginf.source), line, index, fp); |
---|
483 | |
---|
484 | } else if(Cmpstr(key, "Culture collection:")==EQ) { |
---|
485 | |
---|
486 | eof = genbank_one_comment_entry |
---|
487 | (&(data.gbk.comments.orginf.cc), line, index, fp); |
---|
488 | |
---|
489 | } else if(Cmpstr(key, "Former name:")==EQ) { |
---|
490 | |
---|
491 | eof = genbank_one_comment_entry |
---|
492 | (&(data.gbk.comments.orginf.formname), line, index, fp); |
---|
493 | |
---|
494 | } else if(Cmpstr(key, "Alternate name:")==EQ) { |
---|
495 | |
---|
496 | eof = genbank_one_comment_entry |
---|
497 | (&(data.gbk.comments.orginf.nickname), line, index, fp); |
---|
498 | |
---|
499 | } else if(Cmpstr(key, "Common name:")==EQ) { |
---|
500 | |
---|
501 | eof = genbank_one_comment_entry |
---|
502 | (&(data.gbk.comments.orginf.commname), line, index, fp); |
---|
503 | |
---|
504 | } else if(Cmpstr(key, "Host organism:")==EQ) { |
---|
505 | |
---|
506 | eof = genbank_one_comment_entry |
---|
507 | (&(data.gbk.comments.orginf.hostorg), line, index, fp); |
---|
508 | |
---|
509 | } else if(Cmpstr(key, "RDP ID:")==EQ) { |
---|
510 | eof = genbank_one_comment_entry |
---|
511 | (&(data.gbk.comments.seqinf.RDPid), line, index, fp); |
---|
512 | |
---|
513 | } else if(Cmpstr(key, |
---|
514 | "Corresponding GenBank entry:")==EQ) { |
---|
515 | |
---|
516 | eof = genbank_one_comment_entry |
---|
517 | (&(data.gbk.comments.seqinf.gbkentry), line, index, fp); |
---|
518 | |
---|
519 | } else if(Cmpstr(key, "Sequencing methods:")==EQ) { |
---|
520 | |
---|
521 | eof = genbank_one_comment_entry |
---|
522 | (&(data.gbk.comments.seqinf.methods), line, index, fp); |
---|
523 | |
---|
524 | } else if(Cmpstr(key, "5' end complete:")==EQ) { |
---|
525 | sscanf(line+index, "%s", key); |
---|
526 | if(key[0]=='Y') |
---|
527 | data.gbk.comments.seqinf.comp5 = 'y'; |
---|
528 | else data.gbk.comments.seqinf.comp5 = 'n'; |
---|
529 | eof=Fgetline(line, LINENUM, fp); |
---|
530 | } else if(Cmpstr(key, "3' end complete:")==EQ) { |
---|
531 | sscanf(line+index, "%s", key); |
---|
532 | if(key[0]=='Y') |
---|
533 | data.gbk.comments.seqinf.comp3 = 'y'; |
---|
534 | else data.gbk.comments.seqinf.comp3 = 'n'; |
---|
535 | eof=Fgetline(line, LINENUM, fp); |
---|
536 | } else if(Cmpstr(key, |
---|
537 | "Sequence information ")==EQ) { |
---|
538 | /* do nothing */ |
---|
539 | data.gbk.comments.seqinf.exist = 1; |
---|
540 | eof=Fgetline(line, LINENUM, fp); |
---|
541 | } else if(Cmpstr(key, "Organism information")==EQ) { |
---|
542 | /* do nothing */ |
---|
543 | data.gbk.comments.orginf.exist = 1; |
---|
544 | eof=Fgetline(line, LINENUM, fp); |
---|
545 | } else { /* other comments */ |
---|
546 | |
---|
547 | if(data.gbk.comments.others == NULL) { |
---|
548 | data.gbk.comments.others |
---|
549 | =(char*)Dupstr(line+ptr); |
---|
550 | |
---|
551 | } else Append(&(data.gbk.comments.others), |
---|
552 | line+ptr); |
---|
553 | |
---|
554 | eof=Fgetline(line, LINENUM, fp); |
---|
555 | } |
---|
556 | } /* for loop */ |
---|
557 | |
---|
558 | return(eof); |
---|
559 | } |
---|
560 | /* -------------------------------------------------------------- */ |
---|
561 | /* Function genbank_origin(). |
---|
562 | /* Read in genbank sequence data. |
---|
563 | */ |
---|
564 | char |
---|
565 | *genbank_origin(line, fp) |
---|
566 | char *line; |
---|
567 | FILE *fp; |
---|
568 | { |
---|
569 | char *Fgetline(), *eof, *Reallocspace(); |
---|
570 | int index, Lenstr(); |
---|
571 | void warning(); |
---|
572 | |
---|
573 | data.seq_length = 0; |
---|
574 | /* read in whole sequence data */ |
---|
575 | for(eof=Fgetline(line, LINENUM, fp); |
---|
576 | eof!=NULL&&line[0]!='/'&&line[1]!='/'; |
---|
577 | eof=Fgetline(line, LINENUM, fp)) |
---|
578 | { |
---|
579 | /* empty line, skip */ |
---|
580 | if(Lenstr(line)<=1) continue; |
---|
581 | for(index=9; line[index]!='\n'&&line[index]!='\0'; |
---|
582 | index++) |
---|
583 | { |
---|
584 | if(line[index]!=' ' && data.seq_length>=data.max) { |
---|
585 | data.max += 100; |
---|
586 | |
---|
587 | data.sequence = (char*)Reallocspace(data.sequence, |
---|
588 | (unsigned)(sizeof(char)*data.max)); |
---|
589 | } |
---|
590 | if(line[index]!=' ') data.sequence[data.seq_length++] = line[index]; |
---|
591 | } |
---|
592 | if(data.seq_length>=data.max) { |
---|
593 | data.max += 100; |
---|
594 | |
---|
595 | data.sequence = (char*)Reallocspace(data.sequence, |
---|
596 | (unsigned)(sizeof(char)*data.max)); |
---|
597 | } |
---|
598 | data.sequence[data.seq_length] = '\0'; |
---|
599 | } |
---|
600 | |
---|
601 | return(eof); |
---|
602 | } |
---|
603 | /* --------------------------------------------------------------- */ |
---|
604 | /* Function genbank_skip_unidentified(). |
---|
605 | /* Skip the lines of unidentified keyword. |
---|
606 | */ |
---|
607 | char |
---|
608 | *genbank_skip_unidentified(line, fp, blank_num) |
---|
609 | char *line; |
---|
610 | FILE *fp; |
---|
611 | int blank_num; |
---|
612 | { |
---|
613 | char *Fgetline(), *eof; |
---|
614 | int genbank_check_blanks(); |
---|
615 | |
---|
616 | for(eof=Fgetline(line, LINENUM, fp); |
---|
617 | eof!=NULL&&genbank_check_blanks(line, blank_num); |
---|
618 | eof=Fgetline(line, LINENUM, fp)) ; |
---|
619 | |
---|
620 | return(eof); |
---|
621 | } |
---|
622 | /* --------------------------------------------------------------- */ |
---|
623 | /* Function genbank_verify_accession(). |
---|
624 | /* Verify accession information. |
---|
625 | */ |
---|
626 | void |
---|
627 | genbank_verify_accession() |
---|
628 | { |
---|
629 | int indi, index, len, Lenstr(), count, remainder; |
---|
630 | char temp[LONGTEXT], *Reallocspace(); |
---|
631 | void warning(); |
---|
632 | |
---|
633 | if(Cmpstr(data.gbk.accession, "No information\n")==EQ) return; |
---|
634 | len=Lenstr(data.gbk.accession); |
---|
635 | if((len % 7)!=0) { |
---|
636 | if(warning_out) |
---|
637 | fprintf(stderr, |
---|
638 | "\nACCESSION: %s", data.gbk.accession); |
---|
639 | warning(136, |
---|
640 | "Each accession number should be a six-character identifier."); |
---|
641 | } |
---|
642 | for(indi=count=0; indi<len-1; indi++) { |
---|
643 | remainder=indi % 7; |
---|
644 | switch(remainder) { |
---|
645 | case 0: |
---|
646 | count++; |
---|
647 | if(count>9){ |
---|
648 | if(warning_out) fprintf(stderr, |
---|
649 | "\nACCESSION: %s", data.gbk.accession); |
---|
650 | warning(137, |
---|
651 | "No more than 9 accession numbers are allowed in ACCESSION line."); |
---|
652 | data.gbk.accession[indi-1]='\n'; |
---|
653 | data.gbk.accession[indi]='\0'; |
---|
654 | data.gbk.accession = (char*)Reallocspace |
---|
655 | (data.gbk.accession, |
---|
656 | (unsigned)(sizeof(char)*indi)); |
---|
657 | return; |
---|
658 | } |
---|
659 | if(!isalpha(data.gbk.accession[indi])) { |
---|
660 | sprintf(temp, |
---|
661 | "The %d(th) accession number must start with a letter.", |
---|
662 | count); |
---|
663 | warning(138, temp); |
---|
664 | } |
---|
665 | break; |
---|
666 | case 1: |
---|
667 | case 2: |
---|
668 | case 3: |
---|
669 | case 4: |
---|
670 | case 5: |
---|
671 | if(!isdigit(data.gbk.accession[indi])) { |
---|
672 | sprintf(temp, |
---|
673 | "The last 5 characters of the %d(th) accession number should be all digits.", |
---|
674 | count); |
---|
675 | warning(140, temp); |
---|
676 | } |
---|
677 | break; |
---|
678 | case 6: |
---|
679 | if((indi!=(len-1)&&data.gbk.accession[indi]!=' ') |
---|
680 | ||(indi==(len-1)&&data.gbk.accession[indi]!='\n')) |
---|
681 | { |
---|
682 | if(warning_out) fprintf(stderr, |
---|
683 | "\nACCESSION: %s", data.gbk.accession); |
---|
684 | warning(139, |
---|
685 | "Accesssion numbers should be separated by a space."); |
---|
686 | data.gbk.accession[indi]=' '; |
---|
687 | } |
---|
688 | break; |
---|
689 | default: ; |
---|
690 | } |
---|
691 | } /* check every char of ACCESSION line. */ |
---|
692 | } |
---|
693 | /* ------------------------------------------------------------------ */ |
---|
694 | /* Function genbank_verify_keywords(). |
---|
695 | /* Verify keywords. |
---|
696 | */ |
---|
697 | void |
---|
698 | genbank_verify_keywords() { |
---|
699 | |
---|
700 | int indi, count, len, Lenstr(); |
---|
701 | void Append_char(), warning(); |
---|
702 | |
---|
703 | /* correct missing '.' at the end */ |
---|
704 | Append_char(&(data.gbk.keywords), '.'); |
---|
705 | |
---|
706 | for(indi=count=0, len=Lenstr(data.gbk.keywords); indi<len; indi++) |
---|
707 | if(data.gbk.keywords[indi]=='.') count++; |
---|
708 | |
---|
709 | if(count!=1) { |
---|
710 | if(warning_out) fprintf(stderr, |
---|
711 | "\nKEYWORDS: %s", data.gbk.keywords); |
---|
712 | warning(141, |
---|
713 | "No more than one period is allowed in KEYWORDS line."); |
---|
714 | } |
---|
715 | } |
---|
716 | /* --------------------------------------------------------------- */ |
---|
717 | /* Function genbank_in_locus(). |
---|
718 | /* Read in next genbank locus and sequence only. |
---|
719 | /* For use of converting to simple format(read in only simple |
---|
720 | /* information instead of whole records). |
---|
721 | */ |
---|
722 | char |
---|
723 | genbank_in_locus(fp) |
---|
724 | FILE *fp; |
---|
725 | { |
---|
726 | char line[LINENUM], key[TOKENNUM]; |
---|
727 | char *Fgetline(), *eof, eoen; |
---|
728 | char *genbank_one_entry_in(), *genbank_origin(); |
---|
729 | void genbank_key_word(), warning(), error(); |
---|
730 | |
---|
731 | eoen=' '; /* end-of-entry, set to be 'y' after '//' is read */ |
---|
732 | for(eof=Fgetline(line, LINENUM, fp); eof!=NULL&&eoen!='y'; ) { |
---|
733 | genbank_key_word(line, 0, key, TOKENNUM); |
---|
734 | if((Cmpstr(key, "ORIGIN"))==EQ) { |
---|
735 | eof = genbank_origin(line, fp); |
---|
736 | eoen = 'y'; |
---|
737 | } else if((Cmpstr(key, "LOCUS"))==EQ) { |
---|
738 | eof = genbank_one_entry_in(&data.gbk.locus, |
---|
739 | line, fp); |
---|
740 | } else eof=Fgetline(line, LINENUM, fp); |
---|
741 | } /* for loop to read an entry line by line */ |
---|
742 | |
---|
743 | if(eoen=='n') |
---|
744 | error(9, "Reach EOF before one entry is read, Exit"); |
---|
745 | |
---|
746 | if(eof==NULL) return(EOF); |
---|
747 | else return(EOF+1); |
---|
748 | |
---|
749 | } |
---|
750 | /* --------------------------------------------------------------- */ |
---|
751 | /* Function genbank_out(). |
---|
752 | /* Output in a genbank format. |
---|
753 | */ |
---|
754 | void |
---|
755 | genbank_out(fp) |
---|
756 | FILE *fp; |
---|
757 | { |
---|
758 | void genbank_print_lines(), genbank_out_origin(); |
---|
759 | void genbank_print_comment(), count_base(); |
---|
760 | char temp[LONGTEXT]; |
---|
761 | void genbank_out_one_entry(), genbank_out_one_comment(); |
---|
762 | int indi, indj, indk, length, Lenstr(), deterninator(); |
---|
763 | int base_a, base_t, base_g, base_c, base_other; |
---|
764 | |
---|
765 | /* Assume the last char of each field is '\n' */ |
---|
766 | genbank_out_one_entry(fp, data.gbk.locus, "LOCUS ", |
---|
767 | SEPNODEFINED, "", NOPERIOD); |
---|
768 | |
---|
769 | genbank_out_one_entry(fp, data.gbk.definition, |
---|
770 | "DEFINITION ", SEPNODEFINED, "", PERIOD); |
---|
771 | |
---|
772 | genbank_out_one_entry(fp, data.gbk.accession, |
---|
773 | "ACCESSION ", SEPNODEFINED, "", NOPERIOD); |
---|
774 | |
---|
775 | genbank_out_one_entry(fp, data.gbk.keywords, |
---|
776 | "KEYWORDS ", SEPDEFINED, ";", PERIOD); |
---|
777 | |
---|
778 | if(Lenstr(data.gbk.source)>1) { |
---|
779 | fprintf(fp, "SOURCE "); |
---|
780 | genbank_print_lines(fp, data.gbk.source, |
---|
781 | SEPNODEFINED, ""); |
---|
782 | if(Lenstr(data.gbk.organism)>1) { |
---|
783 | fprintf(fp, " ORGANISM "); |
---|
784 | genbank_print_lines(fp, data.gbk.organism, |
---|
785 | SEPNODEFINED, ""); |
---|
786 | } else fprintf(fp, " ORGANISM No information.\n"); |
---|
787 | } else if(Lenstr(data.gbk.organism)>1) { |
---|
788 | |
---|
789 | fprintf(fp, "SOURCE No information.\n ORGANISM "); |
---|
790 | genbank_print_lines(fp, data.gbk.organism, |
---|
791 | SEPNODEFINED, ""); |
---|
792 | } else fprintf(fp, |
---|
793 | "SOURCE No information.\n ORGANISM No information.\n"); |
---|
794 | |
---|
795 | if(data.gbk.numofref>0) { |
---|
796 | for(indi=0; indi<data.gbk.numofref; indi++) { |
---|
797 | |
---|
798 | if(Lenstr(data.gbk.reference[indi].ref)>1) { |
---|
799 | fprintf(fp, "REFERENCE "); |
---|
800 | genbank_print_lines(fp, |
---|
801 | data.gbk.reference[indi].ref, |
---|
802 | SEPNODEFINED, ""); |
---|
803 | } else fprintf(fp, |
---|
804 | "REFERENCE %d\n", indi+1); |
---|
805 | |
---|
806 | genbank_out_one_entry(fp, |
---|
807 | data.gbk.reference[indi].author, |
---|
808 | " AUTHORS ", SEPDEFINED, " ", |
---|
809 | NOPERIOD); |
---|
810 | |
---|
811 | if(Lenstr(data.gbk.reference[indi].title)>1) |
---|
812 | { |
---|
813 | fprintf(fp, " TITLE "); |
---|
814 | genbank_print_lines(fp, |
---|
815 | data.gbk.reference[indi].title, |
---|
816 | SEPNODEFINED, ""); |
---|
817 | } |
---|
818 | |
---|
819 | genbank_out_one_entry(fp, |
---|
820 | data.gbk.reference[indi].journal, |
---|
821 | " JOURNAL ", SEPNODEFINED, "", |
---|
822 | NOPERIOD); |
---|
823 | |
---|
824 | genbank_out_one_entry(fp, |
---|
825 | data.gbk.reference[indi].standard, |
---|
826 | " STANDARD ", SEPNODEFINED, "", |
---|
827 | NOPERIOD); |
---|
828 | |
---|
829 | } /* subkey loop */ |
---|
830 | } else { |
---|
831 | fprintf(fp, "REFERENCE 1\n"); |
---|
832 | fprintf(fp, " AUTHORS No information\n"); |
---|
833 | fprintf(fp, " JOURNAL No information\n"); |
---|
834 | fprintf(fp, " TITLE No information\n"); |
---|
835 | fprintf(fp, " STANDARD No information\n"); |
---|
836 | } |
---|
837 | |
---|
838 | if(data.gbk.comments.orginf.exist==1|| |
---|
839 | data.gbk.comments.seqinf.exist == 1 || |
---|
840 | Lenstr(data.gbk.comments.others)>0) |
---|
841 | { |
---|
842 | fprintf(fp, "COMMENTS "); |
---|
843 | |
---|
844 | if(data.gbk.comments.orginf.exist==1) { |
---|
845 | fprintf(fp, "Organism information\n"); |
---|
846 | |
---|
847 | genbank_out_one_comment(fp, data.gbk.comments.orginf.source, |
---|
848 | "Source of strain: ", |
---|
849 | COMMSKINDENT, COMMCNINDENT); |
---|
850 | |
---|
851 | genbank_out_one_comment(fp, |
---|
852 | data.gbk.comments.orginf.cc, |
---|
853 | "Culture collection: ", |
---|
854 | COMMSKINDENT, COMMCNINDENT); |
---|
855 | |
---|
856 | genbank_out_one_comment(fp, |
---|
857 | data.gbk.comments.orginf.formname, |
---|
858 | "Former name: ", |
---|
859 | COMMSKINDENT, COMMCNINDENT); |
---|
860 | |
---|
861 | genbank_out_one_comment(fp, |
---|
862 | data.gbk.comments.orginf.nickname, |
---|
863 | "Alternate name: ", |
---|
864 | COMMSKINDENT, COMMCNINDENT); |
---|
865 | |
---|
866 | genbank_out_one_comment(fp, |
---|
867 | data.gbk.comments.orginf.commname, |
---|
868 | "Common name: ", |
---|
869 | COMMSKINDENT, COMMCNINDENT); |
---|
870 | |
---|
871 | genbank_out_one_comment(fp, |
---|
872 | data.gbk.comments.orginf.hostorg, |
---|
873 | "Host organism: ", |
---|
874 | COMMSKINDENT, COMMCNINDENT); |
---|
875 | |
---|
876 | if(data.gbk.comments.seqinf.exist == 1 || |
---|
877 | Lenstr(data.gbk.comments.others)>0) |
---|
878 | fprintf(fp, " "); |
---|
879 | } /* organism information */ |
---|
880 | |
---|
881 | if(data.gbk.comments.seqinf.exist==1) { |
---|
882 | |
---|
883 | fprintf(fp, |
---|
884 | "Sequence information (bases 1 to %d)\n", |
---|
885 | data.seq_length); |
---|
886 | } |
---|
887 | |
---|
888 | genbank_out_one_comment(fp, |
---|
889 | data.gbk.comments.seqinf.RDPid, |
---|
890 | "RDP ID: ", |
---|
891 | COMMSKINDENT, COMMCNINDENT); |
---|
892 | |
---|
893 | genbank_out_one_comment(fp, |
---|
894 | data.gbk.comments.seqinf.gbkentry, |
---|
895 | "Corresponding GenBank entry: ", |
---|
896 | COMMSKINDENT, COMMCNINDENT); |
---|
897 | |
---|
898 | genbank_out_one_comment(fp, |
---|
899 | data.gbk.comments.seqinf.methods, |
---|
900 | "Sequencing methods: ", |
---|
901 | COMMSKINDENT, COMMCNINDENT); |
---|
902 | |
---|
903 | if(data.gbk.comments.seqinf.comp5=='n') |
---|
904 | fprintf(fp, |
---|
905 | " 5' end complete: No\n"); |
---|
906 | |
---|
907 | else if(data.gbk.comments.seqinf.comp5=='y') |
---|
908 | fprintf(fp, |
---|
909 | " 5' end complete: Yes\n"); |
---|
910 | |
---|
911 | if(data.gbk.comments.seqinf.comp3=='n') |
---|
912 | fprintf(fp, |
---|
913 | " 3' end complete: No\n"); |
---|
914 | |
---|
915 | else if(data.gbk.comments.seqinf.comp3=='y') |
---|
916 | fprintf(fp, |
---|
917 | " 3' end complete: Yes\n"); |
---|
918 | |
---|
919 | /* print 12 spaces of the first line */ |
---|
920 | if(Lenstr(data.gbk.comments.others)>0) |
---|
921 | fprintf(fp, " "); |
---|
922 | |
---|
923 | if(Lenstr(data.gbk.comments.others)>0) { |
---|
924 | length = Lenstr(data.gbk.comments.others); |
---|
925 | for(indi=0; indi<length; indi++) |
---|
926 | { |
---|
927 | fprintf(fp, "%c", |
---|
928 | data.gbk.comments.others[indi]); |
---|
929 | |
---|
930 | /* if another line, print 12 spaces first */ |
---|
931 | if(data.gbk.comments.others[indi]=='\n' |
---|
932 | &&data.gbk.comments.others[indi+1]!='\0') |
---|
933 | |
---|
934 | fprintf(fp, " "); |
---|
935 | |
---|
936 | } |
---|
937 | } /* other comments */ |
---|
938 | } /* comment */ |
---|
939 | |
---|
940 | count_base(&base_a, &base_t, &base_g, &base_c, &base_other); |
---|
941 | |
---|
942 | /* don't write 0 others in this base line */ |
---|
943 | if(base_other>0) |
---|
944 | fprintf(fp, |
---|
945 | "BASE COUNT %6d a %6d c %6d g %6d t %6d others\n", |
---|
946 | base_a, base_c, base_g, base_t, base_other); |
---|
947 | else fprintf(fp, "BASE COUNT %6d a %6d c %6d g %6d t\n", |
---|
948 | base_a, base_c, base_g, base_t); |
---|
949 | |
---|
950 | genbank_out_origin(fp); |
---|
951 | } |
---|
952 | /* ------------------------------------------------------------ */ |
---|
/*	Function genbank_out_one_entry().
 *		Write one keyword entry to fp.
 *
 *		fp		output stream
 *		string		text of the entry
 *		key		keyword text written in front of the entry
 *		flag, patterns	handed unchanged to genbank_print_lines()
 *		period		non-zero: the placeholder ends with '.'
 *
 *		When Lenstr(string) is greater than 1 the key is written
 *		and the text is passed to genbank_print_lines().  Otherwise
 *		the key is followed by "No information", with or without a
 *		trailing period depending on the period argument.
 */
void
genbank_out_one_entry(fp, string, key, flag, patterns, period)
FILE	*fp;
char	*string, *key;
int	flag;
char	*patterns;
int	period;
{
	int	Lenstr();
	void	genbank_print_lines();
	char	*placeholder;

	/* nothing worth printing: emit the placeholder line and leave */
	if(Lenstr(string)<=1) {
		if(period)
			placeholder = "No information.\n";
		else	placeholder = "No information\n";
		fprintf(fp, "%s%s", key, placeholder);
		return;
	}

	fprintf(fp, "%s", key);
	genbank_print_lines(fp, string, flag, patterns);
}
---|
976 | /* ------------------------------------------------------------- */ |
---|
/*	Function genbank_out_one_comment().
 *		Write one comment sub-keyword to fp, but only when
 *		Lenstr(string) is greater than 1; otherwise nothing at all
 *		is written.
 *
 *		fp		output stream
 *		string		text of the sub-keyword
 *		key		sub-keyword label
 *		skindent	sub-keyword indent
 *		cnindent	continuation line indent
 */
void
genbank_out_one_comment(fp, string, key, skindent, cnindent)
FILE	*fp;
char	*string, *key;
int	skindent, cnindent;
{
	int	Lenstr();
	void	genbank_print_comment();

	if(Lenstr(string)<=1)
		return;

	genbank_print_comment(fp, key, string, skindent, cnindent);
}
---|
994 | /* -------------------------------------------------------------- */ |
---|
995 | /* Fucntion genbank_print_lines(). |
---|
996 | /* Print one grnbank line, wrap around if over |
---|
997 | /* column 80. |
---|
998 | */ |
---|
999 | void |
---|
1000 | genbank_print_lines(fp, string, flag, separators) |
---|
1001 | FILE *fp; |
---|
1002 | char *string; |
---|
1003 | int flag; |
---|
1004 | char *separators; |
---|
1005 | { |
---|
1006 | int first_time=1, indi, indj, indk, indl; |
---|
1007 | int ibuf, len, last_word(), is_separator(); |
---|
1008 | |
---|
1009 | len = Lenstr(string)-1; |
---|
1010 | /* indi: first char of the line */ |
---|
1011 | /* num of char, excluding the first char, of the line */ |
---|
1012 | for(indi=0; indi<len; indi+=(indj+1)) { |
---|
1013 | indj=GBMAXCHAR; |
---|
1014 | if((Lenstr(string+indi))>GBMAXCHAR) { |
---|
1015 | |
---|
1016 | /* search for proper termination of a line */ |
---|
1017 | |
---|
1018 | ibuf = indj; |
---|
1019 | |
---|
1020 | for(;indj>0 |
---|
1021 | &&((!flag&&!last_word(string[indj+indi])) |
---|
1022 | ||(flag&&!is_separator |
---|
1023 | (string[indj+indi], separators))); |
---|
1024 | indj--); |
---|
1025 | |
---|
1026 | if(indj==0) indj=ibuf; |
---|
1027 | else if(string[indi+indj+1]==' ') indj++; |
---|
1028 | |
---|
1029 | /* print left margine */ |
---|
1030 | if(!first_time) |
---|
1031 | fprintf(fp, " "); |
---|
1032 | else first_time = 0; |
---|
1033 | |
---|
1034 | for(indk=0; indk<indj; indk++) |
---|
1035 | fprintf(fp, "%c", string[indi+indk]); |
---|
1036 | |
---|
1037 | /* leave out the last space, if there is any */ |
---|
1038 | if(string[indi+indj]!=' '&&string[indi+indj]!='\n') |
---|
1039 | fprintf(fp, "%c", string[indi+indj]); |
---|
1040 | fprintf(fp, "\n"); |
---|
1041 | |
---|
1042 | } else if(first_time) |
---|
1043 | fprintf(fp, "%s", string+indi); |
---|
1044 | else fprintf(fp, |
---|
1045 | " %s", string+indi); |
---|
1046 | } |
---|
1047 | } |
---|
1048 | /* -------------------------------------------------------------- */ |
---|
1049 | /* Fucntion genbank_print_comment(). |
---|
1050 | /* Print one grnbank line, wrap around if over |
---|
1051 | /* column 80. |
---|
1052 | */ |
---|
1053 | void |
---|
1054 | genbank_print_comment(fp, key, string, offset, indent) |
---|
1055 | FILE *fp; |
---|
1056 | char *key, *string; |
---|
1057 | int offset, indent; |
---|
1058 | { |
---|
1059 | int first_time=1, indi, indj, indk, indl; |
---|
1060 | int len, last_word(); |
---|
1061 | |
---|
1062 | len = Lenstr(string)-1; |
---|
1063 | for(indi=0; indi<len; indi+=(indj+1)) { |
---|
1064 | |
---|
1065 | if(first_time) |
---|
1066 | indj=GBMAXCHAR-offset-Lenstr(key)-1; |
---|
1067 | else indj=GBMAXCHAR-offset-indent-1; |
---|
1068 | |
---|
1069 | fprintf(fp, " "); |
---|
1070 | |
---|
1071 | if(!first_time) { |
---|
1072 | for(indl=0; indl<(offset+indent); indl++) |
---|
1073 | fprintf(fp, " "); |
---|
1074 | } else { |
---|
1075 | for(indl=0; indl<offset; indl++) |
---|
1076 | fprintf(fp, " "); |
---|
1077 | fprintf(fp, "%s", key); |
---|
1078 | first_time = 0; |
---|
1079 | } |
---|
1080 | if(Lenstr(string+indi)>indj) { |
---|
1081 | |
---|
1082 | /* search for proper termination of a line */ |
---|
1083 | for(;indj>=0&&!last_word(string[indj+indi]); |
---|
1084 | indj--) ; |
---|
1085 | |
---|
1086 | /* print left margine */ |
---|
1087 | if(string[indi]==' ') indk = 1; |
---|
1088 | else indk = 0; |
---|
1089 | |
---|
1090 | for(; indk<indj; indk++) |
---|
1091 | fprintf(fp, "%c", string[indi+indk]); |
---|
1092 | |
---|
1093 | /* leave out the last space, if there is any */ |
---|
1094 | if(string[indi+indj]!=' ') |
---|
1095 | fprintf(fp, "%c", string[indi+indj]); |
---|
1096 | fprintf(fp, "\n"); |
---|
1097 | |
---|
1098 | } else fprintf(fp, "%s", string+indi); |
---|
1099 | |
---|
1100 | } /* for each char */ |
---|
1101 | } |
---|
1102 | /* --------------------------------------------------------------- */ |
---|
1103 | /* Fcuntion genbank_out_origin(). |
---|
1104 | /* Output sequence data in genbank format. |
---|
1105 | */ |
---|
1106 | void |
---|
1107 | genbank_out_origin(fp) |
---|
1108 | FILE *fp; |
---|
1109 | { |
---|
1110 | |
---|
1111 | int indi, indj, indk; |
---|
1112 | |
---|
1113 | fprintf(fp, "ORIGIN\n"); |
---|
1114 | |
---|
1115 | for(indi=0, indj=0, indk=1; indi<data.seq_length; indi++) |
---|
1116 | { |
---|
1117 | if((indk % 60)==1) fprintf(fp, " %6d ", indk); |
---|
1118 | fprintf(fp, "%c", data.sequence[indi]); |
---|
1119 | indj++; |
---|
1120 | |
---|
1121 | /* blank space follows every 10 bases, |
---|
1122 | /* but not before '\n' */ |
---|
1123 | if((indk % 60)==0) { fprintf(fp, "\n"); indj=0; } |
---|
1124 | else if(indj==10&&indi!=(data.seq_length-1)) |
---|
1125 | { fprintf(fp, " "); indj=0; } |
---|
1126 | indk++; |
---|
1127 | } |
---|
1128 | |
---|
1129 | if((indk % 60)!=1) fprintf(fp, "\n"); |
---|
1130 | fprintf(fp, "//\n"); |
---|
1131 | } |
---|
1132 | /* ----------------------------------------------------------- */ |
---|
1133 | /* Function genbank_to_genbank(). |
---|
1134 | /* Convert from genbank to genbank. |
---|
1135 | */ |
---|
1136 | void |
---|
1137 | genbank_to_genbank(inf, outf) |
---|
1138 | char *inf, *outf; |
---|
1139 | { |
---|
1140 | FILE *ifp, *ofp, *fopen(); |
---|
1141 | char genbank_in(), temp[TOKENNUM]; |
---|
1142 | char *Dupstr(); |
---|
1143 | void init(); |
---|
1144 | void init_genbank(), error(); |
---|
1145 | int gtoe(); |
---|
1146 | |
---|
1147 | if((ifp=fopen(inf, "r"))==NULL) { |
---|
1148 | sprintf(temp, |
---|
1149 | "Cannot open input file %s, exit\n", inf); |
---|
1150 | error(35, temp); |
---|
1151 | } |
---|
1152 | if((ofp=fopen(outf, "w"))==NULL) { |
---|
1153 | sprintf(temp, |
---|
1154 | "Cannot open output file %s, exit\n", outf); |
---|
1155 | error(36, temp); |
---|
1156 | } |
---|
1157 | init(); |
---|
1158 | init_genbank(); |
---|
1159 | rewind(ifp); |
---|
1160 | while(genbank_in(ifp)!=EOF) { |
---|
1161 | data.numofseq++; |
---|
1162 | genbank_out(ofp); |
---|
1163 | init_genbank(); |
---|
1164 | } |
---|
1165 | |
---|
1166 | #ifdef log |
---|
1167 | fprintf(stderr, |
---|
1168 | "Total %d sequences have been processed\n", |
---|
1169 | data.numofseq); |
---|
1170 | #endif |
---|
1171 | |
---|
1172 | fclose(ifp); fclose(ofp); |
---|
1173 | } |
---|
1174 | /* ----------------------------------------------------------- */ |
---|
1175 | /* Function init_reference(). |
---|
1176 | /* Init. new reference record(init. value is "\n"). |
---|
1177 | */ |
---|
1178 | void |
---|
1179 | init_reference(ref, flag) |
---|
1180 | Reference *ref; |
---|
1181 | int flag; |
---|
1182 | { |
---|
1183 | char *Dupstr(); |
---|
1184 | |
---|
1185 | if(flag==REF) ref->ref = Dupstr("\n"); |
---|
1186 | if(flag!=AUTHOR) ref->author = Dupstr("\n"); |
---|
1187 | if(flag!=JOURNAL) ref->journal = Dupstr("\n"); |
---|
1188 | if(flag!=TITLE) ref->title = Dupstr("\n"); |
---|
1189 | if(flag!=STANDARD) ref->standard = Dupstr("\n"); |
---|
1190 | } |
---|