1 | // genbank and Macke converting program |
---|
2 | |
---|
3 | #include "genbank.h" |
---|
4 | #include "macke.h" |
---|
5 | |
---|
6 | static int paren_string(char *line, char *pstring, int index) { |
---|
7 | int len = str0len(line); |
---|
8 | int paren_num = 0; |
---|
9 | int indk; |
---|
10 | |
---|
11 | for (indk = 0; index < len; index++) { |
---|
12 | if (paren_num >= 1) |
---|
13 | pstring[indk++] = line[index]; |
---|
14 | if (line[index] == '(') |
---|
15 | paren_num++; |
---|
16 | if (line[index] == ')') |
---|
17 | paren_num--; |
---|
18 | } |
---|
19 | if (indk == 0) |
---|
20 | return -1; |
---|
21 | pstring[--indk] = '\0'; |
---|
22 | return index; |
---|
23 | } |
---|
24 | |
---|
25 | static void get_atcc_string(const char *line, char *temp, int index) { |
---|
26 | // Get the rest of the line until reaching certain terminators, such as ';', ',', '.',... |
---|
27 | |
---|
28 | int len = str0len(line); |
---|
29 | int paren_num = 0; |
---|
30 | int indk; |
---|
31 | |
---|
32 | for (indk = 0; index < len; index++, indk++) { |
---|
33 | temp[indk] = line[index]; |
---|
34 | if (temp[indk] == '(') |
---|
35 | paren_num++; |
---|
36 | if (temp[indk] == ')') |
---|
37 | if (paren_num == 0) |
---|
38 | break; |
---|
39 | else |
---|
40 | paren_num--; |
---|
41 | else if (paren_num == 0 && (temp[indk] == ';' || temp[indk] == '.' || temp[indk] == ',' || temp[indk] == '/' || temp[indk] == '\n')) |
---|
42 | break; |
---|
43 | } |
---|
44 | temp[indk] = '\0'; |
---|
45 | } |
---|
46 | |
---|
47 | static char *get_atcc(const Macke& macke, char *source) { |
---|
48 | static int cc_num = 16; |
---|
49 | static const char *CC[16] = { |
---|
50 | "ATCC", "CCM", "CDC", "CIP", "CNCTC", |
---|
51 | "DSM", "EPA", "JCM", "NADC", "NCDO", "NCTC", "NRCC", |
---|
52 | "NRRL", "PCC", "USDA", "VPI" |
---|
53 | }; |
---|
54 | |
---|
55 | int indi, indj, index; |
---|
56 | int length; |
---|
57 | char buffer[LONGTEXT], temp[LONGTEXT+5], pstring[LONGTEXT]; |
---|
58 | char atcc[LONGTEXT]; |
---|
59 | |
---|
60 | atcc[0] = '\0'; |
---|
61 | for (indi = 0; indi < cc_num; indi++) { |
---|
62 | index = 0; |
---|
63 | while ((index = paren_string(source, pstring, index)) > 0) { |
---|
64 | if ((indj = find_pattern(pstring, CC[indi])) >= 0) { |
---|
65 | // skip the key word |
---|
66 | indj += str0len(CC[indi]); |
---|
67 | // skip blank spaces |
---|
68 | indj = Skip_white_space(pstring, indj); |
---|
69 | // get strain |
---|
70 | get_atcc_string(pstring, buffer, indj); |
---|
71 | sprintf(temp, "%s %s", CC[indi], buffer); |
---|
72 | length = str0len(atcc); |
---|
73 | if (length > 0) { |
---|
74 | atcc[length] = '\0'; |
---|
75 | strcat(atcc, ", "); |
---|
76 | } |
---|
77 | strcat(atcc, temp); |
---|
78 | } |
---|
79 | } |
---|
80 | } |
---|
81 | // append eoln to the atcc string |
---|
82 | length = str0len(atcc); |
---|
83 | if (macke.atcc) { |
---|
84 | macke.atcc[length] = '\0'; |
---|
85 | } |
---|
86 | strcat(atcc, "\n"); |
---|
87 | return nulldup(atcc); |
---|
88 | } |
---|
89 | |
---|
90 | static char *genbank_get_atcc(const GenBank& gbk, const Macke& macke) { |
---|
91 | // Get atcc from SOURCE line in Genbank data file. |
---|
92 | char *atcc = NULp; |
---|
93 | |
---|
94 | // get culture collection # |
---|
95 | if (has_content(gbk.source)) { |
---|
96 | atcc = get_atcc(macke, gbk.source); |
---|
97 | } |
---|
98 | if (!has_content(atcc) && has_content(macke.strain)) { |
---|
99 | // add () to macke strain to be processed correctly |
---|
100 | char temp[LONGTEXT]; |
---|
101 | sprintf(temp, "(%s)", macke.strain); |
---|
102 | atcc = get_atcc(macke, temp); |
---|
103 | } |
---|
104 | return atcc; |
---|
105 | } |
---|
106 | |
---|
107 | void Macke::add_35end_remark(char end35, char yn) { |
---|
108 | if (yn == ' ') return; |
---|
109 | |
---|
110 | char *content = strf("%c' end complete: %s\n", end35, yn == 'y' ? "Yes" : "No"); |
---|
111 | add_remark(content); |
---|
112 | free(content); |
---|
113 | } |
---|
114 | |
---|
115 | void Macke::add_remarks_from(const GenbankRef& ref) { |
---|
116 | add_remark_if_content("ref:", ref.ref); |
---|
117 | add_remark_if_content("auth:", ref.author); |
---|
118 | add_remark_if_content("jour:", ref.journal); |
---|
119 | add_remark_if_content("title:", ref.title); |
---|
120 | add_remark_if_content("standard:", ref.standard); |
---|
121 | } |
---|
122 | |
---|
123 | void Macke::add_remarks_from(const OrgInfo& orginf) { |
---|
124 | add_remark_if_content("Source of strain:", orginf.source); // copy source of strain |
---|
125 | add_remark_if_content("Former name:", orginf.formname); // copy former name |
---|
126 | add_remark_if_content("Alternate name:", orginf.nickname); // copy alternate name |
---|
127 | add_remark_if_content("Common name:", orginf.commname); // copy common name |
---|
128 | add_remark_if_content("Host organism:", orginf.hostorg); // copy host organism |
---|
129 | } |
---|
130 | |
---|
131 | void Macke::add_remarks_from(const RDP_comments& comments) { |
---|
132 | add_remarks_from(comments.orginf); |
---|
133 | add_remarks_from(comments.seqinf); |
---|
134 | |
---|
135 | // other comments, not RDP DataBase specially defined |
---|
136 | int len = str0len(comments.others); |
---|
137 | if (len > 0) { |
---|
138 | for (int indi = 0, indj = 0; indi < len; indi++) { |
---|
139 | char temp[LONGTEXT]; |
---|
140 | temp[indj++] = comments.others[indi]; |
---|
141 | if (comments.others[indi] == '\n' || comments.others[indi] == '\0') { |
---|
142 | temp[indj] = '\0'; |
---|
143 | add_remark(temp); |
---|
144 | indj = 0; |
---|
145 | } |
---|
146 | } |
---|
147 | } |
---|
148 | } |
---|
149 | |
---|
150 | void Macke::add_remarks_from(const SeqInfo& seqinf) { |
---|
151 | add_remark_if_content("RDP ID:", seqinf.RDPid); // copy RDP ID |
---|
152 | add_remark_if_content("Sequencing methods:", seqinf.methods); // copy methods |
---|
153 | |
---|
154 | add_35end_remark('3', seqinf.comp3); |
---|
155 | add_35end_remark('5', seqinf.comp5); |
---|
156 | } |
---|
157 | |
---|
158 | void Macke::add_remarks_from(const GenBank& gbk) { |
---|
159 | // Create Macke remarks. |
---|
160 | |
---|
161 | // REFERENCE the first reference |
---|
162 | if (gbk.has_refs()) |
---|
163 | add_remark_if_content("ref:", gbk.get_ref(0).ref); |
---|
164 | |
---|
165 | // The rest of the REFERENCES |
---|
166 | for (int indi = 1; indi < gbk.get_refcount(); indi++) { |
---|
167 | add_remarks_from(gbk.get_ref(indi)); |
---|
168 | } |
---|
169 | |
---|
170 | add_remark_if_content("KEYWORDS:", gbk.keywords); // copy keywords as remark |
---|
171 | add_remark_if_content("GenBank ACCESSION:", gbk.accession); // copy accession as remark when genbank entry also exists. |
---|
172 | add_remarks_from(gbk.comments); |
---|
173 | } |
---|
174 | |
---|
175 | static void correct_subspecies(char *subspecies) { |
---|
176 | // Remove the strain information in subspecies which is sometime mistakenly written into it. |
---|
177 | int indj; |
---|
178 | |
---|
179 | if ((indj = find_pattern(subspecies, "str\n")) >= 0 || (indj = find_strain(subspecies, 0)) >= 0) { |
---|
180 | ca_assert(subspecies[indj-1] == ' '); // assume to overwrite a space |
---|
181 | subspecies[indj - 1] = '\n'; |
---|
182 | subspecies[indj] = '\0'; |
---|
183 | } |
---|
184 | } |
---|
185 | |
---|
186 | static void check_consistency(const char *what, char* const& var, const char *New) { |
---|
187 | if (has_content(var)) { |
---|
188 | if (!str_equal(var, New)) { |
---|
189 | warningf(20, "Inconsistent %s definitions detected:\n" |
---|
190 | " %s" |
---|
191 | "and %s", what, var, New); |
---|
192 | } |
---|
193 | } |
---|
194 | else { |
---|
195 | strcpy(var, New); |
---|
196 | } |
---|
197 | } |
---|
198 | |
---|
199 | static void get_string(char *temp, const char *line, int index) { |
---|
200 | // Get the rest of the line until reaching certain terminators, |
---|
201 | // such as ';', ',', '.',... |
---|
202 | // Always append "\n" at the end of the result. |
---|
203 | |
---|
204 | index = Skip_white_space(line, index); |
---|
205 | |
---|
206 | int len = str0len(line); |
---|
207 | int paren_num = 0; |
---|
208 | int indk; |
---|
209 | |
---|
210 | for (indk = 0; index < len; index++, indk++) { |
---|
211 | temp[indk] = line[index]; |
---|
212 | if (temp[indk] == '(') |
---|
213 | paren_num++; |
---|
214 | if (temp[indk] == ')') |
---|
215 | if (paren_num == 0) |
---|
216 | break; |
---|
217 | else |
---|
218 | paren_num--; |
---|
219 | else if (temp[indk] == '\n' || (paren_num == 0 && temp[indk] == ';')) |
---|
220 | break; |
---|
221 | } |
---|
222 | if (indk > 1 && is_end_mark(temp[indk - 1])) |
---|
223 | indk--; |
---|
224 | temp[indk++] = '\n'; |
---|
225 | temp[indk] = '\0'; |
---|
226 | } |
---|
227 | |
---|
228 | static void copy_subspecies_and_check_consistency(char* const& subspecies, const char *from, int indj) { |
---|
229 | char temp[LONGTEXT]; |
---|
230 | get_string(temp, from, indj); |
---|
231 | correct_subspecies(temp); |
---|
232 | check_consistency("subspecies", subspecies, temp); |
---|
233 | } |
---|
234 | static void copy_strain_and_check_consistency(char* const& strain, const char *from, int indj) { |
---|
235 | char temp[LONGTEXT]; |
---|
236 | get_string(temp, from, indj); |
---|
237 | check_consistency("strain", strain, temp); |
---|
238 | } |
---|
239 | |
---|
240 | static void check_strain_from(char* const& strain, const char *from) { |
---|
241 | if (has_content(from)) { |
---|
242 | int indj = skip_strain(from, ' '); |
---|
243 | if (indj >= 0) copy_strain_and_check_consistency(strain, from, indj); |
---|
244 | } |
---|
245 | } |
---|
246 | |
---|
247 | static char *genbank_get_strain(const GenBank& gbk) { |
---|
248 | // Get strain from DEFINITION, COMMENT or SOURCE line in Genbank data file. |
---|
249 | char strain[LONGTEXT]; |
---|
250 | |
---|
251 | strain[0] = '\0'; |
---|
252 | |
---|
253 | if (has_content(gbk.comments.others)) { |
---|
254 | int indj = find_pattern(gbk.comments.others, "*source:"); |
---|
255 | if (indj >= 0) { |
---|
256 | int indk = skip_pattern(gbk.comments.others + indj, "strain="); |
---|
257 | if (indk >= 0) copy_strain_and_check_consistency(strain, gbk.comments.others, indj+indk); |
---|
258 | } |
---|
259 | } |
---|
260 | |
---|
261 | check_strain_from(strain, gbk.definition); |
---|
262 | check_strain_from(strain, gbk.source); |
---|
263 | |
---|
264 | return nulldup(strain); |
---|
265 | } |
---|
266 | |
---|
267 | static char *genbank_get_subspecies(const GenBank& gbk) { |
---|
268 | // Get subspecies information from SOURCE, DEFINITION, or COMMENT line of Genbank data file. |
---|
269 | int indj; |
---|
270 | char subspecies[LONGTEXT]; |
---|
271 | |
---|
272 | subspecies[0] = '\0'; |
---|
273 | |
---|
274 | if (has_content(gbk.definition)) { |
---|
275 | if ((indj = skip_pattern(gbk.definition, "subsp. ")) >= 0) { |
---|
276 | copy_subspecies_and_check_consistency(subspecies, gbk.definition, indj); |
---|
277 | } |
---|
278 | } |
---|
279 | if (has_content(gbk.comments.others)) { |
---|
280 | if ((indj = find_pattern(gbk.comments.others, "*source:")) >= 0) { |
---|
281 | int indk = skip_subspecies(gbk.comments.others + indj, '='); |
---|
282 | if (indk >= 0) { |
---|
283 | copy_subspecies_and_check_consistency(subspecies, gbk.comments.others, indj+indk); |
---|
284 | } |
---|
285 | } |
---|
286 | } |
---|
287 | |
---|
288 | if (has_content(gbk.source)) { |
---|
289 | if ((indj = skip_subspecies(gbk.source, ' ')) >= 0) { |
---|
290 | copy_subspecies_and_check_consistency(subspecies, gbk.source, indj); |
---|
291 | } |
---|
292 | } |
---|
293 | |
---|
294 | return nulldup(subspecies); |
---|
295 | } |
---|
296 | |
---|
297 | static void mtog_decode_ref_and_remarks(const Macke& macke, GenBank& gbk) { |
---|
298 | // Decode remarks of Macke to GenBank format. |
---|
299 | ca_assert(gbk.get_refcount() == 0); |
---|
300 | |
---|
301 | if (has_content(macke.author)) freedup(gbk.get_new_ref().author, macke.author); |
---|
302 | if (has_content(macke.journal)) freedup(gbk.get_latest_ref().journal, macke.journal); |
---|
303 | if (has_content(macke.title)) freedup(gbk.get_latest_ref().title, macke.title); |
---|
304 | |
---|
305 | bool first_ref = true; |
---|
306 | |
---|
307 | RDP_comments& comments = gbk.comments; |
---|
308 | OrgInfo& orginf = comments.orginf; |
---|
309 | SeqInfo& seqinf = comments.seqinf; |
---|
310 | |
---|
311 | for (int ridx = 0; ridx < macke.get_rem_count(); ridx++) { |
---|
312 | char key[TOKENSIZE]; |
---|
313 | int offset = macke_key_word(macke.get_rem(ridx), 0, key); |
---|
314 | |
---|
315 | if (str_equal(key, "ref")) { |
---|
316 | GenbankRef& ref = first_ref ? gbk.get_latest_ref() : gbk.get_new_ref(); |
---|
317 | freeset(ref.ref, macke.copy_multi_rem(ridx, offset)); |
---|
318 | first_ref = false; |
---|
319 | } |
---|
320 | else if (str_equal(key, "auth")) { |
---|
321 | freeset(gbk.get_latest_ref().author, macke.copy_multi_rem(ridx, offset)); |
---|
322 | } |
---|
323 | else if (str_equal(key, "title")) { |
---|
324 | freeset(gbk.get_latest_ref().title, macke.copy_multi_rem(ridx, offset)); |
---|
325 | } |
---|
326 | else if (str_equal(key, "jour")) { |
---|
327 | freeset(gbk.get_latest_ref().journal, macke.copy_multi_rem(ridx, offset)); |
---|
328 | } |
---|
329 | else if (str_equal(key, "standard")) { |
---|
330 | freeset(gbk.get_latest_ref().standard, macke.copy_multi_rem(ridx, offset)); |
---|
331 | } |
---|
332 | else if (str_equal(key, "KEYWORDS")) { |
---|
333 | freeset(gbk.keywords, macke.copy_multi_rem(ridx, offset)); |
---|
334 | terminate_with(gbk.keywords, '.'); |
---|
335 | } |
---|
336 | else if (str_equal(key, "GenBank ACCESSION")) { |
---|
337 | freeset(gbk.accession, macke.copy_multi_rem(ridx, offset)); |
---|
338 | } |
---|
339 | else if (str_equal(key, "Source of strain")) { |
---|
340 | freeset(orginf.source, macke.copy_multi_rem(ridx, offset)); |
---|
341 | } |
---|
342 | else if (str_equal(key, "Former name")) { |
---|
343 | freeset(orginf.formname, macke.copy_multi_rem(ridx, offset)); |
---|
344 | } |
---|
345 | else if (str_equal(key, "Alternate name")) { |
---|
346 | freeset(orginf.nickname, macke.copy_multi_rem(ridx, offset)); |
---|
347 | } |
---|
348 | else if (str_equal(key, "Common name")) { |
---|
349 | freeset(orginf.commname, macke.copy_multi_rem(ridx, offset)); |
---|
350 | } |
---|
351 | else if (str_equal(key, "Host organism")) { |
---|
352 | freeset(orginf.hostorg, macke.copy_multi_rem(ridx, offset)); |
---|
353 | } |
---|
354 | else if (str_equal(key, "RDP ID")) { |
---|
355 | freeset(seqinf.RDPid, macke.copy_multi_rem(ridx, offset)); |
---|
356 | } |
---|
357 | else if (str_equal(key, "Sequencing methods")) { |
---|
358 | freeset(seqinf.methods, macke.copy_multi_rem(ridx, offset)); |
---|
359 | } |
---|
360 | else if (str_equal(key, "3' end complete")) { |
---|
361 | scan_token_or_die(key, macke.get_rem(ridx) + offset); |
---|
362 | seqinf.comp3 = str_equal(key, "Yes") ? 'y' : 'n'; |
---|
363 | } |
---|
364 | else if (str_equal(key, "5' end complete")) { |
---|
365 | scan_token_or_die(key, macke.get_rem(ridx) + offset); |
---|
366 | seqinf.comp5 = str_equal(key, "Yes") ? 'y' : 'n'; |
---|
367 | } |
---|
368 | else { // other (non-interpreted) comments |
---|
369 | Append(comments.others, macke.get_rem(ridx)); |
---|
370 | } |
---|
371 | } |
---|
372 | } |
---|
373 | |
---|
374 | static void mtog_genbank_def_and_source(const Macke& macke, GenBank& gbk) { |
---|
375 | // Define GenBank DEFINITION and SOURCE lines the way RDP group likes. |
---|
376 | copy_content(gbk.definition, macke.name); |
---|
377 | if (has_content(macke.subspecies)) { |
---|
378 | if (!has_content(gbk.definition)) { |
---|
379 | warning(22, "Genus and Species not defined"); |
---|
380 | skip_eolnl_and_append(gbk.definition, "subsp. "); |
---|
381 | } |
---|
382 | else |
---|
383 | skip_eolnl_and_append(gbk.definition, " subsp. "); |
---|
384 | |
---|
385 | Append(gbk.definition, macke.subspecies); |
---|
386 | } |
---|
387 | |
---|
388 | if (has_content(macke.strain)) { |
---|
389 | if (!has_content(gbk.definition)) { |
---|
390 | warning(23, "Genus and Species and Subspecies not defined"); |
---|
391 | skip_eolnl_and_append(gbk.definition, "str. "); |
---|
392 | } |
---|
393 | else |
---|
394 | skip_eolnl_and_append(gbk.definition, " str. "); |
---|
395 | |
---|
396 | Append(gbk.definition, macke.strain); |
---|
397 | } |
---|
398 | |
---|
399 | // create SOURCE line, temp. |
---|
400 | if (copy_content(gbk.source, gbk.definition)) terminate_with(gbk.source, '.'); |
---|
401 | |
---|
402 | // append keyword to definition, if there is keyword. |
---|
403 | if (has_content(gbk.keywords)) { |
---|
404 | if (has_content(gbk.definition)) |
---|
405 | skip_eolnl_and_append(gbk.definition, "; \n"); |
---|
406 | |
---|
407 | // Here keywords must be ended by a '.' already |
---|
408 | skip_eolnl_and_append(gbk.definition, gbk.keywords); |
---|
409 | } |
---|
410 | else |
---|
411 | skip_eolnl_and_append(gbk.definition, ".\n"); |
---|
412 | } |
---|
413 | |
---|
414 | int mtog(const Macke& macke, GenBank& gbk, const Seq& seq) { // __ATTR__USERESULT |
---|
415 | // Convert Macke format to Genbank format. |
---|
416 | int indi; |
---|
417 | char temp[LONGTEXT]; |
---|
418 | |
---|
419 | strcpy(temp, macke.seqabbr); |
---|
420 | |
---|
421 | for (indi = str0len(temp); indi < 13; temp[indi++] = ' ') {} |
---|
422 | |
---|
423 | if (has_content(macke.date)) |
---|
424 | sprintf((temp + 10), "%7d bp RNA RNA %s\n", seq.get_len(), genbank_date(macke.date)); |
---|
425 | else |
---|
426 | sprintf((temp + 10), "%7d bp RNA RNA %s\n", seq.get_len(), genbank_date(today_date())); |
---|
427 | |
---|
428 | freedup(gbk.locus, temp); |
---|
429 | |
---|
430 | // GenBank ORGANISM |
---|
431 | if (copy_content(gbk.organism, macke.name)) terminate_with(gbk.organism, '.'); |
---|
432 | |
---|
433 | RDP_comments& comments = gbk.comments; |
---|
434 | OrgInfo& orginf = comments.orginf; |
---|
435 | SeqInfo& seqinf = comments.seqinf; |
---|
436 | |
---|
437 | copy_content(seqinf.methods, macke.rna); |
---|
438 | |
---|
439 | if (!copy_content(seqinf.gbkentry, macke.acs)) |
---|
440 | copy_content(seqinf.gbkentry, macke.nbk); |
---|
441 | |
---|
442 | copy_content(orginf.cultcoll, macke.atcc); |
---|
443 | mtog_decode_ref_and_remarks(macke, gbk); |
---|
444 | |
---|
445 | // final conversion of cultcoll |
---|
446 | if (!has_content(orginf.cultcoll)) copy_content(orginf.cultcoll, macke.atcc); |
---|
447 | |
---|
448 | // define GenBank DEFINITION, after GenBank KEYWORD is defined. |
---|
449 | mtog_genbank_def_and_source(macke, gbk); |
---|
450 | |
---|
451 | return 1; |
---|
452 | } |
---|
453 | |
---|
454 | int gtom(const GenBank& gbk, Macke& macke) { // __ATTR__USERESULT |
---|
455 | // Convert from Genbank format to Macke format. |
---|
456 | |
---|
457 | // copy sequence abbr, assume every entry in gbk must end with \n\0 |
---|
458 | // no '\n' at the end of the string |
---|
459 | { |
---|
460 | char temp[LONGTEXT]; |
---|
461 | genbank_key_word(gbk.locus, 0, temp); |
---|
462 | freedup(macke.seqabbr, temp); |
---|
463 | } |
---|
464 | |
---|
465 | // copy name and definition |
---|
466 | if (!copy_content(macke.name, gbk.organism) && has_content(gbk.definition)) { |
---|
467 | char genus[TOKENSIZE]; |
---|
468 | char species[TOKENSIZE]; |
---|
469 | |
---|
470 | ASSERT_RESULT(int, 2, sscanf(gbk.definition, "%s %s", genus, species)); |
---|
471 | |
---|
472 | int last = str0len(species)-1; |
---|
473 | if (species[last] == ';') species[last] = '\0'; |
---|
474 | |
---|
475 | freeset(macke.name, strf("%s %s\n", genus, species)); |
---|
476 | } |
---|
477 | |
---|
478 | const OrgInfo& orginf = gbk.comments.orginf; |
---|
479 | const SeqInfo& seqinf = gbk.comments.seqinf; |
---|
480 | |
---|
481 | copy_content(macke.atcc, orginf.cultcoll); // copy cultcoll name and number |
---|
482 | copy_content(macke.rna, seqinf.methods); // copy rna(methods) |
---|
483 | |
---|
484 | freeset(macke.date, gbk.get_date()); Append(macke.date, "\n"); |
---|
485 | |
---|
486 | // copy genbank entry (gbkentry has higher priority than gbk.accession) |
---|
487 | if (!copy_content(macke.acs, seqinf.gbkentry)) { |
---|
488 | char buffer[TOKENSIZE]; |
---|
489 | if (has_content(gbk.accession) && !str_equal(gbk.accession, "No information\n")) { |
---|
490 | scan_token_or_die(buffer, gbk.accession); |
---|
491 | strcat(buffer, "\n"); |
---|
492 | } |
---|
493 | else { |
---|
494 | strcpy(buffer, "\n"); |
---|
495 | } |
---|
496 | freedup(macke.acs, buffer); |
---|
497 | } |
---|
498 | |
---|
499 | // copy the first reference from GenBank to Macke |
---|
500 | if (gbk.has_refs()) { |
---|
501 | copy_content(macke.author, gbk.get_ref(0).author); |
---|
502 | copy_content(macke.journal, gbk.get_ref(0).journal); |
---|
503 | copy_content(macke.title, gbk.get_ref(0).title); |
---|
504 | } |
---|
505 | // the rest of references are put into remarks, rem:..... |
---|
506 | macke.add_remarks_from(gbk); |
---|
507 | |
---|
508 | // adjust the strain, subspecies, and atcc information |
---|
509 | freeset(macke.strain, genbank_get_strain(gbk)); |
---|
510 | freeset(macke.subspecies, genbank_get_subspecies(gbk)); |
---|
511 | if (!has_content(macke.atcc)) { |
---|
512 | freeset(macke.atcc, genbank_get_atcc(gbk, macke)); |
---|
513 | } |
---|
514 | |
---|
515 | return 1; |
---|
516 | } |
---|