| 1 | // -------------- genbank related subroutines ----------------- |
|---|
| 2 | |
|---|
| 3 | #include "genbank.h" |
|---|
| 4 | #include "wrap.h" |
|---|
| 5 | |
|---|
| 6 | #define NOPERIOD 0 |
|---|
| 7 | #define PERIOD 1 |
|---|
| 8 | |
|---|
| 9 | void genbank_key_word(const char *line, int index, char *key) { |
|---|
| 10 | ca_assert((GBINDENT-index) >= 0); |
|---|
| 11 | int len = parse_key_word(line+index, key, " \t\n"); |
|---|
| 12 | if ((index+len) >= GBINDENT) { |
|---|
| 13 | key[GBINDENT-index] = 0; |
|---|
| 14 | } |
|---|
| 15 | } |
|---|
| 16 | |
|---|
static int genbank_check_blanks(const char *line, int numb) {
    // Returns 1 if 'line' starts with (numb) blanks, 0 otherwise.
    // Blanks and tabs both count as blank.
    //
    // A tab makes the scan position jump forward: from position p it continues at
    // 8*(p/8 + 1) + 2 (the assignment below plus the regular increment).
    // NOTE(review): this mixes byte index and display column; behavior is kept
    // as-is because callers also address the line by byte offset - confirm intent.
    int pos = 0;
    while (pos < numb) {
        const char ch = line[pos];
        if (ch == '\t') {
            pos = 8 * (pos / 8 + 1) + 1;
        }
        else if (ch != ' ') {
            return 0; // found a non-blank before (numb) positions were covered
        }
        ++pos;
    }
    return 1;
}
|---|
| 32 | |
|---|
| 33 | static void genbank_continue_line(char*& Str, int numb, Reader& reader) { |
|---|
| 34 | // if following line(s) are continued line(s), append them to 'Str'. |
|---|
| 35 | // if 'Str' is NULp, lines only get skipped. |
|---|
| 36 | // 'numb' = number of blanks needed at BOL to defined continued lines |
|---|
| 37 | |
|---|
| 38 | // check continue lines |
|---|
| 39 | for (++reader; |
|---|
| 40 | reader.line() && (genbank_check_blanks(reader.line(), numb) || reader.line()[0] == '\n'); |
|---|
| 41 | ++reader) |
|---|
| 42 | { |
|---|
| 43 | if (reader.line()[0] != '\n') { // empty line is allowed |
|---|
| 44 | if (Str) { |
|---|
| 45 | // remove end-of-line, if there is any |
|---|
| 46 | int ind = Skip_white_space(reader.line(), 0); |
|---|
| 47 | char temp[LINESIZE]; |
|---|
| 48 | strcpy(temp, (reader.line() + ind)); |
|---|
| 49 | skip_eolnl_and_append_spaced(Str, temp); |
|---|
| 50 | } |
|---|
| 51 | } |
|---|
| 52 | } |
|---|
| 53 | } |
|---|
| 54 | |
|---|
| 55 | static void genbank_one_entry_in(char*& datastring, Reader& reader) { |
|---|
| 56 | freedup(datastring, reader.line()+Skip_white_space(reader.line(), GBINDENT)); |
|---|
| 57 | return genbank_continue_line(datastring, GBINDENT, reader); |
|---|
| 58 | } |
|---|
| 59 | |
|---|
| 60 | static void genbank_one_comment_entry(char*& datastring, int start_index, Reader& reader) { |
|---|
| 61 | // Read in one genbank sub-entry in comments lines. |
|---|
| 62 | freedup(datastring, reader.line() + Skip_white_space(reader.line(), start_index)); |
|---|
| 63 | genbank_continue_line(datastring, 20, reader); |
|---|
| 64 | } |
|---|
| 65 | |
|---|
| 66 | static void genbank_source(GenBank& gbk, Reader& reader) { |
|---|
| 67 | // Read in genbank SOURCE lines and also ORGANISM lines. |
|---|
| 68 | genbank_one_entry_in(gbk.source, reader); |
|---|
| 69 | char key[TOKENSIZE]; |
|---|
| 70 | genbank_key_word(reader.line(), 2, key); |
|---|
| 71 | if (str_equal(key, "ORGANISM")) { |
|---|
| 72 | int indent = Skip_white_space(reader.line(), GBINDENT); |
|---|
| 73 | freedup(gbk.organism, reader.line() + indent); |
|---|
| 74 | |
|---|
| 75 | char *skip_em = NULp; |
|---|
| 76 | genbank_continue_line(skip_em, GBINDENT, reader); |
|---|
| 77 | } |
|---|
| 78 | } |
|---|
| 79 | |
|---|
| 80 | class startsWithBlanks : virtual Noncopyable { |
|---|
| 81 | int blanks; |
|---|
| 82 | |
|---|
| 83 | public: |
|---|
| 84 | startsWithBlanks(int blanks_) : blanks(blanks_) {} |
|---|
| 85 | bool operator()(const char *line) const { return genbank_check_blanks(line, blanks); } |
|---|
| 86 | }; |
|---|
| 87 | |
|---|
| 88 | |
|---|
| 89 | static void genbank_skip_unidentified(Reader& reader, int blank_num) { |
|---|
| 90 | // Skip the lines of unidentified keyword. |
|---|
| 91 | ++reader; |
|---|
| 92 | startsWithBlanks num_blanks(blank_num); |
|---|
| 93 | reader.skipOverLinesThat(num_blanks); |
|---|
| 94 | } |
|---|
| 95 | |
|---|
| 96 | static void genbank_reference(GenBank& gbk, Reader& reader) { |
|---|
| 97 | // Read in genbank REFERENCE lines. |
|---|
| 98 | int refnum; |
|---|
| 99 | ASSERT_RESULT(int, 1, sscanf(reader.line() + GBINDENT, "%d", &refnum)); |
|---|
| 100 | if (refnum <= gbk.get_refcount()) { |
|---|
| 101 | warningf(17, "Might redefine reference %d", refnum); |
|---|
| 102 | genbank_skip_unidentified(reader, GBINDENT); |
|---|
| 103 | } |
|---|
| 104 | else { |
|---|
| 105 | gbk.resize_refs(refnum); |
|---|
| 106 | genbank_one_entry_in(gbk.get_latest_ref().ref, reader); |
|---|
| 107 | } |
|---|
| 108 | |
|---|
| 109 | GenbankRef& ref = gbk.get_latest_ref(); |
|---|
| 110 | |
|---|
| 111 | for (; reader.line() && reader.line()[0] == ' ' && reader.line()[1] == ' ';) { |
|---|
| 112 | char key[TOKENSIZE]; |
|---|
| 113 | genbank_key_word(reader.line(), 2, key); |
|---|
| 114 | if (str_equal(key, "AUTHORS")) { |
|---|
| 115 | if (has_content(ref.author)) warningf(10, "AUTHORS of REFERENCE %d is redefined", refnum); |
|---|
| 116 | genbank_one_entry_in(ref.author, reader); |
|---|
| 117 | terminate_with(ref.author, '.'); // add '.' if missing at the end |
|---|
| 118 | } |
|---|
| 119 | else if (str_equal(key, "TITLE")) { |
|---|
| 120 | if (has_content(ref.title)) warningf(11, "TITLE of REFERENCE %d is redefined", refnum); |
|---|
| 121 | genbank_one_entry_in(ref.title, reader); |
|---|
| 122 | } |
|---|
| 123 | else if (str_equal(key, "JOURNAL")) { |
|---|
| 124 | if (has_content(ref.journal)) warningf(12, "JOURNAL of REFERENCE %d is redefined", refnum); |
|---|
| 125 | genbank_one_entry_in(ref.journal, reader); |
|---|
| 126 | } |
|---|
| 127 | else if (str_equal(key, "STANDARD")) { |
|---|
| 128 | if (has_content(ref.standard)) warningf(13, "STANDARD of REFERENCE %d is redefined", refnum); |
|---|
| 129 | genbank_one_entry_in(ref.standard, reader); |
|---|
| 130 | } |
|---|
| 131 | else { |
|---|
| 132 | warningf(18, "Unidentified REFERENCE subkeyword: %s#", key); |
|---|
| 133 | genbank_skip_unidentified(reader, GBINDENT); |
|---|
| 134 | } |
|---|
| 135 | } |
|---|
| 136 | } |
|---|
| 137 | |
|---|
| 138 | static void genbank_comments(GenBank& gbk, Reader& reader) { |
|---|
| 139 | // Read in genbank COMMENTS lines. |
|---|
| 140 | char key[TOKENSIZE]; |
|---|
| 141 | |
|---|
| 142 | if (str0len(reader.line()) <= GBINDENT) { |
|---|
| 143 | ++reader; |
|---|
| 144 | if (!reader.line()) return; |
|---|
| 145 | } |
|---|
| 146 | |
|---|
| 147 | // replace keyword with spaces |
|---|
| 148 | // => identical format for 1st and following lines. |
|---|
| 149 | { |
|---|
| 150 | char *line = ARB_strdup(reader.line()); |
|---|
| 151 | for (int indi = 0; indi < GBINDENT; line[indi++] = ' ') {} |
|---|
| 152 | reader.set_line(line); |
|---|
| 153 | free(line); |
|---|
| 154 | } |
|---|
| 155 | |
|---|
| 156 | |
|---|
| 157 | for (; reader.line() && (genbank_check_blanks(reader.line(), GBINDENT) || reader.line()[0] == '\n');) { |
|---|
| 158 | if (reader.line()[0] == '\n') { // skip empty line |
|---|
| 159 | ++reader; |
|---|
| 160 | continue; |
|---|
| 161 | } |
|---|
| 162 | |
|---|
| 163 | int index = Skip_white_space(reader.line(), GBINDENT); |
|---|
| 164 | ca_assert(index<TOKENSIZE); // buffer overflow ? |
|---|
| 165 | |
|---|
| 166 | index += comment_subkey(reader.line()+index, key); |
|---|
| 167 | ca_assert(index<TOKENSIZE); // buffer overflow ? |
|---|
| 168 | |
|---|
| 169 | RDP_comment_parser one_comment_entry = genbank_one_comment_entry; |
|---|
| 170 | RDP_comments& comments = gbk.comments; |
|---|
| 171 | |
|---|
| 172 | if (!parse_RDP_comment(comments, one_comment_entry, key, index, reader)) { |
|---|
| 173 | // other comments |
|---|
| 174 | Append(comments.others, reader.line() + GBINDENT); |
|---|
| 175 | ++reader; |
|---|
| 176 | } |
|---|
| 177 | } |
|---|
| 178 | } |
|---|
| 179 | |
|---|
// True if 'ch' may occur inside an accession number (letter, digit or underscore).
// The cast is required: passing a negative 'char' value to isalnum() is undefined behavior.
inline bool valid_acc_char(char ch) { return isalnum(static_cast<unsigned char>(ch)) || ch == '_'; }
|---|
| 181 | |
|---|
| 182 | static void genbank_verify_accession(GenBank& gbk) { |
|---|
| 183 | // Verify accession information. |
|---|
| 184 | if (str_equal(gbk.accession, "No information\n")) return; // @@@ really allow this ? |
|---|
| 185 | |
|---|
| 186 | char *new_acc = NULp; |
|---|
| 187 | const char *sep = " \t\n;"; |
|---|
| 188 | SmartCharPtr req_fail; |
|---|
| 189 | SmartCharPtr copy = ARB_strdup(gbk.accession); |
|---|
| 190 | int count = 0; |
|---|
| 191 | |
|---|
| 192 | for (char *acc = strtok(&*copy, sep); acc && req_fail.isNull(); acc = strtok(NULp, sep)) { |
|---|
| 193 | count++; |
|---|
| 194 | if (!isalpha(acc[0])) req_fail = ARB_strdup("has to start with a letter"); |
|---|
| 195 | else { |
|---|
| 196 | for (int i = 0; acc[i]; ++i) { |
|---|
| 197 | if (!valid_acc_char(acc[i])) { |
|---|
| 198 | req_fail = strf("invalid char '%c'", acc[i]); |
|---|
| 199 | break; |
|---|
| 200 | } |
|---|
| 201 | } |
|---|
| 202 | } |
|---|
| 203 | |
|---|
| 204 | if (new_acc) Append(new_acc, ' '); |
|---|
| 205 | Append(new_acc, acc); |
|---|
| 206 | } |
|---|
| 207 | |
|---|
| 208 | if (req_fail.isNull() && count>9) { |
|---|
| 209 | req_fail = strf("No more than 9 accession number allowed (found %i)", count); |
|---|
| 210 | } |
|---|
| 211 | |
|---|
| 212 | if (!req_fail.isNull()) { |
|---|
| 213 | skip_eolnl_and_append(gbk.accession, ""); |
|---|
| 214 | throw_errorf(15, "Invalid accession number '%s' (%s)", gbk.accession, &*req_fail); |
|---|
| 215 | } |
|---|
| 216 | |
|---|
| 217 | Append(new_acc, '\n'); |
|---|
| 218 | freeset(gbk.accession, new_acc); |
|---|
| 219 | } |
|---|
| 220 | static void genbank_verify_keywords(GenBank& gbk) { |
|---|
| 221 | // Verify keywords. |
|---|
| 222 | int indi, count, len; |
|---|
| 223 | |
|---|
| 224 | // correct missing '.' at the end |
|---|
| 225 | terminate_with(gbk.keywords, '.'); |
|---|
| 226 | |
|---|
| 227 | for (indi = count = 0, len = str0len(gbk.keywords); indi < len; indi++) // LOOP_VECTORIZED[!<5] // tested down to gcc 5.5.0 (fails on 4.9.2) |
|---|
| 228 | if (gbk.keywords[indi] == '.') |
|---|
| 229 | count++; |
|---|
| 230 | |
|---|
| 231 | if (count != 1) { |
|---|
| 232 | // @@@ raise error here ? |
|---|
| 233 | if (Warnings::shown()) |
|---|
| 234 | fprintf(stderr, "\nKEYWORDS: %s", gbk.keywords); |
|---|
| 235 | warning(141, "No more than one period is allowed in KEYWORDS line."); |
|---|
| 236 | } |
|---|
| 237 | } |
|---|
| 238 | void GenbankParser::parse_section() { |
|---|
| 239 | char key[TOKENSIZE]; |
|---|
| 240 | genbank_key_word(reader.line(), 0, key); |
|---|
| 241 | state = ENTRY_STARTED; |
|---|
| 242 | parse_keyed_section(key); |
|---|
| 243 | } |
|---|
| 244 | |
|---|
| 245 | static void genbank_origin(Seq& seq, Reader& reader) { |
|---|
| 246 | // Read in genbank sequence data. |
|---|
| 247 | ca_assert(seq.is_empty()); |
|---|
| 248 | |
|---|
| 249 | // read in whole sequence data |
|---|
| 250 | for (++reader; reader.line() && !is_sequence_terminator(reader.line()); ++reader) { |
|---|
| 251 | if (has_content(reader.line())) { |
|---|
| 252 | for (int index = 9; reader.line()[index] != '\n' && reader.line()[index] != '\0'; index++) { |
|---|
| 253 | if (reader.line()[index] != ' ') |
|---|
| 254 | seq.add(reader.line()[index]); |
|---|
| 255 | } |
|---|
| 256 | } |
|---|
| 257 | } |
|---|
| 258 | } |
|---|
| 259 | |
|---|
| 260 | void GenbankParser::parse_keyed_section(const char *key) { |
|---|
| 261 | if (str_equal(key, "LOCUS")) { |
|---|
| 262 | genbank_one_entry_in(gbk.locus, reader); |
|---|
| 263 | if (!gbk.locus_contains_date()) { |
|---|
| 264 | static bool alreadyWarned = false; |
|---|
| 265 | if (!alreadyWarned) { |
|---|
| 266 | warning(14, "LOCUS data might be incomplete (no date seen)"); |
|---|
| 267 | alreadyWarned = true; |
|---|
| 268 | } |
|---|
| 269 | } |
|---|
| 270 | } |
|---|
| 271 | else if (str_equal(key, "DEFINITION")) { |
|---|
| 272 | genbank_one_entry_in(gbk.definition, reader); |
|---|
| 273 | terminate_with(gbk.definition, '.'); // correct missing '.' at the end |
|---|
| 274 | } |
|---|
| 275 | else if (str_equal(key, "ACCESSION")) { |
|---|
| 276 | genbank_one_entry_in(gbk.accession, reader); |
|---|
| 277 | genbank_verify_accession(gbk); |
|---|
| 278 | } |
|---|
| 279 | else if (str_equal(key, "KEYWORDS")) { |
|---|
| 280 | genbank_one_entry_in(gbk.keywords, reader); |
|---|
| 281 | genbank_verify_keywords(gbk); |
|---|
| 282 | } |
|---|
| 283 | else if (str_equal(key, "SOURCE")) { |
|---|
| 284 | genbank_source(gbk, reader); |
|---|
| 285 | terminate_with(gbk.source, '.'); // correct missing '.' at the end |
|---|
| 286 | terminate_with(gbk.organism, '.'); |
|---|
| 287 | } |
|---|
| 288 | else if (str_equal(key, "REFERENCE")) { |
|---|
| 289 | genbank_reference(gbk, reader); |
|---|
| 290 | } |
|---|
| 291 | else if (str_equal(key, "COMMENTS")) { |
|---|
| 292 | genbank_comments(gbk, reader); |
|---|
| 293 | } |
|---|
| 294 | else if (str_equal(key, "COMMENT")) { |
|---|
| 295 | genbank_comments(gbk, reader); |
|---|
| 296 | } |
|---|
| 297 | else if (str_equal(key, "ORIGIN")) { |
|---|
| 298 | genbank_origin(seq, reader); |
|---|
| 299 | state = ENTRY_COMPLETED; |
|---|
| 300 | } |
|---|
| 301 | else { |
|---|
| 302 | genbank_skip_unidentified(reader, 2); |
|---|
| 303 | } |
|---|
| 304 | } |
|---|
| 305 | |
|---|
| 306 | static void genbank_print_lines(Writer& write, const char *key, const char *content, const WrapMode& wrapMode) { |
|---|
| 307 | // Print one genbank line, wrap around if over column GBMAXLINE |
|---|
| 308 | |
|---|
| 309 | ca_assert(strlen(key) == GBINDENT); |
|---|
| 310 | ca_assert(content[strlen(content)-1] == '\n'); |
|---|
| 311 | |
|---|
| 312 | wrapMode.print(write, key, " ", content, GBMAXLINE); |
|---|
| 313 | } |
|---|
| 314 | |
|---|
| 315 | static void genbank_out_one_entry(Writer& write, const char *key, const char *content, const WrapMode& wrapMode, int period) { |
|---|
| 316 | /* Print out key and content if content length > 1 |
|---|
| 317 | * otherwise print key and "No information" w/wo |
|---|
| 318 | * period at the end depending on flag period. |
|---|
| 319 | */ |
|---|
| 320 | |
|---|
| 321 | if (!has_content(content)) { |
|---|
| 322 | content = period ? "No information.\n" : "No information\n"; |
|---|
| 323 | } |
|---|
| 324 | genbank_print_lines(write, key, content, wrapMode); |
|---|
| 325 | } |
|---|
| 326 | |
|---|
| 327 | static void genbank_out_one_reference(Writer& write, const GenbankRef& gbk_ref, int gbk_ref_num) { |
|---|
| 328 | WrapMode wrapWords(true); |
|---|
| 329 | |
|---|
| 330 | { |
|---|
| 331 | const char *r = gbk_ref.ref; |
|---|
| 332 | char refnum[TOKENSIZE]; |
|---|
| 333 | |
|---|
| 334 | if (!has_content(r)) { |
|---|
| 335 | sprintf(refnum, "%d\n", gbk_ref_num); |
|---|
| 336 | r = refnum; |
|---|
| 337 | } |
|---|
| 338 | genbank_out_one_entry(write, "REFERENCE ", r, wrapWords, NOPERIOD); |
|---|
| 339 | } |
|---|
| 340 | |
|---|
| 341 | genbank_out_one_entry(write, " AUTHORS ", gbk_ref.author, WrapMode(" "), NOPERIOD); |
|---|
| 342 | genbank_out_one_entry(write, " JOURNAL ", gbk_ref.journal, wrapWords, NOPERIOD); |
|---|
| 343 | genbank_out_one_entry(write, " TITLE ", gbk_ref.title, wrapWords, NOPERIOD); |
|---|
| 344 | genbank_out_one_entry(write, " STANDARD ", gbk_ref.standard, wrapWords, NOPERIOD); |
|---|
| 345 | } |
|---|
| 346 | |
|---|
| 347 | static void genbank_print_comment_if_content(Writer& write, const char *key, const char *content) { |
|---|
| 348 | // Print one genbank line, wrap around if over column GBMAXLINE |
|---|
| 349 | |
|---|
| 350 | if (!has_content(content)) return; |
|---|
| 351 | |
|---|
| 352 | char first[LINESIZE]; sprintf(first, "%*s%s", GBINDENT+RDP_SUBKEY_INDENT, "", key); |
|---|
| 353 | char other[LINESIZE]; sprintf(other, "%*s", GBINDENT+RDP_SUBKEY_INDENT+RDP_CONTINUED_INDENT, ""); |
|---|
| 354 | WrapMode(true).print(write, first, other, content, GBMAXLINE); |
|---|
| 355 | } |
|---|
| 356 | |
|---|
| 357 | static void genbank_out_origin(const Seq& seq, Writer& write) { // @@@ inline method |
|---|
| 358 | // Output sequence data in genbank format. |
|---|
| 359 | seq.out(write, GENBANK); |
|---|
| 360 | } |
|---|
| 361 | |
|---|
| 362 | inline void genbank_print_completeness(Writer& write, char compX, char X) { |
|---|
| 363 | if (compX == ' ') return; |
|---|
| 364 | ca_assert(compX == 'y' || compX == 'n'); |
|---|
| 365 | write.outf(" %c' end complete: %s\n", X, compX == 'y' ? "Yes" : "No"); |
|---|
| 366 | } |
|---|
| 367 | |
|---|
| 368 | void genbank_out_header(const GenBank& gbk, const Seq& seq, Writer& write) { |
|---|
| 369 | int indi; |
|---|
| 370 | WrapMode wrapWords(true); |
|---|
| 371 | |
|---|
| 372 | genbank_out_one_entry(write, "LOCUS ", gbk.locus, wrapWords, NOPERIOD); |
|---|
| 373 | genbank_out_one_entry(write, "DEFINITION ", gbk.definition, wrapWords, PERIOD); |
|---|
| 374 | genbank_out_one_entry(write, "ACCESSION ", gbk.accession, wrapWords, NOPERIOD); |
|---|
| 375 | genbank_out_one_entry(write, "KEYWORDS ", gbk.keywords, WrapMode(";"), PERIOD); |
|---|
| 376 | genbank_out_one_entry(write, "SOURCE ", gbk.source, wrapWords, PERIOD); |
|---|
| 377 | genbank_out_one_entry(write, " ORGANISM ", gbk.organism, wrapWords, PERIOD); |
|---|
| 378 | |
|---|
| 379 | if (gbk.has_refs()) { |
|---|
| 380 | for (indi = 0; indi < gbk.get_refcount(); indi++) { |
|---|
| 381 | genbank_out_one_reference(write, gbk.get_ref(indi), indi+1); |
|---|
| 382 | } |
|---|
| 383 | } |
|---|
| 384 | else { |
|---|
| 385 | genbank_out_one_reference(write, GenbankRef(), 1); |
|---|
| 386 | } |
|---|
| 387 | |
|---|
| 388 | const RDP_comments& comments = gbk.comments; |
|---|
| 389 | const OrgInfo& orginf = comments.orginf; |
|---|
| 390 | const SeqInfo& seqinf = comments.seqinf; |
|---|
| 391 | |
|---|
| 392 | if (comments.exists()) { |
|---|
| 393 | write.out("COMMENTS "); |
|---|
| 394 | |
|---|
| 395 | if (orginf.exists()) { |
|---|
| 396 | write.out("Organism information\n"); |
|---|
| 397 | |
|---|
| 398 | genbank_print_comment_if_content(write, "Source of strain: ", orginf.source); |
|---|
| 399 | genbank_print_comment_if_content(write, "Culture collection: ", orginf.cultcoll); // this field is used in ../lib/import/.rdp_old.ift |
|---|
| 400 | genbank_print_comment_if_content(write, "Former name: ", orginf.formname); // other fields occur in no .ift |
|---|
| 401 | genbank_print_comment_if_content(write, "Alternate name: ", orginf.nickname); |
|---|
| 402 | genbank_print_comment_if_content(write, "Common name: ", orginf.commname); |
|---|
| 403 | genbank_print_comment_if_content(write, "Host organism: ", orginf.hostorg); |
|---|
| 404 | |
|---|
| 405 | if (seqinf.exists() || str0len(comments.others) > 0) |
|---|
| 406 | write.out(" "); |
|---|
| 407 | } |
|---|
| 408 | |
|---|
| 409 | if (seqinf.exists()) { |
|---|
| 410 | write.outf("Sequence information (bases 1 to %d)\n", seq.get_len()); |
|---|
| 411 | |
|---|
| 412 | genbank_print_comment_if_content(write, "RDP ID: ", seqinf.RDPid); |
|---|
| 413 | genbank_print_comment_if_content(write, "Corresponding GenBank entry: ", seqinf.gbkentry); // this field is used in ../lib/import/.rdp_old.ift |
|---|
| 414 | genbank_print_comment_if_content(write, "Sequencing methods: ", seqinf.methods); |
|---|
| 415 | |
|---|
| 416 | genbank_print_completeness(write, seqinf.comp5, '5'); |
|---|
| 417 | genbank_print_completeness(write, seqinf.comp3, '3'); |
|---|
| 418 | } |
|---|
| 419 | |
|---|
| 420 | // @@@ use wrapper for code below ? |
|---|
| 421 | // print GBINDENT spaces of the first line |
|---|
| 422 | if (str0len(comments.others) > 0) { |
|---|
| 423 | write.repeated(' ', GBINDENT); |
|---|
| 424 | } |
|---|
| 425 | |
|---|
| 426 | if (str0len(comments.others) > 0) { |
|---|
| 427 | int length = str0len(comments.others); |
|---|
| 428 | for (indi = 0; indi < length; indi++) { |
|---|
| 429 | write.out(comments.others[indi]); |
|---|
| 430 | |
|---|
| 431 | // if another line, print GBINDENT spaces first |
|---|
| 432 | if (comments.others[indi] == '\n' && comments.others[indi + 1] != '\0') { |
|---|
| 433 | write.repeated(' ', GBINDENT); |
|---|
| 434 | } |
|---|
| 435 | } |
|---|
| 436 | } |
|---|
| 437 | } |
|---|
| 438 | } |
|---|
| 439 | |
|---|
| 440 | void genbank_out_base_count(const Seq& seq, Writer& write) { |
|---|
| 441 | BaseCounts bases; |
|---|
| 442 | seq.count(bases); |
|---|
| 443 | write.outf("BASE COUNT %6d a %6d c %6d g %6d t", bases.a, bases.c, bases.g, bases.t); |
|---|
| 444 | if (bases.other) { // don't write 0 others |
|---|
| 445 | write.outf(" %6d others", bases.other); |
|---|
| 446 | } |
|---|
| 447 | write.out('\n'); |
|---|
| 448 | } |
|---|
| 449 | |
|---|
| 450 | void genbank_out(const GenBank& gbk, const Seq& seq, Writer& write) { |
|---|
| 451 | // Output in a genbank format |
|---|
| 452 | |
|---|
| 453 | genbank_out_header(gbk, seq, write); |
|---|
| 454 | genbank_out_base_count(seq, write); |
|---|
| 455 | write.out("ORIGIN\n"); |
|---|
| 456 | genbank_out_origin(seq, write); |
|---|
| 457 | } |
|---|
| 458 | |
|---|
| 459 | bool GenbankReader::read_one_entry(Seq& seq) { |
|---|
| 460 | data.reinit(); |
|---|
| 461 | if (!GenbankParser(data, seq, *this).parse_entry()) abort(); |
|---|
| 462 | return ok(); |
|---|
| 463 | } |
|---|