| 1 | #include <stdlib.h> |
|---|
| 2 | #include <stdio.h> |
|---|
| 3 | #include <string.h> |
|---|
| 4 | #include <sys/types.h> |
|---|
| 5 | #include <sys/stat.h> |
|---|
| 6 | #include <ctype.h> |
|---|
| 7 | #include <errno.h> |
|---|
| 8 | |
|---|
| 9 | #include "careful.h" |
|---|
| 10 | |
|---|
/* Name of this program.  main assigns it from careful_prog_name
   (argv[0]); getdelim_str hands it to perror when a read fails.  */
char *progname;

/* Starting capacity, in bytes, of the buffer that receives one entry.
   Most entries are smaller than this, so this value avoids a few
   calls to realloc.  */
#define INITIAL_BUF_LEN 4096

/* A run of characters together with its length.  TEXT need not be
   NUL-terminated; LEN says how many characters are meaningful.  */
typedef struct
{
  char *text;                   /* first character of the run */
  size_t len;                   /* number of characters in the run */
}
len_string;

/* Write the LEN characters of the len_string LS to STREAM.  The macro
   expands to the fwrite call itself, so its value is the number of
   characters actually written.  LS is evaluated twice.  */
#define FWRITE_LEN_STRING(ls, stream) \
  (fwrite ((ls).text, sizeof (char), (ls).len, (stream)))
|---|
| 26 | |
|---|
| 27 | |
|---|
/* Print the usage summary.  main calls this for -h, for --help, and
   when the program is run without any arguments.  The text is compiled
   into the program and is written to stderr.  */

void
get_help ()
{
  static const char usage_text[] =
    "Written by Pavel Slavin, pavel@darwin.life.uiuc.edu\n"
    "and Jim Blandy, jimb@gnu.ai.mit.edu\n"
    "$Id: tabl-gb.c 5458 2008-07-16 15:24:20Z westram $ \n"
    "tabl-gb writes key/value pairs to a file or stdout in GenBank format.\n\n"
    "Calling sequence:\n"
    "[ -h | --help ] ; Displays this text\n"
    "[ --out-file file ] ; stdout if omitted\n"
    "[ --err-file errfile ] ; stderr if omitted\n"
    "[ --annotation-file file ] ; stdin if omitted\n"
    "--annotation-end char ; indicates end of an annotation\n"
    "; In ascii code\n"
    "[ --sequence-file file ] ; stdin if omitted\n"
    "--sequence-end char ; indicates end of a sequence\n"
    "; In ascii code\n";

  fputs (usage_text, stderr);
}
|---|
| 54 | |
|---|
/* The name of the file to which we should write error messages, or
   zero for stderr.  */
char *error_file_name = 0;

/* The file to which we write error messages, or zero if we haven't
   opened one yet.  */
FILE *error_file = 0;


/* Write ENTRY to the error stream.

   The stream is chosen on the first call and remembered in error_file:
   stderr when error_file_name is zero, otherwise the file named by
   error_file_name opened in append mode.  Append mode creates the file
   if it is missing and keeps entries left by earlier runs, so start
   with an empty or absent file if only this run's messages are wanted.

   If the named file cannot be opened, the reason is reported with
   perror and this and all later entries go to stderr instead.  Callers
   use this function to report fatal conditions just before exiting, so
   the message must not be lost (and fputs must never be handed a null
   stream).  */
void
error_entry (char *entry)
{
  if (! error_file)
    {
      if (error_file_name)
        error_file = fopen (error_file_name, "a+");

      if (! error_file)
        {
          /* Either no file was requested, or opening it failed.  */
          if (error_file_name)
            perror (error_file_name);
          error_file = stderr;
        }
    }

  fputs (entry, error_file);
}
|---|
| 87 | |
|---|
| 88 | |
|---|
/* Hand P back unchanged when it is non-null.  A null P is treated as
   an allocation failure: the message is sent through error_entry and
   the program ends with exit status 2.  */
void *
check_ptr (void *p)
{
  if (p)
    return p;

  error_entry ("virtual memory exhausted\n");
  exit (2);
}
|---|
| 101 | |
|---|
| 102 | |
|---|
| 103 | |
|---|
| 104 | /* Read text from SOURCE until we find DELIMITER, or hit EOF. |
|---|
| 105 | Set *BUF_PTR to a malloc'd buffer for the text, which the caller must free. |
|---|
| 106 | The delimiting string or EOF is not included in the buffer. |
|---|
| 107 | If EOF was the first non-newline character we found, return -1. |
|---|
| 108 | Otherwise, return the length of the text read. */ |
|---|
| 109 | size_t |
|---|
| 110 | getdelim_str (FILE *source, char *delim, char **buf_ptr) |
|---|
| 111 | { |
|---|
| 112 | size_t delim_len = strlen (delim); |
|---|
| 113 | char delim_last_char; |
|---|
| 114 | |
|---|
| 115 | size_t buf_len = INITIAL_BUF_LEN; |
|---|
| 116 | char *buf = (char *) check_ptr (malloc (buf_len)); |
|---|
| 117 | |
|---|
| 118 | size_t i = 0; |
|---|
| 119 | int c; |
|---|
| 120 | |
|---|
| 121 | if (delim_len == 0) |
|---|
| 122 | abort (); |
|---|
| 123 | delim_last_char = delim[delim_len - 1]; |
|---|
| 124 | |
|---|
| 125 | while ((c = getc (source)) != EOF) |
|---|
| 126 | { |
|---|
| 127 | /* Do we need to enlarge the buffer? */ |
|---|
| 128 | if (i >= buf_len) |
|---|
| 129 | { |
|---|
| 130 | buf_len *= 2; |
|---|
| 131 | buf = (char *) check_ptr (realloc (buf, buf_len)); |
|---|
| 132 | } |
|---|
| 133 | |
|---|
| 134 | buf[i++] = c; |
|---|
| 135 | |
|---|
| 136 | /* Have we read the delimiter? We check to see if we just |
|---|
| 137 | stored delim_last_char; this is a quick, false-positive test. |
|---|
| 138 | Then we check for the whole string; this is a slow but |
|---|
| 139 | correct test. */ |
|---|
| 140 | if (c == delim_last_char |
|---|
| 141 | && i >= delim_len |
|---|
| 142 | && ! memcmp (&buf[i - delim_len], delim, delim_len)) |
|---|
| 143 | break; |
|---|
| 144 | } |
|---|
| 145 | |
|---|
| 146 | if (ferror (source)) |
|---|
| 147 | { |
|---|
| 148 | perror (progname); |
|---|
| 149 | exit (2); |
|---|
| 150 | } |
|---|
| 151 | |
|---|
| 152 | *buf_ptr = buf; |
|---|
| 153 | |
|---|
| 154 | if (c == EOF) |
|---|
| 155 | { |
|---|
| 156 | /* Special case, as documented. */ |
|---|
| 157 | if (i == 0) |
|---|
| 158 | { |
|---|
| 159 | free (buf); |
|---|
| 160 | return -1; |
|---|
| 161 | } |
|---|
| 162 | else |
|---|
| 163 | return i; |
|---|
| 164 | } |
|---|
| 165 | else |
|---|
| 166 | return i - delim_len; |
|---|
| 167 | } |
|---|
| 168 | |
|---|
| 169 | |
|---|
| 170 | /* This function writes sequence in GenBank format. */ |
|---|
| 171 | void |
|---|
| 172 | write_seq (len_string *sequence, FILE *out) |
|---|
| 173 | { |
|---|
| 174 | char buf[80]; |
|---|
| 175 | size_t sequence_len = sequence->len; |
|---|
| 176 | size_t line_start; |
|---|
| 177 | |
|---|
| 178 | for (line_start = 0; |
|---|
| 179 | line_start < sequence_len; |
|---|
| 180 | line_start += 60) |
|---|
| 181 | { |
|---|
| 182 | size_t line_end; |
|---|
| 183 | size_t column_start; |
|---|
| 184 | char *p; |
|---|
| 185 | |
|---|
| 186 | sprintf (buf, "%9d", line_start + 1); |
|---|
| 187 | p = buf + 9; |
|---|
| 188 | |
|---|
| 189 | /* Where is the end of this line? */ |
|---|
| 190 | line_end = line_start + 60; |
|---|
| 191 | if (line_end > sequence_len) |
|---|
| 192 | line_end = sequence_len; |
|---|
| 193 | |
|---|
| 194 | for (column_start = line_start; |
|---|
| 195 | column_start < line_end; |
|---|
| 196 | column_start += 10) |
|---|
| 197 | { |
|---|
| 198 | size_t column_len; |
|---|
| 199 | |
|---|
| 200 | /* Where is the end of this column? */ |
|---|
| 201 | column_len = line_end - column_start; |
|---|
| 202 | if (column_len > 10) |
|---|
| 203 | column_len = 10; |
|---|
| 204 | |
|---|
| 205 | *p++ = ' '; |
|---|
| 206 | memcpy (p, sequence->text + column_start, column_len); |
|---|
| 207 | p += column_len; |
|---|
| 208 | } |
|---|
| 209 | |
|---|
| 210 | fwrite (buf, sizeof (char), p - buf, out); |
|---|
| 211 | putc ('\n', out); |
|---|
| 212 | } |
|---|
| 213 | } |
|---|
| 214 | |
|---|
| 215 | |
|---|
| 216 | /* This function puts back GenBank entries */ |
|---|
| 217 | void |
|---|
| 218 | put_gbfile (char *outfile, char *annotfile, char *seqfile, |
|---|
| 219 | char annot_end, char seq_end) |
|---|
| 220 | { |
|---|
| 221 | FILE *out; |
|---|
| 222 | FILE *annot; |
|---|
| 223 | FILE *seq; |
|---|
| 224 | char annot_end_string[2]; |
|---|
| 225 | char seq_end_string [2]; |
|---|
| 226 | len_string annotation; /* place where each annotation will be held */ |
|---|
| 227 | len_string sequence; /* place where each sequence will be held */ |
|---|
| 228 | |
|---|
| 229 | /* pointers to a out-file, err-file. |
|---|
| 230 | All files opened as read\write, and a new file created |
|---|
| 231 | if one specified does not exist */ |
|---|
| 232 | if (!outfile) |
|---|
| 233 | out = stdout; |
|---|
| 234 | else |
|---|
| 235 | out = fopen (outfile, "w+"); |
|---|
| 236 | |
|---|
| 237 | /* pointer to annotation and sequence files. |
|---|
| 238 | Opened as a read only */ |
|---|
| 239 | if (!annotfile) |
|---|
| 240 | annot = stdin; |
|---|
| 241 | else |
|---|
| 242 | annot = fopen (annotfile, "r"); |
|---|
| 243 | if (!seqfile) |
|---|
| 244 | seq = stdin; |
|---|
| 245 | else |
|---|
| 246 | seq = fopen (seqfile, "r"); |
|---|
| 247 | |
|---|
| 248 | if (annot == NULL || seq == NULL) |
|---|
| 249 | { |
|---|
| 250 | error_entry ("Either annotation or sequence files you specified on\n"); |
|---|
| 251 | error_entry ("the command line do not exist.\n"); |
|---|
| 252 | exit (1); |
|---|
| 253 | } |
|---|
| 254 | |
|---|
| 255 | annot_end_string[0] = annot_end; |
|---|
| 256 | seq_end_string [0] = seq_end; |
|---|
| 257 | annot_end_string[1] = seq_end_string[1] = '\0'; |
|---|
| 258 | |
|---|
| 259 | while (!feof (annot) && !feof (seq)) |
|---|
| 260 | { |
|---|
| 261 | annotation.len = getdelim_str (annot, annot_end_string, |
|---|
| 262 | &annotation.text); |
|---|
| 263 | sequence.len = getdelim_str (seq, seq_end_string, |
|---|
| 264 | &sequence.text); |
|---|
| 265 | if (annotation.len != -1 && sequence.len != -1) |
|---|
| 266 | { |
|---|
| 267 | FWRITE_LEN_STRING (annotation, out); |
|---|
| 268 | write_seq (&sequence, out); |
|---|
| 269 | fputs ("//\n", out); |
|---|
| 270 | free (annotation.text); |
|---|
| 271 | free (sequence.text); |
|---|
| 272 | } |
|---|
| 273 | else |
|---|
| 274 | break; |
|---|
| 275 | |
|---|
| 276 | check_file (out, outfile, "writing GenBank data"); |
|---|
| 277 | } |
|---|
| 278 | |
|---|
| 279 | if (!feof (annot) || !feof (seq)) |
|---|
| 280 | { |
|---|
| 281 | error_entry ("Hallelujah! You have more sequences than annotations, or "); |
|---|
| 282 | error_entry ("vice versa."); |
|---|
| 283 | exit (1); |
|---|
| 284 | } |
|---|
| 285 | |
|---|
| 286 | careful_close (seq, seqfile); |
|---|
| 287 | careful_close (annot, annotfile); |
|---|
| 288 | careful_close (out, outfile); |
|---|
| 289 | } |
|---|
| 290 | |
|---|
| 291 | |
|---|
| 292 | |
|---|
| 293 | int |
|---|
| 294 | main (int argc, char *argv[]) |
|---|
| 295 | { |
|---|
| 296 | char *outfile = NULL; /* Name of output GenBank file */ |
|---|
| 297 | char *annotfile = NULL; /* Name of annotation file */ |
|---|
| 298 | char *seqfile = NULL; /* Name of sequence file */ |
|---|
| 299 | char annotend = 12; /* Separator at the end of each annotation */ |
|---|
| 300 | char seqend = 10; /* Separator at the end of each sequences */ |
|---|
| 301 | int i; |
|---|
| 302 | |
|---|
| 303 | progname = careful_prog_name (argv[0]); |
|---|
| 304 | |
|---|
| 305 | if (argc == 1) |
|---|
| 306 | { |
|---|
| 307 | get_help (); |
|---|
| 308 | return (1); |
|---|
| 309 | } |
|---|
| 310 | |
|---|
| 311 | for (i = 1; i < argc; i++) |
|---|
| 312 | { |
|---|
| 313 | if (!strcmp (argv[i], "--annotation-end")) |
|---|
| 314 | { |
|---|
| 315 | i++; |
|---|
| 316 | annotend = toascii (atoi (argv[i])); |
|---|
| 317 | } |
|---|
| 318 | else if (!strcmp (argv[i], "--sequence-end")) |
|---|
| 319 | { |
|---|
| 320 | i++; |
|---|
| 321 | seqend = toascii (atoi (argv[i])); |
|---|
| 322 | } |
|---|
| 323 | else if (!strcmp (argv[i], "--out-file")) |
|---|
| 324 | { |
|---|
| 325 | i++; |
|---|
| 326 | outfile = argv[i]; |
|---|
| 327 | } |
|---|
| 328 | else if (!strcmp (argv[i], "--err-file")) |
|---|
| 329 | { |
|---|
| 330 | i++; |
|---|
| 331 | error_file_name = argv[i]; |
|---|
| 332 | } |
|---|
| 333 | else if (!strcmp (argv[i], "--annotation-file")) |
|---|
| 334 | { |
|---|
| 335 | i++; |
|---|
| 336 | annotfile = argv[i]; |
|---|
| 337 | } |
|---|
| 338 | else if (!strcmp (argv[i], "--sequence-file")) |
|---|
| 339 | { |
|---|
| 340 | i++; |
|---|
| 341 | seqfile = argv[i]; |
|---|
| 342 | } |
|---|
| 343 | else if (!strcmp (argv[i], "-h") || !strcmp (argv[i], "--help")) |
|---|
| 344 | { |
|---|
| 345 | get_help (); |
|---|
| 346 | return 1; |
|---|
| 347 | } |
|---|
| 348 | else |
|---|
| 349 | { |
|---|
| 350 | fputs ("\nYour calling sequence is incorrect.\ |
|---|
| 351 | Try tabl-gb --help option\n", stderr); |
|---|
| 352 | return 1; |
|---|
| 353 | } |
|---|
| 354 | } |
|---|
| 355 | |
|---|
| 356 | put_gbfile (outfile, annotfile, seqfile, annotend, seqend); |
|---|
| 357 | |
|---|
| 358 | return 0; |
|---|
| 359 | } |
|---|