source: tags/initial/ALEIO/tabl-gb.c

Last change on this file was 5458, checked in by baderk, 16 years ago

Removed .cvsignore files from repository. Hopefully this time all svn:ignore flags were set right.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.3 KB
Line 
1#include <stdlib.h>
2#include <stdio.h>
3#include <string.h>
4#include <sys/types.h>
5#include <sys/stat.h>
6#include <ctype.h>
7#include <errno.h>
8
9#include "careful.h"
10
/* Program name used as the prefix of perror messages.  main assigns it
   from careful_prog_name (argv[0]) before any other work is done.  */
char *progname;
12
/* Starting size, in bytes, of the buffer an entry is read into.  Most
   entries fit in this much, which saves a few calls to realloc.  */
enum { INITIAL_BUF_LEN = 4096 };
16
/* A counted string: TEXT points at the character data and LEN says how
   many bytes of it are in use.  */
typedef struct
{
  char *text;                   /* start of the character data */
  size_t len;                   /* number of bytes in use at TEXT */
} len_string;
23
/* Write the LEN bytes at TEXT of the counted string LS to STREAM.
   Evaluates to fwrite's result, i.e. the number of bytes written.
   LS is evaluated twice, so it must not have side effects.  */
#define FWRITE_LEN_STRING(ls, stream) \
  (fwrite ((ls).text, 1, (ls).len, (stream)))
26
27
/* Print the built-in usage text (authors, revision id and the list of
   command-line options) to stderr.  main calls this for -h / --help
   and when the program is run without arguments.  */

void
get_help ()
{
  /* One literal per output line; the compiler joins them into a
     single string.  */
  static const char help_text[] =
    "Written by Pavel Slavin, pavel@darwin.life.uiuc.edu\n"
    "       and Jim Blandy,   jimb@gnu.ai.mit.edu\n"
    "$Id: tabl-gb.c 5458 2008-07-16 15:24:20Z westram $ \n"
    "tabl-gb writes key/value pairs to a file or stdout in GenBank format.\n\n"
    "Calling sequence:\n"
    "[ -h | --help ]                      ; Displays this text\n"
    "[ --out-file file ]                  ; stdout if omitted\n"
    "[ --err-file errfile ]               ; stderr if omitted\n"
    "[ --annotation-file file ]           ; stdin if omitted\n"
    "  --annotation-end char              ; indicates end of an annotation\n"
    "                                     ; In ascii code\n"
    "[ --sequence-file file ]             ; stdin if omitted\n"
    "  --sequence-end char                ; indicates end of a sequence\n"
    "                                     ; In ascii code\n";

  fputs (help_text, stderr);
}
54
/* The name of the file to which we should write error messages, or
   zero for stderr.  */
char *error_file_name = 0;

/* The stream error messages are written to, or zero if no message has
   been reported yet.  */
FILE *error_file = 0;



/* Write the message ENTRY to the error stream.

   The stream is chosen on the first call: if error_file_name is set,
   that file is opened in append mode (and created if missing), so
   entries accumulate across runs; start from an empty or absent file
   if only this run's messages are wanted.  Otherwise stderr is used.

   If the named file cannot be opened, the failure is reported with
   perror and messages go to stderr instead, so ENTRY is never passed
   to fputs with a null stream.  */
void
error_entry (char *entry)
{
  if (! error_file)
    {
      if (error_file_name)
        {
          error_file = fopen (error_file_name, "a+");
          if (! error_file)
            perror (error_file_name);
        }

      /* Either no file was requested or it could not be opened.  */
      if (! error_file)
        error_file = stderr;
    }

  fputs (entry, error_file);
}
87
88
/* Allocation guard: hand back P unchanged when it is non-null.  When P
   is null, log "virtual memory exhausted" through error_entry and
   terminate the program with exit status 2.  */
void *
check_ptr (void *p)
{
  if (p)
    return p;

  error_entry ("virtual memory exhausted\n");
  exit (2);
}
101
102
103
104/* Read text from SOURCE until we find DELIMITER, or hit EOF.
105   Set *BUF_PTR to a malloc'd buffer for the text, which the caller must free.
106   The delimiting string or EOF is not included in the buffer.
107   If EOF was the first non-newline character we found, return -1.
108   Otherwise, return the length of the text read.  */
109size_t
110getdelim_str (FILE *source, char *delim, char **buf_ptr)
111{
112  size_t delim_len = strlen (delim);
113  char delim_last_char;
114
115  size_t buf_len = INITIAL_BUF_LEN;
116  char *buf = (char *) check_ptr (malloc (buf_len));
117
118  size_t i = 0;
119  int c;
120
121  if (delim_len == 0)
122    abort ();
123  delim_last_char = delim[delim_len - 1];
124 
125  while ((c = getc (source)) != EOF)
126    {
127      /* Do we need to enlarge the buffer?  */
128      if (i >= buf_len)
129        {
130          buf_len *= 2;
131          buf = (char *) check_ptr (realloc (buf, buf_len));
132        }
133
134      buf[i++] = c;
135     
136      /* Have we read the delimiter?  We check to see if we just
137         stored delim_last_char; this is a quick, false-positive test.
138         Then we check for the whole string; this is a slow but
139         correct test.  */
140      if (c == delim_last_char
141          && i >= delim_len
142          && ! memcmp (&buf[i - delim_len], delim, delim_len))
143        break;
144    }
145
146  if (ferror (source))
147    {
148      perror (progname);
149      exit (2);
150    }
151
152  *buf_ptr = buf;
153
154  if (c == EOF)
155    {
156      /* Special case, as documented.  */
157      if (i == 0)
158        {
159          free (buf);
160          return -1;
161        }
162      else
163        return i;
164    }
165  else
166    return i - delim_len;
167}
168
169
170/* This function writes sequence in GenBank format. */
171void
172write_seq (len_string *sequence, FILE *out)
173{
174  char buf[80];
175  size_t sequence_len = sequence->len;
176  size_t line_start;
177
178  for (line_start = 0;
179       line_start < sequence_len;
180       line_start += 60)
181    {
182      size_t line_end;
183      size_t column_start;
184      char *p;
185     
186      sprintf (buf, "%9d", line_start + 1);
187      p = buf + 9;
188
189      /* Where is the end of this line?  */
190      line_end = line_start + 60;
191      if (line_end > sequence_len)
192        line_end = sequence_len;
193     
194      for (column_start = line_start;
195           column_start < line_end;
196           column_start += 10)
197        {
198          size_t column_len;
199
200          /* Where is the end of this column?  */
201          column_len = line_end - column_start;
202          if (column_len > 10)
203            column_len = 10;
204
205          *p++ = ' ';
206          memcpy (p, sequence->text + column_start, column_len);
207          p += column_len;
208        }
209
210      fwrite (buf, sizeof (char), p - buf, out);
211      putc ('\n', out);
212    }
213}
214
215
216/*  This function puts back GenBank entries  */
217void 
218put_gbfile (char *outfile, char *annotfile, char *seqfile,
219            char annot_end, char seq_end)
220{
221  FILE *out;
222  FILE *annot;
223  FILE *seq;
224  char annot_end_string[2];
225  char seq_end_string  [2];
226  len_string annotation;        /* place where each annotation will be held */
227  len_string sequence;          /* place where each sequence will be held */
228
229  /* pointers to a out-file, err-file.
230     All files opened as read\write, and a new file created
231     if one specified does not exist */
232  if (!outfile)
233    out = stdout;
234  else
235    out = fopen (outfile, "w+");
236
237  /* pointer to annotation and sequence files.
238     Opened as a read only */
239  if (!annotfile)
240    annot = stdin;
241  else
242    annot = fopen (annotfile, "r");
243  if (!seqfile)
244    seq = stdin;
245  else
246    seq = fopen (seqfile, "r");
247
248  if (annot == NULL || seq == NULL)
249    {
250      error_entry ("Either annotation or sequence files you specified on\n");
251      error_entry ("the command line do not exist.\n");
252      exit (1);
253    }
254
255  annot_end_string[0] = annot_end;
256  seq_end_string  [0] = seq_end;
257  annot_end_string[1] = seq_end_string[1] = '\0';
258
259  while (!feof (annot) && !feof (seq))
260    {
261      annotation.len = getdelim_str (annot, annot_end_string,
262                                     &annotation.text);
263      sequence.len   = getdelim_str (seq,   seq_end_string,
264                                     &sequence.text);
265      if (annotation.len != -1 && sequence.len != -1)
266        {
267          FWRITE_LEN_STRING (annotation, out);
268          write_seq (&sequence, out);
269          fputs ("//\n", out);
270          free (annotation.text);
271          free (sequence.text);
272        }
273      else
274        break;
275
276      check_file (out, outfile, "writing GenBank data");
277    }
278
279  if (!feof (annot) || !feof (seq))
280    {
281      error_entry ("Hallelujah!  You have more sequences than annotations, or ");
282      error_entry ("vice versa.");
283      exit (1);
284    }
285
286  careful_close (seq, seqfile);
287  careful_close (annot, annotfile);
288  careful_close (out, outfile);
289}
290
291
292
293int
294main (int argc, char *argv[])
295{
296  char *outfile = NULL;         /* Name of output GenBank file */
297  char *annotfile = NULL;       /* Name of annotation file */
298  char *seqfile = NULL;         /* Name of sequence file */
299  char annotend = 12;           /* Separator at the end of each annotation */
300  char seqend = 10;             /* Separator at the end of each sequences */
301  int i;
302
303  progname = careful_prog_name (argv[0]);
304
305  if (argc == 1)
306    {
307      get_help ();
308      return (1);
309    }
310
311  for (i = 1; i < argc; i++)
312    {
313      if (!strcmp (argv[i], "--annotation-end"))
314        {
315          i++;
316          annotend = toascii (atoi (argv[i]));
317        }
318      else if (!strcmp (argv[i], "--sequence-end"))
319        {
320          i++;
321          seqend = toascii (atoi (argv[i]));
322        }
323      else if (!strcmp (argv[i], "--out-file"))
324        {
325          i++;
326          outfile = argv[i];
327        }
328      else if (!strcmp (argv[i], "--err-file"))
329        {
330          i++;
331          error_file_name = argv[i];
332        }
333      else if (!strcmp (argv[i], "--annotation-file"))
334        {
335          i++;
336          annotfile = argv[i];
337        }
338      else if (!strcmp (argv[i], "--sequence-file"))
339        {
340          i++;
341          seqfile = argv[i];
342        }
343      else if (!strcmp (argv[i], "-h") || !strcmp (argv[i], "--help"))
344        {
345          get_help ();
346          return 1;
347        }
348      else
349        {
350          fputs ("\nYour calling sequence is incorrect.\
351  Try tabl-gb --help option\n", stderr);
352          return 1;
353        }
354    }
355
356  put_gbfile (outfile, annotfile, seqfile, annotend, seqend);
357
358  return 0;
359}
Note: See TracBrowser for help on using the repository browser.