1 | /* ureadasn.c |
---|
2 | -- parse, mangle and otherwise rewrite ASN1 file/entries for readseq reading |
---|
3 | -- from NCBI toolkit (ncbi.nlm.nih.gov:/toolkit) |
---|
4 | */ |
---|
5 | |
---|
6 | #ifdef NCBI |
---|
7 | |
---|
8 | #include <stdio.h> |
---|
9 | #include <ctype.h> |
---|
10 | #include <string.h> |
---|
11 | |
---|
12 | /* NCBI toolkit :include: must be on lib path */ |
---|
13 | #include <ncbi.h> |
---|
14 | #include <seqport.h> |
---|
15 | |
---|
16 | #define UREADASN |
---|
17 | #include "ureadseq.h" |
---|
18 | |
---|
19 | #pragma segment ureadasn |
---|
20 | |
---|
21 | /* this stuff is hacked up from tofasta.c of ncbitools */ |
---|
22 | #define kBaseAny 0 |
---|
23 | #define kBaseNucleic 1 |
---|
24 | #define kBaseAmino 2 |
---|
25 | |
---|
26 | typedef struct tofasta { |
---|
27 | Boolean idonly; |
---|
28 | short *seqnum; |
---|
29 | short whichSeq; |
---|
30 | char **seq, **seqid; |
---|
31 | long *seqlen; |
---|
32 | } FastaDat, PNTR FastaPtr; |
---|
33 | |
---|
34 | |
---|
35 | void BioseqRawToRaw(BioseqPtr bsp, Boolean idonly, |
---|
36 | short whichSeq, short *seqnum, |
---|
37 | char **seq, char **seqid, long *seqlen) |
---|
38 | { |
---|
39 | SeqPortPtr spp; |
---|
40 | SeqIdPtr bestid; |
---|
41 | Uint1 repr, code, residue; |
---|
42 | CharPtr tmp, title; |
---|
43 | long outlen, outmax; |
---|
44 | char localid[256], *sp; |
---|
45 | |
---|
46 | /* !!! this may be called several times for a single sequence |
---|
47 | because SeqEntryExplore looks for parts and joins them... |
---|
48 | assume seq, seqid, seqlen may contain data (or NULL) |
---|
49 | */ |
---|
50 | if (bsp == NULL) return; |
---|
51 | repr = Bioseq_repr(bsp); |
---|
52 | if (!(repr == Seq_repr_raw || repr == Seq_repr_const)) return; |
---|
53 | |
---|
54 | (*seqnum)++; |
---|
55 | if (!(whichSeq == *seqnum || whichSeq == 0)) return; |
---|
56 | |
---|
57 | bestid = SeqIdFindBest(bsp->id, (Uint1) 0); |
---|
58 | title = BioseqGetTitle(bsp); |
---|
59 | if (idonly) { |
---|
60 | sprintf(localid, " %d) ", *seqnum); |
---|
61 | tmp= localid + strlen(localid)-1; |
---|
62 | } |
---|
63 | else { |
---|
64 | strcpy(localid," "); |
---|
65 | tmp= localid; |
---|
66 | } |
---|
67 | tmp = SeqIdPrint(bestid, tmp, PRINTID_FASTA_SHORT); |
---|
68 | tmp = StringMove(tmp, " "); |
---|
69 | StringNCpy(tmp, title, 200); |
---|
70 | /* fprintf(stderr,"BioseqRawToRaw: localid='%s'\n",localid); */ |
---|
71 | |
---|
72 | /* < seqid is fixed storage */ |
---|
73 | /* strcpy( *seqid, localid); */ |
---|
74 | /* < seqid is variable sized */ |
---|
75 | outmax= strlen(localid) + 3; |
---|
76 | if (*seqid==NULL) { |
---|
77 | *seqid= (char*) malloc(outmax); |
---|
78 | if (*seqid==NULL) return; |
---|
79 | strcpy(*seqid, localid); |
---|
80 | } |
---|
81 | else { |
---|
82 | outmax += strlen(*seqid) + 2; |
---|
83 | *seqid= (char*) realloc( *seqid, outmax); |
---|
84 | if (*seqid==NULL) return; |
---|
85 | if (!idonly) strcat(*seqid, "; "); |
---|
86 | strcat(*seqid, localid); |
---|
87 | } |
---|
88 | |
---|
89 | if (idonly) { |
---|
90 | strcat(*seqid,"\n"); |
---|
91 | return; |
---|
92 | } |
---|
93 | |
---|
94 | if (ISA_na(bsp->mol)) code = Seq_code_iupacna; |
---|
95 | else code = Seq_code_iupacaa; |
---|
96 | spp = SeqPortNew(bsp, 0, -1, 0, code); |
---|
97 | SeqPortSeek(spp, 0, SEEK_SET); |
---|
98 | |
---|
99 | sp= *seq; |
---|
100 | if (sp==NULL) { /* this is always true now !? */ |
---|
101 | outlen= 0; |
---|
102 | outmax= 500; |
---|
103 | sp= (char*) malloc(outmax); |
---|
104 | } |
---|
105 | else { |
---|
106 | outlen= strlen(sp); |
---|
107 | outmax= outlen + 500; |
---|
108 | sp= (char*) realloc( sp, outmax); |
---|
109 | } |
---|
110 | if (sp==NULL) return; |
---|
111 | |
---|
112 | while ((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF) { |
---|
113 | if (outlen>=outmax) { |
---|
114 | outmax= outlen + 500; |
---|
115 | sp= (char*) realloc(sp, outmax); |
---|
116 | if (sp==NULL) return; |
---|
117 | } |
---|
118 | sp[outlen++] = residue; |
---|
119 | } |
---|
120 | sp= (char*) realloc(sp, outlen+1); |
---|
121 | if (sp!=NULL) sp[outlen]= '\0'; |
---|
122 | *seq= sp; |
---|
123 | *seqlen= outlen; |
---|
124 | SeqPortFree(spp); |
---|
125 | return; |
---|
126 | } |
---|
127 | |
---|
128 | |
---|
129 | static void SeqEntryRawseq(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent) |
---|
130 | { |
---|
131 | FastaPtr tfa; |
---|
132 | BioseqPtr bsp; |
---|
133 | |
---|
134 | if (!IS_Bioseq(sep)) return; |
---|
135 | bsp = (BioseqPtr)sep->data.ptrvalue; |
---|
136 | tfa = (FastaPtr) data; |
---|
137 | BioseqRawToRaw(bsp, tfa->idonly, tfa->whichSeq, tfa->seqnum, |
---|
138 | tfa->seq, tfa->seqid, tfa->seqlen); |
---|
139 | } |
---|
140 | |
---|
141 | void SeqEntryToRaw(SeqEntryPtr sep, Boolean idonly, short whichSeq, short *seqnum, |
---|
142 | char **seq, char **seqid, long *seqlen) |
---|
143 | { |
---|
144 | FastaDat tfa; |
---|
145 | |
---|
146 | if (sep == NULL) return; |
---|
147 | tfa.idonly= idonly; |
---|
148 | tfa.seqnum= seqnum; |
---|
149 | tfa.whichSeq= whichSeq; |
---|
150 | tfa.seq = seq; |
---|
151 | tfa.seqid = seqid; |
---|
152 | tfa.seqlen= seqlen; |
---|
153 | SeqEntryExplore(sep, (Pointer)&tfa, SeqEntryRawseq); |
---|
154 | } |
---|
155 | |
---|
156 | |
---|
157 | |
---|
158 | |
---|
159 | char *listASNSeqs(const char *filename, const long skiplines, |
---|
160 | const short format, /* note: this is kASNseqentry or kASNseqset */ |
---|
161 | short *nseq, short *error ) |
---|
162 | { |
---|
163 | AsnIoPtr aip = NULL; |
---|
164 | SeqEntryPtr the_set; |
---|
165 | AsnTypePtr atp, atp2; |
---|
166 | AsnModulePtr amp; |
---|
167 | Boolean inIsBinary= FALSE; /* damn, why can't asn routines test this? */ |
---|
168 | char *seq = NULL; |
---|
169 | char *seqid = NULL, stemp[256]; |
---|
170 | long seqlen; |
---|
171 | int i, count; |
---|
172 | |
---|
173 | *nseq= 0; |
---|
174 | *error= 0; |
---|
175 | |
---|
176 | /* asn dictionary setups */ |
---|
177 | /*fprintf(stderr,"listASNSeqs: SeqEntryLoad\n");*/ |
---|
178 | if (! SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */ |
---|
179 | amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */ |
---|
180 | if (amp == NULL) goto errxit; |
---|
181 | atp = AsnFind("Bioseq-set"); /* get the initial type pointers */ |
---|
182 | if (atp == NULL) goto errxit; |
---|
183 | atp2 = AsnFind("Bioseq-set.seq-set.E"); |
---|
184 | if (atp2 == NULL) goto errxit; |
---|
185 | |
---|
186 | /*fprintf(stderr,"listASNSeqs: AsnIoOpen\n");*/ |
---|
187 | /* open the ASN.1 input file in the right mode */ |
---|
188 | /* !!!! THIS FAILS when filename has MAC PATH (& other paths?) (:folder:filename) */ |
---|
189 | if ((aip = AsnIoOpen(filename, inIsBinary?"rb":"r")) == NULL) goto errxit; |
---|
190 | for (i=0; i<skiplines; i++) fgets( stemp, 255, aip->fp); /* this may mess up asn routines... */ |
---|
191 | |
---|
192 | if (! ErrSetLog ("stderr")) goto errxit; |
---|
193 | else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */ |
---|
194 | |
---|
195 | if (format == kASNseqentry) { /* read one Seq-entry */ |
---|
196 | /*fprintf(stderr,"listASNSeqs: SeqEntryAsnRead\n");*/ |
---|
197 | the_set = SeqEntryAsnRead(aip, NULL); |
---|
198 | SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen); |
---|
199 | if (seq) free(seq); seq= NULL; |
---|
200 | SeqEntryFree(the_set); |
---|
201 | } |
---|
202 | else { /* read Seq-entry's from a Bioseq-set */ |
---|
203 | count = 0; |
---|
204 | /*fprintf(stderr,"listASNSeqs: AsnReadId\n");*/ |
---|
205 | while ((atp = AsnReadId(aip, amp, atp)) != NULL) { |
---|
206 | if (atp == atp2) { /* top level Seq-entry */ |
---|
207 | the_set = SeqEntryAsnRead(aip, atp); |
---|
208 | SeqEntryToRaw(the_set, true, 0, nseq, &seq, &seqid, &seqlen); |
---|
209 | SeqEntryFree(the_set); |
---|
210 | if (seq) free(seq); seq= NULL; |
---|
211 | } |
---|
212 | else |
---|
213 | AsnReadVal(aip, atp, NULL); |
---|
214 | count++; |
---|
215 | } |
---|
216 | } |
---|
217 | |
---|
218 | AsnIoClose(aip); |
---|
219 | *error= 0; |
---|
220 | return seqid; |
---|
221 | |
---|
222 | errxit: |
---|
223 | AsnIoClose(aip); |
---|
224 | if (seqid) free(seqid); |
---|
225 | *error= eASNerr; |
---|
226 | return NULL; |
---|
227 | } |
---|
228 | |
---|
229 | |
---|
230 | char *readASNSeq(const short whichEntry, const char *filename, |
---|
231 | const long skiplines, |
---|
232 | const short format, /* note: this is kASNseqentry or kASNseqset */ |
---|
233 | long *seqlen, short *nseq, |
---|
234 | short *error, char **seqid ) |
---|
235 | { |
---|
236 | AsnIoPtr aip = NULL; |
---|
237 | SeqEntryPtr the_set; |
---|
238 | AsnTypePtr atp, atp2; |
---|
239 | AsnModulePtr amp; |
---|
240 | Boolean inIsBinary= FALSE; /* damn, why can't asn routines test this? */ |
---|
241 | char *seq, stemp[200]; |
---|
242 | int i, count; |
---|
243 | |
---|
244 | *seqlen= 0; |
---|
245 | *nseq= 0; |
---|
246 | *error= 0; |
---|
247 | seq= NULL; |
---|
248 | |
---|
249 | /*fprintf(stderr,"readASNseq: SeqEntryLoad\n");*/ |
---|
250 | /* asn dictionary setups */ |
---|
251 | if (! SeqEntryLoad()) goto errxit; /* sequence alphabets (and sequence parse trees) */ |
---|
252 | amp = AsnAllModPtr(); /* get pointer to all loaded ASN.1 modules */ |
---|
253 | if (amp == NULL) goto errxit; |
---|
254 | atp = AsnFind("Bioseq-set"); /* get the initial type pointers */ |
---|
255 | if (atp == NULL) goto errxit; |
---|
256 | atp2 = AsnFind("Bioseq-set.seq-set.E"); |
---|
257 | if (atp2 == NULL) goto errxit; |
---|
258 | |
---|
259 | /* open the ASN.1 input file in the right mode */ |
---|
260 | /*fprintf(stderr,"readASNseq: AsnIoOpen(%s)\n", filename);*/ |
---|
261 | if ((aip = AsnIoOpen(filename, inIsBinary?"rb":"r")) == NULL) goto errxit; |
---|
262 | for (i=0; i<skiplines; i++) fgets( stemp, 255, aip->fp); /* this may mess up asn routines... */ |
---|
263 | |
---|
264 | if (! ErrSetLog ("stderr")) goto errxit; |
---|
265 | else ErrSetOpts(ERR_CONTINUE, ERR_LOG_ON); /*?? log errors instead of die */ |
---|
266 | |
---|
267 | seq= NULL; |
---|
268 | if (format == kASNseqentry) { /* read one Seq-entry */ |
---|
269 | /*fprintf(stderr,"readASNseq: SeqEntryAsnRead\n");*/ |
---|
270 | the_set = SeqEntryAsnRead(aip, NULL); |
---|
271 | SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen); |
---|
272 | SeqEntryFree(the_set); |
---|
273 | goto goodexit; |
---|
274 | } |
---|
275 | |
---|
276 | else { /* read Seq-entry's from a Bioseq-set */ |
---|
277 | count = 0; |
---|
278 | /*fprintf(stderr,"readASNseq: AsnReadId\n");*/ |
---|
279 | while ((atp = AsnReadId(aip, amp, atp)) != NULL) { |
---|
280 | if (atp == atp2) { /* top level Seq-entry */ |
---|
281 | the_set = SeqEntryAsnRead(aip, atp); |
---|
282 | SeqEntryToRaw(the_set, false, whichEntry, nseq, &seq, seqid, seqlen); |
---|
283 | SeqEntryFree(the_set); |
---|
284 | if (*nseq >= whichEntry) goto goodexit; |
---|
285 | } |
---|
286 | else |
---|
287 | AsnReadVal(aip, atp, NULL); |
---|
288 | count++; |
---|
289 | } |
---|
290 | } |
---|
291 | |
---|
292 | goodexit: |
---|
293 | AsnIoClose(aip); |
---|
294 | *error= 0; |
---|
295 | return seq; |
---|
296 | |
---|
297 | errxit: |
---|
298 | AsnIoClose(aip); |
---|
299 | *error= eASNerr; |
---|
300 | if (seq) free(seq); |
---|
301 | return NULL; |
---|
302 | } |
---|
303 | |
---|
304 | |
---|
305 | #endif /*NCBI*/ |
---|