1 | # include <stdio.h> |
---|
2 | |
---|
3 | char aliname[80], nucname[80], destname[80], taxname[11][255]; |
---|
4 | FILE *sourceali, *sourcenuc, *destali; |
---|
5 | /* files: their names and pointers |
---|
6 | */ |
---|
7 | int totnum, seqnum, blocknum, amslr, dpvou, |
---|
8 | nucoraa, taxnum, globcount, position; |
---|
9 | /* totnum is the total number of sequences in the alignment |
---|
10 | seqnum is the number of sequences that will be used |
---|
11 | counters that get incremented or decremented: |
---|
12 | amslr, bntms, count, dpvou, etc |
---|
13 | nucoraa: 1 for nuc alignments, 3 for amino acid alignments |
---|
14 | taxnum is the ID-number of the current taxon |
---|
15 | globcount is the counter for the main loop |
---|
16 | position is the number for the codon position to be analyzed |
---|
17 | */ |
---|
18 | int oldcheck, newcheck; /* for checking the number of characters */ |
---|
19 | int nucmito, yorm, namebool; |
---|
20 | /* for nuclear genetic code: nucmito == 0 |
---|
21 | for mitochondrial code : nucmito == 1 |
---|
22 | if yorm == 0: no conversion of leu and arg first positions; |
---|
23 | if yorm == 1: conversion occurs |
---|
24 | */ |
---|
25 | int putnucbool; |
---|
26 | /* for exclusion of regions where homology is uncertain */ |
---|
27 | long begin, remember; |
---|
28 | /* current pointer position within sourceali and destali */ |
---|
29 | double actualcount, count[9]; |
---|
30 | |
---|
31 | |
---|
32 | main() |
---|
33 | { |
---|
34 | long temp; /* holds a temporary position of pointer |
---|
35 | within destali */ |
---|
36 | |
---|
37 | /* INITIALIZING GLOBAL VARIABLES */ |
---|
38 | |
---|
39 | position = 5; /* initialize to analyze all positions */ |
---|
40 | putnucbool = 0; /* initialize exclude-boolean */ |
---|
41 | actualcount = 0; /* initialize base counter */ |
---|
42 | yorm = 2; /* initialize 'Y' or 'M' option */ |
---|
43 | nucmito = 2; /* initialize type of genetic code option */ |
---|
44 | for (amslr = 0; amslr < 9; amslr++) { |
---|
45 | count[amslr] = 0; |
---|
46 | } /* initialize base counters */ |
---|
47 | |
---|
48 | /* FUNCTION CALLS START HERE */ |
---|
49 | |
---|
50 | parstuff(); /* gets info from user */ |
---|
51 | findalignment(); /* finds the string ALIGNMENT */ |
---|
52 | findbeginning(); /* finds the start of the sequence alignment */ |
---|
53 | blocknum = countblocks(); /* counts the number of blocks in the |
---|
54 | * alignment */ |
---|
55 | goback(); /* pointer back to last \n before alignment */ |
---|
56 | |
---|
57 | /* MAIN LOOP. ONE EXECUTION OF IT PROCESSES ONE SEQUENCE */ |
---|
58 | |
---|
59 | for (globcount = 0; globcount < seqnum; globcount++) { |
---|
60 | if (globcount > 0) { /* if it's not the first sequence */ |
---|
61 | jump(1); /* puts pointer at beginning of sequence line */ |
---|
62 | begin = ftell(sourceali); |
---|
63 | } |
---|
64 | findname(); /* finds the number of this sequence in |
---|
65 | * sourceali */ |
---|
66 | goback(); /* goes back to beginning of line */ |
---|
67 | findnucseq(); /* finds the corresponding sequence in |
---|
68 | * sourcenuc */ |
---|
69 | |
---|
70 | for (dpvou = 0; dpvou < blocknum; dpvou++) |
---|
71 | /* |
---|
72 | * loops through the alignment as many times as there are |
---|
73 | * blocks |
---|
74 | */ |
---|
75 | { |
---|
76 | putnucs(); /* grabs nucleotides, puts them into destali */ |
---|
77 | if (dpvou != (blocknum - 1)) /* if it's not the last |
---|
78 | * block.. */ |
---|
79 | jump(seqnum - 1); /* ..jump */ |
---|
80 | else if (dpvou == (blocknum - 1)) { |
---|
81 | tailcheck(globcount); /* checks if there are bases |
---|
82 | * remaining beyond the end |
---|
83 | * of the sequence */ |
---|
84 | goback(); /* go back to beginning of previous |
---|
85 | * sequence in alignment */ |
---|
86 | } |
---|
87 | } /* end of single sequence processing */ |
---|
88 | |
---|
89 | if (globcount == 0) { |
---|
90 | oldcheck = newcheck; /* length of first sequence is |
---|
91 | * standard */ |
---|
92 | temp = ftell(destali); |
---|
93 | fseek(destali, remember, 0); |
---|
94 | fprintf(destali, "%10d", oldcheck); |
---|
95 | fseek(destali, temp, 0); |
---|
96 | } else if (oldcheck != newcheck) { |
---|
97 | printf("CRASH! Lengths of sequences do not agree. Check %s\n", aliname); |
---|
98 | exit(0); |
---|
99 | } |
---|
100 | if ((newcheck % 60) != 0) |
---|
101 | fputc('\n', destali); |
---|
102 | newcheck = 0; /* resets counter */ |
---|
103 | } |
---|
104 | |
---|
105 | /* END OF MAIN LOOP */ |
---|
106 | |
---|
107 | /* CALCULATE BASE COMPOSITION IF FIRST POSITIONS OF CODONS ARE INCLUDED */ |
---|
108 | if ((nucoraa == 3) && (position != 2) && |
---|
109 | (position != 3) && (yorm == 1)) |
---|
110 | summer(); |
---|
111 | |
---|
112 | fclose ( destali ); |
---|
113 | fclose ( sourcenuc ); |
---|
114 | fclose ( sourceali ); |
---|
115 | |
---|
116 | return 0; |
---|
117 | } /* end of main */ |
---|
118 | |
---|
119 | |
---|
120 | parstuff() |
---|
121 | { /* gets info from user */ |
---|
122 | int bool, dumnum; |
---|
123 | char dummy, nucchar; |
---|
124 | |
---|
125 | do { |
---|
126 | printf("Alignment file to be read: "); |
---|
127 | scanf("%s", aliname); |
---|
128 | if (NULL == (sourceali = fopen(aliname, "r"))) { |
---|
129 | bool = 0; |
---|
130 | printf("Cannot open %s. Check path or spelling.\n", aliname); |
---|
131 | } else |
---|
132 | bool = 1; |
---|
133 | } |
---|
134 | while (bool == 0); |
---|
135 | |
---|
136 | do { |
---|
137 | printf("Nucleotide file to be read: "); |
---|
138 | scanf("%s", nucname); |
---|
139 | if (NULL == (sourcenuc = fopen(nucname, "r"))) { |
---|
140 | bool = 0; |
---|
141 | printf("Cannot open %s. Check path or spelling.\n", nucname); |
---|
142 | } else |
---|
143 | bool = 1; |
---|
144 | } |
---|
145 | while (bool == 0); |
---|
146 | |
---|
147 | do { |
---|
148 | printf("Destination file to be written: "); |
---|
149 | scanf("%s", destname); |
---|
150 | if (NULL == (destali = fopen(destname, "w"))) { |
---|
151 | bool = 0; |
---|
152 | printf("Cannot open %s. Check path or spelling.\n", destname); |
---|
153 | } else |
---|
154 | bool = 1; |
---|
155 | } |
---|
156 | while (bool == 0); |
---|
157 | |
---|
158 | do { |
---|
159 | printf("\nTotal number of sequences in alignment: "); |
---|
160 | scanf("%d", &totnum); |
---|
161 | if (totnum < 2) { |
---|
162 | printf("%d; CRASH! That won't work.\n", &totnum); |
---|
163 | exit(0); |
---|
164 | } else |
---|
165 | bool = 1; |
---|
166 | } |
---|
167 | while (bool == 0); |
---|
168 | scanf("%c", &dummy); |
---|
169 | |
---|
170 | do { |
---|
171 | printf("Number of sequences to be used: "); |
---|
172 | scanf("%d", &seqnum); |
---|
173 | if (seqnum < 2) { |
---|
174 | printf("%d; CRASH! That won't work.\n", &seqnum); |
---|
175 | exit(0); |
---|
176 | } else |
---|
177 | bool = 1; |
---|
178 | } |
---|
179 | while ((bool == 0) || (seqnum > totnum)); |
---|
180 | scanf("%c", &dummy); |
---|
181 | |
---|
182 | fprintf(destali, " %d", seqnum); /* Prints the number of |
---|
183 | * sequences.. */ |
---|
184 | remember = ftell(destali); /* ..remembers where the pointer is.. */ |
---|
185 | fprintf(destali, " LENGTH\n"); /* ..and keeps ten spaces for |
---|
186 | * the length to be filled in |
---|
187 | * later */ |
---|
188 | do { |
---|
189 | printf("Nucleic acid or Protein coding sequence? (n/p): "); |
---|
190 | scanf("%c", &nucchar); |
---|
191 | if ((nucchar == 'n') || (nucchar == 'N')) { |
---|
192 | nucoraa = 1; |
---|
193 | bool = 1; |
---|
194 | } else if ((nucchar == 'p') || (nucchar == 'P')) { |
---|
195 | nucoraa = 3; |
---|
196 | bool = 1; |
---|
197 | } else { |
---|
198 | printf("Try again:\n"); |
---|
199 | scanf("%c", &dummy); |
---|
200 | bool = 0; |
---|
201 | } |
---|
202 | } |
---|
203 | while (bool == 0); |
---|
204 | scanf("%c", &dummy); |
---|
205 | |
---|
206 | if (nucoraa == 3) { |
---|
207 | do { |
---|
208 | printf("Nuclear or mitochondrial genetic code? (n/m): "); |
---|
209 | scanf("%c", &nucchar); |
---|
210 | if ((nucchar == 'n') || (nucchar == 'N')) { |
---|
211 | nucmito = 0; |
---|
212 | bool = 1; |
---|
213 | } else if ((nucchar == 'm') || (nucchar == 'M')) { |
---|
214 | nucmito = 1; |
---|
215 | bool = 1; |
---|
216 | } else { |
---|
217 | printf("Try again:\n"); |
---|
218 | scanf("%c", &dummy); |
---|
219 | bool = 0; |
---|
220 | } |
---|
221 | } |
---|
222 | while (bool == 0); |
---|
223 | scanf("%c", &dummy); |
---|
224 | |
---|
225 | do { |
---|
226 | printf("\nEnter a number between 1 and 5, for the\n"); |
---|
227 | printf(" codon position you wish to analyze:\n"); |
---|
228 | printf(" 1 for first, 2 for second, 3 for third\n"); |
---|
229 | printf(" 4 for first plus second, 5 for all. (1-5): "); |
---|
230 | scanf("%d", &position); |
---|
231 | if ((position < 1) || (5 < position)) { |
---|
232 | printf("Bull.... Try again\n"); |
---|
233 | } |
---|
234 | } |
---|
235 | while ((position < 1) || (5 < position)); |
---|
236 | scanf("%c", &dummy); |
---|
237 | |
---|
238 | if ((position != 2) && (position != 3)) { |
---|
239 | do { |
---|
240 | printf("Conversion of first positions to degenerate base? (y/n): "); |
---|
241 | scanf("%c", &nucchar); |
---|
242 | if ((nucchar == 'y') || (nucchar == 'Y')) { |
---|
243 | yorm = 1; |
---|
244 | bool = 1; |
---|
245 | } else if ((nucchar == 'n') || (nucchar == 'N')) { |
---|
246 | yorm = 0; |
---|
247 | bool = 1; |
---|
248 | } else { |
---|
249 | printf("Try again:\n"); |
---|
250 | scanf("%c", &dummy); |
---|
251 | bool = 0; |
---|
252 | } |
---|
253 | } |
---|
254 | while (bool == 0); |
---|
255 | scanf("%c", &dummy); |
---|
256 | } |
---|
257 | } |
---|
258 | do { |
---|
259 | printf("Use nAmes or nUmbers as identifiers? (a/u): "); |
---|
260 | scanf("%c", &nucchar); |
---|
261 | if ((nucchar == 'a') || (nucchar == 'A')) { |
---|
262 | namebool = 0; |
---|
263 | bool = 1; |
---|
264 | } else if ((nucchar == 'u') || (nucchar == 'U')) { |
---|
265 | namebool = 1; |
---|
266 | bool = 1; |
---|
267 | } else { |
---|
268 | printf("Try again:\n"); |
---|
269 | scanf("%c", &dummy); |
---|
270 | bool = 0; |
---|
271 | } |
---|
272 | } |
---|
273 | while (bool == 0); |
---|
274 | scanf("%c", &dummy); |
---|
275 | |
---|
276 | printf("\nNucleotide sequences in: %12s\n", nucname); |
---|
277 | if (nucoraa == 1) { |
---|
278 | printf("Nucleotide alignment source: %12s\n", aliname); |
---|
279 | printf("Nucleotide alignment destination: %12s\n", destname); |
---|
280 | } else if (nucoraa == 3) { |
---|
281 | printf("Amino acid alignment source: %12s\n", aliname); |
---|
282 | printf("Nucleotide alignment destination: %12s\n", destname); |
---|
283 | if (position == 1) { |
---|
284 | printf("First "); |
---|
285 | } else if (position == 2) { |
---|
286 | printf("Second "); |
---|
287 | } else if (position == 3) { |
---|
288 | printf("Third "); |
---|
289 | } else if (position == 4) { |
---|
290 | printf("First plus second "); |
---|
291 | } else if (position == 5) { |
---|
292 | printf("All "); |
---|
293 | } |
---|
294 | printf("codon positions will be used.\n"); |
---|
295 | |
---|
296 | if ((yorm == 0) && (position != 2) && (position != 3)) { |
---|
297 | if (nucmito == 0) { |
---|
298 | printf("No conversion of L and R 1st positions.\n"); |
---|
299 | } else if (nucmito == 1) { |
---|
300 | printf("No conversion of L 1st positions.\n"); |
---|
301 | } |
---|
302 | } else if ((yorm == 1) && (position != 2) && (position != 3)) { |
---|
303 | if (nucmito == 0) { |
---|
304 | printf("L and R 1st positions will be converted to Y and M.\n"); |
---|
305 | } else if (nucmito == 1) { |
---|
306 | printf("L 1st positions will be converted to Y.\n"); |
---|
307 | } |
---|
308 | } /* end of if yorm == 0 */ |
---|
309 | } /* end of nucoraa == 3 */ |
---|
310 | if (namebool == 0) { |
---|
311 | printf("Names "); |
---|
312 | } else if (namebool == 1) { |
---|
313 | printf("Numbers "); |
---|
314 | } |
---|
315 | printf("will be used to identify sequences.\n\n"); |
---|
316 | |
---|
317 | return 0; |
---|
318 | } |
---|
319 | |
---|
320 | |
---|
321 | findalignment() |
---|
322 | { /* finds the string "ALIGNMENT" in the |
---|
323 | * alignment */ |
---|
324 | char testchar[9], dummy, seqname[200]; |
---|
325 | int bntms, count, eqwpv; |
---|
326 | |
---|
327 | for (count = 0; count < totnum; count++) { |
---|
328 | fscanf(sourceali, "%d", &taxnum); |
---|
329 | dummy = fgetc(sourceali); |
---|
330 | fgets(seqname, 200, sourceali); |
---|
331 | bntms = 0; |
---|
332 | while ((seqname[bntms] != '\n') && (bntms < 10)) { |
---|
333 | if (seqname[bntms] != '\n') { |
---|
334 | taxname[bntms][taxnum] = seqname[bntms]; |
---|
335 | } |
---|
336 | bntms++; |
---|
337 | } |
---|
338 | if (bntms < 10) { |
---|
339 | for (eqwpv = bntms; eqwpv < 10; eqwpv++) |
---|
340 | taxname[eqwpv][taxnum] = ' '; |
---|
341 | } |
---|
342 | } |
---|
343 | |
---|
344 | testchar[0] = '\n'; |
---|
345 | while (strncmp(testchar, "ALIGNMENT", 9) != 0) { |
---|
346 | for (count = 0; count < 9; count++) { |
---|
347 | testchar[count] = fgetc(sourceali); |
---|
348 | if (testchar[count] == '\n') { |
---|
349 | break; |
---|
350 | } |
---|
351 | } |
---|
352 | dummy = testchar[count]; |
---|
353 | if (dummy == '\n') { |
---|
354 | continue; |
---|
355 | } |
---|
356 | while ((dummy = fgetc(sourceali)) != '\n'); |
---|
357 | } |
---|
358 | return 0; |
---|
359 | } |
---|
360 | |
---|
361 | findbeginning() |
---|
362 | { /* finds the first line of sequence alignment */ |
---|
363 | char testchar; |
---|
364 | |
---|
365 | while ((testchar = fgetc(sourceali)) != '\n'); |
---|
366 | while ((testchar != 45 && testchar < 65) || |
---|
367 | (testchar > 91 && testchar != '{')) |
---|
368 | testchar = fgetc(sourceali); |
---|
369 | begin = ftell(sourceali) - 1; |
---|
370 | /* pointer will be put to last \n before first amino acid */ |
---|
371 | return 0; |
---|
372 | } |
---|
373 | |
---|
374 | countblocks() |
---|
375 | { /* counts the number of seqeunce blocks in |
---|
376 | * the alignment */ |
---|
377 | char dummy; |
---|
378 | int dumnuc; |
---|
379 | |
---|
380 | dumnuc = -1; |
---|
381 | do { |
---|
382 | dummy = fgetc(sourceali); |
---|
383 | while ((dummy != '*') && (dummy != EOF)) |
---|
384 | dummy = fgetc(sourceali); |
---|
385 | dumnuc++; |
---|
386 | while ((dummy != '\n') && (dummy != EOF)) |
---|
387 | dummy = fgetc(sourceali); |
---|
388 | } |
---|
389 | while (dummy != EOF); |
---|
390 | return dumnuc; |
---|
391 | } |
---|
392 | |
---|
393 | jump(howmany) |
---|
394 | { /* jumps howmany lines in sourceali */ |
---|
395 | char linedump, aminocheck; |
---|
396 | int eqwpv; |
---|
397 | long lastpos; |
---|
398 | eqwpv = 0; |
---|
399 | |
---|
400 | do { |
---|
401 | linedump = fgetc(sourceali); |
---|
402 | /* |
---|
403 | * checks if the first character on the line is an amino acid |
---|
404 | * or a [ |
---|
405 | */ |
---|
406 | if ((linedump < 65 || 91 < linedump) && (linedump != 45)) |
---|
407 | /* if it's not an amino acid.. */ |
---|
408 | { |
---|
409 | --eqwpv;/* ..do not increase counter */ |
---|
410 | if (linedump == '{') /* if it's an ignore |
---|
411 | * character.. */ |
---|
412 | while ((linedump = fgetc(sourceali)) != '}'); |
---|
413 | /* ..go to resume char */ |
---|
414 | } |
---|
415 | if (linedump != '\n') /* if not at end of line.. */ |
---|
416 | while ((linedump = fgetc(sourceali)) != '\n'); |
---|
417 | /* ..goes to end of line */ |
---|
418 | eqwpv++; |
---|
419 | } |
---|
420 | while (eqwpv < howmany); |
---|
421 | |
---|
422 | do { /* this loop ties up the end by ensuring that |
---|
423 | * the next line starts with an amino acid or a [ */ |
---|
424 | lastpos = ftell(sourceali); /* remembers the position in |
---|
425 | * the sequence */ |
---|
426 | aminocheck = fgetc(sourceali); /* amino acid, hyphen, or '[' |
---|
427 | * ? */ |
---|
428 | if ((aminocheck < 65 || 91 < aminocheck) && aminocheck != 45) { |
---|
429 | if (aminocheck == '{') /* if it's an ignore |
---|
430 | * character.. */ |
---|
431 | while ((aminocheck = fgetc(sourceali)) != '}'); |
---|
432 | /* ..goes to resume char */ |
---|
433 | if (aminocheck != '\n') /* if not at end of line ... */ |
---|
434 | while ((aminocheck = fgetc(sourceali)) != '\n'); |
---|
435 | /* ..goes to end of line */ |
---|
436 | } |
---|
437 | } |
---|
438 | while ((aminocheck < 65 || 91 < aminocheck) && (aminocheck != 45)); |
---|
439 | |
---|
440 | fseek(sourceali, lastpos, 0); /* loop is ended by amino acid; now |
---|
441 | * go back to the beginning of the |
---|
442 | * current line */ |
---|
443 | return 0; |
---|
444 | } /* end of jump */ |
---|
445 | |
---|
446 | |
---|
447 | findname() |
---|
448 | { /* finds the number of the sequence line that |
---|
449 | * it's looking at and prints it or the |
---|
450 | * corresponding name at the beginning of the |
---|
451 | * destfile's corresponding sequence */ |
---|
452 | char dummy; |
---|
453 | int count, totnumloop, testcount, namefield; |
---|
454 | long currpos; |
---|
455 | |
---|
456 | amslr = 0; |
---|
457 | dummy = fgetc(sourceali); |
---|
458 | if (dummy == '{') { /* if it's an ignore char.. */ |
---|
459 | do |
---|
460 | dummy = fgetc(sourceali); |
---|
461 | while (dummy != '}'); |
---|
462 | do |
---|
463 | dummy = fgetc(sourceali); |
---|
464 | while (dummy != '\n'); |
---|
465 | begin = ftell(sourceali); /* for putnucs (\n) */ |
---|
466 | dummy = fgetc(sourceali); /* pointer on first aa */ |
---|
467 | } /* ..read until on the first amino acid after |
---|
468 | * the resume char */ |
---|
469 | while (dummy == 32 || dummy == 45 || |
---|
470 | (65 <= dummy && dummy <= 91) || dummy == 93) { |
---|
471 | dummy = fgetc(sourceali); |
---|
472 | amslr++; |
---|
473 | } |
---|
474 | |
---|
475 | /* USER: IF YOUR ALIGNMENT ONLY HAS THE TAXON NUMBER AT THE END OF EACH |
---|
476 | * LINE, AND NOT ALSO THE POSITION NUMBER, DELETE ONE OF THE FOLLOWING |
---|
477 | * TWO LINES BEFORE COMPILING! */ |
---|
478 | |
---|
479 | fscanf(sourceali, "%d", &taxnum); /* dumps the length number */ |
---|
480 | fscanf(sourceali, "%d", &taxnum); /* correct number of taxon */ |
---|
481 | |
---|
482 | if (namebool == 1) { /* if use numbers */ |
---|
483 | fprintf(destali, "%d", taxnum); |
---|
484 | if (taxnum < 10) { |
---|
485 | testcount = 1; |
---|
486 | } |
---|
487 | /* writes the number ... */ |
---|
488 | else if ((taxnum < 100) && (taxnum > 9)) { |
---|
489 | testcount = 2; |
---|
490 | } else if ((taxnum < 512) && (taxnum > 99)) { |
---|
491 | testcount = 3; |
---|
492 | } else { |
---|
493 | printf("Number of sequence out of range.\n"); |
---|
494 | exit(0); |
---|
495 | } |
---|
496 | |
---|
497 | for (namefield = testcount; namefield < 10; namefield++) { |
---|
498 | fputc(' ', destali); |
---|
499 | } /* ..and fills up the namefield to 10 chars */ |
---|
500 | } else if (namebool == 0) { /* if use names, not numbers.. */ |
---|
501 | for (count = 0; count < 10; count++) { |
---|
502 | fprintf(destali, "%c", taxname[count][taxnum]); |
---|
503 | } /* ..writes the name */ |
---|
504 | } |
---|
505 | while (dummy != '\n') { |
---|
506 | dummy = fgetc(sourceali); |
---|
507 | } |
---|
508 | |
---|
509 | return 0; |
---|
510 | } |
---|
511 | |
---|
512 | findnucseq() |
---|
513 | { /* first puts file-pointer at > of sequence |
---|
514 | * and then puts it at first nucleotide */ |
---|
515 | char dummy; |
---|
516 | int countseq; |
---|
517 | |
---|
518 | fseek(sourcenuc, 0, 0); |
---|
519 | |
---|
520 | countseq = -1; /* sequence numbers start with 0 */ |
---|
521 | while (countseq < taxnum) { |
---|
522 | countseq++; |
---|
523 | while ((dummy = fgetc(sourcenuc)) != '>'); /* marker */ |
---|
524 | while ((dummy = fgetc(sourcenuc)) != '\n'); /* marker line end */ |
---|
525 | while ((dummy = fgetc(sourcenuc)) != '\n'); /* name line end */ |
---|
526 | } |
---|
527 | return 0; |
---|
528 | } |
---|
529 | |
---|
530 | |
---|
531 | goback() |
---|
532 | { |
---|
533 | fseek(sourceali, begin, 0); |
---|
534 | return 0; |
---|
535 | } |
---|
536 | |
---|
537 | |
---|
538 | countnuc(nuc) |
---|
539 | { |
---|
540 | actualcount++; |
---|
541 | switch (nuc) { |
---|
542 | case 'G': |
---|
543 | count[0]++; |
---|
544 | break; |
---|
545 | case 'A': |
---|
546 | count[1]++; |
---|
547 | break; |
---|
548 | case 'T': |
---|
549 | case 'U': |
---|
550 | count[2]++; |
---|
551 | break; |
---|
552 | case 'C': |
---|
553 | count[3]++; |
---|
554 | break; |
---|
555 | case 'Y': |
---|
556 | count[4]++; |
---|
557 | break; |
---|
558 | case 'R': |
---|
559 | count[5]++; |
---|
560 | break; |
---|
561 | case 'M': |
---|
562 | count[6]++; |
---|
563 | break; |
---|
564 | case 'N': |
---|
565 | count[7]++; |
---|
566 | break; |
---|
567 | default: |
---|
568 | count[8]++; |
---|
569 | break; |
---|
570 | } |
---|
571 | return 0; |
---|
572 | } |
---|
573 | |
---|
574 | |
---|
575 | putnucs() |
---|
576 | { |
---|
577 | char base, dummy; |
---|
578 | int zahl; |
---|
579 | |
---|
580 | while ((dummy = fgetc(sourceali)) != '\n') { |
---|
581 | if (dummy == 91) { /* if it's a start-exclude character ([) */ |
---|
582 | putnucbool = 1; |
---|
583 | } else if (dummy == 93) { /* if it's a stop-exclude character (]) */ |
---|
584 | putnucbool = 0; |
---|
585 | } else if (65 <= dummy && dummy <= 90) { /* if it's an authentic |
---|
586 | * sequence character */ |
---|
587 | for (zahl = 0; zahl < nucoraa; zahl++) { |
---|
588 | base = fgetc(sourcenuc); |
---|
589 | if ((65 <= base && base <= 90) || (97 <= base && base <= 122)) { |
---|
590 | if (putnucbool == 0) { |
---|
591 | if (nucoraa == 3) { /* if it's an amino acid sequence */ |
---|
592 | if (yorm == 1) { /* if conversion is desired */ |
---|
593 | if ((dummy == 'L') && (zahl == 0)) { |
---|
594 | base = 'Y'; /* if amino acid is leucine */ |
---|
595 | } |
---|
596 | if (nucmito == 0) { /* if code is nuclear */ |
---|
597 | if ((dummy == 'R') && (zahl == 0)) { |
---|
598 | base = 'M'; /* if amino acid is arginine */ |
---|
599 | } |
---|
600 | } |
---|
601 | } |
---|
602 | if (97 <= base && base <= 122) { |
---|
603 | base -= 32; |
---|
604 | } |
---|
605 | } |
---|
606 | if ((position == 5) || ((position == 1) && (zahl == 0)) || |
---|
607 | ((position == 2) && (zahl == 1)) || |
---|
608 | ((position == 3) && (zahl == 2)) || |
---|
609 | ((position == 4) && ((zahl == 0) || (zahl == 1)))) { |
---|
610 | countnuc(base); |
---|
611 | newcheck++; |
---|
612 | fputc(base, destali); |
---|
613 | if (newcheck % 60 == 0) |
---|
614 | fputc('\n', destali); |
---|
615 | } |
---|
616 | } /* end of if conditional on putnucbool */ |
---|
617 | } /* end of if conditional on authentic nucleotide */ |
---|
618 | else if (base == '*') { |
---|
619 | printf("CRASH! not enough nucleotides.\n"); |
---|
620 | exit(0); |
---|
621 | } else |
---|
622 | --zahl; |
---|
623 | } /* end of loop counted by nucoraa and zahl */ |
---|
624 | } /* end of if conditional on authentic sequence character in alignment */ |
---|
625 | else if (dummy == 45) { /* if it is a hyphen */ |
---|
626 | if (putnucbool == 0) { |
---|
627 | for (zahl = 0; zahl < nucoraa; zahl++) { |
---|
628 | if ((position == 5) || ((position == 1) && (zahl == 0)) || |
---|
629 | ((position == 2) && (zahl == 1)) || |
---|
630 | ((position == 3) && (zahl == 2)) || |
---|
631 | ((position == 4) && ((zahl == 0) || (zahl == 1)))) { |
---|
632 | newcheck++; |
---|
633 | fputc('-', destali); |
---|
634 | if (newcheck % 60 == 0) |
---|
635 | fputc('\n', destali); |
---|
636 | } |
---|
637 | } |
---|
638 | } |
---|
639 | } |
---|
640 | } /* end of while */ |
---|
641 | return 0; |
---|
642 | } /* end of putnucs */ |
---|
643 | |
---|
644 | |
---|
645 | tailcheck(seq) |
---|
646 | { |
---|
647 | char checkbool, pacman; |
---|
648 | checkbool = 0; |
---|
649 | do { |
---|
650 | pacman = fgetc(sourcenuc); |
---|
651 | if ((65 <= pacman && pacman <= 90) || |
---|
652 | (97 <= pacman && pacman <= 122)) { |
---|
653 | checkbool = 1; |
---|
654 | if (checkbool == 1) |
---|
655 | printf("%c", pacman); |
---|
656 | } |
---|
657 | } |
---|
658 | while (pacman != '*'); |
---|
659 | if (checkbool == 1) { |
---|
660 | printf("\nWARNING. Sequence # %d in %s contains the above\n", seq, nucname); |
---|
661 | printf(" bases beyond the number of bases it should contain\n"); |
---|
662 | printf(" according to the alignment in %s.\n", aliname ); |
---|
663 | printf(" Please compare the two files.\n\n"); |
---|
664 | } |
---|
665 | } |
---|
666 | |
---|
667 | summer() |
---|
668 | { |
---|
669 | int cou; |
---|
670 | |
---|
671 | count[1] += (2 * count[6] / 3); |
---|
672 | count[2] += (count[4] / 3); |
---|
673 | count[3] += ((2 * count[4] + count[6]) / 3); |
---|
674 | printf("Frequencies of A, C, G, T:\n"); |
---|
675 | printf("%10.5lf", count[1] / actualcount); |
---|
676 | printf("%10.5lf", count[3] / actualcount); |
---|
677 | printf("%10.5lf", count[0] / actualcount); |
---|
678 | printf("%10.5lf\n", count[2] / actualcount); |
---|
679 | return 0; |
---|
680 | } |
---|
681 | /* end of program */ |
---|
682 | |
---|