1 | /* |
---|
2 | * Copyright 1991 Steven Smith at the Harvard Genome Lab. |
---|
3 | * All rights reserved. |
---|
4 | */ |
---|
5 | #include "Flatio.c" |
---|
6 | |
---|
/*
 * Comp: test whether two nucleotide codes can denote a common base.
 *
 * Each character is mapped through a 128-entry table to a 4-bit mask in
 * which A = 0x01, C = 0x02, G = 0x04 and T/U = 0x08; the remaining
 * letters in the table carry combinations of those bits (for example
 * R = 0x05 and N = 0x0f).  Upper- and lower-case letters share the same
 * values, and every character not listed maps to 0.
 *
 * a, b     the two characters to compare.
 * returns  the bitwise AND of the two masks: non-zero when the codes
 *          share at least one bit, 0 otherwise.  Characters outside the
 *          table (negative or >= 128) never match.
 *
 * NOTE(review): 'W'/'w' map to 0 while 'X'/'x' map to 0x09 (A|T).  In
 * the IUPAC ambiguity alphabet W is the "A or T" code, so the value may
 * be sitting one slot too far along -- confirm against the enzyme files
 * in use before changing it.  Values are kept exactly as in the original.
 */
int Comp(char a, char b)
{
    static const unsigned char CtoB[128] = {
        ['A'] = 0x01, ['B'] = 0x0e, ['C'] = 0x02, ['D'] = 0x0d,
        ['G'] = 0x04, ['H'] = 0x0b, ['K'] = 0x0c, ['M'] = 0x03,
        ['N'] = 0x0f, ['R'] = 0x05, ['S'] = 0x06, ['T'] = 0x08,
        ['U'] = 0x08, ['V'] = 0x07, ['X'] = 0x09, ['Y'] = 0x0a,

        ['a'] = 0x01, ['b'] = 0x0e, ['c'] = 0x02, ['d'] = 0x0d,
        ['g'] = 0x04, ['h'] = 0x0b, ['k'] = 0x0c, ['m'] = 0x03,
        ['n'] = 0x0f, ['r'] = 0x05, ['s'] = 0x06, ['t'] = 0x08,
        ['u'] = 0x08, ['v'] = 0x07, ['x'] = 0x09, ['y'] = 0x0a
    };
    /* Plain char may be signed: go through unsigned char so that bytes
     * above 0x7f cannot become negative array indices. */
    unsigned char ua = (unsigned char)a;
    unsigned char ub = (unsigned char)b;

    if (ua >= sizeof CtoB || ub >= sizeof CtoB) {
        return 0;
    }
    return CtoB[ua] & CtoB[ub];
}
---|
21 | |
---|
/*
 * FindNext: find the nearest enzyme site in `target` at or after `offset`.
 *
 * Every non-empty string in enzymes[0 .. numenzymes-1] is slid along
 * `target`; a site is a position where Comp() returns non-zero for each
 * character pair.  Among all enzymes the site with the smallest start
 * position wins; on a tie the enzyme with the lower index is kept.
 *
 * target      NUL-terminated sequence to scan.
 * offset      first position to consider (negative values are treated as 0).
 * enzymes     table of NUL-terminated patterns, 80 bytes per row.
 * numenzymes  number of rows of `enzymes` to use.
 * match_len   out: length of the winning pattern, or 0 if none matched.
 * color       out: (index of the winning enzyme % 6) + 1; left untouched
 *             when nothing matches.
 * returns     start position of the nearest site, or -1 if there is none.
 *
 * The earlier version used "closest = strlen(target) - pattern length" as
 * its no-match marker.  As a result a longer pattern that did not match
 * could overwrite a genuine match found by a previous enzyme, a site
 * ending exactly at the end of `target` was reported as -1, and an empty
 * pattern caused the scan index to be read without having been set.
 * Matches are now tracked explicitly, so all of those sites are returned.
 */
int FindNext(char *target, int offset, char enzymes[][80], int numenzymes,
             int *match_len, int *color)
{
    int target_len = (int)strlen(target);
    int closest = -1;
    int k;

    *match_len = 0;
    if (offset < 0) {
        offset = 0;
    }

    for (k = 0; k < numenzymes; k++) {
        int site_len = (int)strlen(enzymes[k]);
        int last_start = target_len - site_len;
        int j;

        if (site_len == 0) {
            continue;
        }
        /* Only a strictly earlier site can replace the current best, so
         * there is no need to look at or beyond it. */
        if (closest != -1 && closest - 1 < last_start) {
            last_start = closest - 1;
        }

        for (j = offset; j <= last_start; j++) {
            int i = 0;

            while (i < site_len && Comp(enzymes[k][i], target[i + j])) {
                i++;
            }
            if (i == site_len) {
                closest = j;
                *color = k % 6 + 1;
                *match_len = site_len;
                break;
            }
        }
    }

    return closest;
}
---|
55 | |
---|
56 | int main(ac,av) |
---|
57 | int ac; |
---|
58 | char **av; |
---|
59 | { |
---|
60 | struct data_format data[10000]; |
---|
61 | FILE *file; |
---|
62 | int i,j,k,color,numseqs,numenzymes,nextpos,len; |
---|
63 | char enzymes[80][80],dummy[80]; |
---|
64 | if(ac<3) |
---|
65 | { |
---|
66 | fprintf(stderr,"Usage: %s enzyme_file seq_file\n",av[0]); |
---|
67 | return -1; |
---|
68 | } |
---|
69 | file = fopen(av[2],"r"); |
---|
70 | if(file == NULL) |
---|
71 | return -1; |
---|
72 | |
---|
73 | numseqs = ReadFlat(file,data,10000); |
---|
74 | |
---|
75 | file = fopen(av[1],"r"); |
---|
76 | if(file == NULL) |
---|
77 | return -1; |
---|
78 | |
---|
79 | for(numenzymes = 0; |
---|
80 | fscanf(file,"%s %s",enzymes[numenzymes],dummy)>0; |
---|
81 | numenzymes++); |
---|
82 | |
---|
83 | for(i=0;i<numseqs;i++) |
---|
84 | { |
---|
85 | /* |
---|
86 | if(numseqs>1) |
---|
87 | */ |
---|
88 | printf("name:%s\n",data[i].name); |
---|
89 | printf("length:%zu\n",strlen(data[i].nuc)); |
---|
90 | if(numseqs>1) |
---|
91 | printf("nodash:\n"); |
---|
92 | printf("start:\n"); |
---|
93 | for(j=0;j<data[i].length;) |
---|
94 | { |
---|
95 | for(;data[i].nuc[j] == '-' && j<data[i].length;) |
---|
96 | { |
---|
97 | printf("8\n"); |
---|
98 | j++; |
---|
99 | } |
---|
100 | if((nextpos = FindNext(data[i].nuc,j,enzymes,numenzymes |
---|
101 | ,&len,&color)) != -1) |
---|
102 | { |
---|
103 | for(k=j;k<nextpos;k++) |
---|
104 | printf("8\n"); |
---|
105 | for(k=j+nextpos;k<j+nextpos+len;k++) |
---|
106 | printf("%d\n",color); |
---|
107 | j=nextpos+len; |
---|
108 | } |
---|
109 | else |
---|
110 | for(;j<data[i].length;j++) |
---|
111 | printf("8\n"); |
---|
112 | } |
---|
113 | } |
---|
114 | return 0; |
---|
115 | } |
---|
116 | |
---|
117 | |
---|