| 1 | /* |
|---|
| 2 | * Copyright 1991 Steven Smith at the Harvard Genome Lab. |
|---|
| 3 | * All rights reserved. |
|---|
| 4 | */ |
|---|
| 5 | #include "Flatio.c" |
|---|
| 6 | |
|---|
| 7 | |
|---|
| 8 | main(ac,av) |
|---|
| 9 | int ac; |
|---|
| 10 | char **av; |
|---|
| 11 | { |
|---|
| 12 | struct data_format data[10000]; |
|---|
| 13 | FILE *file; |
|---|
| 14 | int i,j,k,color,numseqs,numenzymes,nextpos,len; |
|---|
| 15 | char enzymes[80][80],dummy[80]; |
|---|
| 16 | if(ac<3) |
|---|
| 17 | { |
|---|
| 18 | fprintf(stderr,"Usage: %s enzyme_file seq_file\n",av[0]); |
|---|
| 19 | exit(-1); |
|---|
| 20 | } |
|---|
| 21 | file = fopen(av[2],"r"); |
|---|
| 22 | if(file == NULL) |
|---|
| 23 | exit(-1); |
|---|
| 24 | |
|---|
| 25 | numseqs = ReadFlat(file,data,10000); |
|---|
| 26 | |
|---|
| 27 | file = fopen(av[1],"r"); |
|---|
| 28 | if(file == NULL) |
|---|
| 29 | exit(-1); |
|---|
| 30 | |
|---|
| 31 | for(numenzymes = 0; |
|---|
| 32 | fscanf(file,"%s %s",enzymes[numenzymes],dummy)>0; |
|---|
| 33 | numenzymes++); |
|---|
| 34 | |
|---|
| 35 | for(i=0;i<numseqs;i++) |
|---|
| 36 | { |
|---|
| 37 | /* |
|---|
| 38 | if(numseqs>1) |
|---|
| 39 | */ |
|---|
| 40 | printf("name:%s\n",data[i].name); |
|---|
| 41 | printf("length:%zu\n",strlen(data[i].nuc)); |
|---|
| 42 | if(numseqs>1) |
|---|
| 43 | printf("nodash:\n"); |
|---|
| 44 | printf("start:\n"); |
|---|
| 45 | for(j=0;j<data[i].length;) |
|---|
| 46 | { |
|---|
| 47 | for(;data[i].nuc[j] == '-' && j<data[i].length;) |
|---|
| 48 | { |
|---|
| 49 | printf("8\n"); |
|---|
| 50 | j++; |
|---|
| 51 | } |
|---|
| 52 | if((nextpos = FindNext(data[i].nuc,j,enzymes,numenzymes |
|---|
| 53 | ,&len,&color)) != -1) |
|---|
| 54 | { |
|---|
| 55 | for(k=j;k<nextpos;k++) |
|---|
| 56 | printf("8\n"); |
|---|
| 57 | for(k=j+nextpos;k<j+nextpos+len;k++) |
|---|
| 58 | printf("%d\n",color); |
|---|
| 59 | j=nextpos+len; |
|---|
| 60 | } |
|---|
| 61 | else |
|---|
| 62 | for(;j<data[i].length;j++) |
|---|
| 63 | printf("8\n"); |
|---|
| 64 | } |
|---|
| 65 | } |
|---|
| 66 | exit(0); |
|---|
| 67 | } |
|---|
| 68 | |
|---|
| 69 | |
|---|
/* Defined later in this file; declared here so the call below is checked. */
int Comp(int, int);

/*
 * FindNext - locate the earliest enzyme match in target at or after offset.
 *
 * target      NUL-terminated sequence to search.
 * offset      first index of target at which a match may start.
 * enzymes     table of NUL-terminated patterns, one per row of 80 chars.
 * numenzymes  number of rows of enzymes to try.
 * match_len   out: length of the matching pattern, 0 when nothing matches.
 * color       out: (index of the matching pattern) % 6 + 1; written only
 *             when a match is found.
 *
 * Characters are compared with Comp(), so a position matches when Comp()
 * returns non-zero.  When several patterns match at the same position the
 * one with the lowest index wins.  A match that ends exactly at the end of
 * target is reported.
 *
 * Returns the index in target where the match starts, or -1 when no
 * pattern matches.
 */
int FindNext(char *target, int offset, char enzymes[][80], int numenzymes,
             int *match_len, int *color)
{
    int target_len = (int)strlen(target);
    int best_pos = -1;      /* start of the earliest match found so far */
    int k;

    *match_len = 0;
    if (offset < 0)
        offset = 0;

    for (k = 0; k < numenzymes; k++) {
        int site_len = (int)strlen(enzymes[k]);
        int last_start = target_len - site_len;   /* last index that fits */
        int pos;

        if (site_len == 0)
            continue;       /* an empty pattern can never be a match */

        /*
         * Only positions strictly before the current best can improve it,
         * which also keeps the lowest-index pattern on ties.
         */
        for (pos = offset;
             pos <= last_start && (best_pos == -1 || pos < best_pos);
             pos++) {
            int i = 0;

            while (i < site_len && Comp(enzymes[k][i], target[pos + i]))
                i++;
            if (i == site_len) {
                /* record only real matches, never the scan end position */
                best_pos = pos;
                *color = k % 6 + 1;
                *match_len = site_len;
                break;
            }
        }
    }
    return best_pos;
}
|---|
| 103 | |
|---|
/*
 * Comp - test whether two nucleotide code characters are compatible.
 *
 * Each character is looked up in CtoB, which assigns a 4-bit mask:
 * A=0x1, C=0x2, G=0x4, T and U=0x8, and the multi-base codes carry the OR
 * of their bases (M=0x3, R=0x5, S=0x6, V=0x7, W=0x9, Y=0xa, H=0xb, K=0xc,
 * D=0xd, B=0xe, N=0xf).  Upper and lower case share the same mask.
 * Every other character, including '-', maps to 0.
 *
 * The parameters are int so that callers without a prototype in scope,
 * which pass promoted chars, stay compatible; only the low 8 bits are used.
 *
 * Returns the bitwise AND of the two masks: non-zero when the codes share
 * at least one base, 0 otherwise.  Characters outside 0..127 never match.
 */
int Comp(int a, int b)
{
    static const unsigned char CtoB[128] = {
        /*   0- 15 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        /*  16- 31 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        /*  32- 47 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        /*  48- 63 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        /*  @ A-O  */ 0,0x01,0x0e,0x02,0x0d,0,0,0x04,
                      0x0b,0,0,0x0c,0,0x03,0x0f,0,
        /*  P-Z .. */ 0,0,0x05,0x06,0x08,0x08,0x07,0x09,
                      0x09,0x0a,0,0,0,0,0,0,
        /*  ` a-o  */ 0,0x01,0x0e,0x02,0x0d,0,0,0x04,
                      0x0b,0,0,0x0c,0,0x03,0x0f,0,
        /*  p-z .. */ 0,0,0x05,0x06,0x08,0x08,0x07,0x09,
                      0x09,0x0a,0,0,0,0,0,0
    };
    /*
     * 'W'/'w' used to map to 0, so that code never matched anything; it is
     * A|T = 0x09.  NOTE(review): 'X'/'x' keep their historical value 0x09
     * so existing pattern files behave as before - confirm whether X was
     * ever meant to carry a mask.
     */
    unsigned char ca = (unsigned char)a;
    unsigned char cb = (unsigned char)b;

    /* Bytes above 127 have no table entry; treat them as non-matching. */
    if (ca >= sizeof CtoB || cb >= sizeof CtoB)
        return 0;

    return CtoB[ca] & CtoB[cb];
}
|---|