| 1 | #include "mltaln.h" |
|---|
| 2 | |
|---|
| 3 | #define DEBUG 0 |
|---|
| 4 | |
|---|
| 5 | static int seedoffset; |
|---|
| 6 | |
|---|
/*
 * Normalize the characters of n sequences in place.
 *
 * Every character of seq[0..n-1] that occurs in `usual` is passed through
 * `uporlow` and the result is stored back; every other character is
 * overwritten with `unknown` (which is NOT passed through `uporlow`).
 *
 * n       - number of strings in seq
 * seq     - array of n writable, NUL-terminated strings
 * usual   - NUL-terminated set of accepted characters
 * unknown - replacement for characters that are not in `usual`
 * uporlow - case-mapping callback with <ctype.h> semantics
 *           (e.g. toupper or tolower)
 */
static void replace_unusual( int n, char **seq, const char *usual, char unknown, int (*uporlow)( int ) )
{
	int i;
	char *pt;

	for( i=0; i<n; i++ )
	{
		for( pt=seq[i]; *pt; pt++ )
		{
			if( strchr( usual, *pt ) )
			{
				/*
				 * <ctype.h>-style functions are only defined for values
				 * representable as unsigned char (or EOF); passing a
				 * negative plain char would be undefined behaviour.
				 */
				*pt = (char)uporlow( (unsigned char)*pt );
			}
			else
			{
				*pt = unknown;
			}
		}
	}
}
|---|
| 22 | |
|---|
| 23 | |
|---|
| 24 | void arguments( int argc, char *argv[] ) |
|---|
| 25 | { |
|---|
| 26 | int c; |
|---|
| 27 | |
|---|
| 28 | seedoffset = 0; |
|---|
| 29 | inputfile = NULL; |
|---|
| 30 | |
|---|
| 31 | while( --argc > 0 && (*++argv)[0] == '-' ) |
|---|
| 32 | { |
|---|
| 33 | while ( (c = *++argv[0]) ) |
|---|
| 34 | { |
|---|
| 35 | switch( c ) |
|---|
| 36 | { |
|---|
| 37 | case 'o': |
|---|
| 38 | seedoffset = atoi( *++argv ); |
|---|
| 39 | fprintf( stderr, "seedoffset = %d\n", seedoffset ); |
|---|
| 40 | --argc; |
|---|
| 41 | goto nextoption; |
|---|
| 42 | case 'i': |
|---|
| 43 | inputfile = *++argv; |
|---|
| 44 | fprintf( stderr, "inputfile = %s\n", inputfile ); |
|---|
| 45 | --argc; |
|---|
| 46 | goto nextoption; |
|---|
| 47 | default: |
|---|
| 48 | fprintf( stderr, "illegal option %c\n", c ); |
|---|
| 49 | argc = 0; |
|---|
| 50 | break; |
|---|
| 51 | } |
|---|
| 52 | } |
|---|
| 53 | nextoption: |
|---|
| 54 | ; |
|---|
| 55 | } |
|---|
| 56 | if( argc != 0 ) |
|---|
| 57 | { |
|---|
| 58 | fprintf( stderr, "options: Check source file !\n" ); |
|---|
| 59 | exit( 1 ); |
|---|
| 60 | } |
|---|
| 61 | } |
|---|
| 62 | |
|---|
| 63 | |
|---|
| 64 | |
|---|
| 65 | int main( int argc, char *argv[] ) |
|---|
| 66 | { |
|---|
| 67 | FILE *infp; |
|---|
| 68 | int nlenmin; |
|---|
| 69 | char **name; |
|---|
| 70 | char **seq; |
|---|
| 71 | int *nlen; |
|---|
| 72 | int i; |
|---|
| 73 | char *usual; |
|---|
| 74 | |
|---|
| 75 | arguments( argc, argv ); |
|---|
| 76 | |
|---|
| 77 | if( inputfile ) |
|---|
| 78 | { |
|---|
| 79 | infp = fopen( inputfile, "r" ); |
|---|
| 80 | if( !infp ) |
|---|
| 81 | { |
|---|
| 82 | fprintf( stderr, "Cannot open %s\n", inputfile ); |
|---|
| 83 | exit( 1 ); |
|---|
| 84 | } |
|---|
| 85 | } |
|---|
| 86 | else |
|---|
| 87 | infp = stdin; |
|---|
| 88 | |
|---|
| 89 | |
|---|
| 90 | dorp = NOTSPECIFIED; |
|---|
| 91 | getnumlen_casepreserve( infp, &nlenmin ); |
|---|
| 92 | |
|---|
| 93 | fprintf( stderr, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); |
|---|
| 94 | |
|---|
| 95 | seq = AllocateCharMtx( njob, nlenmax+1 ); |
|---|
| 96 | name = AllocateCharMtx( njob, B+1 ); |
|---|
| 97 | nlen = AllocateIntVec( njob ); |
|---|
| 98 | |
|---|
| 99 | readData_pointer_casepreserve( infp, name, nlen, seq ); |
|---|
| 100 | |
|---|
| 101 | // for( i=0; i<njob; i++ ) gappick_samestring( seq[i] ); |
|---|
| 102 | |
|---|
| 103 | #if 0 |
|---|
| 104 | FILE *origfp; |
|---|
| 105 | origfp = fopen( "_original", "w" ); |
|---|
| 106 | if( !origfp ) |
|---|
| 107 | { |
|---|
| 108 | fprintf( stderr, "Cannot open _original\n" ); |
|---|
| 109 | exit( 1 ); |
|---|
| 110 | } |
|---|
| 111 | for( i=0; i<njob; i++ ) |
|---|
| 112 | { |
|---|
| 113 | nlen[i] = strlen( seq[i] ); |
|---|
| 114 | fprintf( origfp, ">%s\n", name[i]+1 ); |
|---|
| 115 | if( seq[i][nlen[i]-1] == '\n' ) seq[i][nlen[i]-1] = 0; |
|---|
| 116 | fprintf( origfp, "%s\n", seq[i] ); |
|---|
| 117 | } |
|---|
| 118 | fclose( origfp ); |
|---|
| 119 | #endif |
|---|
| 120 | |
|---|
| 121 | if( dorp == 'p' ) |
|---|
| 122 | { |
|---|
| 123 | usual = "ARNDCQEGHILKMFPSTWYVarndcqeghilkmfpstwyv-."; |
|---|
| 124 | replace_unusual( njob, seq, usual, 'X', toupper ); |
|---|
| 125 | } |
|---|
| 126 | else |
|---|
| 127 | { |
|---|
| 128 | usual = "ATGCUatgcuBDHKMNRSVWYXbdhkmnrsvwyx-"; |
|---|
| 129 | replace_unusual( njob, seq, usual, 'n', tolower ); |
|---|
| 130 | } |
|---|
| 131 | |
|---|
| 132 | |
|---|
| 133 | |
|---|
| 134 | for( i=0; i<njob; i++ ) |
|---|
| 135 | { |
|---|
| 136 | fprintf( stdout, ">_os_%d_oe_%s\n", i+seedoffset, name[i]+1 ); |
|---|
| 137 | fprintf( stdout, "%s\n", seq[i] ); |
|---|
| 138 | } |
|---|
| 139 | |
|---|
| 140 | free( nlen ); |
|---|
| 141 | FreeCharMtx( seq ); |
|---|
| 142 | FreeCharMtx( name ); |
|---|
| 143 | |
|---|
| 144 | return( 0 ); |
|---|
| 145 | } |
|---|