1 | #include "mltaln.h" |
---|
2 | |
---|
3 | #define DEBUG 0 |
---|
4 | |
---|
5 | static char *whereismccaskillmea; |
---|
6 | |
---|
7 | #ifdef enablemultithread |
---|
8 | typedef struct _thread_arg |
---|
9 | { |
---|
10 | int thread_no; |
---|
11 | int njob; |
---|
12 | int *jobpospt; |
---|
13 | int **gapmap; |
---|
14 | char **nogap; |
---|
15 | int nlenmax; |
---|
16 | RNApair ***pairprob; |
---|
17 | pthread_mutex_t *mutex; |
---|
18 | } thread_arg_t; |
---|
19 | #endif |
---|
20 | |
---|
21 | void outmccaskill( FILE *fp, RNApair **pairprob, int length ) |
---|
22 | { |
---|
23 | int i; |
---|
24 | RNApair *pt; |
---|
25 | for( i=0; i<length; i++ ) for( pt=pairprob[i]; pt->bestpos!=-1; pt++ ) |
---|
26 | { |
---|
27 | if( pt->bestpos > i ) |
---|
28 | fprintf( fp, "%d %d %50.40f\n", i, pt->bestpos, pt->bestscore ); |
---|
29 | } |
---|
30 | } |
---|
31 | |
---|
32 | #if 1 |
---|
33 | static void readrawmccaskill( FILE *fp, RNApair **pairprob, int length ) |
---|
34 | { |
---|
35 | char gett[1000]; |
---|
36 | int *pairnum; |
---|
37 | int i; |
---|
38 | int left, right; |
---|
39 | float prob; |
---|
40 | |
---|
41 | pairnum = (int *)calloc( length, sizeof( int ) ); |
---|
42 | for( i=0; i<length; i++ ) pairnum[i] = 0; |
---|
43 | |
---|
44 | while( 1 ) |
---|
45 | { |
---|
46 | fgets( gett, 999, fp ); |
---|
47 | if( feof( fp ) ) break; |
---|
48 | if( gett[0] == '>' ) continue; |
---|
49 | sscanf( gett, "%d %d %f", &left, &right, &prob ); |
---|
50 | if( prob < 0.01 ) continue; // mxscarna to mafft ryoho ni eikyou |
---|
51 | //fprintf( stderr, "gett = %s\n", gett ); |
---|
52 | |
---|
53 | if( left != right && prob > 0.0 ) |
---|
54 | { |
---|
55 | pairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) ); |
---|
56 | pairprob[left][pairnum[left]].bestscore = prob; |
---|
57 | pairprob[left][pairnum[left]].bestpos = right; |
---|
58 | pairnum[left]++; |
---|
59 | pairprob[left][pairnum[left]].bestscore = -1.0; |
---|
60 | pairprob[left][pairnum[left]].bestpos = -1; |
---|
61 | // fprintf( stderr, "%d-%d, %f\n", left, right, prob ); |
---|
62 | |
---|
63 | pairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) ); |
---|
64 | pairprob[right][pairnum[right]].bestscore = prob; |
---|
65 | pairprob[right][pairnum[right]].bestpos = left; |
---|
66 | pairnum[right]++; |
---|
67 | pairprob[right][pairnum[right]].bestscore = -1.0; |
---|
68 | pairprob[right][pairnum[right]].bestpos = -1; |
---|
69 | // fprintf( stderr, "%d-%d, %f\n", right, left, prob ); |
---|
70 | } |
---|
71 | } |
---|
72 | free( pairnum ); |
---|
73 | } |
---|
74 | #endif |
---|
75 | |
---|
76 | #ifdef enablemultithread |
---|
77 | static void *athread( void *arg ) |
---|
78 | { |
---|
79 | thread_arg_t *targ = (thread_arg_t *)arg; |
---|
80 | int thread_no = targ->thread_no; |
---|
81 | int njob = targ->njob; |
---|
82 | int *jobpospt = targ->jobpospt; |
---|
83 | int **gapmap = targ->gapmap; |
---|
84 | char **nogap = targ->nogap; |
---|
85 | int nlenmax = targ->nlenmax; |
---|
86 | RNApair ***pairprob = targ->pairprob; |
---|
87 | |
---|
88 | int i, res; |
---|
89 | FILE *infp; |
---|
90 | char *com; |
---|
91 | char *dirname; |
---|
92 | |
---|
93 | dirname = calloc( 100, sizeof( char ) ); |
---|
94 | com = calloc( 1000, sizeof( char ) ); |
---|
95 | |
---|
96 | |
---|
97 | while( 1 ) |
---|
98 | { |
---|
99 | pthread_mutex_lock( targ->mutex ); |
---|
100 | i = *jobpospt; |
---|
101 | if( i == njob ) |
---|
102 | { |
---|
103 | pthread_mutex_unlock( targ->mutex ); |
---|
104 | // return( NULL ); |
---|
105 | break; |
---|
106 | } |
---|
107 | *jobpospt = i+1; |
---|
108 | pthread_mutex_unlock( targ->mutex ); |
---|
109 | |
---|
110 | commongappick_record( 1, nogap+i, gapmap[i] ); |
---|
111 | if( strlen( nogap[i] ) == 0 ) continue; |
---|
112 | |
---|
113 | sprintf( dirname, "_%d", i ); |
---|
114 | sprintf( com, "rm -rf %s", dirname ); |
---|
115 | system( com ); |
---|
116 | sprintf( com, "mkdir %s", dirname ); |
---|
117 | system( com ); |
---|
118 | |
---|
119 | fprintf( stderr, "%d / %d (by thread %4d)\n", i+1, njob, thread_no ); |
---|
120 | sprintf( com, "%s/_mccaskillinorg", dirname ); |
---|
121 | infp = fopen( com, "w" ); |
---|
122 | // fprintf( infp, ">in\n%s\n", nogap[i] ); |
---|
123 | fprintf( infp, ">in\n" ); |
---|
124 | write1seq( infp, nogap[i] ); |
---|
125 | fclose( infp ); |
---|
126 | |
---|
127 | sprintf( com, "tr -d '\\r' < %s/_mccaskillinorg > %s/_mccaskillin", dirname, dirname ); |
---|
128 | system( com ); // for cygwin, wakaran |
---|
129 | if( alg == 'G' ) |
---|
130 | sprintf( com, "cd %s; %s/dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", dirname, whereismccaskillmea ); |
---|
131 | else |
---|
132 | sprintf( com, "cd %s; %s/mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", dirname, whereismccaskillmea ); |
---|
133 | res = system( com ); |
---|
134 | |
---|
135 | if( res ) |
---|
136 | { |
---|
137 | fprintf( stderr, "ERROR IN mccaskill_mea\n" ); |
---|
138 | exit( 1 ); |
---|
139 | } |
---|
140 | |
---|
141 | sprintf( com, "%s/_mccaskillout", dirname ); |
---|
142 | infp = fopen( com, "r" ); |
---|
143 | readrawmccaskill( infp, pairprob[i], nlenmax ); |
---|
144 | fclose( infp ); |
---|
145 | |
---|
146 | sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname ); |
---|
147 | if( system( com ) ) |
---|
148 | { |
---|
149 | fprintf( stderr, "retrying to rmdir\n" ); |
---|
150 | // nanosleep( 100000 ); |
---|
151 | sleep( 1 ); |
---|
152 | system( com ); |
---|
153 | } |
---|
154 | } |
---|
155 | free( dirname ); |
---|
156 | free( com ); |
---|
157 | return( NULL ); |
---|
158 | } |
---|
159 | #endif |
---|
160 | |
---|
161 | void arguments( int argc, char *argv[] ) |
---|
162 | { |
---|
163 | int c; |
---|
164 | nthread = 1; |
---|
165 | inputfile = NULL; |
---|
166 | dorp = NOTSPECIFIED; |
---|
167 | kimuraR = NOTSPECIFIED; |
---|
168 | pamN = NOTSPECIFIED; |
---|
169 | whereismccaskillmea = NULL; |
---|
170 | alg = 's'; |
---|
171 | |
---|
172 | while( --argc > 0 && (*++argv)[0] == '-' ) |
---|
173 | { |
---|
174 | while ( (c = *++argv[0]) ) |
---|
175 | { |
---|
176 | switch( c ) |
---|
177 | { |
---|
178 | case 'i': |
---|
179 | inputfile = *++argv; |
---|
180 | fprintf( stderr, "inputfile = %s\n", inputfile ); |
---|
181 | --argc; |
---|
182 | goto nextoption; |
---|
183 | case 'd': |
---|
184 | whereismccaskillmea = *++argv; |
---|
185 | fprintf( stderr, "whereismccaskillmea = %s\n", whereismccaskillmea ); |
---|
186 | --argc; |
---|
187 | goto nextoption; |
---|
188 | case 'C': |
---|
189 | nthread = atoi( *++argv ); |
---|
190 | fprintf( stderr, "nthread = %d\n", nthread ); |
---|
191 | --argc; |
---|
192 | goto nextoption; |
---|
193 | case 's': |
---|
194 | alg = 's'; // use scarna; default |
---|
195 | break; |
---|
196 | case 'G': |
---|
197 | alg = 'G'; // use dafs, instead of scarna |
---|
198 | break; |
---|
199 | default: |
---|
200 | fprintf( stderr, "illegal option %c\n", c ); |
---|
201 | argc = 0; |
---|
202 | break; |
---|
203 | } |
---|
204 | } |
---|
205 | nextoption: |
---|
206 | ; |
---|
207 | } |
---|
208 | if( argc != 0 ) |
---|
209 | { |
---|
210 | fprintf( stderr, "options: Check source file !\n" ); |
---|
211 | exit( 1 ); |
---|
212 | } |
---|
213 | } |
---|
214 | |
---|
215 | |
---|
216 | int main( int argc, char *argv[] ) |
---|
217 | { |
---|
218 | static char com[10000]; |
---|
219 | static int *nlen; |
---|
220 | int left, right; |
---|
221 | int res; |
---|
222 | static char **name, **seq, **nogap; |
---|
223 | static int **gapmap; |
---|
224 | static int *order; |
---|
225 | int i, j; |
---|
226 | FILE *infp; |
---|
227 | RNApair ***pairprob; |
---|
228 | RNApair **alnpairprob; |
---|
229 | RNApair *pairprobpt; |
---|
230 | RNApair *pt; |
---|
231 | int *alnpairnum; |
---|
232 | float prob; |
---|
233 | int adpos; |
---|
234 | |
---|
235 | arguments( argc, argv ); |
---|
236 | #ifndef enablemultithread |
---|
237 | nthread = 0; |
---|
238 | #endif |
---|
239 | |
---|
240 | if( inputfile ) |
---|
241 | { |
---|
242 | infp = fopen( inputfile, "r" ); |
---|
243 | if( !infp ) |
---|
244 | { |
---|
245 | fprintf( stderr, "Cannot open %s\n", inputfile ); |
---|
246 | exit( 1 ); |
---|
247 | } |
---|
248 | } |
---|
249 | else |
---|
250 | infp = stdin; |
---|
251 | |
---|
252 | if( !whereismccaskillmea ) |
---|
253 | whereismccaskillmea = ""; |
---|
254 | |
---|
255 | getnumlen( infp ); |
---|
256 | rewind( infp ); |
---|
257 | |
---|
258 | if( dorp != 'd' ) |
---|
259 | { |
---|
260 | fprintf( stderr, "nuc only\n" ); |
---|
261 | exit( 1 ); |
---|
262 | } |
---|
263 | |
---|
264 | seq = AllocateCharMtx( njob, nlenmax*2+1 ); |
---|
265 | nogap = AllocateCharMtx( njob, nlenmax*2+1 ); |
---|
266 | gapmap = AllocateIntMtx( njob, nlenmax*2+1 ); |
---|
267 | order = AllocateIntVec( njob ); |
---|
268 | name = AllocateCharMtx( njob, B+1 ); |
---|
269 | nlen = AllocateIntVec( njob ); |
---|
270 | pairprob = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); |
---|
271 | alnpairprob = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) ); |
---|
272 | alnpairnum = AllocateIntVec( nlenmax ); |
---|
273 | |
---|
274 | for( i=0; i<nlenmax; i++ ) alnpairnum[i] = 0; |
---|
275 | |
---|
276 | readData_pointer( infp, name, nlen, seq ); |
---|
277 | fclose( infp ); |
---|
278 | |
---|
279 | for( i=0; i<njob; i++ ) |
---|
280 | { |
---|
281 | pairprob[i] = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) ); |
---|
282 | for( j=0; j<nlenmax; j++ ) |
---|
283 | { |
---|
284 | pairprob[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) ); |
---|
285 | pairprob[i][j][0].bestpos = -1; |
---|
286 | pairprob[i][j][0].bestscore = -1.0; |
---|
287 | } |
---|
288 | strcpy( nogap[i], seq[i] ); |
---|
289 | order[i] = i; |
---|
290 | } |
---|
291 | for( j=0; j<nlenmax; j++ ) |
---|
292 | { |
---|
293 | alnpairprob[j] = (RNApair *)calloc( 1, sizeof( RNApair ) ); |
---|
294 | alnpairprob[j][0].bestpos = -1; |
---|
295 | alnpairprob[j][0].bestscore = -1.0; |
---|
296 | } |
---|
297 | |
---|
298 | |
---|
299 | constants( njob, seq ); |
---|
300 | |
---|
301 | if( alg == 'G' ) |
---|
302 | fprintf( stderr, "Running DAFS (Sato et al. 2012; http://www.ncrna.org/).\n" ); |
---|
303 | else |
---|
304 | fprintf( stderr, "Running mxscarna with the mccaskill_mea mode.\n" ); |
---|
305 | #ifdef enablemultithread |
---|
306 | if( nthread > 0 ) |
---|
307 | { |
---|
308 | int jobpos; |
---|
309 | pthread_t *handle; |
---|
310 | pthread_mutex_t mutex; |
---|
311 | thread_arg_t *targ; |
---|
312 | jobpos = 0; |
---|
313 | |
---|
314 | targ = calloc( nthread, sizeof( thread_arg_t ) ); |
---|
315 | handle = calloc( nthread, sizeof( pthread_t ) ); |
---|
316 | pthread_mutex_init( &mutex, NULL ); |
---|
317 | |
---|
318 | for( i=0; i<nthread; i++ ) |
---|
319 | { |
---|
320 | targ[i].thread_no = i; |
---|
321 | targ[i].njob = njob; |
---|
322 | targ[i].jobpospt = &jobpos; |
---|
323 | targ[i].gapmap = gapmap; |
---|
324 | targ[i].nogap = nogap; |
---|
325 | targ[i].nlenmax = nlenmax; |
---|
326 | targ[i].pairprob = pairprob; |
---|
327 | targ[i].mutex = &mutex; |
---|
328 | |
---|
329 | // athread( targ ); |
---|
330 | pthread_create( handle+i, NULL, athread, (void *)(targ+i) ); |
---|
331 | |
---|
332 | } |
---|
333 | |
---|
334 | for( i=0; i<nthread; i++ ) |
---|
335 | { |
---|
336 | pthread_join( handle[i], NULL ); |
---|
337 | } |
---|
338 | pthread_mutex_destroy( &mutex ); |
---|
339 | |
---|
340 | free( handle ); |
---|
341 | free( targ ); |
---|
342 | |
---|
343 | |
---|
344 | for( i=0; i<njob; i++ ) |
---|
345 | { |
---|
346 | fprintf( stdout, ">%d\n", i ); |
---|
347 | outmccaskill( stdout, pairprob[i], nlenmax ); |
---|
348 | } |
---|
349 | } |
---|
350 | else |
---|
351 | #endif |
---|
352 | { |
---|
353 | for( i=0; i<njob; i++ ) |
---|
354 | { |
---|
355 | fprintf( stderr, "%d / %d\n", i+1, njob ); |
---|
356 | commongappick_record( 1, nogap+i, gapmap[i] ); |
---|
357 | if( strlen( nogap[i] ) == 0 ) |
---|
358 | { |
---|
359 | fprintf( stdout, ">%d\n", i ); |
---|
360 | continue; |
---|
361 | } |
---|
362 | |
---|
363 | infp = fopen( "_mccaskillinorg", "w" ); |
---|
364 | // fprintf( infp, ">in\n%s\n", nogap[i] ); |
---|
365 | fprintf( infp, ">in\n" ); |
---|
366 | write1seq( infp, nogap[i] ); |
---|
367 | fclose( infp ); |
---|
368 | |
---|
369 | system( "tr -d '\\r' < _mccaskillinorg > _mccaskillin" ); // for cygwin, wakaran |
---|
370 | if( alg == 'G' ) |
---|
371 | sprintf( com, "env PATH=%s dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", whereismccaskillmea ); |
---|
372 | else |
---|
373 | sprintf( com, "env PATH=%s mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", whereismccaskillmea ); |
---|
374 | res = system( com ); |
---|
375 | |
---|
376 | if( res ) |
---|
377 | { |
---|
378 | fprintf( stderr, "ERROR IN mccaskill_mea\n" ); |
---|
379 | exit( 1 ); |
---|
380 | } |
---|
381 | |
---|
382 | infp = fopen( "_mccaskillout", "r" ); |
---|
383 | readrawmccaskill( infp, pairprob[i], nlenmax ); |
---|
384 | fclose( infp ); |
---|
385 | fprintf( stdout, ">%d\n", i ); |
---|
386 | outmccaskill( stdout, pairprob[i], nlenmax ); |
---|
387 | } |
---|
388 | } |
---|
389 | |
---|
390 | for( i=0; i<njob; i++ ) |
---|
391 | { |
---|
392 | for( j=0; j<nlen[i]; j++ ) for( pairprobpt=pairprob[i][j]; pairprobpt->bestpos!=-1; pairprobpt++ ) |
---|
393 | { |
---|
394 | left = gapmap[i][j]; |
---|
395 | right = gapmap[i][pairprobpt->bestpos]; |
---|
396 | prob = pairprobpt->bestscore; |
---|
397 | |
---|
398 | for( pt=alnpairprob[left]; pt->bestpos!=-1; pt++ ) |
---|
399 | if( pt->bestpos == right ) break; |
---|
400 | |
---|
401 | if( pt->bestpos == -1 ) |
---|
402 | { |
---|
403 | alnpairprob[left] = (RNApair *)realloc( alnpairprob[left], (alnpairnum[left]+2) * sizeof( RNApair ) ); |
---|
404 | adpos = alnpairnum[left]; |
---|
405 | alnpairnum[left]++; |
---|
406 | alnpairprob[left][adpos].bestscore = 0.0; |
---|
407 | alnpairprob[left][adpos].bestpos = right; |
---|
408 | alnpairprob[left][adpos+1].bestscore = -1.0; |
---|
409 | alnpairprob[left][adpos+1].bestpos = -1; |
---|
410 | pt = alnpairprob[left]+adpos; |
---|
411 | } |
---|
412 | else |
---|
413 | adpos = pt-alnpairprob[left]; |
---|
414 | |
---|
415 | pt->bestscore += prob; |
---|
416 | if( pt->bestpos != right ) |
---|
417 | { |
---|
418 | fprintf( stderr, "okashii!\n" ); |
---|
419 | exit( 1 ); |
---|
420 | } |
---|
421 | // fprintf( stderr, "adding %d-%d, %f\n", left, right, prob ); |
---|
422 | } |
---|
423 | } |
---|
424 | |
---|
425 | for( i=0; i<njob; i++ ) |
---|
426 | { |
---|
427 | for( j=0; j<nlenmax; j++ ) free( pairprob[i][j] ); |
---|
428 | free( pairprob[i] ); |
---|
429 | } |
---|
430 | free( pairprob ); |
---|
431 | for( j=0; j<nlenmax; j++ ) free( alnpairprob[j] ); |
---|
432 | free( alnpairprob ); |
---|
433 | free( alnpairnum ); |
---|
434 | fprintf( stderr, "%d thread(s)\n", nthread ); |
---|
435 | return( 0 ); |
---|
436 | |
---|
437 | #if 0 |
---|
438 | fprintf( stdout, "result=\n" ); |
---|
439 | |
---|
440 | for( i=0; i<nlenmax; i++ ) for( pairprobpt=alnpairprob[i]; pairprobpt->bestpos!=-1; pairprobpt++ ) |
---|
441 | { |
---|
442 | pairprobpt->bestscore /= (float)njob; |
---|
443 | left = i; |
---|
444 | right = pairprobpt->bestpos; |
---|
445 | prob = pairprobpt->bestscore; |
---|
446 | fprintf( stdout, "%d-%d, %f\n", left, right, prob ); |
---|
447 | } |
---|
448 | |
---|
449 | return( 0 ); |
---|
450 | #endif |
---|
451 | } |
---|