1 | /* Tree-dependent-iteration */ |
---|
2 | /* Devide to segments */ |
---|
3 | |
---|
4 | #include "mltaln.h" |
---|
5 | |
---|
6 | extern char **seq_g; |
---|
7 | extern char **res_g; |
---|
8 | static int subalignment; |
---|
9 | static int subalignmentoffset; |
---|
10 | |
---|
11 | static int intop; |
---|
12 | static int intree; |
---|
13 | |
---|
14 | void arguments( int argc, char *argv[] ) |
---|
15 | { |
---|
16 | int c; |
---|
17 | char *argkey; |
---|
18 | |
---|
19 | outnumber = 0; |
---|
20 | nthread = 1; |
---|
21 | randomseed = 0; |
---|
22 | scoreout = 0; |
---|
23 | parallelizationstrategy = BAATARI1; |
---|
24 | intop = 0; |
---|
25 | intree = 0; |
---|
26 | inputfile = NULL; |
---|
27 | rnakozo = 0; |
---|
28 | rnaprediction = 'm'; |
---|
29 | nevermemsave = 0; |
---|
30 | score_check = 1; |
---|
31 | fftkeika = 1; |
---|
32 | constraint = 0; |
---|
33 | fmodel = 0; |
---|
34 | kobetsubunkatsu = 1; |
---|
35 | bunkatsu = 1; |
---|
36 | nblosum = 62; |
---|
37 | niter = 100; |
---|
38 | calledByXced = 0; |
---|
39 | devide = 1; |
---|
40 | divWinSize = 20; /* 70 */ |
---|
41 | divThreshold = 65; |
---|
42 | fftscore = 1; |
---|
43 | fftRepeatStop = 0; |
---|
44 | fftNoAnchStop = 0; |
---|
45 | scmtd = 5; |
---|
46 | cooling = 1; |
---|
47 | weight = 4; |
---|
48 | utree = 1; |
---|
49 | refine = 1; |
---|
50 | check = 1; |
---|
51 | cut = 0.0; |
---|
52 | disp = 0; |
---|
53 | outgap = 1; |
---|
54 | use_fft = 0; // CHUUI dochira demo mafft.tmpl deha F |
---|
55 | force_fft = 0; |
---|
56 | alg = 'A'; /* chuui */ |
---|
57 | mix = 0; |
---|
58 | checkC = 0; |
---|
59 | tbitr = 0; |
---|
60 | treemethod = 'X'; |
---|
61 | scoremtx = 1; |
---|
62 | dorp = NOTSPECIFIED; |
---|
63 | ppenalty = NOTSPECIFIED; |
---|
64 | ppenalty_ex = NOTSPECIFIED; |
---|
65 | poffset = NOTSPECIFIED; |
---|
66 | kimuraR = NOTSPECIFIED; |
---|
67 | pamN = NOTSPECIFIED; |
---|
68 | geta2 = GETA2; |
---|
69 | fftWinSize = NOTSPECIFIED; |
---|
70 | fftThreshold = NOTSPECIFIED; |
---|
71 | RNAppenalty = NOTSPECIFIED; |
---|
72 | RNAppenalty_ex = NOTSPECIFIED; |
---|
73 | RNApthr = NOTSPECIFIED; |
---|
74 | TMorJTT = JTT; |
---|
75 | consweight_multi = 1.0; |
---|
76 | consweight_rna = 0.0; |
---|
77 | subalignment = 0; |
---|
78 | subalignmentoffset = 0; |
---|
79 | |
---|
80 | while( --argc > 0 && (*++argv)[0] == '-' ) |
---|
81 | { |
---|
82 | while ( (c = *++argv[0]) ) |
---|
83 | { |
---|
84 | switch( c ) |
---|
85 | { |
---|
86 | case 'i': |
---|
87 | inputfile = *++argv; |
---|
88 | fprintf( stderr, "inputfile = %s\n", inputfile ); |
---|
89 | --argc; |
---|
90 | goto nextoption; |
---|
91 | case 'I': |
---|
92 | niter = atoi( *++argv ); |
---|
93 | fprintf( stderr, "niter = %d\n", niter ); |
---|
94 | --argc; |
---|
95 | goto nextoption; |
---|
96 | case 'e': |
---|
97 | RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 ); |
---|
98 | --argc; |
---|
99 | goto nextoption; |
---|
100 | case 'o': |
---|
101 | RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); |
---|
102 | --argc; |
---|
103 | goto nextoption; |
---|
104 | case 'f': |
---|
105 | ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); |
---|
106 | // fprintf( stderr, "ppenalty = %d\n", ppenalty ); |
---|
107 | --argc; |
---|
108 | goto nextoption; |
---|
109 | case 'g': |
---|
110 | ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); |
---|
111 | // fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); |
---|
112 | --argc; |
---|
113 | goto nextoption; |
---|
114 | case 'h': |
---|
115 | poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); |
---|
116 | fprintf( stderr, "poffset = %d\n", poffset ); |
---|
117 | --argc; |
---|
118 | goto nextoption; |
---|
119 | case 'k': |
---|
120 | kimuraR = atoi( *++argv ); |
---|
121 | fprintf( stderr, "kappa = %d\n", kimuraR ); |
---|
122 | --argc; |
---|
123 | goto nextoption; |
---|
124 | case 'b': |
---|
125 | nblosum = atoi( *++argv ); |
---|
126 | scoremtx = 1; |
---|
127 | fprintf( stderr, "blosum %d / kimura 200\n", nblosum ); |
---|
128 | --argc; |
---|
129 | goto nextoption; |
---|
130 | case 'j': |
---|
131 | pamN = atoi( *++argv ); |
---|
132 | scoremtx = 0; |
---|
133 | TMorJTT = JTT; |
---|
134 | fprintf( stderr, "jtt/kimura %d\n", pamN ); |
---|
135 | --argc; |
---|
136 | goto nextoption; |
---|
137 | case 'm': |
---|
138 | pamN = atoi( *++argv ); |
---|
139 | scoremtx = 0; |
---|
140 | TMorJTT = TM; |
---|
141 | fprintf( stderr, "tm %d\n", pamN ); |
---|
142 | --argc; |
---|
143 | goto nextoption; |
---|
144 | case 'l': |
---|
145 | fastathreshold = atof( *++argv ); |
---|
146 | constraint = 2; |
---|
147 | --argc; |
---|
148 | goto nextoption; |
---|
149 | case 'r': |
---|
150 | consweight_rna = atof( *++argv ); |
---|
151 | rnakozo = 1; |
---|
152 | --argc; |
---|
153 | goto nextoption; |
---|
154 | case 'c': |
---|
155 | consweight_multi = atof( *++argv ); |
---|
156 | --argc; |
---|
157 | goto nextoption; |
---|
158 | case 'C': |
---|
159 | nthread = atoi( *++argv ); |
---|
160 | fprintf( stderr, "nthread = %d\n", nthread ); |
---|
161 | --argc; |
---|
162 | goto nextoption; |
---|
163 | case 'H': |
---|
164 | subalignment = 1; |
---|
165 | subalignmentoffset = atoi( *++argv ); |
---|
166 | --argc; |
---|
167 | goto nextoption; |
---|
168 | case 't': |
---|
169 | randomseed = atoi( *++argv ); |
---|
170 | fprintf( stderr, "randomseed = %d\n", randomseed ); |
---|
171 | --argc; |
---|
172 | goto nextoption; |
---|
173 | case 'p': |
---|
174 | argkey = *++argv; |
---|
175 | if( !strcmp( argkey, "BESTFIRST" ) ) parallelizationstrategy = BESTFIRST; |
---|
176 | else if( !strcmp( argkey, "BAATARI0" ) ) parallelizationstrategy = BAATARI0; |
---|
177 | else if( !strcmp( argkey, "BAATARI1" ) ) parallelizationstrategy = BAATARI1; |
---|
178 | else if( !strcmp( argkey, "BAATARI2" ) ) parallelizationstrategy = BAATARI2; |
---|
179 | else |
---|
180 | { |
---|
181 | fprintf( stderr, "Unknown parallelization strategy, %s\n", argkey ); |
---|
182 | exit( 1 ); |
---|
183 | } |
---|
184 | // exit( 1 ); |
---|
185 | --argc; |
---|
186 | goto nextoption; |
---|
187 | case 'S' : |
---|
188 | scoreout = 1; |
---|
189 | break; |
---|
190 | case 's' : |
---|
191 | RNAscoremtx = 'r'; |
---|
192 | break; |
---|
193 | #if 1 |
---|
194 | case 'a': |
---|
195 | fmodel = 1; |
---|
196 | break; |
---|
197 | #endif |
---|
198 | case 'N': |
---|
199 | nevermemsave = 1; |
---|
200 | break; |
---|
201 | case 'D': |
---|
202 | dorp = 'd'; |
---|
203 | break; |
---|
204 | case 'P': |
---|
205 | dorp = 'p'; |
---|
206 | break; |
---|
207 | case 'Q': |
---|
208 | alg = 'Q'; |
---|
209 | break; |
---|
210 | case 'R': |
---|
211 | rnaprediction = 'r'; |
---|
212 | break; |
---|
213 | case 'O': |
---|
214 | fftNoAnchStop = 1; |
---|
215 | break; |
---|
216 | #if 0 |
---|
217 | case 'e': |
---|
218 | fftscore = 0; |
---|
219 | break; |
---|
220 | case 'r': |
---|
221 | fmodel = -1; |
---|
222 | break; |
---|
223 | case 'R': |
---|
224 | fftRepeatStop = 1; |
---|
225 | break; |
---|
226 | #endif |
---|
227 | case 'T': |
---|
228 | kobetsubunkatsu = 0; |
---|
229 | break; |
---|
230 | case 'B': |
---|
231 | bunkatsu = 0; |
---|
232 | break; |
---|
233 | #if 0 |
---|
234 | case 'c': |
---|
235 | cooling = 1; |
---|
236 | break; |
---|
237 | case 'a': |
---|
238 | alg = 'a'; |
---|
239 | break; |
---|
240 | case 's' : |
---|
241 | treemethod = 's'; |
---|
242 | break; |
---|
243 | case 'H': |
---|
244 | alg = 'H'; |
---|
245 | break; |
---|
246 | #endif |
---|
247 | case 'A': |
---|
248 | alg = 'A'; |
---|
249 | break; |
---|
250 | case 'M': |
---|
251 | alg = 'M'; |
---|
252 | break; |
---|
253 | case 'F': |
---|
254 | use_fft = 1; |
---|
255 | break; |
---|
256 | #if 0 |
---|
257 | case 't': |
---|
258 | weight = 4; |
---|
259 | break; |
---|
260 | #endif |
---|
261 | case 'u': |
---|
262 | weight = 0; |
---|
263 | break; |
---|
264 | case 'U': |
---|
265 | intree = 1; |
---|
266 | break; |
---|
267 | case 'V': |
---|
268 | intop = 1; |
---|
269 | break; |
---|
270 | case 'J': |
---|
271 | utree = 0; |
---|
272 | break; |
---|
273 | case 'd': |
---|
274 | disp = 1; |
---|
275 | break; |
---|
276 | case 'Z': |
---|
277 | score_check = 0; |
---|
278 | break; |
---|
279 | case 'Y': |
---|
280 | score_check = 2; |
---|
281 | break; |
---|
282 | #if 0 |
---|
283 | case 'n' : |
---|
284 | treemethod = 'n'; |
---|
285 | break; |
---|
286 | #endif |
---|
287 | case 'n' : |
---|
288 | outnumber = 1; |
---|
289 | break; |
---|
290 | case 'X' : |
---|
291 | treemethod = 'X'; |
---|
292 | break; |
---|
293 | case 'E' : |
---|
294 | treemethod = 'E'; |
---|
295 | break; |
---|
296 | case 'q' : |
---|
297 | treemethod = 'q'; |
---|
298 | break; |
---|
299 | case 'z': |
---|
300 | fftThreshold = atoi( *++argv ); |
---|
301 | --argc; |
---|
302 | goto nextoption; |
---|
303 | case 'w': |
---|
304 | fftWinSize = atoi( *++argv ); |
---|
305 | --argc; |
---|
306 | goto nextoption; |
---|
307 | default: |
---|
308 | fprintf( stderr, "illegal option %c\n", c ); |
---|
309 | argc = 0; |
---|
310 | break; |
---|
311 | } |
---|
312 | } |
---|
313 | nextoption: |
---|
314 | ; |
---|
315 | } |
---|
316 | if( argc == 1 ) |
---|
317 | { |
---|
318 | cut = atof( (*argv) ); |
---|
319 | argc--; |
---|
320 | } |
---|
321 | if( argc != 0 ) |
---|
322 | { |
---|
323 | fprintf( stderr, "options : Check source file!\n" ); |
---|
324 | exit( 1 ); |
---|
325 | } |
---|
326 | #if 0 |
---|
327 | if( alg == 'A' && weight == 0 ) |
---|
328 | ErrorExit( "ERROR : Algorithm A+ and un-weighted\n" ); |
---|
329 | #endif |
---|
330 | } |
---|
331 | |
---|
332 | |
---|
333 | int main( int argc, char *argv[] ) |
---|
334 | { |
---|
335 | int identity; |
---|
336 | static int nlen[M]; |
---|
337 | static char **name, **seq, **aseq, **bseq; |
---|
338 | static Segment *segment = NULL; |
---|
339 | static int anchors[MAXSEG]; |
---|
340 | int i, j; |
---|
341 | int iseg, nseg; |
---|
342 | int ***topol; |
---|
343 | double **len; |
---|
344 | double **eff; |
---|
345 | FILE *prep; |
---|
346 | FILE *infp; |
---|
347 | FILE *orderfp; |
---|
348 | int alloclen; |
---|
349 | int returnvalue; |
---|
350 | char c; |
---|
351 | int ocut; |
---|
352 | char **seq_g_bk; |
---|
353 | LocalHom **localhomtable = NULL; // by D.Mathog |
---|
354 | RNApair ***singlerna; |
---|
355 | int nogaplen; |
---|
356 | static char **nogap1seq; |
---|
357 | static char *kozoarivec; |
---|
358 | int nkozo; |
---|
359 | int alignmentlength; |
---|
360 | int **skipthisbranch; |
---|
361 | int foundthebranch; |
---|
362 | int nsubalignments, maxmem; |
---|
363 | int **subtable; |
---|
364 | int *insubtable; |
---|
365 | int *preservegaps; |
---|
366 | char ***subalnpt; |
---|
367 | |
---|
368 | arguments( argc, argv ); |
---|
369 | #ifndef enablemultithread |
---|
370 | nthread = 0; |
---|
371 | #endif |
---|
372 | |
---|
373 | if( inputfile ) |
---|
374 | { |
---|
375 | infp = fopen( inputfile, "r" ); |
---|
376 | if( !infp ) |
---|
377 | { |
---|
378 | fprintf( stderr, "Cannot open %s\n", inputfile ); |
---|
379 | exit( 1 ); |
---|
380 | } |
---|
381 | } |
---|
382 | else |
---|
383 | infp = stdin; |
---|
384 | |
---|
385 | #if 0 |
---|
386 | PreRead( stdin, &njob, &nlenmax ); |
---|
387 | #else |
---|
388 | getnumlen( infp ); |
---|
389 | #endif |
---|
390 | rewind( infp ); |
---|
391 | |
---|
392 | nkozo = 0; |
---|
393 | |
---|
394 | if( njob < 2 ) |
---|
395 | { |
---|
396 | fprintf( stderr, "At least 2 sequences should be input!\n" |
---|
397 | "Only %d sequence found.\n", njob ); |
---|
398 | exit( 1 ); |
---|
399 | } |
---|
400 | |
---|
401 | |
---|
402 | if( subalignment ) |
---|
403 | { |
---|
404 | readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem ); |
---|
405 | fprintf( stderr, "nsubalignments = %d\n", nsubalignments ); |
---|
406 | fprintf( stderr, "maxmem = %d\n", maxmem ); |
---|
407 | subtable = AllocateIntMtx( nsubalignments, maxmem+1 ); |
---|
408 | insubtable = AllocateIntVec( njob ); |
---|
409 | preservegaps = AllocateIntVec( njob ); |
---|
410 | for( i=0; i<njob; i++ ) insubtable[i] = 0; |
---|
411 | for( i=0; i<njob; i++ ) preservegaps[i] = 0; |
---|
412 | subalnpt = AllocateCharCub( nsubalignments, maxmem, 0 ); |
---|
413 | readsubalignmentstable( njob, subtable, preservegaps, NULL, NULL ); |
---|
414 | for( i=0; i<nsubalignments; i++ ) for( j=0; j<insubtable[i]; j++ ) |
---|
415 | { |
---|
416 | if( subtable[i][j] < 0 ) |
---|
417 | { |
---|
418 | fprintf( stderr, "Not supported in the iterative refinmenment mode.\n" ); |
---|
419 | fprintf( stderr, "Please use a positive number to represent a sequence.\n" ); |
---|
420 | } |
---|
421 | } |
---|
422 | } |
---|
423 | |
---|
424 | ocut = cut; |
---|
425 | |
---|
426 | segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) ); |
---|
427 | // if( treemethod == 'X' || treemethod == 'E' || treemethod == 'q' ) |
---|
428 | topol = AllocateIntCub( njob, 2, njob ); |
---|
429 | len = AllocateDoubleMtx( njob, 2 ); |
---|
430 | eff = AllocateDoubleMtx( njob, njob ); |
---|
431 | seq = AllocateCharMtx( njob, nlenmax*9+1 ); |
---|
432 | name = AllocateCharMtx( njob, B+1 ); |
---|
433 | seq_g = AllocateCharMtx( njob, nlenmax*9+1 ); |
---|
434 | res_g = AllocateCharMtx( njob, nlenmax*9+1 ); |
---|
435 | aseq = AllocateCharMtx( njob, nlenmax*9+1 ); |
---|
436 | bseq = AllocateCharMtx( njob, nlenmax*9+1 ); |
---|
437 | nogap1seq = AllocateCharMtx( 1, nlenmax*1+1 ); |
---|
438 | alloclen = nlenmax * 9; |
---|
439 | seq_g_bk = AllocateCharMtx( njob, 0 ); |
---|
440 | for( i=0; i<njob; i++ ) seq_g_bk[i] = seq_g[i]; |
---|
441 | kozoarivec = AllocateCharVec( njob ); |
---|
442 | skipthisbranch = AllocateIntMtx( njob, 2 ); |
---|
443 | for( i=0; i<njob; i++ ) skipthisbranch[i][0] = skipthisbranch[i][1] = 0; |
---|
444 | |
---|
445 | if( constraint ) |
---|
446 | { |
---|
447 | localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); |
---|
448 | for( i=0; i<njob; i++) |
---|
449 | { |
---|
450 | localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) ); |
---|
451 | for( j=0; j<njob; j++) |
---|
452 | { |
---|
453 | localhomtable[i][j].start1 = -1; |
---|
454 | localhomtable[i][j].end1 = -1; |
---|
455 | localhomtable[i][j].start2 = -1; |
---|
456 | localhomtable[i][j].end2 = -1; |
---|
457 | localhomtable[i][j].overlapaa = -1.0; |
---|
458 | localhomtable[i][j].opt = -1.0; |
---|
459 | localhomtable[i][j].importance = -1.0; |
---|
460 | localhomtable[i][j].next = NULL; |
---|
461 | localhomtable[i][j].nokori = 0; |
---|
462 | localhomtable[i][j].extended = -1; |
---|
463 | localhomtable[i][j].last = localhomtable[i]+j; |
---|
464 | localhomtable[i][j].korh = 'h'; |
---|
465 | } |
---|
466 | } |
---|
467 | fprintf( stderr, "Loading 'hat3' ... " ); |
---|
468 | fflush( stderr ); |
---|
469 | prep = fopen( "hat3", "r" ); |
---|
470 | if( prep == NULL ) ErrorExit( "Make hat3." ); |
---|
471 | readlocalhomtable2( prep, njob, localhomtable, kozoarivec ); |
---|
472 | fclose( prep ); |
---|
473 | // for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) |
---|
474 | // fprintf( stdout, "%d %d %d %d %d %d %d\n", i, j, localhomtable[i][j].opt, localhomtable[i][j].start1, localhomtable[i][j].end1, localhomtable[i][j].start2, localhomtable[i][j].end2 ); |
---|
475 | fprintf( stderr, "done.\n" ); |
---|
476 | fflush( stderr ); |
---|
477 | #if 0 |
---|
478 | fprintf( stderr, "Extending localhom ... " ); |
---|
479 | extendlocalhom( njob, localhomtable ); |
---|
480 | fprintf( stderr, "done.\n" ); |
---|
481 | #endif |
---|
482 | nkozo = 0; |
---|
483 | for( i=0; i<njob; i++ ) if( kozoarivec[i] ) nkozo++; |
---|
484 | } |
---|
485 | |
---|
486 | |
---|
487 | // if( nlenmax > 30000 ) |
---|
488 | if( nlenmax > 50000 ) // version >= 6.823 |
---|
489 | { |
---|
490 | #if 0 |
---|
491 | if( constraint ) |
---|
492 | { |
---|
493 | fprintf( stderr, "\nnlenmax=%d, nagasugi!\n", nlenmax ); |
---|
494 | exit( 1 ); |
---|
495 | } |
---|
496 | if( nevermemsave ) |
---|
497 | { |
---|
498 | fprintf( stderr, "\nnevermemsave=1, nlenmax=%d, nagasugi!\n", nlenmax ); |
---|
499 | exit( 1 ); |
---|
500 | } |
---|
501 | #endif |
---|
502 | if( !constraint && !nevermemsave && alg != 'M' ) |
---|
503 | { |
---|
504 | fprintf( stderr, "\nnlenmax=%d, Switching to the memsave mode\n", nlenmax ); |
---|
505 | alg = 'M'; |
---|
506 | } |
---|
507 | } |
---|
508 | |
---|
509 | #if 0 |
---|
510 | Read( name, nlen, seq_g ); |
---|
511 | #else |
---|
512 | readData_pointer( infp, name, nlen, seq_g ); |
---|
513 | #endif |
---|
514 | fclose( infp ); |
---|
515 | |
---|
516 | |
---|
517 | for( i=0; i<njob; i++ ) |
---|
518 | { |
---|
519 | res_g[i][0] = 0; |
---|
520 | } |
---|
521 | |
---|
522 | identity = 1; |
---|
523 | for( i=0; i<njob; i++ ) |
---|
524 | { |
---|
525 | identity *= ( nlen[i] == nlen[0] ); |
---|
526 | } |
---|
527 | if( !identity ) |
---|
528 | { |
---|
529 | fprintf( stderr, "Input pre-aligned data\n" ); |
---|
530 | exit( 1 ); |
---|
531 | } |
---|
532 | constants( njob, seq_g ); |
---|
533 | |
---|
534 | #if 0 |
---|
535 | fprintf( stderr, "penalty = %d\n", penalty ); |
---|
536 | fprintf( stderr, "penalty_ex = %d\n", penalty_ex ); |
---|
537 | fprintf( stderr, "offset = %d\n", offset ); |
---|
538 | #endif |
---|
539 | |
---|
540 | initSignalSM(); |
---|
541 | |
---|
542 | initFiles(); |
---|
543 | |
---|
544 | #if 0 |
---|
545 | if( njob == 2 ) |
---|
546 | { |
---|
547 | writePre( njob, name, nlen, seq_g, 1 ); |
---|
548 | SHOWVERSION; |
---|
549 | return( 0 ); |
---|
550 | } |
---|
551 | #else |
---|
552 | if( njob == 2 ) |
---|
553 | { |
---|
554 | weight = 0; |
---|
555 | niter = 1; |
---|
556 | } |
---|
557 | #endif |
---|
558 | |
---|
559 | c = seqcheck( seq_g ); |
---|
560 | if( c ) |
---|
561 | { |
---|
562 | fprintf( stderr, "Illegal character %c\n", c ); |
---|
563 | exit( 1 ); |
---|
564 | } |
---|
565 | commongappick( njob, seq_g ); |
---|
566 | |
---|
567 | if( rnakozo && rnaprediction == 'm' ) |
---|
568 | { |
---|
569 | singlerna = (RNApair ***)calloc( njob, sizeof( RNApair ** ) ); |
---|
570 | prep = fopen( "hat4", "r" ); |
---|
571 | if( prep == NULL ) ErrorExit( "Make hat4 using mccaskill." ); |
---|
572 | fprintf( stderr, "Loading 'hat4' ... " ); |
---|
573 | fflush( stderr ); |
---|
574 | for( i=0; i<njob; i++ ) |
---|
575 | { |
---|
576 | gappick0( nogap1seq[0], seq_g[i] ); |
---|
577 | nogaplen = strlen( nogap1seq[0] ); |
---|
578 | singlerna[i] = (RNApair **)calloc( nogaplen+1, sizeof( RNApair * ) ); |
---|
579 | for( j=0; j<nogaplen; j++ ) |
---|
580 | { |
---|
581 | singlerna[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) ); |
---|
582 | singlerna[i][j][0].bestpos = -1; |
---|
583 | singlerna[i][j][0].bestscore = -1.0; |
---|
584 | } |
---|
585 | singlerna[i][nogaplen] = NULL; |
---|
586 | readmccaskill( prep, singlerna[i], nogaplen ); |
---|
587 | } |
---|
588 | fclose( prep ); |
---|
589 | fprintf( stderr, "\ndone.\n" ); |
---|
590 | fflush( stderr ); |
---|
591 | } |
---|
592 | else |
---|
593 | singlerna = NULL; |
---|
594 | |
---|
595 | |
---|
596 | |
---|
597 | if( utree ) |
---|
598 | { |
---|
599 | prep = fopen( "hat2", "r" ); |
---|
600 | if( !prep ) ErrorExit( "Make hat2." ); |
---|
601 | readhat2_pointer( prep, njob, name, eff ); |
---|
602 | fclose( prep ); |
---|
603 | #if DEBUG |
---|
604 | for( i=0; i<njob-1; i++ ) |
---|
605 | { |
---|
606 | for( j=i+1; j<njob; j++ ) |
---|
607 | { |
---|
608 | printf( " %f", eff[i][j] ); |
---|
609 | } |
---|
610 | printf( "\n" ); |
---|
611 | } |
---|
612 | #endif |
---|
613 | if( intree ) |
---|
614 | veryfastsupg_double_loadtree( njob, eff, topol, len, name ); |
---|
615 | else if( intop ) // v6.528 deha if( intop ) dattanode intree ga mukou datta. |
---|
616 | { |
---|
617 | fprintf( stderr, "--topin has been disabled\n" ); |
---|
618 | exit( 1 ); |
---|
619 | // veryfastsupg_double_loadtop( njob, eff, topol, len ); |
---|
620 | } |
---|
621 | else if( subalignment ) |
---|
622 | fixed_supg_double_treeout_constrained( njob, eff, topol, len, name, nsubalignments, subtable ); |
---|
623 | else if( treemethod == 'X' || treemethod == 'E' || treemethod == 'q' ) |
---|
624 | // veryfastsupg_double_outtree( njob, eff, topol, len, name ); |
---|
625 | fixed_musclesupg_double_treeout( njob, eff, topol, len, name ); |
---|
626 | else if( treemethod == 'n' ) |
---|
627 | nj( njob, eff, topol, len ); |
---|
628 | else if( treemethod == 's' ) |
---|
629 | spg( njob, eff, topol, len ); |
---|
630 | else if( treemethod == 'p' ) |
---|
631 | upg2( njob, eff, topol, len ); |
---|
632 | else ErrorExit( "Incorrect treemethod.\n" ); |
---|
633 | } |
---|
634 | #if DEBUG |
---|
635 | printf( "utree = %d\n", utree ); |
---|
636 | #endif |
---|
637 | |
---|
638 | orderfp = fopen( "order", "w" ); |
---|
639 | if( !orderfp ) |
---|
640 | { |
---|
641 | fprintf( stderr, "Cannot open 'order'\n" ); |
---|
642 | exit( 1 ); |
---|
643 | } |
---|
644 | for( i=0; (j=topol[njob-2][0][i])!=-1; i++ ) |
---|
645 | { |
---|
646 | fprintf( orderfp, "%d\n", j ); |
---|
647 | } |
---|
648 | for( i=0; (j=topol[njob-2][1][i])!=-1; i++ ) |
---|
649 | { |
---|
650 | fprintf( orderfp, "%d\n", j ); |
---|
651 | } |
---|
652 | fclose( orderfp ); |
---|
653 | |
---|
654 | |
---|
655 | fprintf( stderr, "\n" ); |
---|
656 | if( ( !utree && kobetsubunkatsu ) || constraint || !bunkatsu ) |
---|
657 | { |
---|
658 | nseg = 0; |
---|
659 | anchors[0] =0; |
---|
660 | anchors[1] =strlen( seq_g[0] ); |
---|
661 | nseg += 2; |
---|
662 | } |
---|
663 | else |
---|
664 | { |
---|
665 | nseg = searchAnchors( njob, seq_g, segment ); |
---|
666 | #if 0 |
---|
667 | fprintf( stderr, "### nseg = %d\n", nseg ); |
---|
668 | fprintf( stderr, "### seq_g[0] = %s\n", seq_g[0] ); |
---|
669 | fprintf( stderr, "### nlenmax = %d\n", nlenmax ); |
---|
670 | fprintf( stderr, "### len = %d\n", strlen( seq_g[0] ) ); |
---|
671 | |
---|
672 | #endif |
---|
673 | |
---|
674 | anchors[0] = 0; |
---|
675 | for( i=0; i<nseg; i++ ) anchors[i+1] = segment[i].center; |
---|
676 | anchors[nseg+1] = strlen( seq_g[0] ); |
---|
677 | nseg +=2; |
---|
678 | |
---|
679 | #if 0 |
---|
680 | for( i=0; i<nseg; i++ ) |
---|
681 | fprintf( stderr, "anchor[%d] = %d\n", i, anchors[i] ); |
---|
682 | #endif |
---|
683 | } |
---|
684 | |
---|
685 | |
---|
686 | //--------------- kokokara ---- |
---|
687 | if( subalignment ) |
---|
688 | { |
---|
689 | for( i=0; i<nsubalignments; i++ ) |
---|
690 | { |
---|
691 | fprintf( stderr, "Checking subalignment %d:\n", i+1 ); |
---|
692 | alignmentlength = strlen( seq[subtable[i][0]] ); |
---|
693 | // for( j=0; subtable[i][j]!=-1; j++ ) |
---|
694 | // fprintf( stderr, " %d. %-30.30s\n", subtable[i][j]+1, name[subtable[i][j]]+1 ); |
---|
695 | for( j=0; subtable[i][j]!=-1; j++ ) |
---|
696 | { |
---|
697 | if( subtable[i][j] >= njob ) |
---|
698 | { |
---|
699 | fprintf( stderr, "No such sequence, %d.\n", subtable[i][j]+1 ); |
---|
700 | exit( 1 ); |
---|
701 | } |
---|
702 | if( alignmentlength != strlen( seq[subtable[i][j]] ) ) |
---|
703 | { |
---|
704 | fprintf( stderr, "\n" ); |
---|
705 | fprintf( stderr, "###############################################################################\n" ); |
---|
706 | fprintf( stderr, "# ERROR!\n" ); |
---|
707 | fprintf( stderr, "# Subalignment %d must be aligned.\n", i+1 ); |
---|
708 | fprintf( stderr, "# Please check the alignment lengths of following sequences.\n" ); |
---|
709 | fprintf( stderr, "#\n" ); |
---|
710 | fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength ); |
---|
711 | fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) ); |
---|
712 | fprintf( stderr, "#\n" ); |
---|
713 | fprintf( stderr, "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" ); |
---|
714 | if( subalignmentoffset ) |
---|
715 | { |
---|
716 | fprintf( stderr, "#\n" ); |
---|
717 | fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); |
---|
718 | fprintf( stderr, "# In this case, the rule of numbering is:\n" ); |
---|
719 | fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); |
---|
720 | fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob ); |
---|
721 | } |
---|
722 | fprintf( stderr, "###############################################################################\n" ); |
---|
723 | fprintf( stderr, "\n" ); |
---|
724 | exit( 1 ); |
---|
725 | } |
---|
726 | insubtable[subtable[i][j]] = 1; |
---|
727 | } |
---|
728 | for( j=0; j<njob-1; j++ ) |
---|
729 | { |
---|
730 | if( includemember( topol[j][0], subtable[i] ) && !samemember( topol[j][0], subtable[i] ) ) |
---|
731 | skipthisbranch[j][0] = 1; |
---|
732 | if( includemember( topol[j][1], subtable[i] ) && !samemember( topol[j][1], subtable[i] ) ) |
---|
733 | skipthisbranch[j][1] = 1; |
---|
734 | } |
---|
735 | foundthebranch = 0; |
---|
736 | for( j=0; j<njob-1; j++ ) |
---|
737 | { |
---|
738 | if( samemember( topol[j][0], subtable[i] ) || samemember( topol[j][1], subtable[i] ) ) |
---|
739 | { |
---|
740 | foundthebranch = 1; |
---|
741 | fprintf( stderr, " -> OK\n" ); |
---|
742 | break; |
---|
743 | } |
---|
744 | } |
---|
745 | if( !foundthebranch ) |
---|
746 | { |
---|
747 | system( "cp infile.tree GuideTree" ); // tekitou |
---|
748 | fprintf( stderr, "\n" ); |
---|
749 | fprintf( stderr, "###############################################################################\n" ); |
---|
750 | fprintf( stderr, "# ERROR!\n" ); |
---|
751 | fprintf( stderr, "# Subalignment %d does not seem to form a monophyletic cluster\n", i+1 ); |
---|
752 | fprintf( stderr, "# in the guide tree ('GuideTree' in this directory) internally computed.\n" ); |
---|
753 | fprintf( stderr, "# If you really want to use this subalignment, pelase give a tree with --treein \n" ); |
---|
754 | fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/treein.html\n" ); |
---|
755 | fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/merge.html\n" ); |
---|
756 | if( subalignmentoffset ) |
---|
757 | { |
---|
758 | fprintf( stderr, "#\n" ); |
---|
759 | fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset ); |
---|
760 | fprintf( stderr, "# In this case, the rule of numbering is:\n" ); |
---|
761 | fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset ); |
---|
762 | fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob ); |
---|
763 | } |
---|
764 | fprintf( stderr, "############################################################################### \n" ); |
---|
765 | fprintf( stderr, "\n" ); |
---|
766 | exit( 1 ); |
---|
767 | } |
---|
768 | // commongappick( seq[subtable[i]], subalignment[i] ); // irukamo |
---|
769 | } |
---|
770 | #if 0 |
---|
771 | for( i=0; i<njob-1; i++ ) |
---|
772 | { |
---|
773 | fprintf( stderr, "STEP %d\n", i+1 ); |
---|
774 | fprintf( stderr, "group1 = " ); |
---|
775 | for( j=0; topol[i][0][j] != -1; j++ ) |
---|
776 | fprintf( stderr, "%d ", topol[i][0][j]+1 ); |
---|
777 | fprintf( stderr, "\n" ); |
---|
778 | fprintf( stderr, "SKIP -> %d\n\n", skipthisbranch[i][0] ); |
---|
779 | fprintf( stderr, "group2 = " ); |
---|
780 | for( j=0; topol[i][1][j] != -1; j++ ) |
---|
781 | fprintf( stderr, "%d ", topol[i][1][j]+1 ); |
---|
782 | fprintf( stderr, "\n" ); |
---|
783 | fprintf( stderr, "SKIP -> %d\n\n", skipthisbranch[i][1] ); |
---|
784 | } |
---|
785 | #endif |
---|
786 | |
---|
787 | for( i=0; i<njob; i++ ) |
---|
788 | { |
---|
789 | if( insubtable[i] ) strcpy( bseq[i], seq[i] ); |
---|
790 | else gappick0( bseq[i], seq[i] ); |
---|
791 | } |
---|
792 | |
---|
793 | for( i=0; i<nsubalignments; i++ ) |
---|
794 | { |
---|
795 | for( j=0; subtable[i][j]!=-1; j++ ) subalnpt[i][j] = bseq[subtable[i][j]]; |
---|
796 | commongappick( j, subalnpt[i] ); |
---|
797 | } |
---|
798 | |
---|
799 | FreeIntMtx( subtable ); |
---|
800 | free( insubtable ); |
---|
801 | for( i=0; i<nsubalignments; i++ ) free( subalnpt[i] ); |
---|
802 | free( subalnpt ); |
---|
803 | free( preservegaps ); |
---|
804 | } |
---|
805 | //--------------- kokomade ---- |
---|
806 | |
---|
807 | |
---|
808 | |
---|
809 | |
---|
810 | for( i=0; i<njob; i++ ) res_g[i][0] = 0; |
---|
811 | |
---|
812 | for( iseg=0; iseg<nseg-1; iseg++ ) |
---|
813 | { |
---|
814 | int tmplen = anchors[iseg+1]-anchors[iseg]; |
---|
815 | int pos = strlen( res_g[0] ); |
---|
816 | for( j=0; j<njob; j++ ) |
---|
817 | { |
---|
818 | strncpy( seq[j], seq_g[j], tmplen ); |
---|
819 | seq[j][tmplen]= 0; |
---|
820 | seq_g[j] += tmplen; |
---|
821 | |
---|
822 | } |
---|
823 | fprintf( stderr, "Segment %3d/%3d %4d-%4d\n", iseg+1, nseg-1, pos+1, pos+1+tmplen ); |
---|
824 | fflush( stderr ); |
---|
825 | fprintf( trap_g, "Segment %3d/%3d %4d-%4d\n", iseg+1, nseg-1, pos+1, pos+1+tmplen ); |
---|
826 | |
---|
827 | cut = ocut; |
---|
828 | returnvalue = TreeDependentIteration( njob, name, nlen, seq, bseq, topol, len, skipthisbranch, alloclen, localhomtable, singlerna, nkozo, kozoarivec ); |
---|
829 | |
---|
830 | for( i=0; i<njob; i++ ) |
---|
831 | strcat( res_g[i], bseq[i] ); |
---|
832 | } |
---|
833 | FreeCharMtx( seq_g_bk ); |
---|
834 | FreeIntCub( topol ); |
---|
835 | FreeDoubleMtx( len ); |
---|
836 | FreeDoubleMtx( eff ); |
---|
837 | FreeIntMtx( skipthisbranch ); |
---|
838 | free( kozoarivec ); |
---|
839 | if( constraint ) FreeLocalHomTable( localhomtable, njob ); |
---|
840 | if( rnakozo && rnaprediction == 'm' ) |
---|
841 | { |
---|
842 | if( singlerna ) // nen no tame |
---|
843 | { |
---|
844 | for( i=0; i<njob; i++ ) |
---|
845 | { |
---|
846 | for( j=0; singlerna[i][j]!=NULL; j++ ) |
---|
847 | { |
---|
848 | if( singlerna[i][j] ) free( singlerna[i][j] ); |
---|
849 | } |
---|
850 | if( singlerna[i] ) free( singlerna[i] ); |
---|
851 | } |
---|
852 | free( singlerna ); |
---|
853 | singlerna = NULL; |
---|
854 | } |
---|
855 | } |
---|
856 | |
---|
857 | #if 0 |
---|
858 | Write( stdout, njob, name, nlen, bseq ); |
---|
859 | #endif |
---|
860 | |
---|
861 | fprintf( stderr, "done\n" ); |
---|
862 | fprintf( trap_g, "done\n" ); |
---|
863 | fclose( trap_g ); |
---|
864 | |
---|
865 | |
---|
866 | devide = 0; |
---|
867 | writePre( njob, name, nlen, res_g, 1 ); |
---|
868 | #if 0 |
---|
869 | writeData( stdout, njob, name, nlen, res_g, 1 ); |
---|
870 | #endif |
---|
871 | |
---|
872 | |
---|
873 | SHOWVERSION; |
---|
874 | return( 0 ); |
---|
875 | } |
---|
876 | |
---|
877 | #if 0 |
---|
878 | signed int main( int argc, char *argv[] ) |
---|
879 | { |
---|
880 | int i, nlen[M]; |
---|
881 | char b[B]; |
---|
882 | char a[] = "="; |
---|
883 | int value; |
---|
884 | |
---|
885 | gets( b ); njob = atoi( b ); |
---|
886 | |
---|
887 | /* |
---|
888 | scoremtx = 0; |
---|
889 | if( strstr( b, "ayhoff" ) ) scoremtx = 1; |
---|
890 | else if( strstr( b, "dna" ) || strstr( b, "DNA" ) ) scoremtx = -1; |
---|
891 | else if( strstr( b, "M-Y" ) || strstr( b, "iyata" ) ) scoremtx = 2; |
---|
892 | else scoremtx = 0; |
---|
893 | */ |
---|
894 | if( strstr( b, "constraint" ) ) cnst = 1; |
---|
895 | |
---|
896 | nlenmax = 0; |
---|
897 | i = 0; |
---|
898 | while( i<njob ) |
---|
899 | { |
---|
900 | gets( b ); |
---|
901 | if( !strncmp( b, a, 1 ) ) |
---|
902 | { |
---|
903 | gets( b ); nlen[i] = atoi( b ); |
---|
904 | if( nlen[i] > nlenmax ) nlenmax = nlen[i]; |
---|
905 | i++; |
---|
906 | } |
---|
907 | } |
---|
908 | if( nlenmax > N || njob > M ) |
---|
909 | { |
---|
910 | fprintf( stderr, "ERROR in main\n" ); |
---|
911 | exit( 1 ); |
---|
912 | } |
---|
913 | /* |
---|
914 | nlenmax = Na; |
---|
915 | */ |
---|
916 | rewind( stdin ); |
---|
917 | value = main1( nlen, argc, argv ); |
---|
918 | exit( 0 ); |
---|
919 | } |
---|
920 | #endif |
---|