| 1 | /* iteration ( algorithm C ) */ |
|---|
| 2 | #include "mltaln.h" |
|---|
| 3 | |
|---|
| 4 | #define DEBUG 0 |
|---|
| 5 | |
|---|
| 6 | static void Writeoptions( FILE *fp ) |
|---|
| 7 | { |
|---|
| 8 | if( scoremtx == 1 ) |
|---|
| 9 | fprintf( fp, "Dayhoff( ... )\n" ); |
|---|
| 10 | else if( scoremtx == -1 ) |
|---|
| 11 | fprintf( fp, "DNA\n" ); |
|---|
| 12 | else if( scoremtx == 2 ) |
|---|
| 13 | fprintf( fp, "Miyata-Yasunaga\n" ); |
|---|
| 14 | else |
|---|
| 15 | fprintf( fp, "JTT %dPAM\n", pamN ); |
|---|
| 16 | |
|---|
| 17 | if( scoremtx == 0 ) |
|---|
| 18 | fprintf( fp, "Gap Penalty = %+d, %+d\n", penalty, offset ); |
|---|
| 19 | else |
|---|
| 20 | fprintf( fp, "Gap Penalty = %+d\n", penalty ); |
|---|
| 21 | |
|---|
| 22 | fprintf( fp, "marginal score to search : best - %f\n", cut ); |
|---|
| 23 | if( scmtd == 3 ) |
|---|
| 24 | fprintf( fp, "score of rnd or sco\n" ); |
|---|
| 25 | else if( scmtd == 4 ) |
|---|
| 26 | fprintf( fp, "score = sigma( score for a pair of homologous amino acids ) / ( number of amino acids pairs )\n" ); |
|---|
| 27 | else if( scmtd == 5 ) |
|---|
| 28 | fprintf( fp, "score : SP\n" ); |
|---|
| 29 | if( mix ) |
|---|
| 30 | fprintf( fp, "?\n" ); |
|---|
| 31 | else |
|---|
| 32 | { |
|---|
| 33 | if( weight == 2 ) |
|---|
| 34 | fprintf( fp, "weighted, geta2 = %f\n", geta2 ); |
|---|
| 35 | else if( weight == 3 ) |
|---|
| 36 | { |
|---|
| 37 | if( scmtd == 4 ) |
|---|
| 38 | fprintf( fp, "reversely weighted in function 'align', unweighted in function 'score_calc'\n" ); |
|---|
| 39 | else |
|---|
| 40 | fprintf( fp, "weighted like ClustalW," ); |
|---|
| 41 | } |
|---|
| 42 | else |
|---|
| 43 | fprintf( fp, "unweighted\n" ); |
|---|
| 44 | } |
|---|
| 45 | if( weight && utree ) |
|---|
| 46 | { |
|---|
| 47 | fprintf( fp, "using tree defined by the file hat2 with simplified UPG method\n" ); |
|---|
| 48 | } |
|---|
| 49 | if( weight && !utree ) |
|---|
| 50 | fprintf( fp, "using temporary tree by simplified UPG method\n" ); |
|---|
| 51 | fprintf( fp, "Algorithm %c\n", alg ); |
|---|
| 52 | } |
|---|
| 53 | |
|---|
| 54 | |
|---|
| 55 | |
|---|
| 56 | |
|---|
| 57 | char **align0( float *wm, char **aseq, char *seq, double effarr[M], int icyc, int ex ) |
|---|
| 58 | { |
|---|
| 59 | char **result; |
|---|
| 60 | |
|---|
| 61 | if( alg == 'B' ) |
|---|
| 62 | { |
|---|
| 63 | ErrorExit( "Sorry!" ); |
|---|
| 64 | /* |
|---|
| 65 | if( outgap == 0 ) |
|---|
| 66 | { |
|---|
| 67 | result = alignm1_o( wm, aseq, seq, scmx, effarr, icyc, ex ); |
|---|
| 68 | } |
|---|
| 69 | if( outgap == 1 ) |
|---|
| 70 | { |
|---|
| 71 | result = alignm1( wm, aseq, seq, scmx, effarr, icyc, ex ); |
|---|
| 72 | } |
|---|
| 73 | */ |
|---|
| 74 | } |
|---|
| 75 | else if( alg == 'C' ) |
|---|
| 76 | { |
|---|
| 77 | result = Calignm1( wm, aseq, seq, effarr, icyc, ex ); |
|---|
| 78 | } |
|---|
| 79 | return( result ); |
|---|
| 80 | } |
|---|
| 81 | |
|---|
| 82 | |
|---|
| 83 | double score_m_1_0( char **aseq, int locnjob, int s, double **eff, double effarr[M] ) |
|---|
| 84 | { |
|---|
| 85 | double x; |
|---|
| 86 | |
|---|
| 87 | if( alg == 'B' ) |
|---|
| 88 | { |
|---|
| 89 | ErrorExit( "Sorry!" ); |
|---|
| 90 | } |
|---|
| 91 | if( alg == 'C' ) |
|---|
| 92 | { |
|---|
| 93 | x = Cscore_m_1( aseq, locnjob, s, eff ); |
|---|
| 94 | } |
|---|
| 95 | fprintf( stderr, "in score_m_1_0 %f\n", x ); |
|---|
| 96 | return( x ); |
|---|
| 97 | } |
|---|
| 98 | |
|---|
| 99 | int iteration( int locnjob, char name[M][B], int nlen[M], char **aseq, char **bseq, int ***topol, double **len, double **eff ) |
|---|
| 100 | { |
|---|
| 101 | double tscore, mscore; |
|---|
| 102 | int identity; |
|---|
| 103 | static char *mseq1, **mseq2 = NULL; |
|---|
| 104 | static char **result; |
|---|
| 105 | int i, l; |
|---|
| 106 | static double effarr[M]; |
|---|
| 107 | int s; |
|---|
| 108 | int sss[2]; |
|---|
| 109 | char ou; |
|---|
| 110 | int alloclen; |
|---|
| 111 | int resultlen; |
|---|
| 112 | int nlenmax0 = nlenmax; |
|---|
| 113 | FILE *prep; |
|---|
| 114 | char sai[M]; |
|---|
| 115 | char sai1[M]; |
|---|
| 116 | char sai2[M]; |
|---|
| 117 | #if 0 |
|---|
| 118 | double his[2][M][MAXITERATION/locnjob+1]; |
|---|
| 119 | #else |
|---|
| 120 | double ***his; |
|---|
| 121 | #endif |
|---|
| 122 | int cyc[2]; |
|---|
| 123 | char shindou = 0; |
|---|
| 124 | float wm; |
|---|
| 125 | int returnvalue; |
|---|
| 126 | |
|---|
| 127 | for( i=0; i<locnjob; i++ ) |
|---|
| 128 | { |
|---|
| 129 | sai[i] = 1; |
|---|
| 130 | sai1[i] = 1; |
|---|
| 131 | sai2[i] = 2; |
|---|
| 132 | } |
|---|
| 133 | sai[locnjob] = sai1[locnjob] = sai2[locnjob] = 0; |
|---|
| 134 | |
|---|
| 135 | |
|---|
| 136 | Writeoptions( trap_g ); |
|---|
| 137 | |
|---|
| 138 | his = AllocateDoubleCub( 2, M, MAXITERATION/locnjob+1 ); |
|---|
| 139 | |
|---|
| 140 | if( mseq2 == NULL ) |
|---|
| 141 | { |
|---|
| 142 | alloclen = nlenmax * 2.0; |
|---|
| 143 | AllocateTmpSeqs( &mseq2, &mseq1, alloclen ); |
|---|
| 144 | } |
|---|
| 145 | |
|---|
| 146 | if( !tbitr && !tbweight ) |
|---|
| 147 | { |
|---|
| 148 | writePre( locnjob, name, nlen, aseq, 0 ); |
|---|
| 149 | |
|---|
| 150 | #if 0 |
|---|
| 151 | prep = fopen( "best", "w" ); |
|---|
| 152 | Write( prep, locnjob, name, nlen, aseq ); |
|---|
| 153 | fclose( prep ); |
|---|
| 154 | #endif |
|---|
| 155 | } |
|---|
| 156 | |
|---|
| 157 | |
|---|
| 158 | |
|---|
| 159 | |
|---|
| 160 | treeconstruction( aseq, locnjob, topol, len, eff ); |
|---|
| 161 | tscore = score_calc0( aseq, locnjob, eff, 0 ); |
|---|
| 162 | |
|---|
| 163 | #if DEBUG |
|---|
| 164 | fprintf( stderr, "eff mtx in iteration\n" ); |
|---|
| 165 | for( i=0; i<locnjob; i++ ) |
|---|
| 166 | { |
|---|
| 167 | for( j=0; j<locnjob; j++ ) |
|---|
| 168 | { |
|---|
| 169 | fprintf( stderr, "%5.3f ", eff[i][j] ); |
|---|
| 170 | } |
|---|
| 171 | fprintf( stderr, "\n" ); |
|---|
| 172 | } |
|---|
| 173 | #endif |
|---|
| 174 | |
|---|
| 175 | fprintf( stderr, "\n" ); |
|---|
| 176 | if( disp ) |
|---|
| 177 | { |
|---|
| 178 | fprintf( stderr, "aseq(initial)\n" ); |
|---|
| 179 | display( aseq, locnjob ); |
|---|
| 180 | } |
|---|
| 181 | fprintf( stderr, "initial score = %f \n", tscore ); |
|---|
| 182 | fprintf( stderr, "\n" ); |
|---|
| 183 | for( i=0; i<locnjob; i++ ) strcpy( bseq[i], aseq[i] ); |
|---|
| 184 | mscore = tscore; |
|---|
| 185 | srand( time(NULL) ); |
|---|
| 186 | |
|---|
| 187 | sss[1] = 0; |
|---|
| 188 | sss[0] = locnjob-1; |
|---|
| 189 | /* |
|---|
| 190 | sss[0] = (int)( (float)locnjob/2.0 ); |
|---|
| 191 | */ |
|---|
| 192 | ou = 1; |
|---|
| 193 | cyc[0] = 0; cyc[1] = 0; |
|---|
| 194 | |
|---|
| 195 | for( s=-1, l=0; l<MAXITERATION; l++ ) |
|---|
| 196 | { |
|---|
| 197 | int ss; |
|---|
| 198 | double tmpscore, tmpscore1; |
|---|
| 199 | |
|---|
| 200 | if( strlen( aseq[0] ) > nlenmax ) |
|---|
| 201 | nlenmax = strlen( aseq[0] ); |
|---|
| 202 | |
|---|
| 203 | /* |
|---|
| 204 | s = ( int )( rnd() * locnjob ); |
|---|
| 205 | s++; |
|---|
| 206 | if( s == locnjob ) s = 0; |
|---|
| 207 | ou = 0; |
|---|
| 208 | */ |
|---|
| 209 | if( ou == 0 ) |
|---|
| 210 | { |
|---|
| 211 | ou = 1; |
|---|
| 212 | s = sss[0]; |
|---|
| 213 | /* |
|---|
| 214 | sss[0]++; |
|---|
| 215 | if( sss[0] == locnjob ) |
|---|
| 216 | { |
|---|
| 217 | sss[0] = 0; |
|---|
| 218 | cyc[0]++; |
|---|
| 219 | } |
|---|
| 220 | */ |
|---|
| 221 | sss[0]--; |
|---|
| 222 | if( sss[0] == -1 ) |
|---|
| 223 | { |
|---|
| 224 | sss[0] = locnjob-1; |
|---|
| 225 | cyc[0]++; |
|---|
| 226 | } |
|---|
| 227 | } |
|---|
| 228 | else |
|---|
| 229 | { |
|---|
| 230 | ou = 0; |
|---|
| 231 | s = sss[1]; |
|---|
| 232 | sss[1]++; |
|---|
| 233 | if( sss[1] == locnjob ) |
|---|
| 234 | { |
|---|
| 235 | sss[1] = 0; |
|---|
| 236 | cyc[1]++; |
|---|
| 237 | } |
|---|
| 238 | } |
|---|
| 239 | fprintf( trap_g, "%d ", weight ); |
|---|
| 240 | |
|---|
| 241 | /* |
|---|
| 242 | for( i=0, count=0; i<strlen( aseq[s] ); i++ ) |
|---|
| 243 | { |
|---|
| 244 | if( aseq[s][i] != '-' ) |
|---|
| 245 | { |
|---|
| 246 | mseq1[count] = aseq[s][i]; |
|---|
| 247 | count++; |
|---|
| 248 | } |
|---|
| 249 | } |
|---|
| 250 | mseq1[count] = 0; |
|---|
| 251 | */ |
|---|
| 252 | gappick0( mseq1, aseq[s] ); |
|---|
| 253 | |
|---|
| 254 | if( checkC ) |
|---|
| 255 | tmpscore = score_m_1_0( aseq, locnjob, s, eff, effarr ); |
|---|
| 256 | |
|---|
| 257 | gappick( locnjob, s, aseq, mseq2, eff, effarr ); |
|---|
| 258 | |
|---|
| 259 | result = align0( &wm, mseq2, mseq1, effarr, locnjob-2, s ); |
|---|
| 260 | resultlen = strlen( result[0] ); |
|---|
| 261 | if( resultlen > alloclen ) |
|---|
| 262 | { |
|---|
| 263 | if( resultlen > nlenmax0*3 || resultlen > N ) |
|---|
| 264 | { |
|---|
| 265 | fprintf(stderr, "Error in main1\n"); |
|---|
| 266 | exit( 1 ); |
|---|
| 267 | } |
|---|
| 268 | FreeTmpSeqs( mseq2, mseq1 ); |
|---|
| 269 | alloclen = strlen( result[0] ) * 2.0; |
|---|
| 270 | fprintf( stderr, "\n\ntrying to allocate TmpSeqs\n\n" ); |
|---|
| 271 | AllocateTmpSeqs( &mseq2, &mseq1, alloclen ); |
|---|
| 272 | } |
|---|
| 273 | for( i=0; i<locnjob; i++ ) strcpy( mseq2[i], result[i] ); |
|---|
| 274 | |
|---|
| 275 | if( checkC ) |
|---|
| 276 | fprintf( stderr, "wm in iteration == %f\n", wm ); |
|---|
| 277 | |
|---|
| 278 | strcpy( mseq1, mseq2[locnjob-1] ); |
|---|
| 279 | /* |
|---|
| 280 | Write( stdout, locnjob, name, nlen, mseq2 ); |
|---|
| 281 | */ |
|---|
| 282 | for( i=locnjob-2; i>=s; i-- ) strcpy( mseq2[i+1], mseq2[i] ); |
|---|
| 283 | strcpy( mseq2[s], mseq1 ); |
|---|
| 284 | if( checkC ) |
|---|
| 285 | { |
|---|
| 286 | tmpscore1= score_m_1_0( mseq2, locnjob, s, eff, effarr ); |
|---|
| 287 | fprintf( stderr, "pick up %d, before ALIGNM1 score_m_1_0 = %f\n", s+1, tmpscore ); |
|---|
| 288 | fprintf( stderr, "pick up %d, after ALIGNM1 score_m_1_0 = %f\n", s+1, tmpscore1 ); |
|---|
| 289 | if( tmpscore1 < tmpscore ) |
|---|
| 290 | { |
|---|
| 291 | fprintf( stderr, "\7" ); |
|---|
| 292 | fprintf( trap_g, ">>>>>>>n\n" ); |
|---|
| 293 | } |
|---|
| 294 | if( fabs( wm - tmpscore1 ) / wm > 0.001 ) |
|---|
| 295 | { |
|---|
| 296 | fprintf( stderr, "\7sorry\n" ); |
|---|
| 297 | exit( 1 ); |
|---|
| 298 | } |
|---|
| 299 | } |
|---|
| 300 | |
|---|
| 301 | identity = !strcmp( mseq2[s], aseq[s] ); |
|---|
| 302 | if( s == locnjob - 1 ) ss = 0; else ss=s+1; |
|---|
| 303 | |
|---|
| 304 | identity *= !strcmp( mseq2[ss], aseq[ss] ); |
|---|
| 305 | |
|---|
| 306 | if( !identity ) |
|---|
| 307 | { |
|---|
| 308 | tmpscore = score_calc0( mseq2, locnjob, eff, s ); |
|---|
| 309 | } |
|---|
| 310 | else tmpscore = tscore; |
|---|
| 311 | |
|---|
| 312 | if( disp ) |
|---|
| 313 | { |
|---|
| 314 | fprintf( stderr, "% 3d % 3d / the rest \n", l+1, s+1 ); |
|---|
| 315 | display( mseq2, locnjob ); |
|---|
| 316 | } |
|---|
| 317 | fprintf( stderr, "% 3d % 3d / the rest \n", l+1, s+1 ); |
|---|
| 318 | fprintf( stderr, "score = %f mscore = %f ", tmpscore, mscore ); |
|---|
| 319 | |
|---|
| 320 | fprintf( trap_g, "%#4d %#4d / the rest ", l+1, s+1 ); |
|---|
| 321 | fprintf( trap_g, "score = %f mscore = %f ", tmpscore, mscore ); |
|---|
| 322 | |
|---|
| 323 | if( identity ) |
|---|
| 324 | { |
|---|
| 325 | fprintf( stderr, "( identical )\n" ); |
|---|
| 326 | fprintf( trap_g, "( identical )\n" ); |
|---|
| 327 | sai[s] = 2; |
|---|
| 328 | } |
|---|
| 329 | |
|---|
| 330 | else if( tmpscore > mscore - cut ) |
|---|
| 331 | { |
|---|
| 332 | fprintf( stderr, "accepted\n" ); |
|---|
| 333 | fprintf( trap_g, "accepted\n" ); |
|---|
| 334 | for( i=0; i<locnjob; i++ ) strcpy( aseq[i], mseq2[i] ); |
|---|
| 335 | strcpy( sai, sai1 ); /* kokoka ? */ |
|---|
| 336 | if( !tbitr && !tbweight ) |
|---|
| 337 | { |
|---|
| 338 | writePre( locnjob, name, nlen, aseq, 0 ); |
|---|
| 339 | } |
|---|
| 340 | strcpy( sai, sai1 ); |
|---|
| 341 | tscore = tmpscore; |
|---|
| 342 | /* |
|---|
| 343 | tscore = tmpscore = score_calc0( aseq, locnjob, eff, s ); * ? * |
|---|
| 344 | */ |
|---|
| 345 | if( tmpscore > mscore ) |
|---|
| 346 | { |
|---|
| 347 | for( i=0; i<locnjob; i++ ) strcpy( bseq[i], mseq2[i] ); |
|---|
| 348 | treeconstruction( bseq, locnjob, topol, len, eff ); |
|---|
| 349 | tscore = mscore = score_calc0( bseq, locnjob, eff, s ); |
|---|
| 350 | fprintf( trap_g, " -> %f\n", mscore ); |
|---|
| 351 | strcpy( sai, sai1 ); /* kokoka ? */ |
|---|
| 352 | #if 0 |
|---|
| 353 | if( !tbitr && !tbweight ) |
|---|
| 354 | { prep = fopen( "best", "w" ); |
|---|
| 355 | Write( prep, locnjob, name, nlen, bseq ); |
|---|
| 356 | fclose( prep ); |
|---|
| 357 | } |
|---|
| 358 | #endif |
|---|
| 359 | } |
|---|
| 360 | } |
|---|
| 361 | |
|---|
| 362 | else |
|---|
| 363 | { |
|---|
| 364 | if( tmpscore == tscore ) |
|---|
| 365 | { |
|---|
| 366 | fprintf( stderr, "occational coincidence \n" ); |
|---|
| 367 | fprintf( trap_g, "occational coincidence\n" ); |
|---|
| 368 | } |
|---|
| 369 | else |
|---|
| 370 | { |
|---|
| 371 | fprintf( stderr, "rejected\n" ); |
|---|
| 372 | fprintf( trap_g, "rejected\n" ); |
|---|
| 373 | } |
|---|
| 374 | for( i=0; i<locnjob; i++ ) strcpy( aseq[i], bseq[i] ); |
|---|
| 375 | tscore = mscore; |
|---|
| 376 | sai[s] = 2; |
|---|
| 377 | } |
|---|
| 378 | |
|---|
| 379 | /* |
|---|
| 380 | prep = fopen( "cur", "w" ); |
|---|
| 381 | Write( prep, locnjob, name, nlen, mseq2 ); |
|---|
| 382 | fclose( prep ); |
|---|
| 383 | */ |
|---|
| 384 | |
|---|
| 385 | his[ou][s][cyc[ou]] = tmpscore; |
|---|
| 386 | if( !strcmp( sai, sai2 ) ) |
|---|
| 387 | { |
|---|
| 388 | returnvalue = 0; |
|---|
| 389 | fprintf( trap_g, "converged\n" ); |
|---|
| 390 | break; |
|---|
| 391 | } |
|---|
| 392 | for( i=cyc[ou]-1; i>0; i-- ) |
|---|
| 393 | { |
|---|
| 394 | if( tmpscore == his[ou][s][i] ) |
|---|
| 395 | { |
|---|
| 396 | shindou = 1; |
|---|
| 397 | break; |
|---|
| 398 | } |
|---|
| 399 | } |
|---|
| 400 | fprintf( stderr, "\n" ); |
|---|
| 401 | if( shindou == 1 ) |
|---|
| 402 | { |
|---|
| 403 | returnvalue = -1; |
|---|
| 404 | fprintf( trap_g, "oscillating\n" ); |
|---|
| 405 | break; |
|---|
| 406 | } |
|---|
| 407 | } |
|---|
| 408 | if( l == MAXITERATION ) returnvalue = -2; |
|---|
| 409 | FreeDoubleCub( his ); |
|---|
| 410 | return( returnvalue ); |
|---|
| 411 | } |
|---|
| 412 | |
|---|