1 | #include "mltaln.h" |
---|
2 | |
---|
3 | #define DEBUG 0 |
---|
4 | |
---|
5 | #if 0 |
---|
6 | int seqlen( char *seq ) |
---|
7 | { |
---|
8 | int val = 0; |
---|
9 | while( *seq ) |
---|
10 | if( *seq++ != '-' ) val++; |
---|
11 | return( val ); |
---|
12 | } |
---|
13 | #else |
---|
14 | int seqlen( char *seq ) |
---|
15 | { |
---|
16 | int val = 0; |
---|
17 | if( *newgapstr == '-' ) |
---|
18 | { |
---|
19 | while( *seq ) |
---|
20 | if( *seq++ != '-' ) val++; |
---|
21 | } |
---|
22 | else |
---|
23 | { |
---|
24 | while( *seq ) |
---|
25 | { |
---|
26 | if( *seq != '-' && *seq != *newgapstr ) val++; |
---|
27 | seq++; |
---|
28 | } |
---|
29 | } |
---|
30 | return( val ); |
---|
31 | } |
---|
32 | #endif |
---|
33 | |
---|
/*
 * intlen: length of an int array terminated by the sentinel -1.
 *
 * Returns the number of elements that precede the first -1; the sentinel
 * itself is not counted.
 */
int intlen( int *num )
{
	int count;

	for( count=0; num[count] != -1; count++ )
		;
	return( count );
}
---|
40 | |
---|
41 | char seqcheck( char **seq ) |
---|
42 | { |
---|
43 | int i, len; |
---|
44 | char **seqbk = seq; |
---|
45 | while( *seq ) |
---|
46 | { |
---|
47 | len = strlen( *seq ); |
---|
48 | for( i=0; i<len; i++ ) |
---|
49 | { |
---|
50 | if( amino_n[(int)(*seq)[i]] == -1 ) |
---|
51 | { |
---|
52 | |
---|
53 | fprintf( stderr, "========================================================================= \n" ); |
---|
54 | fprintf( stderr, "========================================================================= \n" ); |
---|
55 | fprintf( stderr, "=== \n" ); |
---|
56 | fprintf( stderr, "=== Alphabet '%c' is unknown.\n", (*seq)[i] ); |
---|
57 | fprintf( stderr, "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) ); |
---|
58 | fprintf( stderr, "=== \n" ); |
---|
59 | fprintf( stderr, "=== To make an alignment having unusual characters (U, @, #, etc), try\n" ); |
---|
60 | fprintf( stderr, "=== %% mafft --anysymbol input > output\n" ); |
---|
61 | fprintf( stderr, "=== \n" ); |
---|
62 | fprintf( stderr, "========================================================================= \n" ); |
---|
63 | fprintf( stderr, "========================================================================= \n" ); |
---|
64 | return( (int)(*seq)[i] ); |
---|
65 | } |
---|
66 | } |
---|
67 | seq++; |
---|
68 | } |
---|
69 | return( 0 ); |
---|
70 | } |
---|
71 | |
---|
72 | void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx ) |
---|
73 | { |
---|
74 | int i, j, lgth; |
---|
75 | |
---|
76 | lgth = strlen( aseq[0] ); |
---|
77 | for( j=0; j<lgth; j++ ) |
---|
78 | { |
---|
79 | for( i=0; i<26; i++ ) |
---|
80 | { |
---|
81 | scmx[i][j] = 0; |
---|
82 | } |
---|
83 | } |
---|
84 | for( i=0; i<icyc+1; i++ ) |
---|
85 | { |
---|
86 | int id; |
---|
87 | id = amino_n[(int)aseq[i][0]]; |
---|
88 | scmx[id][0] += (float)effarr[i]; |
---|
89 | } |
---|
90 | for( j=1; j<lgth-1; j++ ) |
---|
91 | { |
---|
92 | for( i=0; i<icyc+1; i++ ) |
---|
93 | { |
---|
94 | int id; |
---|
95 | id = amino_n[(int)aseq[i][j]]; |
---|
96 | scmx[id][j] += (float)effarr[i]; |
---|
97 | } |
---|
98 | } |
---|
99 | for( i=0; i<icyc+1; i++ ) |
---|
100 | { |
---|
101 | int id; |
---|
102 | id = amino_n[(int)aseq[i][lgth-1]]; |
---|
103 | scmx[id][lgth-1] += (float)effarr[i]; |
---|
104 | } |
---|
105 | } |
---|
106 | |
---|
/*
 * exitall: report a fatal error and terminate the program.
 *
 * Writes arr followed by a newline to stderr, then exits with status 1.
 * Never returns.
 */
void exitall( char arr[] )
{
	fprintf( stderr, "%s\n", arr );
	exit( 1 );
}
---|
112 | |
---|
113 | void display( char **seq, int nseq ) |
---|
114 | { |
---|
115 | int i, imax; |
---|
116 | char b[121]; |
---|
117 | |
---|
118 | if( !disp ) return; |
---|
119 | if( nseq > DISPSEQF ) imax = DISPSEQF; |
---|
120 | else imax = nseq; |
---|
121 | fprintf( stderr, " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" ); |
---|
122 | for( i=0; i<+imax; i++ ) |
---|
123 | { |
---|
124 | strncpy( b, seq[i]+DISPSITEI, 120 ); |
---|
125 | b[120] = 0; |
---|
126 | fprintf( stderr, "%3d %s\n", i+1, b ); |
---|
127 | } |
---|
128 | } |
---|
#if 0
/*
 * Disabled legacy variant of intergroup_score that returns the score instead
 * of storing it through a pointer.  It is never compiled; it is kept only as
 * a reference for the scoring scheme.
 *
 * For every pair (seq1[i], seq2[j]) the first len columns are scanned.
 * Columns where both sequences have '-' are skipped, every other visited
 * column adds amino_dis[a][b], and a column that opens a one-sided gap adds
 * the global penalty plus amino_dis[a][b] once more; the following columns of
 * that gap run each add their own amino_dis entry.  Each pair score is
 * weighted by eff1[i] * eff2[j].
 */
double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len )
{
	int i, j, k;
	double score = 0.0;
	double tmpscore;
	double efficient;
	char *mseq1, *mseq2, *gapped;
	char xxx[100];

	for( i=0; i<clus1; i++ )
	{
		for( j=0; j<clus2; j++ )
		{
			efficient = eff1[i] * eff2[j];
			mseq1 = seq1[i];
			mseq2 = seq2[j];
			tmpscore = 0.0;
			for( k=0; k<len; k++ )
			{
				if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
				tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];

				if( mseq1[k] == '-' || mseq2[k] == '-' )
				{
					/* exactly one side is gapped here */
					gapped = ( mseq1[k] == '-' ) ? mseq1 : mseq2;
					tmpscore += penalty;
					tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
					while( gapped[++k] == '-' )
						tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
					k--;
					if( k > len-2 ) break;
				}
			}
			score += (double)tmpscore * efficient;
			sprintf( xxx, "%f", score );
		}
	}
	return( score );
}
#endif
---|
190 | |
---|
191 | void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) |
---|
192 | { |
---|
193 | int i, j, k; |
---|
194 | int len2 = len - 2; |
---|
195 | int ms1, ms2; |
---|
196 | double tmpscore; |
---|
197 | char *mseq1, *mseq2; |
---|
198 | double efficient; |
---|
199 | |
---|
200 | // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i]; |
---|
201 | // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i]; |
---|
202 | |
---|
203 | |
---|
204 | |
---|
205 | *value = 0.0; |
---|
206 | for( i=0; i<clus1; i++ ) |
---|
207 | { |
---|
208 | for( j=0; j<clus2; j++ ) |
---|
209 | { |
---|
210 | efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */ |
---|
211 | mseq1 = seq1[i]; |
---|
212 | mseq2 = seq2[j]; |
---|
213 | tmpscore = 0.0; |
---|
214 | for( k=0; k<len; k++ ) |
---|
215 | { |
---|
216 | ms1 = (int)mseq1[k]; |
---|
217 | ms2 = (int)mseq2[k]; |
---|
218 | if( ms1 == (int)'-' && ms2 == (int)'-' ) continue; |
---|
219 | tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; |
---|
220 | |
---|
221 | if( ms1 == (int)'-' ) |
---|
222 | { |
---|
223 | tmpscore += (double)penalty; |
---|
224 | tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; |
---|
225 | while( (ms1=(int)mseq1[++k]) == (int)'-' ) |
---|
226 | ; |
---|
227 | // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; |
---|
228 | k--; |
---|
229 | if( k >len2 ) break; |
---|
230 | continue; |
---|
231 | } |
---|
232 | if( ms2 == (int)'-' ) |
---|
233 | { |
---|
234 | tmpscore += (double)penalty; |
---|
235 | tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; |
---|
236 | while( (ms2=(int)mseq2[++k]) == (int)'-' ) |
---|
237 | ; |
---|
238 | // tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; |
---|
239 | k--; |
---|
240 | if( k > len2 ) break; |
---|
241 | continue; |
---|
242 | } |
---|
243 | } |
---|
244 | *value += (double)tmpscore * (double)efficient; |
---|
245 | // fprintf( stderr, "val in _gapnomi = %f\n", *value ); |
---|
246 | } |
---|
247 | } |
---|
248 | #if 0 |
---|
249 | fprintf( stdout, "###score = %f\n", score ); |
---|
250 | #endif |
---|
251 | #if DEBUG |
---|
252 | fprintf( stderr, "score in intergroup_score = %f\n", score ); |
---|
253 | #endif |
---|
254 | // return( score ); |
---|
255 | } |
---|
256 | void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) |
---|
257 | { |
---|
258 | int i, j, k; |
---|
259 | int len2 = len - 2; |
---|
260 | int ms1, ms2; |
---|
261 | double tmpscore; |
---|
262 | char *mseq1, *mseq2; |
---|
263 | double efficient; |
---|
264 | |
---|
265 | // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i]; |
---|
266 | // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i]; |
---|
267 | |
---|
268 | |
---|
269 | |
---|
270 | *value = 0.0; |
---|
271 | for( i=0; i<clus1; i++ ) |
---|
272 | { |
---|
273 | for( j=0; j<clus2; j++ ) |
---|
274 | { |
---|
275 | efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */ |
---|
276 | mseq1 = seq1[i]; |
---|
277 | mseq2 = seq2[j]; |
---|
278 | tmpscore = 0.0; |
---|
279 | for( k=0; k<len; k++ ) |
---|
280 | { |
---|
281 | ms1 = (int)mseq1[k]; |
---|
282 | ms2 = (int)mseq2[k]; |
---|
283 | if( ms1 == (int)'-' && ms2 == (int)'-' ) continue; |
---|
284 | // tmpscore += (double)amino_dis[ms1][ms2]; |
---|
285 | |
---|
286 | if( ms1 == (int)'-' ) |
---|
287 | { |
---|
288 | tmpscore += (double)penalty; |
---|
289 | // tmpscore += (double)amino_dis[ms1][ms2]; |
---|
290 | while( (ms1=(int)mseq1[++k]) == (int)'-' ) |
---|
291 | ; |
---|
292 | // tmpscore += (double)amino_dis[ms1][ms2]; |
---|
293 | k--; |
---|
294 | if( k >len2 ) break; |
---|
295 | continue; |
---|
296 | } |
---|
297 | if( ms2 == (int)'-' ) |
---|
298 | { |
---|
299 | tmpscore += (double)penalty; |
---|
300 | // tmpscore += (double)amino_dis[ms1][ms2]; |
---|
301 | while( (ms2=(int)mseq2[++k]) == (int)'-' ) |
---|
302 | ; |
---|
303 | // tmpscore += (double)amino_dis[ms1][ms2]; |
---|
304 | k--; |
---|
305 | if( k > len2 ) break; |
---|
306 | continue; |
---|
307 | } |
---|
308 | } |
---|
309 | *value += (double)tmpscore * (double)efficient; |
---|
310 | // fprintf( stderr, "val in _gapnomi = %f\n", *value ); |
---|
311 | } |
---|
312 | } |
---|
313 | #if 0 |
---|
314 | fprintf( stdout, "###score = %f\n", score ); |
---|
315 | #endif |
---|
316 | #if DEBUG |
---|
317 | fprintf( stderr, "score in intergroup_score = %f\n", score ); |
---|
318 | #endif |
---|
319 | // return( score ); |
---|
320 | } |
---|
321 | |
---|
322 | void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) |
---|
323 | { |
---|
324 | int i, j, k; |
---|
325 | int len2 = len - 2; |
---|
326 | int ms1, ms2; |
---|
327 | double tmpscore; |
---|
328 | char *mseq1, *mseq2; |
---|
329 | double efficient; |
---|
330 | |
---|
331 | double gaptmpscore; |
---|
332 | double gapscore = 0.0; |
---|
333 | |
---|
334 | // fprintf( stderr, "#### in intergroup_score\n" ); |
---|
335 | |
---|
336 | // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i]; |
---|
337 | // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i]; |
---|
338 | |
---|
339 | *value = 0.0; |
---|
340 | for( i=0; i<clus1; i++ ) |
---|
341 | { |
---|
342 | for( j=0; j<clus2; j++ ) |
---|
343 | { |
---|
344 | efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */ |
---|
345 | mseq1 = seq1[i]; |
---|
346 | mseq2 = seq2[j]; |
---|
347 | tmpscore = 0.0; |
---|
348 | gaptmpscore = 0.0; |
---|
349 | for( k=0; k<len; k++ ) |
---|
350 | { |
---|
351 | ms1 = (int)mseq1[k]; |
---|
352 | ms2 = (int)mseq2[k]; |
---|
353 | if( ms1 == (int)'-' && ms2 == (int)'-' ) continue; |
---|
354 | // tmpscore += (double)amino_dis[ms1][ms2]; |
---|
355 | tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; |
---|
356 | |
---|
357 | if( ms1 == (int)'-' ) |
---|
358 | { |
---|
359 | tmpscore += (double)penalty; |
---|
360 | gaptmpscore += (double)penalty; |
---|
361 | // tmpscore += (double)amino_dis[ms1][ms2]; |
---|
362 | tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; |
---|
363 | while( (ms1=(int)mseq1[++k]) == (int)'-' ) |
---|
364 | // tmpscore += (double)amino_dis[ms1][ms2]; |
---|
365 | tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; |
---|
366 | k--; |
---|
367 | if( k >len2 ) break; |
---|
368 | continue; |
---|
369 | } |
---|
370 | if( ms2 == (int)'-' ) |
---|
371 | { |
---|
372 | tmpscore += (double)penalty; |
---|
373 | gaptmpscore += (double)penalty; |
---|
374 | // tmpscore += (double)amino_dis[ms1][ms2]; |
---|
375 | tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; |
---|
376 | while( (ms2=(int)mseq2[++k]) == (int)'-' ) |
---|
377 | // tmpscore += (double)amino_dis[ms1][ms2]; |
---|
378 | tmpscore += (double)amino_dis_consweight_multi[ms1][ms2]; |
---|
379 | k--; |
---|
380 | if( k > len2 ) break; |
---|
381 | continue; |
---|
382 | } |
---|
383 | } |
---|
384 | *value += (double)tmpscore * (double)efficient; |
---|
385 | gapscore += (double)gaptmpscore * (double)efficient; |
---|
386 | } |
---|
387 | } |
---|
388 | #if 0 |
---|
389 | fprintf( stderr, "###gapscore = %f\n", gapscore ); |
---|
390 | #endif |
---|
391 | #if DEBUG |
---|
392 | fprintf( stderr, "score in intergroup_score = %f\n", score ); |
---|
393 | #endif |
---|
394 | // return( score ); |
---|
395 | } |
---|
396 | void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value ) |
---|
397 | { |
---|
398 | int i, j, k; |
---|
399 | int len2 = len - 2; |
---|
400 | int ms1, ms2; |
---|
401 | double tmpscore; |
---|
402 | char *mseq1, *mseq2; |
---|
403 | static double efficient[1]; |
---|
404 | |
---|
405 | // totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i]; |
---|
406 | // totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i]; |
---|
407 | |
---|
408 | *value = 0.0; |
---|
409 | for( i=0; i<clus1; i++ ) |
---|
410 | { |
---|
411 | for( j=0; j<clus2; j++ ) |
---|
412 | { |
---|
413 | *efficient = eff1[i] * eff2[j]; /* $B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k(B, $BB?J,%P%0(B */ |
---|
414 | mseq1 = seq1[i]; |
---|
415 | mseq2 = seq2[j]; |
---|
416 | tmpscore = 0.0; |
---|
417 | for( k=0; k<len; k++ ) |
---|
418 | { |
---|
419 | ms1 = (int)mseq1[k]; |
---|
420 | ms2 = (int)mseq2[k]; |
---|
421 | if( ms1 == (int)'-' && ms2 == (int)'-' ) continue; |
---|
422 | tmpscore += (double)amino_dis[ms1][ms2]; |
---|
423 | |
---|
424 | if( ms1 == (int)'-' ) |
---|
425 | { |
---|
426 | tmpscore += (double)penalty; |
---|
427 | tmpscore += (double)amino_dis[ms1][ms2]; |
---|
428 | while( (ms1=(int)mseq1[++k]) == (int)'-' ) |
---|
429 | tmpscore += (double)amino_dis[ms1][ms2]; |
---|
430 | k--; |
---|
431 | if( k >len2 ) break; |
---|
432 | continue; |
---|
433 | } |
---|
434 | if( ms2 == (int)'-' ) |
---|
435 | { |
---|
436 | tmpscore += (double)penalty; |
---|
437 | tmpscore += (double)amino_dis[ms1][ms2]; |
---|
438 | while( (ms2=(int)mseq2[++k]) == (int)'-' ) |
---|
439 | tmpscore += (double)amino_dis[ms1][ms2]; |
---|
440 | k--; |
---|
441 | if( k > len2 ) break; |
---|
442 | continue; |
---|
443 | } |
---|
444 | } |
---|
445 | *value += (double)tmpscore * (double)*efficient; |
---|
446 | } |
---|
447 | } |
---|
448 | #if 0 |
---|
449 | fprintf( stdout, "###score = %f\n", score ); |
---|
450 | #endif |
---|
451 | #if DEBUG |
---|
452 | fprintf( stderr, "score in intergroup_score = %f\n", score ); |
---|
453 | #endif |
---|
454 | // return( score ); |
---|
455 | } |
---|
456 | |
---|
457 | |
---|
458 | double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */ |
---|
459 | { |
---|
460 | int i, j, k; |
---|
461 | double c; |
---|
462 | int len = strlen( seq[0] ); |
---|
463 | double score; |
---|
464 | double tmpscore; |
---|
465 | char *mseq1, *mseq2; |
---|
466 | double efficient; |
---|
467 | #if DEBUG |
---|
468 | FILE *fp; |
---|
469 | #endif |
---|
470 | |
---|
471 | score = 0.0; |
---|
472 | c = 0.0; |
---|
473 | |
---|
474 | for( i=0; i<s; i++ ) |
---|
475 | { |
---|
476 | |
---|
477 | if( i == ex ) continue; |
---|
478 | efficient = eff[i][ex]; |
---|
479 | mseq1 = seq[i]; |
---|
480 | mseq2 = seq[ex]; |
---|
481 | tmpscore = 0.0; |
---|
482 | for( k=0; k<len; k++ ) |
---|
483 | { |
---|
484 | if( mseq1[k] == '-' && mseq2[k] == '-' ) continue; |
---|
485 | tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; |
---|
486 | |
---|
487 | if( mseq1[k] == '-' ) |
---|
488 | { |
---|
489 | tmpscore += penalty; |
---|
490 | while( mseq1[++k] == '-' ) |
---|
491 | tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; |
---|
492 | k--; |
---|
493 | if( k > len-2 ) break; |
---|
494 | continue; |
---|
495 | } |
---|
496 | if( mseq2[k] == '-' ) |
---|
497 | { |
---|
498 | tmpscore += penalty; |
---|
499 | while( mseq2[++k] == '-' ) |
---|
500 | tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; |
---|
501 | k--; |
---|
502 | if( k > len-2 ) break; |
---|
503 | continue; |
---|
504 | } |
---|
505 | } |
---|
506 | score += (double)tmpscore * efficient; |
---|
507 | /* |
---|
508 | fprintf( stdout, "%d-%d tmpscore = %f, eff = %f, tmpscore*eff = %f\n", i, ex, tmpscore, efficient, tmpscore*efficient ); |
---|
509 | */ |
---|
510 | } |
---|
511 | /* |
---|
512 | fprintf( stdout, "total score = %f\n", score ); |
---|
513 | */ |
---|
514 | |
---|
515 | for( i=0; i<s-1; i++ ) |
---|
516 | { |
---|
517 | for( j=i+1; j<s; j++ ) |
---|
518 | { |
---|
519 | if( i == ex || j == ex ) continue; |
---|
520 | |
---|
521 | efficient = eff[i][j]; |
---|
522 | mseq1 = seq[i]; |
---|
523 | mseq2 = seq[j]; |
---|
524 | tmpscore = 0.0; |
---|
525 | for( k=0; k<len; k++ ) |
---|
526 | { |
---|
527 | if( mseq1[k] == '-' && mseq2[k] == '-' ) continue; |
---|
528 | tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; |
---|
529 | |
---|
530 | if( mseq1[k] == '-' ) |
---|
531 | { |
---|
532 | tmpscore += penalty; |
---|
533 | while( mseq1[++k] == '-' ) |
---|
534 | tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; |
---|
535 | k--; |
---|
536 | if( k > len-2 ) break; |
---|
537 | continue; |
---|
538 | } |
---|
539 | if( mseq2[k] == '-' ) |
---|
540 | { |
---|
541 | tmpscore += penalty; |
---|
542 | while( mseq2[++k] == '-' ) |
---|
543 | tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; |
---|
544 | k--; |
---|
545 | if( k > len-2 ) break; |
---|
546 | continue; |
---|
547 | } |
---|
548 | } |
---|
549 | score += (double)tmpscore * efficient; |
---|
550 | } |
---|
551 | } |
---|
552 | /* |
---|
553 | fprintf( stderr, "score in score_calc5 = %f\n", score ); |
---|
554 | */ |
---|
555 | return( (double)score ); |
---|
556 | /* |
---|
557 | |
---|
558 | fprintf( trap_g, "score by fast = %f\n", (float)score ); |
---|
559 | |
---|
560 | tmpscore = score = 0.0; |
---|
561 | for( i=0; i<s; i++ ) |
---|
562 | { |
---|
563 | if( i == ex ) continue; |
---|
564 | tmpscore = Cscore_m_1( seq, i, eff ); |
---|
565 | fprintf( stdout, "%d %f\n", i, tmpscore ); |
---|
566 | |
---|
567 | score += tmpscore; |
---|
568 | } |
---|
569 | tmpscore = Cscore_m_1( seq, ex, eff ); |
---|
570 | fprintf( stdout, "ex%d %f\n", i, tmpscore ); |
---|
571 | score += tmpscore; |
---|
572 | |
---|
573 | return( score ); |
---|
574 | */ |
---|
575 | } |
---|
576 | |
---|
577 | |
---|
578 | |
---|
579 | double score_calc4( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */ |
---|
580 | { |
---|
581 | int i, j, k; |
---|
582 | double c; |
---|
583 | int len = strlen( seq[0] ); |
---|
584 | double score; |
---|
585 | long tmpscore; |
---|
586 | char *mseq1, *mseq2; |
---|
587 | double efficient; |
---|
588 | |
---|
589 | score = 0.0; |
---|
590 | c = 0.0; |
---|
591 | /* |
---|
592 | printf( "in score_calc4\n" ); |
---|
593 | for( i=0; i<s; i++ ) |
---|
594 | { |
---|
595 | for( j=0; j<s; j++ ) |
---|
596 | { |
---|
597 | printf( "% 5.3f", eff[i][j] ); |
---|
598 | } |
---|
599 | printf( "\n" ); |
---|
600 | |
---|
601 | } |
---|
602 | */ |
---|
603 | for( i=0; i<s-1; i++ ) |
---|
604 | { |
---|
605 | for( j=i+1; j<s; j++ ) |
---|
606 | { |
---|
607 | efficient = eff[i][j]; |
---|
608 | if( mix == 1 ) efficient = 1.0; |
---|
609 | /* |
---|
610 | printf( "weight for %d v.s. %d = %f\n", i, j, efficient ); |
---|
611 | */ |
---|
612 | mseq1 = seq[i]; |
---|
613 | mseq2 = seq[j]; |
---|
614 | tmpscore = 0; |
---|
615 | for( k=0; k<len; k++ ) |
---|
616 | { |
---|
617 | if( mseq1[k] == '-' && mseq2[k] == '-' ) continue; |
---|
618 | tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx ; |
---|
619 | |
---|
620 | c += efficient; |
---|
621 | |
---|
622 | if( mseq1[k] == '-' ) |
---|
623 | { |
---|
624 | tmpscore += penalty - n_dis[24][0]; |
---|
625 | while( mseq1[++k] == '-' ) |
---|
626 | ; |
---|
627 | k--; |
---|
628 | if( k > len-2 ) break; |
---|
629 | continue; |
---|
630 | } |
---|
631 | if( mseq2[k] == '-' ) |
---|
632 | { |
---|
633 | tmpscore += penalty - n_dis[24][0]; |
---|
634 | while( mseq2[++k] == '-' ) |
---|
635 | ; |
---|
636 | k--; |
---|
637 | if( k > len-2 ) break; |
---|
638 | continue; |
---|
639 | } |
---|
640 | } |
---|
641 | /* |
---|
642 | if( x == 65 ) printf( "i=%d j=%d tmpscore=%d l=%d\n", i, j, tmpscore, len ); |
---|
643 | */ |
---|
644 | score += (double)tmpscore * efficient; |
---|
645 | } |
---|
646 | } |
---|
647 | score /= c; |
---|
648 | return( (double)score ); |
---|
649 | } |
---|
650 | |
---|
651 | |
---|
652 | |
---|
653 | void upg2( int nseq, double **eff, int ***topol, double **len ) |
---|
654 | { |
---|
655 | int i, j, k; |
---|
656 | double tmplen[M]; |
---|
657 | |
---|
658 | static char **pair = NULL; |
---|
659 | |
---|
660 | if( !pair ) |
---|
661 | { |
---|
662 | pair = AllocateCharMtx( njob, njob ); |
---|
663 | } |
---|
664 | |
---|
665 | for( i=0; i<nseq; i++ ) tmplen[i] = 0.0; |
---|
666 | for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0; |
---|
667 | for( i=0; i<nseq; i++ ) pair[i][i] = 1; |
---|
668 | |
---|
669 | for( k=0; k<nseq-1; k++ ) |
---|
670 | { |
---|
671 | float minscore = 9999.0; |
---|
672 | int im = -1, jm = -1; |
---|
673 | int count; |
---|
674 | |
---|
675 | for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
676 | { |
---|
677 | if( eff[i][j] < minscore ) |
---|
678 | { |
---|
679 | minscore = eff[i][j]; |
---|
680 | im = i; jm = j; |
---|
681 | } |
---|
682 | } |
---|
683 | for( i=0, count=0; i<nseq; i++ ) |
---|
684 | if( pair[im][i] > 0 ) |
---|
685 | { |
---|
686 | topol[k][0][count] = i; |
---|
687 | count++; |
---|
688 | } |
---|
689 | topol[k][0][count] = -1; |
---|
690 | for( i=0, count=0; i<nseq; i++ ) |
---|
691 | if( pair[jm][i] > 0 ) |
---|
692 | { |
---|
693 | topol[k][1][count] = i; |
---|
694 | count++; |
---|
695 | } |
---|
696 | topol[k][1][count] = -1; |
---|
697 | |
---|
698 | len[k][0] = minscore / 2.0 - tmplen[im]; |
---|
699 | len[k][1] = minscore / 2.0 - tmplen[jm]; |
---|
700 | |
---|
701 | tmplen[im] = minscore / 2.0; |
---|
702 | |
---|
703 | for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 ); |
---|
704 | for( i=0; i<nseq; i++ ) pair[jm][i] = 0; |
---|
705 | |
---|
706 | for( i=0; i<nseq; i++ ) |
---|
707 | { |
---|
708 | if( i != im && i != jm ) |
---|
709 | { |
---|
710 | eff[MIN(i,im)][MAX(i,im)] = |
---|
711 | ( eff[MIN(i,im)][MAX(i,im)] + eff[MIN(i,jm)][MAX(i,jm)] ) / 2.0; |
---|
712 | eff[MIN(i,jm)][MAX(i,jm)] = 9999.0; |
---|
713 | } |
---|
714 | eff[im][jm] = 9999.0; |
---|
715 | } |
---|
716 | #if DEBUG |
---|
717 | printf( "STEP-%03d:\n", k+1 ); |
---|
718 | printf( "len0 = %f\n", len[k][0] ); |
---|
719 | for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] ); |
---|
720 | printf( "\n" ); |
---|
721 | printf( "len1 = %f\n", len[k][1] ); |
---|
722 | for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); |
---|
723 | printf( "\n" ); |
---|
724 | #endif |
---|
725 | } |
---|
726 | } |
---|
727 | |
---|
728 | static void setnearest( int nseq, Bchain *acpt, float **eff, float *mindisfrompt, int *nearestpt, int pos ) |
---|
729 | { |
---|
730 | int j; |
---|
731 | float tmpfloat; |
---|
732 | float mindisfrom; |
---|
733 | int nearest; |
---|
734 | // float **effptpt; |
---|
735 | Bchain *acptj; |
---|
736 | |
---|
737 | mindisfrom = 999.9; |
---|
738 | nearest = -1; |
---|
739 | |
---|
740 | // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; |
---|
741 | |
---|
742 | // for( j=pos+1; j<nseq; j++ ) |
---|
743 | for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) |
---|
744 | { |
---|
745 | j = acptj->pos; |
---|
746 | // if( (tmpfloat=*effpt++) < *mindisfrompt ) |
---|
747 | if( (tmpfloat=eff[pos][j-pos]) < mindisfrom ) |
---|
748 | { |
---|
749 | mindisfrom = tmpfloat; |
---|
750 | nearest = j; |
---|
751 | } |
---|
752 | } |
---|
753 | // effptpt = eff; |
---|
754 | // for( j=0; j<pos; j++ ) |
---|
755 | for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) |
---|
756 | { |
---|
757 | j = acptj->pos; |
---|
758 | // if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt ) |
---|
759 | if( (tmpfloat=eff[j][pos-j]) < mindisfrom ) |
---|
760 | { |
---|
761 | mindisfrom = tmpfloat; |
---|
762 | nearest = j; |
---|
763 | } |
---|
764 | } |
---|
765 | |
---|
766 | *mindisfrompt = mindisfrom; |
---|
767 | *nearestpt = nearest; |
---|
768 | } |
---|
769 | |
---|
770 | static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos ) |
---|
771 | { |
---|
772 | int j; |
---|
773 | double tmpfloat; |
---|
774 | double **effptpt; |
---|
775 | Bchain *acptj; |
---|
776 | |
---|
777 | *mindisfrompt = 999.9; |
---|
778 | *nearestpt = -1; |
---|
779 | |
---|
780 | // if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos; |
---|
781 | |
---|
782 | // for( j=pos+1; j<nseq; j++ ) |
---|
783 | for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) |
---|
784 | { |
---|
785 | j = acptj->pos; |
---|
786 | // if( (tmpfloat=*effpt++) < *mindisfrompt ) |
---|
787 | if( (tmpfloat=eff[pos][j]) < *mindisfrompt ) |
---|
788 | { |
---|
789 | *mindisfrompt = tmpfloat; |
---|
790 | *nearestpt = j; |
---|
791 | } |
---|
792 | } |
---|
793 | effptpt = eff; |
---|
794 | // for( j=0; j<pos; j++ ) |
---|
795 | for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) |
---|
796 | { |
---|
797 | j = acptj->pos; |
---|
798 | // if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt ) |
---|
799 | if( (tmpfloat=eff[j][pos]) < *mindisfrompt ) |
---|
800 | { |
---|
801 | *mindisfrompt = tmpfloat; |
---|
802 | *nearestpt = j; |
---|
803 | } |
---|
804 | } |
---|
805 | } |
---|
806 | |
---|
807 | |
---|
808 | |
---|
/*
 * loadtreeoneline: read one merge step of a guide tree.
 *
 * Reads one line from fp, expected to hold "node1 node2 length1 length2"
 * with 1-based node numbers.  On success ar[0] and ar[1] hold the node
 * numbers converted to 0-based indices and len[0], len[1] hold the two
 * branch lengths.
 *
 * The program is terminated with a message on stderr and exit status 1 when
 *   - no line can be read,
 *   - the line does not hold the four fields, or
 *   - the indices are not strictly increasing and non-negative.
 *
 * Fix: node numbers of 0 or below used to pass as long as the first was
 * smaller than the second, and produced negative indices for the caller.
 * They are now rejected as an incorrect guide tree.  The line buffer is an
 * ordinary local instead of a static one.
 */
static void loadtreeoneline( int *ar, float *len, FILE *fp )
{
	char gett[1000];
	int res;

	if( fgets( gett, sizeof( gett ), fp ) == NULL )
	{
		fprintf( stderr, "\n\nFormat error (1) in the tree? It has to be a bifurcated and rooted tree.\n" );
		fprintf( stderr, "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
		exit( 1 );
	}

	res = sscanf( gett, "%d %d %f %f", ar, ar+1, len, len+1 );
	if( res != 4 )
	{
		fprintf( stderr, "\n\nFormat error (2) in the tree? It has to be a bifurcated and rooted tree.\n" );
		fprintf( stderr, "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
		exit( 1 );
	}

	/* the file is 1-based, the program is 0-based */
	ar[0]--;
	ar[1]--;

	/* ar[0] >= 0 together with ar[0] < ar[1] also guarantees ar[1] >= 1 */
	if( ar[0] < 0 || ar[0] >= ar[1] )
	{
		fprintf( stderr, "\n\nIncorrect guide tree\n" );
		fprintf( stderr, "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
		exit( 1 );
	}
}
---|
846 | |
---|
847 | void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen, Treedep *dep ) |
---|
848 | { |
---|
849 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
850 | int *intpt, *intpt2; |
---|
851 | int *hist = NULL; |
---|
852 | Bchain *ac = NULL; |
---|
853 | int im = -1, jm = -1; |
---|
854 | Bchain *acjmnext, *acjmprev; |
---|
855 | int prevnode; |
---|
856 | Bchain *acpti; |
---|
857 | int *pt1, *pt2, *pt11, *pt22; |
---|
858 | int *nmemar; |
---|
859 | int nmemim, nmemjm; |
---|
860 | char **tree; |
---|
861 | char *treetmp; |
---|
862 | char *nametmp, *nameptr, *tmpptr; |
---|
863 | char namec; |
---|
864 | FILE *fp; |
---|
865 | int node[2]; |
---|
866 | |
---|
867 | fp = fopen( "_guidetree", "r" ); |
---|
868 | if( !fp ) |
---|
869 | { |
---|
870 | fprintf( stderr, "cannot open _guidetree\n" ); |
---|
871 | exit( 1 ); |
---|
872 | } |
---|
873 | |
---|
874 | if( !hist ) |
---|
875 | { |
---|
876 | hist = AllocateIntVec( njob ); |
---|
877 | ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); |
---|
878 | nmemar = AllocateIntVec( njob ); |
---|
879 | // treetmp = AllocateCharVec( njob*50 ); |
---|
880 | treetmp = NULL; |
---|
881 | nametmp = AllocateCharVec( 1000 ); // nagasugi |
---|
882 | // tree = AllocateCharMtx( njob, njob*50 ); |
---|
883 | tree = AllocateCharMtx( njob, 0 ); |
---|
884 | } |
---|
885 | |
---|
886 | for( i=0; i<nseq; i++ ) |
---|
887 | { |
---|
888 | for( j=0; j<999; j++ ) nametmp[j] = 0; |
---|
889 | for( j=0; j<999; j++ ) |
---|
890 | { |
---|
891 | namec = name[i][j]; |
---|
892 | if( namec == 0 ) |
---|
893 | break; |
---|
894 | else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' ) |
---|
895 | nametmp[j] = namec; |
---|
896 | else |
---|
897 | nametmp[j] = '_'; |
---|
898 | } |
---|
899 | nametmp[j] = 0; |
---|
900 | // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 ); |
---|
901 | if( outnumber ) |
---|
902 | nameptr = strstr( nametmp, "_numo_e" ) + 8; |
---|
903 | else |
---|
904 | nameptr = nametmp + 1; |
---|
905 | |
---|
906 | if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame |
---|
907 | |
---|
908 | tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 |
---|
909 | if( tree[i] == NULL ) |
---|
910 | { |
---|
911 | fprintf( stderr, "Cannot allocate tree!\n" ); |
---|
912 | exit( 1 ); |
---|
913 | } |
---|
914 | sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); |
---|
915 | } |
---|
916 | |
---|
917 | |
---|
918 | for( i=0; i<nseq; i++ ) |
---|
919 | { |
---|
920 | ac[i].next = ac+i+1; |
---|
921 | ac[i].prev = ac+i-1; |
---|
922 | ac[i].pos = i; |
---|
923 | } |
---|
924 | ac[nseq-1].next = NULL; |
---|
925 | |
---|
926 | |
---|
927 | for( i=0; i<nseq; i++ ) |
---|
928 | { |
---|
929 | hist[i] = -1; |
---|
930 | nmemar[i] = 1; |
---|
931 | } |
---|
932 | |
---|
933 | fprintf( stderr, "\n" ); |
---|
934 | for( k=0; k<nseq-1; k++ ) |
---|
935 | { |
---|
936 | if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq ); |
---|
937 | #if 0 |
---|
938 | minscore = 999.9; |
---|
939 | for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) |
---|
940 | { |
---|
941 | i = acpti->pos; |
---|
942 | // fprintf( stderr, "k=%d i=%d\n", k, i ); |
---|
943 | if( mindisfrom[i] < minscore ) // muscle |
---|
944 | { |
---|
945 | im = i; |
---|
946 | minscore = mindisfrom[i]; |
---|
947 | } |
---|
948 | } |
---|
949 | jm = nearest[im]; |
---|
950 | if( jm < im ) |
---|
951 | { |
---|
952 | j=jm; jm=im; im=j; |
---|
953 | } |
---|
954 | #else |
---|
955 | len[k][0] = len[k][1] = -1.0; |
---|
956 | loadtreeoneline( node, len[k], fp ); |
---|
957 | im = node[0]; |
---|
958 | jm = node[1]; |
---|
959 | |
---|
960 | if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) |
---|
961 | { |
---|
962 | fprintf( stderr, "\n\nCheck the guide tree.\n" ); |
---|
963 | fprintf( stderr, "im=%d, jm=%d\n", im+1, jm+1 ); |
---|
964 | fprintf( stderr, "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); |
---|
965 | exit( 1 ); |
---|
966 | } |
---|
967 | |
---|
968 | |
---|
969 | if( len[k][0] == -1.0 || len[k][1] == -1.0 ) |
---|
970 | { |
---|
971 | fprintf( stderr, "\n\nERROR: Branch length is not given.\n" ); |
---|
972 | exit( 1 ); |
---|
973 | } |
---|
974 | |
---|
975 | if( len[k][0] < 0.0 ) len[k][0] = 0.0; |
---|
976 | if( len[k][1] < 0.0 ) len[k][1] = 0.0; |
---|
977 | |
---|
978 | #endif |
---|
979 | |
---|
980 | prevnode = hist[im]; |
---|
981 | if( dep ) dep[k].child0 = prevnode; |
---|
982 | nmemim = nmemar[im]; |
---|
983 | |
---|
984 | // fprintf( stderr, "prevnode = %d, nmemim = %d\n", prevnode, nmemim ); |
---|
985 | |
---|
986 | intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); |
---|
987 | if( prevnode == -1 ) |
---|
988 | { |
---|
989 | *intpt++ = im; |
---|
990 | *intpt = -1; |
---|
991 | } |
---|
992 | else |
---|
993 | { |
---|
994 | pt1 = topol[prevnode][0]; |
---|
995 | pt2 = topol[prevnode][1]; |
---|
996 | if( *pt1 > *pt2 ) |
---|
997 | { |
---|
998 | pt11 = pt2; |
---|
999 | pt22 = pt1; |
---|
1000 | } |
---|
1001 | else |
---|
1002 | { |
---|
1003 | pt11 = pt1; |
---|
1004 | pt22 = pt2; |
---|
1005 | } |
---|
1006 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
1007 | *intpt++ = *intpt2++; |
---|
1008 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
1009 | *intpt++ = *intpt2++; |
---|
1010 | *intpt = -1; |
---|
1011 | } |
---|
1012 | |
---|
1013 | |
---|
1014 | nmemjm = nmemar[jm]; |
---|
1015 | prevnode = hist[jm]; |
---|
1016 | if( dep ) dep[k].child1 = prevnode; |
---|
1017 | |
---|
1018 | // fprintf( stderr, "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm ); |
---|
1019 | |
---|
1020 | intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); |
---|
1021 | if( !intpt ) |
---|
1022 | { |
---|
1023 | fprintf( stderr, "Cannot reallocate topol\n" ); |
---|
1024 | exit( 1 ); |
---|
1025 | } |
---|
1026 | if( prevnode == -1 ) |
---|
1027 | { |
---|
1028 | *intpt++ = jm; |
---|
1029 | *intpt = -1; |
---|
1030 | } |
---|
1031 | else |
---|
1032 | { |
---|
1033 | pt1 = topol[prevnode][0]; |
---|
1034 | pt2 = topol[prevnode][1]; |
---|
1035 | if( *pt1 > *pt2 ) |
---|
1036 | { |
---|
1037 | pt11 = pt2; |
---|
1038 | pt22 = pt1; |
---|
1039 | } |
---|
1040 | else |
---|
1041 | { |
---|
1042 | pt11 = pt1; |
---|
1043 | pt22 = pt2; |
---|
1044 | } |
---|
1045 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
1046 | *intpt++ = *intpt2++; |
---|
1047 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
1048 | *intpt++ = *intpt2++; |
---|
1049 | *intpt = -1; |
---|
1050 | } |
---|
1051 | |
---|
1052 | |
---|
1053 | // len[k][0] = ( minscore - tmptmplen[im] ); |
---|
1054 | // len[k][1] = ( minscore - tmptmplen[jm] ); |
---|
1055 | // len[k][0] = -1; |
---|
1056 | // len[k][1] = -1; |
---|
1057 | |
---|
1058 | |
---|
1059 | hist[im] = k; |
---|
1060 | nmemar[im] = nmemim + nmemjm; |
---|
1061 | |
---|
1062 | // mindisfrom[im] = 999.9; |
---|
1063 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
1064 | { |
---|
1065 | i = acpti->pos; |
---|
1066 | if( i != im && i != jm ) |
---|
1067 | { |
---|
1068 | if( i < im ) |
---|
1069 | { |
---|
1070 | miniim = i; |
---|
1071 | maxiim = im; |
---|
1072 | minijm = i; |
---|
1073 | maxijm = jm; |
---|
1074 | } |
---|
1075 | else if( i < jm ) |
---|
1076 | { |
---|
1077 | miniim = im; |
---|
1078 | maxiim = i; |
---|
1079 | minijm = i; |
---|
1080 | maxijm = jm; |
---|
1081 | } |
---|
1082 | else |
---|
1083 | { |
---|
1084 | miniim = im; |
---|
1085 | maxiim = i; |
---|
1086 | minijm = jm; |
---|
1087 | maxijm = i; |
---|
1088 | } |
---|
1089 | } |
---|
1090 | } |
---|
1091 | |
---|
1092 | |
---|
1093 | treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo |
---|
1094 | if( !treetmp ) |
---|
1095 | { |
---|
1096 | fprintf( stderr, "Cannot allocate treetmp\n" ); |
---|
1097 | exit( 1 ); |
---|
1098 | } |
---|
1099 | sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); |
---|
1100 | free( tree[im] ); |
---|
1101 | free( tree[jm] ); |
---|
1102 | tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); |
---|
1103 | tree[jm] = NULL; |
---|
1104 | if( tree[im] == NULL ) |
---|
1105 | { |
---|
1106 | fprintf( stderr, "Cannot reallocate tree!\n" ); |
---|
1107 | exit( 1 ); |
---|
1108 | } |
---|
1109 | strcpy( tree[im], treetmp ); |
---|
1110 | |
---|
1111 | // fprintf( stderr, "im,jm=%d,%d\n", im, jm ); |
---|
1112 | acjmprev = ac[jm].prev; |
---|
1113 | acjmnext = ac[jm].next; |
---|
1114 | acjmprev->next = acjmnext; |
---|
1115 | if( acjmnext != NULL ) |
---|
1116 | acjmnext->prev = acjmprev; |
---|
1117 | // free( (void *)eff[jm] ); eff[jm] = NULL; |
---|
1118 | |
---|
1119 | #if 0 // muscle seems to miss this. |
---|
1120 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
1121 | { |
---|
1122 | i = acpti->pos; |
---|
1123 | if( nearest[i] == im ) |
---|
1124 | { |
---|
1125 | // fprintf( stderr, "calling setnearest\n" ); |
---|
1126 | // setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); |
---|
1127 | } |
---|
1128 | } |
---|
1129 | #endif |
---|
1130 | |
---|
1131 | |
---|
1132 | #if 0 |
---|
1133 | fprintf( stdout, "vSTEP-%03d:\n", k+1 ); |
---|
1134 | fprintf( stdout, "len0 = %f\n", len[k][0] ); |
---|
1135 | for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); |
---|
1136 | fprintf( stdout, "\n" ); |
---|
1137 | fprintf( stdout, "len1 = %f\n", len[k][1] ); |
---|
1138 | for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); |
---|
1139 | fprintf( stdout, "\n" ); |
---|
1140 | #endif |
---|
1141 | } |
---|
1142 | fclose( fp ); |
---|
1143 | fp = fopen( "infile.tree", "w" ); |
---|
1144 | fprintf( fp, "%s\n", treetmp ); |
---|
1145 | fprintf( fp, "#by loadtree\n" ); |
---|
1146 | fclose( fp ); |
---|
1147 | |
---|
1148 | FreeCharMtx( tree ); |
---|
1149 | free( treetmp ); |
---|
1150 | free( nametmp ); |
---|
1151 | free( hist ); |
---|
1152 | free( (char *)ac ); |
---|
1153 | free( (void *)nmemar ); |
---|
1154 | |
---|
1155 | |
---|
1156 | } |
---|
1157 | |
---|
// Weights used by the "mix" linkage functions below.  The tree builders in
// this file assign them from SUEFF before clustering starts.
static float sueff1;           // weight applied to MIN( d1, d2 )
static float sueff05;          // weight applied to ( d1 + d2 )
static double sueff1_double;   // double-precision counterpart of sueff1
static double sueff05_double;  // double-precision counterpart of sueff05
---|
1160 | |
---|
1161 | static float cluster_mix_float( float d1, float d2 ) |
---|
1162 | { |
---|
1163 | return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 ); |
---|
1164 | } |
---|
// Average linkage (float): arithmetic mean of the two distances.
static float cluster_average_float( float d1, float d2 )
{
	float total = d1 + d2;

	// The halving is done in double (0.5 is a double constant) and the result
	// is narrowed back to float on return.
	return( (float)( total * 0.5 ) );
}
---|
// Minimum linkage (float): the smaller of the two distances.
static float cluster_minimum_float( float d1, float d2 )
{
	float nearer = MIN( d1, d2 );

	return( nearer );
}
---|
1173 | static double cluster_mix_double( double d1, double d2 ) |
---|
1174 | { |
---|
1175 | return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double ); |
---|
1176 | } |
---|
// Average linkage (double): arithmetic mean of the two distances.
static double cluster_average_double( double d1, double d2 )
{
	double total = d1 + d2;

	return( total * 0.5 );
}
---|
// Minimum linkage (double): the smaller of the two distances.
static double cluster_minimum_double( double d1, double d2 )
{
	double nearer = MIN( d1, d2 );

	return( nearer );
}
---|
1185 | |
---|
1186 | |
---|
1187 | void fixed_supg_float_realloc_nobk_halfmtx_treeout_constrained( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep *dep, int ngroup, int **groups ) |
---|
1188 | { |
---|
1189 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
1190 | int *intpt, *intpt2; |
---|
1191 | float tmpfloat; |
---|
1192 | float eff1, eff0; |
---|
1193 | float *tmptmplen = NULL; //static? |
---|
1194 | int *hist = NULL; //static? |
---|
1195 | Bchain *ac = NULL; //static? |
---|
1196 | int im = -1, jm = -1; |
---|
1197 | Bchain *acjmnext, *acjmprev; |
---|
1198 | int prevnode; |
---|
1199 | Bchain *acpti, *acptj; |
---|
1200 | int *pt1, *pt2, *pt11, *pt22; |
---|
1201 | int *nmemar; //static? |
---|
1202 | int nmemim, nmemjm; |
---|
1203 | float minscore; |
---|
1204 | int *nearest = NULL; // by D.Mathog, a guess |
---|
1205 | float *mindisfrom = NULL; // by D.Mathog, a guess |
---|
1206 | char **tree; //static? |
---|
1207 | char *treetmp; //static? |
---|
1208 | char *nametmp, *nameptr, *tmpptr; //static? |
---|
1209 | FILE *fp; |
---|
1210 | float (*clusterfuncpt[1])(float,float); |
---|
1211 | char namec; |
---|
1212 | int *testtopol, **inconsistent; |
---|
1213 | int **inconsistentpairlist; |
---|
1214 | int ninconsistentpairs; |
---|
1215 | int *warned; |
---|
1216 | int allinconsistent; |
---|
1217 | int firsttime; |
---|
1218 | |
---|
1219 | |
---|
1220 | sueff1 = 1 - SUEFF; |
---|
1221 | sueff05 = SUEFF * 0.5; |
---|
1222 | if ( treemethod == 'X' ) |
---|
1223 | clusterfuncpt[0] = cluster_mix_float; |
---|
1224 | else if ( treemethod == 'E' ) |
---|
1225 | clusterfuncpt[0] = cluster_average_float; |
---|
1226 | else if ( treemethod == 'q' ) |
---|
1227 | clusterfuncpt[0] = cluster_minimum_float; |
---|
1228 | else |
---|
1229 | { |
---|
1230 | fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); |
---|
1231 | exit( 1 ); |
---|
1232 | } |
---|
1233 | |
---|
1234 | if( !hist ) |
---|
1235 | { |
---|
1236 | hist = AllocateIntVec( njob ); |
---|
1237 | tmptmplen = AllocateFloatVec( njob ); |
---|
1238 | ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); |
---|
1239 | nmemar = AllocateIntVec( njob ); |
---|
1240 | mindisfrom = AllocateFloatVec( njob ); |
---|
1241 | nearest = AllocateIntVec( njob ); |
---|
1242 | // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? |
---|
1243 | treetmp = NULL; // kentou 2013/06/12 |
---|
1244 | nametmp = AllocateCharVec( 1000 ); // nagasugi |
---|
1245 | // tree = AllocateCharMtx( njob, njob*600 ); |
---|
1246 | tree = AllocateCharMtx( njob, 0 ); |
---|
1247 | testtopol = AllocateIntVec( njob + 1 ); |
---|
1248 | inconsistent = AllocateIntMtx( njob, njob ); // muda |
---|
1249 | inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // muda |
---|
1250 | warned = AllocateIntVec( ngroup ); |
---|
1251 | } |
---|
1252 | |
---|
1253 | |
---|
1254 | for( i=0; i<nseq; i++ ) |
---|
1255 | { |
---|
1256 | for( j=0; j<999; j++ ) nametmp[j] = 0; |
---|
1257 | for( j=0; j<999; j++ ) |
---|
1258 | { |
---|
1259 | namec = name[i][j]; |
---|
1260 | if( namec == 0 ) |
---|
1261 | break; |
---|
1262 | else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' ) |
---|
1263 | nametmp[j] = namec; |
---|
1264 | else |
---|
1265 | nametmp[j] = '_'; |
---|
1266 | } |
---|
1267 | nametmp[j] = 0; |
---|
1268 | // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 ); |
---|
1269 | if( outnumber ) |
---|
1270 | nameptr = strstr( nametmp, "_numo_e" ) + 8; |
---|
1271 | else |
---|
1272 | nameptr = nametmp + 1; |
---|
1273 | |
---|
1274 | if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame |
---|
1275 | |
---|
1276 | tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 |
---|
1277 | if( tree[i] == NULL ) |
---|
1278 | { |
---|
1279 | fprintf( stderr, "Cannot allocate tree!\n" ); |
---|
1280 | exit( 1 ); |
---|
1281 | } |
---|
1282 | sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); |
---|
1283 | } |
---|
1284 | for( i=0; i<nseq; i++ ) |
---|
1285 | { |
---|
1286 | ac[i].next = ac+i+1; |
---|
1287 | ac[i].prev = ac+i-1; |
---|
1288 | ac[i].pos = i; |
---|
1289 | } |
---|
1290 | ac[nseq-1].next = NULL; |
---|
1291 | |
---|
1292 | for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle |
---|
1293 | |
---|
1294 | for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0; |
---|
1295 | for( i=0; i<nseq; i++ ) |
---|
1296 | { |
---|
1297 | hist[i] = -1; |
---|
1298 | nmemar[i] = 1; |
---|
1299 | } |
---|
1300 | |
---|
1301 | fprintf( stderr, "\n" ); |
---|
1302 | ninconsistentpairs = 0; |
---|
1303 | for( k=0; k<nseq-1; k++ ) |
---|
1304 | { |
---|
1305 | if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq ); |
---|
1306 | |
---|
1307 | for( i=0; i<ninconsistentpairs; i++ ) inconsistent[inconsistentpairlist[i][0]][inconsistentpairlist[i][1]] = 0; |
---|
1308 | // for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; // osoi!!! |
---|
1309 | ninconsistentpairs = 0; |
---|
1310 | firsttime = 1; |
---|
1311 | while( 1 ) |
---|
1312 | { |
---|
1313 | if( firsttime ) |
---|
1314 | { |
---|
1315 | firsttime = 0; |
---|
1316 | minscore = 999.9; |
---|
1317 | for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) |
---|
1318 | { |
---|
1319 | i = acpti->pos; |
---|
1320 | // fprintf( stderr, "k=%d i=%d\n", k, i ); |
---|
1321 | if( mindisfrom[i] < minscore ) // muscle |
---|
1322 | { |
---|
1323 | im = i; |
---|
1324 | minscore = mindisfrom[i]; |
---|
1325 | } |
---|
1326 | } |
---|
1327 | jm = nearest[im]; |
---|
1328 | if( jm < im ) |
---|
1329 | { |
---|
1330 | j=jm; jm=im; im=j; |
---|
1331 | } |
---|
1332 | } |
---|
1333 | else |
---|
1334 | { |
---|
1335 | minscore = 999.9; |
---|
1336 | for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) |
---|
1337 | { |
---|
1338 | i = acpti->pos; |
---|
1339 | // fprintf( stderr, "k=%d i=%d\n", k, i ); |
---|
1340 | for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) |
---|
1341 | { |
---|
1342 | j = acptj->pos; |
---|
1343 | if( !inconsistent[i][j] && (tmpfloat=eff[i][j-i]) < minscore ) |
---|
1344 | { |
---|
1345 | minscore = tmpfloat; |
---|
1346 | im = i; jm = j; |
---|
1347 | } |
---|
1348 | } |
---|
1349 | for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next ) |
---|
1350 | { |
---|
1351 | j = acptj->pos; |
---|
1352 | if( !inconsistent[j][i] && (tmpfloat=eff[j][i-j]) < minscore ) |
---|
1353 | { |
---|
1354 | minscore = tmpfloat; |
---|
1355 | im = j; jm = i; |
---|
1356 | } |
---|
1357 | } |
---|
1358 | } |
---|
1359 | } |
---|
1360 | |
---|
1361 | |
---|
1362 | allinconsistent = 1; |
---|
1363 | for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) |
---|
1364 | { |
---|
1365 | for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) |
---|
1366 | { |
---|
1367 | if( inconsistent[acpti->pos][acptj->pos] == 0 ) |
---|
1368 | { |
---|
1369 | allinconsistent = 0; |
---|
1370 | goto exitloop_f; |
---|
1371 | } |
---|
1372 | } |
---|
1373 | } |
---|
1374 | exitloop_f: |
---|
1375 | |
---|
1376 | if( allinconsistent ) |
---|
1377 | { |
---|
1378 | fprintf( stderr, "\n\n\nPlease check whether the grouping is possible.\n\n\n" ); |
---|
1379 | exit( 1 ); |
---|
1380 | } |
---|
1381 | #if 1 |
---|
1382 | intpt = testtopol; |
---|
1383 | prevnode = hist[im]; |
---|
1384 | if( prevnode == -1 ) |
---|
1385 | { |
---|
1386 | *intpt++ = im; |
---|
1387 | } |
---|
1388 | else |
---|
1389 | { |
---|
1390 | for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) |
---|
1391 | *intpt++ = *intpt2++; |
---|
1392 | for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) |
---|
1393 | *intpt++ = *intpt2++; |
---|
1394 | } |
---|
1395 | |
---|
1396 | prevnode = hist[jm]; |
---|
1397 | if( prevnode == -1 ) |
---|
1398 | { |
---|
1399 | *intpt++ = jm; |
---|
1400 | } |
---|
1401 | else |
---|
1402 | { |
---|
1403 | for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) |
---|
1404 | *intpt++ = *intpt2++; |
---|
1405 | for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) |
---|
1406 | *intpt++ = *intpt2++; |
---|
1407 | } |
---|
1408 | *intpt = -1; |
---|
1409 | // fprintf( stderr, "testtopol = \n" ); |
---|
1410 | // for( i=0; testtopol[i]>-1; i++ ) fprintf( stderr, " %03d", testtopol[i]+1 ); |
---|
1411 | // fprintf( stderr, "\n" ); |
---|
1412 | #endif |
---|
1413 | for( i=0; i<ngroup; i++ ) |
---|
1414 | { |
---|
1415 | // fprintf( stderr, "groups[%d] = \n", i ); |
---|
1416 | // for( j=0; groups[i][j]>-1; j++ ) fprintf( stderr, " %03d", groups[i][j]+1 ); |
---|
1417 | // fprintf( stderr, "\n" ); |
---|
1418 | if( overlapmember( groups[i], testtopol ) ) |
---|
1419 | { |
---|
1420 | if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) ) |
---|
1421 | { |
---|
1422 | if( !warned[i] ) |
---|
1423 | { |
---|
1424 | warned[i] = 1; |
---|
1425 | fprintf( stderr, "\n###################################################################\n" ); |
---|
1426 | fprintf( stderr, "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 ); |
---|
1427 | fprintf( stderr, "###################################################################\n" ); |
---|
1428 | } |
---|
1429 | inconsistent[im][jm] = 1; |
---|
1430 | inconsistentpairlist[ninconsistentpairs][0] = im; |
---|
1431 | inconsistentpairlist[ninconsistentpairs][1] = jm; |
---|
1432 | ninconsistentpairs++; |
---|
1433 | break; |
---|
1434 | } |
---|
1435 | } |
---|
1436 | } |
---|
1437 | if( i == ngroup ) |
---|
1438 | { |
---|
1439 | // fprintf( stderr, "OK\n" ); |
---|
1440 | break; |
---|
1441 | } |
---|
1442 | } |
---|
1443 | |
---|
1444 | |
---|
1445 | prevnode = hist[im]; |
---|
1446 | if( dep ) dep[k].child0 = prevnode; |
---|
1447 | nmemim = nmemar[im]; |
---|
1448 | intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); |
---|
1449 | if( prevnode == -1 ) |
---|
1450 | { |
---|
1451 | *intpt++ = im; |
---|
1452 | *intpt = -1; |
---|
1453 | } |
---|
1454 | else |
---|
1455 | { |
---|
1456 | pt1 = topol[prevnode][0]; |
---|
1457 | pt2 = topol[prevnode][1]; |
---|
1458 | if( *pt1 > *pt2 ) |
---|
1459 | { |
---|
1460 | pt11 = pt2; |
---|
1461 | pt22 = pt1; |
---|
1462 | } |
---|
1463 | else |
---|
1464 | { |
---|
1465 | pt11 = pt1; |
---|
1466 | pt22 = pt2; |
---|
1467 | } |
---|
1468 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
1469 | *intpt++ = *intpt2++; |
---|
1470 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
1471 | *intpt++ = *intpt2++; |
---|
1472 | *intpt = -1; |
---|
1473 | } |
---|
1474 | |
---|
1475 | prevnode = hist[jm]; |
---|
1476 | if( dep ) dep[k].child1 = prevnode; |
---|
1477 | nmemjm = nmemar[jm]; |
---|
1478 | intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); |
---|
1479 | if( !intpt ) |
---|
1480 | { |
---|
1481 | fprintf( stderr, "Cannot reallocate topol\n" ); |
---|
1482 | exit( 1 ); |
---|
1483 | } |
---|
1484 | if( prevnode == -1 ) |
---|
1485 | { |
---|
1486 | *intpt++ = jm; |
---|
1487 | *intpt = -1; |
---|
1488 | } |
---|
1489 | else |
---|
1490 | { |
---|
1491 | pt1 = topol[prevnode][0]; |
---|
1492 | pt2 = topol[prevnode][1]; |
---|
1493 | if( *pt1 > *pt2 ) |
---|
1494 | { |
---|
1495 | pt11 = pt2; |
---|
1496 | pt22 = pt1; |
---|
1497 | } |
---|
1498 | else |
---|
1499 | { |
---|
1500 | pt11 = pt1; |
---|
1501 | pt22 = pt2; |
---|
1502 | } |
---|
1503 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
1504 | *intpt++ = *intpt2++; |
---|
1505 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
1506 | *intpt++ = *intpt2++; |
---|
1507 | *intpt = -1; |
---|
1508 | } |
---|
1509 | |
---|
1510 | minscore *= 0.5; |
---|
1511 | |
---|
1512 | len[k][0] = ( minscore - tmptmplen[im] ); |
---|
1513 | len[k][1] = ( minscore - tmptmplen[jm] ); |
---|
1514 | |
---|
1515 | if( dep ) dep[k].distfromtip = minscore; |
---|
1516 | // fprintf( stderr, "\n##### dep[%d].distfromtip = %f\n", k, minscore ); |
---|
1517 | |
---|
1518 | tmptmplen[im] = minscore; |
---|
1519 | |
---|
1520 | hist[im] = k; |
---|
1521 | nmemar[im] = nmemim + nmemjm; |
---|
1522 | |
---|
1523 | mindisfrom[im] = 999.9; |
---|
1524 | eff[im][jm-im] = 999.9; |
---|
1525 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
1526 | { |
---|
1527 | i = acpti->pos; |
---|
1528 | if( i != im && i != jm ) |
---|
1529 | { |
---|
1530 | if( i < im ) |
---|
1531 | { |
---|
1532 | miniim = i; |
---|
1533 | maxiim = im; |
---|
1534 | minijm = i; |
---|
1535 | maxijm = jm; |
---|
1536 | } |
---|
1537 | else if( i < jm ) |
---|
1538 | { |
---|
1539 | miniim = im; |
---|
1540 | maxiim = i; |
---|
1541 | minijm = i; |
---|
1542 | maxijm = jm; |
---|
1543 | } |
---|
1544 | else |
---|
1545 | { |
---|
1546 | miniim = im; |
---|
1547 | maxiim = i; |
---|
1548 | minijm = jm; |
---|
1549 | maxijm = i; |
---|
1550 | } |
---|
1551 | eff0 = eff[miniim][maxiim-miniim]; |
---|
1552 | eff1 = eff[minijm][maxijm-minijm]; |
---|
1553 | #if 0 |
---|
1554 | tmpfloat = eff[miniim][maxiim-miniim] = |
---|
1555 | MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; |
---|
1556 | #else |
---|
1557 | tmpfloat = eff[miniim][maxiim-miniim] = |
---|
1558 | (clusterfuncpt[0])( eff0, eff1 ); |
---|
1559 | #endif |
---|
1560 | #if 1 |
---|
1561 | if( tmpfloat < mindisfrom[i] ) |
---|
1562 | { |
---|
1563 | mindisfrom[i] = tmpfloat; |
---|
1564 | nearest[i] = im; |
---|
1565 | } |
---|
1566 | if( tmpfloat < mindisfrom[im] ) |
---|
1567 | { |
---|
1568 | mindisfrom[im] = tmpfloat; |
---|
1569 | nearest[im] = i; |
---|
1570 | } |
---|
1571 | if( nearest[i] == jm ) |
---|
1572 | { |
---|
1573 | nearest[i] = im; |
---|
1574 | } |
---|
1575 | #endif |
---|
1576 | } |
---|
1577 | } |
---|
1578 | |
---|
1579 | treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo |
---|
1580 | if( !treetmp ) |
---|
1581 | { |
---|
1582 | fprintf( stderr, "Cannot allocate treetmp\n" ); |
---|
1583 | exit( 1 ); |
---|
1584 | } |
---|
1585 | sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); |
---|
1586 | free( tree[im] ); |
---|
1587 | free( tree[jm] ); |
---|
1588 | tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); |
---|
1589 | tree[jm] = NULL; |
---|
1590 | if( tree[im] == NULL ) |
---|
1591 | { |
---|
1592 | fprintf( stderr, "Cannot reallocate tree!\n" ); |
---|
1593 | exit( 1 ); |
---|
1594 | } |
---|
1595 | strcpy( tree[im], treetmp ); |
---|
1596 | |
---|
1597 | acjmprev = ac[jm].prev; |
---|
1598 | acjmnext = ac[jm].next; |
---|
1599 | acjmprev->next = acjmnext; |
---|
1600 | if( acjmnext != NULL ) |
---|
1601 | acjmnext->prev = acjmprev; |
---|
1602 | free( (void *)eff[jm] ); eff[jm] = NULL; |
---|
1603 | |
---|
1604 | #if 1 // muscle seems to miss this. |
---|
1605 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
1606 | { |
---|
1607 | i = acpti->pos; |
---|
1608 | if( nearest[i] == im ) |
---|
1609 | { |
---|
1610 | // fprintf( stderr, "calling setnearest\n" ); |
---|
1611 | setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); |
---|
1612 | } |
---|
1613 | } |
---|
1614 | #endif |
---|
1615 | |
---|
1616 | |
---|
1617 | #if 0 |
---|
1618 | fprintf( stderr, "\noSTEP-%03d:\n", k+1 ); |
---|
1619 | fprintf( stderr, "len0 = %f\n", len[k][0] ); |
---|
1620 | for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 ); |
---|
1621 | fprintf( stderr, "\n" ); |
---|
1622 | fprintf( stderr, "len1 = %f\n", len[k][1] ); |
---|
1623 | for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 ); |
---|
1624 | fprintf( stderr, "\n\n" ); |
---|
1625 | #endif |
---|
1626 | } |
---|
1627 | fp = fopen( "infile.tree", "w" ); |
---|
1628 | fprintf( fp, "%s\n", treetmp ); |
---|
1629 | fclose( fp ); |
---|
1630 | |
---|
1631 | free( tree[0] ); |
---|
1632 | free( tree ); |
---|
1633 | free( treetmp ); |
---|
1634 | free( nametmp ); |
---|
1635 | free( (void *)tmptmplen ); tmptmplen = NULL; |
---|
1636 | free( hist ); hist = NULL; |
---|
1637 | free( (char *)ac ); ac = NULL; |
---|
1638 | free( (void *)nmemar ); nmemar = NULL; |
---|
1639 | free( mindisfrom ); |
---|
1640 | free( nearest ); |
---|
1641 | free( testtopol ); |
---|
1642 | FreeIntMtx( inconsistent ); |
---|
1643 | FreeIntMtx( inconsistentpairlist ); |
---|
1644 | free( warned ); |
---|
1645 | } |
---|
1646 | |
---|
1647 | void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep *dep ) |
---|
1648 | { |
---|
1649 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
1650 | int *intpt, *intpt2; |
---|
1651 | float tmpfloat; |
---|
1652 | float eff1, eff0; |
---|
1653 | float *tmptmplen = NULL; //static? |
---|
1654 | int *hist = NULL; //static? |
---|
1655 | Bchain *ac = NULL; //static? |
---|
1656 | int im = -1, jm = -1; |
---|
1657 | Bchain *acjmnext, *acjmprev; |
---|
1658 | int prevnode; |
---|
1659 | Bchain *acpti; |
---|
1660 | int *pt1, *pt2, *pt11, *pt22; |
---|
1661 | int *nmemar; //static? |
---|
1662 | int nmemim, nmemjm; |
---|
1663 | float minscore; |
---|
1664 | int *nearest = NULL; // by D.Mathog, a guess |
---|
1665 | float *mindisfrom = NULL; // by D.Mathog, a guess |
---|
1666 | char **tree; //static? |
---|
1667 | char *treetmp; //static? |
---|
1668 | char *nametmp, *nameptr, *tmpptr; //static? |
---|
1669 | FILE *fp; |
---|
1670 | float (*clusterfuncpt[1])(float,float); |
---|
1671 | char namec; |
---|
1672 | |
---|
1673 | |
---|
1674 | sueff1 = 1 - SUEFF; |
---|
1675 | sueff05 = SUEFF * 0.5; |
---|
1676 | if ( treemethod == 'X' ) |
---|
1677 | clusterfuncpt[0] = cluster_mix_float; |
---|
1678 | else if ( treemethod == 'E' ) |
---|
1679 | clusterfuncpt[0] = cluster_average_float; |
---|
1680 | else if ( treemethod == 'q' ) |
---|
1681 | clusterfuncpt[0] = cluster_minimum_float; |
---|
1682 | else |
---|
1683 | { |
---|
1684 | fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); |
---|
1685 | exit( 1 ); |
---|
1686 | } |
---|
1687 | |
---|
1688 | if( !hist ) |
---|
1689 | { |
---|
1690 | hist = AllocateIntVec( njob ); |
---|
1691 | tmptmplen = AllocateFloatVec( njob ); |
---|
1692 | ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); |
---|
1693 | nmemar = AllocateIntVec( njob ); |
---|
1694 | mindisfrom = AllocateFloatVec( njob ); |
---|
1695 | nearest = AllocateIntVec( njob ); |
---|
1696 | // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? |
---|
1697 | treetmp = NULL; // kentou 2013/06/12 |
---|
1698 | nametmp = AllocateCharVec( 1000 ); // nagasugi |
---|
1699 | // tree = AllocateCharMtx( njob, njob*600 ); |
---|
1700 | tree = AllocateCharMtx( njob, 0 ); |
---|
1701 | } |
---|
1702 | |
---|
1703 | |
---|
1704 | for( i=0; i<nseq; i++ ) |
---|
1705 | { |
---|
1706 | for( j=0; j<999; j++ ) nametmp[j] = 0; |
---|
1707 | for( j=0; j<999; j++ ) |
---|
1708 | { |
---|
1709 | namec = name[i][j]; |
---|
1710 | if( namec == 0 ) |
---|
1711 | break; |
---|
1712 | else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' ) |
---|
1713 | nametmp[j] = namec; |
---|
1714 | else |
---|
1715 | nametmp[j] = '_'; |
---|
1716 | } |
---|
1717 | nametmp[j] = 0; |
---|
1718 | // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 ); |
---|
1719 | if( outnumber ) |
---|
1720 | nameptr = strstr( nametmp, "_numo_e" ) + 8; |
---|
1721 | else |
---|
1722 | nameptr = nametmp + 1; |
---|
1723 | |
---|
1724 | if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame |
---|
1725 | |
---|
1726 | tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 |
---|
1727 | if( tree[i] == NULL ) |
---|
1728 | { |
---|
1729 | fprintf( stderr, "Cannot allocate tree!\n" ); |
---|
1730 | exit( 1 ); |
---|
1731 | } |
---|
1732 | sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); |
---|
1733 | } |
---|
1734 | for( i=0; i<nseq; i++ ) |
---|
1735 | { |
---|
1736 | ac[i].next = ac+i+1; |
---|
1737 | ac[i].prev = ac+i-1; |
---|
1738 | ac[i].pos = i; |
---|
1739 | } |
---|
1740 | ac[nseq-1].next = NULL; |
---|
1741 | |
---|
1742 | for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle |
---|
1743 | |
---|
1744 | for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0; |
---|
1745 | for( i=0; i<nseq; i++ ) |
---|
1746 | { |
---|
1747 | hist[i] = -1; |
---|
1748 | nmemar[i] = 1; |
---|
1749 | } |
---|
1750 | |
---|
1751 | fprintf( stderr, "\n" ); |
---|
1752 | for( k=0; k<nseq-1; k++ ) |
---|
1753 | { |
---|
1754 | if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq ); |
---|
1755 | |
---|
1756 | minscore = 999.9; |
---|
1757 | for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) |
---|
1758 | { |
---|
1759 | i = acpti->pos; |
---|
1760 | // fprintf( stderr, "k=%d i=%d\n", k, i ); |
---|
1761 | if( mindisfrom[i] < minscore ) // muscle |
---|
1762 | { |
---|
1763 | im = i; |
---|
1764 | minscore = mindisfrom[i]; |
---|
1765 | } |
---|
1766 | } |
---|
1767 | jm = nearest[im]; |
---|
1768 | if( jm < im ) |
---|
1769 | { |
---|
1770 | j=jm; jm=im; im=j; |
---|
1771 | } |
---|
1772 | |
---|
1773 | |
---|
1774 | prevnode = hist[im]; |
---|
1775 | if( dep ) dep[k].child0 = prevnode; |
---|
1776 | nmemim = nmemar[im]; |
---|
1777 | intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); |
---|
1778 | if( prevnode == -1 ) |
---|
1779 | { |
---|
1780 | *intpt++ = im; |
---|
1781 | *intpt = -1; |
---|
1782 | } |
---|
1783 | else |
---|
1784 | { |
---|
1785 | pt1 = topol[prevnode][0]; |
---|
1786 | pt2 = topol[prevnode][1]; |
---|
1787 | if( *pt1 > *pt2 ) |
---|
1788 | { |
---|
1789 | pt11 = pt2; |
---|
1790 | pt22 = pt1; |
---|
1791 | } |
---|
1792 | else |
---|
1793 | { |
---|
1794 | pt11 = pt1; |
---|
1795 | pt22 = pt2; |
---|
1796 | } |
---|
1797 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
1798 | *intpt++ = *intpt2++; |
---|
1799 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
1800 | *intpt++ = *intpt2++; |
---|
1801 | *intpt = -1; |
---|
1802 | } |
---|
1803 | |
---|
1804 | prevnode = hist[jm]; |
---|
1805 | if( dep ) dep[k].child1 = prevnode; |
---|
1806 | nmemjm = nmemar[jm]; |
---|
1807 | intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); |
---|
1808 | if( !intpt ) |
---|
1809 | { |
---|
1810 | fprintf( stderr, "Cannot reallocate topol\n" ); |
---|
1811 | exit( 1 ); |
---|
1812 | } |
---|
1813 | if( prevnode == -1 ) |
---|
1814 | { |
---|
1815 | *intpt++ = jm; |
---|
1816 | *intpt = -1; |
---|
1817 | } |
---|
1818 | else |
---|
1819 | { |
---|
1820 | pt1 = topol[prevnode][0]; |
---|
1821 | pt2 = topol[prevnode][1]; |
---|
1822 | if( *pt1 > *pt2 ) |
---|
1823 | { |
---|
1824 | pt11 = pt2; |
---|
1825 | pt22 = pt1; |
---|
1826 | } |
---|
1827 | else |
---|
1828 | { |
---|
1829 | pt11 = pt1; |
---|
1830 | pt22 = pt2; |
---|
1831 | } |
---|
1832 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
1833 | *intpt++ = *intpt2++; |
---|
1834 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
1835 | *intpt++ = *intpt2++; |
---|
1836 | *intpt = -1; |
---|
1837 | } |
---|
1838 | |
---|
1839 | minscore *= 0.5; |
---|
1840 | |
---|
1841 | len[k][0] = ( minscore - tmptmplen[im] ); |
---|
1842 | len[k][1] = ( minscore - tmptmplen[jm] ); |
---|
1843 | |
---|
1844 | if( dep ) dep[k].distfromtip = minscore; |
---|
1845 | // fprintf( stderr, "\n##### dep[%d].distfromtip = %f\n", k, minscore ); |
---|
1846 | |
---|
1847 | tmptmplen[im] = minscore; |
---|
1848 | |
---|
1849 | hist[im] = k; |
---|
1850 | nmemar[im] = nmemim + nmemjm; |
---|
1851 | |
---|
1852 | mindisfrom[im] = 999.9; |
---|
1853 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
1854 | { |
---|
1855 | i = acpti->pos; |
---|
1856 | if( i != im && i != jm ) |
---|
1857 | { |
---|
1858 | if( i < im ) |
---|
1859 | { |
---|
1860 | miniim = i; |
---|
1861 | maxiim = im; |
---|
1862 | minijm = i; |
---|
1863 | maxijm = jm; |
---|
1864 | } |
---|
1865 | else if( i < jm ) |
---|
1866 | { |
---|
1867 | miniim = im; |
---|
1868 | maxiim = i; |
---|
1869 | minijm = i; |
---|
1870 | maxijm = jm; |
---|
1871 | } |
---|
1872 | else |
---|
1873 | { |
---|
1874 | miniim = im; |
---|
1875 | maxiim = i; |
---|
1876 | minijm = jm; |
---|
1877 | maxijm = i; |
---|
1878 | } |
---|
1879 | eff0 = eff[miniim][maxiim-miniim]; |
---|
1880 | eff1 = eff[minijm][maxijm-minijm]; |
---|
1881 | #if 0 |
---|
1882 | tmpfloat = eff[miniim][maxiim-miniim] = |
---|
1883 | MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; |
---|
1884 | #else |
---|
1885 | tmpfloat = eff[miniim][maxiim-miniim] = |
---|
1886 | (clusterfuncpt[0])( eff0, eff1 ); |
---|
1887 | #endif |
---|
1888 | if( tmpfloat < mindisfrom[i] ) |
---|
1889 | { |
---|
1890 | mindisfrom[i] = tmpfloat; |
---|
1891 | nearest[i] = im; |
---|
1892 | } |
---|
1893 | if( tmpfloat < mindisfrom[im] ) |
---|
1894 | { |
---|
1895 | mindisfrom[im] = tmpfloat; |
---|
1896 | nearest[im] = i; |
---|
1897 | } |
---|
1898 | if( nearest[i] == jm ) |
---|
1899 | { |
---|
1900 | nearest[i] = im; |
---|
1901 | } |
---|
1902 | } |
---|
1903 | } |
---|
1904 | |
---|
1905 | treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo |
---|
1906 | if( !treetmp ) |
---|
1907 | { |
---|
1908 | fprintf( stderr, "Cannot allocate treetmp\n" ); |
---|
1909 | exit( 1 ); |
---|
1910 | } |
---|
1911 | sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); |
---|
1912 | free( tree[im] ); |
---|
1913 | free( tree[jm] ); |
---|
1914 | tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); |
---|
1915 | tree[jm] = NULL; |
---|
1916 | if( tree[im] == NULL ) |
---|
1917 | { |
---|
1918 | fprintf( stderr, "Cannot reallocate tree!\n" ); |
---|
1919 | exit( 1 ); |
---|
1920 | } |
---|
1921 | strcpy( tree[im], treetmp ); |
---|
1922 | |
---|
1923 | acjmprev = ac[jm].prev; |
---|
1924 | acjmnext = ac[jm].next; |
---|
1925 | acjmprev->next = acjmnext; |
---|
1926 | if( acjmnext != NULL ) |
---|
1927 | acjmnext->prev = acjmprev; |
---|
1928 | free( (void *)eff[jm] ); eff[jm] = NULL; |
---|
1929 | |
---|
1930 | #if 1 // muscle seems to miss this. |
---|
1931 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
1932 | { |
---|
1933 | i = acpti->pos; |
---|
1934 | if( nearest[i] == im ) |
---|
1935 | { |
---|
1936 | // fprintf( stderr, "calling setnearest\n" ); |
---|
1937 | setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); |
---|
1938 | } |
---|
1939 | } |
---|
1940 | #endif |
---|
1941 | |
---|
1942 | |
---|
1943 | #if 0 |
---|
1944 | fprintf( stderr, "\nooSTEP-%03d:\n", k+1 ); |
---|
1945 | fprintf( stderr, "len0 = %f\n", len[k][0] ); |
---|
1946 | for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 ); |
---|
1947 | fprintf( stderr, "\n" ); |
---|
1948 | fprintf( stderr, "len1 = %f\n", len[k][1] ); |
---|
1949 | for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 ); |
---|
1950 | fprintf( stderr, "\n" ); |
---|
1951 | #endif |
---|
1952 | } |
---|
1953 | fp = fopen( "infile.tree", "w" ); |
---|
1954 | fprintf( fp, "%s\n", treetmp ); |
---|
1955 | fclose( fp ); |
---|
1956 | |
---|
1957 | free( tree[0] ); |
---|
1958 | free( tree ); |
---|
1959 | free( treetmp ); |
---|
1960 | free( nametmp ); |
---|
1961 | free( (void *)tmptmplen ); tmptmplen = NULL; |
---|
1962 | free( hist ); hist = NULL; |
---|
1963 | free( (char *)ac ); ac = NULL; |
---|
1964 | free( (void *)nmemar ); nmemar = NULL; |
---|
1965 | free( mindisfrom ); |
---|
1966 | free( nearest ); |
---|
1967 | } |
---|
1968 | |
---|
1969 | void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name ) |
---|
1970 | { |
---|
1971 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
1972 | int *intpt, *intpt2; |
---|
1973 | double tmpfloat; |
---|
1974 | double eff1, eff0; |
---|
1975 | static double *tmptmplen = NULL; |
---|
1976 | static int *hist = NULL; |
---|
1977 | static Bchain *ac = NULL; |
---|
1978 | int im = -1, jm = -1; |
---|
1979 | Bchain *acjmnext, *acjmprev; |
---|
1980 | int prevnode; |
---|
1981 | Bchain *acpti; |
---|
1982 | int *pt1, *pt2, *pt11, *pt22; |
---|
1983 | static int *nmemar; |
---|
1984 | int nmemim, nmemjm; |
---|
1985 | double minscore; |
---|
1986 | int *nearest = NULL; // by D.Mathog, a guess |
---|
1987 | double *mindisfrom = NULL; // by D.Mathog, a guess |
---|
1988 | static char **tree; |
---|
1989 | static char *treetmp; |
---|
1990 | static char *nametmp, *nameptr, *tmpptr; |
---|
1991 | FILE *fp; |
---|
1992 | double (*clusterfuncpt[1])(double,double); |
---|
1993 | char namec; |
---|
1994 | |
---|
1995 | |
---|
1996 | sueff1_double = 1 - SUEFF; |
---|
1997 | sueff05_double = SUEFF * 0.5; |
---|
1998 | if ( treemethod == 'X' ) |
---|
1999 | clusterfuncpt[0] = cluster_mix_double; |
---|
2000 | else if ( treemethod == 'E' ) |
---|
2001 | clusterfuncpt[0] = cluster_average_double; |
---|
2002 | else if ( treemethod == 'q' ) |
---|
2003 | clusterfuncpt[0] = cluster_minimum_double; |
---|
2004 | else |
---|
2005 | { |
---|
2006 | fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); |
---|
2007 | exit( 1 ); |
---|
2008 | } |
---|
2009 | |
---|
2010 | |
---|
2011 | |
---|
2012 | |
---|
2013 | |
---|
2014 | #if 0 |
---|
2015 | if( !hist ) |
---|
2016 | { |
---|
2017 | hist = AllocateIntVec( njob ); |
---|
2018 | tmptmplen = AllocateDoubleVec( njob ); |
---|
2019 | ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); |
---|
2020 | nmemar = AllocateIntVec( njob ); |
---|
2021 | mindisfrom = AllocateDoubleVec( njob ); |
---|
2022 | nearest = AllocateIntVec( njob ); |
---|
2023 | treetmp = AllocateCharVec( njob*150 ); |
---|
2024 | nametmp = AllocateCharVec( 91 ); |
---|
2025 | tree = AllocateCharMtx( njob, njob*150 ); |
---|
2026 | } |
---|
2027 | for( i=0; i<nseq; i++ ) |
---|
2028 | { |
---|
2029 | for( j=0; j<90; j++ ) nametmp[j] = 0; |
---|
2030 | for( j=0; j<90; j++ ) |
---|
2031 | { |
---|
2032 | if( name[i][j] == 0 ) |
---|
2033 | break; |
---|
2034 | else if( isalnum( name[i][j] ) ) |
---|
2035 | nametmp[j] = name[i][j]; |
---|
2036 | else |
---|
2037 | nametmp[j] = '_'; |
---|
2038 | } |
---|
2039 | nametmp[90] = 0; |
---|
2040 | // sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 ); |
---|
2041 | if( outnumber ) |
---|
2042 | nameptr = strstr( nametmp, "_numo_e" ) + 8; |
---|
2043 | else |
---|
2044 | nameptr = nametmp + 1; |
---|
2045 | |
---|
2046 | if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame |
---|
2047 | |
---|
2048 | sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr ); |
---|
2049 | } |
---|
2050 | |
---|
2051 | #else |
---|
2052 | |
---|
2053 | if( !hist ) |
---|
2054 | { |
---|
2055 | hist = AllocateIntVec( njob ); |
---|
2056 | tmptmplen = AllocateDoubleVec( njob ); |
---|
2057 | ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); |
---|
2058 | nmemar = AllocateIntVec( njob ); |
---|
2059 | mindisfrom = AllocateDoubleVec( njob ); |
---|
2060 | nearest = AllocateIntVec( njob ); |
---|
2061 | // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? |
---|
2062 | treetmp = NULL; // kentou 2013/06/12 |
---|
2063 | nametmp = AllocateCharVec( 1000 ); // nagasugi |
---|
2064 | // tree = AllocateCharMtx( njob, njob*600 ); |
---|
2065 | tree = AllocateCharMtx( njob, 0 ); |
---|
2066 | } |
---|
2067 | |
---|
2068 | |
---|
2069 | for( i=0; i<nseq; i++ ) |
---|
2070 | { |
---|
2071 | for( j=0; j<999; j++ ) nametmp[j] = 0; |
---|
2072 | for( j=0; j<999; j++ ) |
---|
2073 | { |
---|
2074 | namec = name[i][j]; |
---|
2075 | if( namec == 0 ) |
---|
2076 | break; |
---|
2077 | else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' ) |
---|
2078 | nametmp[j] = namec; |
---|
2079 | else |
---|
2080 | nametmp[j] = '_'; |
---|
2081 | } |
---|
2082 | nametmp[j] = 0; |
---|
2083 | // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 ); |
---|
2084 | if( outnumber ) |
---|
2085 | nameptr = strstr( nametmp, "_numo_e" ) + 8; |
---|
2086 | else |
---|
2087 | nameptr = nametmp + 1; |
---|
2088 | |
---|
2089 | if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame |
---|
2090 | |
---|
2091 | tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 |
---|
2092 | if( tree[i] == NULL ) |
---|
2093 | { |
---|
2094 | fprintf( stderr, "Cannot allocate tree!\n" ); |
---|
2095 | exit( 1 ); |
---|
2096 | } |
---|
2097 | sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); |
---|
2098 | } |
---|
2099 | |
---|
2100 | #endif |
---|
2101 | |
---|
2102 | |
---|
2103 | |
---|
2104 | |
---|
2105 | |
---|
2106 | |
---|
2107 | |
---|
2108 | |
---|
2109 | for( i=0; i<nseq; i++ ) |
---|
2110 | { |
---|
2111 | ac[i].next = ac+i+1; |
---|
2112 | ac[i].prev = ac+i-1; |
---|
2113 | ac[i].pos = i; |
---|
2114 | } |
---|
2115 | ac[nseq-1].next = NULL; |
---|
2116 | |
---|
2117 | for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle |
---|
2118 | |
---|
2119 | for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0; |
---|
2120 | for( i=0; i<nseq; i++ ) |
---|
2121 | { |
---|
2122 | hist[i] = -1; |
---|
2123 | nmemar[i] = 1; |
---|
2124 | } |
---|
2125 | |
---|
2126 | fprintf( stderr, "\n" ); |
---|
2127 | for( k=0; k<nseq-1; k++ ) |
---|
2128 | { |
---|
2129 | if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq ); |
---|
2130 | |
---|
2131 | minscore = 999.9; |
---|
2132 | for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) |
---|
2133 | { |
---|
2134 | i = acpti->pos; |
---|
2135 | // fprintf( stderr, "k=%d i=%d\n", k, i ); |
---|
2136 | if( mindisfrom[i] < minscore ) // muscle |
---|
2137 | { |
---|
2138 | im = i; |
---|
2139 | minscore = mindisfrom[i]; |
---|
2140 | } |
---|
2141 | } |
---|
2142 | jm = nearest[im]; |
---|
2143 | if( jm < im ) |
---|
2144 | { |
---|
2145 | j=jm; jm=im; im=j; |
---|
2146 | } |
---|
2147 | |
---|
2148 | |
---|
2149 | prevnode = hist[im]; |
---|
2150 | nmemim = nmemar[im]; |
---|
2151 | // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); |
---|
2152 | intpt = topol[k][0]; |
---|
2153 | if( prevnode == -1 ) |
---|
2154 | { |
---|
2155 | *intpt++ = im; |
---|
2156 | *intpt = -1; |
---|
2157 | } |
---|
2158 | else |
---|
2159 | { |
---|
2160 | pt1 = topol[prevnode][0]; |
---|
2161 | pt2 = topol[prevnode][1]; |
---|
2162 | if( *pt1 > *pt2 ) |
---|
2163 | { |
---|
2164 | pt11 = pt2; |
---|
2165 | pt22 = pt1; |
---|
2166 | } |
---|
2167 | else |
---|
2168 | { |
---|
2169 | pt11 = pt1; |
---|
2170 | pt22 = pt2; |
---|
2171 | } |
---|
2172 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
2173 | *intpt++ = *intpt2++; |
---|
2174 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
2175 | *intpt++ = *intpt2++; |
---|
2176 | *intpt = -1; |
---|
2177 | } |
---|
2178 | |
---|
2179 | prevnode = hist[jm]; |
---|
2180 | nmemjm = nmemar[jm]; |
---|
2181 | // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); |
---|
2182 | intpt = topol[k][1]; |
---|
2183 | if( prevnode == -1 ) |
---|
2184 | { |
---|
2185 | *intpt++ = jm; |
---|
2186 | *intpt = -1; |
---|
2187 | } |
---|
2188 | else |
---|
2189 | { |
---|
2190 | pt1 = topol[prevnode][0]; |
---|
2191 | pt2 = topol[prevnode][1]; |
---|
2192 | if( *pt1 > *pt2 ) |
---|
2193 | { |
---|
2194 | pt11 = pt2; |
---|
2195 | pt22 = pt1; |
---|
2196 | } |
---|
2197 | else |
---|
2198 | { |
---|
2199 | pt11 = pt1; |
---|
2200 | pt22 = pt2; |
---|
2201 | } |
---|
2202 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
2203 | *intpt++ = *intpt2++; |
---|
2204 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
2205 | *intpt++ = *intpt2++; |
---|
2206 | *intpt = -1; |
---|
2207 | } |
---|
2208 | |
---|
2209 | minscore *= 0.5; |
---|
2210 | |
---|
2211 | len[k][0] = ( minscore - tmptmplen[im] ); |
---|
2212 | len[k][1] = ( minscore - tmptmplen[jm] ); |
---|
2213 | |
---|
2214 | |
---|
2215 | tmptmplen[im] = minscore; |
---|
2216 | |
---|
2217 | hist[im] = k; |
---|
2218 | nmemar[im] = nmemim + nmemjm; |
---|
2219 | |
---|
2220 | mindisfrom[im] = 999.9; |
---|
2221 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
2222 | { |
---|
2223 | i = acpti->pos; |
---|
2224 | if( i != im && i != jm ) |
---|
2225 | { |
---|
2226 | if( i < im ) |
---|
2227 | { |
---|
2228 | miniim = i; |
---|
2229 | maxiim = im; |
---|
2230 | minijm = i; |
---|
2231 | maxijm = jm; |
---|
2232 | } |
---|
2233 | else if( i < jm ) |
---|
2234 | { |
---|
2235 | miniim = im; |
---|
2236 | maxiim = i; |
---|
2237 | minijm = i; |
---|
2238 | maxijm = jm; |
---|
2239 | } |
---|
2240 | else |
---|
2241 | { |
---|
2242 | miniim = im; |
---|
2243 | maxiim = i; |
---|
2244 | minijm = jm; |
---|
2245 | maxijm = i; |
---|
2246 | } |
---|
2247 | eff0 = eff[miniim][maxiim]; |
---|
2248 | eff1 = eff[minijm][maxijm]; |
---|
2249 | #if 0 |
---|
2250 | tmpfloat = eff[miniim][maxiim] = |
---|
2251 | MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; |
---|
2252 | #else |
---|
2253 | tmpfloat = eff[miniim][maxiim] = |
---|
2254 | (clusterfuncpt[0])( eff0, eff1 ); |
---|
2255 | #endif |
---|
2256 | if( tmpfloat < mindisfrom[i] ) |
---|
2257 | { |
---|
2258 | mindisfrom[i] = tmpfloat; |
---|
2259 | nearest[i] = im; |
---|
2260 | } |
---|
2261 | if( tmpfloat < mindisfrom[im] ) |
---|
2262 | { |
---|
2263 | mindisfrom[im] = tmpfloat; |
---|
2264 | nearest[im] = i; |
---|
2265 | } |
---|
2266 | if( nearest[i] == jm ) |
---|
2267 | { |
---|
2268 | nearest[i] = im; |
---|
2269 | } |
---|
2270 | } |
---|
2271 | } |
---|
2272 | #if 0 |
---|
2273 | sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); |
---|
2274 | strcpy( tree[im], treetmp ); |
---|
2275 | #else |
---|
2276 | treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo |
---|
2277 | if( !treetmp ) |
---|
2278 | { |
---|
2279 | fprintf( stderr, "Cannot allocate treetmp\n" ); |
---|
2280 | exit( 1 ); |
---|
2281 | } |
---|
2282 | sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); |
---|
2283 | free( tree[im] ); |
---|
2284 | free( tree[jm] ); |
---|
2285 | tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); |
---|
2286 | tree[jm] = NULL; |
---|
2287 | if( tree[im] == NULL ) |
---|
2288 | { |
---|
2289 | fprintf( stderr, "Cannot reallocate tree!\n" ); |
---|
2290 | exit( 1 ); |
---|
2291 | } |
---|
2292 | strcpy( tree[im], treetmp ); |
---|
2293 | #endif |
---|
2294 | |
---|
2295 | acjmprev = ac[jm].prev; |
---|
2296 | acjmnext = ac[jm].next; |
---|
2297 | acjmprev->next = acjmnext; |
---|
2298 | if( acjmnext != NULL ) |
---|
2299 | acjmnext->prev = acjmprev; |
---|
2300 | // free( (void *)eff[jm] ); eff[jm] = NULL; |
---|
2301 | |
---|
2302 | #if 1 // muscle seems to miss this. |
---|
2303 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
2304 | { |
---|
2305 | i = acpti->pos; |
---|
2306 | if( nearest[i] == im ) |
---|
2307 | { |
---|
2308 | // fprintf( stderr, "calling setnearest\n" ); |
---|
2309 | setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); |
---|
2310 | } |
---|
2311 | } |
---|
2312 | #endif |
---|
2313 | |
---|
2314 | |
---|
2315 | #if 0 |
---|
2316 | fprintf( stdout, "\nvSTEP-%03d:\n", k+1 ); |
---|
2317 | fprintf( stdout, "len0 = %f\n", len[k][0] ); |
---|
2318 | for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); |
---|
2319 | fprintf( stdout, "\n" ); |
---|
2320 | fprintf( stdout, "len1 = %f\n", len[k][1] ); |
---|
2321 | for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); |
---|
2322 | fprintf( stdout, "\n" ); |
---|
2323 | #endif |
---|
2324 | } |
---|
2325 | fp = fopen( "infile.tree", "w" ); |
---|
2326 | fprintf( fp, "%s\n", treetmp ); |
---|
2327 | fclose( fp ); |
---|
2328 | #if 0 |
---|
2329 | FreeCharMtx( tree ); |
---|
2330 | #else |
---|
2331 | free( tree[0] ); |
---|
2332 | free( tree ); |
---|
2333 | #endif |
---|
2334 | free( treetmp ); |
---|
2335 | free( nametmp ); |
---|
2336 | free( (void *)tmptmplen ); tmptmplen = NULL; |
---|
2337 | free( hist ); hist = NULL; |
---|
2338 | free( (char *)ac ); ac = NULL; |
---|
2339 | free( (void *)nmemar ); nmemar = NULL; |
---|
2340 | free( mindisfrom ); |
---|
2341 | free( nearest ); |
---|
2342 | } |
---|
2343 | |
---|
2344 | void fixed_supg_double_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int ngroup, int **groups ) |
---|
2345 | { |
---|
2346 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
2347 | int *intpt, *intpt2; |
---|
2348 | double tmpfloat; |
---|
2349 | double eff1, eff0; |
---|
2350 | static double *tmptmplen = NULL; |
---|
2351 | static int *hist = NULL; |
---|
2352 | static Bchain *ac = NULL; |
---|
2353 | int im = -1, jm = -1; |
---|
2354 | Bchain *acjmnext, *acjmprev; |
---|
2355 | int prevnode; |
---|
2356 | Bchain *acpti, *acptj; |
---|
2357 | int *pt1, *pt2, *pt11, *pt22; |
---|
2358 | static int *nmemar; |
---|
2359 | int nmemim, nmemjm; |
---|
2360 | double minscore; |
---|
2361 | int *nearest = NULL; // by D.Mathog, a guess |
---|
2362 | double *mindisfrom = NULL; // by D.Mathog, a guess |
---|
2363 | static char **tree; |
---|
2364 | static char *treetmp; |
---|
2365 | static char *nametmp, *nameptr, *tmpptr; |
---|
2366 | FILE *fp; |
---|
2367 | double (*clusterfuncpt[1])(double,double); |
---|
2368 | char namec; |
---|
2369 | int *testtopol, **inconsistent; |
---|
2370 | int **inconsistentpairlist; |
---|
2371 | int ninconsistentpairs; |
---|
2372 | int *warned; |
---|
2373 | int allinconsistent; |
---|
2374 | int firsttime; |
---|
2375 | |
---|
2376 | |
---|
2377 | sueff1_double = 1 - SUEFF; |
---|
2378 | sueff05_double = SUEFF * 0.5; |
---|
2379 | if ( treemethod == 'X' ) |
---|
2380 | clusterfuncpt[0] = cluster_mix_double; |
---|
2381 | else if ( treemethod == 'E' ) |
---|
2382 | clusterfuncpt[0] = cluster_average_double; |
---|
2383 | else if ( treemethod == 'q' ) |
---|
2384 | clusterfuncpt[0] = cluster_minimum_double; |
---|
2385 | else |
---|
2386 | { |
---|
2387 | fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); |
---|
2388 | exit( 1 ); |
---|
2389 | } |
---|
2390 | |
---|
2391 | |
---|
2392 | |
---|
2393 | |
---|
2394 | |
---|
2395 | #if 0 |
---|
2396 | if( !hist ) |
---|
2397 | { |
---|
2398 | hist = AllocateIntVec( njob ); |
---|
2399 | tmptmplen = AllocateDoubleVec( njob ); |
---|
2400 | ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); |
---|
2401 | nmemar = AllocateIntVec( njob ); |
---|
2402 | mindisfrom = AllocateDoubleVec( njob ); |
---|
2403 | nearest = AllocateIntVec( njob ); |
---|
2404 | treetmp = AllocateCharVec( njob*150 ); |
---|
2405 | nametmp = AllocateCharVec( 91 ); |
---|
2406 | tree = AllocateCharMtx( njob, njob*150 ); |
---|
2407 | } |
---|
2408 | for( i=0; i<nseq; i++ ) |
---|
2409 | { |
---|
2410 | for( j=0; j<90; j++ ) nametmp[j] = 0; |
---|
2411 | for( j=0; j<90; j++ ) |
---|
2412 | { |
---|
2413 | if( name[i][j] == 0 ) |
---|
2414 | break; |
---|
2415 | else if( isalnum( name[i][j] ) ) |
---|
2416 | nametmp[j] = name[i][j]; |
---|
2417 | else |
---|
2418 | nametmp[j] = '_'; |
---|
2419 | } |
---|
2420 | nametmp[90] = 0; |
---|
2421 | // sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 ); |
---|
2422 | if( outnumber ) |
---|
2423 | nameptr = strstr( nametmp, "_numo_e" ) + 8; |
---|
2424 | else |
---|
2425 | nameptr = nametmp + 1; |
---|
2426 | |
---|
2427 | if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame |
---|
2428 | |
---|
2429 | sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr ); |
---|
2430 | } |
---|
2431 | |
---|
2432 | #else |
---|
2433 | |
---|
2434 | if( !hist ) |
---|
2435 | { |
---|
2436 | hist = AllocateIntVec( njob ); |
---|
2437 | tmptmplen = AllocateDoubleVec( njob ); |
---|
2438 | ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); |
---|
2439 | nmemar = AllocateIntVec( njob ); |
---|
2440 | mindisfrom = AllocateDoubleVec( njob ); |
---|
2441 | nearest = AllocateIntVec( njob ); |
---|
2442 | // treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi? |
---|
2443 | treetmp = NULL; // kentou 2013/06/12 |
---|
2444 | nametmp = AllocateCharVec( 1000 ); // nagasugi |
---|
2445 | // tree = AllocateCharMtx( njob, njob*600 ); |
---|
2446 | tree = AllocateCharMtx( njob, 0 ); |
---|
2447 | testtopol = AllocateIntVec( njob + 1 ); |
---|
2448 | inconsistent = AllocateIntMtx( njob, njob ); // muda |
---|
2449 | inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // muda |
---|
2450 | warned = AllocateIntVec( ngroup ); |
---|
2451 | } |
---|
2452 | |
---|
2453 | |
---|
2454 | for( i=0; i<nseq; i++ ) |
---|
2455 | { |
---|
2456 | for( j=0; j<999; j++ ) nametmp[j] = 0; |
---|
2457 | for( j=0; j<999; j++ ) |
---|
2458 | { |
---|
2459 | namec = name[i][j]; |
---|
2460 | if( namec == 0 ) |
---|
2461 | break; |
---|
2462 | else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' ) |
---|
2463 | nametmp[j] = namec; |
---|
2464 | else |
---|
2465 | nametmp[j] = '_'; |
---|
2466 | } |
---|
2467 | nametmp[j] = 0; |
---|
2468 | // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 ); |
---|
2469 | if( outnumber ) |
---|
2470 | nameptr = strstr( nametmp, "_numo_e" ) + 8; |
---|
2471 | else |
---|
2472 | nameptr = nametmp + 1; |
---|
2473 | |
---|
2474 | if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame |
---|
2475 | |
---|
2476 | tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 |
---|
2477 | if( tree[i] == NULL ) |
---|
2478 | { |
---|
2479 | fprintf( stderr, "Cannot allocate tree!\n" ); |
---|
2480 | exit( 1 ); |
---|
2481 | } |
---|
2482 | sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); |
---|
2483 | } |
---|
2484 | |
---|
2485 | #endif |
---|
2486 | |
---|
2487 | |
---|
2488 | |
---|
2489 | |
---|
2490 | |
---|
2491 | |
---|
2492 | |
---|
2493 | |
---|
2494 | for( i=0; i<nseq; i++ ) |
---|
2495 | { |
---|
2496 | ac[i].next = ac+i+1; |
---|
2497 | ac[i].prev = ac+i-1; |
---|
2498 | ac[i].pos = i; |
---|
2499 | } |
---|
2500 | ac[nseq-1].next = NULL; |
---|
2501 | |
---|
2502 | for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle |
---|
2503 | |
---|
2504 | for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0; |
---|
2505 | for( i=0; i<nseq; i++ ) |
---|
2506 | { |
---|
2507 | hist[i] = -1; |
---|
2508 | nmemar[i] = 1; |
---|
2509 | } |
---|
2510 | |
---|
2511 | fprintf( stderr, "\n" ); |
---|
2512 | ninconsistentpairs = 0; |
---|
2513 | for( k=0; k<nseq-1; k++ ) |
---|
2514 | { |
---|
2515 | if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq ); |
---|
2516 | |
---|
2517 | |
---|
2518 | |
---|
2519 | // for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; |
---|
2520 | for( i=0; i<ninconsistentpairs; i++ ) inconsistent[inconsistentpairlist[i][0]][inconsistentpairlist[i][1]] = 0; |
---|
2521 | ninconsistentpairs = 0; |
---|
2522 | firsttime = 1; |
---|
2523 | while( 1 ) |
---|
2524 | { |
---|
2525 | if( firsttime ) |
---|
2526 | { |
---|
2527 | firsttime = 0; |
---|
2528 | minscore = 999.9; |
---|
2529 | for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) |
---|
2530 | { |
---|
2531 | i = acpti->pos; |
---|
2532 | // fprintf( stderr, "k=%d i=%d\n", k, i ); |
---|
2533 | if( mindisfrom[i] < minscore ) // muscle |
---|
2534 | { |
---|
2535 | im = i; |
---|
2536 | minscore = mindisfrom[i]; |
---|
2537 | } |
---|
2538 | } |
---|
2539 | jm = nearest[im]; |
---|
2540 | if( jm < im ) |
---|
2541 | { |
---|
2542 | j=jm; jm=im; im=j; |
---|
2543 | } |
---|
2544 | } |
---|
2545 | else |
---|
2546 | { |
---|
2547 | minscore = 999.9; |
---|
2548 | for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) |
---|
2549 | { |
---|
2550 | i = acpti->pos; |
---|
2551 | // fprintf( stderr, "k=%d i=%d\n", k, i ); |
---|
2552 | for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) |
---|
2553 | { |
---|
2554 | j = acptj->pos; |
---|
2555 | if( !inconsistent[i][j] && (tmpfloat=eff[i][j]) < minscore ) |
---|
2556 | { |
---|
2557 | minscore = tmpfloat; |
---|
2558 | im = i; jm = j; |
---|
2559 | } |
---|
2560 | } |
---|
2561 | for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next ) |
---|
2562 | { |
---|
2563 | j = acptj->pos; |
---|
2564 | if( !inconsistent[j][i] && (tmpfloat=eff[j][i]) < minscore ) |
---|
2565 | { |
---|
2566 | minscore = tmpfloat; |
---|
2567 | im = j; jm = i; |
---|
2568 | } |
---|
2569 | } |
---|
2570 | } |
---|
2571 | } |
---|
2572 | |
---|
2573 | allinconsistent = 1; |
---|
2574 | for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) |
---|
2575 | { |
---|
2576 | for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) |
---|
2577 | { |
---|
2578 | if( inconsistent[acpti->pos][acptj->pos] == 0 ) |
---|
2579 | { |
---|
2580 | allinconsistent = 0; |
---|
2581 | goto exitloop_d; |
---|
2582 | } |
---|
2583 | } |
---|
2584 | } |
---|
2585 | exitloop_d: |
---|
2586 | |
---|
2587 | if( allinconsistent ) |
---|
2588 | { |
---|
2589 | fprintf( stderr, "\n\n\nPlease check whether the grouping is possible.\n\n\n" ); |
---|
2590 | exit( 1 ); |
---|
2591 | } |
---|
2592 | #if 1 |
---|
2593 | intpt = testtopol; |
---|
2594 | prevnode = hist[im]; |
---|
2595 | if( prevnode == -1 ) |
---|
2596 | { |
---|
2597 | *intpt++ = im; |
---|
2598 | } |
---|
2599 | else |
---|
2600 | { |
---|
2601 | for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) |
---|
2602 | *intpt++ = *intpt2++; |
---|
2603 | for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) |
---|
2604 | *intpt++ = *intpt2++; |
---|
2605 | } |
---|
2606 | |
---|
2607 | prevnode = hist[jm]; |
---|
2608 | if( prevnode == -1 ) |
---|
2609 | { |
---|
2610 | *intpt++ = jm; |
---|
2611 | } |
---|
2612 | else |
---|
2613 | { |
---|
2614 | for( intpt2=topol[prevnode][0]; *intpt2!=-1; ) |
---|
2615 | *intpt++ = *intpt2++; |
---|
2616 | for( intpt2=topol[prevnode][1]; *intpt2!=-1; ) |
---|
2617 | *intpt++ = *intpt2++; |
---|
2618 | } |
---|
2619 | *intpt = -1; |
---|
2620 | // fprintf( stderr, "testtopol = \n" ); |
---|
2621 | // for( i=0; testtopol[i]>-1; i++ ) fprintf( stderr, " %03d", testtopol[i]+1 ); |
---|
2622 | // fprintf( stderr, "\n" ); |
---|
2623 | #endif |
---|
2624 | for( i=0; i<ngroup; i++ ) |
---|
2625 | { |
---|
2626 | // fprintf( stderr, "groups[%d] = \n", i ); |
---|
2627 | // for( j=0; groups[i][j]>-1; j++ ) fprintf( stderr, " %03d", groups[i][j]+1 ); |
---|
2628 | // fprintf( stderr, "\n" ); |
---|
2629 | if( overlapmember( testtopol, groups[i] ) ) |
---|
2630 | { |
---|
2631 | if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) ) |
---|
2632 | { |
---|
2633 | if( !warned[i] ) |
---|
2634 | { |
---|
2635 | warned[i] = 1; |
---|
2636 | fprintf( stderr, "\n###################################################################\n" ); |
---|
2637 | fprintf( stderr, "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 ); |
---|
2638 | fprintf( stderr, "###################################################################\n" ); |
---|
2639 | } |
---|
2640 | inconsistent[im][jm] = 1; |
---|
2641 | inconsistentpairlist[ninconsistentpairs][0] = im; |
---|
2642 | inconsistentpairlist[ninconsistentpairs][1] = jm; |
---|
2643 | ninconsistentpairs++; |
---|
2644 | break; |
---|
2645 | } |
---|
2646 | } |
---|
2647 | } |
---|
2648 | if( i == ngroup ) |
---|
2649 | { |
---|
2650 | // fprintf( stderr, "OK\n" ); |
---|
2651 | break; |
---|
2652 | } |
---|
2653 | } |
---|
2654 | |
---|
2655 | |
---|
2656 | |
---|
2657 | |
---|
2658 | |
---|
2659 | |
---|
2660 | prevnode = hist[im]; |
---|
2661 | nmemim = nmemar[im]; |
---|
2662 | // intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); |
---|
2663 | intpt = topol[k][0]; |
---|
2664 | if( prevnode == -1 ) |
---|
2665 | { |
---|
2666 | *intpt++ = im; |
---|
2667 | *intpt = -1; |
---|
2668 | } |
---|
2669 | else |
---|
2670 | { |
---|
2671 | pt1 = topol[prevnode][0]; |
---|
2672 | pt2 = topol[prevnode][1]; |
---|
2673 | if( *pt1 > *pt2 ) |
---|
2674 | { |
---|
2675 | pt11 = pt2; |
---|
2676 | pt22 = pt1; |
---|
2677 | } |
---|
2678 | else |
---|
2679 | { |
---|
2680 | pt11 = pt1; |
---|
2681 | pt22 = pt2; |
---|
2682 | } |
---|
2683 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
2684 | *intpt++ = *intpt2++; |
---|
2685 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
2686 | *intpt++ = *intpt2++; |
---|
2687 | *intpt = -1; |
---|
2688 | } |
---|
2689 | |
---|
2690 | prevnode = hist[jm]; |
---|
2691 | nmemjm = nmemar[jm]; |
---|
2692 | // intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); |
---|
2693 | intpt = topol[k][1]; |
---|
2694 | if( prevnode == -1 ) |
---|
2695 | { |
---|
2696 | *intpt++ = jm; |
---|
2697 | *intpt = -1; |
---|
2698 | } |
---|
2699 | else |
---|
2700 | { |
---|
2701 | pt1 = topol[prevnode][0]; |
---|
2702 | pt2 = topol[prevnode][1]; |
---|
2703 | if( *pt1 > *pt2 ) |
---|
2704 | { |
---|
2705 | pt11 = pt2; |
---|
2706 | pt22 = pt1; |
---|
2707 | } |
---|
2708 | else |
---|
2709 | { |
---|
2710 | pt11 = pt1; |
---|
2711 | pt22 = pt2; |
---|
2712 | } |
---|
2713 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
2714 | *intpt++ = *intpt2++; |
---|
2715 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
2716 | *intpt++ = *intpt2++; |
---|
2717 | *intpt = -1; |
---|
2718 | } |
---|
2719 | |
---|
2720 | minscore *= 0.5; |
---|
2721 | |
---|
2722 | len[k][0] = ( minscore - tmptmplen[im] ); |
---|
2723 | len[k][1] = ( minscore - tmptmplen[jm] ); |
---|
2724 | |
---|
2725 | |
---|
2726 | tmptmplen[im] = minscore; |
---|
2727 | |
---|
2728 | hist[im] = k; |
---|
2729 | nmemar[im] = nmemim + nmemjm; |
---|
2730 | |
---|
2731 | mindisfrom[im] = 999.9; |
---|
2732 | eff[im][jm] = 999.9; |
---|
2733 | // eff[im][jm-im] = 999.9; // bug?? |
---|
2734 | |
---|
2735 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
2736 | { |
---|
2737 | i = acpti->pos; |
---|
2738 | if( i != im && i != jm ) |
---|
2739 | { |
---|
2740 | if( i < im ) |
---|
2741 | { |
---|
2742 | miniim = i; |
---|
2743 | maxiim = im; |
---|
2744 | minijm = i; |
---|
2745 | maxijm = jm; |
---|
2746 | } |
---|
2747 | else if( i < jm ) |
---|
2748 | { |
---|
2749 | miniim = im; |
---|
2750 | maxiim = i; |
---|
2751 | minijm = i; |
---|
2752 | maxijm = jm; |
---|
2753 | } |
---|
2754 | else |
---|
2755 | { |
---|
2756 | miniim = im; |
---|
2757 | maxiim = i; |
---|
2758 | minijm = jm; |
---|
2759 | maxijm = i; |
---|
2760 | } |
---|
2761 | eff0 = eff[miniim][maxiim]; |
---|
2762 | eff1 = eff[minijm][maxijm]; |
---|
2763 | #if 0 |
---|
2764 | tmpfloat = eff[miniim][maxiim] = |
---|
2765 | MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; |
---|
2766 | #else |
---|
2767 | tmpfloat = eff[miniim][maxiim] = |
---|
2768 | (clusterfuncpt[0])( eff0, eff1 ); |
---|
2769 | #endif |
---|
2770 | |
---|
2771 | #if 1 |
---|
2772 | if( tmpfloat < mindisfrom[i] ) |
---|
2773 | { |
---|
2774 | mindisfrom[i] = tmpfloat; |
---|
2775 | nearest[i] = im; |
---|
2776 | } |
---|
2777 | if( tmpfloat < mindisfrom[im] ) |
---|
2778 | { |
---|
2779 | mindisfrom[im] = tmpfloat; |
---|
2780 | nearest[im] = i; |
---|
2781 | } |
---|
2782 | if( nearest[i] == jm ) |
---|
2783 | { |
---|
2784 | nearest[i] = im; |
---|
2785 | } |
---|
2786 | #endif |
---|
2787 | } |
---|
2788 | } |
---|
2789 | #if 0 |
---|
2790 | sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); |
---|
2791 | strcpy( tree[im], treetmp ); |
---|
2792 | #else |
---|
2793 | treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo |
---|
2794 | if( !treetmp ) |
---|
2795 | { |
---|
2796 | fprintf( stderr, "Cannot allocate treetmp\n" ); |
---|
2797 | exit( 1 ); |
---|
2798 | } |
---|
2799 | sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); |
---|
2800 | free( tree[im] ); |
---|
2801 | free( tree[jm] ); |
---|
2802 | tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); |
---|
2803 | tree[jm] = NULL; |
---|
2804 | if( tree[im] == NULL ) |
---|
2805 | { |
---|
2806 | fprintf( stderr, "Cannot reallocate tree!\n" ); |
---|
2807 | exit( 1 ); |
---|
2808 | } |
---|
2809 | strcpy( tree[im], treetmp ); |
---|
2810 | #endif |
---|
2811 | |
---|
2812 | acjmprev = ac[jm].prev; |
---|
2813 | acjmnext = ac[jm].next; |
---|
2814 | acjmprev->next = acjmnext; |
---|
2815 | if( acjmnext != NULL ) |
---|
2816 | acjmnext->prev = acjmprev; |
---|
2817 | // free( (void *)eff[jm] ); eff[jm] = NULL; |
---|
2818 | |
---|
2819 | #if 1 // muscle seems to miss this. |
---|
2820 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
2821 | { |
---|
2822 | i = acpti->pos; |
---|
2823 | if( nearest[i] == im ) |
---|
2824 | { |
---|
2825 | // fprintf( stderr, "calling setnearest\n" ); |
---|
2826 | setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); |
---|
2827 | } |
---|
2828 | } |
---|
2829 | #endif |
---|
2830 | |
---|
2831 | |
---|
2832 | #if 0 |
---|
2833 | fprintf( stdout, "\ncSTEP-%03d:\n", k+1 ); |
---|
2834 | fprintf( stdout, "len0 = %f\n", len[k][0] ); |
---|
2835 | for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); |
---|
2836 | fprintf( stdout, "\n" ); |
---|
2837 | fprintf( stdout, "len1 = %f\n", len[k][1] ); |
---|
2838 | for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); |
---|
2839 | fprintf( stdout, "\n" ); |
---|
2840 | #endif |
---|
2841 | } |
---|
2842 | fp = fopen( "infile.tree", "w" ); |
---|
2843 | fprintf( fp, "%s\n", treetmp ); |
---|
2844 | fclose( fp ); |
---|
2845 | #if 0 |
---|
2846 | FreeCharMtx( tree ); |
---|
2847 | #else |
---|
2848 | free( tree[0] ); |
---|
2849 | free( tree ); |
---|
2850 | #endif |
---|
2851 | free( treetmp ); |
---|
2852 | free( nametmp ); |
---|
2853 | free( (void *)tmptmplen ); tmptmplen = NULL; |
---|
2854 | free( hist ); hist = NULL; |
---|
2855 | free( (char *)ac ); ac = NULL; |
---|
2856 | free( (void *)nmemar ); nmemar = NULL; |
---|
2857 | free( mindisfrom ); |
---|
2858 | free( nearest ); |
---|
2859 | free( testtopol ); |
---|
2860 | FreeIntMtx( inconsistent ); |
---|
2861 | FreeIntMtx( inconsistentpairlist ); |
---|
2862 | free( warned ); |
---|
2863 | } |
---|
2864 | |
---|
2865 | void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len, Treedep *dep, int progressout ) |
---|
2866 | { |
---|
2867 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
2868 | int *intpt, *intpt2; |
---|
2869 | float tmpfloat; |
---|
2870 | float eff1, eff0; |
---|
2871 | float *tmptmplen = NULL; // static TLS -> local, 2012/02/25 |
---|
2872 | int *hist = NULL; // static TLS -> local, 2012/02/25 |
---|
2873 | Bchain *ac = NULL; // static TLS -> local, 2012/02/25 |
---|
2874 | int im = -1, jm = -1; |
---|
2875 | Bchain *acjmnext, *acjmprev; |
---|
2876 | int prevnode; |
---|
2877 | Bchain *acpti; |
---|
2878 | int *pt1, *pt2, *pt11, *pt22; |
---|
2879 | int *nmemar; // static TLS -> local, 2012/02/25 |
---|
2880 | int nmemim, nmemjm; |
---|
2881 | float minscore; |
---|
2882 | // float sueff1 = 1 - SUEFF; |
---|
2883 | // float sueff05 = SUEFF * 0.5; |
---|
2884 | int *nearest = NULL; // by Mathog, a guess |
---|
2885 | float *mindisfrom = NULL; // by Mathog, a guess |
---|
2886 | float (*clusterfuncpt[1])(float,float); |
---|
2887 | |
---|
2888 | |
---|
2889 | sueff1 = 1 - SUEFF; |
---|
2890 | sueff05 = SUEFF * 0.5; |
---|
2891 | if ( treemethod == 'X' ) |
---|
2892 | clusterfuncpt[0] = cluster_mix_float; |
---|
2893 | else if ( treemethod == 'E' ) |
---|
2894 | clusterfuncpt[0] = cluster_average_float; |
---|
2895 | else if ( treemethod == 'q' ) |
---|
2896 | clusterfuncpt[0] = cluster_minimum_float; |
---|
2897 | else |
---|
2898 | { |
---|
2899 | fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); |
---|
2900 | exit( 1 ); |
---|
2901 | } |
---|
2902 | |
---|
2903 | if( !hist ) |
---|
2904 | { |
---|
2905 | hist = AllocateIntVec( njob ); |
---|
2906 | tmptmplen = AllocateFloatVec( njob ); |
---|
2907 | ac = (Bchain *)malloc( njob * sizeof( Bchain ) ); |
---|
2908 | nmemar = AllocateIntVec( njob ); |
---|
2909 | mindisfrom = AllocateFloatVec( njob ); |
---|
2910 | nearest = AllocateIntVec( njob ); |
---|
2911 | } |
---|
2912 | |
---|
2913 | |
---|
2914 | for( i=0; i<nseq; i++ ) |
---|
2915 | { |
---|
2916 | ac[i].next = ac+i+1; |
---|
2917 | ac[i].prev = ac+i-1; |
---|
2918 | ac[i].pos = i; |
---|
2919 | } |
---|
2920 | ac[nseq-1].next = NULL; |
---|
2921 | |
---|
2922 | for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle |
---|
2923 | |
---|
2924 | for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0; |
---|
2925 | for( i=0; i<nseq; i++ ) |
---|
2926 | { |
---|
2927 | hist[i] = -1; |
---|
2928 | nmemar[i] = 1; |
---|
2929 | } |
---|
2930 | |
---|
2931 | if( progressout ) fprintf( stderr, "\n" ); |
---|
2932 | for( k=0; k<nseq-1; k++ ) |
---|
2933 | { |
---|
2934 | if( progressout && k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq ); |
---|
2935 | |
---|
2936 | minscore = 999.9; |
---|
2937 | for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) |
---|
2938 | { |
---|
2939 | i = acpti->pos; |
---|
2940 | // fprintf( stderr, "k=%d i=%d\n", k, i ); |
---|
2941 | if( mindisfrom[i] < minscore ) // muscle |
---|
2942 | { |
---|
2943 | im = i; |
---|
2944 | minscore = mindisfrom[i]; |
---|
2945 | } |
---|
2946 | } |
---|
2947 | jm = nearest[im]; |
---|
2948 | if( jm < im ) |
---|
2949 | { |
---|
2950 | j=jm; jm=im; im=j; |
---|
2951 | } |
---|
2952 | |
---|
2953 | |
---|
2954 | prevnode = hist[im]; |
---|
2955 | if( dep ) dep[k].child0 = prevnode; |
---|
2956 | nmemim = nmemar[im]; |
---|
2957 | intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); |
---|
2958 | if( prevnode == -1 ) |
---|
2959 | { |
---|
2960 | *intpt++ = im; |
---|
2961 | *intpt = -1; |
---|
2962 | } |
---|
2963 | else |
---|
2964 | { |
---|
2965 | pt1 = topol[prevnode][0]; |
---|
2966 | pt2 = topol[prevnode][1]; |
---|
2967 | if( *pt1 > *pt2 ) |
---|
2968 | { |
---|
2969 | pt11 = pt2; |
---|
2970 | pt22 = pt1; |
---|
2971 | } |
---|
2972 | else |
---|
2973 | { |
---|
2974 | pt11 = pt1; |
---|
2975 | pt22 = pt2; |
---|
2976 | } |
---|
2977 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
2978 | *intpt++ = *intpt2++; |
---|
2979 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
2980 | *intpt++ = *intpt2++; |
---|
2981 | *intpt = -1; |
---|
2982 | } |
---|
2983 | |
---|
2984 | prevnode = hist[jm]; |
---|
2985 | if( dep ) dep[k].child1 = prevnode; |
---|
2986 | nmemjm = nmemar[jm]; |
---|
2987 | intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); |
---|
2988 | if( !intpt ) |
---|
2989 | { |
---|
2990 | fprintf( stderr, "Cannot reallocate topol\n" ); |
---|
2991 | exit( 1 ); |
---|
2992 | } |
---|
2993 | if( prevnode == -1 ) |
---|
2994 | { |
---|
2995 | *intpt++ = jm; |
---|
2996 | *intpt = -1; |
---|
2997 | } |
---|
2998 | else |
---|
2999 | { |
---|
3000 | pt1 = topol[prevnode][0]; |
---|
3001 | pt2 = topol[prevnode][1]; |
---|
3002 | if( *pt1 > *pt2 ) |
---|
3003 | { |
---|
3004 | pt11 = pt2; |
---|
3005 | pt22 = pt1; |
---|
3006 | } |
---|
3007 | else |
---|
3008 | { |
---|
3009 | pt11 = pt1; |
---|
3010 | pt22 = pt2; |
---|
3011 | } |
---|
3012 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
3013 | *intpt++ = *intpt2++; |
---|
3014 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
3015 | *intpt++ = *intpt2++; |
---|
3016 | *intpt = -1; |
---|
3017 | } |
---|
3018 | |
---|
3019 | minscore *= 0.5; |
---|
3020 | |
---|
3021 | len[k][0] = ( minscore - tmptmplen[im] ); |
---|
3022 | len[k][1] = ( minscore - tmptmplen[jm] ); |
---|
3023 | |
---|
3024 | if( dep ) dep[k].distfromtip = minscore; |
---|
3025 | |
---|
3026 | tmptmplen[im] = minscore; |
---|
3027 | |
---|
3028 | hist[im] = k; |
---|
3029 | nmemar[im] = nmemim + nmemjm; |
---|
3030 | |
---|
3031 | mindisfrom[im] = 999.9; |
---|
3032 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
3033 | { |
---|
3034 | i = acpti->pos; |
---|
3035 | if( i != im && i != jm ) |
---|
3036 | { |
---|
3037 | if( i < im ) |
---|
3038 | { |
---|
3039 | miniim = i; |
---|
3040 | maxiim = im; |
---|
3041 | minijm = i; |
---|
3042 | maxijm = jm; |
---|
3043 | } |
---|
3044 | else if( i < jm ) |
---|
3045 | { |
---|
3046 | miniim = im; |
---|
3047 | maxiim = i; |
---|
3048 | minijm = i; |
---|
3049 | maxijm = jm; |
---|
3050 | } |
---|
3051 | else |
---|
3052 | { |
---|
3053 | miniim = im; |
---|
3054 | maxiim = i; |
---|
3055 | minijm = jm; |
---|
3056 | maxijm = i; |
---|
3057 | } |
---|
3058 | eff0 = eff[miniim][maxiim-miniim]; |
---|
3059 | eff1 = eff[minijm][maxijm-minijm]; |
---|
3060 | tmpfloat = eff[miniim][maxiim-miniim] = |
---|
3061 | #if 0 |
---|
3062 | MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05; |
---|
3063 | #else |
---|
3064 | (clusterfuncpt[0])( eff0, eff1 ); |
---|
3065 | #endif |
---|
3066 | if( tmpfloat < mindisfrom[i] ) |
---|
3067 | { |
---|
3068 | mindisfrom[i] = tmpfloat; |
---|
3069 | nearest[i] = im; |
---|
3070 | } |
---|
3071 | if( tmpfloat < mindisfrom[im] ) |
---|
3072 | { |
---|
3073 | mindisfrom[im] = tmpfloat; |
---|
3074 | nearest[im] = i; |
---|
3075 | } |
---|
3076 | if( nearest[i] == jm ) |
---|
3077 | { |
---|
3078 | nearest[i] = im; |
---|
3079 | } |
---|
3080 | } |
---|
3081 | } |
---|
3082 | |
---|
3083 | // fprintf( stderr, "im,jm=%d,%d\n", im, jm ); |
---|
3084 | acjmprev = ac[jm].prev; |
---|
3085 | acjmnext = ac[jm].next; |
---|
3086 | acjmprev->next = acjmnext; |
---|
3087 | if( acjmnext != NULL ) |
---|
3088 | acjmnext->prev = acjmprev; |
---|
3089 | free( (void *)eff[jm] ); eff[jm] = NULL; |
---|
3090 | |
---|
3091 | #if 1 // muscle seems to miss this. |
---|
3092 | for( acpti=ac; acpti!=NULL; acpti=acpti->next ) |
---|
3093 | { |
---|
3094 | i = acpti->pos; |
---|
3095 | if( nearest[i] == im ) |
---|
3096 | { |
---|
3097 | // fprintf( stderr, "calling setnearest\n" ); |
---|
3098 | setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); |
---|
3099 | } |
---|
3100 | } |
---|
3101 | #endif |
---|
3102 | |
---|
3103 | |
---|
3104 | #if 0 |
---|
3105 | fprintf( stdout, "vSTEP-%03d:\n", k+1 ); |
---|
3106 | fprintf( stdout, "len0 = %f\n", len[k][0] ); |
---|
3107 | for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 ); |
---|
3108 | fprintf( stdout, "\n" ); |
---|
3109 | fprintf( stdout, "len1 = %f\n", len[k][1] ); |
---|
3110 | for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 ); |
---|
3111 | fprintf( stdout, "\n" ); |
---|
3112 | #endif |
---|
3113 | } |
---|
3114 | free( (void *)tmptmplen ); tmptmplen = NULL; |
---|
3115 | free( hist ); hist = NULL; |
---|
3116 | free( (char *)ac ); ac = NULL; |
---|
3117 | free( (void *)nmemar ); nmemar = NULL; |
---|
3118 | free( mindisfrom ); |
---|
3119 | free( nearest ); |
---|
3120 | } |
---|
3121 | |
---|
3122 | |
---|
3123 | |
---|
3124 | |
---|
3125 | |
---|
3126 | |
---|
3127 | |
---|
3128 | |
---|
3129 | |
---|
3130 | void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len, char **name ) |
---|
3131 | { |
---|
3132 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
3133 | int *intpt, *intpt2; |
---|
3134 | double eff1, eff0; |
---|
3135 | int *hist = NULL; |
---|
3136 | Achain *ac = NULL; |
---|
3137 | double minscore; |
---|
3138 | char **tree; |
---|
3139 | char *treetmp; |
---|
3140 | int im = -1, jm = -1; |
---|
3141 | int prevnode, acjmnext, acjmprev; |
---|
3142 | int *pt1, *pt2, *pt11, *pt22; |
---|
3143 | FILE *fp; |
---|
3144 | int node[2]; |
---|
3145 | float lenfl[2]; |
---|
3146 | char *nametmp, *nameptr, *tmpptr; //static? |
---|
3147 | char namec; |
---|
3148 | |
---|
3149 | fp = fopen( "_guidetree", "r" ); |
---|
3150 | if( !fp ) |
---|
3151 | { |
---|
3152 | fprintf( stderr, "cannot open _guidetree\n" ); |
---|
3153 | exit( 1 ); |
---|
3154 | } |
---|
3155 | |
---|
3156 | |
---|
3157 | if( !hist ) |
---|
3158 | { |
---|
3159 | // treetmp = AllocateCharVec( njob*50 ); |
---|
3160 | treetmp = NULL; |
---|
3161 | // tree = AllocateCharMtx( njob, njob*50 ); |
---|
3162 | tree = AllocateCharMtx( njob, 0 ); |
---|
3163 | nametmp = AllocateCharVec( 1000 ); // nagasugi |
---|
3164 | hist = AllocateIntVec( njob ); |
---|
3165 | ac = (Achain *)malloc( njob * sizeof( Achain ) ); |
---|
3166 | } |
---|
3167 | |
---|
3168 | for( i=0; i<nseq; i++ ) |
---|
3169 | { |
---|
3170 | for( j=0; j<999; j++ ) nametmp[j] = 0; |
---|
3171 | for( j=0; j<999; j++ ) |
---|
3172 | { |
---|
3173 | namec = name[i][j]; |
---|
3174 | if( namec == 0 ) |
---|
3175 | break; |
---|
3176 | else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' ) |
---|
3177 | nametmp[j] = namec; |
---|
3178 | else |
---|
3179 | nametmp[j] = '_'; |
---|
3180 | } |
---|
3181 | nametmp[j] = 0; |
---|
3182 | // sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 ); |
---|
3183 | if( outnumber ) |
---|
3184 | nameptr = strstr( nametmp, "_numo_e" ) + 8; |
---|
3185 | else |
---|
3186 | nameptr = nametmp + 1; |
---|
3187 | |
---|
3188 | if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame |
---|
3189 | |
---|
3190 | tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100 |
---|
3191 | if( tree[i] == NULL ) |
---|
3192 | { |
---|
3193 | fprintf( stderr, "Cannot allocate tree!\n" ); |
---|
3194 | exit( 1 ); |
---|
3195 | } |
---|
3196 | sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr ); |
---|
3197 | } |
---|
3198 | |
---|
3199 | for( i=0; i<nseq; i++ ) |
---|
3200 | { |
---|
3201 | ac[i].next = i+1; |
---|
3202 | ac[i].prev = i-1; |
---|
3203 | // ac[i].curr = i; |
---|
3204 | } |
---|
3205 | ac[nseq-1].next = -1; |
---|
3206 | |
---|
3207 | for( i=0; i<nseq; i++ ) hist[i] = -1; |
---|
3208 | |
---|
3209 | fprintf( stderr, "\n" ); |
---|
3210 | for( k=0; k<nseq-1; k++ ) |
---|
3211 | { |
---|
3212 | if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq ); |
---|
3213 | |
---|
3214 | #if 0 |
---|
3215 | minscore = 99999.9; |
---|
3216 | for( i=0; ac[i].next!=-1; i=ac[i].next ) |
---|
3217 | { |
---|
3218 | for( j=ac[i].next; j!=-1; j=ac[j].next ) |
---|
3219 | { |
---|
3220 | tmpdouble = eff[i][j]; |
---|
3221 | if( tmpdouble < minscore ) |
---|
3222 | { |
---|
3223 | minscore = tmpdouble; |
---|
3224 | im = i; jm = j; |
---|
3225 | } |
---|
3226 | } |
---|
3227 | } |
---|
3228 | #else |
---|
3229 | lenfl[0] = lenfl[1] = -1.0; |
---|
3230 | loadtreeoneline( node, lenfl, fp ); |
---|
3231 | im = node[0]; |
---|
3232 | jm = node[1]; |
---|
3233 | minscore = eff[im][jm]; |
---|
3234 | |
---|
3235 | if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) |
---|
3236 | { |
---|
3237 | fprintf( stderr, "\n\nCheck the guide tree.\n" ); |
---|
3238 | fprintf( stderr, "im=%d, jm=%d\n", im+1, jm+1 ); |
---|
3239 | fprintf( stderr, "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" ); |
---|
3240 | exit( 1 ); |
---|
3241 | } |
---|
3242 | |
---|
3243 | |
---|
3244 | // fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore ); |
---|
3245 | |
---|
3246 | |
---|
3247 | if( lenfl[0] == -1.0 || lenfl[1] == -1.0 ) |
---|
3248 | { |
---|
3249 | fprintf( stderr, "\n\nWARNING: Branch length is not given.\n" ); |
---|
3250 | exit( 1 ); |
---|
3251 | } |
---|
3252 | |
---|
3253 | if( lenfl[0] < 0.0 ) lenfl[0] = 0.0; |
---|
3254 | if( lenfl[1] < 0.0 ) lenfl[1] = 0.0; |
---|
3255 | #endif |
---|
3256 | |
---|
3257 | // fprintf( stderr, "im=%d, jm=%d\n", im, jm ); |
---|
3258 | |
---|
3259 | intpt = topol[k][0]; |
---|
3260 | prevnode = hist[im]; |
---|
3261 | if( prevnode == -1 ) |
---|
3262 | { |
---|
3263 | *intpt++ = im; |
---|
3264 | *intpt = -1; |
---|
3265 | } |
---|
3266 | else |
---|
3267 | { |
---|
3268 | pt1 = topol[prevnode][0]; |
---|
3269 | pt2 = topol[prevnode][1]; |
---|
3270 | if( *pt1 > *pt2 ) |
---|
3271 | { |
---|
3272 | pt11 = pt2; |
---|
3273 | pt22 = pt1; |
---|
3274 | } |
---|
3275 | else |
---|
3276 | { |
---|
3277 | pt11 = pt1; |
---|
3278 | pt22 = pt2; |
---|
3279 | } |
---|
3280 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
3281 | *intpt++ = *intpt2++; |
---|
3282 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
3283 | *intpt++ = *intpt2++; |
---|
3284 | *intpt = -1; |
---|
3285 | } |
---|
3286 | |
---|
3287 | intpt = topol[k][1]; |
---|
3288 | prevnode = hist[jm]; |
---|
3289 | if( prevnode == -1 ) |
---|
3290 | { |
---|
3291 | *intpt++ = jm; |
---|
3292 | *intpt = -1; |
---|
3293 | } |
---|
3294 | else |
---|
3295 | { |
---|
3296 | pt1 = topol[prevnode][0]; |
---|
3297 | pt2 = topol[prevnode][1]; |
---|
3298 | if( *pt1 > *pt2 ) |
---|
3299 | { |
---|
3300 | pt11 = pt2; |
---|
3301 | pt22 = pt1; |
---|
3302 | } |
---|
3303 | else |
---|
3304 | { |
---|
3305 | pt11 = pt1; |
---|
3306 | pt22 = pt2; |
---|
3307 | } |
---|
3308 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
3309 | *intpt++ = *intpt2++; |
---|
3310 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
3311 | *intpt++ = *intpt2++; |
---|
3312 | *intpt = -1; |
---|
3313 | } |
---|
3314 | |
---|
3315 | minscore *= 0.5; |
---|
3316 | |
---|
3317 | #if 0 |
---|
3318 | len[k][0] = minscore - tmptmplen[im]; |
---|
3319 | len[k][1] = minscore - tmptmplen[jm]; |
---|
3320 | #else |
---|
3321 | len[k][0] = lenfl[0]; |
---|
3322 | len[k][1] = lenfl[1]; |
---|
3323 | #endif |
---|
3324 | |
---|
3325 | |
---|
3326 | hist[im] = k; |
---|
3327 | |
---|
3328 | for( i=0; i!=-1; i=ac[i].next ) |
---|
3329 | { |
---|
3330 | if( i != im && i != jm ) |
---|
3331 | { |
---|
3332 | if( i < im ) |
---|
3333 | { |
---|
3334 | miniim = i; |
---|
3335 | maxiim = im; |
---|
3336 | minijm = i; |
---|
3337 | maxijm = jm; |
---|
3338 | } |
---|
3339 | else if( i < jm ) |
---|
3340 | { |
---|
3341 | miniim = im; |
---|
3342 | maxiim = i; |
---|
3343 | minijm = i; |
---|
3344 | maxijm = jm; |
---|
3345 | } |
---|
3346 | else |
---|
3347 | { |
---|
3348 | miniim = im; |
---|
3349 | maxiim = i; |
---|
3350 | minijm = jm; |
---|
3351 | maxijm = i; |
---|
3352 | } |
---|
3353 | eff0 = eff[miniim][maxiim]; |
---|
3354 | eff1 = eff[minijm][maxijm]; |
---|
3355 | eff[miniim][maxiim] = |
---|
3356 | MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + |
---|
3357 | ( eff0 + eff1 ) * 0.5 * SUEFF; |
---|
3358 | } |
---|
3359 | } |
---|
3360 | acjmprev = ac[jm].prev; |
---|
3361 | acjmnext = ac[jm].next; |
---|
3362 | ac[acjmprev].next = acjmnext; |
---|
3363 | if( acjmnext != -1 ) |
---|
3364 | ac[acjmnext].prev = acjmprev; |
---|
3365 | |
---|
3366 | |
---|
3367 | treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo |
---|
3368 | if( !treetmp ) |
---|
3369 | { |
---|
3370 | fprintf( stderr, "Cannot allocate treetmp\n" ); |
---|
3371 | exit( 1 ); |
---|
3372 | } |
---|
3373 | sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); |
---|
3374 | free( tree[im] ); |
---|
3375 | free( tree[jm] ); |
---|
3376 | tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) ); |
---|
3377 | tree[jm] = NULL; |
---|
3378 | if( tree[im] == NULL ) |
---|
3379 | { |
---|
3380 | fprintf( stderr, "Cannot reallocate tree!\n" ); |
---|
3381 | exit( 1 ); |
---|
3382 | } |
---|
3383 | strcpy( tree[im], treetmp ); |
---|
3384 | |
---|
3385 | // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); |
---|
3386 | // strcpy( tree[im], treetmp ); |
---|
3387 | |
---|
3388 | #if 0 |
---|
3389 | fprintf( stdout, "STEP-%03d:\n", k+1 ); |
---|
3390 | fprintf( stdout, "len0 = %f\n", len[k][0] ); |
---|
3391 | for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); |
---|
3392 | fprintf( stdout, "\n" ); |
---|
3393 | fprintf( stdout, "len1 = %f\n", len[k][1] ); |
---|
3394 | for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); |
---|
3395 | fprintf( stdout, "\n" ); |
---|
3396 | #endif |
---|
3397 | } |
---|
3398 | fclose( fp ); |
---|
3399 | |
---|
3400 | |
---|
3401 | fp = fopen( "infile.tree", "w" ); |
---|
3402 | fprintf( fp, "%s\n", treetmp ); |
---|
3403 | // fprintf( fp, "by veryfastsupg_double_loadtree\n" ); |
---|
3404 | fclose( fp ); |
---|
3405 | |
---|
3406 | #if 1 |
---|
3407 | fprintf( stderr, "\n" ); |
---|
3408 | free( hist ); |
---|
3409 | free( (char *)ac ); |
---|
3410 | FreeCharMtx( tree ); |
---|
3411 | free( treetmp ); |
---|
3412 | free( nametmp ); |
---|
3413 | #endif |
---|
3414 | |
---|
3415 | |
---|
3416 | } |
---|
3417 | |
---|
#if 0
/*
 * veryfastsupg_double -- compiled out; kept for reference only.
 *
 * Plain agglomerative clustering on a distance matrix addressed as
 * eff[smaller][larger].  Each step scans all live pairs for the smallest
 * distance, records the member lists of the two clusters in topol[step][0/1]
 * (written in place, -1 terminated) and their branch lengths in
 * len[step][0/1], then folds the distances of the second cluster into the
 * first and removes the second from the chain of live clusters.
 */
void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len )
{
	static double *height = NULL;  // half of the joining distance of each live cluster
	static int *lastjoin = NULL;   // step at which a cluster was last joined, -1 for a leaf
	static Achain *chain = NULL;   // index-linked chain of live clusters
	int step, a, b, side;
	int im = -1, jm = -1;
	int before, after;
	double best, cand, d0, d1;

	if( !lastjoin )
	{
		lastjoin = AllocateIntVec( njob );
		height = (double *)malloc( njob * sizeof( double ) );
		chain = (Achain *)malloc( njob * sizeof( Achain ) );
	}

	for( a=0; a<nseq; a++ )
	{
		chain[a].next = a+1;
		chain[a].prev = a-1;
	}
	chain[nseq-1].next = -1;

	for( a=0; a<nseq; a++ )
	{
		height[a] = 0.0;
		lastjoin[a] = -1;
	}

	fprintf( stderr, "\n" );
	for( step=0; step<nseq-1; step++ )
	{
		if( step % 10 == 0 ) fprintf( stderr, "%d / %d\r", step, nseq );

		// exhaustive search for the closest pair of live clusters
		best = 99999.9;
		a = 0;
		while( chain[a].next != -1 )
		{
			for( b=chain[a].next; b!=-1; b=chain[b].next )
			{
				cand = eff[a][b];
				if( cand < best )
				{
					best = cand;
					im = a;
					jm = b;
				}
			}
			a = chain[a].next;
		}

		// side 0 lists the members of im, side 1 those of jm
		for( side=0; side<2; side++ )
		{
			int member = side ? jm : im;
			int *dst = topol[step][side];
			int earlier = lastjoin[member];

			if( earlier == -1 )
			{
				*dst++ = member;
			}
			else
			{
				// both halves of the earlier join, smaller leading member first
				int *first = topol[earlier][0];
				int *second = topol[earlier][1];
				int *src;

				if( *first > *second )
				{
					src = first;
					first = second;
					second = src;
				}
				for( src=first; *src!=-1; src++ ) *dst++ = *src;
				for( src=second; *src!=-1; src++ ) *dst++ = *src;
			}
			*dst = -1;
		}

		best *= 0.5;
		len[step][0] = best - height[im];
		len[step][1] = best - height[jm];
		height[im] = best;

		lastjoin[im] = step;

		// merge the distances of jm into those of im
		for( a=0; a!=-1; a=chain[a].next )
		{
			double *cell;

			if( a == im || a == jm ) continue;

			if( a < im )
			{
				cell = &eff[a][im];
				d1 = eff[a][jm];
			}
			else if( a < jm )
			{
				cell = &eff[im][a];
				d1 = eff[a][jm];
			}
			else
			{
				cell = &eff[im][a];
				d1 = eff[jm][a];
			}
			d0 = *cell;
			*cell = MIN( d0, d1 ) * ( 1.0 - SUEFF ) + ( d0 + d1 ) * 0.5 * SUEFF;
		}

		// drop jm from the chain
		before = chain[jm].prev;
		after = chain[jm].next;
		chain[before].next = after;
		if( after != -1 )
			chain[after].prev = before;
	}

	fprintf( stderr, "\n" );
	free( (void *)height ); height = NULL;
	free( lastjoin ); lastjoin = NULL;
	free( (char *)chain ); chain = NULL;
}
#endif
---|
3591 | |
---|
3592 | void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name ) // not used |
---|
3593 | { |
---|
3594 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
3595 | int *intpt, *intpt2; |
---|
3596 | double tmpdouble; |
---|
3597 | double eff1, eff0; |
---|
3598 | static double *tmptmplen = NULL; |
---|
3599 | static int *hist = NULL; |
---|
3600 | static Achain *ac = NULL; |
---|
3601 | double minscore; |
---|
3602 | static char **tree; |
---|
3603 | static char *treetmp; |
---|
3604 | static char *nametmp; |
---|
3605 | FILE *fpout; |
---|
3606 | int im = -1, jm = -1; |
---|
3607 | int prevnode, acjmnext, acjmprev; |
---|
3608 | int *pt1, *pt2, *pt11, *pt22; |
---|
3609 | double (*clusterfuncpt[1])(double,double); |
---|
3610 | |
---|
3611 | |
---|
3612 | sueff1_double = 1 - SUEFF; |
---|
3613 | sueff05_double = SUEFF * 0.5; |
---|
3614 | if ( treemethod == 'X' ) |
---|
3615 | clusterfuncpt[0] = cluster_mix_double; |
---|
3616 | else if ( treemethod == 'E' ) |
---|
3617 | clusterfuncpt[0] = cluster_average_double; |
---|
3618 | else if ( treemethod == 'q' ) |
---|
3619 | clusterfuncpt[0] = cluster_minimum_double; |
---|
3620 | else |
---|
3621 | { |
---|
3622 | fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); |
---|
3623 | exit( 1 ); |
---|
3624 | } |
---|
3625 | |
---|
3626 | if( !hist ) |
---|
3627 | { |
---|
3628 | treetmp = AllocateCharVec( njob*50 ); |
---|
3629 | tree = AllocateCharMtx( njob, njob*50 ); |
---|
3630 | hist = AllocateIntVec( njob ); |
---|
3631 | tmptmplen = (double *)malloc( njob * sizeof( double ) ); |
---|
3632 | ac = (Achain *)malloc( njob * sizeof( Achain ) ); |
---|
3633 | nametmp = AllocateCharVec( 31 ); |
---|
3634 | } |
---|
3635 | |
---|
3636 | // for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 ); |
---|
3637 | for( i=0; i<nseq; i++ ) |
---|
3638 | { |
---|
3639 | for( j=0; j<30; j++ ) nametmp[j] = 0; |
---|
3640 | for( j=0; j<30; j++ ) |
---|
3641 | { |
---|
3642 | if( isalnum( name[i][j] ) ) |
---|
3643 | nametmp[j] = name[i][j]; |
---|
3644 | else |
---|
3645 | nametmp[j] = '_'; |
---|
3646 | } |
---|
3647 | nametmp[30] = 0; |
---|
3648 | sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 ); |
---|
3649 | } |
---|
3650 | |
---|
3651 | for( i=0; i<nseq; i++ ) |
---|
3652 | { |
---|
3653 | ac[i].next = i+1; |
---|
3654 | ac[i].prev = i-1; |
---|
3655 | // ac[i].curr = i; |
---|
3656 | } |
---|
3657 | ac[nseq-1].next = -1; |
---|
3658 | |
---|
3659 | for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0; |
---|
3660 | for( i=0; i<nseq; i++ ) hist[i] = -1; |
---|
3661 | |
---|
3662 | fprintf( stderr, "\n" ); |
---|
3663 | for( k=0; k<nseq-1; k++ ) |
---|
3664 | { |
---|
3665 | if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq ); |
---|
3666 | |
---|
3667 | minscore = 99999.9; |
---|
3668 | for( i=0; ac[i].next!=-1; i=ac[i].next ) |
---|
3669 | { |
---|
3670 | for( j=ac[i].next; j!=-1; j=ac[j].next ) |
---|
3671 | { |
---|
3672 | tmpdouble = eff[i][j]; |
---|
3673 | if( tmpdouble < minscore ) |
---|
3674 | { |
---|
3675 | minscore = tmpdouble; |
---|
3676 | im = i; jm = j; |
---|
3677 | } |
---|
3678 | } |
---|
3679 | } |
---|
3680 | |
---|
3681 | // fprintf( stderr, "im=%d, jm=%d\n", im, jm ); |
---|
3682 | |
---|
3683 | intpt = topol[k][0]; |
---|
3684 | prevnode = hist[im]; |
---|
3685 | if( prevnode == -1 ) |
---|
3686 | { |
---|
3687 | *intpt++ = im; |
---|
3688 | *intpt = -1; |
---|
3689 | } |
---|
3690 | else |
---|
3691 | { |
---|
3692 | pt1 = topol[prevnode][0]; |
---|
3693 | pt2 = topol[prevnode][1]; |
---|
3694 | if( *pt1 > *pt2 ) |
---|
3695 | { |
---|
3696 | pt11 = pt2; |
---|
3697 | pt22 = pt1; |
---|
3698 | } |
---|
3699 | else |
---|
3700 | { |
---|
3701 | pt11 = pt1; |
---|
3702 | pt22 = pt2; |
---|
3703 | } |
---|
3704 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
3705 | *intpt++ = *intpt2++; |
---|
3706 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
3707 | *intpt++ = *intpt2++; |
---|
3708 | *intpt = -1; |
---|
3709 | } |
---|
3710 | |
---|
3711 | intpt = topol[k][1]; |
---|
3712 | prevnode = hist[jm]; |
---|
3713 | if( prevnode == -1 ) |
---|
3714 | { |
---|
3715 | *intpt++ = jm; |
---|
3716 | *intpt = -1; |
---|
3717 | } |
---|
3718 | else |
---|
3719 | { |
---|
3720 | pt1 = topol[prevnode][0]; |
---|
3721 | pt2 = topol[prevnode][1]; |
---|
3722 | if( *pt1 > *pt2 ) |
---|
3723 | { |
---|
3724 | pt11 = pt2; |
---|
3725 | pt22 = pt1; |
---|
3726 | } |
---|
3727 | else |
---|
3728 | { |
---|
3729 | pt11 = pt1; |
---|
3730 | pt22 = pt2; |
---|
3731 | } |
---|
3732 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
3733 | *intpt++ = *intpt2++; |
---|
3734 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
3735 | *intpt++ = *intpt2++; |
---|
3736 | *intpt = -1; |
---|
3737 | } |
---|
3738 | |
---|
3739 | minscore *= 0.5; |
---|
3740 | |
---|
3741 | len[k][0] = minscore - tmptmplen[im]; |
---|
3742 | len[k][1] = minscore - tmptmplen[jm]; |
---|
3743 | |
---|
3744 | tmptmplen[im] = minscore; |
---|
3745 | |
---|
3746 | hist[im] = k; |
---|
3747 | |
---|
3748 | for( i=0; i!=-1; i=ac[i].next ) |
---|
3749 | { |
---|
3750 | if( i != im && i != jm ) |
---|
3751 | { |
---|
3752 | if( i < im ) |
---|
3753 | { |
---|
3754 | miniim = i; |
---|
3755 | maxiim = im; |
---|
3756 | minijm = i; |
---|
3757 | maxijm = jm; |
---|
3758 | } |
---|
3759 | else if( i < jm ) |
---|
3760 | { |
---|
3761 | miniim = im; |
---|
3762 | maxiim = i; |
---|
3763 | minijm = i; |
---|
3764 | maxijm = jm; |
---|
3765 | } |
---|
3766 | else |
---|
3767 | { |
---|
3768 | miniim = im; |
---|
3769 | maxiim = i; |
---|
3770 | minijm = jm; |
---|
3771 | maxijm = i; |
---|
3772 | } |
---|
3773 | eff0 = eff[miniim][maxiim]; |
---|
3774 | eff1 = eff[minijm][maxijm]; |
---|
3775 | #if 0 |
---|
3776 | eff[miniim][maxiim] = |
---|
3777 | MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + |
---|
3778 | ( eff0 + eff1 ) * 0.5 * SUEFF; |
---|
3779 | #else |
---|
3780 | eff[miniim][maxiim] = |
---|
3781 | (clusterfuncpt[0])( eff0, eff1 ); |
---|
3782 | #endif |
---|
3783 | } |
---|
3784 | } |
---|
3785 | acjmprev = ac[jm].prev; |
---|
3786 | acjmnext = ac[jm].next; |
---|
3787 | ac[acjmprev].next = acjmnext; |
---|
3788 | if( acjmnext != -1 ) |
---|
3789 | ac[acjmnext].prev = acjmprev; |
---|
3790 | |
---|
3791 | sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); |
---|
3792 | strcpy( tree[im], treetmp ); |
---|
3793 | #if 0 |
---|
3794 | fprintf( stdout, "STEP-%03d:\n", k+1 ); |
---|
3795 | fprintf( stdout, "len0 = %f\n", len[k][0] ); |
---|
3796 | for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); |
---|
3797 | fprintf( stdout, "\n" ); |
---|
3798 | fprintf( stdout, "len1 = %f\n", len[k][1] ); |
---|
3799 | for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); |
---|
3800 | fprintf( stdout, "\n" ); |
---|
3801 | #endif |
---|
3802 | } |
---|
3803 | fpout = fopen( "infile.tree", "w" ); |
---|
3804 | fprintf( fpout, "%s\n", treetmp ); |
---|
3805 | // fprintf( fpout, "by veryfastsupg_double_outtree\n" ); |
---|
3806 | fclose( fpout ); |
---|
3807 | #if 1 |
---|
3808 | fprintf( stderr, "\n" ); |
---|
3809 | free( (void *)tmptmplen ); tmptmplen = NULL; |
---|
3810 | free( hist ); hist = NULL; |
---|
3811 | free( (char *)ac ); ac = NULL; |
---|
3812 | FreeCharMtx( tree ); |
---|
3813 | free( treetmp ); |
---|
3814 | free( nametmp ); |
---|
3815 | #endif |
---|
3816 | } |
---|
3817 | |
---|
3818 | void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) |
---|
3819 | { |
---|
3820 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
3821 | int *intpt, *intpt2; |
---|
3822 | int tmpint; |
---|
3823 | int eff1, eff0; |
---|
3824 | static double *tmptmplen = NULL; |
---|
3825 | static int **eff = NULL; |
---|
3826 | static int *hist = NULL; |
---|
3827 | static Achain *ac = NULL; |
---|
3828 | int minscore; |
---|
3829 | double minscoref; |
---|
3830 | int im = -1, jm = -1; |
---|
3831 | int prevnode, acjmnext, acjmprev; |
---|
3832 | int *pt1, *pt2, *pt11, *pt22; |
---|
3833 | if( !eff ) |
---|
3834 | { |
---|
3835 | eff = AllocateIntMtx( njob, njob ); |
---|
3836 | hist = AllocateIntVec( njob ); |
---|
3837 | tmptmplen = (double *)malloc( njob * sizeof( double ) ); |
---|
3838 | ac = (Achain *)malloc( njob * sizeof( Achain ) ); |
---|
3839 | } |
---|
3840 | |
---|
3841 | for( i=0; i<nseq; i++ ) |
---|
3842 | { |
---|
3843 | for( j=0; j<nseq; j++ ) |
---|
3844 | { |
---|
3845 | eff[i][j] = (int)( oeff[i][j] * INTMTXSCALE + 0.5 ); |
---|
3846 | } |
---|
3847 | } |
---|
3848 | |
---|
3849 | for( i=0; i<nseq; i++ ) |
---|
3850 | { |
---|
3851 | ac[i].next = i+1; |
---|
3852 | ac[i].prev = i-1; |
---|
3853 | // ac[i].curr = i; |
---|
3854 | } |
---|
3855 | ac[nseq-1].next = -1; |
---|
3856 | |
---|
3857 | for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0; |
---|
3858 | for( i=0; i<nseq; i++ ) hist[i] = -1; |
---|
3859 | |
---|
3860 | fprintf( stderr, "\n" ); |
---|
3861 | for( k=0; k<nseq-1; k++ ) |
---|
3862 | { |
---|
3863 | if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq ); |
---|
3864 | |
---|
3865 | minscore = INTMTXSCALE*4; |
---|
3866 | for( i=0; ac[i].next!=-1; i=ac[i].next ) |
---|
3867 | { |
---|
3868 | for( j=ac[i].next; j!=-1; j=ac[j].next ) |
---|
3869 | { |
---|
3870 | tmpint = eff[i][j]; |
---|
3871 | if( tmpint < minscore ) |
---|
3872 | { |
---|
3873 | minscore = tmpint; |
---|
3874 | im = i; jm = j; |
---|
3875 | } |
---|
3876 | } |
---|
3877 | } |
---|
3878 | minscoref = (double)minscore * 0.5 / ( INTMTXSCALE ); |
---|
3879 | |
---|
3880 | // fprintf( stderr, "im=%d, jm=%d\n", im, jm ); |
---|
3881 | |
---|
3882 | #if 1 |
---|
3883 | intpt = topol[k][0]; |
---|
3884 | prevnode = hist[im]; |
---|
3885 | if( prevnode == -1 ) |
---|
3886 | { |
---|
3887 | *intpt++ = im; |
---|
3888 | *intpt = -1; |
---|
3889 | } |
---|
3890 | else |
---|
3891 | { |
---|
3892 | pt1 = topol[prevnode][0]; |
---|
3893 | pt2 = topol[prevnode][1]; |
---|
3894 | if( *pt1 > *pt2 ) |
---|
3895 | { |
---|
3896 | pt11 = pt2; |
---|
3897 | pt22 = pt1; |
---|
3898 | } |
---|
3899 | else |
---|
3900 | { |
---|
3901 | pt11 = pt1; |
---|
3902 | pt22 = pt2; |
---|
3903 | } |
---|
3904 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
3905 | *intpt++ = *intpt2++; |
---|
3906 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
3907 | *intpt++ = *intpt2++; |
---|
3908 | *intpt = -1; |
---|
3909 | } |
---|
3910 | |
---|
3911 | intpt = topol[k][1]; |
---|
3912 | prevnode = hist[jm]; |
---|
3913 | if( prevnode == -1 ) |
---|
3914 | { |
---|
3915 | *intpt++ = jm; |
---|
3916 | *intpt = -1; |
---|
3917 | } |
---|
3918 | else |
---|
3919 | { |
---|
3920 | pt1 = topol[prevnode][0]; |
---|
3921 | pt2 = topol[prevnode][1]; |
---|
3922 | if( *pt1 > *pt2 ) |
---|
3923 | { |
---|
3924 | pt11 = pt2; |
---|
3925 | pt22 = pt1; |
---|
3926 | } |
---|
3927 | else |
---|
3928 | { |
---|
3929 | pt11 = pt1; |
---|
3930 | pt22 = pt2; |
---|
3931 | } |
---|
3932 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
3933 | *intpt++ = *intpt2++; |
---|
3934 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
3935 | *intpt++ = *intpt2++; |
---|
3936 | *intpt = -1; |
---|
3937 | } |
---|
3938 | #else |
---|
3939 | intpt = topol[k][0]; |
---|
3940 | for( i=0; i<nseq; i++ ) |
---|
3941 | if( pair[im][i] > -2 ) |
---|
3942 | *intpt++ = i; |
---|
3943 | *intpt = -1; |
---|
3944 | |
---|
3945 | intpt = topol[k][1]; |
---|
3946 | for( i=0; i<nseq; i++ ) |
---|
3947 | if( pair[jm][i] > -2 ) |
---|
3948 | *intpt++ = i; |
---|
3949 | *intpt = -1; |
---|
3950 | #endif |
---|
3951 | |
---|
3952 | len[k][0] = minscoref - tmptmplen[im]; |
---|
3953 | len[k][1] = minscoref - tmptmplen[jm]; |
---|
3954 | |
---|
3955 | tmptmplen[im] = minscoref; |
---|
3956 | |
---|
3957 | hist[im] = k; |
---|
3958 | |
---|
3959 | for( i=0; i!=-1; i=ac[i].next ) |
---|
3960 | { |
---|
3961 | if( i != im && i != jm ) |
---|
3962 | { |
---|
3963 | if( i < im ) |
---|
3964 | { |
---|
3965 | miniim = i; |
---|
3966 | maxiim = im; |
---|
3967 | minijm = i; |
---|
3968 | maxijm = jm; |
---|
3969 | } |
---|
3970 | else if( i < jm ) |
---|
3971 | { |
---|
3972 | miniim = im; |
---|
3973 | maxiim = i; |
---|
3974 | minijm = i; |
---|
3975 | maxijm = jm; |
---|
3976 | } |
---|
3977 | else |
---|
3978 | { |
---|
3979 | miniim = im; |
---|
3980 | maxiim = i; |
---|
3981 | minijm = jm; |
---|
3982 | maxijm = i; |
---|
3983 | } |
---|
3984 | eff0 = eff[miniim][maxiim]; |
---|
3985 | eff1 = eff[minijm][maxijm]; |
---|
3986 | eff[miniim][maxiim] = |
---|
3987 | MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + |
---|
3988 | ( eff0 + eff1 ) * 0.5 * SUEFF; |
---|
3989 | } |
---|
3990 | } |
---|
3991 | acjmprev = ac[jm].prev; |
---|
3992 | acjmnext = ac[jm].next; |
---|
3993 | ac[acjmprev].next = acjmnext; |
---|
3994 | if( acjmnext != -1 ) |
---|
3995 | ac[acjmnext].prev = acjmprev; |
---|
3996 | #if 0 |
---|
3997 | fprintf( stdout, "STEP-%03d:\n", k+1 ); |
---|
3998 | fprintf( stdout, "len0 = %f\n", len[k][0] ); |
---|
3999 | for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); |
---|
4000 | fprintf( stdout, "\n" ); |
---|
4001 | fprintf( stdout, "len1 = %f\n", len[k][1] ); |
---|
4002 | for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); |
---|
4003 | fprintf( stdout, "\n" ); |
---|
4004 | #endif |
---|
4005 | } |
---|
4006 | #if 1 |
---|
4007 | FreeIntMtx( eff ); eff = NULL; |
---|
4008 | free( (void *)tmptmplen ); tmptmplen = NULL; |
---|
4009 | free( hist ); hist = NULL; |
---|
4010 | free( (char *)ac ); ac = NULL; |
---|
4011 | #endif |
---|
4012 | } |
---|
4013 | void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len ) |
---|
4014 | /* len$B$O!"(B oeff$B$,@0?t!#(Blen$B$b<B$O@0?t!#(B |
---|
4015 | $BI,MW$K1~$8$F3d$C$F;H$&!#(B */ |
---|
4016 | { |
---|
4017 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
4018 | int *intpt, *intpt2; |
---|
4019 | int tmpint; |
---|
4020 | int eff1, eff0; |
---|
4021 | static int *tmptmplen = NULL; |
---|
4022 | static int **eff = NULL; |
---|
4023 | static int *hist = NULL; |
---|
4024 | static Achain *ac = NULL; |
---|
4025 | int minscore; |
---|
4026 | int im = -1, jm = -1; |
---|
4027 | int prevnode, acjmnext, acjmprev; |
---|
4028 | int *pt1, *pt2, *pt11, *pt22; |
---|
4029 | |
---|
4030 | |
---|
4031 | if( !eff ) |
---|
4032 | { |
---|
4033 | eff = AllocateIntMtx( njob, njob ); |
---|
4034 | hist = AllocateIntVec( njob ); |
---|
4035 | tmptmplen = AllocateIntVec( njob ); |
---|
4036 | ac = (Achain *)malloc( njob * sizeof( Achain ) ); |
---|
4037 | } |
---|
4038 | |
---|
4039 | for( i=0; i<nseq; i++ ) |
---|
4040 | { |
---|
4041 | for( j=0; j<nseq; j++ ) |
---|
4042 | { |
---|
4043 | eff[i][j] = ( oeff[i][j] ); |
---|
4044 | } |
---|
4045 | } |
---|
4046 | |
---|
4047 | for( i=0; i<nseq; i++ ) |
---|
4048 | { |
---|
4049 | ac[i].next = i+1; |
---|
4050 | ac[i].prev = i-1; |
---|
4051 | // ac[i].curr = i; |
---|
4052 | } |
---|
4053 | ac[nseq-1].next = -1; |
---|
4054 | |
---|
4055 | for( i=0; i<nseq; i++ ) tmptmplen[i] = 0; |
---|
4056 | for( i=0; i<nseq; i++ ) hist[i] = -1; |
---|
4057 | |
---|
4058 | fprintf( stderr, "\n" ); |
---|
4059 | for( k=0; k<nseq-1; k++ ) |
---|
4060 | { |
---|
4061 | if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq ); |
---|
4062 | |
---|
4063 | minscore = INTMTXSCALE*4; |
---|
4064 | for( i=0; ac[i].next!=-1; i=ac[i].next ) |
---|
4065 | { |
---|
4066 | for( j=ac[i].next; j!=-1; j=ac[j].next ) |
---|
4067 | { |
---|
4068 | tmpint = eff[i][j]; |
---|
4069 | if( tmpint < minscore ) |
---|
4070 | { |
---|
4071 | minscore = tmpint; |
---|
4072 | im = i; jm = j; |
---|
4073 | } |
---|
4074 | } |
---|
4075 | } |
---|
4076 | |
---|
4077 | // fprintf( stderr, "im=%d, jm=%d\n", im, jm ); |
---|
4078 | |
---|
4079 | intpt = topol[k][0]; |
---|
4080 | prevnode = hist[im]; |
---|
4081 | if( prevnode == -1 ) |
---|
4082 | { |
---|
4083 | *intpt++ = im; |
---|
4084 | *intpt = -1; |
---|
4085 | } |
---|
4086 | else |
---|
4087 | { |
---|
4088 | pt1 = topol[prevnode][0]; |
---|
4089 | pt2 = topol[prevnode][1]; |
---|
4090 | if( *pt1 > *pt2 ) |
---|
4091 | { |
---|
4092 | pt11 = pt2; |
---|
4093 | pt22 = pt1; |
---|
4094 | } |
---|
4095 | else |
---|
4096 | { |
---|
4097 | pt11 = pt1; |
---|
4098 | pt22 = pt2; |
---|
4099 | } |
---|
4100 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
4101 | *intpt++ = *intpt2++; |
---|
4102 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
4103 | *intpt++ = *intpt2++; |
---|
4104 | *intpt = -1; |
---|
4105 | } |
---|
4106 | |
---|
4107 | intpt = topol[k][1]; |
---|
4108 | prevnode = hist[jm]; |
---|
4109 | if( prevnode == -1 ) |
---|
4110 | { |
---|
4111 | *intpt++ = jm; |
---|
4112 | *intpt = -1; |
---|
4113 | } |
---|
4114 | else |
---|
4115 | { |
---|
4116 | pt1 = topol[prevnode][0]; |
---|
4117 | pt2 = topol[prevnode][1]; |
---|
4118 | if( *pt1 > *pt2 ) |
---|
4119 | { |
---|
4120 | pt11 = pt2; |
---|
4121 | pt22 = pt1; |
---|
4122 | } |
---|
4123 | else |
---|
4124 | { |
---|
4125 | pt11 = pt1; |
---|
4126 | pt22 = pt2; |
---|
4127 | } |
---|
4128 | for( intpt2=pt11; *intpt2!=-1; ) |
---|
4129 | *intpt++ = *intpt2++; |
---|
4130 | for( intpt2=pt22; *intpt2!=-1; ) |
---|
4131 | *intpt++ = *intpt2++; |
---|
4132 | *intpt = -1; |
---|
4133 | } |
---|
4134 | |
---|
4135 | minscore *= 0.5; |
---|
4136 | |
---|
4137 | len[k][0] = (double)( minscore - tmptmplen[im] ); |
---|
4138 | len[k][1] = (double)( minscore - tmptmplen[jm] ); |
---|
4139 | |
---|
4140 | tmptmplen[im] = minscore; |
---|
4141 | |
---|
4142 | #if 0 |
---|
4143 | free( tmptmplen ); |
---|
4144 | tmptmplen = AllocateIntVec( nseq ); |
---|
4145 | #endif |
---|
4146 | |
---|
4147 | |
---|
4148 | hist[im] = k; |
---|
4149 | |
---|
4150 | for( i=0; i!=-1; i=ac[i].next ) |
---|
4151 | { |
---|
4152 | if( i != im && i != jm ) |
---|
4153 | { |
---|
4154 | if( i < im ) |
---|
4155 | { |
---|
4156 | miniim = i; |
---|
4157 | maxiim = im; |
---|
4158 | minijm = i; |
---|
4159 | maxijm = jm; |
---|
4160 | } |
---|
4161 | else if( i < jm ) |
---|
4162 | { |
---|
4163 | miniim = im; |
---|
4164 | maxiim = i; |
---|
4165 | minijm = i; |
---|
4166 | maxijm = jm; |
---|
4167 | } |
---|
4168 | else |
---|
4169 | { |
---|
4170 | miniim = im; |
---|
4171 | maxiim = i; |
---|
4172 | minijm = jm; |
---|
4173 | maxijm = i; |
---|
4174 | } |
---|
4175 | eff0 = eff[miniim][maxiim]; |
---|
4176 | eff1 = eff[minijm][maxijm]; |
---|
4177 | eff[miniim][maxiim] = |
---|
4178 | (int) ( (float)MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + (float)( eff0 + eff1 ) * 0.5 * SUEFF ); |
---|
4179 | } |
---|
4180 | } |
---|
4181 | acjmprev = ac[jm].prev; |
---|
4182 | acjmnext = ac[jm].next; |
---|
4183 | ac[acjmprev].next = acjmnext; |
---|
4184 | if( acjmnext != -1 ) |
---|
4185 | ac[acjmnext].prev = acjmprev; |
---|
4186 | #if 0 |
---|
4187 | fprintf( stdout, "STEP-%03d:\n", k+1 ); |
---|
4188 | fprintf( stdout, "len0 = %f\n", len[k][0] ); |
---|
4189 | for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] ); |
---|
4190 | fprintf( stdout, "\n" ); |
---|
4191 | fprintf( stdout, "len1 = %f\n", len[k][1] ); |
---|
4192 | for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] ); |
---|
4193 | fprintf( stdout, "\n" ); |
---|
4194 | #endif |
---|
4195 | } |
---|
4196 | FreeIntMtx( eff ); eff = NULL; |
---|
4197 | free( (void *)tmptmplen ); tmptmplen = NULL; |
---|
4198 | free( hist ); hist = NULL; |
---|
4199 | free( (char *)ac ); ac = NULL; |
---|
4200 | } |
---|
4201 | void fastsupg( int nseq, double **oeff, int ***topol, double **len ) |
---|
4202 | { |
---|
4203 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
4204 | #if 0 |
---|
4205 | double eff[nseq][nseq]; |
---|
4206 | char pair[njob][njob]; |
---|
4207 | #else |
---|
4208 | static float *tmplen; |
---|
4209 | int *intpt; |
---|
4210 | float tmpfloat; |
---|
4211 | float eff1, eff0; |
---|
4212 | static float **eff = NULL; |
---|
4213 | static char **pair = NULL; |
---|
4214 | static Achain *ac; |
---|
4215 | float minscore; |
---|
4216 | int im = -1, jm = -1; |
---|
4217 | if( !eff ) |
---|
4218 | { |
---|
4219 | eff = AllocateFloatMtx( njob, njob ); |
---|
4220 | pair = AllocateCharMtx( njob, njob ); |
---|
4221 | tmplen = AllocateFloatVec( njob ); |
---|
4222 | ac = (Achain *)calloc( njob, sizeof( Achain ) ); |
---|
4223 | } |
---|
4224 | #endif |
---|
4225 | |
---|
4226 | for( i=0; i<nseq; i++ ) |
---|
4227 | { |
---|
4228 | for( j=0; j<nseq; j++ ) |
---|
4229 | { |
---|
4230 | eff[i][j] = (float)oeff[i][j]; |
---|
4231 | } |
---|
4232 | } |
---|
4233 | |
---|
4234 | for( i=0; i<nseq; i++ ) |
---|
4235 | { |
---|
4236 | ac[i].next = i+1; |
---|
4237 | ac[i].prev = i-1; |
---|
4238 | // ac[i].curr = i; |
---|
4239 | } |
---|
4240 | ac[nseq-1].next = -1; |
---|
4241 | |
---|
4242 | for( i=0; i<nseq; i++ ) tmplen[i] = 0.0; |
---|
4243 | for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0; |
---|
4244 | for( i=0; i<nseq; i++ ) pair[i][i] = 1; |
---|
4245 | |
---|
4246 | fprintf( stderr, "\n" ); |
---|
4247 | for( k=0; k<nseq-1; k++ ) |
---|
4248 | { |
---|
4249 | if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq ); |
---|
4250 | |
---|
4251 | minscore = 9999.0; |
---|
4252 | for( i=0; ac[i].next!=-1; i=ac[i].next ) |
---|
4253 | // for( i=0; i<nseq-1; i++ ) |
---|
4254 | { |
---|
4255 | for( j=ac[i].next; j!=-1; j=ac[j].next ) |
---|
4256 | // for( j=i+1; j<nseq; j++ ) |
---|
4257 | { |
---|
4258 | tmpfloat = eff[i][j]; |
---|
4259 | if( tmpfloat < minscore ) |
---|
4260 | { |
---|
4261 | minscore = tmpfloat; |
---|
4262 | im = i; jm = j; |
---|
4263 | } |
---|
4264 | } |
---|
4265 | } |
---|
4266 | |
---|
4267 | // fprintf( stderr, "im=%d, jm=%d\n", im, jm ); |
---|
4268 | |
---|
4269 | intpt = topol[k][0]; |
---|
4270 | for( i=0; i<nseq; i++ ) |
---|
4271 | if( pair[im][i] > 0 ) |
---|
4272 | *intpt++ = i; |
---|
4273 | *intpt = -1; |
---|
4274 | |
---|
4275 | intpt = topol[k][1]; |
---|
4276 | for( i=0; i<nseq; i++ ) |
---|
4277 | if( pair[jm][i] > 0 ) |
---|
4278 | *intpt++ = i; |
---|
4279 | *intpt = -1; |
---|
4280 | |
---|
4281 | minscore /= 2.0; |
---|
4282 | |
---|
4283 | len[k][0] = (double)minscore - tmplen[im]; |
---|
4284 | len[k][1] = (double)minscore - tmplen[jm]; |
---|
4285 | |
---|
4286 | tmplen[im] = (double)minscore; |
---|
4287 | |
---|
4288 | for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 ); |
---|
4289 | for( i=0; i<nseq; i++ ) pair[jm][i] = 0; |
---|
4290 | |
---|
4291 | // for( i=0; i<nseq; i++ ) |
---|
4292 | for( i=0; i!=-1; i=ac[i].next ) |
---|
4293 | { |
---|
4294 | if( i != im && i != jm ) |
---|
4295 | { |
---|
4296 | if( i < im ) |
---|
4297 | { |
---|
4298 | miniim = i; |
---|
4299 | maxiim = im; |
---|
4300 | minijm = i; |
---|
4301 | maxijm = jm; |
---|
4302 | } |
---|
4303 | else if( i < jm ) |
---|
4304 | { |
---|
4305 | miniim = im; |
---|
4306 | maxiim = i; |
---|
4307 | minijm = i; |
---|
4308 | maxijm = jm; |
---|
4309 | } |
---|
4310 | else |
---|
4311 | { |
---|
4312 | miniim = im; |
---|
4313 | maxiim = i; |
---|
4314 | minijm = jm; |
---|
4315 | maxijm = i; |
---|
4316 | } |
---|
4317 | eff0 = eff[miniim][maxiim]; |
---|
4318 | eff1 = eff[minijm][maxijm]; |
---|
4319 | eff[miniim][maxiim] = |
---|
4320 | MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + |
---|
4321 | ( eff0 + eff1 ) * 0.5 * SUEFF; |
---|
4322 | // eff[minijm][maxijm] = 9999.0; |
---|
4323 | } |
---|
4324 | } |
---|
4325 | ac[ac[jm].prev].next = ac[jm].next; |
---|
4326 | ac[ac[jm].next].prev = ac[jm].prev; |
---|
4327 | // eff[im][jm] = 9999.0; |
---|
4328 | #if 0 |
---|
4329 | fprintf( stderr, "STEP-%03d:\n", k+1 ); |
---|
4330 | fprintf( stderr, "len0 = %f\n", len[k][0] ); |
---|
4331 | for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] ); |
---|
4332 | fprintf( stderr, "\n" ); |
---|
4333 | fprintf( stderr, "len1 = %f\n", len[k][1] ); |
---|
4334 | for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] ); |
---|
4335 | fprintf( stderr, "\n" ); |
---|
4336 | #endif |
---|
4337 | } |
---|
4338 | fprintf( stderr, "\n" ); |
---|
4339 | |
---|
4340 | // FreeFloatMtx( eff ); |
---|
4341 | // FreeCharMtx( pair ); |
---|
4342 | // FreeFloatVec( tmplen ); |
---|
4343 | // free( ac ); |
---|
4344 | } |
---|
4345 | void supg( int nseq, double **oeff, int ***topol, double **len ) |
---|
4346 | { |
---|
4347 | int i, j, k, miniim, maxiim, minijm, maxijm; |
---|
4348 | #if 0 |
---|
4349 | double eff[nseq][nseq]; |
---|
4350 | char pair[njob][njob]; |
---|
4351 | #else |
---|
4352 | static float *tmplen; |
---|
4353 | int *intpt; |
---|
4354 | float **floatptpt; |
---|
4355 | float *floatpt; |
---|
4356 | float tmpfloat; |
---|
4357 | float eff1, eff0; |
---|
4358 | static float **eff = NULL; |
---|
4359 | static char **pair = NULL; |
---|
4360 | if( !eff ) |
---|
4361 | { |
---|
4362 | eff = AllocateFloatMtx( njob, njob ); |
---|
4363 | pair = AllocateCharMtx( njob, njob ); |
---|
4364 | tmplen = AllocateFloatVec( njob ); |
---|
4365 | } |
---|
4366 | #endif |
---|
4367 | |
---|
4368 | |
---|
4369 | for( i=0; i<nseq; i++ ) |
---|
4370 | { |
---|
4371 | for( j=0; j<nseq; j++ ) |
---|
4372 | { |
---|
4373 | eff[i][j] = (float)oeff[i][j]; |
---|
4374 | } |
---|
4375 | } |
---|
4376 | for( i=0; i<nseq; i++ ) tmplen[i] = 0.0; |
---|
4377 | for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0; |
---|
4378 | for( i=0; i<nseq; i++ ) pair[i][i] = 1; |
---|
4379 | |
---|
4380 | for( k=0; k<nseq-1; k++ ) |
---|
4381 | { |
---|
4382 | float minscore = 9999.0; |
---|
4383 | int im = -1, jm = -1; |
---|
4384 | |
---|
4385 | |
---|
4386 | floatptpt = eff; |
---|
4387 | for( i=0; i<nseq-1; i++ ) |
---|
4388 | { |
---|
4389 | floatpt = *floatptpt++ + i + 1; |
---|
4390 | for( j=i+1; j<nseq; j++ ) |
---|
4391 | { |
---|
4392 | tmpfloat = *floatpt++; |
---|
4393 | if( tmpfloat < minscore ) |
---|
4394 | { |
---|
4395 | minscore = tmpfloat; |
---|
4396 | im = i; jm = j; |
---|
4397 | } |
---|
4398 | } |
---|
4399 | } |
---|
4400 | intpt = topol[k][0]; |
---|
4401 | for( i=0; i<nseq; i++ ) |
---|
4402 | if( pair[im][i] > 0 ) |
---|
4403 | *intpt++ = i; |
---|
4404 | *intpt = -1; |
---|
4405 | |
---|
4406 | intpt = topol[k][1]; |
---|
4407 | for( i=0; i<nseq; i++ ) |
---|
4408 | if( pair[jm][i] > 0 ) |
---|
4409 | *intpt++ = i; |
---|
4410 | *intpt = -1; |
---|
4411 | |
---|
4412 | len[k][0] = (double)minscore / 2.0 - tmplen[im]; |
---|
4413 | len[k][1] = (double)minscore / 2.0 - tmplen[jm]; |
---|
4414 | |
---|
4415 | tmplen[im] = (double)minscore / 2.0; |
---|
4416 | |
---|
4417 | for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 ); |
---|
4418 | for( i=0; i<nseq; i++ ) pair[jm][i] = 0; |
---|
4419 | |
---|
4420 | for( i=0; i<nseq; i++ ) |
---|
4421 | { |
---|
4422 | if( i != im && i != jm ) |
---|
4423 | { |
---|
4424 | #if 1 |
---|
4425 | if( i < im ) |
---|
4426 | { |
---|
4427 | miniim = i; |
---|
4428 | maxiim = im; |
---|
4429 | minijm = i; |
---|
4430 | maxijm = jm; |
---|
4431 | } |
---|
4432 | else if( i < jm ) |
---|
4433 | { |
---|
4434 | miniim = im; |
---|
4435 | maxiim = i; |
---|
4436 | minijm = i; |
---|
4437 | maxijm = jm; |
---|
4438 | } |
---|
4439 | else |
---|
4440 | { |
---|
4441 | miniim = im; |
---|
4442 | maxiim = i; |
---|
4443 | minijm = jm; |
---|
4444 | maxijm = i; |
---|
4445 | } |
---|
4446 | #else |
---|
4447 | miniim = MIN( i, im ); |
---|
4448 | maxiim = MAX( i, im ); |
---|
4449 | minijm = MIN( i, jm ); |
---|
4450 | maxijm = MAX( i, jm ); |
---|
4451 | #endif |
---|
4452 | #if 1 |
---|
4453 | eff0 = eff[miniim][maxiim]; |
---|
4454 | eff1 = eff[minijm][maxijm]; |
---|
4455 | eff[miniim][maxiim] = |
---|
4456 | MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + |
---|
4457 | ( eff0 + eff1 ) * 0.5 * SUEFF; |
---|
4458 | #else |
---|
4459 | MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - SUEFF ) + |
---|
4460 | ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * SUEFF; |
---|
4461 | #endif |
---|
4462 | eff[minijm][maxijm] = 9999.0; |
---|
4463 | eff[im][jm] = 9999.0; |
---|
4464 | } |
---|
4465 | } |
---|
4466 | #if DEBUG |
---|
4467 | printf( "STEP-%03d:\n", k+1 ); |
---|
4468 | printf( "len0 = %f\n", len[k][0] ); |
---|
4469 | for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] ); |
---|
4470 | printf( "\n" ); |
---|
4471 | printf( "len1 = %f\n", len[k][1] ); |
---|
4472 | for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); |
---|
4473 | printf( "\n" ); |
---|
4474 | #endif |
---|
4475 | } |
---|
4476 | } |
---|
4477 | |
---|
4478 | void spg( int nseq, double **oeff, int ***topol, double **len ) |
---|
4479 | { |
---|
4480 | int i, j, k; |
---|
4481 | double tmplen[M]; |
---|
4482 | #if 0 |
---|
4483 | double eff[nseq][nseq]; |
---|
4484 | char pair[njob][njob]; |
---|
4485 | #else |
---|
4486 | double **eff = NULL; |
---|
4487 | char **pair = NULL; |
---|
4488 | if( !eff ) |
---|
4489 | { |
---|
4490 | eff = AllocateDoubleMtx( njob, njob ); |
---|
4491 | pair = AllocateCharMtx( njob, njob ); |
---|
4492 | } |
---|
4493 | #endif |
---|
4494 | |
---|
4495 | for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j]; |
---|
4496 | for( i=0; i<nseq; i++ ) tmplen[i] = 0.0; |
---|
4497 | for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0; |
---|
4498 | for( i=0; i<nseq; i++ ) pair[i][i] = 1; |
---|
4499 | |
---|
4500 | for( k=0; k<nseq-1; k++ ) |
---|
4501 | { |
---|
4502 | float minscore = 9999.0; |
---|
4503 | int im = -1, jm = -1; |
---|
4504 | int count; |
---|
4505 | |
---|
4506 | for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
4507 | { |
---|
4508 | if( eff[i][j] < minscore ) |
---|
4509 | { |
---|
4510 | minscore = eff[i][j]; |
---|
4511 | im = i; jm = j; |
---|
4512 | } |
---|
4513 | } |
---|
4514 | for( i=0, count=0; i<nseq; i++ ) |
---|
4515 | if( pair[im][i] > 0 ) |
---|
4516 | { |
---|
4517 | topol[k][0][count] = i; |
---|
4518 | count++; |
---|
4519 | } |
---|
4520 | topol[k][0][count] = -1; |
---|
4521 | for( i=0, count=0; i<nseq; i++ ) |
---|
4522 | if( pair[jm][i] > 0 ) |
---|
4523 | { |
---|
4524 | topol[k][1][count] = i; |
---|
4525 | count++; |
---|
4526 | } |
---|
4527 | topol[k][1][count] = -1; |
---|
4528 | |
---|
4529 | len[k][0] = minscore / 2.0 - tmplen[im]; |
---|
4530 | len[k][1] = minscore / 2.0 - tmplen[jm]; |
---|
4531 | |
---|
4532 | tmplen[im] = minscore / 2.0; |
---|
4533 | |
---|
4534 | for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 ); |
---|
4535 | for( i=0; i<nseq; i++ ) pair[jm][i] = 0; |
---|
4536 | |
---|
4537 | for( i=0; i<nseq; i++ ) |
---|
4538 | { |
---|
4539 | if( i != im && i != jm ) |
---|
4540 | { |
---|
4541 | eff[MIN(i,im)][MAX(i,im)] = |
---|
4542 | MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] ); |
---|
4543 | eff[MIN(i,jm)][MAX(i,jm)] = 9999.0; |
---|
4544 | } |
---|
4545 | eff[im][jm] = 9999.0; |
---|
4546 | } |
---|
4547 | #if DEBUG |
---|
4548 | printf( "STEP-%03d:\n", k+1 ); |
---|
4549 | printf( "len0 = %f\n", len[k][0] ); |
---|
4550 | for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] ); |
---|
4551 | printf( "\n" ); |
---|
4552 | printf( "len1 = %f\n", len[k][1] ); |
---|
4553 | for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] ); |
---|
4554 | printf( "\n" ); |
---|
4555 | #endif |
---|
4556 | } |
---|
4557 | } |
---|
4558 | |
---|
/*
 * ipower
 *
 * Returns x raised to the integer power n, using binary exponentiation
 * (one squaring per bit of the exponent).
 *
 *   n > 0  : x multiplied by itself n times
 *   n == 0 : 1.0
 *   n < 0  : the reciprocal 1/x raised to the power -n.  Right-shifting a
 *            negative int never reaches zero, so the exponent is processed
 *            as an unsigned magnitude to guarantee termination.
 */
double ipower( double x, int n )
{
	unsigned int e;
	double r = 1;

	if( n < 0 )
	{
		/* unsigned negation is well defined even for the most negative int */
		e = 0u - (unsigned int)n;
		x = 1.0 / x;
	}
	else
	{
		e = (unsigned int)n;
	}

	while( e != 0 )
	{
		if( e & 1u ) r *= x;
		x *= x; e >>= 1;
	}
	return( r );
}
---|
4571 | |
---|
4572 | void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */ |
---|
4573 | { |
---|
4574 | int i, j, k, s1, s2; |
---|
4575 | static double rootnode[M]; |
---|
4576 | |
---|
4577 | if( nseq-2 < 0 ) |
---|
4578 | { |
---|
4579 | fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq ); |
---|
4580 | exit( 1 ); |
---|
4581 | } |
---|
4582 | |
---|
4583 | for( i=0; i<nseq; i++ ) rootnode[i] = 0; |
---|
4584 | for( i=0; i<nseq-2; i++ ) |
---|
4585 | { |
---|
4586 | for( j=0; topol[i][0][j]>-1; j++ ) |
---|
4587 | rootnode[topol[i][0][j]]++; |
---|
4588 | for( j=0; topol[i][1][j]>-1; j++ ) |
---|
4589 | rootnode[topol[i][1][j]]++; |
---|
4590 | for( j=0; topol[i][0][j]>-1; j++ ) |
---|
4591 | { |
---|
4592 | s1 = topol[i][0][j]; |
---|
4593 | for( k=0; topol[i][1][k]>-1; k++ ) |
---|
4594 | { |
---|
4595 | s2 = topol[i][1][k]; |
---|
4596 | node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; |
---|
4597 | } |
---|
4598 | } |
---|
4599 | } |
---|
4600 | for( j=0; topol[nseq-2][0][j]>-1; j++ ) |
---|
4601 | { |
---|
4602 | s1 = topol[nseq-2][0][j]; |
---|
4603 | for( k=0; topol[nseq-2][1][k]>-1; k++ ) |
---|
4604 | { |
---|
4605 | s2 = topol[nseq-2][1][k]; |
---|
4606 | node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; |
---|
4607 | } |
---|
4608 | } |
---|
4609 | } |
---|
4610 | |
---|
4611 | void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */ |
---|
4612 | { |
---|
4613 | int i, j, k, s1, s2; |
---|
4614 | int rootnode[M]; |
---|
4615 | |
---|
4616 | for( i=0; i<nseq; i++ ) rootnode[i] = 0; |
---|
4617 | for( i=0; i<nseq-2; i++ ) |
---|
4618 | { |
---|
4619 | for( j=0; topol[i][0][j]>-1; j++ ) |
---|
4620 | rootnode[topol[i][0][j]]++; |
---|
4621 | for( j=0; topol[i][1][j]>-1; j++ ) |
---|
4622 | rootnode[topol[i][1][j]]++; |
---|
4623 | for( j=0; topol[i][0][j]>-1; j++ ) |
---|
4624 | { |
---|
4625 | s1 = topol[i][0][j]; |
---|
4626 | for( k=0; topol[i][1][k]>-1; k++ ) |
---|
4627 | { |
---|
4628 | s2 = topol[i][1][k]; |
---|
4629 | node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; |
---|
4630 | } |
---|
4631 | } |
---|
4632 | } |
---|
4633 | for( j=0; topol[nseq-2][0][j]>-1; j++ ) |
---|
4634 | { |
---|
4635 | s1 = topol[nseq-2][0][j]; |
---|
4636 | for( k=0; topol[nseq-2][1][k]>-1; k++ ) |
---|
4637 | { |
---|
4638 | s2 = topol[nseq-2][1][k]; |
---|
4639 | node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; |
---|
4640 | } |
---|
4641 | } |
---|
4642 | for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
4643 | node[j][i] = node[i][j]; |
---|
4644 | #if DEBUG |
---|
4645 | fprintf( stderr, "node[][] in countnode_int" ); |
---|
4646 | for( i=0; i<nseq; i++ ) |
---|
4647 | { |
---|
4648 | for( j=0; j<nseq; j++ ) |
---|
4649 | { |
---|
4650 | fprintf( stderr, "%#3d", node[i][j] ); |
---|
4651 | } |
---|
4652 | fprintf( stderr, "\n" ); |
---|
4653 | } |
---|
4654 | #endif |
---|
4655 | } |
---|
4656 | |
---|
4657 | void counteff_simple_float( int nseq, int ***topol, float **len, double *node ) |
---|
4658 | { |
---|
4659 | int i, j, s1, s2; |
---|
4660 | double total; |
---|
4661 | static double rootnode[M]; |
---|
4662 | static double eff[M]; |
---|
4663 | |
---|
4664 | #if DEBUG |
---|
4665 | for( i=0; i<nseq; i++ ){ |
---|
4666 | fprintf( stderr, "len0 = %f\n", len[i][0] ); |
---|
4667 | fprintf( stderr, "len1 = %f\n", len[i][1] ); |
---|
4668 | } |
---|
4669 | #endif |
---|
4670 | for( i=0; i<nseq; i++ ) |
---|
4671 | { |
---|
4672 | rootnode[i] = 0.0; |
---|
4673 | eff[i] = 1.0; |
---|
4674 | /* |
---|
4675 | rootnode[i] = 1.0; |
---|
4676 | */ |
---|
4677 | } |
---|
4678 | for( i=0; i<nseq-1; i++ ) |
---|
4679 | { |
---|
4680 | for( j=0; (s1=topol[i][0][j]) > -1; j++ ) |
---|
4681 | { |
---|
4682 | rootnode[s1] += (double)len[i][0] * eff[s1]; |
---|
4683 | eff[s1] *= 0.5; |
---|
4684 | /* |
---|
4685 | rootnode[s1] *= 0.5; |
---|
4686 | */ |
---|
4687 | |
---|
4688 | } |
---|
4689 | for( j=0; (s2=topol[i][1][j]) > -1; j++ ) |
---|
4690 | { |
---|
4691 | rootnode[s2] += (double)len[i][1] * eff[s2]; |
---|
4692 | eff[s2] *= 0.5; |
---|
4693 | /* |
---|
4694 | rootnode[s2] *= 0.5; |
---|
4695 | */ |
---|
4696 | |
---|
4697 | } |
---|
4698 | } |
---|
4699 | for( i=0; i<nseq; i++ ) |
---|
4700 | { |
---|
4701 | #if 1 /* 97.9.29 */ |
---|
4702 | rootnode[i] += GETA3; |
---|
4703 | #endif |
---|
4704 | #if 0 |
---|
4705 | fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] ); |
---|
4706 | #endif |
---|
4707 | } |
---|
4708 | #if 1 |
---|
4709 | total = 0.0; |
---|
4710 | for( i=0; i<nseq; i++ ) |
---|
4711 | { |
---|
4712 | total += rootnode[i]; |
---|
4713 | } |
---|
4714 | #else |
---|
4715 | total = 1.0; |
---|
4716 | #endif |
---|
4717 | |
---|
4718 | for( i=0; i<nseq; i++ ) |
---|
4719 | { |
---|
4720 | node[i] = rootnode[i] / total; |
---|
4721 | } |
---|
4722 | |
---|
4723 | #if 0 |
---|
4724 | fprintf( stderr, "weight array in counteff_simple\n" ); |
---|
4725 | for( i=0; i<nseq; i++ ) |
---|
4726 | fprintf( stderr, "%f\n", node[i] ); |
---|
4727 | printf( "\n" ); |
---|
4728 | exit( 1 ); |
---|
4729 | #endif |
---|
4730 | } |
---|
4731 | |
---|
4732 | |
---|
4733 | void counteff_simple_float_nostatic( int nseq, int ***topol, float **len, double *node ) |
---|
4734 | { |
---|
4735 | int i, j, s1, s2; |
---|
4736 | double total; |
---|
4737 | double *rootnode; |
---|
4738 | double *eff; |
---|
4739 | |
---|
4740 | rootnode = AllocateDoubleVec( nseq ); |
---|
4741 | eff = AllocateDoubleVec( nseq ); |
---|
4742 | |
---|
4743 | #if DEBUG |
---|
4744 | for( i=0; i<nseq; i++ ){ |
---|
4745 | fprintf( stderr, "len0 = %f\n", len[i][0] ); |
---|
4746 | fprintf( stderr, "len1 = %f\n", len[i][1] ); |
---|
4747 | } |
---|
4748 | #endif |
---|
4749 | for( i=0; i<nseq; i++ ) |
---|
4750 | { |
---|
4751 | rootnode[i] = 0.0; |
---|
4752 | eff[i] = 1.0; |
---|
4753 | /* |
---|
4754 | rootnode[i] = 1.0; |
---|
4755 | */ |
---|
4756 | } |
---|
4757 | for( i=0; i<nseq-1; i++ ) |
---|
4758 | { |
---|
4759 | for( j=0; (s1=topol[i][0][j]) > -1; j++ ) |
---|
4760 | { |
---|
4761 | rootnode[s1] += (double)len[i][0] * eff[s1]; |
---|
4762 | eff[s1] *= 0.5; |
---|
4763 | /* |
---|
4764 | rootnode[s1] *= 0.5; |
---|
4765 | */ |
---|
4766 | |
---|
4767 | } |
---|
4768 | for( j=0; (s2=topol[i][1][j]) > -1; j++ ) |
---|
4769 | { |
---|
4770 | rootnode[s2] += (double)len[i][1] * eff[s2]; |
---|
4771 | eff[s2] *= 0.5; |
---|
4772 | /* |
---|
4773 | rootnode[s2] *= 0.5; |
---|
4774 | */ |
---|
4775 | |
---|
4776 | } |
---|
4777 | } |
---|
4778 | for( i=0; i<nseq; i++ ) |
---|
4779 | { |
---|
4780 | #if 1 /* 97.9.29 */ |
---|
4781 | rootnode[i] += GETA3; |
---|
4782 | #endif |
---|
4783 | #if 0 |
---|
4784 | fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] ); |
---|
4785 | #endif |
---|
4786 | } |
---|
4787 | #if 1 |
---|
4788 | total = 0.0; |
---|
4789 | for( i=0; i<nseq; i++ ) |
---|
4790 | { |
---|
4791 | total += rootnode[i]; |
---|
4792 | } |
---|
4793 | #else |
---|
4794 | total = 1.0; |
---|
4795 | #endif |
---|
4796 | |
---|
4797 | for( i=0; i<nseq; i++ ) |
---|
4798 | { |
---|
4799 | node[i] = rootnode[i] / total; |
---|
4800 | } |
---|
4801 | |
---|
4802 | #if 0 |
---|
4803 | fprintf( stderr, "weight array in counteff_simple\n" ); |
---|
4804 | for( i=0; i<nseq; i++ ) |
---|
4805 | fprintf( stderr, "%f\n", node[i] ); |
---|
4806 | printf( "\n" ); |
---|
4807 | exit( 1 ); |
---|
4808 | #endif |
---|
4809 | free( rootnode ); |
---|
4810 | free( eff ); |
---|
4811 | } |
---|
4812 | |
---|
4813 | void counteff_simple( int nseq, int ***topol, double **len, double *node ) |
---|
4814 | { |
---|
4815 | int i, j, s1, s2; |
---|
4816 | double total; |
---|
4817 | #if 0 |
---|
4818 | static double rootnode[M]; |
---|
4819 | static double eff[M]; |
---|
4820 | #else |
---|
4821 | double *rootnode; |
---|
4822 | double *eff; |
---|
4823 | rootnode = AllocateDoubleVec( nseq ); |
---|
4824 | eff = AllocateDoubleVec( nseq ); |
---|
4825 | #endif |
---|
4826 | |
---|
4827 | #if DEBUG |
---|
4828 | for( i=0; i<nseq; i++ ){ |
---|
4829 | fprintf( stderr, "len0 = %f\n", len[i][0] ); |
---|
4830 | fprintf( stderr, "len1 = %f\n", len[i][1] ); |
---|
4831 | } |
---|
4832 | #endif |
---|
4833 | for( i=0; i<nseq; i++ ) |
---|
4834 | { |
---|
4835 | rootnode[i] = 0.0; |
---|
4836 | eff[i] = 1.0; |
---|
4837 | /* |
---|
4838 | rootnode[i] = 1.0; |
---|
4839 | */ |
---|
4840 | } |
---|
4841 | for( i=0; i<nseq-1; i++ ) |
---|
4842 | { |
---|
4843 | for( j=0; (s1=topol[i][0][j]) > -1; j++ ) |
---|
4844 | { |
---|
4845 | rootnode[s1] += len[i][0] * eff[s1]; |
---|
4846 | eff[s1] *= 0.5; |
---|
4847 | /* |
---|
4848 | rootnode[s1] *= 0.5; |
---|
4849 | */ |
---|
4850 | |
---|
4851 | } |
---|
4852 | for( j=0; (s2=topol[i][1][j]) > -1; j++ ) |
---|
4853 | { |
---|
4854 | rootnode[s2] += len[i][1] * eff[s2]; |
---|
4855 | eff[s2] *= 0.5; |
---|
4856 | /* |
---|
4857 | rootnode[s2] *= 0.5; |
---|
4858 | */ |
---|
4859 | |
---|
4860 | } |
---|
4861 | } |
---|
4862 | for( i=0; i<nseq; i++ ) |
---|
4863 | { |
---|
4864 | #if 1 /* 97.9.29 */ |
---|
4865 | rootnode[i] += GETA3; |
---|
4866 | #endif |
---|
4867 | #if 0 |
---|
4868 | fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] ); |
---|
4869 | #endif |
---|
4870 | } |
---|
4871 | #if 1 |
---|
4872 | total = 0.0; |
---|
4873 | for( i=0; i<nseq; i++ ) |
---|
4874 | { |
---|
4875 | total += rootnode[i]; |
---|
4876 | } |
---|
4877 | #else |
---|
4878 | total = 1.0; |
---|
4879 | #endif |
---|
4880 | |
---|
4881 | for( i=0; i<nseq; i++ ) |
---|
4882 | { |
---|
4883 | node[i] = rootnode[i] / total; |
---|
4884 | } |
---|
4885 | |
---|
4886 | #if 0 |
---|
4887 | fprintf( stderr, "weight array in counteff_simple\n" ); |
---|
4888 | for( i=0; i<nseq; i++ ) |
---|
4889 | fprintf( stderr, "%f\n", node[i] ); |
---|
4890 | printf( "\n" ); |
---|
4891 | exit( 1 ); |
---|
4892 | #endif |
---|
4893 | #if 1 |
---|
4894 | free( rootnode ); |
---|
4895 | free( eff ); |
---|
4896 | #endif |
---|
4897 | } |
---|
4898 | |
---|
4899 | |
---|
4900 | void counteff( int nseq, int ***topol, double **len, double **node ) |
---|
4901 | { |
---|
4902 | int i, j, k, s1, s2; |
---|
4903 | double rootnode[M]; |
---|
4904 | double eff[M]; |
---|
4905 | |
---|
4906 | if( mix ) |
---|
4907 | { |
---|
4908 | switch( weight ) |
---|
4909 | { |
---|
4910 | case( 2 ): |
---|
4911 | weight = 3; |
---|
4912 | break; |
---|
4913 | case( 3 ): |
---|
4914 | weight = 2; |
---|
4915 | break; |
---|
4916 | default: |
---|
4917 | ErrorExit( "mix error" ); |
---|
4918 | break; |
---|
4919 | } |
---|
4920 | } |
---|
4921 | |
---|
4922 | if( weight == 2 ) |
---|
4923 | { |
---|
4924 | for( i=0; i<nseq; i++ ) rootnode[i] = 0; |
---|
4925 | for( i=0; i<nseq-2; i++ ) |
---|
4926 | { |
---|
4927 | for( j=0; topol[i][0][j]>-1; j++ ) |
---|
4928 | rootnode[topol[i][0][j]]++; |
---|
4929 | for( j=0; topol[i][1][j]>-1; j++ ) |
---|
4930 | rootnode[topol[i][1][j]]++; |
---|
4931 | for( j=0; topol[i][0][j]>-1; j++ ) |
---|
4932 | { |
---|
4933 | s1 = topol[i][0][j]; |
---|
4934 | for( k=0; topol[i][1][k]>-1; k++ ) |
---|
4935 | { |
---|
4936 | s2 = topol[i][1][k]; |
---|
4937 | node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1; |
---|
4938 | } |
---|
4939 | } |
---|
4940 | } |
---|
4941 | for( j=0; topol[nseq-2][0][j]>-1; j++ ) |
---|
4942 | { |
---|
4943 | s1 = topol[nseq-2][0][j]; |
---|
4944 | for( k=0; topol[nseq-2][1][k]>-1; k++ ) |
---|
4945 | { |
---|
4946 | s2 = topol[nseq-2][1][k]; |
---|
4947 | node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2]; |
---|
4948 | } |
---|
4949 | } |
---|
4950 | for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
4951 | node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2; |
---|
4952 | for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
4953 | node[j][i] = node[i][j]; |
---|
4954 | } |
---|
4955 | |
---|
4956 | if( weight == 3 ) |
---|
4957 | { |
---|
4958 | #if DEBUG |
---|
4959 | for( i=0; i<nseq; i++ ){ |
---|
4960 | fprintf( stderr, "len0 = %f\n", len[i][0] ); |
---|
4961 | fprintf( stderr, "len1 = %f\n", len[i][1] ); |
---|
4962 | } |
---|
4963 | #endif |
---|
4964 | for( i=0; i<nseq; i++ ) |
---|
4965 | { |
---|
4966 | rootnode[i] = 0.0; |
---|
4967 | eff[i] = 1.0; |
---|
4968 | /* |
---|
4969 | rootnode[i] = 1.0; |
---|
4970 | */ |
---|
4971 | } |
---|
4972 | for( i=0; i<nseq-1; i++ ) |
---|
4973 | { |
---|
4974 | for( j=0; (s1=topol[i][0][j]) > -1; j++ ) |
---|
4975 | { |
---|
4976 | rootnode[s1] += len[i][0] * eff[s1]; |
---|
4977 | eff[s1] *= 0.5; |
---|
4978 | /* |
---|
4979 | rootnode[s1] *= 0.5; |
---|
4980 | */ |
---|
4981 | |
---|
4982 | } |
---|
4983 | for( j=0; (s2=topol[i][1][j]) > -1; j++ ) |
---|
4984 | { |
---|
4985 | rootnode[s2] += len[i][1] * eff[s2]; |
---|
4986 | eff[s2] *= 0.5; |
---|
4987 | /* |
---|
4988 | rootnode[s2] *= 0.5; |
---|
4989 | */ |
---|
4990 | |
---|
4991 | } |
---|
4992 | } |
---|
4993 | for( i=0; i<nseq; i++ ) |
---|
4994 | { |
---|
4995 | #if 1 /* 97.9.29 */ |
---|
4996 | rootnode[i] += GETA3; |
---|
4997 | #endif |
---|
4998 | #if DEBUG |
---|
4999 | fprintf( stderr, "rootnode for %d = %f\n", i, rootnode[i] ); |
---|
5000 | #endif |
---|
5001 | } |
---|
5002 | for( i=0; i<nseq; i++ ) |
---|
5003 | { |
---|
5004 | for( j=0; j<nseq; j++ ) |
---|
5005 | if( j != i ) |
---|
5006 | node[i][j] = (double)rootnode[i] * rootnode[j]; |
---|
5007 | else node[i][i] = rootnode[i]; |
---|
5008 | } |
---|
5009 | } |
---|
5010 | |
---|
5011 | #if 0 |
---|
5012 | printf( "weight matrix in counteff\n" ); |
---|
5013 | for( i=0; i<nseq; i++ ) |
---|
5014 | { |
---|
5015 | for( j=0; j<nseq; j++ ) |
---|
5016 | { |
---|
5017 | printf( "%f ", node[i][j] ); |
---|
5018 | } |
---|
5019 | printf( "\n" ); |
---|
5020 | } |
---|
5021 | #endif |
---|
5022 | } |
---|
5023 | |
---|
5024 | float score_calcp( char *seq1, char *seq2, int len ) |
---|
5025 | { |
---|
5026 | int k; |
---|
5027 | int ms1, ms2; |
---|
5028 | float tmpscore; |
---|
5029 | int len2 = len - 2; |
---|
5030 | |
---|
5031 | tmpscore = 0.0; |
---|
5032 | for( k=0; k<len; k++ ) |
---|
5033 | { |
---|
5034 | ms1 = (int)seq1[k]; |
---|
5035 | ms2 = (int)seq2[k]; |
---|
5036 | if( ms1 == (int)'-' && ms2 == (int)'-' ) continue; |
---|
5037 | tmpscore += (float)amino_dis[ms1][ms2]; |
---|
5038 | |
---|
5039 | if( ms1 == (int)'-' ) |
---|
5040 | { |
---|
5041 | tmpscore += (float)penalty; |
---|
5042 | tmpscore += (float)amino_dis[ms1][ms2]; |
---|
5043 | while( (ms1=(int)seq1[++k]) == (int)'-' ) |
---|
5044 | tmpscore += (float)amino_dis[ms1][ms2]; |
---|
5045 | k--; |
---|
5046 | if( k >len2 ) break; |
---|
5047 | continue; |
---|
5048 | } |
---|
5049 | if( ms2 == (int)'-' ) |
---|
5050 | { |
---|
5051 | tmpscore += (float)penalty; |
---|
5052 | tmpscore += (float)amino_dis[ms1][ms2]; |
---|
5053 | while( (ms2=(int)seq2[++k]) == (int)'-' ) |
---|
5054 | tmpscore += (float)amino_dis[ms1][ms2]; |
---|
5055 | k--; |
---|
5056 | if( k > len2 ) break; |
---|
5057 | continue; |
---|
5058 | } |
---|
5059 | } |
---|
5060 | return( tmpscore ); |
---|
5061 | } |
---|
5062 | |
---|
5063 | float score_calc1( char *seq1, char *seq2 ) /* method 1 */ |
---|
5064 | { |
---|
5065 | int k; |
---|
5066 | float score = 0.0; |
---|
5067 | int count = 0; |
---|
5068 | int len = strlen( seq1 ); |
---|
5069 | |
---|
5070 | for( k=0; k<len; k++ ) |
---|
5071 | { |
---|
5072 | if( seq1[k] != '-' && seq2[k] != '-' ) |
---|
5073 | { |
---|
5074 | score += (float)amino_dis[(int)seq1[k]][(int)seq2[k]]; |
---|
5075 | count++; |
---|
5076 | } |
---|
5077 | } |
---|
5078 | if( count ) score /= (float)count; |
---|
5079 | else score = 1.0; |
---|
5080 | return( score ); |
---|
5081 | } |
---|
5082 | |
---|
/*
 * substitution_nid
 *
 * Counts the columns (0 .. strlen(seq1)-1) in which neither sequence has
 * a gap ('-') and both hold the same character.  The count is returned
 * as a float.
 */
float substitution_nid( char *seq1, char *seq2 )
{
	float nidentical = 0.0;
	int pos;
	const int len = strlen( seq1 );

	for( pos=0; pos<len; pos++ )
	{
		if( seq1[pos] == '-' || seq2[pos] == '-' ) continue;
		if( seq1[pos] == seq2[pos] ) nidentical += 1;
	}

	return( nidentical );
}
---|
5097 | |
---|
5098 | float substitution_score( char *seq1, char *seq2 ) |
---|
5099 | { |
---|
5100 | int k; |
---|
5101 | float s12; |
---|
5102 | int len = strlen( seq1 ); |
---|
5103 | |
---|
5104 | s12 = 0.0; |
---|
5105 | for( k=0; k<len; k++ ) |
---|
5106 | if( seq1[k] != '-' && seq2[k] != '-' ) |
---|
5107 | s12 += amino_dis[(int)seq1[k]][(int)seq2[k]]; |
---|
5108 | |
---|
5109 | // fprintf( stdout, "s12 = %f\n", s12 ); |
---|
5110 | return( s12 ); |
---|
5111 | } |
---|
5112 | |
---|
5113 | float substitution_hosei( char *seq1, char *seq2 ) /* method 1 */ |
---|
5114 | #if 0 |
---|
5115 | { |
---|
5116 | int k; |
---|
5117 | float score = 0.0; |
---|
5118 | int count = 0; |
---|
5119 | int len = strlen( seq1 ); |
---|
5120 | |
---|
5121 | for( k=0; k<len; k++ ) |
---|
5122 | { |
---|
5123 | if( seq1[k] != '-' && seq2[k] != '-' ) |
---|
5124 | { |
---|
5125 | score += (float)( seq1[k] != seq2[k] ); |
---|
5126 | count++; |
---|
5127 | } |
---|
5128 | } |
---|
5129 | if( count ) score /= (float)count; |
---|
5130 | else score = 1.0; |
---|
5131 | if( score < 0.95 ) score = - log( 1.0 - score ); |
---|
5132 | else score = 3.0; |
---|
5133 | return( score ); |
---|
5134 | } |
---|
5135 | #else |
---|
5136 | { |
---|
5137 | int count = 0; |
---|
5138 | float score; |
---|
5139 | int iscore = 0; |
---|
5140 | char s1, s2; |
---|
5141 | |
---|
5142 | while( (s1=*seq1++) ) |
---|
5143 | { |
---|
5144 | s2 = *seq2++; |
---|
5145 | if( s1 == '-' ) continue; |
---|
5146 | if( s2 == '-' ) continue; |
---|
5147 | iscore += ( s1 != s2 ); |
---|
5148 | count++; |
---|
5149 | } |
---|
5150 | if( count ) score = (float)iscore / count; |
---|
5151 | else score = 1.0; |
---|
5152 | if( score < 0.95 ) score = - log( 1.0 - score ); |
---|
5153 | else score = 3.0; |
---|
5154 | return( score ); |
---|
5155 | } |
---|
5156 | #endif |
---|
5157 | |
---|
/*
 * substitution (method 1)
 *
 * Fraction of differing characters among the columns
 * (0 .. strlen(seq1)-1) in which neither sequence has a gap ('-').
 * Returns 1.0 when there is no such column.
 */
float substitution( char *seq1, char *seq2 ) /* method 1 */
{
	float mismatches = 0.0;
	int ncompared = 0;
	int pos;
	const int len = strlen( seq1 );

	for( pos=0; pos<len; pos++ )
	{
		if( seq1[pos] == '-' || seq2[pos] == '-' ) continue;
		mismatches += (float)( seq1[pos] != seq2[pos] );
		ncompared++;
	}

	if( ncompared == 0 ) return( 1.0 );
	return( mismatches / (float)ncompared );
}
---|
5177 | |
---|
5178 | |
---|
5179 | void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff ) |
---|
5180 | { |
---|
5181 | int i, j; |
---|
5182 | |
---|
5183 | if( weight > 1 ) |
---|
5184 | { |
---|
5185 | if( utree == 0 ) |
---|
5186 | { |
---|
5187 | for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
5188 | { |
---|
5189 | /* |
---|
5190 | eff[i][j] = (double)score_calc1( seq[i], seq[j] ); |
---|
5191 | */ |
---|
5192 | eff[i][j] = (double)substitution_hosei( seq[i], seq[j] ); |
---|
5193 | /* |
---|
5194 | fprintf( stderr, "%f\n", eff[i][j] ); |
---|
5195 | */ |
---|
5196 | } |
---|
5197 | /* |
---|
5198 | fprintf( stderr, "distance matrix\n" ); |
---|
5199 | for( i=0; i<nseq; i++ ) |
---|
5200 | { |
---|
5201 | for( j=0; j<nseq; j++ ) |
---|
5202 | { |
---|
5203 | fprintf( stderr, "%f ", eff[i][j] ); |
---|
5204 | } |
---|
5205 | fprintf( stderr, "\n" ); |
---|
5206 | } |
---|
5207 | */ |
---|
5208 | /* |
---|
5209 | upg( nseq, eff, topol, len ); |
---|
5210 | upg2( nseq, eff, topol, len ); |
---|
5211 | */ |
---|
5212 | spg( nseq, eff, topol, len ); |
---|
5213 | counteff( nseq, topol, len, eff ); |
---|
5214 | } |
---|
5215 | } |
---|
5216 | else |
---|
5217 | { |
---|
5218 | for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) |
---|
5219 | eff[i][j] = 1.0; |
---|
5220 | } |
---|
5221 | /* |
---|
5222 | fprintf( stderr, "weight matrix\n" ); |
---|
5223 | for( i=0; i<nseq; i++ ) |
---|
5224 | { |
---|
5225 | for( j=0; j<nseq; j++ ) |
---|
5226 | { |
---|
5227 | fprintf( stderr, "%f ", eff[i][j] ); |
---|
5228 | } |
---|
5229 | fprintf( stderr, "\n" ); |
---|
5230 | } |
---|
5231 | */ |
---|
5232 | } |
---|
5233 | |
---|
5234 | float bscore_calc( char **seq, int s, double **eff ) /* algorithm B */ |
---|
5235 | { |
---|
5236 | int i, j, k; |
---|
5237 | int gb1, gb2, gc1, gc2; |
---|
5238 | int cob; |
---|
5239 | int nglen; |
---|
5240 | int len = strlen( seq[0] ); |
---|
5241 | long score; |
---|
5242 | |
---|
5243 | score = 0; |
---|
5244 | nglen = 0; |
---|
5245 | for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ ) |
---|
5246 | { |
---|
5247 | double efficient = eff[i][j]; |
---|
5248 | |
---|
5249 | gc1 = 0; |
---|
5250 | gc2 = 0; |
---|
5251 | for( k=0; k<len; k++ ) |
---|
5252 | { |
---|
5253 | gb1 = gc1; |
---|
5254 | gb2 = gc2; |
---|
5255 | |
---|
5256 | gc1 = ( seq[i][k] == '-' ); |
---|
5257 | gc2 = ( seq[j][k] == '-' ); |
---|
5258 | |
---|
5259 | cob = |
---|
5260 | !gb1 * gc1 |
---|
5261 | * !gb2 * !gc2 |
---|
5262 | |
---|
5263 | + !gb1 * !gc1 |
---|
5264 | * !gb2 * gc2 |
---|
5265 | |
---|
5266 | + !gb1 * gc1 |
---|
5267 | * gb2 * !gc2 |
---|
5268 | |
---|
5269 | + gb1 * !gc1 |
---|
5270 | * !gb2 * gc2 |
---|
5271 | |
---|
5272 | + gb1 * !gc1 |
---|
5273 | * gb2 * gc2 *BEFF |
---|
5274 | |
---|
5275 | + gb1 * gc1 |
---|
5276 | * gb2 * !gc2 *BEFF |
---|
5277 | ; |
---|
5278 | score += (long)cob * penalty * efficient; |
---|
5279 | score += (long)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * efficient; |
---|
5280 | nglen += ( !gc1 * !gc2 ); |
---|
5281 | } |
---|
5282 | } |
---|
5283 | return( (float)score / nglen + 400.0 * !scoremtx ); |
---|
5284 | } |
---|
5285 | |
---|
5286 | void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax ) |
---|
5287 | { |
---|
5288 | *mseq2pt = AllocateCharMtx( njob, locnlenmax+1 ); |
---|
5289 | *mseq1pt = AllocateCharVec( locnlenmax+1 ); |
---|
5290 | } |
---|
5291 | |
---|
/*
 * FreeTmpSeqs
 *
 * Releases the buffers handed out by AllocateTmpSeqs(): the matrix goes
 * through FreeCharMtx(), the vector through free().
 */
void FreeTmpSeqs( char **mseq2, char *mseq1 )
{
	FreeCharMtx( mseq2 );
	free( mseq1 );
}
---|
5297 | |
---|
5298 | |
---|
/*
 * gappick0
 *
 * Copies `seq` into `aseq` with every '-' removed and terminates the
 * result with NUL.  The write position never overtakes the read
 * position, so the copy proceeds strictly left to right.
 */
void gappick0( char *aseq, char *seq )
{
	char c;

	while( ( c = *seq ) != 0 )
	{
		if( c != '-' ) *aseq++ = c;
		seq++;
	}
	*aseq = 0;
}
---|
5309 | |
---|
/*
 * gappick
 *
 * Builds, in mseq2, a copy of the alignment `aseq` without row s and
 * without the columns in which all remaining rows have a gap ('-').
 * The number of columns examined is strlen( aseq[0] ).  The first
 * nseq-1 rows of mseq2 are NUL-terminated at the new length.
 *
 * effarr receives eff[s][i] for every i != s, in increasing order of i.
 */
void gappick( int nseq, int s, char **aseq, char **mseq2,
			  double **eff, double *effarr )
{
	int col, row, outcol, outrow, keep;
	const int len = strlen( aseq[0] );

	outcol = 0;
	for( col=0; col<len; col++ )
	{
		/* keep the column as soon as one row other than s has a residue */
		keep = 0;
		for( row=0; row<nseq; row++ )
		{
			if( row != s && aseq[row][col] != '-' )
			{
				keep = 1;
				break;
			}
		}
		if( !keep ) continue;

		outrow = 0;
		for( row=0; row<nseq; row++ )
		{
			if( row == s ) continue;
			mseq2[outrow][outcol] = aseq[row][col];
			outrow++;
		}
		outcol++;
	}

	for( row=0; row<nseq-1; row++ )
		mseq2[row][outcol] = 0;

	outrow = 0;
	for( row=0; row<nseq; row++ )
	{
		if( row == s ) continue;
		effarr[outrow] = eff[s][row];
		outrow++;
	}
}
---|
5349 | |
---|
/*
 * commongappick_record
 *
 * Removes, in place, every column of `seq` in which all nseq rows have a
 * gap ('-'), and records in map[k] the original index of the column that
 * ended up at position k.
 *
 * The scan covers indices 0 .. strlen( seq[0] ) inclusive, so the NUL
 * column is carried along as well (it is never all-gap) and also gets a
 * map entry.
 */
void commongappick_record( int nseq, char **seq, int *map )
{
	int src, dst, row;
	const int len = strlen( seq[0] );

	dst = 0;
	for( src=0; src<=len; src++ )
	{
		/* find the first row that is not a gap in this column */
		row = 0;
		while( row < nseq && seq[row][src] == '-' ) row++;
		if( row == nseq ) continue;   /* all rows are gaps: drop the column */

		for( row=0; row<nseq; row++ )
			seq[row][dst] = seq[row][src];
		map[dst] = src;
		dst++;
	}
}
---|
5377 | |
---|
/*
 * commongappick
 *
 * Removes, in place, every column of `seq` in which all nseq rows have a
 * gap ('-').  Indices 0 .. strlen( seq[0] ) inclusive are scanned, so the
 * terminating NUL column is moved together with the kept columns.
 */
void commongappick( int nseq, char **seq )
{
	int src, dst, row;
	const int len = strlen( seq[0] );

	dst = 0;
	for( src=0; src<=len; src++ )
	{
		/* look for any row holding something other than a gap */
		for( row=0; row<nseq; row++ )
			if( seq[row][src] != '-' ) break;
		if( row == nseq ) continue;   /* column consists of gaps only */

		for( row=0; row<nseq; row++ )
			seq[row][dst] = seq[row][src];
		dst++;
	}
}
---|
5403 | |
---|
5404 | double score_calc0( char **seq, int s, double **eff, int ex ) |
---|
5405 | { |
---|
5406 | double tmp; |
---|
5407 | |
---|
5408 | if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex ); |
---|
5409 | if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex ); |
---|
5410 | else tmp = score_calc5( seq, s, eff, ex ); |
---|
5411 | |
---|
5412 | return( tmp ); |
---|
5413 | |
---|
5414 | } |
---|
5415 | |
---|
5416 | /* |
---|
5417 | float score_m_1( char **seq, int ex, double **eff ) |
---|
5418 | { |
---|
5419 | int i, j, k; |
---|
5420 | int len = strlen( seq[0] ); |
---|
5421 | int gb1, gb2, gc1, gc2; |
---|
5422 | int cob; |
---|
5423 | int nglen; |
---|
5424 | double score; |
---|
5425 | |
---|
5426 | score = 0.0; |
---|
5427 | nglen = 0; |
---|
5428 | for( i=0; i<njob; i++ ) |
---|
5429 | { |
---|
5430 | double efficient = eff[MIN(i,ex)][MAX(i,ex)]; |
---|
5431 | if( i == ex ) continue; |
---|
5432 | |
---|
5433 | gc1 = 0; |
---|
5434 | gc2 = 0; |
---|
5435 | for( k=0; k<len; k++ ) |
---|
5436 | { |
---|
5437 | gb1 = gc1; |
---|
5438 | gb2 = gc2; |
---|
5439 | |
---|
5440 | gc1 = ( seq[i][k] == '-' ); |
---|
5441 | gc2 = ( seq[ex][k] == '-' ); |
---|
5442 | |
---|
5443 | cob = |
---|
5444 | !gb1 * gc1 |
---|
5445 | * !gb2 * !gc2 |
---|
5446 | |
---|
5447 | + !gb1 * !gc1 |
---|
5448 | * !gb2 * gc2 |
---|
5449 | |
---|
5450 | + !gb1 * gc1 |
---|
5451 | * gb2 * !gc2 |
---|
5452 | |
---|
5453 | + gb1 * !gc1 |
---|
5454 | * !gb2 * gc2 |
---|
5455 | |
---|
5456 | + gb1 * !gc1 |
---|
5457 | * gb2 * gc2 *BEFF |
---|
5458 | |
---|
5459 | + gb1 * gc1 |
---|
5460 | * gb2 * !gc2 *BEFF |
---|
5461 | ; |
---|
5462 | score += (double)cob * penalty * efficient; |
---|
5463 | score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient; |
---|
5464 | * |
---|
5465 | nglen += ( !gc1 * !gc2 ); |
---|
5466 | * |
---|
5467 | if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score ); |
---|
5468 | } |
---|
5469 | } |
---|
5470 | return( (float)score / nglen + 400.0 * !scoremtx ); |
---|
5471 | } |
---|
5472 | */ |
---|
5473 | |
---|
#if 0
/*
 * sitescore  (disabled: this whole definition is excluded by #if 0)
 *
 * Writes three per-column score strings for the alignment `seq`
 * (njob rows, strlen( seq[0] ) columns).  Each score is encoded as one
 * character, 0x61 ('a') plus (int)( value/100 - epsilon ).
 *
 *   sco1 : unweighted mean over all pairs of
 *          amino_dis + 400 * !scoremtx.
 *   sco2 : eff-weighted mean of the same quantity, with 400 * !scoremtx
 *          subtracted again, doubled, and clamped at 0.
 *   sco3 : average of the sco2 values over the window [i-WIN, i+WIN];
 *          set to 0 when any row has a gap in column i.  The first and
 *          last WIN columns are filled with '-'.
 *
 * All three strings are NUL-terminated at index len.
 */
void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
{
	int i, j, k;
	int len = strlen( seq[0] );
	double tmp;
	double count;
	int ch;
	double sco[N];   /* per-column sco2 values, reused by the window pass */

	/* pass 1: unweighted mean per column */
	for( i=0; i<len; i++ )
	{
		tmp = 0.0; count = 0;
		for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
		{
		/*
			if( seq[j][i] != '-' && seq[k][i] != '-' )
		*/
			{
				tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
				count++;
			}
		}
		if( count > 0.0 ) tmp /= count;
		else( tmp = 0.0 );
		ch = (int)( tmp/100.0 - 0.000001 );
		sprintf( sco1+i, "%c", ch+0x61 );
	}
	sco1[len] = 0;

	/* pass 2: mean weighted by eff[j][k] */
	for( i=0; i<len; i++ )
	{
		tmp = 0.0; count = 0;
		for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
		{
		/*
			if( seq[j][i] != '-' && seq[k][i] != '-' )
		*/
			{
				tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
				count += eff[j][k];
			}
		}
		if( count > 0.0 ) tmp /= count;
		else( tmp = 0.0 );
		tmp = ( tmp - 400 * !scoremtx ) * 2;
		if( tmp < 0 ) tmp = 0;
		ch = (int)( tmp/100.0 - 0.000001 );
		sprintf( sco2+i, "%c", ch+0x61 );
		sco[i] = tmp;
	}
	sco2[len] = 0;

	/* pass 3: sliding-window average of the pass-2 values */
	for( i=WIN; i<len-WIN; i++ )
	{
		tmp = 0.0;
		for( j=i-WIN; j<=i+WIN; j++ )
		{
			tmp += sco[j];
		}
		/* a gap in any row of column i zeroes the window score */
		for( j=0; j<njob; j++ )
		{
			if( seq[j][i] == '-' )
			{
				tmp = 0.0;
				break;
			}
		}
		tmp /= WIN * 2 + 1;
		ch = (int)( tmp/100.0 - 0.0000001 );
		sprintf( sco3+i, "%c", ch+0x61 );
	}
	/* columns without a full window on both sides */
	for( i=0; i<WIN; i++ ) sco3[i] = '-';
	for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
	sco3[len] = 0;
}
#endif
---|
5551 | |
---|
/*
 * strins
 *
 * Inserts the string str1 at the front of the string held in str2, in
 * place: afterwards str2 contains str1 followed by the old contents of
 * str2, NUL-terminated.
 *
 * The buffer behind str2 must have room for
 * strlen( str1 ) + strlen( str2 ) + 1 bytes; str1 is not modified.
 *
 * memmove() is used for both steps.  The earlier hand-written backward
 * loops decremented their pointers to one element before the start of
 * both buffers, which is undefined behaviour in C.
 */
void strins( char *str1, char *str2 )
{
	size_t len1 = strlen( str1 );
	size_t len2 = strlen( str2 );

	/* shift the old contents, including the terminating NUL, to the right */
	memmove( str2 + len1, str2, len2 + 1 );
	/* drop str1 (without its NUL) into the space that was opened up */
	memmove( str2, str1, len1 );
}
---|
5565 | |
---|
/*
 * isaligned
 *
 * Returns 1 when rows 1 .. nseq-1 of `seq` all have the same string
 * length as seq[0], and 0 as soon as one of them differs.
 */
int isaligned( int nseq, char **seq )
{
	const int reflen = strlen( seq[0] );
	int row = 1;

	while( row < nseq )
	{
		if( strlen( seq[row] ) != reflen ) return( 0 );
		row++;
	}
	return( 1 );
}
---|
5576 | |
---|
5577 | double score_calc_for_score( int nseq, char **seq ) |
---|
5578 | { |
---|
5579 | int i, j, k, c; |
---|
5580 | int len = strlen( seq[0] ); |
---|
5581 | double score; |
---|
5582 | double tmpscore; |
---|
5583 | char *mseq1, *mseq2; |
---|
5584 | |
---|
5585 | score = 0.0; |
---|
5586 | for( i=0; i<nseq-1; i++ ) |
---|
5587 | { |
---|
5588 | for( j=i+1; j<nseq; j++ ) |
---|
5589 | { |
---|
5590 | mseq1 = seq[i]; |
---|
5591 | mseq2 = seq[j]; |
---|
5592 | tmpscore = 0.0; |
---|
5593 | c = 0; |
---|
5594 | for( k=0; k<len; k++ ) |
---|
5595 | { |
---|
5596 | if( mseq1[k] == '-' && mseq2[k] == '-' ) continue; |
---|
5597 | tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; |
---|
5598 | c++; |
---|
5599 | if( mseq1[k] == '-' ) |
---|
5600 | { |
---|
5601 | tmpscore += penalty - n_dis[0][24]; |
---|
5602 | while( mseq1[++k] == '-' ) |
---|
5603 | ; |
---|
5604 | k--; |
---|
5605 | if( k > len-2 ) break; |
---|
5606 | continue; |
---|
5607 | } |
---|
5608 | if( mseq2[k] == '-' ) |
---|
5609 | { |
---|
5610 | tmpscore += penalty - n_dis[0][24]; |
---|
5611 | while( mseq2[++k] == '-' ) |
---|
5612 | ; |
---|
5613 | k--; |
---|
5614 | if( k > len-2 ) break; |
---|
5615 | continue; |
---|
5616 | } |
---|
5617 | } |
---|
5618 | score += (double)tmpscore / (double)c; |
---|
5619 | #if DEBUG |
---|
5620 | printf( "tmpscore in mltaln9.c = %f\n", tmpscore ); |
---|
5621 | printf( "tmpscore / c = %f\n", tmpscore/(double)c ); |
---|
5622 | #endif |
---|
5623 | } |
---|
5624 | } |
---|
5625 | fprintf( stderr, "raw score = %f\n", score ); |
---|
5626 | score /= (double)nseq * ( nseq-1.0 ) / 2.0; |
---|
5627 | score += 400.0; |
---|
5628 | #if DEBUG |
---|
5629 | printf( "score in mltaln9.c = %f\n", score ); |
---|
5630 | #endif |
---|
5631 | return( (double)score ); |
---|
5632 | } |
---|
5633 | |
---|
/*
 * floatncpy
 *
 * Copies `len` floats from vec2 to vec1, element by element in
 * increasing index order.  Nothing is copied when len is 0.
 */
void floatncpy( float *vec1, float *vec2, int len )
{
	int k;

	for( k=0; len != 0; len--, k++ )
		vec1[k] = vec2[k];
}
---|
5639 | |
---|
5640 | float score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */ |
---|
5641 | { |
---|
5642 | int i, j, k; |
---|
5643 | int gb1, gb2, gc1, gc2; |
---|
5644 | int cob; |
---|
5645 | int nglen; |
---|
5646 | int len = strlen( seq[0] ); |
---|
5647 | float score; |
---|
5648 | |
---|
5649 | score = 0; |
---|
5650 | nglen = 0; |
---|
5651 | for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ ) |
---|
5652 | { |
---|
5653 | double efficient = eff[i][j]; |
---|
5654 | |
---|
5655 | gc1 = 0; |
---|
5656 | gc2 = 0; |
---|
5657 | for( k=0; k<len; k++ ) |
---|
5658 | { |
---|
5659 | gb1 = gc1; |
---|
5660 | gb2 = gc2; |
---|
5661 | |
---|
5662 | gc1 = ( seq[i][k] == '-' ); |
---|
5663 | gc2 = ( seq[j][k] == '-' ); |
---|
5664 | |
---|
5665 | cob = |
---|
5666 | !gb1 * gc1 |
---|
5667 | * !gb2 * !gc2 |
---|
5668 | |
---|
5669 | + gb1 * !gc1 |
---|
5670 | * !gb2 * !gc2 |
---|
5671 | |
---|
5672 | + !gb1 * !gc1 |
---|
5673 | * !gb2 * gc2 |
---|
5674 | |
---|
5675 | + !gb1 * !gc1 |
---|
5676 | * gb2 * !gc2 |
---|
5677 | |
---|
5678 | + !gb1 * gc1 |
---|
5679 | * gb2 * !gc2 |
---|
5680 | |
---|
5681 | + gb1 * !gc1 |
---|
5682 | * !gb2 * gc2 |
---|
5683 | |
---|
5684 | + gb1 * !gc1 |
---|
5685 | * gb2 * gc2 |
---|
5686 | |
---|
5687 | + gb1 * gc1 |
---|
5688 | * gb2 * !gc2 |
---|
5689 | |
---|
5690 | + !gb1 * gc1 |
---|
5691 | * gb2 * gc2 |
---|
5692 | |
---|
5693 | + gb1 * gc1 |
---|
5694 | * !gb2 * gc2 |
---|
5695 | ; |
---|
5696 | score += 0.5 * (float)cob * penalty * efficient; |
---|
5697 | score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient; |
---|
5698 | nglen += ( !gc1 * !gc2 ); |
---|
5699 | } |
---|
5700 | } |
---|
5701 | return( (float)score / nglen + 400.0 * !scoremtx ); |
---|
5702 | } |
---|
5703 | |
---|
5704 | |
---|
5705 | float score_calc_s( char **seq, int s, double **eff ) /* algorithm S, not used */ |
---|
5706 | { |
---|
5707 | int i, j, k; |
---|
5708 | int gb1, gb2, gc1, gc2; |
---|
5709 | int cob; |
---|
5710 | int nglen; |
---|
5711 | int len = strlen( seq[0] ); |
---|
5712 | float score; |
---|
5713 | |
---|
5714 | score = 0; |
---|
5715 | nglen = 0; |
---|
5716 | for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ ) |
---|
5717 | { |
---|
5718 | double efficient = eff[i][j]; |
---|
5719 | |
---|
5720 | gc1 = 0; |
---|
5721 | gc2 = 0; |
---|
5722 | for( k=0; k<len; k++ ) |
---|
5723 | { |
---|
5724 | gb1 = gc1; |
---|
5725 | gb2 = gc2; |
---|
5726 | |
---|
5727 | gc1 = ( seq[i][k] == '-' ); |
---|
5728 | gc2 = ( seq[j][k] == '-' ); |
---|
5729 | |
---|
5730 | cob = |
---|
5731 | !gb1 * gc1 |
---|
5732 | * !gb2 * !gc2 |
---|
5733 | |
---|
5734 | + gb1 * !gc1 |
---|
5735 | * !gb2 * !gc2 |
---|
5736 | |
---|
5737 | + !gb1 * !gc1 |
---|
5738 | * !gb2 * gc2 |
---|
5739 | |
---|
5740 | + !gb1 * !gc1 |
---|
5741 | * gb2 * !gc2 |
---|
5742 | |
---|
5743 | + !gb1 * gc1 |
---|
5744 | * gb2 * !gc2 |
---|
5745 | |
---|
5746 | + gb1 * !gc1 |
---|
5747 | * !gb2 * gc2 |
---|
5748 | |
---|
5749 | #if 0 |
---|
5750 | + gb1 * !gc1 |
---|
5751 | * gb2 * gc2 |
---|
5752 | |
---|
5753 | + gb1 * gc1 |
---|
5754 | * gb2 * !gc2 |
---|
5755 | |
---|
5756 | + !gb1 * gc1 |
---|
5757 | * gb2 * gc2 |
---|
5758 | |
---|
5759 | + gb1 * gc1 |
---|
5760 | * !gb2 * gc2 |
---|
5761 | #endif |
---|
5762 | ; |
---|
5763 | score += 0.5 * (float)cob * penalty * efficient; |
---|
5764 | score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient; |
---|
5765 | nglen += ( !gc1 * !gc2 ); |
---|
5766 | } |
---|
5767 | } |
---|
5768 | return( (float)score / nglen + 400.0 ); |
---|
5769 | } |
---|
5770 | |
---|
5771 | double score_calc_for_score_s( int s, char **seq ) /* algorithm S */ |
---|
5772 | { |
---|
5773 | int i, j, k; |
---|
5774 | int gb1, gb2, gc1, gc2; |
---|
5775 | int cob; |
---|
5776 | int nglen; |
---|
5777 | int len = strlen( seq[0] ); |
---|
5778 | float score; |
---|
5779 | |
---|
5780 | score = 0; |
---|
5781 | nglen = 0; |
---|
5782 | for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ ) |
---|
5783 | { |
---|
5784 | |
---|
5785 | gc1 = 0; |
---|
5786 | gc2 = 0; |
---|
5787 | for( k=0; k<len; k++ ) |
---|
5788 | { |
---|
5789 | gb1 = gc1; |
---|
5790 | gb2 = gc2; |
---|
5791 | |
---|
5792 | gc1 = ( seq[i][k] == '-' ); |
---|
5793 | gc2 = ( seq[j][k] == '-' ); |
---|
5794 | |
---|
5795 | cob = |
---|
5796 | !gb1 * gc1 |
---|
5797 | * !gb2 * !gc2 |
---|
5798 | |
---|
5799 | + gb1 * !gc1 |
---|
5800 | * !gb2 * !gc2 |
---|
5801 | |
---|
5802 | + !gb1 * !gc1 |
---|
5803 | * !gb2 * gc2 |
---|
5804 | |
---|
5805 | + !gb1 * !gc1 |
---|
5806 | * gb2 * !gc2 |
---|
5807 | |
---|
5808 | + !gb1 * gc1 |
---|
5809 | * gb2 * !gc2 |
---|
5810 | |
---|
5811 | + gb1 * !gc1 |
---|
5812 | * !gb2 * gc2 |
---|
5813 | |
---|
5814 | #if 0 |
---|
5815 | + gb1 * !gc1 |
---|
5816 | * gb2 * gc2 |
---|
5817 | |
---|
5818 | + gb1 * gc1 |
---|
5819 | * gb2 * !gc2 |
---|
5820 | |
---|
5821 | + !gb1 * gc1 |
---|
5822 | * gb2 * gc2 |
---|
5823 | |
---|
5824 | + gb1 * gc1 |
---|
5825 | * !gb2 * gc2 |
---|
5826 | #endif |
---|
5827 | ; |
---|
5828 | score += 0.5 * (float)cob * penalty; |
---|
5829 | score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]]; |
---|
5830 | nglen += ( !gc1 * !gc2 ); |
---|
5831 | } |
---|
5832 | #if 0 |
---|
5833 | fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 ); |
---|
5834 | fprintf( stderr, "score = %f\n", score ); |
---|
5835 | #endif |
---|
5836 | } |
---|
5837 | return( (double)score / nglen + 400.0 ); |
---|
5838 | } |
---|
5839 | |
---|
5840 | double SSPscore___( int s, char **seq, int ex ) /* algorithm S */ |
---|
5841 | { |
---|
5842 | int i, j, k; |
---|
5843 | int gb1, gb2, gc1, gc2; |
---|
5844 | int cob; |
---|
5845 | int nglen; |
---|
5846 | int len = strlen( seq[0] ); |
---|
5847 | float score; |
---|
5848 | |
---|
5849 | score = 0; |
---|
5850 | nglen = 0; |
---|
5851 | i=ex; for( j=0; j<s; j++ ) |
---|
5852 | { |
---|
5853 | |
---|
5854 | if( j == ex ) continue; |
---|
5855 | |
---|
5856 | gc1 = 0; |
---|
5857 | gc2 = 0; |
---|
5858 | for( k=0; k<len; k++ ) |
---|
5859 | { |
---|
5860 | gb1 = gc1; |
---|
5861 | gb2 = gc2; |
---|
5862 | |
---|
5863 | gc1 = ( seq[i][k] == '-' ); |
---|
5864 | gc2 = ( seq[j][k] == '-' ); |
---|
5865 | |
---|
5866 | cob = |
---|
5867 | !gb1 * gc1 |
---|
5868 | * !gb2 * !gc2 |
---|
5869 | |
---|
5870 | + gb1 * !gc1 |
---|
5871 | * !gb2 * !gc2 |
---|
5872 | |
---|
5873 | + !gb1 * !gc1 |
---|
5874 | * !gb2 * gc2 |
---|
5875 | |
---|
5876 | + !gb1 * !gc1 |
---|
5877 | * gb2 * !gc2 |
---|
5878 | |
---|
5879 | + !gb1 * gc1 |
---|
5880 | * gb2 * !gc2 * 2.0 |
---|
5881 | |
---|
5882 | + gb1 * !gc1 |
---|
5883 | * !gb2 * gc2 * 2.0 |
---|
5884 | |
---|
5885 | #if 0 |
---|
5886 | + gb1 * !gc1 |
---|
5887 | * gb2 * gc2 |
---|
5888 | |
---|
5889 | + gb1 * gc1 |
---|
5890 | * gb2 * !gc2 |
---|
5891 | |
---|
5892 | + !gb1 * gc1 |
---|
5893 | * gb2 * gc2 |
---|
5894 | |
---|
5895 | + gb1 * gc1 |
---|
5896 | * !gb2 * gc2 |
---|
5897 | #endif |
---|
5898 | ; |
---|
5899 | score += 0.5 * (float)cob * penalty; |
---|
5900 | score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]]; |
---|
5901 | nglen += ( !gc1 * !gc2 ); /* tsukawanai */ |
---|
5902 | } |
---|
5903 | #if 0 |
---|
5904 | fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 ); |
---|
5905 | fprintf( stderr, "score = %f\n", score ); |
---|
5906 | #endif |
---|
5907 | } |
---|
5908 | return( (double)score ); |
---|
5909 | } |
---|
5910 | |
---|
5911 | double SSPscore( int s, char **seq ) /* algorithm S */ |
---|
5912 | { |
---|
5913 | int i, j, k; |
---|
5914 | int gb1, gb2, gc1, gc2; |
---|
5915 | int cob; |
---|
5916 | int nglen; |
---|
5917 | int len = strlen( seq[0] ); |
---|
5918 | float score; |
---|
5919 | |
---|
5920 | score = 0; |
---|
5921 | nglen = 0; |
---|
5922 | for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ ) |
---|
5923 | { |
---|
5924 | |
---|
5925 | gc1 = 0; |
---|
5926 | gc2 = 0; |
---|
5927 | for( k=0; k<len; k++ ) |
---|
5928 | { |
---|
5929 | gb1 = gc1; |
---|
5930 | gb2 = gc2; |
---|
5931 | |
---|
5932 | gc1 = ( seq[i][k] == '-' ); |
---|
5933 | gc2 = ( seq[j][k] == '-' ); |
---|
5934 | |
---|
5935 | cob = |
---|
5936 | !gb1 * gc1 |
---|
5937 | * !gb2 * !gc2 |
---|
5938 | |
---|
5939 | + gb1 * !gc1 |
---|
5940 | * !gb2 * !gc2 |
---|
5941 | |
---|
5942 | + !gb1 * !gc1 |
---|
5943 | * !gb2 * gc2 |
---|
5944 | |
---|
5945 | + !gb1 * !gc1 |
---|
5946 | * gb2 * !gc2 |
---|
5947 | |
---|
5948 | + !gb1 * gc1 |
---|
5949 | * gb2 * !gc2 |
---|
5950 | |
---|
5951 | + gb1 * !gc1 |
---|
5952 | * !gb2 * gc2 |
---|
5953 | |
---|
5954 | #if 0 |
---|
5955 | + gb1 * !gc1 |
---|
5956 | * gb2 * gc2 |
---|
5957 | |
---|
5958 | + gb1 * gc1 |
---|
5959 | * gb2 * !gc2 |
---|
5960 | |
---|
5961 | + !gb1 * gc1 |
---|
5962 | * gb2 * gc2 |
---|
5963 | |
---|
5964 | + gb1 * gc1 |
---|
5965 | * !gb2 * gc2 |
---|
5966 | #endif |
---|
5967 | ; |
---|
5968 | score += 0.5 * (float)cob * penalty; |
---|
5969 | score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]]; |
---|
5970 | nglen += ( !gc1 * !gc2 ); /* tsukawanai */ |
---|
5971 | } |
---|
5972 | #if 0 |
---|
5973 | fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 ); |
---|
5974 | fprintf( stderr, "score = %f\n", score ); |
---|
5975 | #endif |
---|
5976 | } |
---|
5977 | return( (double)score ); |
---|
5978 | } |
---|
5979 | |
---|
5980 | |
---|
5981 | |
---|
5982 | double DSPscore( int s, char **seq ) /* method 3 deha nai */ |
---|
5983 | { |
---|
5984 | int i, j, k; |
---|
5985 | double c; |
---|
5986 | int len = strlen( seq[0] ); |
---|
5987 | double score; |
---|
5988 | double tmpscore; |
---|
5989 | char *mseq1, *mseq2; |
---|
5990 | #if DEBUG |
---|
5991 | FILE *fp; |
---|
5992 | #endif |
---|
5993 | |
---|
5994 | score = 0.0; |
---|
5995 | c = 0.0; |
---|
5996 | |
---|
5997 | for( i=0; i<s-1; i++ ) |
---|
5998 | { |
---|
5999 | for( j=i+1; j<s; j++ ) |
---|
6000 | { |
---|
6001 | mseq1 = seq[i]; |
---|
6002 | mseq2 = seq[j]; |
---|
6003 | tmpscore = 0.0; |
---|
6004 | for( k=0; k<len; k++ ) |
---|
6005 | { |
---|
6006 | if( mseq1[k] == '-' && mseq2[k] == '-' ) continue; |
---|
6007 | tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; |
---|
6008 | |
---|
6009 | if( mseq1[k] == '-' ) |
---|
6010 | { |
---|
6011 | tmpscore += penalty; |
---|
6012 | while( mseq1[++k] == '-' ) |
---|
6013 | tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; |
---|
6014 | k--; |
---|
6015 | if( k > len-2 ) break; |
---|
6016 | continue; |
---|
6017 | } |
---|
6018 | if( mseq2[k] == '-' ) |
---|
6019 | { |
---|
6020 | tmpscore += penalty; |
---|
6021 | while( mseq2[++k] == '-' ) |
---|
6022 | tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]]; |
---|
6023 | k--; |
---|
6024 | if( k > len-2 ) break; |
---|
6025 | continue; |
---|
6026 | } |
---|
6027 | } |
---|
6028 | score += (double)tmpscore; |
---|
6029 | } |
---|
6030 | } |
---|
6031 | |
---|
6032 | return( score ); |
---|
6033 | } |
---|
6034 | |
---|
6035 | |
---|
6036 | #define SEGMENTSIZE 150 |
---|
6037 | |
---|
6038 | int searchAnchors( int nseq, char **seq, Segment *seg ) |
---|
6039 | { |
---|
6040 | int i, j, k, kcyc; |
---|
6041 | int status; |
---|
6042 | double score; |
---|
6043 | int value = 0; |
---|
6044 | int len; |
---|
6045 | int length; |
---|
6046 | static double *stra = NULL; |
---|
6047 | static int alloclen = 0; |
---|
6048 | double cumscore; |
---|
6049 | static double threshold; |
---|
6050 | |
---|
6051 | len = strlen( seq[0] ); |
---|
6052 | if( alloclen < len ) |
---|
6053 | { |
---|
6054 | if( alloclen ) |
---|
6055 | { |
---|
6056 | FreeDoubleVec( stra ); |
---|
6057 | } |
---|
6058 | else |
---|
6059 | { |
---|
6060 | threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize; |
---|
6061 | } |
---|
6062 | stra = AllocateDoubleVec( len ); |
---|
6063 | alloclen = len; |
---|
6064 | } |
---|
6065 | |
---|
6066 | for( i=0; i<len; i++ ) |
---|
6067 | { |
---|
6068 | #if 0 |
---|
6069 | /* make prf */ |
---|
6070 | for( j=0; j<26; j++ ) |
---|
6071 | { |
---|
6072 | prf[j] = 0.0; |
---|
6073 | } |
---|
6074 | for( j=0; j<nseq; j++ ) prf[amino_n[seq[j][i]]] += 1.0; |
---|
6075 | |
---|
6076 | /* make hat */ |
---|
6077 | pre = 26; |
---|
6078 | for( j=25; j>=0; j-- ) |
---|
6079 | { |
---|
6080 | if( prf[j] ) |
---|
6081 | { |
---|
6082 | hat[pre] = j; |
---|
6083 | pre = j; |
---|
6084 | } |
---|
6085 | } |
---|
6086 | hat[pre] = -1; |
---|
6087 | |
---|
6088 | /* make site score */ |
---|
6089 | stra[i] = 0.0; |
---|
6090 | for( k=hat[26]; k!=-1; k=hat[k] ) |
---|
6091 | for( j=hat[26]; j!=-1; j=hat[j] ) |
---|
6092 | stra[i] += n_dis[k][j] * prf[k] * prf[j]; |
---|
6093 | #else |
---|
6094 | stra[i] = 0.0; |
---|
6095 | kcyc = nseq-1; |
---|
6096 | for( k=0; k<kcyc; k++ ) for( j=k+1; j<nseq; j++ ) |
---|
6097 | stra[i] += n_dis[(int)amino_n[(int)seq[k][i]]][(int)amino_n[(int)seq[j][i]]]; |
---|
6098 | stra[i] /= (double)nseq * ( nseq-1 ) / 2; |
---|
6099 | #endif |
---|
6100 | } |
---|
6101 | |
---|
6102 | (seg+0)->skipForeward = 0; |
---|
6103 | (seg+1)->skipBackward = 0; |
---|
6104 | status = 0; |
---|
6105 | cumscore = 0.0; |
---|
6106 | score = 0.0; |
---|
6107 | length = 0; /* modified at 01/09/11 */ |
---|
6108 | for( j=0; j<divWinSize; j++ ) score += stra[j]; |
---|
6109 | for( i=1; i<len-divWinSize; i++ ) |
---|
6110 | { |
---|
6111 | score = score - stra[i-1] + stra[i+divWinSize-1]; |
---|
6112 | #if DEBUG |
---|
6113 | fprintf( stderr, "%d %f ? %f", i, score, threshold ); |
---|
6114 | if( score > threshold ) fprintf( stderr, "YES\n" ); |
---|
6115 | else fprintf( stderr, "NO\n" ); |
---|
6116 | #endif |
---|
6117 | |
---|
6118 | if( score > threshold ) |
---|
6119 | { |
---|
6120 | if( !status ) |
---|
6121 | { |
---|
6122 | status = 1; |
---|
6123 | seg->start = i; |
---|
6124 | length = 0; |
---|
6125 | cumscore = 0.0; |
---|
6126 | } |
---|
6127 | length++; |
---|
6128 | cumscore += score; |
---|
6129 | } |
---|
6130 | if( score <= threshold || length > SEGMENTSIZE ) |
---|
6131 | { |
---|
6132 | if( status ) |
---|
6133 | { |
---|
6134 | seg->end = i; |
---|
6135 | seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; |
---|
6136 | seg->score = cumscore; |
---|
6137 | #if DEBUG |
---|
6138 | fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); |
---|
6139 | #endif |
---|
6140 | if( length > SEGMENTSIZE ) |
---|
6141 | { |
---|
6142 | (seg+0)->skipForeward = 1; |
---|
6143 | (seg+1)->skipBackward = 1; |
---|
6144 | } |
---|
6145 | else |
---|
6146 | { |
---|
6147 | (seg+0)->skipForeward = 0; |
---|
6148 | (seg+1)->skipBackward = 0; |
---|
6149 | } |
---|
6150 | length = 0; |
---|
6151 | cumscore = 0.0; |
---|
6152 | status = 0; |
---|
6153 | value++; |
---|
6154 | seg++; |
---|
6155 | if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!"); |
---|
6156 | } |
---|
6157 | } |
---|
6158 | } |
---|
6159 | if( status ) |
---|
6160 | { |
---|
6161 | seg->end = i; |
---|
6162 | seg->center = ( seg->start + seg->end + divWinSize ) / 2 ; |
---|
6163 | seg->score = cumscore; |
---|
6164 | #if DEBUG |
---|
6165 | fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length ); |
---|
6166 | #endif |
---|
6167 | value++; |
---|
6168 | } |
---|
6169 | return( value ); |
---|
6170 | } |
---|
6171 | |
---|
6172 | void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) |
---|
6173 | { |
---|
6174 | int i, j; |
---|
6175 | LocalHom *ptr; |
---|
6176 | int *nogaplen; |
---|
6177 | |
---|
6178 | nogaplen = AllocateIntVec( nseq ); |
---|
6179 | |
---|
6180 | for( i=0; i<nseq; i++ ) |
---|
6181 | { |
---|
6182 | nogaplen[i] = seqlen( seq[i] ); |
---|
6183 | // fprintf( stderr, "nogaplen[%d] = %d\n", i, nogaplen[i] ); |
---|
6184 | } |
---|
6185 | |
---|
6186 | for( i=0; i<nseq; i++ ) |
---|
6187 | { |
---|
6188 | for( j=0; j<nseq; j++ ) |
---|
6189 | { |
---|
6190 | for( ptr=localhom[i]+j; ptr; ptr=ptr->next ) |
---|
6191 | { |
---|
6192 | // fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr ); |
---|
6193 | #if 1 |
---|
6194 | ptr->importance = ptr->opt / ptr->overlapaa; |
---|
6195 | ptr->fimportance = (float)ptr->importance; |
---|
6196 | #else |
---|
6197 | ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); |
---|
6198 | #endif |
---|
6199 | } |
---|
6200 | } |
---|
6201 | } |
---|
6202 | free( nogaplen ); |
---|
6203 | } |
---|
6204 | |
---|
6205 | void dontcalcimportance_firstone( int nseq, double *eff, char **seq, LocalHom **localhom ) |
---|
6206 | { |
---|
6207 | int i, j, nseq1; |
---|
6208 | LocalHom *ptr; |
---|
6209 | #if 1 |
---|
6210 | #else |
---|
6211 | int *nogaplen; |
---|
6212 | nogaplen = AllocateIntVec( nseq ); |
---|
6213 | for( i=0; i<nseq; i++ ) |
---|
6214 | { |
---|
6215 | nogaplen[i] = seqlen( seq[i] ); |
---|
6216 | // fprintf( stderr, "nogaplen[%d] = %d\n", i, nogaplen[i] ); |
---|
6217 | } |
---|
6218 | #endif |
---|
6219 | |
---|
6220 | nseq1 = nseq - 1; |
---|
6221 | for( i=0; i<nseq1; i++ ) |
---|
6222 | { |
---|
6223 | j=0; |
---|
6224 | { |
---|
6225 | for( ptr=localhom[i]+j; ptr; ptr=ptr->next ) |
---|
6226 | { |
---|
6227 | // fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr ); |
---|
6228 | #if 1 |
---|
6229 | // ptr->importance = ptr->opt / ptr->overlapaa; |
---|
6230 | ptr->importance = ptr->opt * 0.5; // tekitou |
---|
6231 | ptr->fimportance = (float)ptr->importance; |
---|
6232 | // fprintf( stderr, "i=%d, j=%d, importance = %f, opt=%f\n", i, j, ptr->fimportance, ptr->opt ); |
---|
6233 | #else |
---|
6234 | ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] ); |
---|
6235 | #endif |
---|
6236 | } |
---|
6237 | } |
---|
6238 | } |
---|
6239 | #if 1 |
---|
6240 | #else |
---|
6241 | free( nogaplen ); |
---|
6242 | #endif |
---|
6243 | } |
---|
6244 | |
---|
6245 | void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom ) |
---|
6246 | { |
---|
6247 | int i, j, pos, len; |
---|
6248 | double *importance; // static -> local, 2012/02/25 |
---|
6249 | double tmpdouble; |
---|
6250 | double *ieff, totaleff; // counteff_simple_float ni utsusu kamo |
---|
6251 | int *nogaplen; // static -> local, 2012/02/25 |
---|
6252 | LocalHom *tmpptr; |
---|
6253 | |
---|
6254 | importance = AllocateDoubleVec( nlenmax ); |
---|
6255 | nogaplen = AllocateIntVec( nseq ); |
---|
6256 | ieff = AllocateDoubleVec( nseq ); |
---|
6257 | |
---|
6258 | totaleff = 0.0; |
---|
6259 | for( i=0; i<nseq; i++ ) |
---|
6260 | { |
---|
6261 | nogaplen[i] = seqlen( seq[i] ); |
---|
6262 | // fprintf( stderr, "nogaplen[] = %d\n", nogaplen[i] ); |
---|
6263 | if( nogaplen[i] == 0 ) ieff[i] = 0.0; |
---|
6264 | else ieff[i] = eff[i]; |
---|
6265 | totaleff += ieff[i]; |
---|
6266 | } |
---|
6267 | for( i=0; i<nseq; i++ ) ieff[i] /= totaleff; |
---|
6268 | // for( i=0; i<nseq; i++ ) fprintf( stderr, "eff[%d] = %f\n", i, ieff[i] ); |
---|
6269 | |
---|
6270 | #if 0 |
---|
6271 | for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) |
---|
6272 | { |
---|
6273 | tmpptr = localhom[i]+j; |
---|
6274 | fprintf( stderr, "%d-%d\n", i, j ); |
---|
6275 | do |
---|
6276 | { |
---|
6277 | fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt ); |
---|
6278 | } while( tmpptr=tmpptr->next ); |
---|
6279 | } |
---|
6280 | #endif |
---|
6281 | |
---|
6282 | |
---|
6283 | for( i=0; i<nseq; i++ ) |
---|
6284 | { |
---|
6285 | // fprintf( stderr, "i = %d\n", i ); |
---|
6286 | for( pos=0; pos<nlenmax; pos++ ) |
---|
6287 | importance[pos] = 0.0; |
---|
6288 | for( j=0; j<nseq; j++ ) |
---|
6289 | { |
---|
6290 | if( i == j ) continue; |
---|
6291 | tmpptr = localhom[i]+j; |
---|
6292 | for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) |
---|
6293 | { |
---|
6294 | if( tmpptr->opt == -1 ) continue; |
---|
6295 | for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) |
---|
6296 | #if 1 |
---|
6297 | importance[pos] += ieff[j]; |
---|
6298 | #else |
---|
6299 | importance[pos] += ieff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] ); |
---|
6300 | importance[pos] += ieff[j] * tmpptr->opt / tmpptr->overlapaa; |
---|
6301 | #endif |
---|
6302 | } |
---|
6303 | } |
---|
6304 | #if 0 |
---|
6305 | fprintf( stderr, "position specific importance of seq %d:\n", i ); |
---|
6306 | for( pos=0; pos<nlenmax; pos++ ) |
---|
6307 | fprintf( stderr, "%d: %f\n", pos, importance[pos] ); |
---|
6308 | fprintf( stderr, "\n" ); |
---|
6309 | #endif |
---|
6310 | for( j=0; j<nseq; j++ ) |
---|
6311 | { |
---|
6312 | // fprintf( stderr, "i=%d, j=%d\n", i, j ); |
---|
6313 | if( i == j ) continue; |
---|
6314 | if( localhom[i][j].opt == -1.0 ) continue; |
---|
6315 | #if 1 |
---|
6316 | for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) |
---|
6317 | { |
---|
6318 | if( tmpptr->opt == -1.0 ) continue; |
---|
6319 | tmpdouble = 0.0; |
---|
6320 | len = 0; |
---|
6321 | for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) |
---|
6322 | { |
---|
6323 | tmpdouble += importance[pos]; |
---|
6324 | len++; |
---|
6325 | } |
---|
6326 | |
---|
6327 | tmpdouble /= (double)len; |
---|
6328 | |
---|
6329 | tmpptr->importance = tmpdouble * tmpptr->opt; |
---|
6330 | tmpptr->fimportance = (float)tmpptr->importance; |
---|
6331 | } |
---|
6332 | #else |
---|
6333 | tmpdouble = 0.0; |
---|
6334 | len = 0; |
---|
6335 | for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) |
---|
6336 | { |
---|
6337 | if( tmpptr->opt == -1.0 ) continue; |
---|
6338 | for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) |
---|
6339 | { |
---|
6340 | tmpdouble += importance[pos]; |
---|
6341 | len++; |
---|
6342 | } |
---|
6343 | } |
---|
6344 | |
---|
6345 | tmpdouble /= (double)len; |
---|
6346 | |
---|
6347 | for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) |
---|
6348 | { |
---|
6349 | if( tmpptr->opt == -1.0 ) continue; |
---|
6350 | tmpptr->importance = tmpdouble * tmpptr->opt; |
---|
6351 | // tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //$B$J$+$C$?$3$H$K$9$k(B |
---|
6352 | } |
---|
6353 | #endif |
---|
6354 | |
---|
6355 | // fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble ); |
---|
6356 | } |
---|
6357 | } |
---|
6358 | |
---|
6359 | #if 0 |
---|
6360 | fprintf( stderr, "before averaging:\n" ); |
---|
6361 | |
---|
6362 | for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) |
---|
6363 | { |
---|
6364 | fprintf( stderr, "%d-%d\n", i, j ); |
---|
6365 | for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) |
---|
6366 | { |
---|
6367 | fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt ); |
---|
6368 | } |
---|
6369 | } |
---|
6370 | #endif |
---|
6371 | |
---|
6372 | #if 1 |
---|
6373 | // fprintf( stderr, "average?\n" ); |
---|
6374 | for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
6375 | { |
---|
6376 | double imp; |
---|
6377 | LocalHom *tmpptr1, *tmpptr2; |
---|
6378 | |
---|
6379 | // fprintf( stderr, "i=%d, j=%d\n", i, j ); |
---|
6380 | |
---|
6381 | tmpptr1 = localhom[i]+j; tmpptr2 = localhom[j]+i; |
---|
6382 | for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next) |
---|
6383 | { |
---|
6384 | if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) |
---|
6385 | { |
---|
6386 | // fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt ); |
---|
6387 | continue; |
---|
6388 | } |
---|
6389 | // fprintf( stderr, "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance ); |
---|
6390 | imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance ); |
---|
6391 | tmpptr1->importance = tmpptr2->importance = imp; |
---|
6392 | tmpptr1->fimportance = tmpptr2->fimportance = (float)imp; |
---|
6393 | |
---|
6394 | // fprintf( stderr, "## importance = %f\n", tmpptr1->importance ); |
---|
6395 | |
---|
6396 | } |
---|
6397 | |
---|
6398 | #if 0 // commented out, 2012/02/10 |
---|
6399 | if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) ) |
---|
6400 | { |
---|
6401 | fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j ); |
---|
6402 | exit( 1 ); |
---|
6403 | } |
---|
6404 | #endif |
---|
6405 | } |
---|
6406 | #endif |
---|
6407 | #if 0 |
---|
6408 | fprintf( stderr, "after averaging:\n" ); |
---|
6409 | |
---|
6410 | for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) |
---|
6411 | { |
---|
6412 | for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next ) |
---|
6413 | { |
---|
6414 | if( tmpptr->end1 ) |
---|
6415 | fprintf( stderr, "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", i, j, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt ); |
---|
6416 | } |
---|
6417 | } |
---|
6418 | #endif |
---|
6419 | free( importance ); |
---|
6420 | free( nogaplen ); |
---|
6421 | free( ieff ); |
---|
6422 | } |
---|
6423 | |
---|
6424 | |
---|
6425 | |
---|
6426 | #if 0 |
---|
6427 | void weightimportance( int nseq, double **eff, LocalHom **localhom ) |
---|
6428 | { |
---|
6429 | int i, j, pos, len; |
---|
6430 | static double *importance; |
---|
6431 | double tmpdouble; |
---|
6432 | LocalHom *tmpptr, *tmpptr1, *tmpptr2; |
---|
6433 | if( importance == NULL ) |
---|
6434 | importance = AllocateDoubleVec( nlenmax ); |
---|
6435 | |
---|
6436 | |
---|
6437 | fprintf( stderr, "effmtx = :\n" ); |
---|
6438 | for( i=0; i<nseq; i++ ) |
---|
6439 | { |
---|
6440 | for( j=0; j<nseq; j++ ) |
---|
6441 | { |
---|
6442 | fprintf( stderr, "%6.3f ", eff[i][j] ); |
---|
6443 | } |
---|
6444 | fprintf( stderr, "\n" ); |
---|
6445 | } |
---|
6446 | for( i=0; i<nseq; i++ ) |
---|
6447 | { |
---|
6448 | for( pos=0; pos<nlenmax; pos++ ) |
---|
6449 | importance[pos] = 0.0; |
---|
6450 | for( j=0; j<nseq; j++ ) |
---|
6451 | { |
---|
6452 | |
---|
6453 | if( i == j ) continue; |
---|
6454 | tmpptr = localhom[i]+j; |
---|
6455 | while( 1 ) |
---|
6456 | { |
---|
6457 | fprintf( stderr, "i=%d, j=%d\n", i, j ); |
---|
6458 | for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) |
---|
6459 | // importance[pos] += eff[i][j] * tmpptr->importance; |
---|
6460 | importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0; |
---|
6461 | fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance ); |
---|
6462 | tmpptr = tmpptr->next; |
---|
6463 | if( tmpptr == NULL ) break; |
---|
6464 | } |
---|
6465 | |
---|
6466 | } |
---|
6467 | #if 0 |
---|
6468 | fprintf( stderr, "position specific importance of seq %d:\n", i ); |
---|
6469 | for( pos=0; pos<nlenmax; pos++ ) |
---|
6470 | fprintf( stderr, "%d: %f\n", pos, importance[pos] ); |
---|
6471 | fprintf( stderr, "\n" ); |
---|
6472 | #endif |
---|
6473 | for( j=0; j<nseq; j++ ) |
---|
6474 | { |
---|
6475 | fprintf( stderr, "i=%d, j=%d\n", i, j ); |
---|
6476 | if( i == j ) continue; |
---|
6477 | tmpptr = localhom[i]+j; |
---|
6478 | do |
---|
6479 | { |
---|
6480 | tmpdouble = 0.0; |
---|
6481 | len = 0; |
---|
6482 | for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ ) |
---|
6483 | { |
---|
6484 | tmpdouble += importance[pos]; |
---|
6485 | len++; |
---|
6486 | } |
---|
6487 | tmpdouble /= (double)len; |
---|
6488 | tmpptr->importance = tmpdouble; |
---|
6489 | fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble ); |
---|
6490 | tmpptr = tmpptr->next; |
---|
6491 | } while( tmpptr ); |
---|
6492 | } |
---|
6493 | } |
---|
6494 | #if 1 |
---|
6495 | for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
6496 | { |
---|
6497 | fprintf( stderr, "i = %d, j=%d\n", i, j ); |
---|
6498 | tmpptr1 = localhom[i]+j; |
---|
6499 | tmpptr2 = localhom[j]+i; |
---|
6500 | while( tmpptr1 && tmpptr2 ) |
---|
6501 | { |
---|
6502 | tmpptr1->importance += tmpptr2->importance; |
---|
6503 | tmpptr1->importance *= 0.5; |
---|
6504 | tmpptr2->importance *= tmpptr1->importance; |
---|
6505 | fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance ); |
---|
6506 | tmpptr1 = tmpptr1->next; |
---|
6507 | tmpptr2 = tmpptr2->next; |
---|
6508 | fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 ); |
---|
6509 | } |
---|
6510 | } |
---|
6511 | #endif |
---|
6512 | } |
---|
6513 | |
---|
6514 | void weightimportance2( int nseq, double *eff, LocalHom **localhom ) |
---|
6515 | { |
---|
6516 | int i, j, pos, len; |
---|
6517 | static double *wimportance; |
---|
6518 | double tmpdouble; |
---|
6519 | if( wimportance == NULL ) |
---|
6520 | wimportance = AllocateDoubleVec( nlenmax ); |
---|
6521 | |
---|
6522 | |
---|
6523 | fprintf( stderr, "effmtx = :\n" ); |
---|
6524 | for( i=0; i<nseq; i++ ) |
---|
6525 | { |
---|
6526 | for( j=0; j<nseq; j++ ) |
---|
6527 | { |
---|
6528 | fprintf( stderr, "%6.3f ", eff[i] * eff[j] ); |
---|
6529 | } |
---|
6530 | fprintf( stderr, "\n" ); |
---|
6531 | } |
---|
6532 | for( i=0; i<nseq; i++ ) |
---|
6533 | { |
---|
6534 | fprintf( stderr, "i = %d\n", i ); |
---|
6535 | for( pos=0; pos<nlenmax; pos++ ) |
---|
6536 | wimportance[pos] = 0.0; |
---|
6537 | for( j=0; j<nseq; j++ ) |
---|
6538 | { |
---|
6539 | if( i == j ) continue; |
---|
6540 | for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ ) |
---|
6541 | // wimportance[pos] += eff[i][j]; |
---|
6542 | wimportance[pos] += eff[i] * eff[j] / (double)nseq * localhom[i][j].importance / 1.0; |
---|
6543 | } |
---|
6544 | #if 0 |
---|
6545 | fprintf( stderr, "position specific wimportance of seq %d:\n", i ); |
---|
6546 | for( pos=0; pos<nlenmax; pos++ ) |
---|
6547 | fprintf( stderr, "%d: %f\n", pos, wimportance[pos] ); |
---|
6548 | fprintf( stderr, "\n" ); |
---|
6549 | #endif |
---|
6550 | for( j=0; j<nseq; j++ ) |
---|
6551 | { |
---|
6552 | if( i == j ) continue; |
---|
6553 | tmpdouble = 0.0; |
---|
6554 | len = 0; |
---|
6555 | for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ ) |
---|
6556 | { |
---|
6557 | tmpdouble += wimportance[pos]; |
---|
6558 | len++; |
---|
6559 | } |
---|
6560 | tmpdouble /= (double)len; |
---|
6561 | localhom[i][j].wimportance = tmpdouble; |
---|
6562 | fprintf( stderr, "wimportance of match between %d - %d = %f\n", i, j, tmpdouble ); |
---|
6563 | } |
---|
6564 | } |
---|
6565 | #if 1 |
---|
6566 | for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
6567 | { |
---|
6568 | localhom[i][j].wimportance += localhom[j][i].wimportance; |
---|
6569 | localhom[i][j].wimportance = 0.5 * ( localhom[i][j].wimportance ); |
---|
6570 | } |
---|
6571 | for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
6572 | { |
---|
6573 | localhom[j][i].wimportance = localhom[i][j].wimportance; |
---|
6574 | } |
---|
6575 | #endif |
---|
6576 | } |
---|
6577 | |
---|
6578 | void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom ) |
---|
6579 | { |
---|
6580 | int i, j, pos, len; |
---|
6581 | static double *wimportance; |
---|
6582 | LocalHom *tmpptr, *tmpptr1, *tmpptr2; |
---|
6583 | if( wimportance == NULL ) |
---|
6584 | wimportance = AllocateDoubleVec( nlenmax ); |
---|
6585 | |
---|
6586 | |
---|
6587 | #if 0 |
---|
6588 | fprintf( stderr, "effarr1 = :\n" ); |
---|
6589 | for( i=0; i<clus1; i++ ) |
---|
6590 | fprintf( stderr, "%6.3f\n", eff1[i] ); |
---|
6591 | fprintf( stderr, "effarr2 = :\n" ); |
---|
6592 | for( i=0; i<clus2; i++ ) |
---|
6593 | fprintf( stderr, "%6.3f\n", eff2[i] ); |
---|
6594 | #endif |
---|
6595 | |
---|
6596 | for( i=0; i<clus1; i++ ) |
---|
6597 | { |
---|
6598 | for( j=0; j<clus2; j++ ) |
---|
6599 | { |
---|
6600 | // fprintf( stderr, "i=%d, j=%d\n", i, j ); |
---|
6601 | tmpptr = localhom[i][j]; |
---|
6602 | do |
---|
6603 | { |
---|
6604 | tmpptr->wimportance = tmpptr->importance * eff1[i] * eff2[j]; |
---|
6605 | tmpptr = tmpptr->next; |
---|
6606 | } while( tmpptr ); |
---|
6607 | } |
---|
6608 | } |
---|
6609 | } |
---|
6610 | |
---|
6611 | static void addlocalhom_e( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt ) |
---|
6612 | { |
---|
6613 | LocalHom *tmpptr; |
---|
6614 | tmpptr = localhom; |
---|
6615 | |
---|
6616 | fprintf( stderr, "adding localhom\n" ); |
---|
6617 | while( tmpptr->next ) |
---|
6618 | tmpptr = tmpptr->next; |
---|
6619 | fprintf( stderr, "allocating localhom\n" ); |
---|
6620 | tmpptr->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); |
---|
6621 | fprintf( stderr, "done\n" ); |
---|
6622 | tmpptr = tmpptr->next; |
---|
6623 | |
---|
6624 | tmpptr->start1 = start1; |
---|
6625 | tmpptr->start2 = start2; |
---|
6626 | tmpptr->end1 = end1; |
---|
6627 | tmpptr->end2 = end2; |
---|
6628 | tmpptr->opt = opt; |
---|
6629 | |
---|
6630 | fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 ); |
---|
6631 | } |
---|
6632 | |
---|
6633 | #if 0 |
---|
6634 | #endif |
---|
6635 | |
---|
6636 | |
---|
6637 | |
---|
6638 | void extendlocalhom( int nseq, LocalHom **localhom ) |
---|
6639 | { |
---|
6640 | int i, j, k, pos0, pos1, pos2, st; |
---|
6641 | int start1, start2, end1, end2; |
---|
6642 | static int *tmpint1 = NULL; |
---|
6643 | static int *tmpint2 = NULL; |
---|
6644 | static int *tmpdouble1 = NULL; |
---|
6645 | static int *tmpdouble2 = NULL; |
---|
6646 | double opt; |
---|
6647 | LocalHom *tmpptr; |
---|
6648 | if( tmpint1 == NULL ) |
---|
6649 | { |
---|
6650 | tmpint1 = AllocateIntVec( nlenmax ); |
---|
6651 | tmpint2 = AllocateIntVec( nlenmax ); |
---|
6652 | tmpdouble1 = AllocateIntVec( nlenmax ); |
---|
6653 | tmpdouble2 = AllocateIntVec( nlenmax ); |
---|
6654 | } |
---|
6655 | |
---|
6656 | |
---|
6657 | for( k=0; k<nseq; k++ ) |
---|
6658 | { |
---|
6659 | for( i=0; i<nseq-1; i++ ) |
---|
6660 | { |
---|
6661 | if( i == k ) continue; |
---|
6662 | for( pos0=0; pos0<nlenmax; pos0++ ) |
---|
6663 | tmpint1[pos0] = -1; |
---|
6664 | |
---|
6665 | tmpptr=localhom[k]+i; |
---|
6666 | do |
---|
6667 | { |
---|
6668 | pos0 = tmpptr->start1; |
---|
6669 | pos1 = tmpptr->start2; |
---|
6670 | while( pos0<=tmpptr->end1 ) |
---|
6671 | { |
---|
6672 | tmpint1[pos0] = pos1++; |
---|
6673 | tmpdouble1[pos0] = tmpptr->opt; |
---|
6674 | pos0++; |
---|
6675 | } |
---|
6676 | } while( tmpptr = tmpptr->next ); |
---|
6677 | |
---|
6678 | |
---|
6679 | for( j=i+1; j<nseq; j++ ) |
---|
6680 | { |
---|
6681 | if( j == k ) continue; |
---|
6682 | for( pos1=0; pos1<nlenmax; pos1++ ) tmpint2[pos1] = -1; |
---|
6683 | tmpptr=localhom[k]+j; |
---|
6684 | do |
---|
6685 | { |
---|
6686 | pos0 = tmpptr->start1; |
---|
6687 | pos2 = tmpptr->start2; |
---|
6688 | while( pos0<=tmpptr->end1 ) |
---|
6689 | { |
---|
6690 | tmpint2[pos0] = pos2++; |
---|
6691 | tmpdouble2[pos0++] = tmpptr->opt; |
---|
6692 | } |
---|
6693 | } while( tmpptr = tmpptr->next ); |
---|
6694 | |
---|
6695 | #if 0 |
---|
6696 | |
---|
6697 | fprintf( stderr, "i,j=%d,%d\n", i, j ); |
---|
6698 | |
---|
6699 | for( pos0=0; pos0<nlenmax; pos0++ ) |
---|
6700 | fprintf( stderr, "%d ", tmpint1[pos0] ); |
---|
6701 | fprintf( stderr, "\n" ); |
---|
6702 | |
---|
6703 | for( pos0=0; pos0<nlenmax; pos0++ ) |
---|
6704 | fprintf( stderr, "%d ", tmpint2[pos0] ); |
---|
6705 | fprintf( stderr, "\n" ); |
---|
6706 | #endif |
---|
6707 | |
---|
6708 | |
---|
6709 | st = 0; |
---|
6710 | for( pos0=0; pos0<nlenmax; pos0++ ) |
---|
6711 | { |
---|
6712 | // fprintf( stderr, "pos0 = %d/%d, st = %d, tmpint1[pos0] = %d, tmpint2[pos0] = %d\n", pos0, nlenmax, st, tmpint1[pos0], tmpint2[pos0] ); |
---|
6713 | if( tmpint1[pos0] >= 0 && tmpint2[pos0] >= 0 ) |
---|
6714 | { |
---|
6715 | if( st == 0 ) |
---|
6716 | { |
---|
6717 | st = 1; |
---|
6718 | start1 = tmpint1[pos0]; |
---|
6719 | start2 = tmpint2[pos0]; |
---|
6720 | opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] ); |
---|
6721 | } |
---|
6722 | else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 ) |
---|
6723 | { |
---|
6724 | addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt ); |
---|
6725 | addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt ); |
---|
6726 | start1 = tmpint1[pos0]; |
---|
6727 | start2 = tmpint2[pos0]; |
---|
6728 | opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] ); |
---|
6729 | } |
---|
6730 | } |
---|
6731 | if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 ) |
---|
6732 | { |
---|
6733 | if( st == 1 ) |
---|
6734 | { |
---|
6735 | st = 0; |
---|
6736 | addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt ); |
---|
6737 | addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt ); |
---|
6738 | } |
---|
6739 | } |
---|
6740 | } |
---|
6741 | } |
---|
6742 | } |
---|
6743 | } |
---|
6744 | } |
---|
6745 | #endif |
---|
6746 | |
---|
6747 | static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm ) |
---|
6748 | { |
---|
6749 | // dokka machigatteru |
---|
6750 | if( pt != lh ) // susumeru |
---|
6751 | { |
---|
6752 | pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); |
---|
6753 | pt = pt->next; |
---|
6754 | pt->next = NULL; |
---|
6755 | lh->last = pt; |
---|
6756 | } |
---|
6757 | else // sonomamatsukau |
---|
6758 | { |
---|
6759 | lh->last = pt; |
---|
6760 | } |
---|
6761 | lh->nokori++; |
---|
6762 | // fprintf( stderr, "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj ); |
---|
6763 | |
---|
6764 | pt->start1 = sti; |
---|
6765 | pt->start2 = stj; |
---|
6766 | pt->end1 = eni; |
---|
6767 | pt->end2 = enj; |
---|
6768 | pt->opt = opt; |
---|
6769 | pt->extended = interm; |
---|
6770 | pt->overlapaa = overlp; |
---|
6771 | #if 0 |
---|
6772 | fprintf( stderr, "i: %d-%d\n", sti, eni ); |
---|
6773 | fprintf( stderr, "j: %d-%d\n", stj, enj ); |
---|
6774 | fprintf( stderr, "opt=%f\n", opt ); |
---|
6775 | fprintf( stderr, "overlp=%d\n", overlp ); |
---|
6776 | #endif |
---|
6777 | } |
---|
6778 | |
---|
6779 | void extendlocalhom2( int nseq, LocalHom **localhom, double **dist ) |
---|
6780 | { |
---|
6781 | int overlp, plim; |
---|
6782 | int i, j, k; |
---|
6783 | int pi, pj, pk, len; |
---|
6784 | int status, sti, stj; |
---|
6785 | int *ipt; |
---|
6786 | int co; |
---|
6787 | static int *ini = NULL; |
---|
6788 | static int *inj = NULL; |
---|
6789 | LocalHom *pt; |
---|
6790 | |
---|
6791 | sti = 0; // by D.Mathog, a guess |
---|
6792 | stj = 0; // by D.Mathog, a guess |
---|
6793 | |
---|
6794 | if( ini == NULL ) |
---|
6795 | { |
---|
6796 | ini = AllocateIntVec( nlenmax+1 ); |
---|
6797 | inj = AllocateIntVec( nlenmax+1 ); |
---|
6798 | } |
---|
6799 | |
---|
6800 | |
---|
6801 | for( i=0; i<nseq-1; i++ ) |
---|
6802 | { |
---|
6803 | for( j=i+1; j<nseq; j++ ) |
---|
6804 | { |
---|
6805 | #if 0 |
---|
6806 | for( k=0; k<nseq; k++ ) sai[k] = 0; |
---|
6807 | numint = ncons; |
---|
6808 | while( 1 ) |
---|
6809 | { |
---|
6810 | k = (int)( rnd() * nseq ); |
---|
6811 | if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii |
---|
6812 | if( numint-- == 0 ) break; |
---|
6813 | if( sai[k] ) continue; |
---|
6814 | sai[k] = 1; |
---|
6815 | #else |
---|
6816 | for( k=0; k<nseq; k++ ) |
---|
6817 | { |
---|
6818 | #endif |
---|
6819 | // fprintf( stderr, "i=%d, j=%d, k=%d, dists = %f,%f,%f thrinter=%f\n", i, j, k, dist[i][j], dist[MIN(i,k)][MAX(i,k)], dist[MIN(j,k)][MAX(j,k)], thrinter ); |
---|
6820 | if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii |
---|
6821 | if( dist[MIN(i,k)][MAX(i,k)] > dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue; |
---|
6822 | ipt = ini; co = nlenmax+1; |
---|
6823 | while( co-- ) *ipt++ = -1; |
---|
6824 | ipt = inj; co = nlenmax+1; |
---|
6825 | while( co-- ) *ipt++ = -1; |
---|
6826 | overlp = 0; |
---|
6827 | |
---|
6828 | { |
---|
6829 | for( pt=localhom[i]+k; pt; pt=pt->next ) |
---|
6830 | { |
---|
6831 | // fprintf( stderr, "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended ); |
---|
6832 | if( pt->opt == -1 ) |
---|
6833 | { |
---|
6834 | fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt ); |
---|
6835 | } |
---|
6836 | if( pt->extended > -1 ) break; |
---|
6837 | pi = pt->start1; |
---|
6838 | pk = pt->start2; |
---|
6839 | len = pt->end1 - pt->start1 + 1; |
---|
6840 | ipt = ini + pk; |
---|
6841 | while( len-- ) *ipt++ = pi++; |
---|
6842 | } |
---|
6843 | } |
---|
6844 | |
---|
6845 | { |
---|
6846 | for( pt=localhom[j]+k; pt; pt=pt->next ) |
---|
6847 | { |
---|
6848 | if( pt->opt == -1 ) |
---|
6849 | { |
---|
6850 | fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt ); |
---|
6851 | } |
---|
6852 | if( pt->extended > -1 ) break; |
---|
6853 | pj = pt->start1; |
---|
6854 | pk = pt->start2; |
---|
6855 | len = pt->end1 - pt->start1 + 1; |
---|
6856 | ipt = inj + pk; |
---|
6857 | while( len-- ) *ipt++ = pj++; |
---|
6858 | } |
---|
6859 | } |
---|
6860 | #if 0 |
---|
6861 | fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k ); |
---|
6862 | overlp = 0; |
---|
6863 | for( pk = 0; pk < nlenmax; pk++ ) |
---|
6864 | { |
---|
6865 | if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; |
---|
6866 | fprintf( stderr, " %d", inj[pk] ); |
---|
6867 | } |
---|
6868 | fprintf( stderr, "\n" ); |
---|
6869 | |
---|
6870 | fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k ); |
---|
6871 | overlp = 0; |
---|
6872 | for( pk = 0; pk < nlenmax; pk++ ) |
---|
6873 | { |
---|
6874 | if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; |
---|
6875 | fprintf( stderr, " %d", ini[pk] ); |
---|
6876 | } |
---|
6877 | fprintf( stderr, "\n" ); |
---|
6878 | #endif |
---|
6879 | overlp = 0; |
---|
6880 | plim = nlenmax+1; |
---|
6881 | for( pk = 0; pk < plim; pk++ ) |
---|
6882 | if( ini[pk] != -1 && inj[pk] != -1 ) overlp++; |
---|
6883 | |
---|
6884 | |
---|
6885 | status = 0; |
---|
6886 | plim = nlenmax+1; |
---|
6887 | for( pk=0; pk<plim; pk++ ) |
---|
6888 | { |
---|
6889 | // fprintf( stderr, "%d %d: %d-%d\n", i, j, ini[pk], inj[pk] ); |
---|
6890 | if( status ) |
---|
6891 | { |
---|
6892 | if( ini[pk] == -1 || inj[pk] == -1 || ini[pk-1] != ini[pk] - 1 || inj[pk-1] != inj[pk] - 1 ) // saigonoshori |
---|
6893 | { |
---|
6894 | status = 0; |
---|
6895 | // fprintf( stderr, "end here!\n" ); |
---|
6896 | |
---|
6897 | pt = localhom[i][j].last; |
---|
6898 | // fprintf( stderr, "in ex (ba), pt = %p, nokori=%d, i,j,k=%d,%d,%d\n", pt, localhom[i][j].nokori, i, j, k ); |
---|
6899 | addlocalhom2_e( pt, localhom[i]+j, sti, stj, ini[pk-1], inj[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k ); |
---|
6900 | // fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); |
---|
6901 | |
---|
6902 | pt = localhom[j][i].last; |
---|
6903 | // fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next ); |
---|
6904 | // fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k ); |
---|
6905 | addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k ); |
---|
6906 | // fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next ); |
---|
6907 | } |
---|
6908 | } |
---|
6909 | if( !status ) // else deha arimasenn. |
---|
6910 | { |
---|
6911 | if( ini[pk] == -1 || inj[pk] == -1 ) continue; |
---|
6912 | sti = ini[pk]; |
---|
6913 | stj = inj[pk]; |
---|
6914 | // fprintf( stderr, "start here!\n" ); |
---|
6915 | status = 1; |
---|
6916 | } |
---|
6917 | } |
---|
6918 | // if( status ) fprintf( stderr, "end here\n" ); |
---|
6919 | |
---|
6920 | // exit( 1 ); |
---|
6921 | // fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); |
---|
6922 | } |
---|
6923 | #if 0 |
---|
6924 | for( pt=localhomtable[i]+j; pt; pt=pt->next ) |
---|
6925 | { |
---|
6926 | if( tmpptr->opt == -1.0 ) continue; |
---|
6927 | fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next ); |
---|
6928 | } |
---|
6929 | #endif |
---|
6930 | } |
---|
6931 | } |
---|
6932 | } |
---|
6933 | |
---|
6934 | int makelocal( char *s1, char *s2, int thr ) |
---|
6935 | { |
---|
6936 | int start, maxstart, maxend; |
---|
6937 | char *pt1, *pt2; |
---|
6938 | double score; |
---|
6939 | double maxscore; |
---|
6940 | |
---|
6941 | pt1 = s1; |
---|
6942 | pt2 = s2; |
---|
6943 | |
---|
6944 | maxend = 0; // by D.Mathog, a guess |
---|
6945 | |
---|
6946 | // fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 ); |
---|
6947 | maxscore = 0.0; |
---|
6948 | score = 0.0; |
---|
6949 | start = 0; |
---|
6950 | maxstart = 0; |
---|
6951 | while( *pt1 ) |
---|
6952 | { |
---|
6953 | // fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 ); |
---|
6954 | if( *pt1 == '-' || *pt2 == '-' ) |
---|
6955 | { |
---|
6956 | // fprintf( stderr, "penalty = %d\n", penalty ); |
---|
6957 | score += penalty; |
---|
6958 | while( *pt1 == '-' || *pt2 == '-' ) |
---|
6959 | { |
---|
6960 | pt1++; pt2++; |
---|
6961 | } |
---|
6962 | continue; |
---|
6963 | } |
---|
6964 | |
---|
6965 | score += ( amino_dis[(int)*pt1++][(int)*pt2++] - thr ); |
---|
6966 | // score += ( amino_dis[(int)*pt1++][(int)*pt2++] ); |
---|
6967 | if( score > maxscore ) |
---|
6968 | { |
---|
6969 | // fprintf( stderr, "score = %f\n", score ); |
---|
6970 | maxscore = score; |
---|
6971 | maxstart = start; |
---|
6972 | // fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start ); |
---|
6973 | } |
---|
6974 | if( score < 0.0 ) |
---|
6975 | { |
---|
6976 | // fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart ); |
---|
6977 | if( start == maxstart ) |
---|
6978 | { |
---|
6979 | maxend = pt1 - s1; |
---|
6980 | // fprintf( stderr, "maxend = %d\n", maxend ); |
---|
6981 | } |
---|
6982 | score = 0.0; |
---|
6983 | start = pt1 - s1; |
---|
6984 | } |
---|
6985 | } |
---|
6986 | if( start == maxstart ) |
---|
6987 | maxend = pt1 - s1 - 1; |
---|
6988 | |
---|
6989 | // fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore ); |
---|
6990 | s1[maxend+1] = 0; |
---|
6991 | s2[maxend+1] = 0; |
---|
6992 | return( maxstart ); |
---|
6993 | } |
---|
6994 | |
---|
6995 | void resetlocalhom( int nseq, LocalHom **lh ) |
---|
6996 | { |
---|
6997 | int i, j; |
---|
6998 | LocalHom *pt; |
---|
6999 | |
---|
7000 | for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
7001 | { |
---|
7002 | for( pt=lh[i]+j; pt; pt=pt->next ) |
---|
7003 | pt->opt = 1.0; |
---|
7004 | } |
---|
7005 | |
---|
7006 | } |
---|
7007 | |
---|
7008 | void gapireru( char *res, char *ori, char *gt ) |
---|
7009 | { |
---|
7010 | char g; |
---|
7011 | while( (g = *gt++) ) |
---|
7012 | { |
---|
7013 | if( g == '-' ) |
---|
7014 | { |
---|
7015 | *res++ = *newgapstr; |
---|
7016 | } |
---|
7017 | else |
---|
7018 | { |
---|
7019 | *res++ = *ori++; |
---|
7020 | } |
---|
7021 | } |
---|
7022 | *res = 0; |
---|
7023 | } |
---|
7024 | |
---|
/*
 * Copies column 'pos' of the first n strings of s into g[0..n-1].
 * g is not NUL-terminated by this function.
 */
void getkyokaigap( char *g, char **s, int pos, int n )
{
	int row;

	for( row=0; n != 0; n--, row++ )
		g[row] = s[row][pos];
}
7033 | |
---|
/*
 * Weighted count of gap openings per column.
 *
 * ogcp[0..len-1] is cleared, then for each of the clus sequences eff[j]
 * (converted to float) is added to ogcp[i] whenever seq[j][i] is '-' and the
 * preceding state is not a gap.  The state preceding column 0 is taken from
 * sgappat[j].
 */
void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat )
{
	int row, col;
	int prevgap, curgap;
	float weight;

	for( col=0; col<len; col++ ) ogcp[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = (float)eff[row];
		curgap = ( sgappat[row] == '-' );
		for( col=0; col<len; col++ )
		{
			prevgap = curgap;
			curgap = ( seq[row][col] == '-' );
			if( curgap && !prevgap ) ogcp[col] += weight;
		}
	}
}
/*
 * Weighted count of gap openings per column, with one extra boundary slot.
 *
 * ogcp[0..len+1] is cleared.  For each sequence, eff[j] (as float) is added
 * to ogcp[i] when seq[j][i] is '-' and the previous state is not a gap; the
 * state before column 0 comes from sgappat[j].  After the last column,
 * egappat[j] is treated as one more column and counted in ogcp[len].
 */
void new_OpeningGapCount_zure( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
{
	int row, col;
	int prevgap, curgap;
	float weight;

	for( col=0; col<len+2; col++ ) ogcp[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = (float)eff[row];
		curgap = ( sgappat[row] == '-' );
		for( col=0; col<len; col++ )
		{
			prevgap = curgap;
			curgap = ( seq[row][col] == '-' );
			if( curgap && !prevgap ) ogcp[col] += weight;
		}

		// Right boundary: the pattern beyond the block acts as column len.
		prevgap = curgap;
		curgap = ( egappat[row] == '-' );
		if( curgap && !prevgap ) ogcp[len] += weight;
	}
}
7142 | |
---|
/*
 * Weighted count of gap closings per column, with one extra boundary slot.
 *
 * fgcp[0..len+1] is cleared.  For each sequence, eff[j] (as float) is added
 * to fgcp[i] when the previous state is a gap and seq[j][i] is not '-'; the
 * state before column 0 comes from sgappat[j].  After the last column,
 * egappat[j] is treated as one more column and counted in fgcp[len].
 */
void new_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
{
	int row, col;
	int prevgap, curgap;
	float weight;

	for( col=0; col<len+2; col++ ) fgcp[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = (float)eff[row];
		curgap = ( sgappat[row] == '-' );
		for( col=0; col<len; col++ )
		{
			prevgap = curgap;
			curgap = ( seq[row][col] == '-' );
			if( prevgap && !curgap ) fgcp[col] += weight;
		}

		// Right boundary: the pattern beyond the block acts as column len.
		prevgap = curgap;
		curgap = ( egappat[row] == '-' );
		if( prevgap && !curgap ) fgcp[len] += weight;
	}
}
/*
 * Weighted count of gap closings, recorded at the last gap column.
 *
 * fgcp[0..len-1] is cleared.  For each sequence, eff[j] (as float) is added
 * to fgcp[i] when seq[j][i] is '-' and seq[j][i+1] is not; note that this
 * reads seq[j][len] for the last column.  Afterwards, if seq[j][len] is '-'
 * and egappat[j] is not, the weight is added to fgcp[len], which is NOT
 * cleared by this function.
 */
void new_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len, char *egappat )
{
	int row, col;
	int prevgap, curgap;
	float weight;

	for( col=0; col<len; col++ ) fgcp[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = (float)eff[row];
		curgap = ( seq[row][0] == '-' );
		for( col=0; col<len; col++ )
		{
			prevgap = curgap;
			curgap = ( seq[row][col+1] == '-' );
			if( prevgap && !curgap ) fgcp[col] += weight;
		}

		prevgap = curgap;
		curgap = ( egappat[row] == '-' );
		if( prevgap && !curgap ) fgcp[len] += weight;
	}
}
7271 | |
---|
/*
 * Weighted count of gap openings per column for complete sequences.
 *
 * ogcp[0..len-1] is cleared, and for each sequence eff[j] (as float) is added
 * to ogcp[i] when seq[j][i] is '-' and the previous column is not; the state
 * before column 0 is "not a gap".  ogcp[len] is set to 0 at the end.
 */
void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len )
{
	int row, col;
	int prevgap, curgap;
	float weight;

	for( col=0; col<len; col++ ) ogcp[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = (float)eff[row];
		curgap = 0;
		for( col=0; col<len; col++ )
		{
			prevgap = curgap;
			curgap = ( seq[row][col] == '-' );
			if( curgap && !prevgap ) ogcp[col] += weight;
		}
	}
	ogcp[len] = 0.0;
}
7302 | |
---|
/*
 * Weighted count of gap closings, shifted by one slot.
 *
 * fgcp[0..len] is cleared.  For each sequence, eff[j] (as float) is added to
 * fgcp[i+1] when seq[j][i] is '-' and seq[j][i+1] is not (seq[j][len] is
 * read for the last column).  Afterwards, if seq[j][len] itself is '-', the
 * weight is added to fgcp[len+1], which is NOT cleared by this function.
 */
void st_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len )
{
	int row, col;
	int prevgap, curgap;
	float weight;

	for( col=0; col<len+1; col++ ) fgcp[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = (float)eff[row];
		curgap = ( seq[row][0] == '-' );
		for( col=0; col<len; col++ )
		{
			prevgap = curgap;
			curgap = ( seq[row][col+1] == '-' );
			if( prevgap && !curgap ) fgcp[col+1] += weight;
		}

		// What follows the block is treated as "not a gap".
		if( curgap ) fgcp[len+1] += weight;
	}
}
7340 | |
---|
/*
 * Weighted count of gap closings, recorded at the last gap column.
 *
 * fgcp[0..len-1] is cleared.  For each sequence, eff[j] (as float) is added
 * to fgcp[i] when seq[j][i] is '-' and seq[j][i+1] is not (seq[j][len] is
 * read for the last column).  Afterwards, if seq[j][len] itself is '-', the
 * weight is added to fgcp[len], which is NOT cleared by this function.
 */
void st_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len )
{
	int row, col;
	int prevgap, curgap;
	float weight;

	for( col=0; col<len; col++ ) fgcp[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = (float)eff[row];
		curgap = ( seq[row][0] == '-' );
		for( col=0; col<len; col++ )
		{
			prevgap = curgap;
			curgap = ( seq[row][col+1] == '-' );
			if( prevgap && !curgap ) fgcp[col] += weight;
		}

		// What follows the block is treated as "not a gap".
		if( curgap ) fgcp[len] += weight;
	}
}
7378 | |
---|
/*
 * Computes a weighted count of gap closings and prints it to stderr.
 *
 * fgcp[0..len] is cleared; for each of the clus sequences eff[j] (as float)
 * is added to fgcp[i], 0 <= i <= len, when seq[j][i] is '-' and seq[j][i+1]
 * is not.  fgcp[0..len-1] is then written to stderr, one "index, value"
 * line per column.
 *
 * The following character is only inspected when the current one is '-',
 * so a string of exactly len characters is never read beyond its
 * terminating NUL.
 *
 * xxx is not used.
 */
void getGapPattern( float *fgcp, int clus, char **seq, double *eff, int len, char *xxx )
{
	int i, j;
	float feff;
	char *spt;

	(void)xxx;

	for( i=0; i<len+1; i++ ) fgcp[i] = 0.0;

	for( j=0; j<clus; j++ )
	{
		feff = (float)eff[j];
		spt = seq[j];
		for( i=0; i<len+1; i++ )
		{
			// Short-circuit keeps spt[i+1] from being read after a non-gap.
			if( spt[i] == '-' && spt[i+1] != '-' ) fgcp[i] += feff;
		}
	}

	for( j=0; j<len; j++ )
	{
		fprintf( stderr, "%d, %f\n", j, fgcp[j] );
	}
}
7420 | |
---|
/*
 * Weighted frequency of gap-gap neighbours.
 *
 * freq[0..len] is cleared, then for each sequence eff[i] (as float) is added
 * to freq[j], 1 <= j < len, when both seq[i][j-1] and seq[i][j] are '-'.
 * freq[0] and freq[len] therefore stay 0 (the boundary contributions are
 * disabled; the original author marked the left one as possibly wrong).
 */
void getdigapfreq_st( float *freq, int clus, char **seq, double *eff, int len )
{
	int row, col;
	float weight;
	char *s;

	for( col=0; col<len+1; col++ ) freq[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = eff[row];
		s = seq[row];
		for( col=1; col<len; col++ )
		{
			if( s[col] != '-' ) continue;
			if( s[col-1] != '-' ) continue;
			freq[col] += weight;
		}
	}
}
7442 | |
---|
/*
 * Weighted frequency of residue-residue neighbours.
 *
 * freq[0..len+1] is cleared, then for each sequence eff[i] (as float) is
 * added to
 *   freq[0]    when seq[i][0] is not '-',
 *   freq[j]    (1 <= j < len) when neither seq[i][j-1] nor seq[i][j] is '-',
 *   freq[len]  when seq[i][len-1] is not '-'.
 */
void getdiaminofreq_x( float *freq, int clus, char **seq, double *eff, int len )
{
	int row, col;
	float weight;
	char *s;

	for( col=0; col<len+2; col++ ) freq[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = eff[row];
		s = seq[row];

		if( s[0] != '-' )
			freq[0] += weight;

		for( col=1; col<len; col++ )
		{
			if( s[col] == '-' || s[col-1] == '-' ) continue;
			freq[col] += weight;
		}

		if( s[len-1] != '-' ) // only [len-1] is checked at the right edge
			freq[len] += weight;
	}
}
7464 | |
---|
/*
 * Weighted frequency of residue-residue neighbours for complete sequences.
 *
 * freq[0..len] is cleared, then for each sequence eff[i] (as float) is
 * added to
 *   freq[0]    when seq[i][0] is not '-',
 *   freq[j]    (1 <= j < len) when neither seq[i][j-1] nor seq[i][j] is '-',
 *   freq[len]  unconditionally.
 */
void getdiaminofreq_st( float *freq, int clus, char **seq, double *eff, int len )
{
	int row, col;
	float weight;
	char *s;

	for( col=0; col<len+1; col++ ) freq[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = eff[row];
		s = seq[row];

		if( s[0] != '-' )
			freq[0] += weight;

		for( col=1; col<len; col++ )
		{
			if( s[col] == '-' || s[col-1] == '-' ) continue;
			freq[col] += weight;
		}

		freq[len] += weight;
	}
}
7486 | |
---|
/*
 * Weighted frequency of gap-gap neighbours for a block of a longer alignment.
 *
 * freq[0..len+1] is cleared, then for each sequence eff[i] (as float) is
 * added to
 *   freq[0]    when seq[i][0] and sgappat[i] are both '-',
 *   freq[j]    (1 <= j < len) when seq[i][j-1] and seq[i][j] are both '-',
 *   freq[len]  when egappat[i] and seq[i][len-1] are both '-'.
 */
void getdigapfreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
{
	int row, col;
	float weight;
	char *s;

	for( col=0; col<len+2; col++ ) freq[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = eff[row];
		s = seq[row];

		if( s[0] == '-' && sgappat[row] == '-' )
			freq[0] += weight;

		for( col=1; col<len; col++ )
		{
			if( s[col] != '-' || s[col-1] != '-' ) continue;
			freq[col] += weight;
		}

		if( egappat[row] == '-' && s[len-1] == '-' )
			freq[len] += weight;
	}
}
7510 | |
---|
/*
 * Weighted frequency of residue-residue neighbours for a block of a longer
 * alignment.
 *
 * freq[0..len+1] is cleared, then for each sequence eff[i] (as float) is
 * added to
 *   freq[0]    when neither seq[i][0] nor sgappat[i] is '-',
 *   freq[j]    (1 <= j < len) when neither seq[i][j-1] nor seq[i][j] is '-',
 *   freq[len]  when neither egappat[i] nor seq[i][len-1] is '-'.
 */
void getdiaminofreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
{
	int row, col;
	float weight;
	char *s;

	for( col=0; col<len+2; col++ ) freq[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = eff[row];
		s = seq[row];

		if( s[0] != '-' && sgappat[row] != '-' )
			freq[0] += weight;

		for( col=1; col<len; col++ )
		{
			if( s[col] == '-' || s[col-1] == '-' ) continue;
			freq[col] += weight;
		}

		if( egappat[row] != '-' && s[len-1] != '-' )
			freq[len] += weight;
	}
}
7533 | |
---|
/*
 * Weighted gap frequency per column, shifted by one slot, for a block of a
 * longer alignment.
 *
 * freq[0..len+1] is cleared, then for each sequence eff[i] (as float) is
 * added to freq[0] when sgap[i] is '-', and to freq[j+1] for every column
 * j < len where seq[i][j] is '-'.
 */
void getgapfreq_zure_part( float *freq, int clus, char **seq, double *eff, int len, char *sgap )
{
	int row, col;
	float weight;
	char *s;

	for( col=0; col<len+2; col++ ) freq[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = eff[row];
		s = seq[row];

		if( sgap[row] == '-' )
			freq[0] += weight;

		for( col=0; col<len; col++ )
		{
			if( s[col] != '-' ) continue;
			freq[col+1] += weight;
		}
	}
}
7555 | |
---|
/*
 * Weighted gap frequency per column, shifted by one slot.
 *
 * freq[0..len] is cleared, then for each sequence eff[i] (as float) is added
 * to freq[j+1] for every column j < len where seq[i][j] is '-'.
 * freq[len+1] is set to 0 at the end.
 */
void getgapfreq_zure( float *freq, int clus, char **seq, double *eff, int len )
{
	int row, col;
	float weight;
	char *s;

	for( col=0; col<len+1; col++ ) freq[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = eff[row];
		s = seq[row];
		for( col=0; col<len; col++ )
		{
			if( s[col] != '-' ) continue;
			freq[col+1] += weight;
		}
	}
	freq[len+1] = 0.0;
}
7574 | |
---|
/*
 * Weighted gap frequency per column.
 *
 * freq[0..len] is cleared, then for each sequence eff[i] (as float) is added
 * to freq[j] for every column j < len where seq[i][j] is '-'.  freq[len] is
 * set to 0 again at the end.
 */
void getgapfreq( float *freq, int clus, char **seq, double *eff, int len )
{
	int row, col;
	float weight;
	char *s;

	for( col=0; col<len+1; col++ ) freq[col] = 0.0;

	for( row=0; row<clus; row++ )
	{
		weight = eff[row];
		s = seq[row];
		for( col=0; col<len; col++ )
		{
			if( s[col] != '-' ) continue;
			freq[col] += weight;
		}
	}
	freq[len] = 0.0;
}
7593 | |
---|
7594 | void st_getGapPattern( Gappat **pat, int clus, char **seq, double *eff, int len ) |
---|
7595 | { |
---|
7596 | int i, j, k, gb, gc; |
---|
7597 | int known; |
---|
7598 | float feff; |
---|
7599 | Gappat **fpt; |
---|
7600 | char *spt; |
---|
7601 | int gaplen; |
---|
7602 | |
---|
7603 | fpt = pat; |
---|
7604 | i = len+1; |
---|
7605 | while( i-- ) |
---|
7606 | { |
---|
7607 | if( *fpt ) free( *fpt ); |
---|
7608 | *fpt++ = NULL; |
---|
7609 | } |
---|
7610 | |
---|
7611 | for( j=0; j<clus; j++ ) |
---|
7612 | { |
---|
7613 | // fprintf( stderr, "seq[%d] = %s\n", j, seq[j] ); |
---|
7614 | feff = (float)eff[j]; |
---|
7615 | |
---|
7616 | fpt = pat; |
---|
7617 | *fpt = NULL; // Falign.c kara yobareru tokiha chigau. |
---|
7618 | spt = seq[j]; |
---|
7619 | gc = 0; |
---|
7620 | gaplen = 0; |
---|
7621 | |
---|
7622 | for( i=0; i<len+1; i++ ) |
---|
7623 | // while( i-- ) |
---|
7624 | { |
---|
7625 | // fprintf( stderr, "i=%d, gaplen = %d\n", i, gaplen ); |
---|
7626 | gb = gc; |
---|
7627 | gc = ( i != len && *spt++ == '-' ); |
---|
7628 | if( gc ) |
---|
7629 | gaplen++; |
---|
7630 | else |
---|
7631 | { |
---|
7632 | if( gb && gaplen ) |
---|
7633 | { |
---|
7634 | k = 1; |
---|
7635 | known = 0; |
---|
7636 | if( *fpt ) for( ; (*fpt)[k].len != -1; k++ ) |
---|
7637 | { |
---|
7638 | if( (*fpt)[k].len == gaplen ) |
---|
7639 | { |
---|
7640 | // fprintf( stderr, "known\n" ); |
---|
7641 | known = 1; |
---|
7642 | break; |
---|
7643 | } |
---|
7644 | } |
---|
7645 | |
---|
7646 | if( known == 0 ) |
---|
7647 | { |
---|
7648 | *fpt = (Gappat *)realloc( *fpt, (k+3) * sizeof( Gappat ) ); // mae1 (total), ato2 (len0), term |
---|
7649 | if( !*fpt ) |
---|
7650 | { |
---|
7651 | fprintf( stderr, "Cannot allocate gappattern!'n" ); |
---|
7652 | fprintf( stderr, "Use an approximate method, with the --mafft5 option.\n" ); |
---|
7653 | exit( 1 ); |
---|
7654 | } |
---|
7655 | (*fpt)[k].freq = 0.0; |
---|
7656 | (*fpt)[k].len = gaplen; |
---|
7657 | (*fpt)[k+1].len = -1; |
---|
7658 | (*fpt)[k+1].freq = 0.0; // iranai |
---|
7659 | // fprintf( stderr, "gaplen=%d, Unknown, %f\n", gaplen, (*fpt)[k].freq ); |
---|
7660 | } |
---|
7661 | |
---|
7662 | // fprintf( stderr, "adding pos %d, len=%d, k=%d, freq=%f->", i, gaplen, k, (*fpt)[k].freq ); |
---|
7663 | (*fpt)[k].freq += feff; |
---|
7664 | // fprintf( stderr, "%f\n", (*fpt)[k].freq ); |
---|
7665 | gaplen = 0; |
---|
7666 | } |
---|
7667 | } |
---|
7668 | fpt++; |
---|
7669 | } |
---|
7670 | } |
---|
7671 | #if 1 |
---|
7672 | for( j=0; j<len+1; j++ ) |
---|
7673 | { |
---|
7674 | if( pat[j] ) |
---|
7675 | { |
---|
7676 | // fprintf( stderr, "j=%d\n", j ); |
---|
7677 | // for( i=1; pat[j][i].len!=-1; i++ ) |
---|
7678 | // fprintf( stderr, "pos=%d, i=%d, len=%d, freq=%f\n", j, i, pat[j][i].len, pat[j][i].freq ); |
---|
7679 | |
---|
7680 | pat[j][0].len = 0; // iminashi |
---|
7681 | pat[j][0].freq = 0.0; |
---|
7682 | for( i=1; pat[j][i].len!=-1;i++ ) |
---|
7683 | { |
---|
7684 | pat[j][0].freq += pat[j][i].freq; |
---|
7685 | // fprintf( stderr, "totaling, i=%d, result = %f\n", i, pat[j][0].freq ); |
---|
7686 | } |
---|
7687 | // fprintf( stderr, "totaled, result = %f\n", pat[j][0].freq ); |
---|
7688 | |
---|
7689 | pat[j][i].freq = 1.0 - pat[j][0].freq; |
---|
7690 | pat[j][i].len = 0; // imiari |
---|
7691 | pat[j][i+1].len = -1; |
---|
7692 | } |
---|
7693 | else |
---|
7694 | { |
---|
7695 | pat[j] = (Gappat *)calloc( 3, sizeof( Gappat ) ); |
---|
7696 | pat[j][0].freq = 0.0; |
---|
7697 | pat[j][0].len = 0; // iminashi |
---|
7698 | |
---|
7699 | pat[j][1].freq = 1.0 - pat[j][0].freq; |
---|
7700 | pat[j][1].len = 0; // imiari |
---|
7701 | pat[j][2].len = -1; |
---|
7702 | } |
---|
7703 | } |
---|
7704 | #endif |
---|
7705 | } |
---|
7706 | |
---|
/*
 * Copies the aligned pair i1/i2 into r1/r2 while dropping every column in
 * which both inputs have '-'.  The scan stops at the end of i1; both outputs
 * are NUL-terminated.
 */
static void commongappickpair( char *r1, char *r2, char *i1, char *i2 )
{
	int src;
	int dst = 0;

	for( src=0; i1[src]; src++ )
	{
		if( i1[src] == '-' && i2[src] == '-' ) continue;
		r1[dst] = i1[src];
		r2[dst] = i2[src];
		dst++;
	}
	r1[dst] = 0;
	r2[dst] = 0;
}
7728 | |
---|
7729 | float naiveRpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ) |
---|
7730 | { |
---|
7731 | // return( 0 ); |
---|
7732 | int i, j; |
---|
7733 | float val; |
---|
7734 | float valf; |
---|
7735 | int pv; |
---|
7736 | double deff; |
---|
7737 | char *p1, *p2, *p1p, *p2p; |
---|
7738 | val = 0.0; |
---|
7739 | for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ ) |
---|
7740 | { |
---|
7741 | deff = eff1[i] * eff2[j]; |
---|
7742 | // fprintf( stderr, "feff %d-%d = %f\n", i, j, feff ); |
---|
7743 | // fprintf( stderr, "i1 = %s\n", seq1[i] ); |
---|
7744 | // fprintf( stderr, "i2 = %s\n", seq2[j] ); |
---|
7745 | // fprintf( stderr, "s1 = %s\n", s1 ); |
---|
7746 | // fprintf( stderr, "s2 = %s\n", s2 ); |
---|
7747 | // fprintf( stderr, "penal = %d\n", penal ); |
---|
7748 | |
---|
7749 | valf = 0; |
---|
7750 | p1 = seq1[i]; p2 = seq2[j]; |
---|
7751 | pv = 0; |
---|
7752 | if( *p1 == '-' && *p2 != '-' ) |
---|
7753 | pv = penal; |
---|
7754 | if( *p1 != '-' && *p2 == '-' ) |
---|
7755 | pv = penal; |
---|
7756 | // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] ); |
---|
7757 | p1p = p1; p2p = p2; |
---|
7758 | valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv; |
---|
7759 | while( *p1p ) |
---|
7760 | { |
---|
7761 | pv = 0; |
---|
7762 | if( *p1p != '-' && *p2p != '-' ) |
---|
7763 | { |
---|
7764 | if( *p1 == '-' && *p2 != '-' ) |
---|
7765 | pv = penal; |
---|
7766 | if( *p1 != '-' && *p2 == '-' ) |
---|
7767 | pv = penal; |
---|
7768 | if( *p1 != '-' && *p2 != '-' ) |
---|
7769 | ; |
---|
7770 | if( *p1 == '-' && *p2 == '-' ) |
---|
7771 | ; |
---|
7772 | } |
---|
7773 | if( *p1p == '-' && *p2p == '-' ) |
---|
7774 | { |
---|
7775 | if( *p1 == '-' && *p2 != '-' ) |
---|
7776 | pv = penal; |
---|
7777 | // ; |
---|
7778 | if( *p1 != '-' && *p2 == '-' ) |
---|
7779 | pv = penal; |
---|
7780 | // ; |
---|
7781 | if( *p1 != '-' && *p2 != '-' ) |
---|
7782 | ; |
---|
7783 | if( *p1 == '-' && *p2 == '-' ) |
---|
7784 | ; |
---|
7785 | } |
---|
7786 | if( *p1p != '-' && *p2p == '-' ) |
---|
7787 | { |
---|
7788 | if( *p1 == '-' && *p2 != '-' ) |
---|
7789 | pv = penal * 2; // ?? |
---|
7790 | // ; |
---|
7791 | if( *p1 != '-' && *p2 == '-' ) |
---|
7792 | ; |
---|
7793 | if( *p1 != '-' && *p2 != '-' ) |
---|
7794 | pv = penal; |
---|
7795 | // ; |
---|
7796 | if( *p1 == '-' && *p2 == '-' ) |
---|
7797 | pv = penal; |
---|
7798 | // ; |
---|
7799 | } |
---|
7800 | if( *p1p == '-' && *p2p != '-' ) |
---|
7801 | { |
---|
7802 | if( *p1 == '-' && *p2 != '-' ) |
---|
7803 | ; |
---|
7804 | if( *p1 != '-' && *p2 == '-' ) |
---|
7805 | pv = penal * 2; // ?? |
---|
7806 | // ; |
---|
7807 | if( *p1 != '-' && *p2 != '-' ) |
---|
7808 | pv = penal; |
---|
7809 | // ; |
---|
7810 | if( *p1 == '-' && *p2 == '-' ) |
---|
7811 | pv = penal; |
---|
7812 | // ; |
---|
7813 | } |
---|
7814 | // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] ); |
---|
7815 | // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] ); |
---|
7816 | valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv; |
---|
7817 | p1p++; p2p++; |
---|
7818 | } |
---|
7819 | // fprintf( stderr, "valf = %d\n", valf ); |
---|
7820 | val += deff * ( valf ); |
---|
7821 | } |
---|
7822 | fprintf( stderr, "val = %f\n", val ); |
---|
7823 | return( val ); |
---|
7824 | // exit( 1 ); |
---|
7825 | } |
---|
/*
 * "Q" variant of the naive group-to-group pair score.
 *
 * This scorer is switched off: it ignores every argument and always
 * returns 0.  The scoring loop that used to follow the unconditional
 * return was unreachable; it is kept under #if 0 for reference only and is
 * not compiled.
 *
 * Returns: 0 in all cases.
 */
float naiveQpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
{
#if 0
	/*
	 * Disabled reference implementation.  Penalty multiplier indexed by
	 * [state of previous column][state of current column], where the state is
	 *   0: no gap, 1: gap in seq1 only, 2: gap in seq2 only, 3: gap in both.
	 */
	static const int mult[4][4] = {
		{ 0, 1, 1, 0 },
		{ 1, 0, 2, 0 },
		{ 1, 2, 0, 0 },
		{ 0, 0, 0, 0 },
	};
	int i, j;
	int prevstate, curstate;
	int pv;
	float val;
	float valf;
	double deff;
	char *p1, *p2, *p1p, *p2p;

	val = 0.0;
	for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
	{
		deff = eff1[i] * eff2[j];
		valf = 0;
		p1p = p1 = seq1[i];
		p2p = p2 = seq2[j];

		curstate = ( *p1 == '-' ) | ( ( *p2 == '-' ) << 1 );
		pv = ( curstate == 1 || curstate == 2 ) ? penal : 0;
		valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;

		while( *p1p )
		{
			prevstate = ( *p1p == '-' ) | ( ( *p2p == '-' ) << 1 );
			curstate = ( *p1 == '-' ) | ( ( *p2 == '-' ) << 1 );
			pv = penal * mult[prevstate][curstate];
			valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
			p1p++; p2p++;
		}
		val += deff * ( valf );
	}
	fprintf( stderr, "val = %f\n", val );
	return( val );
#else
	// Arguments are intentionally unused while the scorer is disabled.
	(void)n1; (void)n2;
	(void)seq1; (void)seq2;
	(void)eff1; (void)eff2;
	(void)penal;
	return( 0 );
#endif
}
---|
7923 | float naiveHpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ) |
---|
7924 | { |
---|
7925 | int i, j; |
---|
7926 | float val; |
---|
7927 | float valf; |
---|
7928 | int pv; |
---|
7929 | // float feff = 0.0; // by D.Mathog, a guess |
---|
7930 | double deff; |
---|
7931 | char *p1, *p2, *p1p, *p2p; |
---|
7932 | val = 0.0; |
---|
7933 | for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ ) |
---|
7934 | { |
---|
7935 | deff = eff1[i] * eff2[j]; |
---|
7936 | // fprintf( stderr, "i1 = %s\n", seq1[i] ); |
---|
7937 | // fprintf( stderr, "i2 = %s\n", seq2[j] ); |
---|
7938 | // fprintf( stderr, "s1 = %s\n", s1 ); |
---|
7939 | // fprintf( stderr, "s2 = %s\n", s2 ); |
---|
7940 | // fprintf( stderr, "penal = %d\n", penal ); |
---|
7941 | |
---|
7942 | valf = 0; |
---|
7943 | p1 = seq1[i]; p2 = seq2[j]; |
---|
7944 | pv = 0; |
---|
7945 | if( *p1 == '-' && *p2 != '-' ) |
---|
7946 | pv = penal; |
---|
7947 | if( *p1 != '-' && *p2 == '-' ) |
---|
7948 | pv = penal; |
---|
7949 | if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, (int)(p1-seq1[i]), (int)(p2-seq2[j]) ); |
---|
7950 | p1p = p1; p2p = p2; |
---|
7951 | valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv; |
---|
7952 | while( *p1p ) |
---|
7953 | { |
---|
7954 | pv = 0; |
---|
7955 | if( *p1p != '-' && *p2p != '-' ) |
---|
7956 | { |
---|
7957 | if( *p1 == '-' && *p2 != '-' ) |
---|
7958 | pv = penal; |
---|
7959 | if( *p1 != '-' && *p2 == '-' ) |
---|
7960 | pv = penal; |
---|
7961 | if( *p1 != '-' && *p2 != '-' ) |
---|
7962 | ; |
---|
7963 | if( *p1 == '-' && *p2 == '-' ) |
---|
7964 | ; |
---|
7965 | } |
---|
7966 | if( *p1p == '-' && *p2p == '-' ) |
---|
7967 | { |
---|
7968 | if( *p1 == '-' && *p2 != '-' ) |
---|
7969 | // pv = penal; |
---|
7970 | ; |
---|
7971 | if( *p1 != '-' && *p2 == '-' ) |
---|
7972 | // pv = penal; |
---|
7973 | ; |
---|
7974 | if( *p1 != '-' && *p2 != '-' ) |
---|
7975 | ; |
---|
7976 | if( *p1 == '-' && *p2 == '-' ) |
---|
7977 | ; |
---|
7978 | } |
---|
7979 | if( *p1p != '-' && *p2p == '-' ) |
---|
7980 | { |
---|
7981 | if( *p1 == '-' && *p2 != '-' ) |
---|
7982 | // pv = penal; |
---|
7983 | ; |
---|
7984 | if( *p1 != '-' && *p2 == '-' ) |
---|
7985 | ; |
---|
7986 | if( *p1 != '-' && *p2 != '-' ) |
---|
7987 | pv = penal; |
---|
7988 | if( *p1 == '-' && *p2 == '-' ) |
---|
7989 | // pv = penal; |
---|
7990 | ; |
---|
7991 | } |
---|
7992 | if( *p1p == '-' && *p2p != '-' ) |
---|
7993 | { |
---|
7994 | if( *p1 == '-' && *p2 != '-' ) |
---|
7995 | ; |
---|
7996 | if( *p1 != '-' && *p2 == '-' ) |
---|
7997 | // pv = penal; |
---|
7998 | ; |
---|
7999 | if( *p1 != '-' && *p2 != '-' ) |
---|
8000 | pv = penal; |
---|
8001 | if( *p1 == '-' && *p2 == '-' ) |
---|
8002 | // pv = penal; |
---|
8003 | ; |
---|
8004 | } |
---|
8005 | // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] ); |
---|
8006 | // if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] ); |
---|
8007 | valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv; |
---|
8008 | p1p++; p2p++; |
---|
8009 | } |
---|
8010 | // fprintf( stderr, "valf = %d\n", valf ); |
---|
8011 | val += deff * ( valf ); |
---|
8012 | } |
---|
8013 | fprintf( stderr, "val = %f\n", val ); |
---|
8014 | return( val ); |
---|
8015 | // exit( 1 ); |
---|
8016 | } |
---|
8017 | |
---|
8018 | float naivepairscore11( char *seq1, char *seq2, int penal ) |
---|
8019 | { |
---|
8020 | float vali; |
---|
8021 | int len = strlen( seq1 ); |
---|
8022 | char *s1, *s2, *p1, *p2; |
---|
8023 | s1 = calloc( len+1, sizeof( char ) ); |
---|
8024 | s2 = calloc( len+1, sizeof( char ) ); |
---|
8025 | { |
---|
8026 | vali = 0.0; |
---|
8027 | commongappickpair( s1, s2, seq1, seq2 ); |
---|
8028 | // fprintf( stderr, "###i1 = %s\n", s1 ); |
---|
8029 | // fprintf( stderr, "###i2 = %s\n", s2 ); |
---|
8030 | // fprintf( stderr, "###penal = %d\n", penal ); |
---|
8031 | |
---|
8032 | p1 = s1; p2 = s2; |
---|
8033 | while( *p1 ) |
---|
8034 | { |
---|
8035 | if( *p1 == '-' ) |
---|
8036 | { |
---|
8037 | // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff ); |
---|
8038 | vali += (float)penal; |
---|
8039 | // while( *p1 == '-' || *p2 == '-' ) |
---|
8040 | while( *p1 == '-' ) // SP |
---|
8041 | { |
---|
8042 | p1++; |
---|
8043 | p2++; |
---|
8044 | } |
---|
8045 | continue; |
---|
8046 | } |
---|
8047 | if( *p2 == '-' ) |
---|
8048 | { |
---|
8049 | // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff ); |
---|
8050 | vali += (float)penal; |
---|
8051 | // while( *p2 == '-' || *p1 == '-' ) |
---|
8052 | while( *p2 == '-' ) // SP |
---|
8053 | { |
---|
8054 | p1++; |
---|
8055 | p2++; |
---|
8056 | } |
---|
8057 | continue; |
---|
8058 | } |
---|
8059 | // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] ); |
---|
8060 | vali += (float)amino_dis[(int)*p1++][(int)*p2++]; |
---|
8061 | } |
---|
8062 | } |
---|
8063 | free( s1 ); |
---|
8064 | free( s2 ); |
---|
8065 | // fprintf( stderr, "###vali = %d\n", vali ); |
---|
8066 | return( vali ); |
---|
8067 | } |
---|
8068 | |
---|
8069 | float naivepairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ) |
---|
8070 | { |
---|
8071 | // return( 0.0 ); |
---|
8072 | int i, j; |
---|
8073 | float val; |
---|
8074 | int vali; |
---|
8075 | float feff; |
---|
8076 | int len = strlen( seq1[0] ); |
---|
8077 | char *s1, *s2, *p1, *p2; |
---|
8078 | s1 = calloc( len+1, sizeof( char ) ); |
---|
8079 | s2 = calloc( len+1, sizeof( char ) ); |
---|
8080 | val = 0.0; |
---|
8081 | for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ ) |
---|
8082 | { |
---|
8083 | vali = 0; |
---|
8084 | feff = eff1[i] * eff2[j]; |
---|
8085 | // fprintf( stderr, "feff %d-%d = %f\n", i, j, feff ); |
---|
8086 | commongappickpair( s1, s2, seq1[i], seq2[j] ); |
---|
8087 | // fprintf( stderr, "i1 = %s\n", seq1[i] ); |
---|
8088 | // fprintf( stderr, "i2 = %s\n", seq2[j] ); |
---|
8089 | // fprintf( stderr, "s1 = %s\n", s1 ); |
---|
8090 | // fprintf( stderr, "s2 = %s\n", s2 ); |
---|
8091 | // fprintf( stderr, "penal = %d\n", penal ); |
---|
8092 | |
---|
8093 | p1 = s1; p2 = s2; |
---|
8094 | while( *p1 ) |
---|
8095 | { |
---|
8096 | if( *p1 == '-' ) |
---|
8097 | { |
---|
8098 | // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff ); |
---|
8099 | vali += penal; |
---|
8100 | // while( *p1 == '-' || *p2 == '-' ) |
---|
8101 | while( *p1 == '-' ) // SP |
---|
8102 | { |
---|
8103 | p1++; |
---|
8104 | p2++; |
---|
8105 | } |
---|
8106 | continue; |
---|
8107 | } |
---|
8108 | if( *p2 == '-' ) |
---|
8109 | { |
---|
8110 | // fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff ); |
---|
8111 | vali += penal; |
---|
8112 | // while( *p2 == '-' || *p1 == '-' ) |
---|
8113 | while( *p2 == '-' ) // SP |
---|
8114 | { |
---|
8115 | p1++; |
---|
8116 | p2++; |
---|
8117 | } |
---|
8118 | continue; |
---|
8119 | } |
---|
8120 | // fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] ); |
---|
8121 | vali += amino_dis[(int)*p1++][(int)*p2++]; |
---|
8122 | } |
---|
8123 | // fprintf( stderr, "vali = %d\n", vali ); |
---|
8124 | val += feff * vali; |
---|
8125 | } |
---|
8126 | free( s1 ); |
---|
8127 | free( s2 ); |
---|
8128 | fprintf( stderr, "val = %f\n", val ); |
---|
8129 | return( val ); |
---|
8130 | // exit( 1 ); |
---|
8131 | } |
---|
8132 | |
---|
8133 | double plainscore( int nseq, char **s ) |
---|
8134 | { |
---|
8135 | int i, j, ilim; |
---|
8136 | double v = 0.0; |
---|
8137 | |
---|
8138 | ilim = nseq-1; |
---|
8139 | for( i=0; i<ilim; i++ ) for( j=i+1; j<nseq; j++ ) |
---|
8140 | { |
---|
8141 | v += (double)naivepairscore11( s[i], s[j], penalty ); |
---|
8142 | } |
---|
8143 | |
---|
8144 | fprintf( stderr, "penalty = %d\n", penalty ); |
---|
8145 | |
---|
8146 | return( v ); |
---|
8147 | } |
---|
8148 | |
---|
/*
 * Append the -1 terminated int list s2 to the -1 terminated list s1
 * (the int analogue of strcat).  s1 must have room for the combined
 * members plus the terminating -1.
 */
void intcat( int *s1, int *s2 )
{
	int *dst = s1;
	int *src;

	// Find the terminator of the destination list.
	while( *dst != -1 ) dst++;

	for( src=s2; *src != -1; src++ )
		*dst++ = *src;
	*dst = -1;
}
---|
8159 | |
---|
/*
 * Copy the -1 terminated int list s2 into s1, including the terminator
 * (the int analogue of strcpy).  s1 must have room for all members of s2
 * plus the terminating -1.
 */
void intcpy( int *s1, int *s2 )
{
	int k;

	for( k=0; s2[k] != -1; k++ )
		s1[k] = s2[k];
	s1[k] = -1;
}
---|
8169 | |
---|
/* Number of members in the -1 terminated int list s (terminator excluded). */
static int countmem( int *s )
{
	int n;

	for( n=0; s[n] != -1; n++ )
		;
	return( n );
}
---|
8176 | |
---|
/*
 * Return the last member of the -1 terminated int list s, i.e. the value
 * stored just before the terminator.
 *
 * For an empty list (s[0] == -1) the result is -1; nothing outside the
 * list is read.
 */
static int lastmem( int *s )
{
	int last = -1;

	for( ; *s != -1; s++ )
		last = *s;
	return( last );
}
---|
8183 | |
---|
8184 | |
---|
8185 | int addonetip( int njobc, int ***topolc, float **lenc, float **iscorec, int ***topol, float **len, Treedep *dep, int treeout, Addtree *addtree, int iadd, char **name ) |
---|
8186 | { |
---|
8187 | int i, j, mem0, mem1, posinnew, m; |
---|
8188 | int nstep; |
---|
8189 | int norg; |
---|
8190 | float minscore, minscoreo, eff0, eff1, addedlen, tmpmin; |
---|
8191 | int nearest, nearesto; |
---|
8192 | int repnorg; |
---|
8193 | int *leaf2node; |
---|
8194 | int *additionaltopol; |
---|
8195 | double (*clusterfuncpt[1])(double,double); |
---|
8196 | Bchain *ac, *acpt, *acori, *acnext, *acprev; |
---|
8197 | int neighbor; |
---|
8198 | char *neighborlist; |
---|
8199 | char *npt; |
---|
8200 | // char **tree; //static? |
---|
8201 | // char *treetmp; //static? |
---|
8202 | |
---|
8203 | |
---|
8204 | // treetmp = AllocateCharVec( njob*150 ); |
---|
8205 | // tree = AllocateCharMtx( njob, njob*150 ); |
---|
8206 | |
---|
8207 | sueff1_double = 1 - SUEFF; |
---|
8208 | sueff05_double = SUEFF * 0.5; |
---|
8209 | if ( treemethod == 'X' ) |
---|
8210 | clusterfuncpt[0] = cluster_mix_double; |
---|
8211 | else if ( treemethod == 'E' ) |
---|
8212 | clusterfuncpt[0] = cluster_average_double; |
---|
8213 | else if ( treemethod == 'q' ) |
---|
8214 | clusterfuncpt[0] = cluster_minimum_double; |
---|
8215 | else |
---|
8216 | { |
---|
8217 | fprintf( stderr, "Unknown treemethod, %c\n", treemethod ); |
---|
8218 | exit( 1 ); |
---|
8219 | } |
---|
8220 | |
---|
8221 | norg = njobc-1; |
---|
8222 | nstep = njobc-2; |
---|
8223 | |
---|
8224 | additionaltopol = (int *)calloc( 2, sizeof( int ) ); |
---|
8225 | leaf2node= (int *)calloc( norg, sizeof( int ) ); |
---|
8226 | if( treeout ) |
---|
8227 | { |
---|
8228 | neighborlist = calloc( norg * 30, sizeof( char ) ); |
---|
8229 | } |
---|
8230 | // for( i=0; i<njobc; i++ ) sprintf( tree[i], "%d", i+1 ); |
---|
8231 | if( !leaf2node ) |
---|
8232 | { |
---|
8233 | fprintf( stderr, "Cannot allocate leaf2node.\n" ); |
---|
8234 | exit( 1 ); |
---|
8235 | } |
---|
8236 | additionaltopol[0] = norg; |
---|
8237 | additionaltopol[1] = -1; |
---|
8238 | |
---|
8239 | ac = (Bchain *)malloc( norg * sizeof( Bchain ) ); |
---|
8240 | for( i=0; i<norg; i++ ) |
---|
8241 | { |
---|
8242 | ac[i].next = ac+i+1; |
---|
8243 | ac[i].prev = ac+i-1; |
---|
8244 | ac[i].pos = i; |
---|
8245 | } |
---|
8246 | ac[norg-1].next = NULL; |
---|
8247 | |
---|
8248 | |
---|
8249 | acori = (Bchain *)malloc( 1 * sizeof( Bchain ) ); |
---|
8250 | acori->next = ac; |
---|
8251 | acori->pos = -1; |
---|
8252 | ac[0].prev = acori; |
---|
8253 | |
---|
8254 | |
---|
8255 | // for( i=0; i<nstep; i++ ) |
---|
8256 | // { |
---|
8257 | // fprintf( stderr, "distfromtip = %f\n", dep[i].distfromtip ); |
---|
8258 | // } |
---|
8259 | // |
---|
8260 | // for( i=0; i<norg; i++ ) |
---|
8261 | // { |
---|
8262 | // fprintf( stderr, "disttofrag(%d,%d) = %f\n", i, njobc-1, iscorec[i][norg-i] ); |
---|
8263 | // } |
---|
8264 | |
---|
8265 | minscore = 9999.9; |
---|
8266 | nearest = -1; |
---|
8267 | for( i=0; i<norg; i++ ) |
---|
8268 | { |
---|
8269 | tmpmin = iscorec[i][norg-i]; |
---|
8270 | if( minscore > tmpmin ) |
---|
8271 | { |
---|
8272 | minscore = tmpmin; |
---|
8273 | nearest = i; |
---|
8274 | } |
---|
8275 | } |
---|
8276 | nearesto = nearest; |
---|
8277 | minscoreo = minscore; |
---|
8278 | |
---|
8279 | |
---|
8280 | |
---|
8281 | // for( i=0; i<njobc-1; i++ ) for( j=i+1; j<njobc; j++ ) |
---|
8282 | // fprintf( stderr, "iscorec[%d][%d] = %f\n", i, j, iscorec[i][j-i] ); |
---|
8283 | |
---|
8284 | posinnew = 0; |
---|
8285 | repnorg = -1; |
---|
8286 | for( i=0; i<norg; i++ ) leaf2node[i] = -1; |
---|
8287 | for( i=0; i<nstep; i++ ) |
---|
8288 | { |
---|
8289 | mem0 = topol[i][0][0]; |
---|
8290 | mem1 = topol[i][1][0]; |
---|
8291 | // fprintf( stderr, "step %d\n", i ); |
---|
8292 | // fprintf( stderr, "mem0 = %d\n", mem0 ); |
---|
8293 | // fprintf( stderr, "mem1 = %d\n", mem1 ); |
---|
8294 | |
---|
8295 | if( repnorg == -1 && dep[i].distfromtip * 2 > minscore ) |
---|
8296 | { |
---|
8297 | // fprintf( stderr, "\n\n\nminscore = %f\n", minscore ); |
---|
8298 | // fprintf( stderr, "distfromtip = %f\n", dep[i].distfromtip ); |
---|
8299 | // fprintf( stderr, "INSERT HERE, %d-%d\n", nearest, norg ); |
---|
8300 | // fprintf( stderr, "nearest=%d, leaf2node[]=%d\n", nearest, leaf2node[nearest] ); |
---|
8301 | |
---|
8302 | if( leaf2node[nearest] == -1 ) |
---|
8303 | { |
---|
8304 | // fprintf( stderr, "INSERTING to 0!!!\n" ); |
---|
8305 | topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( 1 + 1 ) * sizeof( int ) ); |
---|
8306 | topolc[posinnew][0][0] = nearest; |
---|
8307 | topolc[posinnew][0][1] = -1; |
---|
8308 | |
---|
8309 | addedlen = lenc[posinnew][0] = minscore / 2; |
---|
8310 | } |
---|
8311 | else |
---|
8312 | { |
---|
8313 | // fprintf( stderr, "INSERTING to g, leaf2node = %d, cm=%d!!!\n", leaf2node[nearest], countmem(topol[leaf2node[nearest]][0] ) ); |
---|
8314 | topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( ( countmem( topol[leaf2node[nearest]][0] ) + countmem( topol[leaf2node[nearest]][1] ) + 1 ) * sizeof( int ) ) ); |
---|
8315 | // fprintf( stderr, "leaf2node[%d] = %d\n", nearest, leaf2node[nearest] ); |
---|
8316 | intcpy( topolc[posinnew][0], topol[leaf2node[nearest]][0] ); |
---|
8317 | intcat( topolc[posinnew][0], topol[leaf2node[nearest]][1] ); |
---|
8318 | addedlen = lenc[posinnew][0] = minscore / 2 - len[leaf2node[nearest]][0]; |
---|
8319 | } |
---|
8320 | neighbor = lastmem( topolc[posinnew][0] ); |
---|
8321 | |
---|
8322 | if( treeout ) |
---|
8323 | { |
---|
8324 | #if 0 |
---|
8325 | fp = fopen( "infile.tree", "a" ); // kyougou!! |
---|
8326 | if( fp == 0 ) |
---|
8327 | { |
---|
8328 | fprintf( stderr, "File error!\n" ); |
---|
8329 | exit( 1 ); |
---|
8330 | } |
---|
8331 | fprintf( fp, "\n" ); |
---|
8332 | fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); |
---|
8333 | fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); |
---|
8334 | fprintf( fp, " distance: %f\n", minscore ); |
---|
8335 | fprintf( fp, " cousin: " ); |
---|
8336 | for( j=0; topolc[posinnew][0][j]!=-1; j++ ) |
---|
8337 | fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); |
---|
8338 | fprintf( fp, "\n" ); |
---|
8339 | fclose( fp ); |
---|
8340 | #else |
---|
8341 | addtree[iadd].nearest = nearesto; |
---|
8342 | addtree[iadd].dist1 = minscoreo; |
---|
8343 | addtree[iadd].dist2 = minscore; |
---|
8344 | neighborlist[0] = 0; |
---|
8345 | npt = neighborlist; |
---|
8346 | for( j=0; topolc[posinnew][0][j]!=-1; j++ ) |
---|
8347 | { |
---|
8348 | sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); |
---|
8349 | npt += strlen( npt ); |
---|
8350 | } |
---|
8351 | addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); |
---|
8352 | strcpy( addtree[iadd].neighbors, neighborlist ); |
---|
8353 | #endif |
---|
8354 | } |
---|
8355 | |
---|
8356 | // fprintf( stderr, "INSERTING to 1!!!\n" ); |
---|
8357 | topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( 1 + 1 ) * sizeof( int ) ); |
---|
8358 | topolc[posinnew][1][0] = norg; |
---|
8359 | topolc[posinnew][1][1] = -1; |
---|
8360 | lenc[posinnew][1] = minscore / 2; |
---|
8361 | |
---|
8362 | repnorg = nearest; |
---|
8363 | |
---|
8364 | // fprintf( stderr, "STEP %d\n", posinnew ); |
---|
8365 | // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( stderr, " %d", topolc[posinnew][0][j] ); |
---|
8366 | // fprintf( stderr, "\n len=%f\n", lenc[i][0] ); |
---|
8367 | // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) fprintf( stderr, " %d", topolc[posinnew][1][j] ); |
---|
8368 | // fprintf( stderr, "\n len=%f\n", lenc[i][1] ); |
---|
8369 | |
---|
8370 | // im = topolc[posinnew][0][0]; |
---|
8371 | // jm = topolc[posinnew][1][0]; |
---|
8372 | // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] ); |
---|
8373 | // strcpy( tree[im], treetmp ); |
---|
8374 | |
---|
8375 | posinnew++; |
---|
8376 | } |
---|
8377 | |
---|
8378 | // fprintf( stderr, "minscore = %f\n", minscore ); |
---|
8379 | // fprintf( stderr, "distfromtip = %f\n", dep[i].distfromtip ); |
---|
8380 | // fprintf( stderr, "Modify matrix, %d-%d\n", nearest, norg ); |
---|
8381 | eff0 = iscorec[mem0][norg-mem0]; |
---|
8382 | eff1 = iscorec[mem1][norg-mem1]; |
---|
8383 | |
---|
8384 | iscorec[mem0][norg-mem0] = (clusterfuncpt[0])( eff0, eff1 ); |
---|
8385 | iscorec[mem1][norg-mem1] = 9999.9; // sukoshi muda |
---|
8386 | |
---|
8387 | acprev = ac[mem1].prev; |
---|
8388 | acnext = ac[mem1].next; |
---|
8389 | acprev->next = acnext; |
---|
8390 | if( acnext != NULL ) acnext->prev = acprev; |
---|
8391 | |
---|
8392 | if( ( nearest == mem1 || nearest == mem0 ) ) |
---|
8393 | { |
---|
8394 | minscore = 9999.9; |
---|
8395 | // for( j=0; j<norg; j++ ) // sukoshi muda |
---|
8396 | // { |
---|
8397 | // if( minscore > iscorec[j][norg-j] ) |
---|
8398 | // { |
---|
8399 | // minscore = iscorec[j][norg-j]; |
---|
8400 | // nearest = j; |
---|
8401 | // } |
---|
8402 | // } |
---|
8403 | // fprintf( stderr, "searching on modified ac " ); |
---|
8404 | for( acpt=acori->next; acpt!=NULL; acpt=acpt->next ) // sukoshi muda |
---|
8405 | { |
---|
8406 | // fprintf( stderr, "." ); |
---|
8407 | j = acpt->pos; |
---|
8408 | tmpmin = iscorec[j][norg-j]; |
---|
8409 | if( minscore > tmpmin ) |
---|
8410 | { |
---|
8411 | minscore = tmpmin; |
---|
8412 | nearest = j; |
---|
8413 | } |
---|
8414 | } |
---|
8415 | // fprintf( stderr, "done\n" ); |
---|
8416 | } |
---|
8417 | |
---|
8418 | // fprintf( stderr, "posinnew = %d\n", posinnew ); |
---|
8419 | |
---|
8420 | |
---|
8421 | if( topol[i][0][0] == repnorg ) |
---|
8422 | { |
---|
8423 | topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 2 ) * sizeof( int ) ); |
---|
8424 | intcpy( topolc[posinnew][0], topol[i][0] ); |
---|
8425 | intcat( topolc[posinnew][0], additionaltopol ); |
---|
8426 | lenc[posinnew][0] = len[i][0] - addedlen; // gennmitsu niha chigau |
---|
8427 | addedlen = 0.0; |
---|
8428 | } |
---|
8429 | else |
---|
8430 | { |
---|
8431 | topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 1 ) * sizeof( int ) ); |
---|
8432 | intcpy( topolc[posinnew][0], topol[i][0] ); |
---|
8433 | lenc[posinnew][0] = len[i][0]; |
---|
8434 | } |
---|
8435 | |
---|
8436 | if( topol[i][1][0] == repnorg ) |
---|
8437 | { |
---|
8438 | topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 2 ) * sizeof( int ) ); |
---|
8439 | intcpy( topolc[posinnew][1], topol[i][1] ); |
---|
8440 | intcat( topolc[posinnew][1], additionaltopol ); |
---|
8441 | lenc[posinnew][1] = len[i][1] - addedlen; // gennmitsu niha chigau |
---|
8442 | addedlen = 0.0; |
---|
8443 | |
---|
8444 | repnorg = topolc[posinnew][0][0]; // juuyou |
---|
8445 | } |
---|
8446 | else |
---|
8447 | { |
---|
8448 | topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 1 ) * sizeof( int ) ); |
---|
8449 | intcpy( topolc[posinnew][1], topol[i][1] ); |
---|
8450 | lenc[posinnew][1] = len[i][1]; |
---|
8451 | } |
---|
8452 | |
---|
8453 | // fprintf( stderr, "STEP %d\n", posinnew ); |
---|
8454 | // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( stderr, " %d->%d", topolc[posinnew][0][j], i+1 ); |
---|
8455 | // fprintf( stderr, "\n len=%f\n", lenc[posinnew][0] ); |
---|
8456 | // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) fprintf( stderr, " %d->%d", topolc[posinnew][1][j], -i-1 ); |
---|
8457 | // fprintf( stderr, "\n len=%f\n", lenc[posinnew][1] ); |
---|
8458 | |
---|
8459 | for( j=0; (m=topol[i][0][j])!=-1; j++ ) leaf2node[m] = i; |
---|
8460 | for( j=0; (m=topol[i][1][j])!=-1; j++ ) leaf2node[m] = i; |
---|
8461 | |
---|
8462 | // im = topolc[posinnew][0][0]; |
---|
8463 | // jm = topolc[posinnew][1][0]; |
---|
8464 | // sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] ); |
---|
8465 | // strcpy( tree[im], treetmp ); |
---|
8466 | // |
---|
8467 | // fprintf( stderr, "%s\n", treetmp ); |
---|
8468 | |
---|
8469 | posinnew++; |
---|
8470 | } |
---|
8471 | |
---|
8472 | if( nstep ) |
---|
8473 | { |
---|
8474 | i--; |
---|
8475 | } |
---|
8476 | else |
---|
8477 | { |
---|
8478 | i = 0; |
---|
8479 | topol[i][0] = calloc( 2, sizeof( int ) ); |
---|
8480 | topol[i][1] = calloc( 1, sizeof( int ) ); |
---|
8481 | topol[i][0][0] = 0; |
---|
8482 | topol[i][0][1] = -1; |
---|
8483 | topol[i][1][0] = -1; |
---|
8484 | } |
---|
8485 | if( repnorg == -1 ) |
---|
8486 | { |
---|
8487 | topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + countmem( topol[i][1] ) + 1 ) * sizeof( int ) ); |
---|
8488 | intcpy( topolc[posinnew][0], topol[i][0] ); |
---|
8489 | intcat( topolc[posinnew][0], topol[i][1] ); |
---|
8490 | lenc[posinnew][0] = len[i][0] + len[i][1] - minscore / 2; |
---|
8491 | |
---|
8492 | topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], 2 * sizeof( int ) ); |
---|
8493 | intcpy( topolc[posinnew][1], additionaltopol ); |
---|
8494 | lenc[posinnew][1] = minscore / 2; |
---|
8495 | |
---|
8496 | // neighbor = lastmem( topolc[posinnew][0] ); |
---|
8497 | neighbor = norg-1; // hakkirishita neighbor ga inai baai saigo ni hyouji |
---|
8498 | |
---|
8499 | if( treeout ) |
---|
8500 | { |
---|
8501 | #if 0 |
---|
8502 | fp = fopen( "infile.tree", "a" ); // kyougou!! |
---|
8503 | if( fp == 0 ) |
---|
8504 | { |
---|
8505 | fprintf( stderr, "File error!\n" ); |
---|
8506 | exit( 1 ); |
---|
8507 | } |
---|
8508 | fprintf( fp, "\n" ); |
---|
8509 | fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] ); |
---|
8510 | fprintf( fp, " nearest sequence: %d\n", nearest + 1 ); |
---|
8511 | fprintf( fp, " cousin: " ); |
---|
8512 | for( j=0; topolc[posinnew][0][j]!=-1; j++ ) |
---|
8513 | fprintf( fp, "%d ", topolc[posinnew][0][j]+1 ); |
---|
8514 | fprintf( fp, "\n" ); |
---|
8515 | fclose( fp ); |
---|
8516 | #else |
---|
8517 | addtree[iadd].nearest = nearesto; |
---|
8518 | addtree[iadd].dist1 = minscoreo; |
---|
8519 | addtree[iadd].dist2 = minscore; |
---|
8520 | neighborlist[0] = 0; |
---|
8521 | npt = neighborlist; |
---|
8522 | for( j=0; topolc[posinnew][0][j]!=-1; j++ ) |
---|
8523 | { |
---|
8524 | sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); |
---|
8525 | npt += strlen( npt ); |
---|
8526 | } |
---|
8527 | addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); |
---|
8528 | strcpy( addtree[iadd].neighbors, neighborlist ); |
---|
8529 | #endif |
---|
8530 | } |
---|
8531 | |
---|
8532 | // fprintf( stderr, "STEP %d\n", posinnew ); |
---|
8533 | // for( j=0; topolc[posinnew][0][j]!=-1; j++ ) fprintf( stderr, " %d", topolc[posinnew][0][j] ); |
---|
8534 | // fprintf( stderr, "\n len=%f", lenc[posinnew][0] ); |
---|
8535 | // for( j=0; topolc[posinnew][1][j]!=-1; j++ ) fprintf( stderr, " %d", topolc[posinnew][1][j] ); |
---|
8536 | // fprintf( stderr, "\n len=%f\n", lenc[posinnew][1] ); |
---|
8537 | } |
---|
8538 | |
---|
8539 | free( leaf2node ); |
---|
8540 | free( additionaltopol ); |
---|
8541 | free( ac ); |
---|
8542 | free( acori ); |
---|
8543 | if( treeout ) free( neighborlist ); |
---|
8544 | return( neighbor ); |
---|
8545 | } |
---|
8546 | |
---|
#if 0
/*
 * Disabled variant: order-sensitive comparison of two -1 terminated lists.
 * Returns 1 only when cand holds the same values in the same order.
 */
int samemember( int *mem, int *cand )
{
	int k = 0;

	while( mem[k] > -1 )
	{
		if( mem[k] != cand[k] ) return( 0 );
		k++;
	}
	return( cand[k] == -1 );
}
#else
/*
 * Order-insensitive comparison of two int lists.
 *
 * Members are the leading non-negative values of each list.  Returns 1 when
 * both lists have the same number of members, every member of mem occurs
 * somewhere in cand, and mem ends with -1; returns 0 otherwise.
 * Multiplicity is not compared, only the counts and presence.
 */
int samemember( int *mem, int *cand )
{
	int nmem = 0, ncand = 0;
	int *mp, *cp;

	while( mem[nmem] > -1 ) nmem++;
	while( cand[ncand] > -1 ) ncand++;
	if( nmem != ncand ) return( 0 );

	for( mp=mem; *mp>-1; mp++ )
	{
		// Walk cand until the member is found or the list ends.
		cp = cand;
		while( *cp > -1 && *cp != *mp ) cp++;
		if( *cp == -1 ) return( 0 );
	}

	return( *mp == -1 );
}
#endif
---|
8613 | |
---|
8614 | |
---|
/*
 * Subset test on int lists: returns 1 when every member of mem (its leading
 * non-negative values) also occurs in cand, and 0 as soon as one member is
 * missing from a cand list that ends with -1.  An empty mem gives 1.
 */
int includemember( int *mem, int *cand ) // mem in cand
{
	int *mp, *cp;

	for( mp=mem; *mp>-1; mp++ )
	{
		// Walk cand until the member is found or the list ends.
		for( cp=cand; *cp>-1 && *cp!=*mp; cp++ )
			;
		if( *cp == -1 ) return( 0 );
	}
	return( 1 );
}
---|
8638 | |
---|
/*
 * Intersection test on int lists: returns 1 when at least one member
 * (leading non-negative value) of mem1 also occurs in mem2, otherwise 0.
 */
int overlapmember( int *mem1, int *mem2 )
{
	int *a, *b;

	for( a=mem1; *a>-1; a++ )
	{
		b = mem2;
		while( *b > -1 )
		{
			if( *a == *b++ ) return( 1 );
		}
	}
	return( 0 );
}
---|