1 | #include <stdio.h> |
---|
2 | #include <string.h> |
---|
3 | #include <ctype.h> |
---|
4 | #include <stdlib.h> |
---|
5 | #include "clustalw.h" |
---|
6 | |
---|
7 | |
---|
8 | /* |
---|
9 | * Prototypes |
---|
10 | */ |
---|
11 | |
---|
12 | /* |
---|
13 | * Global Variables |
---|
14 | */ |
---|
15 | |
---|
16 | extern double **tmat; |
---|
17 | extern Boolean no_weights; |
---|
18 | extern sint debug; |
---|
19 | extern sint max_aa; |
---|
20 | extern sint nseqs; |
---|
21 | extern sint profile1_nseqs; |
---|
22 | extern sint nsets; |
---|
23 | extern sint **sets; |
---|
24 | extern sint divergence_cutoff; |
---|
25 | extern sint *seq_weight; |
---|
26 | extern sint output_order, *output_index; |
---|
27 | extern Boolean distance_tree; |
---|
28 | extern char seqname[]; |
---|
29 | extern sint *seqlen_array; |
---|
30 | extern char **seq_array; |
---|
31 | |
---|
32 | sint malign(sint istart,char *phylip_name) /* full progressive alignment*/ |
---|
33 | { |
---|
34 | static sint *aligned; |
---|
35 | static sint *group; |
---|
36 | static sint ix; |
---|
37 | |
---|
38 | sint *maxid, max, sum; |
---|
39 | sint *tree_weight; |
---|
40 | sint i,j,set,iseq=0; |
---|
41 | sint status,entries; |
---|
42 | lint score = 0; |
---|
43 | |
---|
44 | |
---|
45 | info("Start of Multiple Alignment"); |
---|
46 | |
---|
47 | /* get the phylogenetic tree from *.ph */ |
---|
48 | |
---|
49 | if (nseqs >= 2) |
---|
50 | { |
---|
51 | status = read_tree(phylip_name, (sint)0, nseqs); |
---|
52 | if (status == 0) return((sint)0); |
---|
53 | } |
---|
54 | |
---|
55 | /* calculate sequence weights according to branch lengths of the tree - |
---|
56 | weights in global variable seq_weight normalised to sum to 100 */ |
---|
57 | |
---|
58 | calc_seq_weights((sint)0, nseqs, seq_weight); |
---|
59 | |
---|
60 | /* recalculate tmat matrix as percent similarity matrix */ |
---|
61 | |
---|
62 | status = calc_similarities(nseqs); |
---|
63 | if (status == 0) return((sint)0); |
---|
64 | |
---|
65 | /* for each sequence, find the most closely related sequence */ |
---|
66 | |
---|
67 | maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); |
---|
68 | for (i=1;i<=nseqs;i++) |
---|
69 | { |
---|
70 | maxid[i] = -1; |
---|
71 | for (j=1;j<=nseqs;j++) |
---|
72 | if (j!=i && maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j]; |
---|
73 | } |
---|
74 | |
---|
75 | /* group the sequences according to their relative divergence */ |
---|
76 | |
---|
77 | if (istart == 0) |
---|
78 | { |
---|
79 | sets = (sint **) ckalloc( (nseqs+1) * sizeof (sint *) ); |
---|
80 | for(i=0;i<=nseqs;i++) |
---|
81 | sets[i] = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); |
---|
82 | |
---|
83 | create_sets((sint)0,nseqs); |
---|
84 | info("There are %d groups",(pint)nsets); |
---|
85 | |
---|
86 | /* clear the memory used for the phylogenetic tree */ |
---|
87 | |
---|
88 | if (nseqs >= 2) |
---|
89 | clear_tree(NULL); |
---|
90 | |
---|
91 | /* start the multiple alignments......... */ |
---|
92 | |
---|
93 | info("Aligning..."); |
---|
94 | |
---|
95 | /* first pass, align closely related sequences first.... */ |
---|
96 | |
---|
97 | ix = 0; |
---|
98 | aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); |
---|
99 | for (i=0;i<=nseqs;i++) aligned[i] = 0; |
---|
100 | |
---|
101 | for(set=1;set<=nsets;++set) |
---|
102 | { |
---|
103 | entries=0; |
---|
104 | for (i=1;i<=nseqs;i++) |
---|
105 | { |
---|
106 | if ((sets[set][i] != 0) && (maxid[i] > divergence_cutoff)) |
---|
107 | { |
---|
108 | entries++; |
---|
109 | if (aligned[i] == 0) |
---|
110 | { |
---|
111 | if (output_order==INPUT) |
---|
112 | { |
---|
113 | ++ix; |
---|
114 | output_index[i] = i; |
---|
115 | } |
---|
116 | else output_index[++ix] = i; |
---|
117 | aligned[i] = 1; |
---|
118 | } |
---|
119 | } |
---|
120 | } |
---|
121 | |
---|
122 | if(entries > 0) score = prfalign(sets[set], aligned); |
---|
123 | else score=0.0; |
---|
124 | |
---|
125 | |
---|
126 | /* negative score means fatal error... exit now! */ |
---|
127 | |
---|
128 | if (score < 0) |
---|
129 | { |
---|
130 | return(-1); |
---|
131 | } |
---|
132 | if ((entries > 0) && (score > 0)) |
---|
133 | info("Group %d: Sequences:%4d Score:%d", |
---|
134 | (pint)set,(pint)entries,(pint)score); |
---|
135 | else |
---|
136 | info("Group %d: Delayed", |
---|
137 | (pint)set); |
---|
138 | } |
---|
139 | |
---|
140 | for (i=0;i<=nseqs;i++) |
---|
141 | sets[i]=ckfree((void *)sets[i]); |
---|
142 | sets=ckfree(sets); |
---|
143 | } |
---|
144 | else |
---|
145 | { |
---|
146 | /* clear the memory used for the phylogenetic tree */ |
---|
147 | |
---|
148 | if (nseqs >= 2) |
---|
149 | clear_tree(NULL); |
---|
150 | |
---|
151 | aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); |
---|
152 | ix = 0; |
---|
153 | for (i=1;i<=istart+1;i++) |
---|
154 | { |
---|
155 | aligned[i] = 1; |
---|
156 | ++ix; |
---|
157 | output_index[i] = i; |
---|
158 | } |
---|
159 | for (i=istart+2;i<=nseqs;i++) aligned[i] = 0; |
---|
160 | } |
---|
161 | |
---|
162 | /* second pass - align remaining, more divergent sequences..... */ |
---|
163 | |
---|
164 | /* if not all sequences were aligned, for each unaligned sequence, |
---|
165 | find it's closest pair amongst the aligned sequences. */ |
---|
166 | |
---|
167 | group = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); |
---|
168 | tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) ); |
---|
169 | for (i=0;i<nseqs;i++) |
---|
170 | tree_weight[i] = seq_weight[i]; |
---|
171 | |
---|
172 | /* if we haven't aligned any sequences, in the first pass - align the |
---|
173 | two most closely related sequences now */ |
---|
174 | if(ix==0) |
---|
175 | { |
---|
176 | max = -1; |
---|
177 | iseq = 0; |
---|
178 | for (i=1;i<=nseqs;i++) |
---|
179 | { |
---|
180 | for (j=i+1;j<=nseqs;j++) |
---|
181 | { |
---|
182 | if (max < tmat[i][j]) |
---|
183 | { |
---|
184 | max = tmat[i][j]; |
---|
185 | iseq = i; |
---|
186 | } |
---|
187 | } |
---|
188 | } |
---|
189 | aligned[iseq]=1; |
---|
190 | if (output_order == INPUT) |
---|
191 | { |
---|
192 | ++ix; |
---|
193 | output_index[iseq] = iseq; |
---|
194 | } |
---|
195 | else |
---|
196 | output_index[++ix] = iseq; |
---|
197 | } |
---|
198 | |
---|
199 | while (ix < nseqs) |
---|
200 | { |
---|
201 | for (i=1;i<=nseqs;i++) { |
---|
202 | if (aligned[i] == 0) |
---|
203 | { |
---|
204 | maxid[i] = -1; |
---|
205 | for (j=1;j<=nseqs;j++) |
---|
206 | if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0)) |
---|
207 | maxid[i] = tmat[i][j]; |
---|
208 | } |
---|
209 | } |
---|
210 | /* find the most closely related sequence to those already aligned */ |
---|
211 | |
---|
212 | max = -1; |
---|
213 | iseq = 0; |
---|
214 | for (i=1;i<=nseqs;i++) |
---|
215 | { |
---|
216 | if ((aligned[i] == 0) && (maxid[i] > max)) |
---|
217 | { |
---|
218 | max = maxid[i]; |
---|
219 | iseq = i; |
---|
220 | } |
---|
221 | } |
---|
222 | |
---|
223 | |
---|
224 | /* align this sequence to the existing alignment */ |
---|
225 | /* weight sequences with percent identity with profile*/ |
---|
226 | /* OR...., multiply sequence weights from tree by percent identity with new sequence */ |
---|
227 | if(no_weights==FALSE) { |
---|
228 | for (j=0;j<nseqs;j++) |
---|
229 | if (aligned[j+1] != 0) |
---|
230 | seq_weight[j] = tree_weight[j] * tmat[j+1][iseq]; |
---|
231 | /* |
---|
232 | Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR |
---|
233 | */ |
---|
234 | |
---|
235 | sum = 0; |
---|
236 | for (j=0;j<nseqs;j++) |
---|
237 | if (aligned[j+1] != 0) |
---|
238 | sum += seq_weight[j]; |
---|
239 | if (sum == 0) |
---|
240 | { |
---|
241 | for (j=0;j<nseqs;j++) |
---|
242 | seq_weight[j] = 1; |
---|
243 | sum = j; |
---|
244 | } |
---|
245 | for (j=0;j<nseqs;j++) |
---|
246 | if (aligned[j+1] != 0) |
---|
247 | { |
---|
248 | seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum; |
---|
249 | if (seq_weight[j] < 1) seq_weight[j] = 1; |
---|
250 | } |
---|
251 | } |
---|
252 | |
---|
253 | entries = 0; |
---|
254 | for (j=1;j<=nseqs;j++) |
---|
255 | if (aligned[j] != 0) |
---|
256 | { |
---|
257 | group[j] = 1; |
---|
258 | entries++; |
---|
259 | } |
---|
260 | else if (iseq==j) |
---|
261 | { |
---|
262 | group[j] = 2; |
---|
263 | entries++; |
---|
264 | } |
---|
265 | aligned[iseq] = 1; |
---|
266 | |
---|
267 | score = prfalign(group, aligned); |
---|
268 | info("Sequence:%d Score:%d",(pint)iseq,(pint)score); |
---|
269 | if (output_order == INPUT) |
---|
270 | { |
---|
271 | ++ix; |
---|
272 | output_index[iseq] = iseq; |
---|
273 | } |
---|
274 | else |
---|
275 | output_index[++ix] = iseq; |
---|
276 | } |
---|
277 | |
---|
278 | group=ckfree((void *)group); |
---|
279 | aligned=ckfree((void *)aligned); |
---|
280 | maxid=ckfree((void *)maxid); |
---|
281 | tree_weight=ckfree((void *)tree_weight); |
---|
282 | |
---|
283 | aln_score(); |
---|
284 | |
---|
285 | /* make the rest (output stuff) into routine clustal_out in file amenu.c */ |
---|
286 | |
---|
287 | return(nseqs); |
---|
288 | |
---|
289 | } |
---|
290 | |
---|
291 | sint seqalign(sint istart,char *phylip_name) /* sequence alignment to existing profile */ |
---|
292 | { |
---|
293 | static sint *aligned, *tree_weight; |
---|
294 | static sint *group; |
---|
295 | static sint ix; |
---|
296 | |
---|
297 | sint *maxid, max; |
---|
298 | sint i,j,status,iseq; |
---|
299 | sint sum,entries; |
---|
300 | lint score = 0; |
---|
301 | |
---|
302 | |
---|
303 | info("Start of Multiple Alignment"); |
---|
304 | |
---|
305 | /* get the phylogenetic tree from *.ph */ |
---|
306 | |
---|
307 | if (nseqs >= 2) |
---|
308 | { |
---|
309 | status = read_tree(phylip_name, (sint)0, nseqs); |
---|
310 | if (status == 0) return(0); |
---|
311 | } |
---|
312 | |
---|
313 | /* calculate sequence weights according to branch lengths of the tree - |
---|
314 | weights in global variable seq_weight normalised to sum to 100 */ |
---|
315 | |
---|
316 | calc_seq_weights((sint)0, nseqs, seq_weight); |
---|
317 | |
---|
318 | tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) ); |
---|
319 | for (i=0;i<nseqs;i++) |
---|
320 | tree_weight[i] = seq_weight[i]; |
---|
321 | |
---|
322 | /* recalculate tmat matrix as percent similarity matrix */ |
---|
323 | |
---|
324 | status = calc_similarities(nseqs); |
---|
325 | if (status == 0) return((sint)0); |
---|
326 | |
---|
327 | /* for each sequence, find the most closely related sequence */ |
---|
328 | |
---|
329 | maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); |
---|
330 | for (i=1;i<=nseqs;i++) |
---|
331 | { |
---|
332 | maxid[i] = -1; |
---|
333 | for (j=1;j<=nseqs;j++) |
---|
334 | if (maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j]; |
---|
335 | } |
---|
336 | |
---|
337 | /* clear the memory used for the phylogenetic tree */ |
---|
338 | |
---|
339 | if (nseqs >= 2) |
---|
340 | clear_tree(NULL); |
---|
341 | |
---|
342 | aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); |
---|
343 | ix = 0; |
---|
344 | for (i=1;i<=istart+1;i++) |
---|
345 | { |
---|
346 | aligned[i] = 1; |
---|
347 | ++ix; |
---|
348 | output_index[i] = i; |
---|
349 | } |
---|
350 | for (i=istart+2;i<=nseqs;i++) aligned[i] = 0; |
---|
351 | |
---|
352 | /* for each unaligned sequence, find it's closest pair amongst the |
---|
353 | aligned sequences. */ |
---|
354 | |
---|
355 | group = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); |
---|
356 | |
---|
357 | while (ix < nseqs) |
---|
358 | { |
---|
359 | if (ix > 0) |
---|
360 | { |
---|
361 | for (i=1;i<=nseqs;i++) { |
---|
362 | if (aligned[i] == 0) |
---|
363 | { |
---|
364 | maxid[i] = -1; |
---|
365 | for (j=1;j<=nseqs;j++) |
---|
366 | if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0)) |
---|
367 | maxid[i] = tmat[i][j]; |
---|
368 | } |
---|
369 | } |
---|
370 | } |
---|
371 | |
---|
372 | /* find the most closely related sequence to those already aligned */ |
---|
373 | |
---|
374 | max = -1; |
---|
375 | for (i=1;i<=nseqs;i++) |
---|
376 | { |
---|
377 | if ((aligned[i] == 0) && (maxid[i] > max)) |
---|
378 | { |
---|
379 | max = maxid[i]; |
---|
380 | iseq = i; |
---|
381 | } |
---|
382 | } |
---|
383 | |
---|
384 | /* align this sequence to the existing alignment */ |
---|
385 | |
---|
386 | entries = 0; |
---|
387 | for (j=1;j<=nseqs;j++) |
---|
388 | if (aligned[j] != 0) |
---|
389 | { |
---|
390 | group[j] = 1; |
---|
391 | entries++; |
---|
392 | } |
---|
393 | else if (iseq==j) |
---|
394 | { |
---|
395 | group[j] = 2; |
---|
396 | entries++; |
---|
397 | } |
---|
398 | aligned[iseq] = 1; |
---|
399 | |
---|
400 | |
---|
401 | /* EITHER....., set sequence weights equal to percent identity with new sequence */ |
---|
402 | /* |
---|
403 | for (j=0;j<nseqs;j++) |
---|
404 | seq_weight[j] = tmat[j+1][iseq]; |
---|
405 | */ |
---|
406 | /* OR...., multiply sequence weights from tree by percent identity with new sequence */ |
---|
407 | for (j=0;j<nseqs;j++) |
---|
408 | seq_weight[j] = tree_weight[j] * tmat[j+1][iseq]; |
---|
409 | if (debug>1) |
---|
410 | for (j=0;j<nseqs;j++) if (group[j+1] == 1)fprintf (stdout,"sequence %d: %d\n", j+1,tree_weight[j]); |
---|
411 | /* |
---|
412 | Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR |
---|
413 | */ |
---|
414 | |
---|
415 | sum = 0; |
---|
416 | for (j=0;j<nseqs;j++) |
---|
417 | if (group[j+1] == 1) sum += seq_weight[j]; |
---|
418 | if (sum == 0) |
---|
419 | { |
---|
420 | for (j=0;j<nseqs;j++) |
---|
421 | seq_weight[j] = 1; |
---|
422 | sum = j; |
---|
423 | } |
---|
424 | for (j=0;j<nseqs;j++) |
---|
425 | { |
---|
426 | seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum; |
---|
427 | if (seq_weight[j] < 1) seq_weight[j] = 1; |
---|
428 | } |
---|
429 | |
---|
430 | if (debug > 1) { |
---|
431 | fprintf(stdout,"new weights\n"); |
---|
432 | for (j=0;j<nseqs;j++) if (group[j+1] == 1)fprintf( stdout,"sequence %d: %d\n", j+1,seq_weight[j]); |
---|
433 | } |
---|
434 | |
---|
435 | score = prfalign(group, aligned); |
---|
436 | info("Sequence:%d Score:%d",(pint)iseq,(pint)score); |
---|
437 | if (output_order == INPUT) |
---|
438 | { |
---|
439 | ++ix; |
---|
440 | output_index[iseq] = iseq; |
---|
441 | } |
---|
442 | else |
---|
443 | output_index[++ix] = iseq; |
---|
444 | } |
---|
445 | |
---|
446 | group=ckfree((void *)group); |
---|
447 | aligned=ckfree((void *)aligned); |
---|
448 | maxid=ckfree((void *)maxid); |
---|
449 | |
---|
450 | aln_score(); |
---|
451 | |
---|
452 | /* make the rest (output stuff) into routine clustal_out in file amenu.c */ |
---|
453 | |
---|
454 | return(nseqs); |
---|
455 | |
---|
456 | } |
---|
457 | |
---|
458 | |
---|
459 | sint palign1(void) /* a profile alignment */ |
---|
460 | { |
---|
461 | sint i,j,temp; |
---|
462 | sint entries; |
---|
463 | sint *aligned, *group; |
---|
464 | float dscore; |
---|
465 | lint score; |
---|
466 | |
---|
467 | info("Start of Initial Alignment"); |
---|
468 | |
---|
469 | /* calculate sequence weights according to branch lengths of the tree - |
---|
470 | weights in global variable seq_weight normalised to sum to INT_SCALE_FACTOR */ |
---|
471 | |
---|
472 | temp = INT_SCALE_FACTOR/nseqs; |
---|
473 | for (i=0;i<nseqs;i++) seq_weight[i] = temp; |
---|
474 | |
---|
475 | distance_tree = FALSE; |
---|
476 | |
---|
477 | /* do the initial alignment......... */ |
---|
478 | |
---|
479 | group = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); |
---|
480 | |
---|
481 | for(i=1; i<=profile1_nseqs; ++i) |
---|
482 | group[i] = 1; |
---|
483 | for(i=profile1_nseqs+1; i<=nseqs; ++i) |
---|
484 | group[i] = 2; |
---|
485 | entries = nseqs; |
---|
486 | |
---|
487 | aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); |
---|
488 | for (i=1;i<=nseqs;i++) aligned[i] = 1; |
---|
489 | |
---|
490 | score = prfalign(group, aligned); |
---|
491 | info("Sequences:%d Score:%d",(pint)entries,(pint)score); |
---|
492 | group=ckfree((void *)group); |
---|
493 | aligned=ckfree((void *)aligned); |
---|
494 | |
---|
495 | for (i=1;i<=nseqs;i++) { |
---|
496 | for (j=i+1;j<=nseqs;j++) { |
---|
497 | dscore = countid(i,j); |
---|
498 | tmat[i][j] = ((double)100.0 - (double)dscore)/(double)100.0; |
---|
499 | tmat[j][i] = tmat[i][j]; |
---|
500 | } |
---|
501 | } |
---|
502 | |
---|
503 | return(nseqs); |
---|
504 | } |
---|
505 | |
---|
506 | float countid(sint s1, sint s2) |
---|
507 | { |
---|
508 | char c1,c2; |
---|
509 | sint i; |
---|
510 | sint count,total; |
---|
511 | float score; |
---|
512 | |
---|
513 | count = total = 0; |
---|
514 | for (i=1;i<=seqlen_array[s1] && i<=seqlen_array[s2];i++) { |
---|
515 | c1 = seq_array[s1][i]; |
---|
516 | c2 = seq_array[s2][i]; |
---|
517 | if ((c1>=0) && (c1<max_aa)) { |
---|
518 | total++; |
---|
519 | if (c1 == c2) count++; |
---|
520 | } |
---|
521 | |
---|
522 | } |
---|
523 | |
---|
524 | if(total==0) score=0; |
---|
525 | else |
---|
526 | score = 100.0 * (float)count / (float)total; |
---|
527 | return(score); |
---|
528 | |
---|
529 | } |
---|
530 | |
---|
531 | sint palign2(char *p1_tree_name,char *p2_tree_name) /* a profile alignment */ |
---|
532 | { |
---|
533 | sint i,j,sum,entries,status; |
---|
534 | lint score; |
---|
535 | sint *aligned, *group; |
---|
536 | sint *maxid,*p1_weight,*p2_weight; |
---|
537 | |
---|
538 | info("Start of Multiple Alignment"); |
---|
539 | |
---|
540 | /* get the phylogenetic trees from *.ph */ |
---|
541 | |
---|
542 | if (profile1_nseqs >= 2) |
---|
543 | { |
---|
544 | status = read_tree(p1_tree_name, (sint)0, profile1_nseqs); |
---|
545 | if (status == 0) return(0); |
---|
546 | } |
---|
547 | |
---|
548 | /* calculate sequence weights according to branch lengths of the tree - |
---|
549 | weights in global variable seq_weight normalised to sum to 100 */ |
---|
550 | |
---|
551 | p1_weight = (sint *) ckalloc( (profile1_nseqs) * sizeof(sint) ); |
---|
552 | |
---|
553 | calc_seq_weights((sint)0, profile1_nseqs, p1_weight); |
---|
554 | |
---|
555 | /* clear the memory for the phylogenetic tree */ |
---|
556 | |
---|
557 | if (profile1_nseqs >= 2) |
---|
558 | clear_tree(NULL); |
---|
559 | |
---|
560 | if (nseqs-profile1_nseqs >= 2) |
---|
561 | { |
---|
562 | status = read_tree(p2_tree_name, profile1_nseqs, nseqs); |
---|
563 | if (status == 0) return(0); |
---|
564 | } |
---|
565 | |
---|
566 | p2_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) ); |
---|
567 | |
---|
568 | calc_seq_weights(profile1_nseqs,nseqs, p2_weight); |
---|
569 | |
---|
570 | |
---|
571 | /* clear the memory for the phylogenetic tree */ |
---|
572 | |
---|
573 | if (nseqs-profile1_nseqs >= 2) |
---|
574 | clear_tree(NULL); |
---|
575 | |
---|
576 | /* convert tmat distances to similarities */ |
---|
577 | |
---|
578 | for (i=1;i<nseqs;i++) |
---|
579 | for (j=i+1;j<=nseqs;j++) { |
---|
580 | tmat[i][j]=100.0-tmat[i][j]*100.0; |
---|
581 | tmat[j][i]=tmat[i][j]; |
---|
582 | } |
---|
583 | |
---|
584 | |
---|
585 | /* weight sequences with max percent identity with other profile*/ |
---|
586 | |
---|
587 | maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); |
---|
588 | for (i=0;i<profile1_nseqs;i++) { |
---|
589 | maxid[i] = 0; |
---|
590 | for (j=profile1_nseqs+1;j<=nseqs;j++) |
---|
591 | if(maxid[i]<tmat[i+1][j]) maxid[i] = tmat[i+1][j]; |
---|
592 | seq_weight[i] = maxid[i]*p1_weight[i]; |
---|
593 | } |
---|
594 | |
---|
595 | for (i=profile1_nseqs;i<nseqs;i++) { |
---|
596 | maxid[i] = -1; |
---|
597 | for (j=1;j<=profile1_nseqs;j++) |
---|
598 | if(maxid[i]<tmat[i+1][j]) maxid[i] = tmat[i+1][j]; |
---|
599 | seq_weight[i] = maxid[i]*p2_weight[i]; |
---|
600 | } |
---|
601 | /* |
---|
602 | Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR |
---|
603 | */ |
---|
604 | |
---|
605 | sum = 0; |
---|
606 | for (j=0;j<nseqs;j++) |
---|
607 | sum += seq_weight[j]; |
---|
608 | if (sum == 0) |
---|
609 | { |
---|
610 | for (j=0;j<nseqs;j++) |
---|
611 | seq_weight[j] = 1; |
---|
612 | sum = j; |
---|
613 | } |
---|
614 | for (j=0;j<nseqs;j++) |
---|
615 | { |
---|
616 | seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum; |
---|
617 | if (seq_weight[j] < 1) seq_weight[j] = 1; |
---|
618 | } |
---|
619 | if (debug > 1) { |
---|
620 | fprintf(stdout,"new weights\n"); |
---|
621 | for (j=0;j<nseqs;j++) fprintf( stdout,"sequence %d: %d\n", j+1,seq_weight[j]); |
---|
622 | } |
---|
623 | |
---|
624 | |
---|
625 | /* do the alignment......... */ |
---|
626 | |
---|
627 | info("Aligning..."); |
---|
628 | |
---|
629 | group = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); |
---|
630 | |
---|
631 | for(i=1; i<=profile1_nseqs; ++i) |
---|
632 | group[i] = 1; |
---|
633 | for(i=profile1_nseqs+1; i<=nseqs; ++i) |
---|
634 | group[i] = 2; |
---|
635 | entries = nseqs; |
---|
636 | |
---|
637 | aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); |
---|
638 | for (i=1;i<=nseqs;i++) aligned[i] = 1; |
---|
639 | |
---|
640 | score = prfalign(group, aligned); |
---|
641 | info("Sequences:%d Score:%d",(pint)entries,(pint)score); |
---|
642 | group=ckfree((void *)group); |
---|
643 | p1_weight=ckfree((void *)p1_weight); |
---|
644 | p2_weight=ckfree((void *)p2_weight); |
---|
645 | aligned=ckfree((void *)aligned); |
---|
646 | maxid=ckfree((void *)maxid); |
---|
647 | |
---|
648 | /* DES output_index = (int *)ckalloc( (nseqs+1) * sizeof (int)); */ |
---|
649 | for (i=1;i<=nseqs;i++) output_index[i] = i; |
---|
650 | |
---|
651 | return(nseqs); |
---|
652 | } |
---|
653 | |
---|