1 | #include <stdio.h> |
---|
2 | #include <string.h> |
---|
3 | #include <stdlib.h> |
---|
4 | #include "clustalv.h" |
---|
5 | |
---|
6 | /* |
---|
7 | * Prototypes |
---|
8 | */ |
---|
9 | |
---|
10 | extern void *ckalloc(size_t); |
---|
11 | extern Boolean read_tree(int *,double *,int *,int *,int *); |
---|
12 | extern void warning(char *,...); |
---|
13 | void init_upgma(void); |
---|
14 | void upgma(int); |
---|
15 | |
---|
16 | /* |
---|
17 | * Global variables |
---|
18 | */ |
---|
19 | |
---|
20 | extern FILE *tree; |
---|
21 | extern char treename[]; |
---|
22 | extern double **tmat; |
---|
23 | |
---|
24 | static int *combine; |
---|
25 | static int *otree_array,*tree_array; |
---|
26 | double **smat; |
---|
27 | static int *group1,*group2; |
---|
28 | |
---|
29 | |
---|
30 | void init_upgma() |
---|
31 | { |
---|
32 | register int i; |
---|
33 | |
---|
34 | combine = (int *)ckalloc( (MAXN+1) * sizeof (int) ); |
---|
35 | otree_array = (int *)ckalloc( (MAXN+1) * sizeof (int) ); |
---|
36 | |
---|
37 | smat = (double **) ckalloc( (MAXN+1) * sizeof (double *) ); |
---|
38 | for(i=0;i<MAXN+1;i++) |
---|
39 | smat[i] = (double *)ckalloc( (MAXN+1) * sizeof (double) ); |
---|
40 | |
---|
41 | tree_array = (int *)ckalloc( (MAXN+1) * sizeof (int) ); |
---|
42 | group1 = (int *)ckalloc( (MAXN+1) * sizeof (int) ); |
---|
43 | group2 = (int *)ckalloc( (MAXN+1) * sizeof (int) ); |
---|
44 | } |
---|
45 | |
---|
46 | void upgma(int totseqs) |
---|
47 | { |
---|
48 | int i,j,k,sub1,sub2,lowp,highp,ndone,chunks,comv,flag,gp2,iter,n,idummy; |
---|
49 | int m,m2; |
---|
50 | int bottom,top; |
---|
51 | double score,med,acc,dummy; |
---|
52 | |
---|
53 | iter=0; |
---|
54 | |
---|
55 | for(i=1;i<=totseqs;++i) { |
---|
56 | combine[i]=otree_array[i]=0; |
---|
57 | tmat[i][i]=0.0; |
---|
58 | } |
---|
59 | |
---|
60 | for(i=1;i<=totseqs;++i) |
---|
61 | for(j=1;j<=totseqs;++j) |
---|
62 | smat[i][j]=tmat[i][j]; |
---|
63 | |
---|
64 | while(TRUE) { |
---|
65 | score = 0.0; |
---|
66 | sub1 = sub2 =0; |
---|
67 | |
---|
68 | for(i=1;i<=totseqs;++i) |
---|
69 | if(combine[i]==0 || combine[i]==i) |
---|
70 | for(j=1;j<=totseqs;++j) { |
---|
71 | if(combine[j]!=0 && combine[j]!=j) continue; |
---|
72 | if(smat[i][j]> score) { |
---|
73 | score = smat[i][j]; |
---|
74 | sub1 = i; |
---|
75 | sub2 = j; |
---|
76 | } |
---|
77 | } |
---|
78 | |
---|
79 | |
---|
80 | bottom = (sub1<sub2) ? sub1 : sub2; |
---|
81 | top = (sub1>sub2) ? sub1 : sub2; |
---|
82 | for(i=1;i<=totseqs;++i) |
---|
83 | tree_array[i]=0; |
---|
84 | |
---|
85 | if(combine[bottom]==0 && combine[top]==0) { |
---|
86 | combine[bottom]=bottom; |
---|
87 | combine[top]=bottom; |
---|
88 | lowp=highp=0; |
---|
89 | tree_array[bottom]=1; |
---|
90 | tree_array[top]=2; |
---|
91 | } |
---|
92 | else if(combine[bottom]==0 && combine[top]>0) { |
---|
93 | combine[bottom]=bottom; |
---|
94 | lowp=0; |
---|
95 | tree_array[bottom]=1; |
---|
96 | for(i=top;i<=totseqs;++i) |
---|
97 | if(combine[i]==top) { |
---|
98 | combine[i]=bottom; |
---|
99 | tree_array[i]=2; |
---|
100 | } |
---|
101 | } |
---|
102 | else if(combine[bottom]>0 && combine[top]==0) { |
---|
103 | highp=0; |
---|
104 | for(i=bottom;i<=totseqs;++i) |
---|
105 | if(combine[i]==bottom) |
---|
106 | tree_array[i]=1; |
---|
107 | combine[top]=bottom; |
---|
108 | tree_array[top]=2; |
---|
109 | } |
---|
110 | else { |
---|
111 | for(i=1;i<=totseqs;++i) { |
---|
112 | if(combine[i]==bottom) |
---|
113 | tree_array[i]=1; |
---|
114 | if(combine[i]==top) { |
---|
115 | combine[i]=bottom; |
---|
116 | tree_array[i]=2; |
---|
117 | } |
---|
118 | } |
---|
119 | } |
---|
120 | |
---|
121 | m=m2=ndone=0; |
---|
122 | for(i=1;i<=totseqs;++i) { |
---|
123 | if(tree_array[i]==1) |
---|
124 | ++m; |
---|
125 | if(tree_array[i]==2) |
---|
126 | ++m2; |
---|
127 | if(combine[i]==bottom) { |
---|
128 | ++ndone; |
---|
129 | group1[ndone]=i; |
---|
130 | } |
---|
131 | } |
---|
132 | |
---|
133 | chunks=0; |
---|
134 | |
---|
135 | if(ndone>2) { |
---|
136 | flag=FALSE; |
---|
137 | if(m>1) { |
---|
138 | tree=fopen(treename,"r"); |
---|
139 | while(TRUE) { |
---|
140 | if(!read_tree(&n,&dummy,&idummy,&idummy,otree_array)) { |
---|
141 | flag=TRUE; |
---|
142 | break; |
---|
143 | } |
---|
144 | ++chunks; |
---|
145 | if(n!=m) |
---|
146 | continue; |
---|
147 | else { |
---|
148 | comv=0; |
---|
149 | for(i=1;i<=totseqs;++i) |
---|
150 | if(otree_array[i]>0 && tree_array[i]==1) |
---|
151 | ++comv; |
---|
152 | if(comv!=m) |
---|
153 | continue; |
---|
154 | lowp=chunks; |
---|
155 | break; |
---|
156 | } |
---|
157 | break; |
---|
158 | } |
---|
159 | fclose(tree); |
---|
160 | } |
---|
161 | if(flag) |
---|
162 | warning("Dutch Elm Disease. Bad tree"); |
---|
163 | |
---|
164 | flag=FALSE; |
---|
165 | chunks=0; |
---|
166 | if(m2>1) { |
---|
167 | tree=fopen(treename,"r"); |
---|
168 | while(TRUE) { |
---|
169 | if(!read_tree(&n,&dummy,&idummy,&idummy,otree_array)) { |
---|
170 | flag=TRUE; |
---|
171 | break; |
---|
172 | } |
---|
173 | ++chunks; |
---|
174 | if(n!=m2) |
---|
175 | continue; |
---|
176 | else { |
---|
177 | comv=0; |
---|
178 | for(i=1;i<=totseqs;++i) |
---|
179 | if(otree_array[i]>0 && tree_array[i]==2) |
---|
180 | ++comv; |
---|
181 | if(comv!=m2) |
---|
182 | continue; |
---|
183 | highp=chunks; |
---|
184 | break; |
---|
185 | } |
---|
186 | break; |
---|
187 | } |
---|
188 | fclose(tree); |
---|
189 | } |
---|
190 | if(flag) |
---|
191 | warning("Dutch Elm Disease. Bad tree"); |
---|
192 | } |
---|
193 | |
---|
194 | tree=fopen(treename,"a"); |
---|
195 | fprintf(tree," %7.1f %3d %3d %3d ",score,lowp,highp,ndone); |
---|
196 | for(i=1;i<=totseqs;++i) |
---|
197 | fprintf(tree,"%1d",tree_array[i]); |
---|
198 | fprintf(tree,"\n"); |
---|
199 | fclose(tree); |
---|
200 | |
---|
201 | for(i=1;i<=totseqs;++i) { |
---|
202 | gp2=0; |
---|
203 | if(combine[i]==bottom) |
---|
204 | continue; |
---|
205 | else if(combine[i]==0) { |
---|
206 | gp2=1; |
---|
207 | group2[1]=i; |
---|
208 | } |
---|
209 | else if(combine[i]==i) { |
---|
210 | for(j=1;j<=totseqs;++j) |
---|
211 | if(combine[j]==i) { |
---|
212 | ++gp2; |
---|
213 | group2[gp2]=j; |
---|
214 | } |
---|
215 | } |
---|
216 | else continue; |
---|
217 | |
---|
218 | acc=0.0; |
---|
219 | comv=0; |
---|
220 | for(j=1;j<=gp2;++j) |
---|
221 | for(k=1;k<=ndone;++k) { |
---|
222 | acc += tmat[group2[j]][group1[k]]; |
---|
223 | ++comv; |
---|
224 | } |
---|
225 | med=acc/(double)comv; |
---|
226 | smat[i][bottom]=med; |
---|
227 | smat[bottom][i]=med; |
---|
228 | } |
---|
229 | |
---|
230 | ++iter; |
---|
231 | if(iter>800) |
---|
232 | return; |
---|
233 | flag=FALSE; |
---|
234 | for(i=1;i<=totseqs;++i) |
---|
235 | if(combine[i]!=1) |
---|
236 | flag=TRUE; |
---|
237 | if(!flag) |
---|
238 | break; |
---|
239 | } |
---|
240 | } |
---|