| 1 | #include <stdio.h> |
|---|
| 2 | #include <ctype.h> |
|---|
| 3 | #include <stdlib.h> |
|---|
| 4 | #include <string.h> |
|---|
| 5 | #include "clustalw.h" |
|---|
| 6 | |
|---|
| 7 | |
|---|
| 8 | /* |
|---|
| 9 | * Prototypes |
|---|
| 10 | */ |
|---|
| 11 | void calc_p_penalties(char **aln, sint n, sint fs, sint ls, sint *weight); |
|---|
| 12 | void calc_h_penalties(char **aln, sint n, sint fs, sint ls, sint *weight); |
|---|
| 13 | void calc_v_penalties(char **aln, sint n, sint fs, sint *weight); |
|---|
| 14 | sint local_penalty(sint penalty, sint n, sint *pweight, sint *hweight, sint *vweight); |
|---|
| 15 | float percentid(char *s1, char *s2,sint length); |
|---|
| 16 | /* |
|---|
| 17 | * Global variables |
|---|
| 18 | */ |
|---|
| 19 | |
|---|
| 20 | extern sint gap_dist; |
|---|
| 21 | extern sint max_aa; |
|---|
| 22 | extern sint debug; |
|---|
| 23 | extern Boolean dnaflag; |
|---|
| 24 | extern Boolean use_endgaps; |
|---|
| 25 | extern Boolean endgappenalties; |
|---|
| 26 | extern Boolean no_var_penalties, no_hyd_penalties, no_pref_penalties; |
|---|
| 27 | extern char hyd_residues[]; |
|---|
| 28 | extern char *amino_acid_codes; |
|---|
| 29 | |
|---|
| 30 | /* vwindow is the number of residues used for a window for the variable zone penalties */ |
|---|
| 31 | /* vll is the lower limit for the variable zone penalties (vll < pen < 1.0) */ |
|---|
| 32 | int vll=50; |
|---|
| 33 | int vwindow=5; |
|---|
| 34 | |
|---|
| 35 | sint vlut[26][26] = { |
|---|
| 36 | /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ |
|---|
| 37 | /*A*/ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 38 | /*B*/ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 39 | /*C*/ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 40 | /*D*/ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 41 | /*E*/ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 42 | /*F*/ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 43 | /*G*/ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 44 | /*H*/ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 45 | /*I*/ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 46 | /*J*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 47 | /*K*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 48 | /*L*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 49 | /*M*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 50 | /*N*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 51 | /*O*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 52 | /*P*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 53 | /*Q*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 54 | /*R*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 55 | /*S*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, |
|---|
| 56 | /*T*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, |
|---|
| 57 | /*U*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, |
|---|
| 58 | /*V*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, |
|---|
| 59 | /*W*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, |
|---|
| 60 | /*X*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, |
|---|
| 61 | /*Y*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, |
|---|
| 62 | /*Z*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 |
|---|
| 63 | }; |
|---|
| 64 | |
|---|
| 65 | /* pascarella probabilities for opening a gap at specific residues */ |
|---|
| 66 | char pr[] = {'A' , 'C', 'D', 'E', 'F', 'G', 'H', 'K', 'I', 'L', |
|---|
| 67 | 'M' , 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'Y', 'W'}; |
|---|
| 68 | sint pas_op[] = { 87, 87,104, 69, 80,139,100,104, 68, 79, |
|---|
| 69 | 71,137,126, 93,128,124,111, 75,100, 77}; |
|---|
| 70 | sint pas_op2[] ={ 88, 57,111, 98, 75,126, 95, 97, 70, 90, |
|---|
| 71 | 60,122,110,107, 91,125,124, 81,106, 88}; |
|---|
| 72 | sint pal_op[] = { 84, 69,128, 78, 88,176, 53, 95, 55, 49, |
|---|
| 73 | 52,148,147,100, 91,129,105, 51,128, 88}; |
|---|
| 74 | |
|---|
| 75 | float reduced_gap = 1.0; |
|---|
| 76 | Boolean nvar_pen,nhyd_pen,npref_pen; /* local copies of ho_hyd_penalties, no_pref_penalties */ |
|---|
| 77 | sint gdist; /* local copy of gap_dist */ |
|---|
| 78 | |
|---|
| 79 | void calc_gap_coeff(char **alignment, sint *gaps, sint **profile, Boolean struct_penalties, |
|---|
| 80 | char *gap_penalty_mask, sint first_seq, sint last_seq, |
|---|
| 81 | sint prf_length, sint gapcoef, sint lencoef) |
|---|
| 82 | { |
|---|
| 83 | |
|---|
| 84 | char c; |
|---|
| 85 | sint i, j; |
|---|
| 86 | sint is, ie; |
|---|
| 87 | static sint numseq,val,pcid; |
|---|
| 88 | static sint *gap_pos; |
|---|
| 89 | static sint *v_weight, *p_weight, *h_weight; |
|---|
| 90 | static float scale; |
|---|
| 91 | |
|---|
| 92 | numseq = last_seq - first_seq; |
|---|
| 93 | if(numseq == 2) |
|---|
| 94 | { |
|---|
| 95 | pcid=percentid(alignment[first_seq],alignment[first_seq+1],prf_length); |
|---|
| 96 | } |
|---|
| 97 | else pcid=0; |
|---|
| 98 | |
|---|
| 99 | for (j=0; j<prf_length; j++) |
|---|
| 100 | gaps[j] = 0; |
|---|
| 101 | /* |
|---|
| 102 | Check for a gap penalty mask |
|---|
| 103 | */ |
|---|
| 104 | if (struct_penalties != NONE) |
|---|
| 105 | { |
|---|
| 106 | nvar_pen = nhyd_pen = npref_pen = TRUE; |
|---|
| 107 | gdist = 0; |
|---|
| 108 | } |
|---|
| 109 | else if (no_var_penalties == FALSE && pcid > 60) |
|---|
| 110 | { |
|---|
| 111 | if(debug>0) fprintf(stderr,"Using variable zones to set gap penalties (pcid = %d)\n",pcid); |
|---|
| 112 | nhyd_pen = npref_pen = TRUE; |
|---|
| 113 | nvar_pen = FALSE; |
|---|
| 114 | } |
|---|
| 115 | else |
|---|
| 116 | { |
|---|
| 117 | nvar_pen = TRUE; |
|---|
| 118 | nhyd_pen = no_hyd_penalties; |
|---|
| 119 | npref_pen = no_pref_penalties; |
|---|
| 120 | gdist = gap_dist; |
|---|
| 121 | } |
|---|
| 122 | |
|---|
| 123 | for (i=first_seq; i<last_seq; i++) |
|---|
| 124 | { |
|---|
| 125 | /* |
|---|
| 126 | Include end gaps as gaps ? |
|---|
| 127 | */ |
|---|
| 128 | is = 0; |
|---|
| 129 | ie = prf_length; |
|---|
| 130 | if (use_endgaps == FALSE && endgappenalties==FALSE) |
|---|
| 131 | { |
|---|
| 132 | for (j=0; j<prf_length; j++) |
|---|
| 133 | { |
|---|
| 134 | c = alignment[i][j]; |
|---|
| 135 | if ((c < 0) || (c > max_aa)) |
|---|
| 136 | is++; |
|---|
| 137 | else |
|---|
| 138 | break; |
|---|
| 139 | } |
|---|
| 140 | for (j=prf_length-1; j>=0; j--) |
|---|
| 141 | { |
|---|
| 142 | c = alignment[i][j]; |
|---|
| 143 | if ((c < 0) || (c > max_aa)) |
|---|
| 144 | ie--; |
|---|
| 145 | else |
|---|
| 146 | break; |
|---|
| 147 | } |
|---|
| 148 | } |
|---|
| 149 | |
|---|
| 150 | for (j=is; j<ie; j++) |
|---|
| 151 | { |
|---|
| 152 | if ((alignment[i][j] < 0) || (alignment[i][j] > max_aa)) |
|---|
| 153 | gaps[j]++; |
|---|
| 154 | } |
|---|
| 155 | } |
|---|
| 156 | |
|---|
| 157 | if ((!dnaflag) && (nvar_pen == FALSE)) |
|---|
| 158 | { |
|---|
| 159 | v_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) ); |
|---|
| 160 | calc_v_penalties(alignment, prf_length, first_seq, v_weight); |
|---|
| 161 | } |
|---|
| 162 | |
|---|
| 163 | |
|---|
| 164 | if ((!dnaflag) && (npref_pen == FALSE)) |
|---|
| 165 | { |
|---|
| 166 | p_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) ); |
|---|
| 167 | calc_p_penalties(alignment, prf_length, first_seq, last_seq, p_weight); |
|---|
| 168 | } |
|---|
| 169 | |
|---|
| 170 | if ((!dnaflag) && (nhyd_pen == FALSE)) |
|---|
| 171 | { |
|---|
| 172 | h_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) ); |
|---|
| 173 | calc_h_penalties(alignment, prf_length, first_seq, last_seq, h_weight); |
|---|
| 174 | } |
|---|
| 175 | |
|---|
| 176 | gap_pos = (sint *) ckalloc( (prf_length+2) * sizeof (sint) ); |
|---|
| 177 | /* |
|---|
| 178 | mark the residues close to an existing gap (set gaps[i] = -ve) |
|---|
| 179 | */ |
|---|
| 180 | if (dnaflag || (gdist <= 0)) |
|---|
| 181 | { |
|---|
| 182 | for (i=0;i<prf_length;i++) gap_pos[i] = gaps[i]; |
|---|
| 183 | } |
|---|
| 184 | else |
|---|
| 185 | { |
|---|
| 186 | i=0; |
|---|
| 187 | while (i<prf_length) |
|---|
| 188 | { |
|---|
| 189 | if (gaps[i] <= 0) |
|---|
| 190 | { |
|---|
| 191 | gap_pos[i] = gaps[i]; |
|---|
| 192 | i++; |
|---|
| 193 | } |
|---|
| 194 | else |
|---|
| 195 | { |
|---|
| 196 | for (j = -gdist+1; j<0; j++) |
|---|
| 197 | { |
|---|
| 198 | if ((i+j>=0) && (i+j<prf_length) && |
|---|
| 199 | ((gaps[i+j] == 0) || (gaps[i+j] < j))) gap_pos[i+j] = j; |
|---|
| 200 | } |
|---|
| 201 | while (gaps[i] > 0) |
|---|
| 202 | { |
|---|
| 203 | if (i>=prf_length) break; |
|---|
| 204 | gap_pos[i] = gaps[i]; |
|---|
| 205 | i++; |
|---|
| 206 | } |
|---|
| 207 | for (j = 0; j<gdist; j++) |
|---|
| 208 | { |
|---|
| 209 | if (gaps[i+j] > 0) break; |
|---|
| 210 | if ((i+j>=0) && (i+j<prf_length) && |
|---|
| 211 | ((gaps[i+j] == 0) || (gaps[i+j] < -j))) gap_pos[i+j] = -j-1; |
|---|
| 212 | } |
|---|
| 213 | i += j; |
|---|
| 214 | } |
|---|
| 215 | } |
|---|
| 216 | } |
|---|
| 217 | if (debug>1) |
|---|
| 218 | { |
|---|
| 219 | fprintf(stdout,"gap open %d gap ext %d\n",(pint)gapcoef,(pint)lencoef); |
|---|
| 220 | fprintf(stdout,"gaps:\n"); |
|---|
| 221 | for(i=0;i<prf_length;i++) fprintf(stdout,"%d ", (pint)gaps[i]); |
|---|
| 222 | fprintf(stdout,"\n"); |
|---|
| 223 | fprintf(stdout,"gap_pos:\n"); |
|---|
| 224 | for(i=0;i<prf_length;i++) fprintf(stdout,"%d ", (pint)gap_pos[i]); |
|---|
| 225 | fprintf(stdout,"\n"); |
|---|
| 226 | } |
|---|
| 227 | |
|---|
| 228 | |
|---|
| 229 | for (j=0;j<prf_length; j++) |
|---|
| 230 | { |
|---|
| 231 | |
|---|
| 232 | if (gap_pos[j] <= 0) |
|---|
| 233 | { |
|---|
| 234 | /* |
|---|
| 235 | apply residue-specific and hydrophilic gap penalties. |
|---|
| 236 | */ |
|---|
| 237 | if (!dnaflag) { |
|---|
| 238 | profile[j+1][GAPCOL] = local_penalty(gapcoef, j, |
|---|
| 239 | p_weight, h_weight, v_weight); |
|---|
| 240 | profile[j+1][LENCOL] = lencoef; |
|---|
| 241 | } |
|---|
| 242 | else { |
|---|
| 243 | profile[j+1][GAPCOL] = gapcoef; |
|---|
| 244 | profile[j+1][LENCOL] = lencoef; |
|---|
| 245 | } |
|---|
| 246 | |
|---|
| 247 | /* |
|---|
| 248 | increase gap penalty near to existing gaps. |
|---|
| 249 | */ |
|---|
| 250 | if (gap_pos[j] < 0) |
|---|
| 251 | { |
|---|
| 252 | profile[j+1][GAPCOL] *= 2.0+2.0*(gdist+gap_pos[j])/gdist; |
|---|
| 253 | } |
|---|
| 254 | |
|---|
| 255 | |
|---|
| 256 | } |
|---|
| 257 | else |
|---|
| 258 | { |
|---|
| 259 | scale = ((float)(numseq-gaps[j])/(float)numseq) * reduced_gap; |
|---|
| 260 | profile[j+1][GAPCOL] = scale*gapcoef; |
|---|
| 261 | profile[j+1][LENCOL] = 0.5 * lencoef; |
|---|
| 262 | } |
|---|
| 263 | /* |
|---|
| 264 | apply the gap penalty mask |
|---|
| 265 | */ |
|---|
| 266 | if (struct_penalties != NONE) |
|---|
| 267 | { |
|---|
| 268 | val = gap_penalty_mask[j]-'0'; |
|---|
| 269 | if (val > 0 && val < 10) |
|---|
| 270 | { |
|---|
| 271 | profile[j+1][GAPCOL] *= val; |
|---|
| 272 | profile[j+1][LENCOL] *= val; |
|---|
| 273 | } |
|---|
| 274 | } |
|---|
| 275 | /* |
|---|
| 276 | make sure no penalty is zero - even for all-gap positions |
|---|
| 277 | */ |
|---|
| 278 | if (profile[j+1][GAPCOL] <= 0) profile[j+1][GAPCOL] = 1; |
|---|
| 279 | if (profile[j+1][LENCOL] <= 0) profile[j+1][LENCOL] = 1; |
|---|
| 280 | } |
|---|
| 281 | |
|---|
| 282 | /* set the penalties at the beginning and end of the profile */ |
|---|
| 283 | if(endgappenalties==TRUE) |
|---|
| 284 | { |
|---|
| 285 | profile[0][GAPCOL] = gapcoef; |
|---|
| 286 | profile[0][LENCOL] = lencoef; |
|---|
| 287 | } |
|---|
| 288 | else |
|---|
| 289 | { |
|---|
| 290 | profile[0][GAPCOL] = 0; |
|---|
| 291 | profile[0][LENCOL] = 0; |
|---|
| 292 | profile[prf_length][GAPCOL] = 0; |
|---|
| 293 | profile[prf_length][LENCOL] = 0; |
|---|
| 294 | } |
|---|
| 295 | if (debug>0) |
|---|
| 296 | { |
|---|
| 297 | fprintf(stdout,"Opening penalties:\n"); |
|---|
| 298 | for(i=0;i<=prf_length;i++) fprintf(stdout," %d:%d ",i, (pint)profile[i][GAPCOL]); |
|---|
| 299 | fprintf(stdout,"\n"); |
|---|
| 300 | } |
|---|
| 301 | if (debug>0) |
|---|
| 302 | { |
|---|
| 303 | fprintf(stdout,"Extension penalties:\n"); |
|---|
| 304 | for(i=0;i<=prf_length;i++) fprintf(stdout,"%d:%d ",i, (pint)profile[i][LENCOL]); |
|---|
| 305 | fprintf(stdout,"\n"); |
|---|
| 306 | } |
|---|
| 307 | if ((!dnaflag) && (nvar_pen == FALSE)) |
|---|
| 308 | v_weight=ckfree((void *)v_weight); |
|---|
| 309 | |
|---|
| 310 | if ((!dnaflag) && (npref_pen == FALSE)) |
|---|
| 311 | p_weight=ckfree((void *)p_weight); |
|---|
| 312 | |
|---|
| 313 | if ((!dnaflag) && (nhyd_pen == FALSE)) |
|---|
| 314 | h_weight=ckfree((void *)h_weight); |
|---|
| 315 | |
|---|
| 316 | |
|---|
| 317 | gap_pos=ckfree((void *)gap_pos); |
|---|
| 318 | } |
|---|
| 319 | |
|---|
| 320 | void calc_v_penalties(char **aln, sint n, sint fs, sint *weight) |
|---|
| 321 | { |
|---|
| 322 | char ix1,ix2; |
|---|
| 323 | sint i,j,t; |
|---|
| 324 | |
|---|
| 325 | for (i=0;i<n;i++) |
|---|
| 326 | { |
|---|
| 327 | weight[i] = 0; |
|---|
| 328 | t=0; |
|---|
| 329 | for(j=i-vwindow;j<i+vwindow;j++) |
|---|
| 330 | { |
|---|
| 331 | if(j>=0 && j<n) |
|---|
| 332 | { |
|---|
| 333 | ix1 = aln[fs][j]; |
|---|
| 334 | ix2 = aln[fs+1][j]; |
|---|
| 335 | if ((ix1 < 0) || (ix1 > max_aa) || (ix2< 0) || (ix2> max_aa)) continue; |
|---|
| 336 | weight[i] += vlut[amino_acid_codes[ix1]-'A'][amino_acid_codes[ix2]-'A']; |
|---|
| 337 | t++; |
|---|
| 338 | } |
|---|
| 339 | } |
|---|
| 340 | /* now we have a weight -t < w < t */ |
|---|
| 341 | weight[i] +=t; |
|---|
| 342 | if(t>0) |
|---|
| 343 | weight[i] = (weight[i]*100)/(2*t); |
|---|
| 344 | else |
|---|
| 345 | weight[i] = 100; |
|---|
| 346 | /* now we have a weight vll < w < 100 */ |
|---|
| 347 | if (weight[i]<vll) weight[i]=vll; |
|---|
| 348 | } |
|---|
| 349 | |
|---|
| 350 | |
|---|
| 351 | } |
|---|
| 352 | |
|---|
| 353 | void calc_p_penalties(char **aln, sint n, sint fs, sint ls, sint *weight) |
|---|
| 354 | { |
|---|
| 355 | char ix; |
|---|
| 356 | sint j,k,numseq; |
|---|
| 357 | sint i; |
|---|
| 358 | |
|---|
| 359 | numseq = ls - fs; |
|---|
| 360 | for (i=0;i<n;i++) |
|---|
| 361 | { |
|---|
| 362 | weight[i] = 0; |
|---|
| 363 | for (k=fs;k<ls;k++) |
|---|
| 364 | { |
|---|
| 365 | for (j=0;j<22;j++) |
|---|
| 366 | { |
|---|
| 367 | ix = aln[k][i]; |
|---|
| 368 | if ((ix < 0) || (ix > max_aa)) continue; |
|---|
| 369 | if (amino_acid_codes[ix] == pr[j]) |
|---|
| 370 | { |
|---|
| 371 | weight[i] += (180-pas_op[j]); |
|---|
| 372 | break; |
|---|
| 373 | } |
|---|
| 374 | } |
|---|
| 375 | } |
|---|
| 376 | weight[i] /= numseq; |
|---|
| 377 | } |
|---|
| 378 | |
|---|
| 379 | } |
|---|
| 380 | |
|---|
| 381 | void calc_h_penalties(char **aln, sint n, sint fs, sint ls, sint *weight) |
|---|
| 382 | { |
|---|
| 383 | |
|---|
| 384 | /* |
|---|
| 385 | weight[] is the length of the hydrophilic run of residues. |
|---|
| 386 | */ |
|---|
| 387 | char ix; |
|---|
| 388 | sint nh,j,k; |
|---|
| 389 | sint i,e,s; |
|---|
| 390 | sint *hyd; |
|---|
| 391 | float scale; |
|---|
| 392 | |
|---|
| 393 | hyd = (sint *)ckalloc((n+2) * sizeof(sint)); |
|---|
| 394 | nh = (sint)strlen(hyd_residues); |
|---|
| 395 | for (i=0;i<n;i++) |
|---|
| 396 | weight[i] = 0; |
|---|
| 397 | |
|---|
| 398 | for (k=fs;k<ls;k++) |
|---|
| 399 | { |
|---|
| 400 | for (i=0;i<n;i++) |
|---|
| 401 | { |
|---|
| 402 | hyd[i] = 0; |
|---|
| 403 | for (j=0;j<nh;j++) |
|---|
| 404 | { |
|---|
| 405 | ix = aln[k][i]; |
|---|
| 406 | if ((ix < 0) || (ix > max_aa)) continue; |
|---|
| 407 | if (amino_acid_codes[ix] == hyd_residues[j]) |
|---|
| 408 | { |
|---|
| 409 | hyd[i] = 1; |
|---|
| 410 | break; |
|---|
| 411 | } |
|---|
| 412 | } |
|---|
| 413 | } |
|---|
| 414 | i = 0; |
|---|
| 415 | while (i < n) |
|---|
| 416 | { |
|---|
| 417 | if (hyd[i] == 0) i++; |
|---|
| 418 | else |
|---|
| 419 | { |
|---|
| 420 | s = i; |
|---|
| 421 | while ((hyd[i] != 0) && (i<n)) i++; |
|---|
| 422 | e = i; |
|---|
| 423 | if (e-s > 3) |
|---|
| 424 | for (j=s; j<e; j++) weight[j] += 100; |
|---|
| 425 | } |
|---|
| 426 | } |
|---|
| 427 | } |
|---|
| 428 | |
|---|
| 429 | scale = ls - fs; |
|---|
| 430 | for (i=0;i<n;i++) |
|---|
| 431 | weight[i] /= scale; |
|---|
| 432 | |
|---|
| 433 | hyd=ckfree((void *)hyd); |
|---|
| 434 | |
|---|
| 435 | if (debug>1) |
|---|
| 436 | { |
|---|
| 437 | for(i=0;i<n;i++) fprintf(stdout,"%d ", (pint)weight[i]); |
|---|
| 438 | fprintf(stdout,"\n"); |
|---|
| 439 | } |
|---|
| 440 | |
|---|
| 441 | } |
|---|
| 442 | |
|---|
| 443 | sint local_penalty(sint penalty, sint n, sint *pweight, sint *hweight, sint *vweight) |
|---|
| 444 | { |
|---|
| 445 | |
|---|
| 446 | Boolean h = FALSE; |
|---|
| 447 | float gw; |
|---|
| 448 | |
|---|
| 449 | if (dnaflag) return(1); |
|---|
| 450 | |
|---|
| 451 | gw = 1.0; |
|---|
| 452 | if (nvar_pen == FALSE) |
|---|
| 453 | { |
|---|
| 454 | gw *= (float)vweight[n]/100.0; |
|---|
| 455 | } |
|---|
| 456 | |
|---|
| 457 | if (nhyd_pen == FALSE) |
|---|
| 458 | { |
|---|
| 459 | if (hweight[n] > 0) |
|---|
| 460 | { |
|---|
| 461 | gw *= 0.5; |
|---|
| 462 | h = TRUE; |
|---|
| 463 | } |
|---|
| 464 | } |
|---|
| 465 | if ((npref_pen == FALSE) && (h==FALSE)) |
|---|
| 466 | { |
|---|
| 467 | gw *= ((float)pweight[n]/100.0); |
|---|
| 468 | } |
|---|
| 469 | |
|---|
| 470 | gw *= penalty; |
|---|
| 471 | return((sint)gw); |
|---|
| 472 | |
|---|
| 473 | } |
|---|
| 474 | |
|---|
| 475 | float percentid(char *s1, char *s2,sint length) |
|---|
| 476 | { |
|---|
| 477 | sint i; |
|---|
| 478 | sint count,total; |
|---|
| 479 | float score; |
|---|
| 480 | |
|---|
| 481 | count = total = 0; |
|---|
| 482 | for (i=0;i<length;i++) { |
|---|
| 483 | if ((s1[i]>=0) && (s1[i]<max_aa)) { |
|---|
| 484 | total++; |
|---|
| 485 | if (s1[i] == s2[i]) count++; |
|---|
| 486 | } |
|---|
| 487 | if (s1[i]==(-3) || s2[i]==(-3)) break; |
|---|
| 488 | |
|---|
| 489 | } |
|---|
| 490 | |
|---|
| 491 | if(total==0) score=0; |
|---|
| 492 | else |
|---|
| 493 | score = 100.0 * (float)count / (float)total; |
|---|
| 494 | return(score); |
|---|
| 495 | |
|---|
| 496 | } |
|---|
| 497 | |
|---|