1 | |
---|
2 | #include <ctype.h> |
---|
3 | // #include <malloc.h> |
---|
4 | #include <stdlib.h> |
---|
5 | |
---|
6 | #include "ali_misc.hxx" |
---|
7 | #include "ali_profile.hxx" |
---|
8 | #include "ali_solution.hxx" |
---|
9 | #include <BI_helix.hxx> |
---|
10 | |
---|
11 | |
---|
12 | |
---|
13 | GB_INLINE int ALI_PROFILE::is_binding_marker(char c) { |
---|
14 | return (c == '~' || c == 'x'); |
---|
15 | } |
---|
16 | |
---|
17 | |
---|
18 | /* |
---|
19 | * find the family in the pt server |
---|
20 | */ |
---|
21 | ALI_TLIST<ali_family_member *> *ALI_PROFILE::find_family(ALI_SEQUENCE *Sequence, |
---|
22 | ALI_PROFILE_CONTEXT *context) |
---|
23 | { |
---|
24 | char message_buffer[100]; |
---|
25 | ALI_PT &pt = (ALI_PT &) *(context->pt); |
---|
26 | ALI_ARBDB &arbdb = (ALI_ARBDB &) *(context->arbdb); |
---|
27 | ALI_SEQUENCE *seq; |
---|
28 | ali_family_member *family_member; |
---|
29 | ALI_TLIST<ali_family_member *> *family_list; |
---|
30 | ALI_TLIST<ali_pt_member *> *pt_fam_list; |
---|
31 | ALI_TLIST<ali_pt_member *> *pt_ext_list; |
---|
32 | ali_pt_member *pt_member; |
---|
33 | float weight, d; |
---|
34 | unsigned long number; |
---|
35 | |
---|
36 | /* |
---|
37 | * Initialisation |
---|
38 | */ |
---|
39 | family_list = new ALI_TLIST<ali_family_member *>; |
---|
40 | |
---|
41 | ali_message("Searching for the family"); |
---|
42 | pt.find_family(Sequence,context->find_family_mode); |
---|
43 | ali_message("Family found"); |
---|
44 | |
---|
45 | pt_fam_list = pt.get_family_list(); |
---|
46 | pt_ext_list = pt.get_extension_list(); |
---|
47 | |
---|
48 | ali_message("Reading family:"); |
---|
49 | |
---|
50 | d = (context->ext_max_weight - 1.0) / (float) pt_fam_list->cardinality(); |
---|
51 | |
---|
52 | arbdb.begin_transaction(); |
---|
53 | |
---|
54 | /* |
---|
55 | * calculate the real family members |
---|
56 | */ |
---|
57 | number = 0; |
---|
58 | while (!pt_fam_list->is_empty()) { |
---|
59 | pt_member = pt_fam_list->first(); |
---|
60 | seq = arbdb.get_sequence(pt_member->name,context->mark_family_flag); |
---|
61 | if (seq) { |
---|
62 | weight = 1 + d * number; |
---|
63 | sprintf(message_buffer,"%s (weight = %f, matches = %d)", |
---|
64 | pt_member->name,weight,pt_member->matches); |
---|
65 | ali_message(message_buffer); |
---|
66 | family_member = new ali_family_member(seq, |
---|
67 | (float) pt_member->matches, |
---|
68 | weight); |
---|
69 | family_list->append_end(family_member); |
---|
70 | number++; |
---|
71 | } |
---|
72 | else { |
---|
73 | ali_warning("Sequence not found in Database (Sequence ignored)"); |
---|
74 | } |
---|
75 | pt_fam_list->delete_element(); |
---|
76 | } |
---|
77 | delete pt_fam_list; |
---|
78 | |
---|
79 | ali_message("Reading family extension:"); |
---|
80 | |
---|
81 | d = -1.0 * context->ext_max_weight / (float) pt_ext_list->cardinality(); |
---|
82 | |
---|
83 | /* |
---|
84 | * calculate the extension of the family |
---|
85 | */ |
---|
86 | number = 0; |
---|
87 | while (!pt_ext_list->is_empty()) { |
---|
88 | pt_member = pt_ext_list->first(); |
---|
89 | seq = arbdb.get_sequence(pt_member->name, |
---|
90 | context->mark_family_extension_flag); |
---|
91 | if (seq) { |
---|
92 | weight = context->ext_max_weight + d * number; |
---|
93 | sprintf(message_buffer,"%s (weight = %f, matches = %d)", |
---|
94 | pt_member->name,weight,pt_member->matches); |
---|
95 | ali_message(message_buffer); |
---|
96 | family_member = new ali_family_member(seq, |
---|
97 | (float) pt_member->matches, |
---|
98 | weight); |
---|
99 | family_list->append_end(family_member); |
---|
100 | number++; |
---|
101 | } |
---|
102 | else { |
---|
103 | ali_warning("Sequence not found in Database (Sequence ignored)"); |
---|
104 | } |
---|
105 | pt_ext_list->delete_element(); |
---|
106 | } |
---|
107 | delete pt_ext_list; |
---|
108 | |
---|
109 | arbdb.commit_transaction(); |
---|
110 | |
---|
111 | return family_list; |
---|
112 | } |
---|
113 | |
---|
114 | /* |
---|
115 | * calculate the costs for aligning against a family |
---|
116 | */ |
---|
117 | void ALI_PROFILE::calculate_costs(ALI_TLIST<ali_family_member *> *family_list, |
---|
118 | ALI_PROFILE_CONTEXT *context) |
---|
119 | { |
---|
120 | ali_family_member *family_member; |
---|
121 | float a[7], w[7], w_sum, sm[7][7]; |
---|
122 | float base_gap_weights[5], w_bg_sum; |
---|
123 | long members; |
---|
124 | size_t p; |
---|
125 | int i; |
---|
126 | size_t j; |
---|
127 | unsigned long *l; |
---|
128 | float *g; |
---|
129 | unsigned char **seq; |
---|
130 | long *seq_len; |
---|
131 | float (*w_Del)[], (*percent)[]; |
---|
132 | |
---|
133 | /* |
---|
134 | * allocate temporary memory |
---|
135 | */ |
---|
136 | members = family_list->cardinality(); |
---|
137 | l = (unsigned long *) CALLOC((unsigned int) members,sizeof(long)); |
---|
138 | g = (float *) CALLOC((unsigned int) members,sizeof(float)); |
---|
139 | seq = (unsigned char ** ) CALLOC((unsigned int) members,sizeof(char *)); |
---|
140 | seq_len = (long *) CALLOC((unsigned int) members,sizeof(long)); |
---|
141 | if (l == 0 || g == 0 || seq == 0 || seq_len == 0) |
---|
142 | ali_fatal_error("Out of memory"); |
---|
143 | |
---|
144 | /* |
---|
145 | * initialize arrays |
---|
146 | */ |
---|
147 | family_member = family_list->first(); |
---|
148 | prof_len = family_member->sequence->length(); |
---|
149 | seq[0] = family_member->sequence->sequence(); |
---|
150 | seq_len[0] = family_member->sequence->length(); |
---|
151 | g[0] = family_member->weight; |
---|
152 | i = 1; |
---|
153 | sub_costs_maximum = 0.0; |
---|
154 | |
---|
155 | while (family_list->is_next()) { |
---|
156 | family_member = family_list->next(); |
---|
157 | seq[i] = family_member->sequence->sequence(); |
---|
158 | seq_len[i] = family_member->sequence->length(); |
---|
159 | g[i] = family_member->weight; |
---|
160 | i++; |
---|
161 | if (prof_len < family_member->sequence->length()) { |
---|
162 | ali_warning("Family members have different length"); |
---|
163 | prof_len = family_member->sequence->length(); |
---|
164 | } |
---|
165 | } |
---|
166 | |
---|
167 | /* |
---|
168 | * Calculate the substitution cost matrix |
---|
169 | */ |
---|
170 | for (i = 0; i < 5; i++) |
---|
171 | for (j = 0; j < 5; j++) |
---|
172 | sm[i][j] = context->substitute_matrix[i][j]; |
---|
173 | |
---|
174 | /* |
---|
175 | * Initialize l-array (position of last base) |
---|
176 | */ |
---|
177 | for (i = 0; i < members; i++) |
---|
178 | l[i] = prof_len + 1; |
---|
179 | |
---|
180 | /* |
---|
181 | * allocate memory for costs |
---|
182 | */ |
---|
183 | |
---|
184 | base_weights = (float (**) [4]) CALLOC((unsigned int) prof_len, sizeof(float [4])); |
---|
185 | //base_weights = (float (*) [1][4]) CALLOC((unsigned int) prof_len, sizeof(float [4])); |
---|
186 | sub_costs = (float (**) [6]) CALLOC((unsigned int) prof_len, sizeof(float [6])); |
---|
187 | //sub_costs = (float (*) [1][6]) CALLOC((unsigned int) prof_len, sizeof(float [6])); |
---|
188 | binding_costs = (float (*) [5][5]) CALLOC((unsigned int) 5, sizeof(float [5])); |
---|
189 | lmin = (unsigned long *) CALLOC((unsigned int) prof_len, sizeof(unsigned long)); |
---|
190 | lmax = (unsigned long *) CALLOC((unsigned int) prof_len, sizeof(unsigned long)); |
---|
191 | gap_costs = (float ***) CALLOC((unsigned int) prof_len, sizeof(float *)); |
---|
192 | //gap_costs = (float *(*)[1]) CALLOC((unsigned int) prof_len, sizeof(float *)); |
---|
193 | gap_percents = (float***) CALLOC((unsigned int) prof_len, sizeof(float *)); |
---|
194 | //gap_percents = (float*(*)[1]) CALLOC((unsigned int) prof_len, sizeof(float *)); |
---|
195 | if (binding_costs == 0 || sub_costs == 0 || lmin == 0 || lmax == 0 || |
---|
196 | gap_costs == 0 || gap_percents == 0 || base_weights == 0) { |
---|
197 | ali_fatal_error("Out of memory"); |
---|
198 | } |
---|
199 | |
---|
200 | /* |
---|
201 | * Copy the binding costs matrix |
---|
202 | */ |
---|
203 | w_bind_maximum = context->binding_matrix[0][0]; |
---|
204 | for (i = 0; i < 5; i++) |
---|
205 | for (j = 0; j < 5; j++) { |
---|
206 | (*binding_costs)[i][j] = context->binding_matrix[i][j]; |
---|
207 | if ((*binding_costs)[i][j] > w_bind_maximum) |
---|
208 | w_bind_maximum = (*binding_costs)[i][j]; |
---|
209 | } |
---|
210 | |
---|
211 | /* |
---|
212 | * calculate the costs for EVERY position |
---|
213 | */ |
---|
214 | ali_message("Calculating costs for substitution"); |
---|
215 | for (p = 0; p < prof_len; p++) { |
---|
216 | /* |
---|
217 | * Initialisation |
---|
218 | */ |
---|
219 | for (i = 0; i < 7; i++) |
---|
220 | a[i] = w[i] = sm[5][i] = sm[i][5] = sm[6][i] = sm[i][6] = 0.0; |
---|
221 | for (i = 0; i < 6; i++) |
---|
222 | (*sub_costs)[p][i] = 0.0; |
---|
223 | w_sum = 0.0; |
---|
224 | w_bg_sum = 0.0; |
---|
225 | |
---|
226 | /* |
---|
227 | * Statistic consensus |
---|
228 | */ |
---|
229 | for (i = 0; i < members; i++) { |
---|
230 | if (p < size_t(seq_len[i])) { |
---|
231 | a[seq[i][p]]++; |
---|
232 | w[seq[i][p]] += g[i]; |
---|
233 | if (ali_is_real_base(seq[i][p])) |
---|
234 | w_sum += g[i]; |
---|
235 | if (ali_is_real_base(seq[i][p]) || ali_is_gap(seq[i][p])) |
---|
236 | w_bg_sum += g[i]; |
---|
237 | } |
---|
238 | else { |
---|
239 | a[ALI_DOT_CODE]++; |
---|
240 | w[ALI_DOT_CODE] += g[i]; |
---|
241 | } |
---|
242 | } |
---|
243 | |
---|
244 | /* |
---|
245 | * Relative weight of bases |
---|
246 | */ |
---|
247 | if (w_sum != 0) |
---|
248 | for (i = 0; i < 4; i++) |
---|
249 | (*base_weights)[p][i] = w[i] / w_sum; |
---|
250 | else |
---|
251 | for (i = 0; i < 4; i++) |
---|
252 | (*base_weights)[p][i] = 0.25; |
---|
253 | |
---|
254 | /* |
---|
255 | * Relative weight of bases and gaps |
---|
256 | */ |
---|
257 | if (w_bg_sum != 0) |
---|
258 | for (i = 0; i < 5; i++) |
---|
259 | base_gap_weights[i] = w[i] / w_bg_sum; |
---|
260 | else |
---|
261 | for (i = 0; i < 5; i++) |
---|
262 | base_gap_weights[i] = 0.2; |
---|
263 | |
---|
264 | /* |
---|
265 | * Expandation of substitute matrix (for 'n') |
---|
266 | */ |
---|
267 | for (j = 0; j < 5; j++) { |
---|
268 | for (i = 0; i < 4; i++) { |
---|
269 | sm[5][j] += (*base_weights)[p][i] * sm[i][j]; |
---|
270 | sm[j][5] += (*base_weights)[p][i] * sm[j][i]; |
---|
271 | } |
---|
272 | } |
---|
273 | for (i = 0; i < 4; i++) |
---|
274 | sm[5][5] += (*base_weights)[p][i] * sm[i][i]; |
---|
275 | |
---|
276 | /* |
---|
277 | * Expandation of substitute matrix (for '.') |
---|
278 | */ |
---|
279 | for (j = 0; j < 6; j++) |
---|
280 | for (i = 0; i < 5; i++) { |
---|
281 | sm[6][j] += base_gap_weights[i] * sm[i][j]; |
---|
282 | sm[j][6] += base_gap_weights[i] * sm[j][i]; |
---|
283 | } |
---|
284 | for (i = 0; i < 5; i++) |
---|
285 | sm[6][6] += base_gap_weights[i] * sm[i][i]; |
---|
286 | |
---|
287 | /* |
---|
288 | * Substitution costs |
---|
289 | */ |
---|
290 | for (i = 0; i < members; i++) { |
---|
291 | if (p < size_t(seq_len[i])) { |
---|
292 | for (j = 0; j < 6; j++) { |
---|
293 | (*sub_costs)[p][j] += g[i] * sm[seq[i][p]][j]; |
---|
294 | } |
---|
295 | } else { |
---|
296 | for (j = 0; j < 6; j++) { |
---|
297 | (*sub_costs)[p][j] += g[i] * sm[ALI_DOT_CODE][j]; |
---|
298 | } |
---|
299 | } |
---|
300 | } |
---|
301 | for (j = 0; j < 6; j++) { |
---|
302 | (*sub_costs)[p][j] /= members; |
---|
303 | if ((*sub_costs)[p][j] > sub_costs_maximum) |
---|
304 | sub_costs_maximum = (*sub_costs)[p][j]; |
---|
305 | } |
---|
306 | |
---|
307 | /* |
---|
308 | * Calculate dynamic deletion costs and percents of real gaps |
---|
309 | */ |
---|
310 | lmax[p] = 0; |
---|
311 | lmin[p] = p; |
---|
312 | for (i = 0; i < members; i++) |
---|
313 | if (l[i] < p) { |
---|
314 | if (lmin[p] > l[i]) |
---|
315 | lmin[p] = l[i]; |
---|
316 | if (lmax[p] < l[i]) |
---|
317 | lmax[p] = l[i]; |
---|
318 | } |
---|
319 | if (lmin[p] == p && lmax[p] == 0) { |
---|
320 | lmin[p] = lmax[p] = p; |
---|
321 | } |
---|
322 | |
---|
323 | w_Del = (float (*) []) CALLOC((unsigned int) (lmax[p]-lmin[p]+2), sizeof(float)); |
---|
324 | percent = (float (*) []) CALLOC((unsigned int) (lmax[p]-lmin[p]+2), sizeof(float)); |
---|
325 | if (w_Del == 0 || percent == 0) |
---|
326 | ali_fatal_error("Out of memory"); |
---|
327 | (*gap_costs)[p] = (float *) w_Del; |
---|
328 | (*gap_percents)[p] = (float *) percent; |
---|
329 | |
---|
330 | /* |
---|
331 | * Calculate dynamic deletion costs |
---|
332 | */ |
---|
333 | for (j = 0; j <= lmax[p] - lmin[p] + 1; j++) { |
---|
334 | (*w_Del)[j] = 0; |
---|
335 | for (i = 0; i < members; i++) { |
---|
336 | /* |
---|
337 | * Normal case |
---|
338 | */ |
---|
339 | if (p < size_t(seq_len[i]) /* && !ali_is_dot(seq[i][p]) */) { |
---|
340 | if (l[i] == prof_len + 1 || l[i] >= j + lmin[p]) { |
---|
341 | (*w_Del)[j] += g[i] * sm[seq[i][p]][4] * context->multi_gap_factor; |
---|
342 | }else{ |
---|
343 | (*w_Del)[j] += g[i] * sm[seq[i][p]][4]; |
---|
344 | } |
---|
345 | } |
---|
346 | /* |
---|
347 | * expand sequence with dots |
---|
348 | */ |
---|
349 | else { |
---|
350 | if (l[i] >= j + lmin[p] && l[i] < prof_len+1) { |
---|
351 | (*w_Del)[j] += g[i] * sm[ALI_DOT_CODE][4] * context->multi_gap_factor; |
---|
352 | }else{ |
---|
353 | (*w_Del)[j] += g[i] * sm[ALI_DOT_CODE][4]; |
---|
354 | } |
---|
355 | } |
---|
356 | } |
---|
357 | (*w_Del)[j] /= members; |
---|
358 | } |
---|
359 | |
---|
360 | /* |
---|
361 | * Update the l-array |
---|
362 | */ |
---|
363 | for (i = 0; i < members; i++){ |
---|
364 | if (!ali_is_gap(seq[i][p])) |
---|
365 | l[i] = p; |
---|
366 | } |
---|
367 | |
---|
368 | /* |
---|
369 | * Calculate percents of real gaps |
---|
370 | */ |
---|
371 | for (j = 0; j <= lmax[p] - lmin[p] + 1; j++) { |
---|
372 | (*percent)[j] = 0; |
---|
373 | for (i = 0; i < members; i++) { |
---|
374 | if (l[i] >= j + lmin[p] && l[i] < prof_len+1) { |
---|
375 | (*percent)[j] += 1.0; |
---|
376 | } |
---|
377 | } |
---|
378 | (*percent)[j] /= members; |
---|
379 | } |
---|
380 | } |
---|
381 | |
---|
382 | ali_message("Calculation finished"); |
---|
383 | |
---|
384 | free((char *) l); |
---|
385 | free((char *) g); |
---|
386 | free((char *) seq); |
---|
387 | free((char *) seq_len); |
---|
388 | } |
---|
389 | |
---|
390 | /* |
---|
391 | * find the next helix |
---|
392 | */ |
---|
393 | int ALI_PROFILE::find_next_helix(char h[], unsigned long h_len, |
---|
394 | unsigned long pos, |
---|
395 | unsigned long *helix_nr, |
---|
396 | unsigned long *start, unsigned long *end) |
---|
397 | { |
---|
398 | unsigned long i; |
---|
399 | |
---|
400 | for (i = pos; i < h_len && !isdigit(h[i]); i++) ; |
---|
401 | if (i >= h_len) return -1; |
---|
402 | |
---|
403 | *start = i; |
---|
404 | sscanf(&h[i],"%ld",helix_nr); |
---|
405 | for (; i < h_len && isdigit(h[i]); i++) ; |
---|
406 | for (; i < h_len && !isdigit(h[i]); i++) ; |
---|
407 | *end = i - 1; |
---|
408 | |
---|
409 | return 0; |
---|
410 | } |
---|
411 | |
---|
412 | /* |
---|
413 | * find the complementary part of a helix |
---|
414 | */ |
---|
415 | int ALI_PROFILE::find_comp_helix(char h[], unsigned long h_len, |
---|
416 | unsigned long pos, unsigned long helix_nr, |
---|
417 | unsigned long *start, unsigned long *end) |
---|
418 | { |
---|
419 | unsigned long nr, i; |
---|
420 | |
---|
421 | i = pos; |
---|
422 | do { |
---|
423 | for (; i < h_len && !isdigit(h[i]); i++) ; |
---|
424 | if (i >= h_len) return -1; |
---|
425 | *start = i; |
---|
426 | sscanf(&h[i],"%ld",&nr); |
---|
427 | for (; i < h_len && isdigit(h[i]); i++) ; |
---|
428 | } while (helix_nr != nr); |
---|
429 | |
---|
430 | for (; i < h_len && !isdigit(h[i]); i++) ; |
---|
431 | *end = i - 1; |
---|
432 | |
---|
433 | return 0; |
---|
434 | } |
---|
435 | |
---|
436 | void ALI_PROFILE::delete_comp_helix(char h1[], char h2[], unsigned long h_len, |
---|
437 | unsigned long start, unsigned long end) |
---|
438 | { |
---|
439 | unsigned long i; |
---|
440 | |
---|
441 | for (i = start; i < h_len && i <= end; i++) { |
---|
442 | h1[i] = '.'; |
---|
443 | h2[i] = '.'; |
---|
444 | } |
---|
445 | } |
---|
446 | |
---|
447 | #if 0 |
---|
448 | |
---|
449 | int ALI_PROFILE::map_helix(char h[], unsigned long h_len, |
---|
450 | unsigned long start1, unsigned long end1, |
---|
451 | unsigned long start2, unsigned long end2) |
---|
452 | { |
---|
453 | unsigned long p1, p2; |
---|
454 | unsigned long last1, last2; |
---|
455 | |
---|
456 | if (end1 >= h_len || end2 >= h_len || start1 > end1 || start2 > end2) |
---|
457 | ali_fatal_error("Inconsistent parameters","ALI_PROFILE::map_helix()"); |
---|
458 | |
---|
459 | p1 = start1; |
---|
460 | for (p2 = end2; p2 >= start2 && !is_binding_marker(h[p2]); p2--); |
---|
461 | |
---|
462 | (*helix_borders)[p1] = ALI_PROFILE_BORDER_LEFT; |
---|
463 | (*helix_borders)[p2] = ALI_PROFILE_BORDER_RIGHT; |
---|
464 | last1 = p1; |
---|
465 | last2 = p2; |
---|
466 | while (p1 <= end1 && p2 >= start2) { |
---|
467 | (*helix)[p1] = p2; |
---|
468 | (*helix)[p2] = p1; |
---|
469 | last1 = p1; |
---|
470 | last2 = p2; |
---|
471 | for (p1++; p1 <= end1 && !is_binding_marker(h[p1]); p1++); |
---|
472 | for (p2--; p2 >= start2 && !is_binding_marker(h[p2]); p2--); |
---|
473 | } |
---|
474 | (*helix_borders)[last1] = ALI_PROFILE_BORDER_RIGHT; |
---|
475 | (*helix_borders)[last2] = ALI_PROFILE_BORDER_LEFT; |
---|
476 | |
---|
477 | if (p1 <= end1 || p2 >= start2) |
---|
478 | return -1; |
---|
479 | |
---|
480 | return 0; |
---|
481 | } |
---|
482 | #endif |
---|
483 | |
---|
484 | /* |
---|
485 | * initialize the array, representing the helix |
---|
486 | */ |
---|
487 | void ALI_PROFILE::initialize_helix(ALI_PROFILE_CONTEXT *context) |
---|
488 | { |
---|
489 | const char *error_string; |
---|
490 | BI_helix bi_helix; |
---|
491 | |
---|
492 | unsigned long i; |
---|
493 | |
---|
494 | /* |
---|
495 | * read helix |
---|
496 | */ |
---|
497 | if ((error_string = bi_helix.init(context->arbdb->gb_main)) != 0) |
---|
498 | ali_warning(error_string); |
---|
499 | |
---|
500 | helix_len = bi_helix.size(); |
---|
501 | helix = (long **) CALLOC((unsigned int) helix_len, sizeof(long)); |
---|
502 | helix_borders = (char **) CALLOC((unsigned int) helix_len, sizeof(long)); |
---|
503 | if (helix == 0 || helix_borders == 0) ali_fatal_error("Out of memory"); |
---|
504 | |
---|
505 | /* |
---|
506 | * convert helix for internal use |
---|
507 | */ |
---|
508 | for (i = 0; i < helix_len; i++) |
---|
509 | if (bi_helix.pairtype(i) == HELIX_PAIR) |
---|
510 | (*helix)[i] = bi_helix.opposite_position(i); |
---|
511 | else |
---|
512 | (*helix)[i] = -1; |
---|
513 | } |
---|
514 | |
---|
515 | |
---|
516 | ALI_PROFILE::ALI_PROFILE(ALI_SEQUENCE *seq, ALI_PROFILE_CONTEXT *context) |
---|
517 | { |
---|
518 | char message_buffer[100]; |
---|
519 | ali_family_member *family_member; |
---|
520 | ALI_TLIST<ali_family_member *> *family_list; |
---|
521 | |
---|
522 | norm_sequence = new ALI_NORM_SEQUENCE(seq); |
---|
523 | |
---|
524 | multi_gap_factor = context->multi_gap_factor; |
---|
525 | |
---|
526 | initialize_helix(context); |
---|
527 | |
---|
528 | family_list = find_family(seq,context); |
---|
529 | if (family_list->is_empty()) { |
---|
530 | ali_error("Family not found (maybe incompatible PT and DB Servers)"); |
---|
531 | } |
---|
532 | |
---|
533 | calculate_costs(family_list,context); |
---|
534 | |
---|
535 | insert_cost = sub_costs_maximum * context->insert_factor; |
---|
536 | multi_insert_cost = insert_cost * context->multi_insert_factor; |
---|
537 | |
---|
538 | sprintf(message_buffer,"Multi gap factor = %f",multi_gap_factor); |
---|
539 | ali_message(message_buffer); |
---|
540 | sprintf(message_buffer,"Maximal substitution costs = %f",sub_costs_maximum); |
---|
541 | ali_message(message_buffer); |
---|
542 | sprintf(message_buffer,"Normal insert costs = %f",insert_cost); |
---|
543 | ali_message(message_buffer); |
---|
544 | sprintf(message_buffer,"Multiple insert costs = %f",multi_insert_cost); |
---|
545 | ali_message(message_buffer); |
---|
546 | |
---|
547 | /* |
---|
548 | * Delete the family list |
---|
549 | */ |
---|
550 | family_member = family_list->first(); |
---|
551 | delete family_member->sequence; |
---|
552 | delete family_member; |
---|
553 | while (family_list->is_next()) { |
---|
554 | family_member = family_list->next(); |
---|
555 | delete family_member->sequence; |
---|
556 | delete family_member; |
---|
557 | } |
---|
558 | delete family_list; |
---|
559 | } |
---|
560 | |
---|
561 | ALI_PROFILE::~ALI_PROFILE(void) |
---|
562 | { |
---|
563 | size_t i; |
---|
564 | |
---|
565 | if (helix) |
---|
566 | free((char *) helix); |
---|
567 | if (helix_borders) |
---|
568 | free((char *) helix_borders); |
---|
569 | if (binding_costs) |
---|
570 | free((char *) binding_costs); |
---|
571 | if (sub_costs) |
---|
572 | free((char *) sub_costs); |
---|
573 | if (gap_costs) { |
---|
574 | for (i = 0; i < prof_len; i++) |
---|
575 | if ((*gap_costs)[i]) |
---|
576 | free((char *) (*gap_costs)[i]); |
---|
577 | free((char *) gap_costs); |
---|
578 | } |
---|
579 | if (gap_percents) { |
---|
580 | for (i = 0; i < prof_len; i++) |
---|
581 | if ((*gap_percents)[i]) |
---|
582 | free((char *) (*gap_percents)[i]); |
---|
583 | free((char *) gap_percents); |
---|
584 | } |
---|
585 | if (lmin) |
---|
586 | free((char *) lmin); |
---|
587 | if (lmax) |
---|
588 | free((char *) lmax); |
---|
589 | if (norm_sequence) |
---|
590 | delete norm_sequence; |
---|
591 | } |
---|
592 | |
---|
593 | |
---|
594 | /* |
---|
595 | * test whether a position is inside a helix |
---|
596 | */ |
---|
597 | int ALI_PROFILE::is_in_helix(unsigned long pos, |
---|
598 | unsigned long *first, unsigned long *last) { |
---|
599 | long i; |
---|
600 | |
---|
601 | if (pos > helix_len) |
---|
602 | return 0; |
---|
603 | |
---|
604 | switch ((*helix_borders)[pos]) { |
---|
605 | case ALI_PROFILE_BORDER_LEFT: |
---|
606 | *first = pos; |
---|
607 | for (i = (long) pos + 1; i < (long) prof_len; i++) |
---|
608 | if ((*helix_borders)[i] == ALI_PROFILE_BORDER_RIGHT) { |
---|
609 | *last = (unsigned long) i; |
---|
610 | return 1; |
---|
611 | } |
---|
612 | ali_warning("Helix borders inconsistent (1)"); |
---|
613 | return 0; |
---|
614 | case ALI_PROFILE_BORDER_RIGHT: |
---|
615 | *last = pos; |
---|
616 | for (i = (long) pos - 1; i >= 0; i--) |
---|
617 | if ((*helix_borders)[i] == ALI_PROFILE_BORDER_LEFT) { |
---|
618 | *first = (unsigned long) i; |
---|
619 | return 1; |
---|
620 | } |
---|
621 | ali_warning("Helix borders inconsistent (2)"); |
---|
622 | return 0; |
---|
623 | default: |
---|
624 | for (i = (long) pos - 1; i >= 0; i--) |
---|
625 | switch ((*helix_borders)[i]) { |
---|
626 | case ALI_PROFILE_BORDER_RIGHT: |
---|
627 | return 0; |
---|
628 | case ALI_PROFILE_BORDER_LEFT: |
---|
629 | *first = (unsigned long) i; |
---|
630 | for (i = (long) pos + 1; i < (long) prof_len; i++) |
---|
631 | switch ((*helix_borders)[i]) { |
---|
632 | case ALI_PROFILE_BORDER_LEFT: |
---|
633 | ali_warning("Helix borders inconsistent (3)"); |
---|
634 | printf("pos = %ld\n",pos); |
---|
635 | return 0; |
---|
636 | case ALI_PROFILE_BORDER_RIGHT: |
---|
637 | *last = (unsigned long) i; |
---|
638 | return 1; |
---|
639 | } |
---|
640 | } |
---|
641 | } |
---|
642 | return 0; |
---|
643 | } |
---|
644 | |
---|
645 | /* |
---|
646 | * test, whether a position is outside a helix |
---|
647 | */ |
---|
648 | int ALI_PROFILE::is_outside_helix(unsigned long pos, |
---|
649 | unsigned long *first, unsigned long *last) { |
---|
650 | long i; |
---|
651 | |
---|
652 | switch ((*helix_borders)[pos]) { |
---|
653 | case ALI_PROFILE_BORDER_LEFT: |
---|
654 | return 0; |
---|
655 | case ALI_PROFILE_BORDER_RIGHT: |
---|
656 | return 0; |
---|
657 | default: |
---|
658 | for (i = (long) pos - 1; i >= 0; i--) |
---|
659 | switch ((*helix_borders)[i]) { |
---|
660 | case ALI_PROFILE_BORDER_LEFT: |
---|
661 | return 0; |
---|
662 | case ALI_PROFILE_BORDER_RIGHT: |
---|
663 | *first = (unsigned long) i + 1; |
---|
664 | for (i = (long) pos + 1; i < (long) prof_len; i++) |
---|
665 | switch ((*helix_borders)[i]) { |
---|
666 | case ALI_PROFILE_BORDER_LEFT: |
---|
667 | *last = (unsigned long) i - 1; |
---|
668 | return 1; |
---|
669 | case ALI_PROFILE_BORDER_RIGHT: |
---|
670 | ali_warning("Helix borders inconsistent [1]"); |
---|
671 | return 0; |
---|
672 | } |
---|
673 | *last = prof_len - 1; |
---|
674 | return 1; |
---|
675 | } |
---|
676 | *first = 0; |
---|
677 | for (i = (long) pos + 1; i < (long) prof_len; i++) |
---|
678 | switch ((*helix_borders)[i]) { |
---|
679 | case ALI_PROFILE_BORDER_LEFT: |
---|
680 | *last = (unsigned long) i - 1; |
---|
681 | return 1; |
---|
682 | case ALI_PROFILE_BORDER_RIGHT: |
---|
683 | ali_warning("Helix borders inconsistent [2]"); |
---|
684 | return 0; |
---|
685 | } |
---|
686 | *last = prof_len - 1; |
---|
687 | return 1; |
---|
688 | } |
---|
689 | } |
---|
690 | |
---|
691 | |
---|
692 | /* |
---|
693 | * generate a 'consensus sequence' |
---|
694 | */ |
---|
695 | char *ALI_PROFILE::cheapest_sequence(void) |
---|
696 | { |
---|
697 | |
---|
698 | char *seq; |
---|
699 | size_t p; |
---|
700 | int i, min_i; |
---|
701 | float min; |
---|
702 | |
---|
703 | |
---|
704 | seq = (char *) CALLOC((unsigned int) prof_len + 1, sizeof(char)); |
---|
705 | if (seq == 0) |
---|
706 | ali_fatal_error("Out of memory"); |
---|
707 | |
---|
708 | for (p = 0; p < prof_len; p++) { |
---|
709 | min = (*sub_costs)[p][0]; |
---|
710 | min_i = 0; |
---|
711 | for (i = 1; i < 5; i++) { |
---|
712 | if (min > (*sub_costs)[p][i]) { |
---|
713 | min = (*sub_costs)[p][i]; |
---|
714 | min_i = i; |
---|
715 | } |
---|
716 | else { |
---|
717 | if (min == (*sub_costs)[p][i]) |
---|
718 | min_i = -1; |
---|
719 | } |
---|
720 | } |
---|
721 | if (min_i >= 0) |
---|
722 | seq[p] = ali_number_to_base(min_i); |
---|
723 | else { |
---|
724 | if (min > 0) |
---|
725 | seq[p] = '*'; |
---|
726 | else |
---|
727 | seq[p] = '.'; |
---|
728 | } |
---|
729 | } |
---|
730 | seq[prof_len] = '\0'; |
---|
731 | |
---|
732 | return seq; |
---|
733 | } |
---|
734 | |
---|
735 | /* |
---|
736 | * calculate the costs of a binding |
---|
737 | */ |
---|
738 | float ALI_PROFILE::w_binding(unsigned long first_seq_pos, |
---|
739 | ALI_SEQUENCE *seq) |
---|
740 | { |
---|
741 | unsigned long pos_1_seq, pos_2_seq, last_seq_pos; |
---|
742 | long pos_compl; |
---|
743 | float costs = 0; |
---|
744 | |
---|
745 | last_seq_pos = first_seq_pos + seq->length() - 1; |
---|
746 | for (pos_1_seq = first_seq_pos; pos_1_seq <= last_seq_pos; pos_1_seq++) { |
---|
747 | pos_compl = (*helix)[pos_1_seq]; |
---|
748 | if (pos_compl >= 0) { |
---|
749 | pos_2_seq = (unsigned long) pos_compl; |
---|
750 | if (pos_2_seq > pos_1_seq && pos_2_seq <= last_seq_pos) |
---|
751 | costs += w_bind(pos_1_seq, seq->base(pos_1_seq), |
---|
752 | pos_2_seq, seq->base(pos_2_seq)); |
---|
753 | else |
---|
754 | if (pos_2_seq < first_seq_pos || pos_2_seq > last_seq_pos) |
---|
755 | costs += w_bind_maximum; |
---|
756 | } |
---|
757 | } |
---|
758 | |
---|
759 | return costs; |
---|
760 | } |
---|
761 | |
---|
762 | |
---|