1 | #include <cstdio> |
---|
2 | #include <cstdlib> |
---|
3 | #include <cstring> |
---|
4 | #include <climits> |
---|
5 | |
---|
6 | #include <PT_server.h> |
---|
7 | #include <PT_server_prototypes.h> |
---|
8 | #include <struct_man.h> |
---|
9 | #include "probe.h" |
---|
10 | #include "probe_tree.hxx" |
---|
11 | #include <arbdbt.h> |
---|
12 | #include <inline.h> |
---|
13 | |
---|
14 | #ifdef P_ |
---|
15 | #error P_ already defined |
---|
16 | #endif |
---|
17 | |
---|
18 | #include "pt_prototypes.h" |
---|
19 | |
---|
20 | // overloaded functions to avoid problems with type-punning: |
---|
21 | inline void aisc_link(dll_public *dll, PT_tprobes *tprobe) { aisc_link(reinterpret_cast<dllpublic_ext*>(dll), reinterpret_cast<dllheader_ext*>(tprobe)); } |
---|
22 | inline void aisc_link(dll_public *dll, PT_probeparts *parts) { aisc_link(reinterpret_cast<dllpublic_ext*>(dll), reinterpret_cast<dllheader_ext*>(parts)); } |
---|
23 | inline void aisc_link(dll_public *dll, PT_probematch *match) { aisc_link(reinterpret_cast<dllpublic_ext*>(dll), reinterpret_cast<dllheader_ext*>(match)); } |
---|
24 | |
---|
25 | extern "C" { |
---|
26 | int pt_init_bond_matrix(PT_pdc *THIS) |
---|
27 | { |
---|
28 | THIS->bond[0].val = 0.0; |
---|
29 | THIS->bond[1].val = 0.0; |
---|
30 | THIS->bond[2].val = 0.5; |
---|
31 | THIS->bond[3].val = 1.1; |
---|
32 | THIS->bond[4].val = 0.0; |
---|
33 | THIS->bond[5].val = 0.0; |
---|
34 | THIS->bond[6].val = 1.5; |
---|
35 | THIS->bond[7].val = 0.0; |
---|
36 | THIS->bond[8].val = 0.5; |
---|
37 | THIS->bond[9].val = 1.5; |
---|
38 | THIS->bond[10].val = 0.4; |
---|
39 | THIS->bond[11].val = 0.9; |
---|
40 | THIS->bond[12].val = 1.1; |
---|
41 | THIS->bond[13].val = 0.0; |
---|
42 | THIS->bond[14].val = 0.9; |
---|
43 | THIS->bond[15].val = 0.0; |
---|
44 | |
---|
45 | return 0; |
---|
46 | } |
---|
47 | } |
---|
48 | struct ptnd_loop_com { |
---|
49 | PT_pdc *pdc; |
---|
50 | PT_local *locs; |
---|
51 | PT_probeparts *parts; |
---|
52 | int mishits; |
---|
53 | int new_match; /* match or design the probe: 1 match 0 design */ |
---|
54 | double sum_bonds; /* sum of bond of longest non mismatch string */ |
---|
55 | double dt; /* sum of mismatches */ |
---|
56 | } ptnd; |
---|
57 | |
---|
58 | |
---|
59 | static int ptnd_compare_quality(const void *PT_tprobes_ptr1, const void *PT_tprobes_ptr2, void *) { |
---|
60 | const PT_tprobes *tprobe1 = (const PT_tprobes*)PT_tprobes_ptr1; |
---|
61 | const PT_tprobes *tprobe2 = (const PT_tprobes*)PT_tprobes_ptr2; |
---|
62 | |
---|
63 | // sort best quality first |
---|
64 | if (tprobe1->quality < tprobe2->quality) return 1; |
---|
65 | if (tprobe1->quality > tprobe2->quality) return -1; |
---|
66 | return 0; |
---|
67 | } |
---|
68 | |
---|
69 | static int ptnd_compare_sequence(const void *PT_tprobes_ptr1, const void *PT_tprobes_ptr2, void*) { |
---|
70 | const PT_tprobes *tprobe1 = (const PT_tprobes*)PT_tprobes_ptr1; |
---|
71 | const PT_tprobes *tprobe2 = (const PT_tprobes*)PT_tprobes_ptr2; |
---|
72 | |
---|
73 | return strcmp(tprobe1->sequence,tprobe2->sequence); |
---|
74 | } |
---|
75 | |
---|
76 | static void ptnd_sort_probes_by(PT_pdc *pdc, int mode) /* mode 0 quality, mode 1 sequence */ |
---|
77 | { |
---|
78 | PT_tprobes **my_list; |
---|
79 | int list_len; |
---|
80 | PT_tprobes *tprobe; |
---|
81 | int i; |
---|
82 | |
---|
83 | if (!pdc->tprobes) return; |
---|
84 | list_len = get_TPROBE_CNT(pdc->tprobes); |
---|
85 | if (list_len <= 1) return; |
---|
86 | my_list = (PT_tprobes **)calloc(sizeof(void *),list_len); |
---|
87 | for (i=0, tprobe = pdc->tprobes; |
---|
88 | tprobe; |
---|
89 | i++,tprobe=tprobe->next) |
---|
90 | { |
---|
91 | my_list[i] = tprobe; |
---|
92 | } |
---|
93 | switch (mode) { |
---|
94 | case 0: |
---|
95 | GB_sort((void **)my_list, 0, list_len, ptnd_compare_quality, 0); |
---|
96 | break; |
---|
97 | case 1: |
---|
98 | GB_sort((void **)my_list, 0, list_len, ptnd_compare_sequence, 0); |
---|
99 | break; |
---|
100 | } |
---|
101 | |
---|
102 | for (i=0;i<list_len;i++) { |
---|
103 | aisc_unlink(reinterpret_cast<dllheader_ext*>(my_list[i])); |
---|
104 | aisc_link(&pdc->ptprobes, my_list[i]); |
---|
105 | } |
---|
106 | free((char *)my_list); |
---|
107 | } |
---|
108 | static void ptnd_probe_delete_all_but(PT_pdc *pdc, int count) |
---|
109 | { |
---|
110 | PT_tprobes *tprobe; |
---|
111 | int i; |
---|
112 | |
---|
113 | for (i=0,tprobe = pdc->tprobes; |
---|
114 | tprobe && i< count; |
---|
115 | i++,tprobe = tprobe->next ) ; |
---|
116 | |
---|
117 | if (tprobe) { |
---|
118 | while(tprobe->next) { |
---|
119 | destroy_PT_tprobes( tprobe->next ); |
---|
120 | } |
---|
121 | } |
---|
122 | } |
---|
123 | static int pt_get_gc_content(char *probe) |
---|
124 | { |
---|
125 | int gc = 0; |
---|
126 | while (*probe){ |
---|
127 | if ( *probe == PT_G || |
---|
128 | *probe == PT_C) { |
---|
129 | gc++; |
---|
130 | } |
---|
131 | probe ++; |
---|
132 | } |
---|
133 | return gc; |
---|
134 | } |
---|
135 | static double pt_get_temperature(const char *probe) |
---|
136 | { |
---|
137 | int i; |
---|
138 | double t = 0; |
---|
139 | while (( i=*(probe++) )) { |
---|
140 | if (i == PT_G || i == PT_C) t+=4.0;else t+= 2.0; |
---|
141 | } |
---|
142 | return t; |
---|
143 | } |
---|
144 | |
---|
145 | #if 0 |
---|
146 | int ptnd_check_pure(char *probe) |
---|
147 | { |
---|
148 | int i; |
---|
149 | while (( i=*(probe++) )) { |
---|
150 | if ( i < PT_A || i > PT_T) return 1; |
---|
151 | } |
---|
152 | return 0; |
---|
153 | } |
---|
154 | #endif |
---|
155 | |
---|
156 | static void ptnd_calc_quality(PT_pdc *pdc) { |
---|
157 | PT_tprobes *tprobe; |
---|
158 | int i; |
---|
159 | |
---|
160 | for (tprobe = pdc->tprobes; |
---|
161 | tprobe; |
---|
162 | tprobe = tprobe->next) |
---|
163 | { |
---|
164 | for (i=0; i< PERC_SIZE-1; i++) { |
---|
165 | if (tprobe->perc[i] > tprobe->mishit) break; |
---|
166 | } |
---|
167 | tprobe->quality = ((double)tprobe->groupsize * i) + 1000.0/(1000.0 + tprobe->perc[i]) ; |
---|
168 | } |
---|
169 | } |
---|
170 | /*********************************************************************** |
---|
171 | check the bond val for a probe |
---|
172 | ***********************************************************************/ |
---|
173 | static double ptnd_check_max_bond( PT_pdc *pdc, char base) |
---|
174 | { |
---|
175 | int complement = PT_complement(base); |
---|
176 | return pdc->bond[ (complement-(int)PT_A)*4 + base-(int)PT_A].val; |
---|
177 | } |
---|
178 | |
---|
179 | double ptnd_check_split(PT_pdc *pdc, char *probe, int pos, char ref) { |
---|
180 | int base = probe[pos]; |
---|
181 | int complement = PT_complement(base); |
---|
182 | double max_bind = pdc->bond[ (complement-(int)PT_A)*4 + base-(int)PT_A].val; |
---|
183 | double new_bind = pdc->bond[ (complement-(int)PT_A)*4 + ref-(int)PT_A].val; |
---|
184 | |
---|
185 | if ( new_bind < pdc->split) |
---|
186 | return new_bind-max_bind; /* negative values indicate split */ |
---|
187 | /* this mismatch splits the probe in several domains */ |
---|
188 | return (max_bind - new_bind); |
---|
189 | } |
---|
190 | |
---|
191 | |
---|
192 | /*********************************************************************** |
---|
193 | primary search for probes |
---|
194 | ***********************************************************************/ |
---|
195 | /* count all mishits for a probe */ |
---|
196 | |
---|
197 | struct ptnd_chain_count_mishits { |
---|
198 | int operator()(int name, int apos, int rpos) { |
---|
199 | char *probe = psg.probe; |
---|
200 | psg.abs_pos.announce(apos); |
---|
201 | if (psg.data[name].is_group) return 0; /* dont count group or neverminds */ |
---|
202 | if (probe) { |
---|
203 | rpos+=psg.height; |
---|
204 | while (*probe && psg.data[name].data[rpos]) { |
---|
205 | if (psg.data[name].data[rpos] != *(probe)) |
---|
206 | return 0; |
---|
207 | probe++; rpos++; |
---|
208 | } |
---|
209 | } |
---|
210 | ptnd.mishits++; |
---|
211 | return 0; |
---|
212 | } |
---|
213 | }; |
---|
214 | |
---|
215 | /* go down the tree to chains and leafs; count the species that are in the non member group */ |
---|
216 | static int ptnd_count_mishits2(POS_TREE *pt) |
---|
217 | { |
---|
218 | int base; |
---|
219 | int name; |
---|
220 | int mishits = 0; |
---|
221 | |
---|
222 | if (pt == NULL) |
---|
223 | return 0; |
---|
224 | if (PT_read_type(pt) == PT_NT_LEAF) { |
---|
225 | name = PT_read_name(psg.ptmain,pt); |
---|
226 | int apos = PT_read_apos(psg.ptmain,pt); |
---|
227 | psg.abs_pos.announce(apos); |
---|
228 | if (!psg.data[name].is_group) return 1; |
---|
229 | return 0; |
---|
230 | }else if (PT_read_type(pt) == PT_NT_CHAIN) { |
---|
231 | psg.probe = 0; |
---|
232 | ptnd.mishits = 0; |
---|
233 | PT_read_chain(psg.ptmain,pt, ptnd_chain_count_mishits()); |
---|
234 | return ptnd.mishits; |
---|
235 | }else{ |
---|
236 | for (base = PT_QU; base< PT_B_MAX; base++) { |
---|
237 | mishits += ptnd_count_mishits2(PT_read_son(psg.ptmain,pt,(PT_BASES)base)); |
---|
238 | } |
---|
239 | return mishits; |
---|
240 | } |
---|
241 | } |
---|
242 | extern "C" char *get_design_info(PT_tprobes *tprobe) |
---|
243 | { |
---|
244 | char *buffer = (char *)GB_give_buffer(2000); |
---|
245 | char *probe = (char *)GB_give_buffer2(tprobe->seq_len + 10); |
---|
246 | PT_pdc *pdc = (PT_pdc *)tprobe->mh.parent->parent; |
---|
247 | char *p; |
---|
248 | int i; |
---|
249 | int sum; |
---|
250 | |
---|
251 | p = buffer; |
---|
252 | |
---|
253 | // target |
---|
254 | strcpy(probe,tprobe->sequence); |
---|
255 | PT_base_2_string(probe,0); /* convert probe to real ASCII */ |
---|
256 | sprintf(p, "%-*s", pdc->probelen+1, probe); |
---|
257 | p += strlen(p); |
---|
258 | |
---|
259 | int apos = tprobe->apos; |
---|
260 | int c = 0; |
---|
261 | int cs = '='; |
---|
262 | { // search nearest hit |
---|
263 | for (c=0;c<ALPHA_SIZE;c++) { |
---|
264 | if (!pdc->pos_groups[c]) { // new group |
---|
265 | pdc->pos_groups[c] = tprobe->apos; |
---|
266 | break; |
---|
267 | } |
---|
268 | int dist = tprobe->apos - pdc->pos_groups[c]; |
---|
269 | if (dist<0) dist = -dist; |
---|
270 | if ( dist < pdc->probelen) { |
---|
271 | apos = tprobe->apos - pdc->pos_groups[c]; |
---|
272 | if (apos > 0) { |
---|
273 | cs = '+'; |
---|
274 | }else{ |
---|
275 | apos = -apos; cs = '-'; |
---|
276 | } |
---|
277 | break; |
---|
278 | } |
---|
279 | } |
---|
280 | } |
---|
281 | |
---|
282 | // le apos ecol |
---|
283 | sprintf(p,"%2i %c%c%6i %4li ", tprobe->seq_len, c+'A', cs, apos, PT_abs_2_rel(tprobe->apos)); |
---|
284 | p += strlen(p); |
---|
285 | |
---|
286 | // grps |
---|
287 | sprintf(p,"%4i ",tprobe->groupsize); |
---|
288 | p += strlen(p); |
---|
289 | |
---|
290 | // G+C |
---|
291 | sprintf(p,"%-4.1f ",((double)pt_get_gc_content(tprobe->sequence))/tprobe->seq_len*100.0); |
---|
292 | p += strlen(p); |
---|
293 | |
---|
294 | // 4GC+2AT |
---|
295 | sprintf(p,"%-7.1f ", pt_get_temperature(tprobe->sequence)); |
---|
296 | p += strlen(p); |
---|
297 | |
---|
298 | // probe string |
---|
299 | probe = reverse_probe(tprobe->sequence,0); |
---|
300 | complement_probe(probe,0); |
---|
301 | PT_base_2_string(probe,0); /* convert probe to real ASCII */ |
---|
302 | sprintf(p,"%-*s |", pdc->probelen, probe); |
---|
303 | free(probe); |
---|
304 | p += strlen(p); |
---|
305 | |
---|
306 | // non-group hits by temp. decrease |
---|
307 | for (sum=i=0;i<PERC_SIZE;i++) { |
---|
308 | sum+= tprobe->perc[i]; |
---|
309 | sprintf(p,"%3i,",sum); |
---|
310 | p += strlen(p); |
---|
311 | } |
---|
312 | |
---|
313 | return buffer; |
---|
314 | } |
---|
315 | |
---|
316 | |
---|
317 | extern "C" char *get_design_hinfo(PT_tprobes *tprobe) { |
---|
318 | char *buffer = (char *)GB_give_buffer(2000); |
---|
319 | char *s = buffer; |
---|
320 | PT_pdc *pdc; |
---|
321 | |
---|
322 | if (!tprobe) { |
---|
323 | return (char*)"Sorry, there are no probes for your selection !!!"; |
---|
324 | } |
---|
325 | pdc = (PT_pdc *)tprobe->mh.parent->parent; |
---|
326 | sprintf(buffer, |
---|
327 | "Probe design Parameters:\n" |
---|
328 | "Length of probe %4i\n" |
---|
329 | "Temperature [%4.1f -%4.1f]\n" |
---|
330 | "GC-Content [%4.1f -%4.1f]\n" |
---|
331 | "E.Coli Position [%4i -%4i]\n" |
---|
332 | "Max Non Group Hits %4i\n" |
---|
333 | "Min Group Hits %4.0f%%\n", |
---|
334 | pdc->probelen, |
---|
335 | pdc->mintemp,pdc->maxtemp, |
---|
336 | pdc->min_gc*100.0,pdc->max_gc*100.0, |
---|
337 | pdc->minpos, pdc->maxpos, |
---|
338 | pdc->mishit, pdc->mintarget*100.0); |
---|
339 | |
---|
340 | s += strlen(s); |
---|
341 | |
---|
342 | sprintf(s,"%-*s", pdc->probelen+1, "Target"); |
---|
343 | s += strlen(s); |
---|
344 | |
---|
345 | sprintf(s,"%2s %8s %4s ","le","apos","ecol"); |
---|
346 | s += strlen(s); |
---|
347 | |
---|
348 | sprintf(s,"%4s ","grps"); // groupsize |
---|
349 | s += strlen(s); |
---|
350 | |
---|
351 | sprintf(s,"%-4s %-7s %-*s | "," G+C","4GC+2AT", pdc->probelen, "Probe sequence"); |
---|
352 | s += strlen(s); |
---|
353 | |
---|
354 | sprintf(s, "Decrease T by n*.3C -> probe matches n non group species"); |
---|
355 | // s += strlen(s); |
---|
356 | |
---|
357 | return buffer; |
---|
358 | } |
---|
359 | |
---|
360 | /* search down the tree to find matching species for the given probe */ |
---|
361 | static int ptnd_count_mishits(char *probe, POS_TREE *pt,int height) |
---|
362 | { |
---|
363 | int name; |
---|
364 | int i; |
---|
365 | POS_TREE *pthelp; |
---|
366 | int pos; |
---|
367 | int mishits; |
---|
368 | |
---|
369 | if (!pt) return 0; |
---|
370 | mishits = 0; |
---|
371 | if (PT_read_type(pt) == PT_NT_NODE && *probe) { |
---|
372 | for (i=PT_A; i<PT_B_MAX; i++) { |
---|
373 | if (i!= *probe) continue; |
---|
374 | if (( pthelp = PT_read_son(psg.ptmain,pt, (PT_BASES)i) )) |
---|
375 | mishits += ptnd_count_mishits(probe+1,pthelp, height+1); |
---|
376 | } |
---|
377 | return mishits; |
---|
378 | } |
---|
379 | if (*probe) { |
---|
380 | if (PT_read_type(pt) == PT_NT_LEAF) { |
---|
381 | pos = PT_read_rpos(psg.ptmain,pt) + height; |
---|
382 | name = PT_read_name(psg.ptmain,pt); |
---|
383 | if (pos + (int)(strlen(probe)) >= psg.data[name].size) /* after end */ |
---|
384 | return 0; |
---|
385 | |
---|
386 | while (*probe) { |
---|
387 | if (psg.data[name].data[pos++] != *(probe++)) |
---|
388 | return 0; |
---|
389 | } |
---|
390 | } else { /* chain */ |
---|
391 | psg.probe = probe; |
---|
392 | psg.height = height; |
---|
393 | ptnd.mishits = 0; |
---|
394 | PT_read_chain(psg.ptmain,pt, ptnd_chain_count_mishits()); |
---|
395 | return ptnd.mishits; |
---|
396 | } |
---|
397 | } |
---|
398 | return ptnd_count_mishits2(pt); |
---|
399 | } |
---|
400 | |
---|
401 | static void ptnd_first_check(PT_pdc *pdc) /* checks the direkt mishits */ |
---|
402 | { |
---|
403 | PT_tprobes *tprobe, *tprobe_next; |
---|
404 | for ( tprobe = pdc->tprobes; |
---|
405 | tprobe; |
---|
406 | tprobe = tprobe_next ) { |
---|
407 | tprobe_next = tprobe->next; |
---|
408 | psg.abs_pos.clear(); |
---|
409 | tprobe->mishit = ptnd_count_mishits(tprobe->sequence,psg.pt,0); |
---|
410 | tprobe->apos = psg.abs_pos.get_most_used(); |
---|
411 | if (tprobe->mishit > pdc->mishit) { |
---|
412 | destroy_PT_tprobes(tprobe); |
---|
413 | } |
---|
414 | } |
---|
415 | psg.abs_pos.clear(); |
---|
416 | } |
---|
417 | /*********************************************************************** |
---|
418 | Check the probes Position |
---|
419 | ***********************************************************************/ |
---|
420 | |
---|
421 | static void ptnd_check_position(PT_pdc *pdc) /* checks the direkt mishits */ |
---|
422 | { |
---|
423 | PT_tprobes *tprobe, *tprobe_next; |
---|
424 | if ( pdc->minpos == pdc->maxpos) return; |
---|
425 | |
---|
426 | for ( tprobe = pdc->tprobes; |
---|
427 | tprobe; |
---|
428 | tprobe = tprobe_next ) { |
---|
429 | tprobe_next = tprobe->next; |
---|
430 | long relpos = PT_abs_2_rel(tprobe->apos); |
---|
431 | if (relpos < pdc->minpos || relpos > pdc->maxpos) { |
---|
432 | destroy_PT_tprobes(tprobe); |
---|
433 | } |
---|
434 | } |
---|
435 | } |
---|
436 | /*********************************************************************** |
---|
437 | check the average bond size |
---|
438 | ***********************************************************************/ |
---|
439 | static void ptnd_check_bonds(PT_pdc *pdc, int match) /* checks probe hairpin bonds */ |
---|
440 | { |
---|
441 | PT_tprobes *tprobe, *tprobe_next; |
---|
442 | int i; |
---|
443 | double sbond; |
---|
444 | for ( tprobe = pdc->tprobes; |
---|
445 | tprobe; |
---|
446 | tprobe = tprobe_next ) { |
---|
447 | tprobe_next = tprobe->next; |
---|
448 | tprobe->seq_len = strlen(tprobe->sequence); |
---|
449 | sbond = 0.0; |
---|
450 | for (i=0;i<tprobe->seq_len;i++) { |
---|
451 | sbond += ptnd_check_max_bond(pdc,tprobe->sequence[i]); |
---|
452 | } |
---|
453 | tprobe->sum_bonds = sbond; |
---|
454 | } |
---|
455 | match = match; |
---|
456 | } |
---|
457 | /*********************************************************************** |
---|
458 | split the probes in probeparts |
---|
459 | ***********************************************************************/ |
---|
460 | static void ptnd_cp_tprobe_2_probepart(PT_pdc *pdc) |
---|
461 | { |
---|
462 | PT_tprobes *tprobe; |
---|
463 | PT_probeparts *parts; |
---|
464 | int pos; |
---|
465 | int probelen; |
---|
466 | |
---|
467 | while (pdc->parts) destroy_PT_probeparts(pdc->parts); |
---|
468 | for ( tprobe = pdc->tprobes; |
---|
469 | tprobe; |
---|
470 | tprobe = tprobe->next ) { |
---|
471 | probelen = strlen(tprobe->sequence); |
---|
472 | probelen -= DOMAIN_MIN_LENGTH; |
---|
473 | for (pos = 0; pos < probelen; pos ++ ) { |
---|
474 | parts = create_PT_probeparts(); |
---|
475 | parts->sequence = strdup(tprobe->sequence+pos); |
---|
476 | parts->source = tprobe; |
---|
477 | parts->start = pos; |
---|
478 | aisc_link(&pdc->pparts, parts); |
---|
479 | } |
---|
480 | } |
---|
481 | } |
---|
482 | static void ptnd_duplikate_probepart_rek(PT_pdc *pdc, char *insequence, int deep, double dt,double sum_bonds, PT_probeparts *parts) |
---|
483 | { |
---|
484 | PT_probeparts *newparts; |
---|
485 | int base; |
---|
486 | int i; |
---|
487 | double max_bind; |
---|
488 | double split; |
---|
489 | double ndt,nsum_bonds; |
---|
490 | char *sequence; |
---|
491 | |
---|
492 | sequence = strdup(insequence); |
---|
493 | if (deep >= PT_PART_DEEP) { /* now do it */ |
---|
494 | newparts = create_PT_probeparts(); |
---|
495 | newparts->sequence = sequence; |
---|
496 | newparts->source = parts->source; |
---|
497 | newparts->dt = dt; |
---|
498 | newparts->start = parts->start; |
---|
499 | newparts->sum_bonds = sum_bonds; |
---|
500 | aisc_link(&pdc->pdparts, newparts); |
---|
501 | return; |
---|
502 | } |
---|
503 | base = sequence[deep]; |
---|
504 | max_bind = ptnd_check_max_bond(pdc, base); |
---|
505 | for (i = PT_A; i <=PT_T; i++) { |
---|
506 | sequence[deep] = i; |
---|
507 | if ( (split = ptnd_check_split(pdc, insequence, deep, i)) < 0.0) continue; |
---|
508 | /* this mismatch splits the probe in several domains */ |
---|
509 | ndt = split + dt; |
---|
510 | nsum_bonds = sum_bonds+max_bind-split; |
---|
511 | ptnd_duplikate_probepart_rek(pdc,sequence,deep+1,ndt,nsum_bonds,parts); |
---|
512 | } |
---|
513 | free(sequence); |
---|
514 | } |
---|
515 | static void ptnd_duplikate_probepart(PT_pdc *pdc) |
---|
516 | { |
---|
517 | PT_probeparts *parts; |
---|
518 | for (parts = pdc->parts; parts; parts = parts->next) |
---|
519 | ptnd_duplikate_probepart_rek(pdc,parts->sequence,0,parts->dt,0.0, parts); |
---|
520 | |
---|
521 | while (( parts = pdc->parts )) |
---|
522 | destroy_PT_probeparts(parts); /* delete the source */ |
---|
523 | |
---|
524 | while (( parts = pdc->dparts )) |
---|
525 | { |
---|
526 | aisc_unlink((struct_dllheader_ext*)parts); |
---|
527 | aisc_link(&pdc->pparts, parts); |
---|
528 | } |
---|
529 | } |
---|
530 | /*********************************************************************** |
---|
531 | sort the parts and check for duplicated parts |
---|
532 | ***********************************************************************/ |
---|
533 | |
---|
534 | static int ptnd_compare_parts(const void *PT_probeparts_ptr1, const void *PT_probeparts_ptr2, void*) { |
---|
535 | const PT_probeparts *tprobe1 = (const PT_probeparts*)PT_probeparts_ptr1; |
---|
536 | const PT_probeparts *tprobe2 = (const PT_probeparts*)PT_probeparts_ptr2; |
---|
537 | |
---|
538 | return strcmp(tprobe1->sequence,tprobe2->sequence); |
---|
539 | } |
---|
540 | |
---|
541 | static void ptnd_sort_parts(PT_pdc *pdc) |
---|
542 | { |
---|
543 | PT_probeparts **my_list; |
---|
544 | int list_len; |
---|
545 | PT_probeparts *tprobe; |
---|
546 | int i; |
---|
547 | |
---|
548 | if (!pdc->parts) return; |
---|
549 | list_len = get_TPROBEPARTS_CNT(pdc->parts); |
---|
550 | if (list_len <= 1) return; |
---|
551 | my_list = (PT_probeparts **)calloc(sizeof(void *),list_len); |
---|
552 | for ( i=0, tprobe = pdc->parts; |
---|
553 | tprobe; |
---|
554 | i++,tprobe=tprobe->next ) { |
---|
555 | my_list[i] = tprobe; |
---|
556 | } |
---|
557 | GB_sort((void **)my_list, 0, list_len, ptnd_compare_parts, 0); |
---|
558 | |
---|
559 | for (i=0;i<list_len;i++) { |
---|
560 | aisc_unlink((struct_dllheader_ext*)my_list[i]); |
---|
561 | aisc_link(&pdc->pparts, my_list[i]); |
---|
562 | } |
---|
563 | free((char *)my_list); |
---|
564 | } |
---|
565 | static void ptnd_remove_duplikated_probepart(PT_pdc *pdc) |
---|
566 | { |
---|
567 | PT_probeparts *parts, *parts_next; |
---|
568 | for ( parts = pdc->parts; |
---|
569 | parts; |
---|
570 | parts = parts_next) { |
---|
571 | parts_next = parts->next; |
---|
572 | if (parts_next) { |
---|
573 | if ( (parts->source== parts_next->source) && !strcmp(parts->sequence, parts_next->sequence)) { /* equal sequence */ |
---|
574 | if (parts->dt < parts_next->dt) { /* delete higher dt */ |
---|
575 | destroy_PT_probeparts(parts_next); |
---|
576 | parts_next = parts; |
---|
577 | }else{ |
---|
578 | destroy_PT_probeparts(parts); |
---|
579 | } |
---|
580 | } |
---|
581 | } |
---|
582 | } |
---|
583 | } |
---|
584 | /*********************************************************************** |
---|
585 | test the probe parts, search the longest non mismatch string |
---|
586 | ***********************************************************************/ |
---|
587 | static void ptnd_check_part_inc_dt(PT_pdc *pdc, PT_probeparts *parts, |
---|
588 | int name, int apos, int rpos, |
---|
589 | double dt,double sum_bonds) |
---|
590 | { |
---|
591 | PT_tprobes *tprobe = parts->source; |
---|
592 | double ndt; |
---|
593 | int pos; |
---|
594 | int start; |
---|
595 | double h; |
---|
596 | char *probe; |
---|
597 | int split; |
---|
598 | |
---|
599 | if (( start = parts->start )) { /* look backwards */ |
---|
600 | probe = parts->source->sequence; |
---|
601 | pos = rpos-1; |
---|
602 | start--; /* test the base left of start */ |
---|
603 | split = 0; |
---|
604 | while (start>=0) { |
---|
605 | if (pos<0) break; /* out of sight */ |
---|
606 | h = ptnd_check_split(ptnd.pdc, probe, start,psg.data[name].data[pos]); |
---|
607 | if (h>0.0 && !split) return; /* there is a longer part matching this */ |
---|
608 | dt -= h; |
---|
609 | start --; |
---|
610 | pos --; |
---|
611 | split = 1; |
---|
612 | } |
---|
613 | } |
---|
614 | ndt = (dt * pdc->dt + (tprobe->sum_bonds - sum_bonds)*pdc->dte ) / tprobe->seq_len; |
---|
615 | pos = (int)ndt; |
---|
616 | |
---|
617 | pt_assert(pos >= 0); |
---|
618 | |
---|
619 | if (pos >= PERC_SIZE) return; /* out of observation */ |
---|
620 | tprobe->perc[pos] ++; |
---|
621 | if (ptnd.new_match) { /* save the result in probematch */ |
---|
622 | PT_probematch *match; |
---|
623 | if (psg.data[name].match) { |
---|
624 | if (psg.data[name].match->dt < ndt) return; |
---|
625 | /* there is a better hit for that sequence */ |
---|
626 | match = psg.data[name].match; |
---|
627 | }else{ |
---|
628 | match = create_PT_probematch(); |
---|
629 | aisc_link(&ptnd.locs->ppm, match); |
---|
630 | psg.data[name].match = match; |
---|
631 | } |
---|
632 | match->name = name; |
---|
633 | match->b_pos = apos - parts->start; /* thats not correct !!! */ |
---|
634 | match->rpos = rpos-parts->start; |
---|
635 | match->N_mismatches = -1; /* there are no mismatches in this mode */ |
---|
636 | match->mismatches = -1; |
---|
637 | match->wmismatches = dt; /* only weighted mismatches (maybe) */ |
---|
638 | match->dt = ndt; |
---|
639 | match->sequence = psg.main_probe; |
---|
640 | match->reversed = psg.reversed; |
---|
641 | } |
---|
642 | } |
---|
643 | static int ptnd_check_inc_mode(PT_pdc *pdc ,PT_probeparts *parts,double dt,double sum_bonds) |
---|
644 | { |
---|
645 | PT_tprobes *tprobe = parts->source; |
---|
646 | double ndt = (dt * pdc->dt + (tprobe->sum_bonds - sum_bonds)*pdc->dte ) / tprobe->seq_len; |
---|
647 | int pos = (int)ndt; |
---|
648 | |
---|
649 | pt_assert(pos >= 0); |
---|
650 | |
---|
651 | if (pos >= PERC_SIZE) return 1; /* out of observation */ |
---|
652 | return 0; |
---|
653 | } |
---|
654 | |
---|
655 | struct ptnd_chain_check_part { |
---|
656 | ptnd_chain_check_part(int s) : split(s) {} |
---|
657 | int split; |
---|
658 | int operator() (int name, int apos, int rpos) |
---|
659 | { |
---|
660 | char *probe = psg.probe; |
---|
661 | int height = psg.height; |
---|
662 | double sbond = ptnd.sum_bonds; |
---|
663 | double dt = ptnd.dt; |
---|
664 | double h = 1.0; |
---|
665 | int pos; |
---|
666 | int base; |
---|
667 | |
---|
668 | if (!ptnd.new_match && psg.data[name].is_group) return 0; /* dont count group or neverminds */ |
---|
669 | if (probe) { |
---|
670 | pos = rpos+psg.height; |
---|
671 | while (probe[height] && (base = psg.data[name].data[pos])) { |
---|
672 | if (!split && (h = ptnd_check_split(ptnd.pdc, probe, height, |
---|
673 | base) < 0.0) ) { |
---|
674 | dt -= h; |
---|
675 | split = 1; |
---|
676 | }else{ |
---|
677 | dt += h; |
---|
678 | sbond += ptnd_check_max_bond(ptnd.pdc,probe[height]) - h; |
---|
679 | } |
---|
680 | height++; pos++; |
---|
681 | } |
---|
682 | } |
---|
683 | ptnd_check_part_inc_dt(ptnd.pdc,ptnd.parts,name,apos,rpos,dt,sbond); |
---|
684 | return 0; |
---|
685 | } |
---|
686 | }; |
---|
687 | |
---|
688 | /* go down the tree to chains and leafs; check all */ |
---|
689 | static void ptnd_check_part_all(POS_TREE *pt, double dt, double sum_bonds) |
---|
690 | { |
---|
691 | int base; |
---|
692 | int name, apos, rpos; |
---|
693 | |
---|
694 | if (pt == NULL) |
---|
695 | return; |
---|
696 | if (PT_read_type(pt) == PT_NT_LEAF) { |
---|
697 | name = PT_read_name(psg.ptmain,pt); |
---|
698 | if (!ptnd.new_match && psg.data[name].is_group) return; |
---|
699 | rpos = PT_read_rpos(psg.ptmain,pt); |
---|
700 | apos = PT_read_apos(psg.ptmain,pt); |
---|
701 | ptnd_check_part_inc_dt( ptnd.pdc, ptnd.parts, |
---|
702 | name,apos,rpos, dt, sum_bonds); |
---|
703 | }else if (PT_read_type(pt) == PT_NT_CHAIN) { |
---|
704 | psg.probe = 0; |
---|
705 | ptnd.dt = dt; |
---|
706 | ptnd.sum_bonds = sum_bonds; |
---|
707 | PT_read_chain(psg.ptmain,pt, ptnd_chain_check_part(0)); |
---|
708 | }else{ |
---|
709 | for (base = PT_QU; base< PT_B_MAX; base++) { |
---|
710 | ptnd_check_part_all(PT_read_son(psg.ptmain,pt,(PT_BASES)base),dt,sum_bonds); |
---|
711 | } |
---|
712 | } |
---|
713 | } |
---|
714 | /* search down the tree to find matching species for the given probe */ |
---|
715 | static void ptnd_check_part(char *probe, POS_TREE *pt,int height, double dt, double sum_bonds, int split) |
---|
716 | { |
---|
717 | int name; |
---|
718 | int i; |
---|
719 | POS_TREE *pthelp; |
---|
720 | int rpos,apos,pos; |
---|
721 | double ndt,nsum_bonds,h; |
---|
722 | int nsplit; |
---|
723 | int ref; |
---|
724 | |
---|
725 | if (!pt) return; |
---|
726 | if (dt/ptnd.parts->source->seq_len > PERC_SIZE) return; /* out of scope */ |
---|
727 | if (PT_read_type(pt) == PT_NT_NODE && probe[height]) { |
---|
728 | if (split && ptnd_check_inc_mode(ptnd.pdc ,ptnd.parts, dt, sum_bonds)) return; |
---|
729 | for (i=PT_A; i<PT_B_MAX; i++) { |
---|
730 | if (( pthelp = PT_read_son(psg.ptmain,pt, (PT_BASES)i) )) |
---|
731 | { |
---|
732 | nsplit = split; |
---|
733 | nsum_bonds = sum_bonds; |
---|
734 | if (height < PT_PART_DEEP) { |
---|
735 | if ( i != probe[height] ) continue; |
---|
736 | ndt = dt; |
---|
737 | }else{ |
---|
738 | if (split) { |
---|
739 | h = ptnd_check_split(ptnd.pdc, probe, height,i); |
---|
740 | if (h>0.0) ndt = dt+h; else ndt = dt-h; |
---|
741 | }else if ((h = ptnd_check_split(ptnd.pdc, probe, height,i)) < 0.0 ) { |
---|
742 | ndt = dt - h; |
---|
743 | nsplit = 1; |
---|
744 | }else{ |
---|
745 | ndt = dt + h; |
---|
746 | nsum_bonds += |
---|
747 | ptnd_check_max_bond(ptnd.pdc,probe[height]) - h; |
---|
748 | } |
---|
749 | } |
---|
750 | if (nsplit && height <= DOMAIN_MIN_LENGTH) continue; |
---|
751 | ptnd_check_part(probe,pthelp,height+1,ndt,nsum_bonds,nsplit); |
---|
752 | } |
---|
753 | } |
---|
754 | return; |
---|
755 | } |
---|
756 | if (probe[height]) { |
---|
757 | if (PT_read_type(pt) == PT_NT_LEAF) { |
---|
758 | name = PT_read_name(psg.ptmain,pt); |
---|
759 | if (!ptnd.new_match && psg.data[name].is_group) return; |
---|
760 | rpos = PT_read_rpos(psg.ptmain,pt); |
---|
761 | apos = PT_read_apos(psg.ptmain,pt); |
---|
762 | pos = rpos + height; |
---|
763 | if (pos + (int)(strlen(probe+height)) >= psg.data[name].size) /* after end */ |
---|
764 | return; |
---|
765 | while (probe[height] && (ref = psg.data[name].data[pos])) { |
---|
766 | if (split) { |
---|
767 | h = ptnd_check_split(ptnd.pdc, probe, height,ref); |
---|
768 | if (h<0.0) dt -= h; else dt += h; |
---|
769 | }else if ( (h = ptnd_check_split(ptnd.pdc, probe, height, |
---|
770 | ref)) < 0.0 ) { |
---|
771 | dt -= h; |
---|
772 | split = 1; |
---|
773 | }else{ |
---|
774 | dt += h; |
---|
775 | sum_bonds += ptnd_check_max_bond(ptnd.pdc,probe[height]) - h; |
---|
776 | } |
---|
777 | height++; pos++; |
---|
778 | } |
---|
779 | ptnd_check_part_inc_dt( ptnd.pdc,ptnd.parts, |
---|
780 | name, apos, rpos, dt, sum_bonds); |
---|
781 | return; |
---|
782 | } else { /* chain */ |
---|
783 | psg.probe = probe; |
---|
784 | psg.height = height; |
---|
785 | ptnd.dt = dt; |
---|
786 | ptnd.sum_bonds = sum_bonds; |
---|
787 | PT_read_chain(psg.ptmain,pt, ptnd_chain_check_part(split)); |
---|
788 | return; |
---|
789 | } |
---|
790 | } |
---|
791 | ptnd_check_part_all(pt,dt,sum_bonds); |
---|
792 | } |
---|
793 | static void ptnd_check_probepart(PT_pdc *pdc) |
---|
794 | { |
---|
795 | PT_probeparts *parts; |
---|
796 | ptnd.pdc = pdc; |
---|
797 | for ( parts = pdc->parts; |
---|
798 | parts; |
---|
799 | parts = parts->next) { |
---|
800 | ptnd.parts = parts; |
---|
801 | ptnd_check_part(parts->sequence, psg.pt, 0, parts->dt, parts->sum_bonds,0); |
---|
802 | } |
---|
803 | } |
---|
804 | /*********************************************************************** |
---|
805 | search for possible probes |
---|
806 | ***********************************************************************/ |
---|
807 | inline int ptnd_check_tprobe(PT_pdc *pdc, const char *probe, int len) |
---|
808 | { |
---|
809 | int occ[PT_B_MAX] = { 0, 0, 0, 0, 0, 0 }; |
---|
810 | |
---|
811 | int count = 0; |
---|
812 | while (count<len) { |
---|
813 | occ[safeCharIndex(probe[count++])]++; |
---|
814 | } |
---|
815 | int gc = occ[PT_G]+occ[PT_C]; |
---|
816 | int at = occ[PT_A]+occ[PT_T]; |
---|
817 | |
---|
818 | int all = gc+at; |
---|
819 | |
---|
820 | if (all < len) return 1; // there were some unwanted characters ('N' or '.') |
---|
821 | if (all < pdc->probelen) return 1; |
---|
822 | |
---|
823 | if (gc < pdc->min_gc*len) return 1; |
---|
824 | if (gc > pdc->max_gc*len) return 1; |
---|
825 | |
---|
826 | double temp = at*2 + gc*4; |
---|
827 | if (temp< pdc->mintemp) return 1; |
---|
828 | if (temp>pdc->maxtemp) return 1; |
---|
829 | |
---|
830 | return 0; |
---|
831 | } |
---|
832 | |
---|
833 | static long ptnd_build_probes_collect(const char *probe, long count, void*) { |
---|
834 | PT_tprobes *tprobe; |
---|
835 | if (count >= ptnd.locs->group_count*ptnd.pdc->mintarget) { |
---|
836 | tprobe = create_PT_tprobes(); |
---|
837 | tprobe->sequence = strdup(probe); |
---|
838 | tprobe->temp = pt_get_temperature(probe); |
---|
839 | tprobe->groupsize = (int)count; |
---|
840 | aisc_link(&ptnd.pdc->ptprobes, tprobe); |
---|
841 | } |
---|
842 | return count; |
---|
843 | } |
---|
844 | |
---|
845 | inline void PT_incr_hash(GB_HASH *hash, char *sequence, int len) { |
---|
846 | char c = sequence[len]; |
---|
847 | sequence[len] = 0; |
---|
848 | |
---|
849 | pt_assert(strlen(sequence) == (size_t)len); |
---|
850 | |
---|
851 | GBS_incr_hash(hash, sequence); |
---|
852 | |
---|
853 | sequence[len] = c; |
---|
854 | } |
---|
855 | |
---|
856 | static void ptnd_add_sequence_to_hash(PT_pdc *pdc, GB_HASH *hash, char *sequence, int seq_len, int probe_len, char *prefix, int prefix_len) { |
---|
857 | int pos; |
---|
858 | if (*prefix) { // partition search, else very large hash tables (>60 mbytes) |
---|
859 | for (pos = seq_len-probe_len; pos >= 0; pos--, sequence++) { |
---|
860 | if ((*prefix!= *sequence || strncmp(sequence,prefix,prefix_len)) ) continue; |
---|
861 | if (!ptnd_check_tprobe(pdc,sequence, probe_len)) { |
---|
862 | PT_incr_hash(hash, sequence, probe_len); |
---|
863 | } |
---|
864 | } |
---|
865 | } |
---|
866 | else { |
---|
867 | for (pos = seq_len-probe_len; pos >= 0; pos--, sequence++) { |
---|
868 | if (!ptnd_check_tprobe(pdc,sequence, probe_len)) { |
---|
869 | PT_incr_hash(hash, sequence, probe_len); |
---|
870 | } |
---|
871 | } |
---|
872 | } |
---|
873 | } |
---|
874 | |
---|
875 | static long ptnd_collect_hash(const char *key, long val, void *gb_hash) { |
---|
876 | pt_assert(val); |
---|
877 | GBS_incr_hash((GB_HASH*)gb_hash, key); |
---|
878 | return val; |
---|
879 | } |
---|
880 | |
---|
881 | #if defined(DEVEL_RALF) |
---|
882 | static long avoid_hash_warning(const char *, long , void*) { return 0; } |
---|
883 | #endif // DEVEL_RALF |
---|
884 | |
---|
885 | static void ptnd_build_tprobes(PT_pdc *pdc, int group_count) { |
---|
886 | int *group_idx = new int[group_count]; |
---|
887 | unsigned long datasize = 0; // of marked species/genes |
---|
888 | unsigned long maxseqlength = 0; // of marked species/genes |
---|
889 | |
---|
890 | // get group indices and count datasize of marked species |
---|
891 | { |
---|
892 | int used_idx = 0; |
---|
893 | for (int name = 0; name < psg.data_count; name++) { |
---|
894 | if(psg.data[name].is_group == 1) { |
---|
895 | group_idx[used_idx++] = name; // store marked group indices |
---|
896 | unsigned long size = psg.data[name].size; |
---|
897 | datasize += size; |
---|
898 | if (datasize<size) datasize = ULONG_MAX; // avoid overflow! |
---|
899 | if (size > maxseqlength) maxseqlength = size; |
---|
900 | } |
---|
901 | } |
---|
902 | pt_assert(used_idx == group_count); |
---|
903 | } |
---|
904 | |
---|
905 | unsigned long outer_hash_size; |
---|
906 | int partsize = 0; // no partitions |
---|
907 | { |
---|
908 | const unsigned long max_allowed_hash_size = 1000000; // approx. |
---|
909 | const unsigned long max_allowed_tprobes = (unsigned long)(max_allowed_hash_size*0.66); // results in 66% fill rate for hash (which is much!) |
---|
910 | |
---|
911 | // tests found about 5-8% tprobes (in relation to datasize) -> we use 10% here |
---|
912 | unsigned long estimated_no_of_tprobes = (unsigned long)((datasize-pdc->probelen+1)*0.10+0.5); |
---|
913 | |
---|
914 | while (estimated_no_of_tprobes > max_allowed_tprobes) { |
---|
915 | partsize++; |
---|
916 | estimated_no_of_tprobes /= 4; // 4 different bases |
---|
917 | } |
---|
918 | |
---|
919 | outer_hash_size = (unsigned long)(estimated_no_of_tprobes*1.5); |
---|
920 | |
---|
921 | #if defined(DEBUG) |
---|
922 | printf("marked=%i datasize=%lu partsize=%i estimated_no_of_tprobes=%lu outer_hash_size=%lu\n", |
---|
923 | group_count, datasize, partsize, estimated_no_of_tprobes, outer_hash_size); |
---|
924 | #endif // DEBUG |
---|
925 | } |
---|
926 | |
---|
927 | #if defined(DEBUG) |
---|
928 | GBS_clear_hash_statistic_summary("outer"); |
---|
929 | #endif // DEBUG |
---|
930 | |
---|
931 | const int hash_multiply = 4; // hash fill ratio is 1/hash_multiply |
---|
932 | |
---|
933 | char partstring[256]; |
---|
934 | PT_init_base_string_counter(partstring,PT_A,partsize); |
---|
935 | while (!PT_base_string_counter_eof(partstring)) { |
---|
936 | #if defined(DEBUG) |
---|
937 | fputs("partition='", stdout); |
---|
938 | for (int i = 0; i<partsize; ++i) { |
---|
939 | putchar("ACGT"[partstring[i]-PT_A]); |
---|
940 | } |
---|
941 | fputs("'\n", stdout); |
---|
942 | #endif // DEBUG |
---|
943 | |
---|
944 | GB_HASH *hash_outer = GBS_create_hash(outer_hash_size, GB_MIND_CASE); // count in how many groups/sequences the tprobe occurs (key = tprobe, value = counter) |
---|
945 | |
---|
946 | #if defined(DEBUG) |
---|
947 | GBS_clear_hash_statistic_summary("inner"); |
---|
948 | #endif // DEBUG |
---|
949 | |
---|
950 | // for (name = 0; name < psg.data_count; name++) { |
---|
951 | // if(psg.data[name].is_group != 1) continue; |
---|
952 | for (int g = 0; g<group_count; ++g) { |
---|
953 | int name = group_idx[g]; |
---|
954 | long possible_tprobes = psg.data[name].size-pdc->probelen+1; |
---|
955 | GB_HASH *hash_one = GBS_create_hash(possible_tprobes*hash_multiply, GB_MIND_CASE); // count tprobe occurrences for one group/sequence |
---|
956 | ptnd_add_sequence_to_hash(pdc, hash_one, psg.data[name].data, psg.data[name].size, pdc->probelen, partstring, partsize); |
---|
957 | GBS_hash_do_loop(hash_one, ptnd_collect_hash, hash_outer); // merge hash_one into hash |
---|
958 | #if defined(DEBUG) |
---|
959 | GBS_calc_hash_statistic(hash_one, "inner", 0); |
---|
960 | #endif // DEBUG |
---|
961 | GBS_free_hash(hash_one); |
---|
962 | } |
---|
963 | PT_sequence *seq; |
---|
964 | for ( seq = pdc->sequences; seq; seq = seq->next) { |
---|
965 | long possible_tprobes = seq->seq.size-pdc->probelen+1; |
---|
966 | GB_HASH *hash_one = GBS_create_hash(possible_tprobes*hash_multiply, GB_MIND_CASE); // count tprobe occurrences for one group/sequence |
---|
967 | ptnd_add_sequence_to_hash(pdc, hash_one, seq->seq.data, seq->seq.size, pdc->probelen, partstring, partsize); |
---|
968 | GBS_hash_do_loop(hash_one, ptnd_collect_hash, hash_outer); // merge hash_one into hash |
---|
969 | #if defined(DEBUG) |
---|
970 | GBS_calc_hash_statistic(hash_one, "inner", 0); |
---|
971 | #endif // DEBUG |
---|
972 | GBS_free_hash(hash_one); |
---|
973 | } |
---|
974 | |
---|
975 | #if defined(DEBUG) |
---|
976 | GBS_print_hash_statistic_summary("inner"); |
---|
977 | #endif // DEBUG |
---|
978 | |
---|
979 | GBS_hash_do_loop(hash_outer, ptnd_build_probes_collect, NULL); |
---|
980 | |
---|
981 | #if defined(DEBUG) |
---|
982 | GBS_calc_hash_statistic(hash_outer, "outer", 1); |
---|
983 | #if defined(DEVEL_RALF) |
---|
984 | GBS_hash_do_loop(hash_outer, avoid_hash_warning, NULL); // hash is known to be overfilled - avoid assertion |
---|
985 | #endif // DEVEL_RALF |
---|
986 | #endif // DEBUG |
---|
987 | |
---|
988 | GBS_free_hash(hash_outer); |
---|
989 | PT_inc_base_string_count(partstring,PT_A,PT_B_MAX,partsize); |
---|
990 | } |
---|
991 | #if defined(DEBUG) |
---|
992 | GBS_print_hash_statistic_summary("outer"); |
---|
993 | #endif // DEBUG |
---|
994 | } |
---|
995 | |
---|
996 | #if 0 |
---|
997 | static void ptnd_print_probes(PT_pdc *pdc) { |
---|
998 | PT_tprobes *tprobe; |
---|
999 | char buffer[1024]; |
---|
1000 | int i; |
---|
1001 | |
---|
1002 | for ( tprobe = pdc->tprobes; |
---|
1003 | tprobe; |
---|
1004 | tprobe = tprobe->next ) { |
---|
1005 | strncpy(buffer,tprobe->sequence,250); |
---|
1006 | buffer[250] = 0; |
---|
1007 | PT_base_2_string(buffer,0); |
---|
1008 | printf("seq: %s group %i mishits: %i ",buffer,tprobe->groupsize,tprobe->mishit); |
---|
1009 | for (i=0;i<PERC_SIZE;i++) { |
---|
1010 | printf("%3i,",tprobe->perc[i]); |
---|
1011 | } |
---|
1012 | printf("\n"); |
---|
1013 | } |
---|
1014 | } |
---|
1015 | #endif |
---|
1016 | |
---|
1017 | extern "C" int PT_start_design(PT_pdc *pdc, int /*dummy*/) { |
---|
1018 | |
---|
1019 | // IDP probe design |
---|
1020 | |
---|
1021 | PT_local *locs = (PT_local*)pdc->mh.parent->parent; |
---|
1022 | ptnd.new_match = 0; |
---|
1023 | ptnd.locs = locs; |
---|
1024 | ptnd.pdc = pdc; |
---|
1025 | |
---|
1026 | const char *error; |
---|
1027 | { |
---|
1028 | char *unknown_names = ptpd_read_names(locs, pdc->names.data, pdc->checksums.data, error); /* read the names */ |
---|
1029 | if (unknown_names) free(unknown_names); // unused here (handled by PT_unknown_names) |
---|
1030 | } |
---|
1031 | |
---|
1032 | if (error) { |
---|
1033 | pt_export_error(locs, error); |
---|
1034 | return 0; |
---|
1035 | } |
---|
1036 | |
---|
1037 | PT_sequence *seq; |
---|
1038 | for ( seq = pdc->sequences; seq; seq = seq->next) { // Convert all external sequence to internal format |
---|
1039 | seq->seq.size = probe_compress_sequence(seq->seq.data, seq->seq.size); |
---|
1040 | locs->group_count++; |
---|
1041 | } |
---|
1042 | |
---|
1043 | if (locs->group_count <=0) { |
---|
1044 | pt_export_error(locs, GBS_global_string("No %s marked - no probes designed", |
---|
1045 | gene_flag ? "genes" : "species")); |
---|
1046 | return 0; |
---|
1047 | } |
---|
1048 | |
---|
1049 | while (pdc->tprobes) destroy_PT_tprobes(pdc->tprobes); |
---|
1050 | ptnd_build_tprobes(pdc, locs->group_count); |
---|
1051 | while (pdc->sequences) destroy_PT_sequence(pdc->sequences); |
---|
1052 | ptnd_sort_probes_by(pdc,1); |
---|
1053 | ptnd_first_check(pdc); |
---|
1054 | ptnd_check_position(pdc); |
---|
1055 | ptnd_check_bonds(pdc,ptnd.new_match); |
---|
1056 | ptnd_cp_tprobe_2_probepart(pdc); |
---|
1057 | ptnd_duplikate_probepart(pdc); |
---|
1058 | ptnd_sort_parts(pdc); |
---|
1059 | ptnd_remove_duplikated_probepart(pdc); |
---|
1060 | ptnd_check_probepart(pdc); |
---|
1061 | while (pdc->parts) destroy_PT_probeparts(pdc->parts); |
---|
1062 | ptnd_calc_quality(pdc); |
---|
1063 | ptnd_sort_probes_by(pdc,0); |
---|
1064 | ptnd_probe_delete_all_but(pdc,pdc->clipresult); |
---|
1065 | /* ptnd_print_probes(pdc); */ |
---|
1066 | |
---|
1067 | return 0; |
---|
1068 | } |
---|
1069 | |
---|
1070 | void ptnd_new_match(PT_local * locs, char *probestring) |
---|
1071 | { |
---|
1072 | PT_pdc *pdc = locs->pdc; |
---|
1073 | PT_tprobes *tprobe; |
---|
1074 | |
---|
1075 | ptnd.locs = locs; |
---|
1076 | ptnd.pdc = pdc; |
---|
1077 | ptnd.new_match = 1; |
---|
1078 | |
---|
1079 | if (!pdc) return; /* no config */ |
---|
1080 | |
---|
1081 | tprobe = create_PT_tprobes(); |
---|
1082 | tprobe->sequence = strdup(probestring); |
---|
1083 | |
---|
1084 | aisc_link(&pdc->ptprobes, tprobe); |
---|
1085 | ptnd_check_bonds(pdc,ptnd.new_match); |
---|
1086 | ptnd_cp_tprobe_2_probepart(pdc); |
---|
1087 | ptnd_duplikate_probepart(pdc); |
---|
1088 | ptnd_sort_parts(pdc); |
---|
1089 | ptnd_remove_duplikated_probepart(pdc); |
---|
1090 | ptnd_check_probepart(pdc); |
---|
1091 | |
---|
1092 | while (pdc->parts) destroy_PT_probeparts(pdc->parts); |
---|
1093 | while (( tprobe = pdc->tprobes )) destroy_PT_tprobes(tprobe); |
---|
1094 | } |
---|