1 | |
---|
2 | // #include <malloc.h> |
---|
3 | |
---|
4 | #include "ali_misc.hxx" |
---|
5 | #include "ali_tlist.hxx" |
---|
6 | #include "ali_postree.hxx" |
---|
7 | |
---|
8 | |
---|
9 | |
---|
10 | /***************************************************************************** |
---|
11 | * |
---|
12 | * STRUCT: ALI_POSTREE_NODE |
---|
13 | * |
---|
14 | *****************************************************************************/ |
---|
15 | |
---|
16 | |
---|
17 | ALI_POSTREE_NODE::ALI_POSTREE_NODE(ALI_POSTREE_NODE_TYPE t, |
---|
18 | unsigned long nochild_position) |
---|
19 | { |
---|
20 | typ = t; |
---|
21 | if (typ == Node) { |
---|
22 | node.number_of_children = nochild_position; |
---|
23 | node.children = (ALI_POSTREE_NODE *(*) []) |
---|
24 | CALLOC((unsigned int) nochild_position, |
---|
25 | sizeof(ALI_POSTREE_NODE *)); |
---|
26 | if (node.children == 0) |
---|
27 | ali_fatal_error("Out of memory"); |
---|
28 | } |
---|
29 | else { |
---|
30 | leaf.position = nochild_position; |
---|
31 | leaf.next = 0; |
---|
32 | } |
---|
33 | } |
---|
34 | |
---|
35 | ALI_POSTREE_NODE::~ALI_POSTREE_NODE(void) |
---|
36 | { |
---|
37 | if (typ == Node) { |
---|
38 | while (node.number_of_children-- > 0) { |
---|
39 | if ((*node.children)[node.number_of_children]) |
---|
40 | delete (*node.children)[node.number_of_children]; |
---|
41 | } |
---|
42 | free((char *) node.children); |
---|
43 | } |
---|
44 | } |
---|
45 | |
---|
46 | ALI_POSTREE_NODE *ALI_POSTREE_NODE::leftmost_leaf(void) |
---|
47 | { |
---|
48 | ALI_POSTREE_NODE *n = this; |
---|
49 | long i; |
---|
50 | |
---|
51 | while (n->typ == Node) { |
---|
52 | for (i = 0; |
---|
53 | i < n->node.number_of_children && !(*n->node.children)[i]; i++); |
---|
54 | if (i < n->node.number_of_children) |
---|
55 | n = (*n->node.children)[i]; |
---|
56 | else |
---|
57 | ali_fatal_error("Found node without leaf", |
---|
58 | "ALI_POSTREE_NODE::leftmost_leaf()"); |
---|
59 | } |
---|
60 | return n; |
---|
61 | } |
---|
62 | |
---|
63 | ALI_POSTREE_NODE *ALI_POSTREE_NODE::rightmost_leaf(void) |
---|
64 | { |
---|
65 | ALI_POSTREE_NODE *n = this; |
---|
66 | long i; |
---|
67 | |
---|
68 | while (n->typ == Node) { |
---|
69 | for (i = (long) n->node.number_of_children - 1; |
---|
70 | i >= 0 && !(*n->node.children)[i]; i--); |
---|
71 | if (i >= 0) |
---|
72 | n = (*n->node.children)[i]; |
---|
73 | else |
---|
74 | ali_fatal_error("Found node without leaf", |
---|
75 | "ALI_POSTREE_NODE::rightmost_leaf()"); |
---|
76 | } |
---|
77 | return n; |
---|
78 | } |
---|
79 | |
---|
80 | ALI_POSTREE_NODE *ALI_POSTREE_NODE::link_leafs(ALI_POSTREE_NODE *last) |
---|
81 | { |
---|
82 | long i; |
---|
83 | |
---|
84 | if (typ == Node) { |
---|
85 | for (i = (long) node.number_of_children - 1; i >= 0; i--) |
---|
86 | if ((*node.children)[i]) |
---|
87 | last = (*node.children)[i]->link_leafs(last); |
---|
88 | } |
---|
89 | else { |
---|
90 | leaf.next = last; |
---|
91 | last = this; |
---|
92 | } |
---|
93 | return last; |
---|
94 | } |
---|
95 | |
---|
96 | |
---|
97 | void ALI_POSTREE_NODE::print(unsigned long depth) |
---|
98 | { |
---|
99 | unsigned long l, nr; |
---|
100 | |
---|
101 | |
---|
102 | if (typ == Node) { |
---|
103 | for (nr = 0; nr < node.number_of_children; nr++) { |
---|
104 | for (l = 0; l < depth; l++) |
---|
105 | printf(" "); |
---|
106 | printf("%2d:\n",nr); |
---|
107 | if ((*node.children)[nr]) |
---|
108 | (*node.children)[nr]->print(depth+1); |
---|
109 | } |
---|
110 | } |
---|
111 | else { |
---|
112 | for (l = 0; l < depth; l++) |
---|
113 | printf(" "); |
---|
114 | printf("<%d>\n",leaf.position); |
---|
115 | } |
---|
116 | } |
---|
117 | |
---|
118 | |
---|
119 | /***************************************************************************** |
---|
120 | * |
---|
121 | * CLASS: ALI_POSTREE (PRIVAT) |
---|
122 | * |
---|
123 | * DESCRIPTION: Implementation of a position tree |
---|
124 | * |
---|
125 | *****************************************************************************/ |
---|
126 | |
---|
127 | unsigned char *ALI_POSTREE::make_postree_sequence( |
---|
128 | unsigned char *seq, unsigned long seq_len, unsigned char terminal) |
---|
129 | { |
---|
130 | unsigned char *buffer, *src, *dst; |
---|
131 | |
---|
132 | buffer = (unsigned char *) CALLOC((unsigned int) seq_len + 1, |
---|
133 | sizeof(unsigned char)); |
---|
134 | if (buffer == 0) |
---|
135 | ali_fatal_error("Out of memory"); |
---|
136 | |
---|
137 | for (src = seq, dst = buffer; src < seq + seq_len;) |
---|
138 | *dst++ = *src++; |
---|
139 | |
---|
140 | *dst = terminal; |
---|
141 | |
---|
142 | return buffer; |
---|
143 | } |
---|
144 | |
---|
145 | void ALI_POSTREE::insert(unsigned char *seq, unsigned char terminal) |
---|
146 | { |
---|
147 | ALI_POSTREE_NODE *v; |
---|
148 | unsigned char *akt, *pos = seq; |
---|
149 | unsigned long position; |
---|
150 | |
---|
151 | for (position = 0; *pos != terminal && *pos < number_of_branches; |
---|
152 | position++, pos++) { |
---|
153 | akt = pos; |
---|
154 | v = root; |
---|
155 | /* |
---|
156 | * Find maximal equal path in POSTREE |
---|
157 | */ |
---|
158 | while ((*v->node.children)[*akt] && (*v->node.children)[*akt]->is_node()) |
---|
159 | v = (*v->node.children)[*akt++]; |
---|
160 | /* |
---|
161 | * Make a unique leaf for the new prefix |
---|
162 | */ |
---|
163 | if (!(*v->node.children)[*akt]) |
---|
164 | (*v->node.children)[*akt] = new ALI_POSTREE_NODE(Leaf,position); |
---|
165 | else { |
---|
166 | ALI_POSTREE_NODE *old_leaf = (*v->node.children)[*akt]; |
---|
167 | unsigned char *akt2 = (unsigned char *) |
---|
168 | ((int) seq + (int) old_leaf->leaf.position + |
---|
169 | (int) akt - (int) pos); |
---|
170 | /* |
---|
171 | * Expande equal part of path in prefix tree |
---|
172 | */ |
---|
173 | while (*akt == *akt2) { |
---|
174 | (*v->node.children)[*akt] = new ALI_POSTREE_NODE(Node, |
---|
175 | number_of_branches); |
---|
176 | v = (*v->node.children)[*akt]; |
---|
177 | akt++; |
---|
178 | akt2++; |
---|
179 | } |
---|
180 | (*v->node.children)[*akt2] = old_leaf; |
---|
181 | (*v->node.children)[*akt] = new ALI_POSTREE_NODE(Leaf,position); |
---|
182 | } |
---|
183 | } |
---|
184 | |
---|
185 | if (*pos >= number_of_branches) |
---|
186 | ali_fatal_error("Unexpected value","ALI_POSTREE::insert()"); |
---|
187 | if ((*root->node.children)[*pos]) |
---|
188 | ali_fatal_error("Terminal occupied","ALI_POSTREE::insert()"); |
---|
189 | else |
---|
190 | (*root->node.children)[*pos] = new ALI_POSTREE_NODE(Leaf,position); |
---|
191 | } |
---|
192 | |
---|
193 | |
---|
194 | void ALI_POSTREE::link_leafs(void) |
---|
195 | { |
---|
196 | ALI_POSTREE_NODE *first; |
---|
197 | |
---|
198 | first = root->link_leafs(0); |
---|
199 | } |
---|
200 | |
---|
201 | |
---|
202 | ali_postree_sol *ALI_POSTREE::make_postree_solution( |
---|
203 | ALI_POSTREE_NODE *first, ALI_POSTREE_NODE *last, |
---|
204 | unsigned long min_pos, unsigned long max_pos, |
---|
205 | unsigned long seq_len, unsigned long errors, |
---|
206 | ALI_TSTACK<char> *stack) |
---|
207 | { |
---|
208 | unsigned long i; |
---|
209 | ali_postree_sol *solution = 0; |
---|
210 | ALI_TLIST<unsigned long> *pos_list = 0; |
---|
211 | |
---|
212 | /* |
---|
213 | * Make list of positions and make _insertations_ for the errors |
---|
214 | */ |
---|
215 | while (first != last) { |
---|
216 | if (first->leaf.position >= min_pos && first->leaf.position <= max_pos && |
---|
217 | first->leaf.position + seq_len + errors <= length_of_sequence) { |
---|
218 | if (pos_list == 0) |
---|
219 | pos_list = new ALI_TLIST<unsigned long>(first->leaf.position); |
---|
220 | else |
---|
221 | pos_list->append_end(first->leaf.position); |
---|
222 | } |
---|
223 | first = first->leaf.next; |
---|
224 | } |
---|
225 | if (first->leaf.position >= min_pos && first->leaf.position <= max_pos && |
---|
226 | first->leaf.position + seq_len + errors <= length_of_sequence) { |
---|
227 | if (pos_list == 0) |
---|
228 | pos_list = new ALI_TLIST<unsigned long>(first->leaf.position); |
---|
229 | else |
---|
230 | pos_list->append_end(first->leaf.position); |
---|
231 | } |
---|
232 | |
---|
233 | /* |
---|
234 | * Make solution with _expanded_ path |
---|
235 | */ |
---|
236 | if (pos_list != 0) { |
---|
237 | for (i = 0; i < errors; i++) |
---|
238 | stack->push(ALI_POSTREE_STACK_INS); |
---|
239 | solution = new ali_postree_sol(stack,pos_list); |
---|
240 | for (i = 0; i < errors; i++) |
---|
241 | stack->pop(); |
---|
242 | } |
---|
243 | |
---|
244 | return solution; |
---|
245 | } |
---|
246 | |
---|
247 | |
---|
248 | unsigned long ALI_POSTREE::maximal_position(ALI_POSTREE_NODE *first, |
---|
249 | ALI_POSTREE_NODE *last) |
---|
250 | { |
---|
251 | unsigned long maximum; |
---|
252 | |
---|
253 | maximum = first->leaf.position; |
---|
254 | |
---|
255 | while (first != last) { |
---|
256 | first = first->leaf.next; |
---|
257 | if (first->leaf.position > maximum) |
---|
258 | maximum = first->leaf.position; |
---|
259 | } |
---|
260 | |
---|
261 | return maximum; |
---|
262 | } |
---|
263 | |
---|
264 | |
---|
265 | void ALI_POSTREE::handle_remaining_sequence( |
---|
266 | unsigned char *seq, unsigned long seq_len, |
---|
267 | unsigned long seq_pos, unsigned long im_seq_len, |
---|
268 | unsigned long min_pos, unsigned long max_pos, |
---|
269 | unsigned long ref_pos, unsigned long errors, |
---|
270 | ALI_TSTACK<char> *stack, |
---|
271 | ALI_TLIST<ali_postree_sol *> *sol_list) |
---|
272 | { |
---|
273 | ali_postree_sol *solution, *new_solution; |
---|
274 | ALI_TSTACK<char> *new_stack; |
---|
275 | ALI_TLIST<unsigned long> *pos_list; |
---|
276 | ALI_TLIST<ali_postree_sol *> *new_sol_list; |
---|
277 | int ok_flag = 0; |
---|
278 | |
---|
279 | if (ref_pos < min_pos || ref_pos > max_pos) |
---|
280 | return; |
---|
281 | |
---|
282 | new_sol_list = new ALI_TLIST<ali_postree_sol *>; |
---|
283 | new_stack = new ALI_TSTACK<char>(seq_len - seq_pos + errors + 1); |
---|
284 | |
---|
285 | finder(root,seq + seq_pos,seq_len - seq_pos, 0, 0, |
---|
286 | ref_pos + im_seq_len,ref_pos + im_seq_len,errors, |
---|
287 | new_stack,new_sol_list); |
---|
288 | |
---|
289 | /* |
---|
290 | * Generate solutions |
---|
291 | */ |
---|
292 | if (!new_sol_list->is_empty()) { |
---|
293 | new_solution = new_sol_list->first(); |
---|
294 | pos_list = new ALI_TLIST<unsigned long>(ref_pos); |
---|
295 | solution = new ali_postree_sol(stack,pos_list,"",new_solution->path); |
---|
296 | delete new_solution; |
---|
297 | sol_list->append_end(solution); |
---|
298 | while (new_sol_list->is_next()) { |
---|
299 | new_solution = new_sol_list->next(); |
---|
300 | pos_list = new ALI_TLIST<unsigned long>(ref_pos); |
---|
301 | solution = new ali_postree_sol(stack,pos_list,"",new_solution->path); |
---|
302 | delete new_solution; |
---|
303 | sol_list->append_end(solution); |
---|
304 | } |
---|
305 | } |
---|
306 | delete new_stack; |
---|
307 | delete new_sol_list; |
---|
308 | } |
---|
309 | |
---|
310 | |
---|
311 | void ALI_POSTREE::finder(ALI_POSTREE_NODE *n, |
---|
312 | unsigned char *seq, unsigned long seq_len, |
---|
313 | unsigned long seq_pos, unsigned long im_seq_len, |
---|
314 | unsigned long min_pos, unsigned long max_pos, |
---|
315 | unsigned long errors, |
---|
316 | ALI_TSTACK<char> *stack, |
---|
317 | ALI_TLIST<ali_postree_sol *> *sol_list) |
---|
318 | { |
---|
319 | ALI_POSTREE_NODE *first, *last; |
---|
320 | ali_postree_sol *solution = 0; |
---|
321 | unsigned long i; |
---|
322 | |
---|
323 | /* |
---|
324 | * Found end of sequence |
---|
325 | */ |
---|
326 | if (seq_pos >= seq_len) { |
---|
327 | if (n->is_leaf()) { |
---|
328 | first = n; |
---|
329 | last = n; |
---|
330 | } |
---|
331 | else { |
---|
332 | first = n->leftmost_leaf(); |
---|
333 | last = n->rightmost_leaf(); |
---|
334 | } |
---|
335 | solution = make_postree_solution(first,last,min_pos,max_pos, |
---|
336 | im_seq_len,errors,stack); |
---|
337 | if (solution) |
---|
338 | sol_list->append_end(solution); |
---|
339 | } |
---|
340 | else { |
---|
341 | /* |
---|
342 | * Found unique position |
---|
343 | */ |
---|
344 | if (n->is_leaf()) { |
---|
345 | handle_remaining_sequence(seq,seq_len,seq_pos,im_seq_len, |
---|
346 | min_pos,max_pos,n->leaf.position,errors, |
---|
347 | stack,sol_list); |
---|
348 | } |
---|
349 | /* |
---|
350 | * Recursive search |
---|
351 | */ |
---|
352 | else { |
---|
353 | if ((*n->node.children)[seq[seq_pos]]) { |
---|
354 | stack->push(ALI_POSTREE_STACK_SUB); |
---|
355 | finder((*n->node.children)[seq[seq_pos]],seq,seq_len,seq_pos + 1, |
---|
356 | im_seq_len + 1,min_pos, max_pos, errors, stack, sol_list); |
---|
357 | stack->pop(); |
---|
358 | } |
---|
359 | /* |
---|
360 | * Recursive search with errors |
---|
361 | */ |
---|
362 | if (errors > 0) { |
---|
363 | /* |
---|
364 | * Deletion (in seq) |
---|
365 | */ |
---|
366 | stack->push(ALI_POSTREE_STACK_DEL); |
---|
367 | finder(n,seq,seq_len,seq_pos + 1,im_seq_len,min_pos,max_pos, |
---|
368 | errors - 1,stack,sol_list); |
---|
369 | stack->pop(); |
---|
370 | |
---|
371 | for (i = 0; i < number_of_branches; i++) { |
---|
372 | if ((*n->node.children)[i]) { |
---|
373 | /* |
---|
374 | * Insertion (in seq) |
---|
375 | */ |
---|
376 | stack->push(ALI_POSTREE_STACK_INS); |
---|
377 | finder((*n->node.children)[i],seq,seq_len,seq_pos, |
---|
378 | im_seq_len + 1,min_pos, max_pos, errors - 1, |
---|
379 | stack, sol_list); |
---|
380 | stack->pop(); |
---|
381 | /* |
---|
382 | * Substitution |
---|
383 | */ |
---|
384 | if (i != seq[seq_pos]) { |
---|
385 | stack->push(ALI_POSTREE_STACK_SUB); |
---|
386 | finder((*n->node.children)[i],seq,seq_len,seq_pos + 1, |
---|
387 | im_seq_len + 1, min_pos, max_pos, errors - 1, |
---|
388 | stack, sol_list); |
---|
389 | stack->pop(); |
---|
390 | } |
---|
391 | } |
---|
392 | } |
---|
393 | } |
---|
394 | } |
---|
395 | } |
---|
396 | } |
---|
397 | |
---|
398 | |
---|
399 | |
---|
400 | |
---|
401 | |
---|
402 | /***************************************************************************** |
---|
403 | * |
---|
404 | * CLASS: ALI_POSTREE (PUBLIC) |
---|
405 | * |
---|
406 | * DESCRIPTION: Implementation of a position tree |
---|
407 | * |
---|
408 | *****************************************************************************/ |
---|
409 | |
---|
410 | ALI_POSTREE::ALI_POSTREE(unsigned long branches, |
---|
411 | unsigned char *seq, unsigned long seq_len, |
---|
412 | unsigned char terminal) |
---|
413 | { |
---|
414 | unsigned char *seq_buffer; |
---|
415 | |
---|
416 | length_of_sequence = seq_len; |
---|
417 | number_of_branches = branches + 1; |
---|
418 | root = new ALI_POSTREE_NODE(Node,number_of_branches); |
---|
419 | |
---|
420 | seq_buffer = make_postree_sequence(seq,seq_len,terminal); |
---|
421 | insert(seq_buffer,terminal); |
---|
422 | link_leafs(); |
---|
423 | |
---|
424 | free((char *) seq_buffer); |
---|
425 | } |
---|
426 | |
---|
427 | |
---|
428 | ALI_TLIST<ali_postree_sol *> *ALI_POSTREE::find( |
---|
429 | unsigned char *seq, unsigned long seq_len, |
---|
430 | unsigned long min_pos, unsigned long max_pos, |
---|
431 | unsigned long errors) |
---|
432 | { |
---|
433 | ALI_TLIST<ali_postree_sol *> *sol_list; |
---|
434 | ALI_TSTACK<char> *stack; |
---|
435 | |
---|
436 | sol_list = new ALI_TLIST<ali_postree_sol *>; |
---|
437 | stack = new ALI_TSTACK<char>(seq_len + errors + 1); |
---|
438 | |
---|
439 | finder(root,seq,seq_len,0,0,min_pos,max_pos,errors,stack,sol_list); |
---|
440 | |
---|
441 | delete stack; |
---|
442 | |
---|
443 | return sol_list; |
---|
444 | } |
---|
445 | |
---|
446 | |
---|
447 | ALI_TLIST<ali_postree_sol *> *ALI_POSTREE::find_complement( |
---|
448 | unsigned char *seq, unsigned long seq_len, |
---|
449 | unsigned long min_pos, unsigned long max_pos, |
---|
450 | float max_costs) |
---|
451 | { |
---|
452 | ALI_TLIST<ali_postree_sol *> *sol_list; |
---|
453 | ALI_TSTACK<char> *stack; |
---|
454 | |
---|
455 | sol_list = new ALI_TLIST<ali_postree_sol *>; |
---|
456 | stack = new ALI_TSTACK<char>(2 * seq_len); |
---|
457 | |
---|
458 | /* |
---|
459 | compl_finder(root,seq,seq_len,0,0,min_pos,max_pos,max_costs,stack,sol_list); |
---|
460 | */ |
---|
461 | |
---|
462 | delete stack; |
---|
463 | |
---|
464 | return sol_list; |
---|
465 | } |
---|
466 | |
---|
467 | |
---|
468 | void ALI_POSTREE::print(void) |
---|
469 | { |
---|
470 | root->print(0); |
---|
471 | } |
---|
472 | |
---|
473 | |
---|
474 | |
---|
475 | |
---|
476 | |
---|
477 | /************************** |
---|
478 | * |
---|
479 | * TESTPART |
---|
480 | * |
---|
481 | ************************** |
---|
482 | |
---|
483 | unsigned char seq[] = {1,1,0,1,1,3,1,1,5,1,1}; |
---|
484 | unsigned long seq_len = 11; |
---|
485 | |
---|
486 | unsigned char fseq[] = {1,1,3,1,1}; |
---|
487 | unsigned long fseq_len = 5; |
---|
488 | |
---|
489 | void print_sol(ALI_TLIST<ali_postree_sol *> *sol_list) |
---|
490 | { |
---|
491 | int i; |
---|
492 | ALI_TLIST<unsigned long> *pos_list; |
---|
493 | ali_postree_sol *solution; |
---|
494 | |
---|
495 | printf("SOL:\n"); |
---|
496 | i = 1; |
---|
497 | while (!sol_list->is_empty()) { |
---|
498 | solution = sol_list->first(); |
---|
499 | pos_list = solution->position_list; |
---|
500 | sol_list->delete_element(); |
---|
501 | if (!pos_list->is_empty()) { |
---|
502 | printf("%2d <%s> : %d",i,solution->path,pos_list->first()); |
---|
503 | while (pos_list->is_next()) |
---|
504 | printf(", %d",pos_list->next()); |
---|
505 | } |
---|
506 | else |
---|
507 | printf("%2d : empty",i); |
---|
508 | printf("\n"); |
---|
509 | delete solution; |
---|
510 | i++; |
---|
511 | } |
---|
512 | delete sol_list; |
---|
513 | } |
---|
514 | |
---|
515 | |
---|
516 | main() |
---|
517 | { |
---|
518 | ALI_TLIST<ali_postree_sol *> *sol_list; |
---|
519 | ALI_POSTREE pos_tree(5,seq,seq_len,4); |
---|
520 | |
---|
521 | pos_tree.print(); |
---|
522 | |
---|
523 | printf("0 Fehler\n"); |
---|
524 | sol_list = pos_tree.find(fseq,fseq_len,0,10,0); |
---|
525 | print_sol(sol_list); |
---|
526 | printf("1 Fehler\n"); |
---|
527 | sol_list = pos_tree.find(fseq,fseq_len,0,10,1); |
---|
528 | print_sol(sol_list); |
---|
529 | printf("2 Fehler\n"); |
---|
530 | sol_list = pos_tree.find(fseq,fseq_len,0,10,2); |
---|
531 | print_sol(sol_list); |
---|
532 | printf("3 Fehler\n"); |
---|
533 | sol_list = pos_tree.find(fseq,fseq_len,0,10,3); |
---|
534 | print_sol(sol_list); |
---|
535 | } |
---|
536 | |
---|
537 | ***************************************/ |
---|