1 | #include <cmath> |
---|
2 | #include <cstdio> |
---|
3 | #include <cstdlib> |
---|
4 | #include <cstring> |
---|
5 | |
---|
6 | #include <sys/times.h> |
---|
7 | |
---|
8 | #include "ps_tools.hxx" |
---|
9 | #include "ps_database.hxx" |
---|
10 | #include "ps_candidate.hxx" |
---|
11 | |
---|
12 | |
---|
13 | void PS_get_leaf_candidates( PS_CandidatePtr _candidate_parent, PS_CandidateSet &_leaf_candidates ) { |
---|
14 | |
---|
15 | for ( PS_CandidateByGainMapRIter next_candidate = _candidate_parent->children.rbegin(); |
---|
16 | next_candidate != _candidate_parent->children.rend(); |
---|
17 | ++next_candidate ) { |
---|
18 | |
---|
19 | if (next_candidate->second->children.size() == 0){ |
---|
20 | _leaf_candidates.insert( next_candidate->second ); |
---|
21 | } |
---|
22 | PS_get_leaf_candidates( &(*next_candidate->second), _leaf_candidates ); |
---|
23 | } |
---|
24 | } |
---|
25 | |
---|
26 | |
---|
27 | typedef set<PS_Node *> PS_NodeSet; |
---|
28 | typedef pair<PS_CandidatePtr,PS_NodeSet> PS_Candidate2NodeSetPair; |
---|
29 | typedef multimap<unsigned long,PS_Candidate2NodeSetPair> PS_Candidate2NodeSetPairByLengthMap; |
---|
30 | |
---|
31 | void PS_get_node_paths( PS_CandidateSet &_leaf_candidates, PS_Candidate2NodeSetPairByLengthMap &_paths ) { |
---|
32 | PS_NodeSet nodepath; |
---|
33 | for ( PS_CandidateSetCIter candidate_iter = _leaf_candidates.begin(); |
---|
34 | candidate_iter != _leaf_candidates.end(); |
---|
35 | ++candidate_iter ) { |
---|
36 | // get nodepath of candidate |
---|
37 | PS_CandidateSPtr candidate_sptr = *candidate_iter; |
---|
38 | PS_CandidatePtr candidate = &(*candidate_sptr); |
---|
39 | nodepath.clear(); |
---|
40 | while ((candidate) && |
---|
41 | (!candidate->node.Null())) { |
---|
42 | nodepath.insert( &(*(candidate->node)) ); |
---|
43 | candidate = candidate->parent; |
---|
44 | } |
---|
45 | // compare nodepath of candidate with stored paths |
---|
46 | PS_Candidate2NodeSetPairByLengthMap::iterator stored_path; |
---|
47 | bool found = false; |
---|
48 | for ( stored_path = _paths.find( candidate->depth ); // find first path with path length == depth |
---|
49 | ((stored_path != _paths.end()) && // while not at end of paths |
---|
50 | (stored_path->first == candidate->depth) && // and path length == depth |
---|
51 | (!found)); // and not found yet |
---|
52 | ++stored_path ) { |
---|
53 | found = stored_path->second.second == nodepath; |
---|
54 | } |
---|
55 | if (!found) { |
---|
56 | // not found -> store nodepath |
---|
57 | candidate_sptr = *candidate_iter; |
---|
58 | candidate = &(*candidate_sptr); |
---|
59 | _paths.insert( pair<unsigned long,PS_Candidate2NodeSetPair>( candidate->depth,PS_Candidate2NodeSetPair( candidate,nodepath ) ) ); |
---|
60 | } |
---|
61 | } |
---|
62 | } |
---|
63 | |
---|
64 | |
---|
65 | typedef set<unsigned long> ULSet; |
---|
66 | typedef set<unsigned short> USSet; |
---|
67 | typedef map<float,float> FFMap; |
---|
68 | typedef set<float> FSet; |
---|
69 | |
---|
70 | inline unsigned long PS_calc_temp( const PS_ProbePtr &_probe ) { |
---|
71 | return ( 4*(_probe->length - _probe->GC_content) + 2*_probe->GC_content ); |
---|
72 | } |
---|
73 | |
---|
74 | void PS_calc_sums_for_nodepath( PS_NodeSet &_nodepath, ULSet &_sums ) { |
---|
75 | // iterate over nodes in path |
---|
76 | ULSet sums_for_node; |
---|
77 | ULSet temperatures; |
---|
78 | |
---|
79 | for ( PS_NodeSet::const_iterator node = _nodepath.begin(); |
---|
80 | node != _nodepath.end(); |
---|
81 | ++node ) { |
---|
82 | |
---|
83 | // iterate over probes of node |
---|
84 | sums_for_node.clear(); |
---|
85 | temperatures.clear(); |
---|
86 | |
---|
87 | for ( PS_ProbeSetCIter probe_iter = (*node)->getProbesBegin(); |
---|
88 | probe_iter != (*node)->getProbesEnd(); |
---|
89 | ++probe_iter ) { |
---|
90 | PS_ProbePtr probe = *probe_iter; |
---|
91 | |
---|
92 | // test if already calculated sums for temperature of probe |
---|
93 | unsigned long temperature = PS_calc_temp( probe ); |
---|
94 | ULSet::iterator found = temperatures.find( temperature ); |
---|
95 | if (found != temperatures.end()) continue; |
---|
96 | |
---|
97 | // calc sums |
---|
98 | temperatures.insert( temperature ); |
---|
99 | // printf( "node [%p] temp (%lu) sums ( ", *node, temperature ); |
---|
100 | |
---|
101 | for ( ULSet::iterator sum = _sums.begin(); |
---|
102 | sum != _sums.end(); |
---|
103 | ++sum ) { |
---|
104 | sums_for_node.insert( *sum + temperature ); |
---|
105 | } |
---|
106 | |
---|
107 | // for ( ULSet::iterator sum = sums_for_node.begin(); |
---|
108 | // sum != sums_for_node.end(); |
---|
109 | // ++sum ) { |
---|
110 | // printf( "%lu ", *sum ); |
---|
111 | // } |
---|
112 | // printf( ")\n"); |
---|
113 | } |
---|
114 | |
---|
115 | // store sums_for_node in _sums |
---|
116 | _sums = sums_for_node; |
---|
117 | } |
---|
118 | } |
---|
119 | bool PS_calc_min_sum_of_square_distances_to_average( PS_NodeSet &_nodepath, |
---|
120 | FFMap &_averages, |
---|
121 | float &_best_average, |
---|
122 | float &_min_sum ) { |
---|
123 | // iterate over nodes in path |
---|
124 | ULSet temperatures; |
---|
125 | FSet sums_for_average; |
---|
126 | float min_sum_for_node = -2.0; |
---|
127 | float best_average = -2.0; |
---|
128 | |
---|
129 | for ( PS_NodeSet::const_iterator node = _nodepath.begin(); |
---|
130 | (node != _nodepath.end()) && |
---|
131 | ((_min_sum < 0) || |
---|
132 | (_min_sum > min_sum_for_node)); |
---|
133 | ++node ) { |
---|
134 | |
---|
135 | // iterate over averages |
---|
136 | for ( FFMap::iterator average = _averages.begin(); |
---|
137 | average != _averages.end(); |
---|
138 | ++average ) { |
---|
139 | if ((_min_sum > 0) && (average->second > _min_sum)) continue; |
---|
140 | |
---|
141 | // iterate over probes of node |
---|
142 | // printf( "\nnode [%p] avg (%.3f,%.3f)", *node, average->first, average->second ); |
---|
143 | sums_for_average.clear(); |
---|
144 | temperatures.clear(); |
---|
145 | |
---|
146 | for ( PS_ProbeSetCIter probe_iter = (*node)->getProbesBegin(); |
---|
147 | probe_iter != (*node)->getProbesEnd(); |
---|
148 | ++probe_iter ) { |
---|
149 | PS_ProbePtr probe = *probe_iter; |
---|
150 | |
---|
151 | // test if already calculated sum for GC-content of probe |
---|
152 | unsigned long temperature = PS_calc_temp( probe ); |
---|
153 | ULSet::iterator found = temperatures.find( temperature ); |
---|
154 | if (found != temperatures.end()) continue; |
---|
155 | |
---|
156 | // calc sum |
---|
157 | temperatures.insert( temperature ); |
---|
158 | float distance = fabsf( average->first - temperature ); |
---|
159 | float sum = average->second + ( distance * distance ); |
---|
160 | sums_for_average.insert( sum ); |
---|
161 | // printf( " temp (%lu) sum (%.3f)", temperature, sum ); |
---|
162 | } |
---|
163 | |
---|
164 | // store min sum |
---|
165 | average->second = *sums_for_average.begin(); |
---|
166 | // printf( " -> %.3f", average->second ); |
---|
167 | } |
---|
168 | |
---|
169 | // update min sum for node |
---|
170 | min_sum_for_node = _averages.begin()->second; |
---|
171 | best_average = _averages.begin()->first; |
---|
172 | for ( FFMap::iterator average = _averages.begin(); |
---|
173 | average != _averages.end(); |
---|
174 | ++average ) { |
---|
175 | if (average->second < min_sum_for_node) { |
---|
176 | min_sum_for_node = average->second; |
---|
177 | best_average = average->first; |
---|
178 | } |
---|
179 | } |
---|
180 | // printf( " => min_sum_for_node (%.3f) @ average (%.3f)", min_sum_for_node, best_average ); |
---|
181 | } |
---|
182 | // printf( "\n" ); |
---|
183 | |
---|
184 | if ((_min_sum < 0) || (min_sum_for_node < _min_sum)) { |
---|
185 | printf( "UPDATED _best_average (%7.3f) _min_sum (%10.3f) <- best_average (%7.3f) min_sum_for_node (%10.3f)\n", _best_average, _min_sum, best_average, min_sum_for_node ); |
---|
186 | _min_sum = min_sum_for_node; |
---|
187 | _best_average = best_average; |
---|
188 | return true; |
---|
189 | } else { |
---|
190 | printf( "aborted _best_average (%7.3f) _min_sum (%10.3f) best_average (%7.3f) min_sum_for_node (%10.3f)\n", _best_average, _min_sum, best_average, min_sum_for_node ); |
---|
191 | return false; |
---|
192 | } |
---|
193 | } |
---|
194 | void PS_eval_node_paths( PS_Candidate2NodeSetPairByLengthMap &_paths, |
---|
195 | float &_min_sum_of_square_distances_to_average, |
---|
196 | float &_best_average ) { |
---|
197 | // |
---|
198 | // initially calc min square distance for one candidate |
---|
199 | // |
---|
200 | PS_Candidate2NodeSetPairByLengthMap::iterator stored_path = _paths.begin(); |
---|
201 | // calc sums of GC-contents of probes for nodes in candidate's path |
---|
202 | ULSet sums; |
---|
203 | sums.insert( 0 ); |
---|
204 | PS_calc_sums_for_nodepath( stored_path->second.second, sums ); |
---|
205 | // calc average GC-contents |
---|
206 | // printf( "averages : ( " ); |
---|
207 | FFMap averages; |
---|
208 | for ( ULSet::iterator sum = sums.begin(); |
---|
209 | sum != sums.end(); |
---|
210 | ++sum ) { |
---|
211 | float avg = (float)(*sum) / stored_path->first; |
---|
212 | averages[ avg ] = 0.0; |
---|
213 | // printf( "%7.3f, 0.000 ", avg ); |
---|
214 | } |
---|
215 | // printf( ")\n" ); |
---|
216 | // search minimum of sum of square distance to average |
---|
217 | _min_sum_of_square_distances_to_average = -1; |
---|
218 | printf( "[%p] distance: ", stored_path->second.first ); |
---|
219 | PS_calc_min_sum_of_square_distances_to_average( stored_path->second.second, |
---|
220 | averages, |
---|
221 | _best_average, |
---|
222 | _min_sum_of_square_distances_to_average ); |
---|
223 | // printf( "averages : ( " ); |
---|
224 | // for (FFMap::iterator avg = averages.begin(); |
---|
225 | // avg != averages.end(); |
---|
226 | // ++avg ) { |
---|
227 | // printf( "%7.3f,%7.3f ", avg->first, avg->second ); |
---|
228 | // } |
---|
229 | // printf( ")\n\n" ); |
---|
230 | |
---|
231 | // iterate over remaining candidates |
---|
232 | PS_Candidate2NodeSetPairByLengthMap::iterator best_candidate = stored_path; |
---|
233 | for ( ++stored_path; |
---|
234 | stored_path != _paths.end(); |
---|
235 | ++stored_path ) { |
---|
236 | |
---|
237 | // calc sums |
---|
238 | sums.clear(); |
---|
239 | sums.insert( 0 ); |
---|
240 | PS_calc_sums_for_nodepath( stored_path->second.second, sums ); |
---|
241 | |
---|
242 | // calc averages |
---|
243 | averages.clear(); |
---|
244 | // printf( "averages : ( " ); |
---|
245 | for ( ULSet::iterator sum = sums.begin(); |
---|
246 | sum != sums.end(); |
---|
247 | ++sum ) { |
---|
248 | float avg = (float)(*sum) / stored_path->first; |
---|
249 | averages[ avg ] = 0.0; |
---|
250 | // printf( "%7.3f, 0.000 ", avg ); |
---|
251 | } |
---|
252 | // printf( ")\n" ); |
---|
253 | |
---|
254 | // search min distance |
---|
255 | printf( "[%p] distance: ", stored_path->second.first ); |
---|
256 | if (PS_calc_min_sum_of_square_distances_to_average( stored_path->second.second, |
---|
257 | averages, |
---|
258 | _best_average, |
---|
259 | _min_sum_of_square_distances_to_average )) { |
---|
260 | best_candidate = stored_path; |
---|
261 | } |
---|
262 | // printf( "averages : ( " ); |
---|
263 | // for (FFMap::iterator avg = averages.begin(); |
---|
264 | // avg != averages.end(); |
---|
265 | // ++avg ) { |
---|
266 | // printf( "%7.3f,%7.3f ", avg->first, avg->second ); |
---|
267 | // } |
---|
268 | // printf( ")\n\n" ); |
---|
269 | } |
---|
270 | |
---|
271 | // remove all but best candidate |
---|
272 | PS_Candidate2NodeSetPairByLengthMap::iterator to_delete = _paths.end(); |
---|
273 | for ( stored_path = _paths.begin(); |
---|
274 | stored_path != _paths.end(); |
---|
275 | ++stored_path ) { |
---|
276 | if (to_delete != _paths.end()) { |
---|
277 | _paths.erase( to_delete ); |
---|
278 | to_delete = _paths.end(); |
---|
279 | } |
---|
280 | if (stored_path != best_candidate) to_delete = stored_path; |
---|
281 | } |
---|
282 | if (to_delete != _paths.end()) { |
---|
283 | _paths.erase( to_delete ); |
---|
284 | to_delete = _paths.end(); |
---|
285 | } |
---|
286 | } |
---|
287 | |
---|
288 | void PS_remove_bad_probes( PS_NodeSet &_nodes, |
---|
289 | float _average, |
---|
290 | set<unsigned int> &_probe_lengths ) { |
---|
291 | for ( PS_NodeSet::const_iterator node_iter = _nodes.begin(); |
---|
292 | node_iter != _nodes.end(); |
---|
293 | ++node_iter ) { |
---|
294 | PS_Node *node = *node_iter; |
---|
295 | |
---|
296 | // calc min distance |
---|
297 | float min_distance = _average * _average; |
---|
298 | for ( PS_ProbeSetCIter probe = node->getProbesBegin(); |
---|
299 | probe != node->getProbesEnd(); |
---|
300 | ++probe ) { |
---|
301 | float distance = fabsf( _average - PS_calc_temp( *probe ) ); |
---|
302 | if (distance < min_distance) min_distance = distance; |
---|
303 | } |
---|
304 | |
---|
305 | // remove probes with distance > min_distance |
---|
306 | // printf( "average (%.3f) min_distance (%.3f)\n", _average, min_distance ); |
---|
307 | PS_ProbeSetCIter to_delete = node->getProbesEnd(); |
---|
308 | for ( PS_ProbeSetCIter probe = node->getProbesBegin(); |
---|
309 | probe != node->getProbesEnd(); |
---|
310 | ++probe ) { |
---|
311 | if (to_delete != node->getProbesEnd()) { |
---|
312 | node->removeProbe( to_delete ); |
---|
313 | to_delete = node->getProbesEnd(); |
---|
314 | } |
---|
315 | float distance = fabsf( _average - PS_calc_temp( *probe ) ); |
---|
316 | if (distance > min_distance) { |
---|
317 | // printf( "node [%p] probes (%u) delete probe l(%u) gc(%u) temp(%lu) distance(%.3f)\n", |
---|
318 | // node, |
---|
319 | // node->countProbes(), |
---|
320 | // (*probe)->length, |
---|
321 | // (*probe)->GC_content, |
---|
322 | // PS_calc_temp( *probe ), |
---|
323 | // distance ); |
---|
324 | // fflush( stdout ); |
---|
325 | to_delete = probe; |
---|
326 | } |
---|
327 | } |
---|
328 | if (to_delete != node->getProbesEnd()) { |
---|
329 | node->removeProbe( to_delete ); |
---|
330 | to_delete = node->getProbesEnd(); |
---|
331 | } |
---|
332 | |
---|
333 | // select probe with greatest length |
---|
334 | if (node->countProbes() > 1) { |
---|
335 | |
---|
336 | // get greatest length |
---|
337 | unsigned max_length = 0; |
---|
338 | for ( PS_ProbeSetCIter probe = node->getProbesBegin(); |
---|
339 | probe != node->getProbesEnd(); |
---|
340 | ++probe ) { |
---|
341 | if (max_length < (*probe)->length) max_length = (*probe)->length; |
---|
342 | } |
---|
343 | |
---|
344 | // remove probes with length < max_length |
---|
345 | // printf( "max_length (%u)\n", max_length ); |
---|
346 | for ( PS_ProbeSetCIter probe = node->getProbesBegin(); |
---|
347 | probe != node->getProbesEnd(); |
---|
348 | ++probe ) { |
---|
349 | if (to_delete != node->getProbesEnd()) { |
---|
350 | node->removeProbe( to_delete ); |
---|
351 | to_delete = node->getProbesEnd(); |
---|
352 | } |
---|
353 | if ((*probe)->length < max_length) { |
---|
354 | // printf( "node [%p] probes (%u) delete probe l(%u)\n", |
---|
355 | // node, |
---|
356 | // node->countProbes(), |
---|
357 | // (*probe)->length ); |
---|
358 | // fflush( stdout ); |
---|
359 | to_delete = probe; |
---|
360 | } |
---|
361 | } |
---|
362 | if (to_delete != node->getProbesEnd()) { |
---|
363 | node->removeProbe( to_delete ); |
---|
364 | to_delete = node->getProbesEnd(); |
---|
365 | } |
---|
366 | } |
---|
367 | _probe_lengths.insert( (*node->getProbesBegin())->length ); |
---|
368 | } |
---|
369 | } |
---|
370 | |
---|
371 | // ==================================================== |
---|
372 | // ==================================================== |
---|
373 | int main( int argc, |
---|
374 | char *argv[] ) { |
---|
375 | // |
---|
376 | // check arguments |
---|
377 | // |
---|
378 | if (argc < 3) { |
---|
379 | printf( "Missing argument\n Usage %s <database name> <final candidates filename>\n", argv[0] ); |
---|
380 | exit( 1 ); |
---|
381 | } |
---|
382 | |
---|
383 | const char *input_DB_name = argv[1]; |
---|
384 | const char *final_candidates_filename = argv[2]; |
---|
385 | |
---|
386 | // |
---|
387 | // open probe-set-database |
---|
388 | // |
---|
389 | printf( "Opening probe-set-database '%s'..\n", input_DB_name ); |
---|
390 | PS_Database *db = new PS_Database( input_DB_name, PS_Database::READONLY ); |
---|
391 | db->load(); |
---|
392 | SpeciesID max_id = db->getMaxID(); |
---|
393 | unsigned long bits_in_map = ((max_id+1)*max_id)/2 + max_id+1; |
---|
394 | printf( "..loaded database (enter to continue)\n" ); fflush( stdout ); |
---|
395 | |
---|
396 | // |
---|
397 | // candidates |
---|
398 | // |
---|
399 | printf( "opening candidates-file '%s'..\n", final_candidates_filename ); |
---|
400 | PS_Candidate *candidates_root = new PS_Candidate( 0.0 ); |
---|
401 | PS_FileBuffer *candidates_file = new PS_FileBuffer( final_candidates_filename, PS_FileBuffer::READONLY ); |
---|
402 | candidates_root->load( candidates_file, bits_in_map, db->getConstRootNode() ); |
---|
403 | delete candidates_file; |
---|
404 | printf( "..loaded candidates file (enter to continue)\n" ); fflush( stdout ); |
---|
405 | |
---|
406 | // printf( "probes per candidate..\n" ); |
---|
407 | // candidates_root->printProbes( max_id - db->getMinID() + 1 ); |
---|
408 | |
---|
409 | // |
---|
410 | // scan candidates-tree for leaf candidates |
---|
411 | // |
---|
412 | printf( "\nsearching leaf candidates.. " ); |
---|
413 | PS_CandidateSet leaf_candidates; |
---|
414 | PS_get_leaf_candidates( candidates_root, leaf_candidates ); |
---|
415 | printf( "%zu\n", leaf_candidates.size() ); |
---|
416 | |
---|
417 | // |
---|
418 | // scan leaf-candidates for non-permutated node-paths |
---|
419 | // |
---|
420 | printf( "\ngetting node paths for leaf candidates.." ); |
---|
421 | PS_Candidate2NodeSetPairByLengthMap node_paths; |
---|
422 | PS_get_node_paths( leaf_candidates, node_paths ); |
---|
423 | printf( "%zu\n", node_paths.size() ); |
---|
424 | for ( PS_Candidate2NodeSetPairByLengthMap::const_iterator stored_path = node_paths.begin(); |
---|
425 | stored_path != node_paths.end(); |
---|
426 | ++stored_path ) { |
---|
427 | printf( "length (%3lu) candidate [%p] nodes (%3zu) ", |
---|
428 | stored_path->first, |
---|
429 | stored_path->second.first, |
---|
430 | stored_path->second.second.size() ); |
---|
431 | unsigned long sum = 0; |
---|
432 | for ( PS_NodeSet::const_iterator node = stored_path->second.second.begin(); |
---|
433 | node != stored_path->second.second.end(); |
---|
434 | ++node ) { |
---|
435 | sum += (*node)->countProbes(); |
---|
436 | } |
---|
437 | printf( "probes (%6lu)\n", sum ); fflush( stdout ); |
---|
438 | } |
---|
439 | |
---|
440 | // |
---|
441 | // eval node paths |
---|
442 | // |
---|
443 | printf( "\nevaluating node paths of leaf candidates..\n" ); |
---|
444 | float min_sum_of_square_distances_to_average; |
---|
445 | float average; |
---|
446 | PS_eval_node_paths( node_paths, min_sum_of_square_distances_to_average, average ); |
---|
447 | printf( " best candidate : average (%f) sum of square distances to average (%f)\n ", average, min_sum_of_square_distances_to_average ); |
---|
448 | node_paths.begin()->second.first->print(); |
---|
449 | |
---|
450 | // |
---|
451 | // remove probes with unwanted distance or length |
---|
452 | // |
---|
453 | printf( "\nremoving unwanted probes from best candidate..\n" ); |
---|
454 | set<unsigned int> probe_lengths; |
---|
455 | PS_remove_bad_probes( node_paths.begin()->second.second, average, probe_lengths ); |
---|
456 | |
---|
457 | // |
---|
458 | // write out paths to probes |
---|
459 | // |
---|
460 | char *final_candidates_paths_filename = (char *)malloc( strlen(final_candidates_filename)+1+6 ); |
---|
461 | strcpy( final_candidates_paths_filename, final_candidates_filename ); |
---|
462 | strcat( final_candidates_paths_filename, ".paths" ); |
---|
463 | printf( "Writing final candidate's paths to '%s'..\n", final_candidates_paths_filename ); |
---|
464 | PS_FileBuffer *final_candidates_paths_file = new PS_FileBuffer( final_candidates_paths_filename, PS_FileBuffer::WRITEONLY ); |
---|
465 | PS_CandidatePtr c = node_paths.begin()->second.first; |
---|
466 | unsigned long min_temp = PS_calc_temp( *c->node->getProbesBegin() ); |
---|
467 | unsigned long max_temp = min_temp; |
---|
468 | // write count of paths |
---|
469 | final_candidates_paths_file->put_ulong( c->depth ); |
---|
470 | // write used probe lengths (informal) |
---|
471 | final_candidates_paths_file->put_uint( probe_lengths.size() ); |
---|
472 | printf( " probe lengths :" ); |
---|
473 | for ( set<unsigned int>::iterator length = probe_lengths.begin(); |
---|
474 | length != probe_lengths.end(); |
---|
475 | ++length ) { |
---|
476 | final_candidates_paths_file->put_uint( *length ); |
---|
477 | printf( " %u", *length ); |
---|
478 | } |
---|
479 | printf( "\n" ); |
---|
480 | // write paths |
---|
481 | while (c && !c->node.Null()) { |
---|
482 | PS_ProbeSetCIter probe = c->node->getProbesBegin(); |
---|
483 | unsigned long temp = PS_calc_temp( *probe ); |
---|
484 | if (temp < min_temp) min_temp = temp; |
---|
485 | if (temp > max_temp) max_temp = temp; |
---|
486 | printf( "depth (%lu) candidate [%p] node [%p] probe (q_%+i__l_%2u__gc_%2u__temp_%3lu)\n", |
---|
487 | c->depth, |
---|
488 | c, |
---|
489 | &(*(c->node)), |
---|
490 | (*probe)->quality, |
---|
491 | (*probe)->length, |
---|
492 | (*probe)->GC_content, |
---|
493 | temp ); |
---|
494 | // write probe length |
---|
495 | final_candidates_paths_file->put_uint( (*probe)->length ); |
---|
496 | // write probe GC-content |
---|
497 | final_candidates_paths_file->put_uint( (*probe)->GC_content ); |
---|
498 | |
---|
499 | if ((*probe)->quality >= 0) { |
---|
500 | // write path length |
---|
501 | final_candidates_paths_file->put_uint( c->path.size() ); |
---|
502 | // write path |
---|
503 | for ( IDSetCIter id = c->path.begin(); |
---|
504 | id != c->path.end(); |
---|
505 | ++id ) { |
---|
506 | final_candidates_paths_file->put_int( *id ); |
---|
507 | } |
---|
508 | } else { |
---|
509 | // write inverse path length |
---|
510 | final_candidates_paths_file->put_uint( db->getSpeciesCount() - c->path.size() ); |
---|
511 | // write inverse path |
---|
512 | IDSetCIter path_it = c->path.begin(); |
---|
513 | SpeciesID path_id = *path_it; |
---|
514 | for ( SpeciesID id = db->getMinID(); |
---|
515 | id <= db->getMaxID(); |
---|
516 | ++id ) { |
---|
517 | if (id == path_id) { |
---|
518 | ++path_it; |
---|
519 | path_id = (path_it == c->path.end()) ? -1 : *path_it; |
---|
520 | continue; |
---|
521 | } |
---|
522 | final_candidates_paths_file->put_int( id ); |
---|
523 | } |
---|
524 | } |
---|
525 | // write dummy probe data |
---|
526 | for ( unsigned int i = 0; i < (*probe)->length; ++i ) { |
---|
527 | final_candidates_paths_file->put_char( '\x00' ); |
---|
528 | } |
---|
529 | c = c->parent; |
---|
530 | } |
---|
531 | // write probe lengths again |
---|
532 | // this set is used store remaining 'todo probe-lengths' |
---|
533 | final_candidates_paths_file->put_uint( probe_lengths.size() ); |
---|
534 | for ( set<unsigned int>::iterator length = probe_lengths.begin(); |
---|
535 | length != probe_lengths.end(); |
---|
536 | ++length ) { |
---|
537 | final_candidates_paths_file->put_uint( *length ); |
---|
538 | } |
---|
539 | |
---|
540 | printf( " temperature range %lu..%lu\n", min_temp, max_temp ); |
---|
541 | free( final_candidates_paths_filename ); |
---|
542 | delete final_candidates_paths_file; |
---|
543 | |
---|
544 | // |
---|
545 | // clean up |
---|
546 | // |
---|
547 | printf( "cleaning up... candidates\n" ); fflush( stdout ); |
---|
548 | delete candidates_root; |
---|
549 | printf( "cleaning up... database\n" ); fflush( stdout ); |
---|
550 | delete db; |
---|
551 | |
---|
552 | return 0; |
---|
553 | } |
---|