source: branches/port5/PROBE_SET/ps_find_probes.cxx

Last change on this file was 5435, checked in by westram, 17 years ago
  • cleanup useless comments
  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 53.2 KB
Line 
1#include <cmath>
2#include <cstdio>
3#include <cstdlib>
4#include <cstring>
5#include <iostream>
6
7#include <sys/times.h>
8
9#include "ps_tools.hxx"
10#include "ps_database.hxx"
11#include "ps_bitmap.hxx"
12#include "ps_candidate.hxx"
13
14using namespace std;
15
16template<class T>
17void PS_print_set_ranges( const char *_set_name, const set<T> &_set, const bool _cr_at_end = true ) {
18    fflush( stdout );
19    printf( "%s size (%3zu) : ", _set_name, _set.size() );
20    if (_set.size() == 0) {
21        printf( "(empty)" );
22    } else {
23        typename set<T>::const_iterator range_begin = _set.begin();
24        typename set<T>::const_iterator range_end   = range_begin;
25        typename set<T>::const_iterator it = _set.begin();
26        for ( ++it;
27              it != _set.end();
28              ++it ) {
29            if (*it == (*range_end)+1) {
30                range_end = it;
31            } else {
32                if (range_end == range_begin) {
33                    cout << *range_begin << " ";
34                } else {
35                    cout << *range_begin << "-" << *range_end << " ";
36                }
37                range_begin = it;
38                range_end   = it;
39            }
40        }
41        cout << *range_begin;
42        if (range_end != range_begin) cout << "-" << *range_end;
43    }
44    if (_cr_at_end) cout << "\n";
45    fflush( stdout );
46}
47
48
49template<class T1,class T2>
50void PS_print_map_ranges( const char *_map_name, const map<T1,T2> &_map, const bool _compare_keys = true, const bool _cr_at_end = true ) {
51    fflush( stdout );
52    if (_map.size() == 0) {
53        printf( "%s size (  0) : (empty)", _map_name );
54    } else {
55        if (_compare_keys) {
56            printf( "%s size (%3i) : ", _map_name, _map.size() );
57            typename map<T1,T2>::const_iterator range_begin = _map.begin();
58            typename map<T1,T2>::const_iterator range_end   = range_begin;
59            typename map<T1,T2>::const_iterator it = _map.begin();
60            for ( ++it;
61                  it != _map.end();
62                  ++it ) {
63                if (it->first == range_end->first+1) {
64                    range_end = it;
65                } else {
66                    if (range_end == range_begin) {
67                        cout << "(" << range_begin->first << "," << range_begin->second << ") ";
68                    } else {
69                        cout << "(" << range_begin->first << "-" << range_end->first << "," << range_begin->second << ") ";
70                    }
71                    range_begin = it;
72                    range_end   = it;
73                }
74            }
75            if (range_end == range_begin) {
76                cout << "(" << range_begin->first << "," << range_begin->second << ") ";
77            } else {
78                cout << "(" << range_begin->first << "-" << range_end->first << "," << range_begin->second << ") ";
79            }
80        } else {
81            map<T2,set<pair<T1,T1> > > value2indices;
82            map<T2,unsigned long int>  value2count;
83            pair<T1,T1> range;
84            range.first  = _map.begin()->first;
85            range.second = range.first;
86            T2 cur_value = _map.begin()->second;
87            typename map<T1,T2>::const_iterator it = _map.begin();
88            for ( ++it;
89                  it != _map.end();
90                  ++it ) {
91                if (it->second == cur_value) {
92                    if (it->first == range.second+1) {
93                        range.second = it->first;
94                    } else {
95                        value2indices[ cur_value ].insert( range );
96                        value2count[ cur_value ] += range.second - range.first +1;
97                        range.first  = it->first;
98                        range.second = it->first;
99                    }
100                } else {
101                    value2indices[ cur_value ].insert( range );
102                    value2count[ cur_value ] += range.second - range.first +1;
103                    range.first  = it->first;
104                    range.second = it->first;
105                    cur_value    = it->second;
106                }
107            }
108            value2indices[ cur_value ].insert( range );
109            value2count[ cur_value ] += range.second - range.first +1;
110            for ( typename map<T2,set<pair<T1,T1> > >::const_iterator value = value2indices.begin();
111                  value != value2indices.end();
112                  ++value ) {
113                if (value != value2indices.begin()) cout << "\n";
114                printf( "%s size (%3i) : value (", _map_name, _map.size() );
115                cout << value->first << ") count (" << value2count[ value->first ] << ") [";
116                for ( typename set<pair<T1,T1> >::const_iterator indices = value->second.begin();
117                      indices != value->second.end();
118                      ++indices ) {
119                    if (indices != value->second.begin()) cout << " ";
120                    cout << indices->first;
121                    if (indices->first != indices->second) cout << "-" << indices->second;
122                }
123                cout << "]," << value->first << ")";
124            }
125        }
126    }
127    if (_cr_at_end) cout << "\n";
128    fflush( stdout );
129}
130
131//
132// common globals
133//
134
135bool                     __VERBOSE = false;
136SpeciesID                __MAX_ID;
137SpeciesID                __MIN_ID;
138SpeciesID                __SPECIES_COUNT;
139unsigned long            __BITS_IN_MAP;
140PS_BitMap_Counted       *__MAP;
141PS_BitMap_Counted       *__PRESET_MAP;
142IDSet                    __SOURCE_ID_SET;
143PS_BitSet::IndexSet      __TARGET_ID_SET;
144
145//
146// globals for PS_calc_next_speciesid_sets,
147//             PS_calc_next_speciesid_sets_for_candidate
148//
149
150//  count of trues for a source_id may be max 5%
151//  (of the difference between __SPECIES_COUNT and the lowest
152//  count of trues per species) higher than the lowest count
153//  of trues for the species in __SOURCE_ID_SET
154#define __TRESHOLD_PERCENTAGE_NEXT_SOURCE_ID_SET  5
155//  target_id must be at least in 95% of total target_id_sets
156//  to be in __TARGET_ID_SET
157#define __TRESHOLD_PERCENTAGE_NEXT_TARGET_ID_SET 95
158
159SpeciesID               __MIN_SETS_ID;
160SpeciesID               __MAX_SETS_ID;
161
162//
163// globals for PS_find_probes
164//
165
166//  initially a probe must match 40-60% of species
167//  in the ID-sets to be a candidate
168//  if PS_find_probes fails to find candidates within this range
169//  it is repeated with a broadened range (by 5% each border)
170#define __MIN_PERCENTAGE_SET_MATCH 40
171#define __MAX_PERCENTAGE_SET_MATCH 60
172
173float                   __SOURCE_MIN_MATCH_COUNT;
174float                   __SOURCE_MAX_MATCH_COUNT;
175float                   __TARGET_MIN_MATCH_COUNT;
176float                   __TARGET_MAX_MATCH_COUNT;
177float                   __SOURCE_PERFECT_MATCH_COUNT;
178float                   __TARGET_PERFECT_MATCH_COUNT;
179unsigned long           __PROBES_COUNTER;
180unsigned long           __PROBES_REMOVED;
181PS_CandidatePtr         __CANDIDATES_ROOT;
182unsigned long           __CANDIDATES_COUNTER;
183unsigned long           __CANDIDATES_TODO;
184unsigned long           __CANDIDATES_FINISHED;
185unsigned long           __MAX_DEPTH;
186
187//
188// globals for PS_find_probe_for_sets
189//             PS_test_candidate_on_bitmap
190//
191
192IDSet __PATH;
193
194//
195// globals for PS_descend
196//
197char *__PATH_IN_CANDIDATES;
198
199
200unsigned long int PS_test_candidate_on_bitmap( float             *_filling_level = 0,
201                                               PS_BitMap_Counted *_map = 0 ) {
202    //  returns number of locations in __MAP that would be switched
203    //  from false to true by __PATH
204   
205    // iterate over all IDs except path
206    IDSetCIter        path_iter    = __PATH.begin();
207    SpeciesID         next_path_ID = *path_iter;
208    unsigned long int gain         = 0;
209    if (_map) {
210        for ( SpeciesID not_in_path_ID = __MIN_ID;
211              not_in_path_ID <= __MAX_ID;
212              ++not_in_path_ID) {
213            if (not_in_path_ID == next_path_ID) {   // if i run into a ID in path
214                ++path_iter;                        // advance to next path ID
215                next_path_ID = (path_iter == __PATH.end()) ? -1 : *path_iter;
216                continue;                           // skip this ID
217            }
218            // iterate over path IDs
219            for ( IDSetCIter path_ID = __PATH.begin();
220                  path_ID != __PATH.end();
221                  ++path_ID) {
222                if (not_in_path_ID == *path_ID)
223                    continue;   // obviously a probe cant differ a species from itself
224                if (not_in_path_ID > *path_ID) {
225                    gain += (_map->get( not_in_path_ID, *path_ID ) == false);
226                } else {
227                    gain += (_map->get( *path_ID, not_in_path_ID ) == false);
228                }
229            }
230        }
231    } else {
232        for ( SpeciesID not_in_path_ID = __MIN_ID;
233              not_in_path_ID <= __MAX_ID;
234              ++not_in_path_ID) {
235            if (not_in_path_ID == next_path_ID) {   // if i run into a ID in path
236                ++path_iter;                        // advance to next path ID
237                next_path_ID = (path_iter == __PATH.end()) ? -1 : *path_iter;
238                continue;                           // skip this ID
239            }
240            // iterate over path IDs
241            for ( IDSetCIter path_ID = __PATH.begin();
242                  path_ID != __PATH.end();
243                  ++path_ID) {
244                if (not_in_path_ID == *path_ID)
245                    continue;   // obviously a probe cant differ a species from itself
246                if (not_in_path_ID > *path_ID) {
247                    gain += (__MAP->get( not_in_path_ID, *path_ID ) == false);
248                } else {
249                    gain += (__MAP->get( *path_ID, not_in_path_ID ) == false);
250                }
251            }
252        }
253    }
254    if (_filling_level) {
255        unsigned long int trues = (_map) ? _map->getCountOfTrues() + gain :  __MAP->getCountOfTrues() + gain;
256        *_filling_level = ((float)trues / __BITS_IN_MAP)*100.0;
257    }
258    return gain;
259}
260
261
262bool PS_test_sets_on_path( float &_distance ) {
263    long count_matched_source = 0;
264    long count_matched_target = 0;
265    SpeciesID  path_id;
266    IDSetCIter path_end   = __PATH.end();
267    IDSetCIter path       = __PATH.begin();
268    IDSetCIter source_end = __SOURCE_ID_SET.end();
269    IDSetCIter source     = __SOURCE_ID_SET.begin();
270    PS_BitSet::IndexSet::const_iterator target_end = __TARGET_ID_SET.end();
271    PS_BitSet::IndexSet::const_iterator target     = __TARGET_ID_SET.begin();
272
273    while (path != path_end) {
274        path_id = *path;
275        while ((*target < path_id) && (target != target_end)) {
276            ++target;
277        }
278        if ((target != target_end) && (*target == path_id))
279            ++count_matched_target;
280        while ((*source < path_id) && (source != source_end)) {
281            ++source;
282        }
283        if ((source != source_end) && (*source == path_id))
284            ++count_matched_source;
285        ++path;
286    }
287
288    if ((count_matched_source > __SOURCE_MIN_MATCH_COUNT) &&
289        (count_matched_source < __SOURCE_MAX_MATCH_COUNT) &&
290        (count_matched_target > __TARGET_MIN_MATCH_COUNT) &&
291        (count_matched_target < __TARGET_MAX_MATCH_COUNT)) {
292        _distance = fabs( __SOURCE_PERFECT_MATCH_COUNT - count_matched_source ) + fabs( __TARGET_PERFECT_MATCH_COUNT - count_matched_target );
293        return true;
294    }
295    return false;
296}
297
298void PS_find_probe_for_sets( const PS_NodePtr _ps_node, PS_CandidatePtr _candidate_parent ) {
299    //  scan subtree starting with _ps_node for candidates
300
301    SpeciesID id         = _ps_node->getNum();
302    bool      has_probes = _ps_node->hasProbes();
303
304    //
305    // append ID to path
306    //
307    __PATH.insert( id );
308
309    //
310    // dont look at path until ID is greater than lowest ID in the sets of IDs
311    // also dont use a node if its already used as candidate
312    //
313    if ((id >= __MIN_SETS_ID) && has_probes && !_candidate_parent->alreadyUsedNode(_ps_node)) {
314        ++__PROBES_COUNTER;
315        if (__VERBOSE && (__PROBES_COUNTER % 100 == 0)) printf( "%8lup %8luc\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", __PROBES_COUNTER, __CANDIDATES_COUNTER ); fflush( stdout );
316        // make intersections of __PATH with __SOURCE_ID_SET and __TARGET_ID_SET
317        float distance_to_perfect_match;
318        if (PS_test_sets_on_path( distance_to_perfect_match )) {
319            unsigned long gain = PS_test_candidate_on_bitmap();   // no -> calc gain and make new candidate node
320            int status = (gain < (unsigned)__SPECIES_COUNT / 100)
321                ? 0
322                : _candidate_parent->addChild( static_cast<unsigned long>(distance_to_perfect_match), gain, _ps_node, __PATH );
323            if (status > 0) {
324                if (status == 2) {
325//                     printf( " PS_find_probe_for_sets() : new candidate: count_matched_source (%li/%.0f) count_matched_target (%li/%.0f) distance (%4.0f) path (%i) gain (%li) node (%p)\n",
326//                             count_matched_source, __SOURCE_PERFECT_MATCH_COUNT,
327//                             count_matched_target, __TARGET_PERFECT_MATCH_COUNT,
328//                             distance_to_perfect_match,
329//                             __PATH.size(), gain, &(*_ps_node) );
330                    ++__CANDIDATES_COUNTER;
331                    if (__VERBOSE) printf( "%8lup %8luc\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", __PROBES_COUNTER, __CANDIDATES_COUNTER ); fflush( stdout );
332//                } else {
333//                     printf( " PS_find_probe_for_sets() : upd candidate: count_matched_source (%li/%.0f) count_matched_target (%li/%.0f) distance (%4.0f) path (%i) gain (%li) node (%p)\n",
334//                             count_matched_source, __SOURCE_PERFECT_MATCH_COUNT,
335//                             count_matched_target, __TARGET_PERFECT_MATCH_COUNT,
336//                             distance_to_perfect_match,
337//                             __PATH.size(), gain, &(*_ps_node) );
338                }
339                fflush( stdout );
340            } // if status (of addChild)
341        }
342    }
343
344    //
345    // step down the children if either ID is lower than highest
346    // ID in the set of ID-pairs or the node has no probes
347    //
348    if ((id < __MAX_SETS_ID) || (! has_probes)) {
349        for ( PS_NodeMapConstIterator i = _ps_node->getChildrenBegin();
350              i != _ps_node->getChildrenEnd();
351              ++i) {
352            PS_find_probe_for_sets( i->second, _candidate_parent );
353        }
354    }
355
356    //
357    // remove ID from path
358    //
359    __PATH.erase( id );
360}
361
362void PS_find_probes( const PS_NodePtr _root_node, const int _round, PS_CandidatePtr _candidate_parent, const float _filling_level ) {
363    //  scan PS_Node-tree for candidates to raise filling level of __MAP
364   
365    __PROBES_COUNTER             = 0;
366    __CANDIDATES_COUNTER         = 0;
367    __SOURCE_MIN_MATCH_COUNT     = (__SOURCE_ID_SET.size() * (__MIN_PERCENTAGE_SET_MATCH - (_round * 5))) / 100.0;
368    __SOURCE_MAX_MATCH_COUNT     = (__SOURCE_ID_SET.size() * (__MAX_PERCENTAGE_SET_MATCH + (_round * 5))) / 100.0;
369    __TARGET_MIN_MATCH_COUNT     = (__TARGET_ID_SET.size() * (__MIN_PERCENTAGE_SET_MATCH - (_round * 5))) / 100.0;
370    __TARGET_MAX_MATCH_COUNT     = (__TARGET_ID_SET.size() * (__MAX_PERCENTAGE_SET_MATCH + (_round * 5))) / 100.0;
371    __SOURCE_PERFECT_MATCH_COUNT = __SOURCE_ID_SET.size() / 2.0;
372    __TARGET_PERFECT_MATCH_COUNT = __TARGET_ID_SET.size() / 2.0;
373    __MIN_SETS_ID = (*__SOURCE_ID_SET.begin()  < *__TARGET_ID_SET.begin())  ? *__SOURCE_ID_SET.begin()  : *__TARGET_ID_SET.begin();
374    __MAX_SETS_ID = (*__SOURCE_ID_SET.rbegin() < *__TARGET_ID_SET.rbegin()) ? *__SOURCE_ID_SET.rbegin() : *__TARGET_ID_SET.rbegin();
375    printf( "PS_find_probes(%i) : source match %10.3f .. %10.3f   target match %10.3f .. %10.3f\n", _round, __SOURCE_MIN_MATCH_COUNT, __SOURCE_MAX_MATCH_COUNT, __TARGET_MIN_MATCH_COUNT, __TARGET_MAX_MATCH_COUNT ); fflush( stdout );
376    // int c = 0;
377    struct tms before;
378    times( &before );
379    for ( PS_NodeMapConstIterator i = _root_node->getChildrenBegin();
380          (i != _root_node->getChildrenEnd()) && (i->first < __MAX_SETS_ID);
381          ++i/*,++c*/ ) {
382        //if (c % 100 == 0) printf( "PS_find_probe_for_sets( %i ) : %i/%i 1st level nodes -> %li/%li candidates of %li probes looked at\n", i->first, c+1, _root_node->countChildren(), __CANDIDATES_COUNTER, __CANDIDATES_TOTAL_COUNTER, __PROBES_TOTAL_COUNTER );
383        __PATH.clear();
384        PS_find_probe_for_sets( i->second, _candidate_parent );
385    }
386    printf( "PS_find_probes(%i) : %li candidates of %li probes visited  ", _round, __CANDIDATES_COUNTER, __PROBES_COUNTER );
387    PS_print_time_diff( &before );
388    // reduce candidates to best,worst and middle gain
389    _candidate_parent->reduceChildren( _filling_level );
390    for ( PS_CandidateByGainMapCIter c = _candidate_parent->children.begin();
391          c != _candidate_parent->children.end();
392          ++c ) {
393        printf( "PS_find_probes(%i) : ", _round );
394        c->second->print();
395        ++__CANDIDATES_TODO;
396    }
397    fflush( stdout );
398}
399
400void PS_calc_next_speciesid_sets() {
401    //  scan __MAP to find sets of IDs that need differentiation most
402   
403    //
404    // 1. __SOURCE_ID_SET
405    //    scan bitmap for species that need more matches
406    //
407    SpeciesID highest_count;
408    SpeciesID lowest_count;
409    float     treshold;
410    SpeciesID count;
411
412    // first pass -- get lowest count of trues and calc treshold
413    lowest_count  = __SPECIES_COUNT;
414    highest_count = 0;
415    for ( SpeciesID id = __MIN_ID;
416          id <= __MAX_ID;
417          ++id) {
418        count = __MAP->getCountFor( id );
419        if (count == __SPECIES_COUNT-__MIN_ID-1) continue; // i cannot improve species that can be differed from all others
420        if (count > highest_count) highest_count = count;
421        if (count < lowest_count)  lowest_count  = count;
422    }
423    treshold = ( ((__SPECIES_COUNT-lowest_count) * __TRESHOLD_PERCENTAGE_NEXT_SOURCE_ID_SET) / 100.0 ) + lowest_count;
424    printf( "PS_calc_next_speciesid_sets() : SOURCE count 1's [%i..%i]  treshold (%.3f)", lowest_count, highest_count, treshold );
425
426    // second pass -- get IDs where count is below or equal treshold
427    __SOURCE_ID_SET.clear();
428    for ( SpeciesID id = __MIN_ID;
429          id <= __MAX_ID;
430          ++id) {
431        if (__MAP->getCountFor( id ) <= treshold) __SOURCE_ID_SET.insert( id );
432    }
433    PS_print_set_ranges( " __SOURCE_ID_SET", __SOURCE_ID_SET );
434
435    //
436    // 2. __TARGET_ID_SET
437    //    scan bitmap for species IDs that need to be distinguished from MOST species in __SOURCE_ID_SET
438    //
439    ID2IDMap count_falses_per_id;
440
441    // first -- get the IDs that need differentiation from __SOURCE_ID_SET
442    unsigned long count_iterations = 0;
443    for ( IDSetCIter source_id = __SOURCE_ID_SET.begin();
444          source_id != __SOURCE_ID_SET.end();
445          ++source_id,++count_iterations ) {
446        //if (count_iterations % 100 == 0) printf( "PS_calc_next_speciesid_sets() : set #%li of %i\n", count_iterations+1, __SOURCE_ID_SET.size() );
447        // get 'next_target_set'
448        __MAP->getFalseIndicesFor( *source_id, __TARGET_ID_SET );
449        // count falses per SpeciesID
450        for ( PS_BitSet::IndexSet::iterator target_id = __TARGET_ID_SET.begin();
451              target_id != __TARGET_ID_SET.end();
452              ++target_id) {
453            if ((*target_id < __MIN_ID) || (*target_id > __MAX_ID)) continue; // skip ID's that are outside DB-IDs-range (like zero if __MIN_ID is one)
454            if (*target_id != *source_id) ++count_falses_per_id[ *target_id ];
455        }
456    }
457    //PS_print_map_ranges( "PS_calc_next_speciesid_sets() : count_falses_per_id", count_falses_per_id, false );
458
459    // second -- get highest count of falses and calc treshold
460    lowest_count  = __SPECIES_COUNT;
461    highest_count = 0;
462    for ( ID2IDMapCIter count_per_id = count_falses_per_id.begin();
463          count_per_id != count_falses_per_id.end();
464          ++count_per_id ) {
465        if (count_per_id->second > highest_count) highest_count = count_per_id->second;
466        if (count_per_id->second < lowest_count)  lowest_count  = count_per_id->second;
467    }
468    printf( "PS_calc_next_speciesid_sets() : TARGET count 0's [%i..%i]", lowest_count, highest_count );
469
470    // third -- put all IDs in __TARGET_ID_SET that are needed most by species from __SOURCE_ID_SET
471    __TARGET_ID_SET.clear();
472    for ( ID2IDMapCIter count_per_id = count_falses_per_id.begin();
473          count_per_id != count_falses_per_id.end();
474          ++count_per_id ) {
475        if (count_per_id->second == highest_count) __TARGET_ID_SET.insert( count_per_id->first );
476    }
477    PS_print_set_ranges( " __TARGET_ID_SET", __TARGET_ID_SET );
478}
479
480void PS_apply_path_to_bitmap( IDSet &_path, const bool _silent = false, PS_BitMap_Counted *_map = 0 ) {
481    //  set true in __MAP for all combinations of IDs (in _path , not in _path)
482   
483    // iterate over all IDs except path
484    IDSetCIter        path_iter    = _path.begin();
485    SpeciesID         next_path_ID = *path_iter;
486    unsigned long int gain         = 0;
487    if (_map) { // called for a 'private' map ?
488        for ( SpeciesID not_in_path_ID = __MIN_ID;
489              not_in_path_ID <= __MAX_ID;
490              ++not_in_path_ID) {
491            if (not_in_path_ID == next_path_ID) {   // if i run into a ID in path
492                ++path_iter;                        // advance to next path ID
493                next_path_ID = (path_iter == _path.end()) ? -1 : *path_iter;
494                continue;                           // skip this ID
495            }
496            // iterate over path IDs
497            for ( IDSetCIter path_ID = _path.begin();
498                  path_ID != _path.end();
499                  ++path_ID) {
500                if (not_in_path_ID == *path_ID) continue;   // obviously a probe cant differ a species from itself
501                if (not_in_path_ID > *path_ID) {
502                    gain += (_map->set( not_in_path_ID, *path_ID, true ) == false);
503                } else {
504                    gain += (_map->set( *path_ID, not_in_path_ID, true ) == false);
505                }
506            }
507        }
508    } else { // called for __MAP
509        for ( SpeciesID not_in_path_ID = __MIN_ID;
510              not_in_path_ID <= __MAX_ID;
511              ++not_in_path_ID) {
512            if (not_in_path_ID == next_path_ID) {   // if i run into a ID in path
513                ++path_iter;                        // advance to next path ID
514                next_path_ID = (path_iter == _path.end()) ? -1 : *path_iter;
515                continue;                           // skip this ID
516            }
517            // iterate over path IDs
518            for ( IDSetCIter path_ID = _path.begin();
519                  path_ID != _path.end();
520                  ++path_ID) {
521                if (not_in_path_ID == *path_ID) continue;   // obviously a probe cant differ a species from itself
522                if (not_in_path_ID > *path_ID) {
523                    gain += (__MAP->set( not_in_path_ID, *path_ID, true ) == false);
524                } else {
525                    gain += (__MAP->set( *path_ID, not_in_path_ID, true ) == false);
526                }
527            }
528        }
529    }
530    unsigned long int sets =  (__SPECIES_COUNT-_path.size())*_path.size();
531    if (!_silent) printf( "PS_apply_path_to_bitmap() : gain %lu of %lu -- %.2f%%  -> wasted %lu\n", gain, sets, ((float)gain/sets)*100.0, sets-gain );
532    //__MAP->recalcCounters();
533}
534
535float PS_filling_level( PS_CandidatePtr _candidate = 0 ) {
536    //  returns filling level of __MAP
537   
538    unsigned long trues      = __MAP->getCountOfTrues();
539    float         percentage = ((float)trues / __BITS_IN_MAP)*100.0;
540    if (_candidate) {
541        _candidate->filling_level = percentage;
542        _candidate->false_IDs     = __BITS_IN_MAP - trues;
543    } else {
544        printf( "PS_filling_level() : bitmap (%lu) now has %lu trues and %lu falses -- %.5f%% filled\n", __BITS_IN_MAP, trues, __BITS_IN_MAP-trues, percentage );
545    }
546    return percentage;
547}
548
549void PS_GNUPlot( const char *_out_prefix, const long _iteration, const IDSet &_path, const ID2IDSet &_noMatches ) {
550    //  generate data- and commandfiles for GNUPlot to display __MAP and its count_trues_per_id
551   
552    char *buffer = (char *)malloc( 4096 );
553    // open data file
554    sprintf( buffer, "%s.%06li", _out_prefix, _iteration );
555    char *data_name = strdup( buffer );
556    PS_FileBuffer *file = new PS_FileBuffer( data_name, PS_FileBuffer::WRITEONLY );
557    // output data
558    long size = sprintf( buffer, "# CANDIDATE #%li path (%zu)\n", _iteration, _path.size() );
559    file->put( buffer, size );
560    for ( IDSetCIter i = _path.begin();
561          i != _path.end();
562          ++i ) {
563        size = sprintf( buffer, "# %i\n", *i );
564        file->put( buffer, size );
565    }
566    size = sprintf( buffer, "#noMatches (%zu)\n", _noMatches.size() );
567    file->put( buffer, size );
568    for ( ID2IDSetCIter p = _noMatches.begin();
569          p != _noMatches.end();
570          ++p ) {
571        size = sprintf( buffer, "%i %i\n", p->first, p->second );
572        file->put( buffer, size );
573    }
574    size = sprintf( buffer, "\n\n" );
575    file->put( buffer, size );
576    sprintf( buffer, "Bitmap after %li. candidate", _iteration );
577    char *title = strdup( buffer );
578    __MAP->printGNUplot( title, buffer, file );
579    // open gnuplot command file
580    sprintf( buffer, "%s.%06li.gnuplot", _out_prefix, _iteration );
581    file->reinit( buffer, PS_FileBuffer::WRITEONLY );
582    // output gnuplot commands
583    size = sprintf( buffer, "set terminal png color\nset output '%06li.bitmap.png\n", _iteration );
584    file->put( buffer, size );
585    size = sprintf( buffer, "set yrange [%i:%i] reverse\nset xrange [%i:%i]\n", __MIN_ID,__MAX_ID,__MIN_ID,__MAX_ID );
586    file->put( buffer, size );
587    size = sprintf( buffer, "set size square\nset title '%li. iteration'\nset pointsize 0.5\n", _iteration );
588    file->put( buffer, size );
589    size = sprintf( buffer, "plot '%s' index 1 title 'match ()' with dots 2,'%s' index 0 title 'no match (30)' with points 1\n", data_name, data_name );
590    file->put( buffer, size );
591    size = sprintf( buffer, "set terminal png color\nset output '%06li.counters.png\n", _iteration );
592    file->put( buffer, size );
593    size = sprintf( buffer, "set yrange [] noreverse\nset size nosquare\n" );
594    file->put( buffer, size );
595    size = sprintf( buffer, "plot '%s' index 2 title 'count/species' with impulses 2,'%s' index 3 title 'species/count' with points 1", data_name, data_name );
596    file->put( buffer, size );
597
598    delete file;
599    free( title );
600    free( data_name );
601    free( buffer );
602}
603
604PS_CandidatePtr PS_ascend( PS_CandidatePtr _last_candidate ) {
605    //  'remove' _last_candidate from __MAP by applying its parent's
606    //  paths to a fresh __MAP
607   
608    //
609    // make fresh __MAP
610    //
611    __MAP->copy( __PRESET_MAP );
612    //
613    // apply paths of parent-candidates to __MAP
614    //
615    PS_CandidatePtr parent = _last_candidate->parent;
616    while ((parent != 0) && (parent->path.size() > 0)) {
617        PS_apply_path_to_bitmap( parent->path, true );
618        parent = parent->parent;
619    }
620    printf( "ASCEND " );
621    PS_filling_level();
622    //
623    // return the parent of the given candidate
624    //
625    return _last_candidate->parent;
626}
627
628
629void PS_descend( PS_CandidatePtr _candidate_parent, const PS_NodePtr _root_node, unsigned long _depth, const float _filling_level ) {
630    struct tms before;
631    times( &before );
632
633    printf( "\nDESCEND ==================== depth (%lu / %lu) candidates (%lu / %lu -> %lu left) ====================\n",
634            _depth,
635            __MAX_DEPTH,
636            __CANDIDATES_FINISHED,
637            __CANDIDATES_TODO,
638            __CANDIDATES_TODO-__CANDIDATES_FINISHED  ); fflush( stdout );
639    printf( "DESCEND PATH : (%s)\n\n", __PATH_IN_CANDIDATES );
640
641    //
642    // calc source- and target-set of IDs
643    //
644    PS_calc_next_speciesid_sets();
645    printf( "\nDESCEND ---------- calculated next speciesid-sets ----------\n\n" ); fflush( stdout );
646
647    //
648    // search for candidates to improve filling level of __MAP using the source- and target-set of IDs
649    // max. 3 rounds
650    //
651    int round = 0;
652    for (; (round < 3) && (_candidate_parent->children.size() == 0); ++round) {
653        PS_find_probes( _root_node, round, _candidate_parent, _filling_level );
654    }
655    printf( "\nDESCEND ---------- searched probe for speciesid-sets [rounds : %i] candidates (%lu / %lu -> %lu left) ----------\n\n",
656            round,
657            __CANDIDATES_FINISHED,
658            __CANDIDATES_TODO,
659            __CANDIDATES_TODO-__CANDIDATES_FINISHED ); fflush( stdout );
660
661    //
662    // descend down each (of the max 3) candidate(s)
663    //
664    char count = '0'+_candidate_parent->children.size();
665    for ( PS_CandidateByGainMapRIter next_candidate_it = _candidate_parent->children.rbegin();
666          next_candidate_it != _candidate_parent->children.rend();
667          ++next_candidate_it, --count ) {
668        PS_CandidatePtr next_candidate = &(*next_candidate_it->second);
669        ++__CANDIDATES_FINISHED;
670        //
671        // apply candidate to __MAP
672        //
673        printf( "[%p] ", next_candidate );
674        PS_apply_path_to_bitmap( next_candidate->path );
675        printf( "[%p] ", next_candidate );
676        PS_filling_level( next_candidate );
677        next_candidate->depth = _depth+1;
678        next_candidate->print( 0,false,false );
679        //
680        // step down
681        //
682        if ((next_candidate->filling_level < 75.0) && (_depth+1 < __MAX_DEPTH)) {
683            __PATH_IN_CANDIDATES[ _depth ] = count;
684            __PATH_IN_CANDIDATES[ _depth+1 ] = '\x00';
685            PS_descend( next_candidate, _root_node, _depth+1, next_candidate->filling_level );
686        }
687        //
688        // remove candidate from __MAP
689        //
690        PS_ascend( next_candidate );
691    }
692
693    printf( "\nDESCEND ~~~~~~~~~~~~~~~~~~~~ depth (%li) max depth (%li) candidates (%lu / %lu -> %lu left) ~~~~~~~~~~~~~~~~~~~~ ",
694            _depth,
695            __MAX_DEPTH,
696            __CANDIDATES_FINISHED,
697            __CANDIDATES_TODO,
698            __CANDIDATES_TODO-__CANDIDATES_FINISHED );
699    PS_print_time_diff( &before );
700    printf( "\n" ); fflush( stdout );
701}
702
703void PS_make_map_for_candidate( PS_CandidatePtr _candidate ) {
704    //  make __MAP for _candidate
705   
706    if (_candidate->map) return; // if candidate already has its map return
707    _candidate->map = new PS_BitMap_Counted( false, __MAX_ID+1 );
708    _candidate->map->copy( __PRESET_MAP );
709    PS_apply_path_to_bitmap( _candidate->path, true, _candidate->map ); // apply _candidate's path
710    PS_CandidatePtr parent = _candidate->parent;
711    while ((parent != 0) && (parent->path.size() > 0)) {        // apply parent's paths
712        PS_apply_path_to_bitmap( parent->path, true, _candidate->map );
713        parent = parent->parent;
714    }
715}
716
717// void PS_calc_next_speciesid_sets_for_candidate( PS_CandidatePtr _candidate ) {
718//  make __MAP for _candidate and search for best source/target-sets
719//     //
720//     // 1. __MAP for _candidate
721//     //
722//     PS_make_map_for_candidate( _candidate );
723//     //
724//     // 2. __SOURCE_ID_SET
725//     //    scan bitmap for species that need more matches
726//     //
727//     SpeciesID highest_count;
728//     SpeciesID lowest_count;
729//     SpeciesID count;
730
731//     // first pass -- get lowest/highest count of trues
732//     lowest_count  = __SPECIES_COUNT;
733//     highest_count = 0;
734//     for ( SpeciesID id = __MIN_ID;
735//           id <= __MAX_ID;
736//           ++id) {
737//         count = _candidate->map->getCountFor( id );
738//         if (count == __SPECIES_COUNT-__MIN_ID-1) continue; // i cannot improve species that can be differed from all others
739//         if (count < lowest_count)  lowest_count  = count;
740//         if (count > highest_count) highest_count = count;
741//     }
742//     //printf( "PS_calc_next_speciesid_sets_for_candidate() : SOURCE count 1's [%i..%i]\n", lowest_count, highest_count );
743
744//     // second pass -- get IDs where count is equal to lowest_count or highest_count
745//     IDSet highest_count_src_ids;
746//     IDSet lowest_count_src_ids;
747//     for ( SpeciesID id = __MIN_ID;
748//           id <= __MAX_ID;
749//           ++id) {
750//         count = _candidate->map->getCountFor( id );
751//         if (count == lowest_count) lowest_count_src_ids.insert( id );
752//         if (count == highest_count) highest_count_src_ids.insert( id );
753//     }
754//     if (_candidate->source_set) {
755//         _candidate->source_set->clear();
756//         _candidate->source_set->insert( lowest_count_src_ids.begin(), lowest_count_src_ids.end() );
757//     } else {
758//         _candidate->source_set = new IDSet( lowest_count_src_ids );
759//     }
760//     _candidate->source_set->insert( highest_count_src_ids.begin(), highest_count_src_ids.end() );
761//     //PS_print_set_ranges( "   lowest_count_src_ids", lowest_count_src_ids  );
762//     //PS_print_set_ranges( "  highest_count_src_ids", highest_count_src_ids  );
763//     if (*(_candidate->source_set->begin())  < __MIN_SETS_ID) __MIN_SETS_ID = *(_candidate->source_set->begin());
764//     if (*(_candidate->source_set->rbegin()) > __MAX_SETS_ID) __MAX_SETS_ID = *(_candidate->source_set->rbegin());
765
766//     //
767//     // 3. __TARGET_ID_SET
768//     //    scan bitmap for species IDs that need to be distinguished from MOST species in lowest_count_src_ids
769//     //
770//     ID2IDMap count_falses_per_id;
771
772//     // first -- get the IDs that need differentiation from __SOURCE_ID_SET
773//     for ( IDSetCIter source_id = lowest_count_src_ids.begin();
774//           source_id != lowest_count_src_ids.end();
775//           ++source_id ) {
776//         // get 'next_target_set'
777//         _candidate->map->getFalseIndicesFor( *source_id, __TARGET_ID_SET );
778//         // count falses per SpeciesID
779//         for ( PS_BitSet::IndexSet::iterator target_id = __TARGET_ID_SET.begin();
780//               target_id != __TARGET_ID_SET.end();
781//               ++target_id) {
782//             if ((*target_id < __MIN_ID) || (*target_id > __MAX_ID)) continue; // skip ID's that are outside DB-IDs-range (like zero if __MIN_ID is one)
783//             if (*target_id != *source_id) ++count_falses_per_id[ *target_id ];
784//         }
785//     }
786//     //printf( "\n" );
787//     //PS_print_map_ranges( "PS_calc_next_speciesid_sets_for_candidate() : count_falses_per_id", count_falses_per_id, false );
788
789//     // second -- get highest count of falses and calc treshold
790//     lowest_count  = __SPECIES_COUNT;
791//     highest_count = 0;
792//     for ( ID2IDMapCIter count_per_id = count_falses_per_id.begin();
793//           count_per_id != count_falses_per_id.end();
794//           ++count_per_id ) {
795//         if (count_per_id->second > highest_count) highest_count = count_per_id->second;
796//         if (count_per_id->second < lowest_count)  lowest_count  = count_per_id->second;
797//     }
798//     //printf( "PS_calc_next_speciesid_sets_for_candidate() : TARGET count 0's [%i..%i]\n", lowest_count, highest_count );
799
800//     // third -- put all IDs in __TARGET_ID_SET that are needed most by species from lowest_count_src_ids
801//     if (_candidate->target_set) {
802//         _candidate->target_set->clear();
803//     } else {
804//         _candidate->target_set = new IDSet();
805//     }
806//     __TARGET_ID_SET.clear();
807//     for ( ID2IDMapCIter count_per_id = count_falses_per_id.begin();
808//           count_per_id != count_falses_per_id.end();
809//           ++count_per_id ) {
810//         if (count_per_id->second == highest_count) __TARGET_ID_SET.insert( count_per_id->first );
811//     }
812//     //PS_print_set_ranges( " target_set (by lowest count of trues) ", __TARGET_ID_SET );
813//     _candidate->target_set->insert( __TARGET_ID_SET.begin(), __TARGET_ID_SET.end() );
814
815//     // fourth -- put all IDs in __TARGET_ID_SET that are needed by species from highest_count_src_ids
816//     PS_BitSet::IndexSet next_target_ids;
817//     __TARGET_ID_SET.clear();
818//     for ( IDSetCIter source_id = highest_count_src_ids.begin();
819//           source_id != highest_count_src_ids.end();
820//           ++source_id ) {
821//         // get next target-IDs
822//         _candidate->map->getFalseIndicesFor( *source_id, next_target_ids );
823//         // 'append' target-IDs
824//         for ( PS_BitSet::IndexSet::iterator target_id = next_target_ids.begin();
825//               target_id != next_target_ids.end();
826//               ++target_id) {
827//             if ((*target_id < __MIN_ID) || (*target_id > __MAX_ID)) continue; // skip ID's that are outside DB-IDs-range (like zero if __MIN_ID is one)
828//             if (*target_id != *source_id) __TARGET_ID_SET.insert( *target_id );
829//         }
830//     }
831//     //PS_print_set_ranges( " target_set (by highest count of trues)", __TARGET_ID_SET );
832//     _candidate->target_set->insert( __TARGET_ID_SET.begin(), __TARGET_ID_SET.end() );
833
834//     if (*(__TARGET_ID_SET.begin())  < __MIN_SETS_ID) __MIN_SETS_ID = *(__TARGET_ID_SET.begin());
835//     if (*(__TARGET_ID_SET.rbegin()) > __MAX_SETS_ID) __MAX_SETS_ID = *(__TARGET_ID_SET.rbegin());
836//     //printf( "\n" ); fflush( stdout );
837// }
838
839
840void PS_get_leaf_candidates( PS_CandidatePtr  _candidate_parent,
841                             PS_CandidateSet &_leaf_candidates,
842                             const bool       _ignore_passes_left = false ) {
843
844    for ( PS_CandidateByGainMapIter next_candidate = _candidate_parent->children.begin();
845          next_candidate != _candidate_parent->children.end();
846          ++next_candidate ) {
847
848        if ((next_candidate->second->children.size() == 0) &&
849            ((next_candidate->second->passes_left > 0) || _ignore_passes_left)){
850            _leaf_candidates.insert( next_candidate->second );
851        }
852        PS_get_leaf_candidates( &(*next_candidate->second), _leaf_candidates, _ignore_passes_left );
853    }
854}
855
856
857// bool PS_test_candidate_sets_on_path( PS_CandidatePtr _candidate ) {
858//     int round = PS_Candidate::MAX_PASSES - _candidate->passes_left;
859//     __SOURCE_MIN_MATCH_COUNT = (_candidate->source_set->size() * (__MIN_PERCENTAGE_SET_MATCH - (round * 5))) / 100.0;
860//     __SOURCE_MAX_MATCH_COUNT = (_candidate->source_set->size() * (__MAX_PERCENTAGE_SET_MATCH + (round * 5))) / 100.0;
861//     __TARGET_MIN_MATCH_COUNT = (_candidate->target_set->size() * (__MIN_PERCENTAGE_SET_MATCH - (round * 5))) / 100.0;
862//     __TARGET_MAX_MATCH_COUNT = (_candidate->target_set->size() * (__MAX_PERCENTAGE_SET_MATCH + (round * 5))) / 100.0;
863//     long count_matched_source = 0;
864//     long count_matched_target = 0;
865//     SpeciesID  path_id;
866//     IDSetCIter path_end   = __PATH.end();
867//     IDSetCIter path       = __PATH.begin();
868//     IDSetCIter source_end = _candidate->source_set->end();
869//     IDSetCIter source     = _candidate->source_set->begin();
870//     IDSetCIter target_end = _candidate->target_set->end();
871//     IDSetCIter target     = _candidate->target_set->begin();
872
873//     while (path != path_end) {
874//         path_id = *path;
875//         while ((*target < path_id) && (target != target_end)) {
876//             ++target;
877//         }
878//         if ((target != target_end) && (*target == path_id))
879//             ++count_matched_target;
880//         while ((*source < path_id) && (source != source_end)) {
881//             ++source;
882//         }
883//         if ((source != source_end) && (*source == path_id))
884//             ++count_matched_source;
885//         ++path;
886//     }
887
888//     if ((count_matched_source > __SOURCE_MIN_MATCH_COUNT) &&
889//         (count_matched_source < __SOURCE_MAX_MATCH_COUNT) &&
890//         (count_matched_target > __TARGET_MIN_MATCH_COUNT) &&
891//         (count_matched_target < __TARGET_MAX_MATCH_COUNT)) {
892//         return true;
893//     }
894//     return false;
895// }
896
897void PS_get_next_candidates_descend( PS_NodePtr _ps_node, PS_CandidateSet &_leaf_candidates ) {
898    //  scan PS_node-tree for next candidates for each of _leaf_candidates
899   
900    SpeciesID id         = _ps_node->getNum();
901    bool      has_probes = _ps_node->hasProbes();
902
903    //
904    // append ID to path
905    //
906    __PATH.insert( id );
907
908    //
909    // dont look at path until ID is greater than lowest ID in the sets of IDs
910    // also dont use a node if its already used as candidate
911    //
912    if ((id >= __MIN_SETS_ID) && has_probes) {
913        unsigned long total_gain_of_node   = 0;
914        unsigned long total_tests_per_node = 0;
915        ++__PROBES_COUNTER;
916        // iterate over _leaf_candidates
917        for ( PS_CandidateSetIter candidate_iter = _leaf_candidates.begin();
918              candidate_iter != _leaf_candidates.end();
919              ++candidate_iter ) {
920            PS_CandidateSPtr candidate = *candidate_iter;
921
922            // next leaf-candidate if the probe was already used for current candidate
923            if (candidate->alreadyUsedNode( _ps_node )) continue;
924
925            ++__CANDIDATES_COUNTER; // possible candidates
926            if (__VERBOSE) printf( "%8lup %8lur %8luc %8lug %8luu\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b",
927                                   __PROBES_COUNTER,
928                                   __PROBES_REMOVED,
929                                   __CANDIDATES_COUNTER,
930                                   __CANDIDATES_TODO,
931                                   __CANDIDATES_FINISHED ); fflush( stdout );
932
933            // next leaf-candidate if __PATH doesnt fulfill matching criteria
934            unsigned long matches = candidate->matchPathOnOneFalseIDs( __PATH );
935            if (matches < candidate->one_false_IDs_matches) continue;
936
937            ++__CANDIDATES_TODO; // good candidates
938            if (__VERBOSE) printf( "%8lup %8lur %8luc %8lug %8luu\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b",
939                                   __PROBES_COUNTER,
940                                   __PROBES_REMOVED,
941                                   __CANDIDATES_COUNTER,
942                                   __CANDIDATES_TODO,
943                                   __CANDIDATES_FINISHED ); fflush( stdout );
944
945            // test node on candidate's bitmap
946            float filling_level;
947            ++total_tests_per_node;
948            unsigned long gain = PS_test_candidate_on_bitmap( &filling_level, candidate->map );
949            total_gain_of_node += gain;
950            if (candidate->updateBestChild( gain, matches, filling_level, _ps_node, __PATH )) {
951                ++__CANDIDATES_FINISHED; // used candiates
952                if (__VERBOSE) printf( "%8lup %8lur %8luc %8lug %8luu\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b",
953                                       __PROBES_COUNTER,
954                                       __PROBES_REMOVED,
955                                       __CANDIDATES_COUNTER,
956                                       __CANDIDATES_TODO,
957                                       __CANDIDATES_FINISHED ); fflush( stdout );
958            }
959        }
960        if ((total_tests_per_node == _leaf_candidates.size()) &&
961            (total_gain_of_node == 0)) {
962            ++__PROBES_REMOVED;
963            if (__VERBOSE) printf( "%8lup %8lur %8luc %8lug %8luu\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b",
964                                   __PROBES_COUNTER,
965                                   __PROBES_REMOVED,
966                                   __CANDIDATES_COUNTER,
967                                   __CANDIDATES_TODO,
968                                   __CANDIDATES_FINISHED ); fflush( stdout );
969            _ps_node->removeProbes();
970        }
971    }
972
973    //
974    // step down the children if either ID is lower than highest
975    // ID in the set of ID-pairs or the node has no probes
976    //
977    if ((id < __MAX_SETS_ID) || (! has_probes)) {
978        for ( PS_NodeMapConstIterator i = _ps_node->getChildrenBegin();
979              i != _ps_node->getChildrenEnd();
980              ++i) {
981            PS_get_next_candidates_descend( i->second, _leaf_candidates );
982        }
983    }
984
985    //
986    // remove ID from path
987    //
988    __PATH.erase( id );
989}
990
991
992void PS_get_next_candidates( const PS_NodePtr  _root_node,
993                             PS_CandidateSet  &_leaf_candidates ) {
994    struct tms before;
995    times( &before );
996    // first calc source/target/false_IDs sets
997    printf( "PS_get_next_candidates() : initializing %zu candidates..\n", _leaf_candidates.size() ); fflush( stdout );
998    __PROBES_COUNTER      = 0;
999    __PROBES_REMOVED      = 0;
1000    __CANDIDATES_COUNTER  = 0;
1001    __CANDIDATES_TODO     = 0;
1002    __CANDIDATES_FINISHED = 0;
1003    __MIN_SETS_ID         = __MAX_ID;
1004    __MAX_SETS_ID         = __MIN_ID;
1005    for ( PS_CandidateSetIter iter = _leaf_candidates.begin();
1006          iter != _leaf_candidates.end();
1007          ++iter ) {
1008        PS_CandidateSPtr candidate = *iter;
1009        if (!candidate->map) {          // if candidate has no map yet
1010            candidate->getParentMap();  // try to get its parent's map
1011            if (candidate->map) {       // if parent had a map (if not a new map is created in PS_calc_next_speciesid_sets_for_candidate
1012                PS_apply_path_to_bitmap( candidate->path, true, candidate->map ); // apply candidate's path
1013            }
1014        }
1015        PS_make_map_for_candidate( &(*candidate) );
1016        candidate->initFalseIDs( __MIN_ID, __MAX_ID, __MIN_SETS_ID, __MAX_SETS_ID );
1017        //candidate->print();
1018    }
1019    // scan tree
1020    printf( "PS_get_next_candidates() : " ); fflush( stdout );
1021    for ( PS_NodeMapConstIterator node_iter = _root_node->getChildrenBegin();
1022          (node_iter != _root_node->getChildrenEnd()) && (node_iter->first < __MAX_SETS_ID);
1023          ++node_iter ) {
1024        __PATH.clear();
1025        PS_get_next_candidates_descend( node_iter->second, _leaf_candidates );
1026    }
1027    printf( "looked at probes (%lu) -> removed probes (%lu)   possible candidates (%lu) -> good candidates (%lu) -> used candidates (%lu)\nPS_get_next_candidates() : ",
1028            __PROBES_COUNTER,
1029            __PROBES_REMOVED,
1030            __CANDIDATES_COUNTER,
1031            __CANDIDATES_TODO,
1032            __CANDIDATES_FINISHED ); fflush( stdout );
1033    PS_print_time_diff( &before );
1034
1035    // 'descend' candidates
1036    for ( PS_CandidateSetIter iter = _leaf_candidates.begin();
1037          iter != _leaf_candidates.end();
1038          ++iter ) {
1039        PS_CandidateSPtr candidate = *iter;
1040        candidate->print();
1041        candidate->decreasePasses();
1042    }
1043
1044    _leaf_candidates.clear();
1045    PS_get_leaf_candidates( __CANDIDATES_ROOT, _leaf_candidates );
1046}
1047
1048//  ====================================================
1049//  ====================================================
1050int main( int   argc,
1051          char *argv[] ) {
1052    //
1053    // check arguments
1054    //
1055    if (argc < 5) {
1056        printf( "Missing argument\n Usage %s <database name> <result filename> <[-]candidates filename> <final candidates filename> [--verbose] [result filename prefix for output files]\n", argv[0] );
1057        exit( 1 );
1058    }
1059
1060    const char *input_DB_name             = argv[1];
1061    const char *result_in_filename        = argv[2];
1062    const char *candidates_filename       = argv[3];
1063    const char *final_candidates_filename = argv[4];
1064    const char *result_out_prefix         = (argc > 5) ? argv[5] : 0;
1065    if (argc > 5) {
1066        if (strcmp(result_out_prefix,"--verbose") == 0) {
1067            __VERBOSE = true;
1068            result_out_prefix  = (argc > 6) ? argv[6] : 0;
1069        }
1070    }
1071    //
1072    // open probe-set-database
1073    //
1074    printf( "Opening probe-set-database '%s'..\n", input_DB_name );
1075    PS_Database *db = new PS_Database( input_DB_name, PS_Database::READONLY );
1076    db->load();
1077    __MAX_ID = db->getMaxID();
1078    __MIN_ID = db->getMinID();
1079    __SPECIES_COUNT = db->getSpeciesCount();
1080    printf( "min ID (%i)  max ID (%i)  count of species (%i)\n", __MIN_ID, __MAX_ID, __SPECIES_COUNT );
1081    printf( "..loaded database (enter to continue)\n" ); fflush( stdout );
1082
1083    //
1084    // open result file for preset bitmap
1085    //
1086    printf( "Opening result file %s..\n", result_in_filename );
1087    PS_FileBuffer *result_file = new PS_FileBuffer( result_in_filename, PS_FileBuffer::READONLY );
1088    long size;
1089    ID2IDSet noMatches;
1090    SpeciesID id1,id2;
1091    printf( "reading no matches : " );
1092    result_file->get_long( size );
1093    printf( "(%li)\n", size ); fflush( stdout );
1094    for ( long i=0;
1095          i < size;
1096          ++i ) {
1097        result_file->get_int( id1 );
1098        result_file->get_int( id2 );
1099        printf( "  %i,%i", id1, id2 );
1100        if (id1 < id2) {
1101            noMatches.insert( ID2IDPair(id1,id2) );
1102        } else {
1103            noMatches.insert( ID2IDPair(id2,id1) );
1104        }
1105    }
1106    printf( "\nreading one matches : " );
1107    result_file->get_long( size );
1108    printf( "(%li)\n", size ); fflush( stdout );
1109    long          path_length;
1110    SpeciesID     path_id;
1111    IDSet         path;
1112    IDID2IDSetMap oneMatchesMap;
1113    for ( long i=0;
1114          i < size;
1115          ++i) {
1116        path.clear();
1117        result_file->get_int( id1 );
1118        result_file->get_int( id2 );
1119        result_file->get_long( path_length );
1120        printf( "%i,%i path(%li)", id1, id2, path_length );
1121        while (path_length-- > 0) {
1122            result_file->get_int( path_id );
1123            printf( " %i", path_id );
1124            path.insert( path_id );
1125        }
1126        printf( "\n" );
1127        oneMatchesMap[ ID2IDPair(id1,id2) ] = path;
1128    }
1129    printf( "loading preset bitmap..\n" ); fflush( stdout );
1130    __BITS_IN_MAP = ((__MAX_ID+1)*__MAX_ID)/2 + __MAX_ID+1;
1131    __MAP         = new PS_BitMap_Counted( result_file );
1132    __PRESET_MAP  = new PS_BitMap_Counted( false, __MAX_ID+1 );
1133    for (id1 = 0; id1 < __MIN_ID; ++id1 ) {
1134        for (id2 = 0; id2 <= __MAX_ID; ++id2 ) {
1135            if (id1 > id2) {
1136                __MAP->setTrue( id1,id2 );
1137            } else {
1138                __MAP->setTrue( id2,id1 );
1139            }
1140        }
1141    }
1142    for ( SpeciesID id = 0; id <= __MAX_ID; ++id ) {
1143        __MAP->setTrue( id,id );
1144    }
1145    __MAP->recalcCounters();
1146    __PRESET_MAP->copy( __MAP );
1147    printf( "..loaded result file (enter to continue)\n" );
1148    printf( "cleaning up... result file\n" ); fflush( stdout );
1149    delete result_file;
1150
1151    //
1152    // create or read candidates
1153    //
1154    __CANDIDATES_ROOT = new PS_Candidate( 0.0 );
1155    PS_FileBuffer *candidates_file;
1156    struct tms before;
1157    if (candidates_filename[0] != '-') {
1158        printf( "searching candidates..\n" );
1159        //
1160        // recursively build a tree of candidates
1161        //
1162        __MAX_DEPTH = 0;
1163        for ( long species_count = __SPECIES_COUNT; species_count > 0; species_count >>= 1 ) {
1164            ++__MAX_DEPTH;
1165        }
1166        __MAX_DEPTH = (__MAX_DEPTH * 3) >> 1;
1167        __PATH_IN_CANDIDATES = (char *)calloc( __MAX_DEPTH+1, sizeof(char) );
1168        __CANDIDATES_TODO = 0;
1169        __CANDIDATES_FINISHED = 0;
1170        times( &before );
1171        PS_descend( __CANDIDATES_ROOT, db->getConstRootNode(), 0, 0.0 );
1172        printf( "PS_descend : total " );
1173        PS_print_time_diff( &before );
1174
1175        //
1176        // save candidates
1177        //
1178        printf( "saving candidates to %s..\n", candidates_filename );
1179        candidates_file = new PS_FileBuffer( candidates_filename, PS_FileBuffer::WRITEONLY );
1180        __CANDIDATES_ROOT->false_IDs = __BITS_IN_MAP;
1181        __CANDIDATES_ROOT->save( candidates_file, __BITS_IN_MAP );
1182    } else {
1183        printf( "loading candidates..\n" );
1184        candidates_file = new PS_FileBuffer( candidates_filename+1, PS_FileBuffer::READONLY );
1185        __CANDIDATES_ROOT->load( candidates_file, __BITS_IN_MAP, db->getConstRootNode() );
1186        printf( "..loaded candidates file (enter to continue)\n" );
1187    }
1188    printf( "cleaning up... candidates file\n" ); fflush( stdout );
1189    delete candidates_file;
1190
1191    //
1192    // scan candidates-tree for leaf candidates
1193    //
1194    printf( "CANDIDATES :\n" );
1195    __CANDIDATES_ROOT->print();
1196    printf( "\nsearching leaf candidates.. " );
1197    PS_CandidateSet leaf_candidates;
1198    PS_get_leaf_candidates( __CANDIDATES_ROOT, leaf_candidates );
1199    printf( "%zu\n", leaf_candidates.size() );
1200
1201    //
1202    // scan tree for next candidates (below the ones in leaf_candidates)
1203    //
1204    times( &before );
1205    long round = 0;
1206    while ((leaf_candidates.size() > 0) &&
1207           (round < 200)) {
1208        printf( "\nsearching next candidates [round #%li]..\n", ++round );
1209        PS_get_next_candidates( db->getConstRootNode(), leaf_candidates );
1210
1211//         printf( "\nCANDIDATES :\n" );
1212//         __CANDIDATES_ROOT->print();
1213        printf( "rounds %li total ", round );
1214        PS_print_time_diff( &before );
1215        // getchar();
1216    }
1217
1218    printf( "\nFINAL CANDIDATES:\n" );
1219    __CANDIDATES_ROOT->print( 0,true );
1220    printf( "\nfinal leaf candidates.. (%zu)\n", leaf_candidates.size() );
1221    PS_get_leaf_candidates( __CANDIDATES_ROOT, leaf_candidates, true );
1222    for ( PS_CandidateSetIter c = leaf_candidates.begin();
1223          c != leaf_candidates.end();
1224          ++c ) {
1225        (*(*c)).print();
1226    }
1227
1228    //
1229    // save final candidates
1230    //
1231    printf( "saving final candidates to %s..\n", final_candidates_filename );
1232    candidates_file = new PS_FileBuffer( final_candidates_filename, PS_FileBuffer::WRITEONLY );
1233    __CANDIDATES_ROOT->save( candidates_file, __BITS_IN_MAP );
1234    printf( "cleaning up... candidates file\n" ); fflush( stdout );
1235    delete candidates_file;
1236
1237    //
1238    // starting with the best candidate print the __MAP for each
1239    // depth walking up to root of candidates-tree
1240    //
1241    if (result_out_prefix) {
1242        // put __MAP in the state 'after applying best_candidate and its parents'
1243        PS_CandidateSPtr best_candidate_smart = *leaf_candidates.begin();
1244        PS_CandidatePtr  best_candidate       = &(*best_candidate_smart);
1245        PS_ascend( best_candidate );
1246        PS_apply_path_to_bitmap( best_candidate->path );
1247        while (best_candidate) {
1248            PS_GNUPlot( result_out_prefix, __MAX_DEPTH--, best_candidate->path, noMatches );
1249            best_candidate = PS_ascend( best_candidate );
1250        }
1251    }
1252
1253    //
1254    // clean up
1255    //
1256    printf( "cleaning up... candidates\n" ); fflush( stdout );
1257    delete __CANDIDATES_ROOT;
1258    free( __PATH_IN_CANDIDATES );
1259    printf( "cleaning up... database\n" ); fflush( stdout );
1260    delete db;
1261    printf( "cleaning up... bitmaps\n" ); fflush( stdout );
1262    delete __PRESET_MAP;
1263    delete __MAP;
1264
1265    return 0;
1266}
Note: See TracBrowser for help on using the repository browser.