| 1 | #include <cmath> |
|---|
| 2 | #include <cstdio> |
|---|
| 3 | #include <cstdlib> |
|---|
| 4 | #include <cstring> |
|---|
| 5 | #include <iostream> |
|---|
| 6 | |
|---|
| 7 | #include <sys/times.h> |
|---|
| 8 | |
|---|
| 9 | #include "ps_tools.hxx" |
|---|
| 10 | #include "ps_database.hxx" |
|---|
| 11 | #include "ps_bitmap.hxx" |
|---|
| 12 | #include "ps_candidate.hxx" |
|---|
| 13 | |
|---|
| 14 | using namespace std; |
|---|
| 15 | |
|---|
| 16 | template<class T> |
|---|
| 17 | void PS_print_set_ranges( const char *_set_name, const set<T> &_set, const bool _cr_at_end = true ) { |
|---|
| 18 | fflush( stdout ); |
|---|
| 19 | printf( "%s size (%3zu) : ", _set_name, _set.size() ); |
|---|
| 20 | if (_set.size() == 0) { |
|---|
| 21 | printf( "(empty)" ); |
|---|
| 22 | } else { |
|---|
| 23 | typename set<T>::const_iterator range_begin = _set.begin(); |
|---|
| 24 | typename set<T>::const_iterator range_end = range_begin; |
|---|
| 25 | typename set<T>::const_iterator it = _set.begin(); |
|---|
| 26 | for ( ++it; |
|---|
| 27 | it != _set.end(); |
|---|
| 28 | ++it ) { |
|---|
| 29 | if (*it == (*range_end)+1) { |
|---|
| 30 | range_end = it; |
|---|
| 31 | } else { |
|---|
| 32 | if (range_end == range_begin) { |
|---|
| 33 | cout << *range_begin << " "; |
|---|
| 34 | } else { |
|---|
| 35 | cout << *range_begin << "-" << *range_end << " "; |
|---|
| 36 | } |
|---|
| 37 | range_begin = it; |
|---|
| 38 | range_end = it; |
|---|
| 39 | } |
|---|
| 40 | } |
|---|
| 41 | cout << *range_begin; |
|---|
| 42 | if (range_end != range_begin) cout << "-" << *range_end; |
|---|
| 43 | } |
|---|
| 44 | if (_cr_at_end) cout << "\n"; |
|---|
| 45 | fflush( stdout ); |
|---|
| 46 | } |
|---|
| 47 | |
|---|
| 48 | |
|---|
| 49 | template<class T1,class T2> |
|---|
| 50 | void PS_print_map_ranges( const char *_map_name, const map<T1,T2> &_map, const bool _compare_keys = true, const bool _cr_at_end = true ) { |
|---|
| 51 | fflush( stdout ); |
|---|
| 52 | if (_map.size() == 0) { |
|---|
| 53 | printf( "%s size ( 0) : (empty)", _map_name ); |
|---|
| 54 | } else { |
|---|
| 55 | if (_compare_keys) { |
|---|
| 56 | printf( "%s size (%3i) : ", _map_name, _map.size() ); |
|---|
| 57 | typename map<T1,T2>::const_iterator range_begin = _map.begin(); |
|---|
| 58 | typename map<T1,T2>::const_iterator range_end = range_begin; |
|---|
| 59 | typename map<T1,T2>::const_iterator it = _map.begin(); |
|---|
| 60 | for ( ++it; |
|---|
| 61 | it != _map.end(); |
|---|
| 62 | ++it ) { |
|---|
| 63 | if (it->first == range_end->first+1) { |
|---|
| 64 | range_end = it; |
|---|
| 65 | } else { |
|---|
| 66 | if (range_end == range_begin) { |
|---|
| 67 | cout << "(" << range_begin->first << "," << range_begin->second << ") "; |
|---|
| 68 | } else { |
|---|
| 69 | cout << "(" << range_begin->first << "-" << range_end->first << "," << range_begin->second << ") "; |
|---|
| 70 | } |
|---|
| 71 | range_begin = it; |
|---|
| 72 | range_end = it; |
|---|
| 73 | } |
|---|
| 74 | } |
|---|
| 75 | if (range_end == range_begin) { |
|---|
| 76 | cout << "(" << range_begin->first << "," << range_begin->second << ") "; |
|---|
| 77 | } else { |
|---|
| 78 | cout << "(" << range_begin->first << "-" << range_end->first << "," << range_begin->second << ") "; |
|---|
| 79 | } |
|---|
| 80 | } else { |
|---|
| 81 | map<T2,set<pair<T1,T1> > > value2indices; |
|---|
| 82 | map<T2,unsigned long int> value2count; |
|---|
| 83 | pair<T1,T1> range; |
|---|
| 84 | range.first = _map.begin()->first; |
|---|
| 85 | range.second = range.first; |
|---|
| 86 | T2 cur_value = _map.begin()->second; |
|---|
| 87 | typename map<T1,T2>::const_iterator it = _map.begin(); |
|---|
| 88 | for ( ++it; |
|---|
| 89 | it != _map.end(); |
|---|
| 90 | ++it ) { |
|---|
| 91 | if (it->second == cur_value) { |
|---|
| 92 | if (it->first == range.second+1) { |
|---|
| 93 | range.second = it->first; |
|---|
| 94 | } else { |
|---|
| 95 | value2indices[ cur_value ].insert( range ); |
|---|
| 96 | value2count[ cur_value ] += range.second - range.first +1; |
|---|
| 97 | range.first = it->first; |
|---|
| 98 | range.second = it->first; |
|---|
| 99 | } |
|---|
| 100 | } else { |
|---|
| 101 | value2indices[ cur_value ].insert( range ); |
|---|
| 102 | value2count[ cur_value ] += range.second - range.first +1; |
|---|
| 103 | range.first = it->first; |
|---|
| 104 | range.second = it->first; |
|---|
| 105 | cur_value = it->second; |
|---|
| 106 | } |
|---|
| 107 | } |
|---|
| 108 | value2indices[ cur_value ].insert( range ); |
|---|
| 109 | value2count[ cur_value ] += range.second - range.first +1; |
|---|
| 110 | for ( typename map<T2,set<pair<T1,T1> > >::const_iterator value = value2indices.begin(); |
|---|
| 111 | value != value2indices.end(); |
|---|
| 112 | ++value ) { |
|---|
| 113 | if (value != value2indices.begin()) cout << "\n"; |
|---|
| 114 | printf( "%s size (%3i) : value (", _map_name, _map.size() ); |
|---|
| 115 | cout << value->first << ") count (" << value2count[ value->first ] << ") ["; |
|---|
| 116 | for ( typename set<pair<T1,T1> >::const_iterator indices = value->second.begin(); |
|---|
| 117 | indices != value->second.end(); |
|---|
| 118 | ++indices ) { |
|---|
| 119 | if (indices != value->second.begin()) cout << " "; |
|---|
| 120 | cout << indices->first; |
|---|
| 121 | if (indices->first != indices->second) cout << "-" << indices->second; |
|---|
| 122 | } |
|---|
| 123 | cout << "]," << value->first << ")"; |
|---|
| 124 | } |
|---|
| 125 | } |
|---|
| 126 | } |
|---|
| 127 | if (_cr_at_end) cout << "\n"; |
|---|
| 128 | fflush( stdout ); |
|---|
| 129 | } |
|---|
| 130 | |
|---|
| 131 | // |
|---|
| 132 | // common globals |
|---|
| 133 | // |
|---|
| 134 | |
|---|
| 135 | bool __VERBOSE = false; |
|---|
| 136 | SpeciesID __MAX_ID; |
|---|
| 137 | SpeciesID __MIN_ID; |
|---|
| 138 | SpeciesID __SPECIES_COUNT; |
|---|
| 139 | unsigned long __BITS_IN_MAP; |
|---|
| 140 | PS_BitMap_Counted *__MAP; |
|---|
| 141 | PS_BitMap_Counted *__PRESET_MAP; |
|---|
| 142 | IDSet __SOURCE_ID_SET; |
|---|
| 143 | PS_BitSet::IndexSet __TARGET_ID_SET; |
|---|
| 144 | |
|---|
| 145 | // |
|---|
| 146 | // globals for PS_calc_next_speciesid_sets, |
|---|
| 147 | // PS_calc_next_speciesid_sets_for_candidate |
|---|
| 148 | // |
|---|
| 149 | |
|---|
| 150 | // count of trues for a source_id may be max 5% |
|---|
| 151 | // (of the difference between __SPECIES_COUNT and the lowest |
|---|
| 152 | // count of trues per species) higher than the lowest count |
|---|
| 153 | // of trues for the species in __SOURCE_ID_SET |
|---|
| 154 | #define __TRESHOLD_PERCENTAGE_NEXT_SOURCE_ID_SET 5 |
|---|
| 155 | // target_id must be at least in 95% of total target_id_sets |
|---|
| 156 | // to be in __TARGET_ID_SET |
|---|
| 157 | #define __TRESHOLD_PERCENTAGE_NEXT_TARGET_ID_SET 95 |
|---|
| 158 | |
|---|
| 159 | SpeciesID __MIN_SETS_ID; |
|---|
| 160 | SpeciesID __MAX_SETS_ID; |
|---|
| 161 | |
|---|
| 162 | // |
|---|
| 163 | // globals for PS_find_probes |
|---|
| 164 | // |
|---|
| 165 | |
|---|
| 166 | // initially a probe must match 40-60% of species |
|---|
| 167 | // in the ID-sets to be a candidate |
|---|
| 168 | // if PS_find_probes fails to find candidates within this range |
|---|
| 169 | // it is repeated with a broadened range (by 5% each border) |
|---|
| 170 | #define __MIN_PERCENTAGE_SET_MATCH 40 |
|---|
| 171 | #define __MAX_PERCENTAGE_SET_MATCH 60 |
|---|
| 172 | |
|---|
| 173 | float __SOURCE_MIN_MATCH_COUNT; |
|---|
| 174 | float __SOURCE_MAX_MATCH_COUNT; |
|---|
| 175 | float __TARGET_MIN_MATCH_COUNT; |
|---|
| 176 | float __TARGET_MAX_MATCH_COUNT; |
|---|
| 177 | float __SOURCE_PERFECT_MATCH_COUNT; |
|---|
| 178 | float __TARGET_PERFECT_MATCH_COUNT; |
|---|
| 179 | unsigned long __PROBES_COUNTER; |
|---|
| 180 | unsigned long __PROBES_REMOVED; |
|---|
| 181 | PS_CandidatePtr __CANDIDATES_ROOT; |
|---|
| 182 | unsigned long __CANDIDATES_COUNTER; |
|---|
| 183 | unsigned long __CANDIDATES_TODO; |
|---|
| 184 | unsigned long __CANDIDATES_FINISHED; |
|---|
| 185 | unsigned long __MAX_DEPTH; |
|---|
| 186 | |
|---|
| 187 | // |
|---|
| 188 | // globals for PS_find_probe_for_sets |
|---|
| 189 | // PS_test_candidate_on_bitmap |
|---|
| 190 | // |
|---|
| 191 | |
|---|
| 192 | IDSet __PATH; |
|---|
| 193 | |
|---|
| 194 | // |
|---|
| 195 | // globals for PS_descend |
|---|
| 196 | // |
|---|
| 197 | char *__PATH_IN_CANDIDATES; |
|---|
| 198 | |
|---|
| 199 | |
|---|
| 200 | unsigned long int PS_test_candidate_on_bitmap( float *_filling_level = 0, |
|---|
| 201 | PS_BitMap_Counted *_map = 0 ) { |
|---|
| 202 | // returns number of locations in __MAP that would be switched |
|---|
| 203 | // from false to true by __PATH |
|---|
| 204 | |
|---|
| 205 | // iterate over all IDs except path |
|---|
| 206 | IDSetCIter path_iter = __PATH.begin(); |
|---|
| 207 | SpeciesID next_path_ID = *path_iter; |
|---|
| 208 | unsigned long int gain = 0; |
|---|
| 209 | if (_map) { |
|---|
| 210 | for ( SpeciesID not_in_path_ID = __MIN_ID; |
|---|
| 211 | not_in_path_ID <= __MAX_ID; |
|---|
| 212 | ++not_in_path_ID) { |
|---|
| 213 | if (not_in_path_ID == next_path_ID) { // if i run into a ID in path |
|---|
| 214 | ++path_iter; // advance to next path ID |
|---|
| 215 | next_path_ID = (path_iter == __PATH.end()) ? -1 : *path_iter; |
|---|
| 216 | continue; // skip this ID |
|---|
| 217 | } |
|---|
| 218 | // iterate over path IDs |
|---|
| 219 | for ( IDSetCIter path_ID = __PATH.begin(); |
|---|
| 220 | path_ID != __PATH.end(); |
|---|
| 221 | ++path_ID) { |
|---|
| 222 | if (not_in_path_ID == *path_ID) |
|---|
| 223 | continue; // obviously a probe cant differ a species from itself |
|---|
| 224 | if (not_in_path_ID > *path_ID) { |
|---|
| 225 | gain += (_map->get( not_in_path_ID, *path_ID ) == false); |
|---|
| 226 | } else { |
|---|
| 227 | gain += (_map->get( *path_ID, not_in_path_ID ) == false); |
|---|
| 228 | } |
|---|
| 229 | } |
|---|
| 230 | } |
|---|
| 231 | } else { |
|---|
| 232 | for ( SpeciesID not_in_path_ID = __MIN_ID; |
|---|
| 233 | not_in_path_ID <= __MAX_ID; |
|---|
| 234 | ++not_in_path_ID) { |
|---|
| 235 | if (not_in_path_ID == next_path_ID) { // if i run into a ID in path |
|---|
| 236 | ++path_iter; // advance to next path ID |
|---|
| 237 | next_path_ID = (path_iter == __PATH.end()) ? -1 : *path_iter; |
|---|
| 238 | continue; // skip this ID |
|---|
| 239 | } |
|---|
| 240 | // iterate over path IDs |
|---|
| 241 | for ( IDSetCIter path_ID = __PATH.begin(); |
|---|
| 242 | path_ID != __PATH.end(); |
|---|
| 243 | ++path_ID) { |
|---|
| 244 | if (not_in_path_ID == *path_ID) |
|---|
| 245 | continue; // obviously a probe cant differ a species from itself |
|---|
| 246 | if (not_in_path_ID > *path_ID) { |
|---|
| 247 | gain += (__MAP->get( not_in_path_ID, *path_ID ) == false); |
|---|
| 248 | } else { |
|---|
| 249 | gain += (__MAP->get( *path_ID, not_in_path_ID ) == false); |
|---|
| 250 | } |
|---|
| 251 | } |
|---|
| 252 | } |
|---|
| 253 | } |
|---|
| 254 | if (_filling_level) { |
|---|
| 255 | unsigned long int trues = (_map) ? _map->getCountOfTrues() + gain : __MAP->getCountOfTrues() + gain; |
|---|
| 256 | *_filling_level = ((float)trues / __BITS_IN_MAP)*100.0; |
|---|
| 257 | } |
|---|
| 258 | return gain; |
|---|
| 259 | } |
|---|
| 260 | |
|---|
| 261 | |
|---|
| 262 | bool PS_test_sets_on_path( float &_distance ) { |
|---|
| 263 | long count_matched_source = 0; |
|---|
| 264 | long count_matched_target = 0; |
|---|
| 265 | SpeciesID path_id; |
|---|
| 266 | IDSetCIter path_end = __PATH.end(); |
|---|
| 267 | IDSetCIter path = __PATH.begin(); |
|---|
| 268 | IDSetCIter source_end = __SOURCE_ID_SET.end(); |
|---|
| 269 | IDSetCIter source = __SOURCE_ID_SET.begin(); |
|---|
| 270 | PS_BitSet::IndexSet::const_iterator target_end = __TARGET_ID_SET.end(); |
|---|
| 271 | PS_BitSet::IndexSet::const_iterator target = __TARGET_ID_SET.begin(); |
|---|
| 272 | |
|---|
| 273 | while (path != path_end) { |
|---|
| 274 | path_id = *path; |
|---|
| 275 | while ((*target < path_id) && (target != target_end)) { |
|---|
| 276 | ++target; |
|---|
| 277 | } |
|---|
| 278 | if ((target != target_end) && (*target == path_id)) |
|---|
| 279 | ++count_matched_target; |
|---|
| 280 | while ((*source < path_id) && (source != source_end)) { |
|---|
| 281 | ++source; |
|---|
| 282 | } |
|---|
| 283 | if ((source != source_end) && (*source == path_id)) |
|---|
| 284 | ++count_matched_source; |
|---|
| 285 | ++path; |
|---|
| 286 | } |
|---|
| 287 | |
|---|
| 288 | if ((count_matched_source > __SOURCE_MIN_MATCH_COUNT) && |
|---|
| 289 | (count_matched_source < __SOURCE_MAX_MATCH_COUNT) && |
|---|
| 290 | (count_matched_target > __TARGET_MIN_MATCH_COUNT) && |
|---|
| 291 | (count_matched_target < __TARGET_MAX_MATCH_COUNT)) { |
|---|
| 292 | _distance = fabs( __SOURCE_PERFECT_MATCH_COUNT - count_matched_source ) + fabs( __TARGET_PERFECT_MATCH_COUNT - count_matched_target ); |
|---|
| 293 | return true; |
|---|
| 294 | } |
|---|
| 295 | return false; |
|---|
| 296 | } |
|---|
| 297 | |
|---|
| 298 | void PS_find_probe_for_sets( const PS_NodePtr _ps_node, PS_CandidatePtr _candidate_parent ) { |
|---|
| 299 | // scan subtree starting with _ps_node for candidates |
|---|
| 300 | |
|---|
| 301 | SpeciesID id = _ps_node->getNum(); |
|---|
| 302 | bool has_probes = _ps_node->hasProbes(); |
|---|
| 303 | |
|---|
| 304 | // |
|---|
| 305 | // append ID to path |
|---|
| 306 | // |
|---|
| 307 | __PATH.insert( id ); |
|---|
| 308 | |
|---|
| 309 | // |
|---|
| 310 | // dont look at path until ID is greater than lowest ID in the sets of IDs |
|---|
| 311 | // also dont use a node if its already used as candidate |
|---|
| 312 | // |
|---|
| 313 | if ((id >= __MIN_SETS_ID) && has_probes && !_candidate_parent->alreadyUsedNode(_ps_node)) { |
|---|
| 314 | ++__PROBES_COUNTER; |
|---|
| 315 | if (__VERBOSE && (__PROBES_COUNTER % 100 == 0)) printf( "%8lup %8luc\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", __PROBES_COUNTER, __CANDIDATES_COUNTER ); fflush( stdout ); |
|---|
| 316 | // make intersections of __PATH with __SOURCE_ID_SET and __TARGET_ID_SET |
|---|
| 317 | float distance_to_perfect_match; |
|---|
| 318 | if (PS_test_sets_on_path( distance_to_perfect_match )) { |
|---|
| 319 | unsigned long gain = PS_test_candidate_on_bitmap(); // no -> calc gain and make new candidate node |
|---|
| 320 | int status = (gain < (unsigned)__SPECIES_COUNT / 100) |
|---|
| 321 | ? 0 |
|---|
| 322 | : _candidate_parent->addChild( static_cast<unsigned long>(distance_to_perfect_match), gain, _ps_node, __PATH ); |
|---|
| 323 | if (status > 0) { |
|---|
| 324 | if (status == 2) { |
|---|
| 325 | // printf( " PS_find_probe_for_sets() : new candidate: count_matched_source (%li/%.0f) count_matched_target (%li/%.0f) distance (%4.0f) path (%i) gain (%li) node (%p)\n", |
|---|
| 326 | // count_matched_source, __SOURCE_PERFECT_MATCH_COUNT, |
|---|
| 327 | // count_matched_target, __TARGET_PERFECT_MATCH_COUNT, |
|---|
| 328 | // distance_to_perfect_match, |
|---|
| 329 | // __PATH.size(), gain, &(*_ps_node) ); |
|---|
| 330 | ++__CANDIDATES_COUNTER; |
|---|
| 331 | if (__VERBOSE) printf( "%8lup %8luc\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", __PROBES_COUNTER, __CANDIDATES_COUNTER ); fflush( stdout ); |
|---|
| 332 | // } else { |
|---|
| 333 | // printf( " PS_find_probe_for_sets() : upd candidate: count_matched_source (%li/%.0f) count_matched_target (%li/%.0f) distance (%4.0f) path (%i) gain (%li) node (%p)\n", |
|---|
| 334 | // count_matched_source, __SOURCE_PERFECT_MATCH_COUNT, |
|---|
| 335 | // count_matched_target, __TARGET_PERFECT_MATCH_COUNT, |
|---|
| 336 | // distance_to_perfect_match, |
|---|
| 337 | // __PATH.size(), gain, &(*_ps_node) ); |
|---|
| 338 | } |
|---|
| 339 | fflush( stdout ); |
|---|
| 340 | } // if status (of addChild) |
|---|
| 341 | } |
|---|
| 342 | } |
|---|
| 343 | |
|---|
| 344 | // |
|---|
| 345 | // step down the children if either ID is lower than highest |
|---|
| 346 | // ID in the set of ID-pairs or the node has no probes |
|---|
| 347 | // |
|---|
| 348 | if ((id < __MAX_SETS_ID) || (! has_probes)) { |
|---|
| 349 | for ( PS_NodeMapConstIterator i = _ps_node->getChildrenBegin(); |
|---|
| 350 | i != _ps_node->getChildrenEnd(); |
|---|
| 351 | ++i) { |
|---|
| 352 | PS_find_probe_for_sets( i->second, _candidate_parent ); |
|---|
| 353 | } |
|---|
| 354 | } |
|---|
| 355 | |
|---|
| 356 | // |
|---|
| 357 | // remove ID from path |
|---|
| 358 | // |
|---|
| 359 | __PATH.erase( id ); |
|---|
| 360 | } |
|---|
| 361 | |
|---|
| 362 | void PS_find_probes( const PS_NodePtr _root_node, const int _round, PS_CandidatePtr _candidate_parent, const float _filling_level ) { |
|---|
| 363 | // scan PS_Node-tree for candidates to raise filling level of __MAP |
|---|
| 364 | |
|---|
| 365 | __PROBES_COUNTER = 0; |
|---|
| 366 | __CANDIDATES_COUNTER = 0; |
|---|
| 367 | __SOURCE_MIN_MATCH_COUNT = (__SOURCE_ID_SET.size() * (__MIN_PERCENTAGE_SET_MATCH - (_round * 5))) / 100.0; |
|---|
| 368 | __SOURCE_MAX_MATCH_COUNT = (__SOURCE_ID_SET.size() * (__MAX_PERCENTAGE_SET_MATCH + (_round * 5))) / 100.0; |
|---|
| 369 | __TARGET_MIN_MATCH_COUNT = (__TARGET_ID_SET.size() * (__MIN_PERCENTAGE_SET_MATCH - (_round * 5))) / 100.0; |
|---|
| 370 | __TARGET_MAX_MATCH_COUNT = (__TARGET_ID_SET.size() * (__MAX_PERCENTAGE_SET_MATCH + (_round * 5))) / 100.0; |
|---|
| 371 | __SOURCE_PERFECT_MATCH_COUNT = __SOURCE_ID_SET.size() / 2.0; |
|---|
| 372 | __TARGET_PERFECT_MATCH_COUNT = __TARGET_ID_SET.size() / 2.0; |
|---|
| 373 | __MIN_SETS_ID = (*__SOURCE_ID_SET.begin() < *__TARGET_ID_SET.begin()) ? *__SOURCE_ID_SET.begin() : *__TARGET_ID_SET.begin(); |
|---|
| 374 | __MAX_SETS_ID = (*__SOURCE_ID_SET.rbegin() < *__TARGET_ID_SET.rbegin()) ? *__SOURCE_ID_SET.rbegin() : *__TARGET_ID_SET.rbegin(); |
|---|
| 375 | printf( "PS_find_probes(%i) : source match %10.3f .. %10.3f target match %10.3f .. %10.3f\n", _round, __SOURCE_MIN_MATCH_COUNT, __SOURCE_MAX_MATCH_COUNT, __TARGET_MIN_MATCH_COUNT, __TARGET_MAX_MATCH_COUNT ); fflush( stdout ); |
|---|
| 376 | // int c = 0; |
|---|
| 377 | struct tms before; |
|---|
| 378 | times( &before ); |
|---|
| 379 | for ( PS_NodeMapConstIterator i = _root_node->getChildrenBegin(); |
|---|
| 380 | (i != _root_node->getChildrenEnd()) && (i->first < __MAX_SETS_ID); |
|---|
| 381 | ++i/*,++c*/ ) { |
|---|
| 382 | //if (c % 100 == 0) printf( "PS_find_probe_for_sets( %i ) : %i/%i 1st level nodes -> %li/%li candidates of %li probes looked at\n", i->first, c+1, _root_node->countChildren(), __CANDIDATES_COUNTER, __CANDIDATES_TOTAL_COUNTER, __PROBES_TOTAL_COUNTER ); |
|---|
| 383 | __PATH.clear(); |
|---|
| 384 | PS_find_probe_for_sets( i->second, _candidate_parent ); |
|---|
| 385 | } |
|---|
| 386 | printf( "PS_find_probes(%i) : %li candidates of %li probes visited ", _round, __CANDIDATES_COUNTER, __PROBES_COUNTER ); |
|---|
| 387 | PS_print_time_diff( &before ); |
|---|
| 388 | // reduce candidates to best,worst and middle gain |
|---|
| 389 | _candidate_parent->reduceChildren( _filling_level ); |
|---|
| 390 | for ( PS_CandidateByGainMapCIter c = _candidate_parent->children.begin(); |
|---|
| 391 | c != _candidate_parent->children.end(); |
|---|
| 392 | ++c ) { |
|---|
| 393 | printf( "PS_find_probes(%i) : ", _round ); |
|---|
| 394 | c->second->print(); |
|---|
| 395 | ++__CANDIDATES_TODO; |
|---|
| 396 | } |
|---|
| 397 | fflush( stdout ); |
|---|
| 398 | } |
|---|
| 399 | |
|---|
| 400 | void PS_calc_next_speciesid_sets() { |
|---|
| 401 | // scan __MAP to find sets of IDs that need differentiation most |
|---|
| 402 | |
|---|
| 403 | // |
|---|
| 404 | // 1. __SOURCE_ID_SET |
|---|
| 405 | // scan bitmap for species that need more matches |
|---|
| 406 | // |
|---|
| 407 | SpeciesID highest_count; |
|---|
| 408 | SpeciesID lowest_count; |
|---|
| 409 | float treshold; |
|---|
| 410 | SpeciesID count; |
|---|
| 411 | |
|---|
| 412 | // first pass -- get lowest count of trues and calc treshold |
|---|
| 413 | lowest_count = __SPECIES_COUNT; |
|---|
| 414 | highest_count = 0; |
|---|
| 415 | for ( SpeciesID id = __MIN_ID; |
|---|
| 416 | id <= __MAX_ID; |
|---|
| 417 | ++id) { |
|---|
| 418 | count = __MAP->getCountFor( id ); |
|---|
| 419 | if (count == __SPECIES_COUNT-__MIN_ID-1) continue; // i cannot improve species that can be differed from all others |
|---|
| 420 | if (count > highest_count) highest_count = count; |
|---|
| 421 | if (count < lowest_count) lowest_count = count; |
|---|
| 422 | } |
|---|
| 423 | treshold = ( ((__SPECIES_COUNT-lowest_count) * __TRESHOLD_PERCENTAGE_NEXT_SOURCE_ID_SET) / 100.0 ) + lowest_count; |
|---|
| 424 | printf( "PS_calc_next_speciesid_sets() : SOURCE count 1's [%i..%i] treshold (%.3f)", lowest_count, highest_count, treshold ); |
|---|
| 425 | |
|---|
| 426 | // second pass -- get IDs where count is below or equal treshold |
|---|
| 427 | __SOURCE_ID_SET.clear(); |
|---|
| 428 | for ( SpeciesID id = __MIN_ID; |
|---|
| 429 | id <= __MAX_ID; |
|---|
| 430 | ++id) { |
|---|
| 431 | if (__MAP->getCountFor( id ) <= treshold) __SOURCE_ID_SET.insert( id ); |
|---|
| 432 | } |
|---|
| 433 | PS_print_set_ranges( " __SOURCE_ID_SET", __SOURCE_ID_SET ); |
|---|
| 434 | |
|---|
| 435 | // |
|---|
| 436 | // 2. __TARGET_ID_SET |
|---|
| 437 | // scan bitmap for species IDs that need to be distinguished from MOST species in __SOURCE_ID_SET |
|---|
| 438 | // |
|---|
| 439 | ID2IDMap count_falses_per_id; |
|---|
| 440 | |
|---|
| 441 | // first -- get the IDs that need differentiation from __SOURCE_ID_SET |
|---|
| 442 | unsigned long count_iterations = 0; |
|---|
| 443 | for ( IDSetCIter source_id = __SOURCE_ID_SET.begin(); |
|---|
| 444 | source_id != __SOURCE_ID_SET.end(); |
|---|
| 445 | ++source_id,++count_iterations ) { |
|---|
| 446 | //if (count_iterations % 100 == 0) printf( "PS_calc_next_speciesid_sets() : set #%li of %i\n", count_iterations+1, __SOURCE_ID_SET.size() ); |
|---|
| 447 | // get 'next_target_set' |
|---|
| 448 | __MAP->getFalseIndicesFor( *source_id, __TARGET_ID_SET ); |
|---|
| 449 | // count falses per SpeciesID |
|---|
| 450 | for ( PS_BitSet::IndexSet::iterator target_id = __TARGET_ID_SET.begin(); |
|---|
| 451 | target_id != __TARGET_ID_SET.end(); |
|---|
| 452 | ++target_id) { |
|---|
| 453 | if ((*target_id < __MIN_ID) || (*target_id > __MAX_ID)) continue; // skip ID's that are outside DB-IDs-range (like zero if __MIN_ID is one) |
|---|
| 454 | if (*target_id != *source_id) ++count_falses_per_id[ *target_id ]; |
|---|
| 455 | } |
|---|
| 456 | } |
|---|
| 457 | //PS_print_map_ranges( "PS_calc_next_speciesid_sets() : count_falses_per_id", count_falses_per_id, false ); |
|---|
| 458 | |
|---|
| 459 | // second -- get highest count of falses and calc treshold |
|---|
| 460 | lowest_count = __SPECIES_COUNT; |
|---|
| 461 | highest_count = 0; |
|---|
| 462 | for ( ID2IDMapCIter count_per_id = count_falses_per_id.begin(); |
|---|
| 463 | count_per_id != count_falses_per_id.end(); |
|---|
| 464 | ++count_per_id ) { |
|---|
| 465 | if (count_per_id->second > highest_count) highest_count = count_per_id->second; |
|---|
| 466 | if (count_per_id->second < lowest_count) lowest_count = count_per_id->second; |
|---|
| 467 | } |
|---|
| 468 | printf( "PS_calc_next_speciesid_sets() : TARGET count 0's [%i..%i]", lowest_count, highest_count ); |
|---|
| 469 | |
|---|
| 470 | // third -- put all IDs in __TARGET_ID_SET that are needed most by species from __SOURCE_ID_SET |
|---|
| 471 | __TARGET_ID_SET.clear(); |
|---|
| 472 | for ( ID2IDMapCIter count_per_id = count_falses_per_id.begin(); |
|---|
| 473 | count_per_id != count_falses_per_id.end(); |
|---|
| 474 | ++count_per_id ) { |
|---|
| 475 | if (count_per_id->second == highest_count) __TARGET_ID_SET.insert( count_per_id->first ); |
|---|
| 476 | } |
|---|
| 477 | PS_print_set_ranges( " __TARGET_ID_SET", __TARGET_ID_SET ); |
|---|
| 478 | } |
|---|
| 479 | |
|---|
| 480 | void PS_apply_path_to_bitmap( IDSet &_path, const bool _silent = false, PS_BitMap_Counted *_map = 0 ) { |
|---|
| 481 | // set true in __MAP for all combinations of IDs (in _path , not in _path) |
|---|
| 482 | |
|---|
| 483 | // iterate over all IDs except path |
|---|
| 484 | IDSetCIter path_iter = _path.begin(); |
|---|
| 485 | SpeciesID next_path_ID = *path_iter; |
|---|
| 486 | unsigned long int gain = 0; |
|---|
| 487 | if (_map) { // called for a 'private' map ? |
|---|
| 488 | for ( SpeciesID not_in_path_ID = __MIN_ID; |
|---|
| 489 | not_in_path_ID <= __MAX_ID; |
|---|
| 490 | ++not_in_path_ID) { |
|---|
| 491 | if (not_in_path_ID == next_path_ID) { // if i run into a ID in path |
|---|
| 492 | ++path_iter; // advance to next path ID |
|---|
| 493 | next_path_ID = (path_iter == _path.end()) ? -1 : *path_iter; |
|---|
| 494 | continue; // skip this ID |
|---|
| 495 | } |
|---|
| 496 | // iterate over path IDs |
|---|
| 497 | for ( IDSetCIter path_ID = _path.begin(); |
|---|
| 498 | path_ID != _path.end(); |
|---|
| 499 | ++path_ID) { |
|---|
| 500 | if (not_in_path_ID == *path_ID) continue; // obviously a probe cant differ a species from itself |
|---|
| 501 | if (not_in_path_ID > *path_ID) { |
|---|
| 502 | gain += (_map->set( not_in_path_ID, *path_ID, true ) == false); |
|---|
| 503 | } else { |
|---|
| 504 | gain += (_map->set( *path_ID, not_in_path_ID, true ) == false); |
|---|
| 505 | } |
|---|
| 506 | } |
|---|
| 507 | } |
|---|
| 508 | } else { // called for __MAP |
|---|
| 509 | for ( SpeciesID not_in_path_ID = __MIN_ID; |
|---|
| 510 | not_in_path_ID <= __MAX_ID; |
|---|
| 511 | ++not_in_path_ID) { |
|---|
| 512 | if (not_in_path_ID == next_path_ID) { // if i run into a ID in path |
|---|
| 513 | ++path_iter; // advance to next path ID |
|---|
| 514 | next_path_ID = (path_iter == _path.end()) ? -1 : *path_iter; |
|---|
| 515 | continue; // skip this ID |
|---|
| 516 | } |
|---|
| 517 | // iterate over path IDs |
|---|
| 518 | for ( IDSetCIter path_ID = _path.begin(); |
|---|
| 519 | path_ID != _path.end(); |
|---|
| 520 | ++path_ID) { |
|---|
| 521 | if (not_in_path_ID == *path_ID) continue; // obviously a probe cant differ a species from itself |
|---|
| 522 | if (not_in_path_ID > *path_ID) { |
|---|
| 523 | gain += (__MAP->set( not_in_path_ID, *path_ID, true ) == false); |
|---|
| 524 | } else { |
|---|
| 525 | gain += (__MAP->set( *path_ID, not_in_path_ID, true ) == false); |
|---|
| 526 | } |
|---|
| 527 | } |
|---|
| 528 | } |
|---|
| 529 | } |
|---|
| 530 | unsigned long int sets = (__SPECIES_COUNT-_path.size())*_path.size(); |
|---|
| 531 | if (!_silent) printf( "PS_apply_path_to_bitmap() : gain %lu of %lu -- %.2f%% -> wasted %lu\n", gain, sets, ((float)gain/sets)*100.0, sets-gain ); |
|---|
| 532 | //__MAP->recalcCounters(); |
|---|
| 533 | } |
|---|
| 534 | |
|---|
| 535 | float PS_filling_level( PS_CandidatePtr _candidate = 0 ) { |
|---|
| 536 | // returns filling level of __MAP |
|---|
| 537 | |
|---|
| 538 | unsigned long trues = __MAP->getCountOfTrues(); |
|---|
| 539 | float percentage = ((float)trues / __BITS_IN_MAP)*100.0; |
|---|
| 540 | if (_candidate) { |
|---|
| 541 | _candidate->filling_level = percentage; |
|---|
| 542 | _candidate->false_IDs = __BITS_IN_MAP - trues; |
|---|
| 543 | } else { |
|---|
| 544 | printf( "PS_filling_level() : bitmap (%lu) now has %lu trues and %lu falses -- %.5f%% filled\n", __BITS_IN_MAP, trues, __BITS_IN_MAP-trues, percentage ); |
|---|
| 545 | } |
|---|
| 546 | return percentage; |
|---|
| 547 | } |
|---|
| 548 | |
|---|
| 549 | void PS_GNUPlot( const char *_out_prefix, const long _iteration, const IDSet &_path, const ID2IDSet &_noMatches ) { |
|---|
| 550 | // generate data- and commandfiles for GNUPlot to display __MAP and its count_trues_per_id |
|---|
| 551 | |
|---|
| 552 | char *buffer = (char *)malloc( 4096 ); |
|---|
| 553 | // open data file |
|---|
| 554 | sprintf( buffer, "%s.%06li", _out_prefix, _iteration ); |
|---|
| 555 | char *data_name = strdup( buffer ); |
|---|
| 556 | PS_FileBuffer *file = new PS_FileBuffer( data_name, PS_FileBuffer::WRITEONLY ); |
|---|
| 557 | // output data |
|---|
| 558 | long size = sprintf( buffer, "# CANDIDATE #%li path (%zu)\n", _iteration, _path.size() ); |
|---|
| 559 | file->put( buffer, size ); |
|---|
| 560 | for ( IDSetCIter i = _path.begin(); |
|---|
| 561 | i != _path.end(); |
|---|
| 562 | ++i ) { |
|---|
| 563 | size = sprintf( buffer, "# %i\n", *i ); |
|---|
| 564 | file->put( buffer, size ); |
|---|
| 565 | } |
|---|
| 566 | size = sprintf( buffer, "#noMatches (%zu)\n", _noMatches.size() ); |
|---|
| 567 | file->put( buffer, size ); |
|---|
| 568 | for ( ID2IDSetCIter p = _noMatches.begin(); |
|---|
| 569 | p != _noMatches.end(); |
|---|
| 570 | ++p ) { |
|---|
| 571 | size = sprintf( buffer, "%i %i\n", p->first, p->second ); |
|---|
| 572 | file->put( buffer, size ); |
|---|
| 573 | } |
|---|
| 574 | size = sprintf( buffer, "\n\n" ); |
|---|
| 575 | file->put( buffer, size ); |
|---|
| 576 | sprintf( buffer, "Bitmap after %li. candidate", _iteration ); |
|---|
| 577 | char *title = strdup( buffer ); |
|---|
| 578 | __MAP->printGNUplot( title, buffer, file ); |
|---|
| 579 | // open gnuplot command file |
|---|
| 580 | sprintf( buffer, "%s.%06li.gnuplot", _out_prefix, _iteration ); |
|---|
| 581 | file->reinit( buffer, PS_FileBuffer::WRITEONLY ); |
|---|
| 582 | // output gnuplot commands |
|---|
| 583 | size = sprintf( buffer, "set terminal png color\nset output '%06li.bitmap.png\n", _iteration ); |
|---|
| 584 | file->put( buffer, size ); |
|---|
| 585 | size = sprintf( buffer, "set yrange [%i:%i] reverse\nset xrange [%i:%i]\n", __MIN_ID,__MAX_ID,__MIN_ID,__MAX_ID ); |
|---|
| 586 | file->put( buffer, size ); |
|---|
| 587 | size = sprintf( buffer, "set size square\nset title '%li. iteration'\nset pointsize 0.5\n", _iteration ); |
|---|
| 588 | file->put( buffer, size ); |
|---|
| 589 | size = sprintf( buffer, "plot '%s' index 1 title 'match ()' with dots 2,'%s' index 0 title 'no match (30)' with points 1\n", data_name, data_name ); |
|---|
| 590 | file->put( buffer, size ); |
|---|
| 591 | size = sprintf( buffer, "set terminal png color\nset output '%06li.counters.png\n", _iteration ); |
|---|
| 592 | file->put( buffer, size ); |
|---|
| 593 | size = sprintf( buffer, "set yrange [] noreverse\nset size nosquare\n" ); |
|---|
| 594 | file->put( buffer, size ); |
|---|
| 595 | size = sprintf( buffer, "plot '%s' index 2 title 'count/species' with impulses 2,'%s' index 3 title 'species/count' with points 1", data_name, data_name ); |
|---|
| 596 | file->put( buffer, size ); |
|---|
| 597 | |
|---|
| 598 | delete file; |
|---|
| 599 | free( title ); |
|---|
| 600 | free( data_name ); |
|---|
| 601 | free( buffer ); |
|---|
| 602 | } |
|---|
| 603 | |
|---|
| 604 | PS_CandidatePtr PS_ascend( PS_CandidatePtr _last_candidate ) { |
|---|
| 605 | // 'remove' _last_candidate from __MAP by applying its parent's |
|---|
| 606 | // paths to a fresh __MAP |
|---|
| 607 | |
|---|
| 608 | // |
|---|
| 609 | // make fresh __MAP |
|---|
| 610 | // |
|---|
| 611 | __MAP->copy( __PRESET_MAP ); |
|---|
| 612 | // |
|---|
| 613 | // apply paths of parent-candidates to __MAP |
|---|
| 614 | // |
|---|
| 615 | PS_CandidatePtr parent = _last_candidate->parent; |
|---|
| 616 | while ((parent != 0) && (parent->path.size() > 0)) { |
|---|
| 617 | PS_apply_path_to_bitmap( parent->path, true ); |
|---|
| 618 | parent = parent->parent; |
|---|
| 619 | } |
|---|
| 620 | printf( "ASCEND " ); |
|---|
| 621 | PS_filling_level(); |
|---|
| 622 | // |
|---|
| 623 | // return the parent of the given candidate |
|---|
| 624 | // |
|---|
| 625 | return _last_candidate->parent; |
|---|
| 626 | } |
|---|
| 627 | |
|---|
| 628 | |
|---|
| 629 | void PS_descend( PS_CandidatePtr _candidate_parent, const PS_NodePtr _root_node, unsigned long _depth, const float _filling_level ) { |
|---|
| 630 | struct tms before; |
|---|
| 631 | times( &before ); |
|---|
| 632 | |
|---|
| 633 | printf( "\nDESCEND ==================== depth (%lu / %lu) candidates (%lu / %lu -> %lu left) ====================\n", |
|---|
| 634 | _depth, |
|---|
| 635 | __MAX_DEPTH, |
|---|
| 636 | __CANDIDATES_FINISHED, |
|---|
| 637 | __CANDIDATES_TODO, |
|---|
| 638 | __CANDIDATES_TODO-__CANDIDATES_FINISHED ); fflush( stdout ); |
|---|
| 639 | printf( "DESCEND PATH : (%s)\n\n", __PATH_IN_CANDIDATES ); |
|---|
| 640 | |
|---|
| 641 | // |
|---|
| 642 | // calc source- and target-set of IDs |
|---|
| 643 | // |
|---|
| 644 | PS_calc_next_speciesid_sets(); |
|---|
| 645 | printf( "\nDESCEND ---------- calculated next speciesid-sets ----------\n\n" ); fflush( stdout ); |
|---|
| 646 | |
|---|
| 647 | // |
|---|
| 648 | // search for candidates to improve filling level of __MAP using the source- and target-set of IDs |
|---|
| 649 | // max. 3 rounds |
|---|
| 650 | // |
|---|
| 651 | int round = 0; |
|---|
| 652 | for (; (round < 3) && (_candidate_parent->children.size() == 0); ++round) { |
|---|
| 653 | PS_find_probes( _root_node, round, _candidate_parent, _filling_level ); |
|---|
| 654 | } |
|---|
| 655 | printf( "\nDESCEND ---------- searched probe for speciesid-sets [rounds : %i] candidates (%lu / %lu -> %lu left) ----------\n\n", |
|---|
| 656 | round, |
|---|
| 657 | __CANDIDATES_FINISHED, |
|---|
| 658 | __CANDIDATES_TODO, |
|---|
| 659 | __CANDIDATES_TODO-__CANDIDATES_FINISHED ); fflush( stdout ); |
|---|
| 660 | |
|---|
| 661 | // |
|---|
| 662 | // descend down each (of the max 3) candidate(s) |
|---|
| 663 | // |
|---|
| 664 | char count = '0'+_candidate_parent->children.size(); |
|---|
| 665 | for ( PS_CandidateByGainMapRIter next_candidate_it = _candidate_parent->children.rbegin(); |
|---|
| 666 | next_candidate_it != _candidate_parent->children.rend(); |
|---|
| 667 | ++next_candidate_it, --count ) { |
|---|
| 668 | PS_CandidatePtr next_candidate = &(*next_candidate_it->second); |
|---|
| 669 | ++__CANDIDATES_FINISHED; |
|---|
| 670 | // |
|---|
| 671 | // apply candidate to __MAP |
|---|
| 672 | // |
|---|
| 673 | printf( "[%p] ", next_candidate ); |
|---|
| 674 | PS_apply_path_to_bitmap( next_candidate->path ); |
|---|
| 675 | printf( "[%p] ", next_candidate ); |
|---|
| 676 | PS_filling_level( next_candidate ); |
|---|
| 677 | next_candidate->depth = _depth+1; |
|---|
| 678 | next_candidate->print( 0,false,false ); |
|---|
| 679 | // |
|---|
| 680 | // step down |
|---|
| 681 | // |
|---|
| 682 | if ((next_candidate->filling_level < 75.0) && (_depth+1 < __MAX_DEPTH)) { |
|---|
| 683 | __PATH_IN_CANDIDATES[ _depth ] = count; |
|---|
| 684 | __PATH_IN_CANDIDATES[ _depth+1 ] = '\x00'; |
|---|
| 685 | PS_descend( next_candidate, _root_node, _depth+1, next_candidate->filling_level ); |
|---|
| 686 | } |
|---|
| 687 | // |
|---|
| 688 | // remove candidate from __MAP |
|---|
| 689 | // |
|---|
| 690 | PS_ascend( next_candidate ); |
|---|
| 691 | } |
|---|
| 692 | |
|---|
| 693 | printf( "\nDESCEND ~~~~~~~~~~~~~~~~~~~~ depth (%li) max depth (%li) candidates (%lu / %lu -> %lu left) ~~~~~~~~~~~~~~~~~~~~ ", |
|---|
| 694 | _depth, |
|---|
| 695 | __MAX_DEPTH, |
|---|
| 696 | __CANDIDATES_FINISHED, |
|---|
| 697 | __CANDIDATES_TODO, |
|---|
| 698 | __CANDIDATES_TODO-__CANDIDATES_FINISHED ); |
|---|
| 699 | PS_print_time_diff( &before ); |
|---|
| 700 | printf( "\n" ); fflush( stdout ); |
|---|
| 701 | } |
|---|
| 702 | |
|---|
| 703 | void PS_make_map_for_candidate( PS_CandidatePtr _candidate ) { |
|---|
| 704 | // make __MAP for _candidate |
|---|
| 705 | |
|---|
| 706 | if (_candidate->map) return; // if candidate already has its map return |
|---|
| 707 | _candidate->map = new PS_BitMap_Counted( false, __MAX_ID+1 ); |
|---|
| 708 | _candidate->map->copy( __PRESET_MAP ); |
|---|
| 709 | PS_apply_path_to_bitmap( _candidate->path, true, _candidate->map ); // apply _candidate's path |
|---|
| 710 | PS_CandidatePtr parent = _candidate->parent; |
|---|
| 711 | while ((parent != 0) && (parent->path.size() > 0)) { // apply parent's paths |
|---|
| 712 | PS_apply_path_to_bitmap( parent->path, true, _candidate->map ); |
|---|
| 713 | parent = parent->parent; |
|---|
| 714 | } |
|---|
| 715 | } |
|---|
| 716 | |
|---|
| 717 | // void PS_calc_next_speciesid_sets_for_candidate( PS_CandidatePtr _candidate ) { |
|---|
| 718 | // make __MAP for _candidate and search for best source/target-sets |
|---|
| 719 | // // |
|---|
| 720 | // // 1. __MAP for _candidate |
|---|
| 721 | // // |
|---|
| 722 | // PS_make_map_for_candidate( _candidate ); |
|---|
| 723 | // // |
|---|
| 724 | // // 2. __SOURCE_ID_SET |
|---|
| 725 | // // scan bitmap for species that need more matches |
|---|
| 726 | // // |
|---|
| 727 | // SpeciesID highest_count; |
|---|
| 728 | // SpeciesID lowest_count; |
|---|
| 729 | // SpeciesID count; |
|---|
| 730 | |
|---|
| 731 | // // first pass -- get lowest/highest count of trues |
|---|
| 732 | // lowest_count = __SPECIES_COUNT; |
|---|
| 733 | // highest_count = 0; |
|---|
| 734 | // for ( SpeciesID id = __MIN_ID; |
|---|
| 735 | // id <= __MAX_ID; |
|---|
| 736 | // ++id) { |
|---|
| 737 | // count = _candidate->map->getCountFor( id ); |
|---|
| 738 | // if (count == __SPECIES_COUNT-__MIN_ID-1) continue; // i cannot improve species that can be differed from all others |
|---|
| 739 | // if (count < lowest_count) lowest_count = count; |
|---|
| 740 | // if (count > highest_count) highest_count = count; |
|---|
| 741 | // } |
|---|
| 742 | // //printf( "PS_calc_next_speciesid_sets_for_candidate() : SOURCE count 1's [%i..%i]\n", lowest_count, highest_count ); |
|---|
| 743 | |
|---|
| 744 | // // second pass -- get IDs where count is equal to lowest_count or highest_count |
|---|
| 745 | // IDSet highest_count_src_ids; |
|---|
| 746 | // IDSet lowest_count_src_ids; |
|---|
| 747 | // for ( SpeciesID id = __MIN_ID; |
|---|
| 748 | // id <= __MAX_ID; |
|---|
| 749 | // ++id) { |
|---|
| 750 | // count = _candidate->map->getCountFor( id ); |
|---|
| 751 | // if (count == lowest_count) lowest_count_src_ids.insert( id ); |
|---|
| 752 | // if (count == highest_count) highest_count_src_ids.insert( id ); |
|---|
| 753 | // } |
|---|
| 754 | // if (_candidate->source_set) { |
|---|
| 755 | // _candidate->source_set->clear(); |
|---|
| 756 | // _candidate->source_set->insert( lowest_count_src_ids.begin(), lowest_count_src_ids.end() ); |
|---|
| 757 | // } else { |
|---|
| 758 | // _candidate->source_set = new IDSet( lowest_count_src_ids ); |
|---|
| 759 | // } |
|---|
| 760 | // _candidate->source_set->insert( highest_count_src_ids.begin(), highest_count_src_ids.end() ); |
|---|
| 761 | // //PS_print_set_ranges( " lowest_count_src_ids", lowest_count_src_ids ); |
|---|
| 762 | // //PS_print_set_ranges( " highest_count_src_ids", highest_count_src_ids ); |
|---|
| 763 | // if (*(_candidate->source_set->begin()) < __MIN_SETS_ID) __MIN_SETS_ID = *(_candidate->source_set->begin()); |
|---|
| 764 | // if (*(_candidate->source_set->rbegin()) > __MAX_SETS_ID) __MAX_SETS_ID = *(_candidate->source_set->rbegin()); |
|---|
| 765 | |
|---|
| 766 | // // |
|---|
| 767 | // // 3. __TARGET_ID_SET |
|---|
| 768 | // // scan bitmap for species IDs that need to be distinguished from MOST species in lowest_count_src_ids |
|---|
| 769 | // // |
|---|
| 770 | // ID2IDMap count_falses_per_id; |
|---|
| 771 | |
|---|
| 772 | // // first -- get the IDs that need differentiation from __SOURCE_ID_SET |
|---|
| 773 | // for ( IDSetCIter source_id = lowest_count_src_ids.begin(); |
|---|
| 774 | // source_id != lowest_count_src_ids.end(); |
|---|
| 775 | // ++source_id ) { |
|---|
| 776 | // // get 'next_target_set' |
|---|
| 777 | // _candidate->map->getFalseIndicesFor( *source_id, __TARGET_ID_SET ); |
|---|
| 778 | // // count falses per SpeciesID |
|---|
| 779 | // for ( PS_BitSet::IndexSet::iterator target_id = __TARGET_ID_SET.begin(); |
|---|
| 780 | // target_id != __TARGET_ID_SET.end(); |
|---|
| 781 | // ++target_id) { |
|---|
| 782 | // if ((*target_id < __MIN_ID) || (*target_id > __MAX_ID)) continue; // skip ID's that are outside DB-IDs-range (like zero if __MIN_ID is one) |
|---|
| 783 | // if (*target_id != *source_id) ++count_falses_per_id[ *target_id ]; |
|---|
| 784 | // } |
|---|
| 785 | // } |
|---|
| 786 | // //printf( "\n" ); |
|---|
| 787 | // //PS_print_map_ranges( "PS_calc_next_speciesid_sets_for_candidate() : count_falses_per_id", count_falses_per_id, false ); |
|---|
| 788 | |
|---|
| 789 | // // second -- get highest count of falses and calc treshold |
|---|
| 790 | // lowest_count = __SPECIES_COUNT; |
|---|
| 791 | // highest_count = 0; |
|---|
| 792 | // for ( ID2IDMapCIter count_per_id = count_falses_per_id.begin(); |
|---|
| 793 | // count_per_id != count_falses_per_id.end(); |
|---|
| 794 | // ++count_per_id ) { |
|---|
| 795 | // if (count_per_id->second > highest_count) highest_count = count_per_id->second; |
|---|
| 796 | // if (count_per_id->second < lowest_count) lowest_count = count_per_id->second; |
|---|
| 797 | // } |
|---|
| 798 | // //printf( "PS_calc_next_speciesid_sets_for_candidate() : TARGET count 0's [%i..%i]\n", lowest_count, highest_count ); |
|---|
| 799 | |
|---|
| 800 | // // third -- put all IDs in __TARGET_ID_SET that are needed most by species from lowest_count_src_ids |
|---|
| 801 | // if (_candidate->target_set) { |
|---|
| 802 | // _candidate->target_set->clear(); |
|---|
| 803 | // } else { |
|---|
| 804 | // _candidate->target_set = new IDSet(); |
|---|
| 805 | // } |
|---|
| 806 | // __TARGET_ID_SET.clear(); |
|---|
| 807 | // for ( ID2IDMapCIter count_per_id = count_falses_per_id.begin(); |
|---|
| 808 | // count_per_id != count_falses_per_id.end(); |
|---|
| 809 | // ++count_per_id ) { |
|---|
| 810 | // if (count_per_id->second == highest_count) __TARGET_ID_SET.insert( count_per_id->first ); |
|---|
| 811 | // } |
|---|
| 812 | // //PS_print_set_ranges( " target_set (by lowest count of trues) ", __TARGET_ID_SET ); |
|---|
| 813 | // _candidate->target_set->insert( __TARGET_ID_SET.begin(), __TARGET_ID_SET.end() ); |
|---|
| 814 | |
|---|
| 815 | // // fourth -- put all IDs in __TARGET_ID_SET that are needed by species from highest_count_src_ids |
|---|
| 816 | // PS_BitSet::IndexSet next_target_ids; |
|---|
| 817 | // __TARGET_ID_SET.clear(); |
|---|
| 818 | // for ( IDSetCIter source_id = highest_count_src_ids.begin(); |
|---|
| 819 | // source_id != highest_count_src_ids.end(); |
|---|
| 820 | // ++source_id ) { |
|---|
| 821 | // // get next target-IDs |
|---|
| 822 | // _candidate->map->getFalseIndicesFor( *source_id, next_target_ids ); |
|---|
| 823 | // // 'append' target-IDs |
|---|
| 824 | // for ( PS_BitSet::IndexSet::iterator target_id = next_target_ids.begin(); |
|---|
| 825 | // target_id != next_target_ids.end(); |
|---|
| 826 | // ++target_id) { |
|---|
| 827 | // if ((*target_id < __MIN_ID) || (*target_id > __MAX_ID)) continue; // skip ID's that are outside DB-IDs-range (like zero if __MIN_ID is one) |
|---|
| 828 | // if (*target_id != *source_id) __TARGET_ID_SET.insert( *target_id ); |
|---|
| 829 | // } |
|---|
| 830 | // } |
|---|
| 831 | // //PS_print_set_ranges( " target_set (by highest count of trues)", __TARGET_ID_SET ); |
|---|
| 832 | // _candidate->target_set->insert( __TARGET_ID_SET.begin(), __TARGET_ID_SET.end() ); |
|---|
| 833 | |
|---|
| 834 | // if (*(__TARGET_ID_SET.begin()) < __MIN_SETS_ID) __MIN_SETS_ID = *(__TARGET_ID_SET.begin()); |
|---|
| 835 | // if (*(__TARGET_ID_SET.rbegin()) > __MAX_SETS_ID) __MAX_SETS_ID = *(__TARGET_ID_SET.rbegin()); |
|---|
| 836 | // //printf( "\n" ); fflush( stdout ); |
|---|
| 837 | // } |
|---|
| 838 | |
|---|
| 839 | |
|---|
| 840 | void PS_get_leaf_candidates( PS_CandidatePtr _candidate_parent, |
|---|
| 841 | PS_CandidateSet &_leaf_candidates, |
|---|
| 842 | const bool _ignore_passes_left = false ) { |
|---|
| 843 | |
|---|
| 844 | for ( PS_CandidateByGainMapIter next_candidate = _candidate_parent->children.begin(); |
|---|
| 845 | next_candidate != _candidate_parent->children.end(); |
|---|
| 846 | ++next_candidate ) { |
|---|
| 847 | |
|---|
| 848 | if ((next_candidate->second->children.size() == 0) && |
|---|
| 849 | ((next_candidate->second->passes_left > 0) || _ignore_passes_left)){ |
|---|
| 850 | _leaf_candidates.insert( next_candidate->second ); |
|---|
| 851 | } |
|---|
| 852 | PS_get_leaf_candidates( &(*next_candidate->second), _leaf_candidates, _ignore_passes_left ); |
|---|
| 853 | } |
|---|
| 854 | } |
|---|
| 855 | |
|---|
| 856 | |
|---|
| 857 | // bool PS_test_candidate_sets_on_path( PS_CandidatePtr _candidate ) { |
|---|
| 858 | // int round = PS_Candidate::MAX_PASSES - _candidate->passes_left; |
|---|
| 859 | // __SOURCE_MIN_MATCH_COUNT = (_candidate->source_set->size() * (__MIN_PERCENTAGE_SET_MATCH - (round * 5))) / 100.0; |
|---|
| 860 | // __SOURCE_MAX_MATCH_COUNT = (_candidate->source_set->size() * (__MAX_PERCENTAGE_SET_MATCH + (round * 5))) / 100.0; |
|---|
| 861 | // __TARGET_MIN_MATCH_COUNT = (_candidate->target_set->size() * (__MIN_PERCENTAGE_SET_MATCH - (round * 5))) / 100.0; |
|---|
| 862 | // __TARGET_MAX_MATCH_COUNT = (_candidate->target_set->size() * (__MAX_PERCENTAGE_SET_MATCH + (round * 5))) / 100.0; |
|---|
| 863 | // long count_matched_source = 0; |
|---|
| 864 | // long count_matched_target = 0; |
|---|
| 865 | // SpeciesID path_id; |
|---|
| 866 | // IDSetCIter path_end = __PATH.end(); |
|---|
| 867 | // IDSetCIter path = __PATH.begin(); |
|---|
| 868 | // IDSetCIter source_end = _candidate->source_set->end(); |
|---|
| 869 | // IDSetCIter source = _candidate->source_set->begin(); |
|---|
| 870 | // IDSetCIter target_end = _candidate->target_set->end(); |
|---|
| 871 | // IDSetCIter target = _candidate->target_set->begin(); |
|---|
| 872 | |
|---|
| 873 | // while (path != path_end) { |
|---|
| 874 | // path_id = *path; |
|---|
| 875 | // while ((*target < path_id) && (target != target_end)) { |
|---|
| 876 | // ++target; |
|---|
| 877 | // } |
|---|
| 878 | // if ((target != target_end) && (*target == path_id)) |
|---|
| 879 | // ++count_matched_target; |
|---|
| 880 | // while ((*source < path_id) && (source != source_end)) { |
|---|
| 881 | // ++source; |
|---|
| 882 | // } |
|---|
| 883 | // if ((source != source_end) && (*source == path_id)) |
|---|
| 884 | // ++count_matched_source; |
|---|
| 885 | // ++path; |
|---|
| 886 | // } |
|---|
| 887 | |
|---|
| 888 | // if ((count_matched_source > __SOURCE_MIN_MATCH_COUNT) && |
|---|
| 889 | // (count_matched_source < __SOURCE_MAX_MATCH_COUNT) && |
|---|
| 890 | // (count_matched_target > __TARGET_MIN_MATCH_COUNT) && |
|---|
| 891 | // (count_matched_target < __TARGET_MAX_MATCH_COUNT)) { |
|---|
| 892 | // return true; |
|---|
| 893 | // } |
|---|
| 894 | // return false; |
|---|
| 895 | // } |
|---|
| 896 | |
|---|
| 897 | void PS_get_next_candidates_descend( PS_NodePtr _ps_node, PS_CandidateSet &_leaf_candidates ) { |
|---|
| 898 | // scan PS_node-tree for next candidates for each of _leaf_candidates |
|---|
| 899 | |
|---|
| 900 | SpeciesID id = _ps_node->getNum(); |
|---|
| 901 | bool has_probes = _ps_node->hasProbes(); |
|---|
| 902 | |
|---|
| 903 | // |
|---|
| 904 | // append ID to path |
|---|
| 905 | // |
|---|
| 906 | __PATH.insert( id ); |
|---|
| 907 | |
|---|
| 908 | // |
|---|
| 909 | // dont look at path until ID is greater than lowest ID in the sets of IDs |
|---|
| 910 | // also dont use a node if its already used as candidate |
|---|
| 911 | // |
|---|
| 912 | if ((id >= __MIN_SETS_ID) && has_probes) { |
|---|
| 913 | unsigned long total_gain_of_node = 0; |
|---|
| 914 | unsigned long total_tests_per_node = 0; |
|---|
| 915 | ++__PROBES_COUNTER; |
|---|
| 916 | // iterate over _leaf_candidates |
|---|
| 917 | for ( PS_CandidateSetIter candidate_iter = _leaf_candidates.begin(); |
|---|
| 918 | candidate_iter != _leaf_candidates.end(); |
|---|
| 919 | ++candidate_iter ) { |
|---|
| 920 | PS_CandidateSPtr candidate = *candidate_iter; |
|---|
| 921 | |
|---|
| 922 | // next leaf-candidate if the probe was already used for current candidate |
|---|
| 923 | if (candidate->alreadyUsedNode( _ps_node )) continue; |
|---|
| 924 | |
|---|
| 925 | ++__CANDIDATES_COUNTER; // possible candidates |
|---|
| 926 | if (__VERBOSE) printf( "%8lup %8lur %8luc %8lug %8luu\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", |
|---|
| 927 | __PROBES_COUNTER, |
|---|
| 928 | __PROBES_REMOVED, |
|---|
| 929 | __CANDIDATES_COUNTER, |
|---|
| 930 | __CANDIDATES_TODO, |
|---|
| 931 | __CANDIDATES_FINISHED ); fflush( stdout ); |
|---|
| 932 | |
|---|
| 933 | // next leaf-candidate if __PATH doesnt fulfill matching criteria |
|---|
| 934 | unsigned long matches = candidate->matchPathOnOneFalseIDs( __PATH ); |
|---|
| 935 | if (matches < candidate->one_false_IDs_matches) continue; |
|---|
| 936 | |
|---|
| 937 | ++__CANDIDATES_TODO; // good candidates |
|---|
| 938 | if (__VERBOSE) printf( "%8lup %8lur %8luc %8lug %8luu\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", |
|---|
| 939 | __PROBES_COUNTER, |
|---|
| 940 | __PROBES_REMOVED, |
|---|
| 941 | __CANDIDATES_COUNTER, |
|---|
| 942 | __CANDIDATES_TODO, |
|---|
| 943 | __CANDIDATES_FINISHED ); fflush( stdout ); |
|---|
| 944 | |
|---|
| 945 | // test node on candidate's bitmap |
|---|
| 946 | float filling_level; |
|---|
| 947 | ++total_tests_per_node; |
|---|
| 948 | unsigned long gain = PS_test_candidate_on_bitmap( &filling_level, candidate->map ); |
|---|
| 949 | total_gain_of_node += gain; |
|---|
| 950 | if (candidate->updateBestChild( gain, matches, filling_level, _ps_node, __PATH )) { |
|---|
| 951 | ++__CANDIDATES_FINISHED; // used candiates |
|---|
| 952 | if (__VERBOSE) printf( "%8lup %8lur %8luc %8lug %8luu\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", |
|---|
| 953 | __PROBES_COUNTER, |
|---|
| 954 | __PROBES_REMOVED, |
|---|
| 955 | __CANDIDATES_COUNTER, |
|---|
| 956 | __CANDIDATES_TODO, |
|---|
| 957 | __CANDIDATES_FINISHED ); fflush( stdout ); |
|---|
| 958 | } |
|---|
| 959 | } |
|---|
| 960 | if ((total_tests_per_node == _leaf_candidates.size()) && |
|---|
| 961 | (total_gain_of_node == 0)) { |
|---|
| 962 | ++__PROBES_REMOVED; |
|---|
| 963 | if (__VERBOSE) printf( "%8lup %8lur %8luc %8lug %8luu\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", |
|---|
| 964 | __PROBES_COUNTER, |
|---|
| 965 | __PROBES_REMOVED, |
|---|
| 966 | __CANDIDATES_COUNTER, |
|---|
| 967 | __CANDIDATES_TODO, |
|---|
| 968 | __CANDIDATES_FINISHED ); fflush( stdout ); |
|---|
| 969 | _ps_node->removeProbes(); |
|---|
| 970 | } |
|---|
| 971 | } |
|---|
| 972 | |
|---|
| 973 | // |
|---|
| 974 | // step down the children if either ID is lower than highest |
|---|
| 975 | // ID in the set of ID-pairs or the node has no probes |
|---|
| 976 | // |
|---|
| 977 | if ((id < __MAX_SETS_ID) || (! has_probes)) { |
|---|
| 978 | for ( PS_NodeMapConstIterator i = _ps_node->getChildrenBegin(); |
|---|
| 979 | i != _ps_node->getChildrenEnd(); |
|---|
| 980 | ++i) { |
|---|
| 981 | PS_get_next_candidates_descend( i->second, _leaf_candidates ); |
|---|
| 982 | } |
|---|
| 983 | } |
|---|
| 984 | |
|---|
| 985 | // |
|---|
| 986 | // remove ID from path |
|---|
| 987 | // |
|---|
| 988 | __PATH.erase( id ); |
|---|
| 989 | } |
|---|
| 990 | |
|---|
| 991 | |
|---|
| 992 | void PS_get_next_candidates( const PS_NodePtr _root_node, |
|---|
| 993 | PS_CandidateSet &_leaf_candidates ) { |
|---|
| 994 | struct tms before; |
|---|
| 995 | times( &before ); |
|---|
| 996 | // first calc source/target/false_IDs sets |
|---|
| 997 | printf( "PS_get_next_candidates() : initializing %zu candidates..\n", _leaf_candidates.size() ); fflush( stdout ); |
|---|
| 998 | __PROBES_COUNTER = 0; |
|---|
| 999 | __PROBES_REMOVED = 0; |
|---|
| 1000 | __CANDIDATES_COUNTER = 0; |
|---|
| 1001 | __CANDIDATES_TODO = 0; |
|---|
| 1002 | __CANDIDATES_FINISHED = 0; |
|---|
| 1003 | __MIN_SETS_ID = __MAX_ID; |
|---|
| 1004 | __MAX_SETS_ID = __MIN_ID; |
|---|
| 1005 | for ( PS_CandidateSetIter iter = _leaf_candidates.begin(); |
|---|
| 1006 | iter != _leaf_candidates.end(); |
|---|
| 1007 | ++iter ) { |
|---|
| 1008 | PS_CandidateSPtr candidate = *iter; |
|---|
| 1009 | if (!candidate->map) { // if candidate has no map yet |
|---|
| 1010 | candidate->getParentMap(); // try to get its parent's map |
|---|
| 1011 | if (candidate->map) { // if parent had a map (if not a new map is created in PS_calc_next_speciesid_sets_for_candidate |
|---|
| 1012 | PS_apply_path_to_bitmap( candidate->path, true, candidate->map ); // apply candidate's path |
|---|
| 1013 | } |
|---|
| 1014 | } |
|---|
| 1015 | PS_make_map_for_candidate( &(*candidate) ); |
|---|
| 1016 | candidate->initFalseIDs( __MIN_ID, __MAX_ID, __MIN_SETS_ID, __MAX_SETS_ID ); |
|---|
| 1017 | //candidate->print(); |
|---|
| 1018 | } |
|---|
| 1019 | // scan tree |
|---|
| 1020 | printf( "PS_get_next_candidates() : " ); fflush( stdout ); |
|---|
| 1021 | for ( PS_NodeMapConstIterator node_iter = _root_node->getChildrenBegin(); |
|---|
| 1022 | (node_iter != _root_node->getChildrenEnd()) && (node_iter->first < __MAX_SETS_ID); |
|---|
| 1023 | ++node_iter ) { |
|---|
| 1024 | __PATH.clear(); |
|---|
| 1025 | PS_get_next_candidates_descend( node_iter->second, _leaf_candidates ); |
|---|
| 1026 | } |
|---|
| 1027 | printf( "looked at probes (%lu) -> removed probes (%lu) possible candidates (%lu) -> good candidates (%lu) -> used candidates (%lu)\nPS_get_next_candidates() : ", |
|---|
| 1028 | __PROBES_COUNTER, |
|---|
| 1029 | __PROBES_REMOVED, |
|---|
| 1030 | __CANDIDATES_COUNTER, |
|---|
| 1031 | __CANDIDATES_TODO, |
|---|
| 1032 | __CANDIDATES_FINISHED ); fflush( stdout ); |
|---|
| 1033 | PS_print_time_diff( &before ); |
|---|
| 1034 | |
|---|
| 1035 | // 'descend' candidates |
|---|
| 1036 | for ( PS_CandidateSetIter iter = _leaf_candidates.begin(); |
|---|
| 1037 | iter != _leaf_candidates.end(); |
|---|
| 1038 | ++iter ) { |
|---|
| 1039 | PS_CandidateSPtr candidate = *iter; |
|---|
| 1040 | candidate->print(); |
|---|
| 1041 | candidate->decreasePasses(); |
|---|
| 1042 | } |
|---|
| 1043 | |
|---|
| 1044 | _leaf_candidates.clear(); |
|---|
| 1045 | PS_get_leaf_candidates( __CANDIDATES_ROOT, _leaf_candidates ); |
|---|
| 1046 | } |
|---|
| 1047 | |
|---|
| 1048 | // ==================================================== |
|---|
| 1049 | // ==================================================== |
|---|
| 1050 | int main( int argc, |
|---|
| 1051 | char *argv[] ) { |
|---|
| 1052 | // |
|---|
| 1053 | // check arguments |
|---|
| 1054 | // |
|---|
| 1055 | if (argc < 5) { |
|---|
| 1056 | printf( "Missing argument\n Usage %s <database name> <result filename> <[-]candidates filename> <final candidates filename> [--verbose] [result filename prefix for output files]\n", argv[0] ); |
|---|
| 1057 | exit( 1 ); |
|---|
| 1058 | } |
|---|
| 1059 | |
|---|
| 1060 | const char *input_DB_name = argv[1]; |
|---|
| 1061 | const char *result_in_filename = argv[2]; |
|---|
| 1062 | const char *candidates_filename = argv[3]; |
|---|
| 1063 | const char *final_candidates_filename = argv[4]; |
|---|
| 1064 | const char *result_out_prefix = (argc > 5) ? argv[5] : 0; |
|---|
| 1065 | if (argc > 5) { |
|---|
| 1066 | if (strcmp(result_out_prefix,"--verbose") == 0) { |
|---|
| 1067 | __VERBOSE = true; |
|---|
| 1068 | result_out_prefix = (argc > 6) ? argv[6] : 0; |
|---|
| 1069 | } |
|---|
| 1070 | } |
|---|
| 1071 | // |
|---|
| 1072 | // open probe-set-database |
|---|
| 1073 | // |
|---|
| 1074 | printf( "Opening probe-set-database '%s'..\n", input_DB_name ); |
|---|
| 1075 | PS_Database *db = new PS_Database( input_DB_name, PS_Database::READONLY ); |
|---|
| 1076 | db->load(); |
|---|
| 1077 | __MAX_ID = db->getMaxID(); |
|---|
| 1078 | __MIN_ID = db->getMinID(); |
|---|
| 1079 | __SPECIES_COUNT = db->getSpeciesCount(); |
|---|
| 1080 | printf( "min ID (%i) max ID (%i) count of species (%i)\n", __MIN_ID, __MAX_ID, __SPECIES_COUNT ); |
|---|
| 1081 | printf( "..loaded database (enter to continue)\n" ); fflush( stdout ); |
|---|
| 1082 | |
|---|
| 1083 | // |
|---|
| 1084 | // open result file for preset bitmap |
|---|
| 1085 | // |
|---|
| 1086 | printf( "Opening result file %s..\n", result_in_filename ); |
|---|
| 1087 | PS_FileBuffer *result_file = new PS_FileBuffer( result_in_filename, PS_FileBuffer::READONLY ); |
|---|
| 1088 | long size; |
|---|
| 1089 | ID2IDSet noMatches; |
|---|
| 1090 | SpeciesID id1,id2; |
|---|
| 1091 | printf( "reading no matches : " ); |
|---|
| 1092 | result_file->get_long( size ); |
|---|
| 1093 | printf( "(%li)\n", size ); fflush( stdout ); |
|---|
| 1094 | for ( long i=0; |
|---|
| 1095 | i < size; |
|---|
| 1096 | ++i ) { |
|---|
| 1097 | result_file->get_int( id1 ); |
|---|
| 1098 | result_file->get_int( id2 ); |
|---|
| 1099 | printf( " %i,%i", id1, id2 ); |
|---|
| 1100 | if (id1 < id2) { |
|---|
| 1101 | noMatches.insert( ID2IDPair(id1,id2) ); |
|---|
| 1102 | } else { |
|---|
| 1103 | noMatches.insert( ID2IDPair(id2,id1) ); |
|---|
| 1104 | } |
|---|
| 1105 | } |
|---|
| 1106 | printf( "\nreading one matches : " ); |
|---|
| 1107 | result_file->get_long( size ); |
|---|
| 1108 | printf( "(%li)\n", size ); fflush( stdout ); |
|---|
| 1109 | long path_length; |
|---|
| 1110 | SpeciesID path_id; |
|---|
| 1111 | IDSet path; |
|---|
| 1112 | IDID2IDSetMap oneMatchesMap; |
|---|
| 1113 | for ( long i=0; |
|---|
| 1114 | i < size; |
|---|
| 1115 | ++i) { |
|---|
| 1116 | path.clear(); |
|---|
| 1117 | result_file->get_int( id1 ); |
|---|
| 1118 | result_file->get_int( id2 ); |
|---|
| 1119 | result_file->get_long( path_length ); |
|---|
| 1120 | printf( "%i,%i path(%li)", id1, id2, path_length ); |
|---|
| 1121 | while (path_length-- > 0) { |
|---|
| 1122 | result_file->get_int( path_id ); |
|---|
| 1123 | printf( " %i", path_id ); |
|---|
| 1124 | path.insert( path_id ); |
|---|
| 1125 | } |
|---|
| 1126 | printf( "\n" ); |
|---|
| 1127 | oneMatchesMap[ ID2IDPair(id1,id2) ] = path; |
|---|
| 1128 | } |
|---|
| 1129 | printf( "loading preset bitmap..\n" ); fflush( stdout ); |
|---|
| 1130 | __BITS_IN_MAP = ((__MAX_ID+1)*__MAX_ID)/2 + __MAX_ID+1; |
|---|
| 1131 | __MAP = new PS_BitMap_Counted( result_file ); |
|---|
| 1132 | __PRESET_MAP = new PS_BitMap_Counted( false, __MAX_ID+1 ); |
|---|
| 1133 | for (id1 = 0; id1 < __MIN_ID; ++id1 ) { |
|---|
| 1134 | for (id2 = 0; id2 <= __MAX_ID; ++id2 ) { |
|---|
| 1135 | if (id1 > id2) { |
|---|
| 1136 | __MAP->setTrue( id1,id2 ); |
|---|
| 1137 | } else { |
|---|
| 1138 | __MAP->setTrue( id2,id1 ); |
|---|
| 1139 | } |
|---|
| 1140 | } |
|---|
| 1141 | } |
|---|
| 1142 | for ( SpeciesID id = 0; id <= __MAX_ID; ++id ) { |
|---|
| 1143 | __MAP->setTrue( id,id ); |
|---|
| 1144 | } |
|---|
| 1145 | __MAP->recalcCounters(); |
|---|
| 1146 | __PRESET_MAP->copy( __MAP ); |
|---|
| 1147 | printf( "..loaded result file (enter to continue)\n" ); |
|---|
| 1148 | printf( "cleaning up... result file\n" ); fflush( stdout ); |
|---|
| 1149 | delete result_file; |
|---|
| 1150 | |
|---|
| 1151 | // |
|---|
| 1152 | // create or read candidates |
|---|
| 1153 | // |
|---|
| 1154 | __CANDIDATES_ROOT = new PS_Candidate( 0.0 ); |
|---|
| 1155 | PS_FileBuffer *candidates_file; |
|---|
| 1156 | struct tms before; |
|---|
| 1157 | if (candidates_filename[0] != '-') { |
|---|
| 1158 | printf( "searching candidates..\n" ); |
|---|
| 1159 | // |
|---|
| 1160 | // recursively build a tree of candidates |
|---|
| 1161 | // |
|---|
| 1162 | __MAX_DEPTH = 0; |
|---|
| 1163 | for ( long species_count = __SPECIES_COUNT; species_count > 0; species_count >>= 1 ) { |
|---|
| 1164 | ++__MAX_DEPTH; |
|---|
| 1165 | } |
|---|
| 1166 | __MAX_DEPTH = (__MAX_DEPTH * 3) >> 1; |
|---|
| 1167 | __PATH_IN_CANDIDATES = (char *)calloc( __MAX_DEPTH+1, sizeof(char) ); |
|---|
| 1168 | __CANDIDATES_TODO = 0; |
|---|
| 1169 | __CANDIDATES_FINISHED = 0; |
|---|
| 1170 | times( &before ); |
|---|
| 1171 | PS_descend( __CANDIDATES_ROOT, db->getConstRootNode(), 0, 0.0 ); |
|---|
| 1172 | printf( "PS_descend : total " ); |
|---|
| 1173 | PS_print_time_diff( &before ); |
|---|
| 1174 | |
|---|
| 1175 | // |
|---|
| 1176 | // save candidates |
|---|
| 1177 | // |
|---|
| 1178 | printf( "saving candidates to %s..\n", candidates_filename ); |
|---|
| 1179 | candidates_file = new PS_FileBuffer( candidates_filename, PS_FileBuffer::WRITEONLY ); |
|---|
| 1180 | __CANDIDATES_ROOT->false_IDs = __BITS_IN_MAP; |
|---|
| 1181 | __CANDIDATES_ROOT->save( candidates_file, __BITS_IN_MAP ); |
|---|
| 1182 | } else { |
|---|
| 1183 | printf( "loading candidates..\n" ); |
|---|
| 1184 | candidates_file = new PS_FileBuffer( candidates_filename+1, PS_FileBuffer::READONLY ); |
|---|
| 1185 | __CANDIDATES_ROOT->load( candidates_file, __BITS_IN_MAP, db->getConstRootNode() ); |
|---|
| 1186 | printf( "..loaded candidates file (enter to continue)\n" ); |
|---|
| 1187 | } |
|---|
| 1188 | printf( "cleaning up... candidates file\n" ); fflush( stdout ); |
|---|
| 1189 | delete candidates_file; |
|---|
| 1190 | |
|---|
| 1191 | // |
|---|
| 1192 | // scan candidates-tree for leaf candidates |
|---|
| 1193 | // |
|---|
| 1194 | printf( "CANDIDATES :\n" ); |
|---|
| 1195 | __CANDIDATES_ROOT->print(); |
|---|
| 1196 | printf( "\nsearching leaf candidates.. " ); |
|---|
| 1197 | PS_CandidateSet leaf_candidates; |
|---|
| 1198 | PS_get_leaf_candidates( __CANDIDATES_ROOT, leaf_candidates ); |
|---|
| 1199 | printf( "%zu\n", leaf_candidates.size() ); |
|---|
| 1200 | |
|---|
| 1201 | // |
|---|
| 1202 | // scan tree for next candidates (below the ones in leaf_candidates) |
|---|
| 1203 | // |
|---|
| 1204 | times( &before ); |
|---|
| 1205 | long round = 0; |
|---|
| 1206 | while ((leaf_candidates.size() > 0) && |
|---|
| 1207 | (round < 200)) { |
|---|
| 1208 | printf( "\nsearching next candidates [round #%li]..\n", ++round ); |
|---|
| 1209 | PS_get_next_candidates( db->getConstRootNode(), leaf_candidates ); |
|---|
| 1210 | |
|---|
| 1211 | // printf( "\nCANDIDATES :\n" ); |
|---|
| 1212 | // __CANDIDATES_ROOT->print(); |
|---|
| 1213 | printf( "rounds %li total ", round ); |
|---|
| 1214 | PS_print_time_diff( &before ); |
|---|
| 1215 | // getchar(); |
|---|
| 1216 | } |
|---|
| 1217 | |
|---|
| 1218 | printf( "\nFINAL CANDIDATES:\n" ); |
|---|
| 1219 | __CANDIDATES_ROOT->print( 0,true ); |
|---|
| 1220 | printf( "\nfinal leaf candidates.. (%zu)\n", leaf_candidates.size() ); |
|---|
| 1221 | PS_get_leaf_candidates( __CANDIDATES_ROOT, leaf_candidates, true ); |
|---|
| 1222 | for ( PS_CandidateSetIter c = leaf_candidates.begin(); |
|---|
| 1223 | c != leaf_candidates.end(); |
|---|
| 1224 | ++c ) { |
|---|
| 1225 | (*(*c)).print(); |
|---|
| 1226 | } |
|---|
| 1227 | |
|---|
| 1228 | // |
|---|
| 1229 | // save final candidates |
|---|
| 1230 | // |
|---|
| 1231 | printf( "saving final candidates to %s..\n", final_candidates_filename ); |
|---|
| 1232 | candidates_file = new PS_FileBuffer( final_candidates_filename, PS_FileBuffer::WRITEONLY ); |
|---|
| 1233 | __CANDIDATES_ROOT->save( candidates_file, __BITS_IN_MAP ); |
|---|
| 1234 | printf( "cleaning up... candidates file\n" ); fflush( stdout ); |
|---|
| 1235 | delete candidates_file; |
|---|
| 1236 | |
|---|
| 1237 | // |
|---|
| 1238 | // starting with the best candidate print the __MAP for each |
|---|
| 1239 | // depth walking up to root of candidates-tree |
|---|
| 1240 | // |
|---|
| 1241 | if (result_out_prefix) { |
|---|
| 1242 | // put __MAP in the state 'after applying best_candidate and its parents' |
|---|
| 1243 | PS_CandidateSPtr best_candidate_smart = *leaf_candidates.begin(); |
|---|
| 1244 | PS_CandidatePtr best_candidate = &(*best_candidate_smart); |
|---|
| 1245 | PS_ascend( best_candidate ); |
|---|
| 1246 | PS_apply_path_to_bitmap( best_candidate->path ); |
|---|
| 1247 | while (best_candidate) { |
|---|
| 1248 | PS_GNUPlot( result_out_prefix, __MAX_DEPTH--, best_candidate->path, noMatches ); |
|---|
| 1249 | best_candidate = PS_ascend( best_candidate ); |
|---|
| 1250 | } |
|---|
| 1251 | } |
|---|
| 1252 | |
|---|
| 1253 | // |
|---|
| 1254 | // clean up |
|---|
| 1255 | // |
|---|
| 1256 | printf( "cleaning up... candidates\n" ); fflush( stdout ); |
|---|
| 1257 | delete __CANDIDATES_ROOT; |
|---|
| 1258 | free( __PATH_IN_CANDIDATES ); |
|---|
| 1259 | printf( "cleaning up... database\n" ); fflush( stdout ); |
|---|
| 1260 | delete db; |
|---|
| 1261 | printf( "cleaning up... bitmaps\n" ); fflush( stdout ); |
|---|
| 1262 | delete __PRESET_MAP; |
|---|
| 1263 | delete __MAP; |
|---|
| 1264 | |
|---|
| 1265 | return 0; |
|---|
| 1266 | } |
|---|