| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : PT_match.cxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // =============================================================== // |
|---|
| 10 | |
|---|
| 11 | #include "probe.h" |
|---|
| 12 | #include <PT_server_prototypes.h> |
|---|
| 13 | #include <struct_man.h> |
|---|
| 14 | |
|---|
| 15 | #include "pt_split.h" |
|---|
| 16 | #include "probe_tree.h" |
|---|
| 17 | |
|---|
| 18 | #include <arb_strbuf.h> |
|---|
| 19 | #include <arb_defs.h> |
|---|
| 20 | #include <arb_sort.h> |
|---|
| 21 | #include <cctype> |
|---|
| 22 | #include <map> |
|---|
| 23 | |
|---|
| 24 | // overloaded functions to avoid problems with type-punning: |
|---|
| 25 | inline void aisc_link(dll_public *dll, PT_probematch *match) { aisc_link(reinterpret_cast<dllpublic_ext*>(dll), reinterpret_cast<dllheader_ext*>(match)); } |
|---|
| 26 | |
|---|
| 27 | class MatchRequest; |
|---|
| 28 | class Mismatches { |
|---|
| 29 | MatchRequest& req; |
|---|
| 30 | |
|---|
| 31 | int plain; // plain mismatch between 2 standard bases |
|---|
| 32 | int ambig; // mismatch with N or '.' involved |
|---|
| 33 | |
|---|
| 34 | double weighted; // weighted mismatches |
|---|
| 35 | |
|---|
| 36 | public: |
|---|
| 37 | |
|---|
| 38 | Mismatches(MatchRequest& req_) : req(req_), plain(0), ambig(0), weighted(0.0) {} |
|---|
| 39 | |
|---|
| 40 | inline void count_weighted(char probe, char seq, int height); |
|---|
| 41 | void count_versus(const ReadableDataLoc& loc, const char *probe, int height); |
|---|
| 42 | |
|---|
| 43 | inline bool accepted() const; |
|---|
| 44 | |
|---|
| 45 | int get_plain() const { return plain; } |
|---|
| 46 | int get_ambig() const { return ambig; } |
|---|
| 47 | |
|---|
| 48 | double get_weighted() const { return weighted; } |
|---|
| 49 | |
|---|
| 50 | inline PT_local& get_PT_local() const; |
|---|
| 51 | }; |
|---|
| 52 | |
|---|
| 53 | class MatchRequest : virtual Noncopyable { |
|---|
| 54 | PT_local& pt_local; |
|---|
| 55 | |
|---|
| 56 | int max_ambig; // max. possible ambiguous hits (i.e. max value in Mismatches::ambig) |
|---|
| 57 | int *accepted_N_mismatches; |
|---|
| 58 | |
|---|
| 59 | MismatchWeights weights; |
|---|
| 60 | |
|---|
| 61 | void init_accepted_N_mismatches(int ignored_Nmismatches, int when_less_than_Nmismatches); |
|---|
| 62 | |
|---|
| 63 | public: |
|---|
| 64 | explicit MatchRequest(PT_local& locs, int probe_length) |
|---|
| 65 | : pt_local(locs), |
|---|
| 66 | max_ambig(probe_length), |
|---|
| 67 | accepted_N_mismatches(new int[max_ambig+1]), |
|---|
| 68 | weights(locs.bond) |
|---|
| 69 | { |
|---|
| 70 | init_accepted_N_mismatches(pt_local.pm_nmatches_ignored, pt_local.pm_nmatches_limit); |
|---|
| 71 | } |
|---|
| 72 | ~MatchRequest() { |
|---|
| 73 | delete [] accepted_N_mismatches; |
|---|
| 74 | } |
|---|
| 75 | |
|---|
| 76 | PT_local& get_PT_local() const { return pt_local; } |
|---|
| 77 | |
|---|
| 78 | bool hit_limit_reached() const { |
|---|
| 79 | bool reached = pt_local.pm_max_hits>0 && pt_local.ppm.cnt >= pt_local.pm_max_hits; |
|---|
| 80 | if (reached) pt_local.matches_truncated = 1; |
|---|
| 81 | return reached; |
|---|
| 82 | } |
|---|
| 83 | |
|---|
| 84 | int accept_N_mismatches(int ambig) const { |
|---|
| 85 | pt_assert(ambig<=max_ambig); |
|---|
| 86 | return accepted_N_mismatches[ambig]; |
|---|
| 87 | } |
|---|
| 88 | |
|---|
| 89 | bool add_hit(const DataLoc& at, const Mismatches& mismatch); |
|---|
| 90 | bool add_hits_for_children(POS_TREE2 *pt, const Mismatches& mismatch); |
|---|
| 91 | bool collect_hits_for(const char *probe, POS_TREE2 *pt, Mismatches& mismatch, int height); |
|---|
| 92 | |
|---|
| 93 | int allowed_mismatches() const { return pt_local.pm_max; } |
|---|
| 94 | double get_mismatch_weight(char probe, char seq) const { return weights.get(probe, seq); } |
|---|
| 95 | }; |
|---|
| 96 | |
|---|
| 97 | |
|---|
| 98 | |
|---|
| 99 | void MatchRequest::init_accepted_N_mismatches(int ignored_Nmismatches, int when_less_than_Nmismatches) { |
|---|
| 100 | // calculate table for PT_N mismatches |
|---|
| 101 | // |
|---|
| 102 | // 'ignored_Nmismatches' specifies, how many N-mismatches will be accepted as |
|---|
| 103 | // matches, when overall number of N-mismatches is below 'when_less_than_Nmismatches'. |
|---|
| 104 | // |
|---|
| 105 | // above that limit, every N-mismatch counts as mismatch |
|---|
| 106 | |
|---|
| 107 | when_less_than_Nmismatches = std::min(when_less_than_Nmismatches, max_ambig+1); |
|---|
| 108 | ignored_Nmismatches = std::min(ignored_Nmismatches, when_less_than_Nmismatches-1); |
|---|
| 109 | |
|---|
| 110 | accepted_N_mismatches[0] = 0; |
|---|
| 111 | int mm; |
|---|
| 112 | for (mm = 1; mm<when_less_than_Nmismatches; ++mm) { // LOOP_VECTORIZED =1[<7] =2 (1 loop prior; 2 loops reported since 7.x) |
|---|
| 113 | accepted_N_mismatches[mm] = mm>ignored_Nmismatches ? mm-ignored_Nmismatches : 0; |
|---|
| 114 | } |
|---|
| 115 | pt_assert(mm <= (max_ambig+1)); |
|---|
| 116 | for (; mm <= max_ambig; ++mm) { |
|---|
| 117 | accepted_N_mismatches[mm] = mm; |
|---|
| 118 | } |
|---|
| 119 | } |
|---|
| 120 | |
|---|
| 121 | inline void Mismatches::count_weighted(char probe, char seq, int height) { |
|---|
| 122 | bool is_ambig = is_ambig_base(probe) || is_ambig_base(seq); |
|---|
| 123 | if (is_ambig || probe != seq) { |
|---|
| 124 | if (is_ambig) ambig++; else plain++; |
|---|
| 125 | weighted += req.get_mismatch_weight(probe, seq) * psg.pos_to_weight[height]; |
|---|
| 126 | } |
|---|
| 127 | } |
|---|
| 128 | |
|---|
| 129 | inline bool Mismatches::accepted() const { |
|---|
| 130 | if (get_PT_local().sort_by == PT_MATCH_TYPE_INTEGER) { |
|---|
| 131 | return (req.accept_N_mismatches(ambig)+plain) <= req.allowed_mismatches(); |
|---|
| 132 | } |
|---|
| 133 | return weighted <= (req.allowed_mismatches()+0.5); |
|---|
| 134 | } |
|---|
| 135 | |
|---|
| 136 | inline PT_local& Mismatches::get_PT_local() const { |
|---|
| 137 | return req.get_PT_local(); |
|---|
| 138 | } |
|---|
| 139 | |
|---|
| 140 | bool MatchRequest::add_hit(const DataLoc& at, const Mismatches& mismatch) { |
|---|
| 141 | PT_probematch *ml = create_PT_probematch(); |
|---|
| 142 | |
|---|
| 143 | ml->name = at.get_name(); |
|---|
| 144 | ml->b_pos = at.get_abs_pos(); |
|---|
| 145 | ml->g_pos = -1; |
|---|
| 146 | ml->rpos = at.get_rel_pos(); |
|---|
| 147 | |
|---|
| 148 | ml->mismatches = mismatch.get_plain() + accept_N_mismatches(mismatch.get_ambig()); |
|---|
| 149 | ml->wmismatches = mismatch.get_weighted(); |
|---|
| 150 | ml->N_mismatches = mismatch.get_ambig(); |
|---|
| 151 | |
|---|
| 152 | ml->sequence = psg.main_probe; |
|---|
| 153 | ml->reversed = psg.reversed ? 1 : 0; |
|---|
| 154 | |
|---|
| 155 | aisc_link(&get_PT_local().ppm, ml); |
|---|
| 156 | |
|---|
| 157 | return hit_limit_reached(); |
|---|
| 158 | } |
|---|
| 159 | |
|---|
| 160 | bool MatchRequest::add_hits_for_children(POS_TREE2 *pt, const Mismatches& mismatch) { |
|---|
| 161 | //! go down the tree to chains and leafs; copy names, positions and mismatches in locs structure |
|---|
| 162 | |
|---|
| 163 | pt_assert(pt && mismatch.accepted()); // invalid or superfluous call |
|---|
| 164 | pt_assert(!hit_limit_reached()); |
|---|
| 165 | |
|---|
| 166 | bool enough = false; |
|---|
| 167 | switch (pt->get_type()) { |
|---|
| 168 | case PT2_LEAF: |
|---|
| 169 | enough = add_hit(DataLoc(pt), mismatch); |
|---|
| 170 | break; |
|---|
| 171 | |
|---|
| 172 | case PT2_CHAIN: { |
|---|
| 173 | ChainIteratorStage2 entry(pt); |
|---|
| 174 | while (entry && !enough) { |
|---|
| 175 | enough = add_hit(DataLoc(entry.at()), mismatch); |
|---|
| 176 | ++entry; |
|---|
| 177 | } |
|---|
| 178 | break; |
|---|
| 179 | } |
|---|
| 180 | case PT2_NODE: |
|---|
| 181 | for (int base = PT_QU; base < PT_BASES && !enough; base++) { |
|---|
| 182 | POS_TREE2 *son = PT_read_son(pt, (PT_base)base); |
|---|
| 183 | if (son) enough = add_hits_for_children(son, mismatch); |
|---|
| 184 | } |
|---|
| 185 | break; |
|---|
| 186 | } |
|---|
| 187 | return enough; |
|---|
| 188 | } |
|---|
| 189 | |
|---|
| 190 | void Mismatches::count_versus(const ReadableDataLoc& loc, const char *probe, int height) { |
|---|
| 191 | int base; |
|---|
| 192 | while ((base = probe[height])) { |
|---|
| 193 | int ref = loc[height]; |
|---|
| 194 | if (ref == PT_QU) break; |
|---|
| 195 | |
|---|
| 196 | count_weighted(base, ref, height); |
|---|
| 197 | height++; |
|---|
| 198 | } |
|---|
| 199 | |
|---|
| 200 | if (base != PT_QU) { // not end of probe |
|---|
| 201 | pt_assert(loc[height] == PT_QU); // at EOS |
|---|
| 202 | do { |
|---|
| 203 | count_weighted(base, PT_QU, height); |
|---|
| 204 | height++; |
|---|
| 205 | } |
|---|
| 206 | while ((base = probe[height])); |
|---|
| 207 | } |
|---|
| 208 | } |
|---|
| 209 | |
|---|
| 210 | bool MatchRequest::collect_hits_for(const char *probe, POS_TREE2 *pt, Mismatches& mismatches, const int height) { |
|---|
| 211 | //! search down the tree to find matching species for the given probe |
|---|
| 212 | |
|---|
| 213 | pt_assert(pt && mismatches.accepted()); // invalid or superfluous call |
|---|
| 214 | pt_assert(!hit_limit_reached()); |
|---|
| 215 | |
|---|
| 216 | bool enough = false; |
|---|
| 217 | if (probe[height] == PT_QU) { |
|---|
| 218 | enough = add_hits_for_children(pt, mismatches); |
|---|
| 219 | } |
|---|
| 220 | else { |
|---|
| 221 | switch (pt->get_type()) { |
|---|
| 222 | case PT2_LEAF: { |
|---|
| 223 | ReadableDataLoc loc(pt); |
|---|
| 224 | mismatches.count_versus(loc, probe, height); |
|---|
| 225 | if (mismatches.accepted()) { |
|---|
| 226 | enough = add_hit(loc, mismatches); |
|---|
| 227 | } |
|---|
| 228 | break; |
|---|
| 229 | } |
|---|
| 230 | case PT2_CHAIN: { |
|---|
| 231 | pt_assert(probe); |
|---|
| 232 | |
|---|
| 233 | ChainIteratorStage2 entry(pt); |
|---|
| 234 | while (entry && !enough) { |
|---|
| 235 | Mismatches entry_mismatches(mismatches); |
|---|
| 236 | DataLoc dloc(entry.at()); // @@@ EXPENSIVE_CONVERSION |
|---|
| 237 | entry_mismatches.count_versus(ReadableDataLoc(dloc), probe, height); // @@@ EXPENSIVE_CONVERSION |
|---|
| 238 | if (entry_mismatches.accepted()) { |
|---|
| 239 | enough = add_hit(dloc, entry_mismatches); |
|---|
| 240 | } |
|---|
| 241 | ++entry; |
|---|
| 242 | } |
|---|
| 243 | break; |
|---|
| 244 | } |
|---|
| 245 | case PT2_NODE: |
|---|
| 246 | for (int i=PT_QU; i<PT_BASES && !enough; i++) { |
|---|
| 247 | POS_TREE2 *son = PT_read_son(pt, (PT_base)i); |
|---|
| 248 | if (son) { |
|---|
| 249 | Mismatches son_mismatches(mismatches); |
|---|
| 250 | son_mismatches.count_weighted(probe[height], i, height); |
|---|
| 251 | if (son_mismatches.accepted()) { |
|---|
| 252 | if (i == PT_QU) { |
|---|
| 253 | // @@@ calculation here is constant for a fixed probe (cache results) |
|---|
| 254 | pt_assert(probe[height] != PT_QU); |
|---|
| 255 | |
|---|
| 256 | int son_height = height+1; |
|---|
| 257 | while (1) { |
|---|
| 258 | int base = probe[son_height]; |
|---|
| 259 | if (base == PT_QU) { |
|---|
| 260 | if (son_mismatches.accepted()) { |
|---|
| 261 | enough = add_hits_for_children(son, son_mismatches); |
|---|
| 262 | } |
|---|
| 263 | break; |
|---|
| 264 | } |
|---|
| 265 | |
|---|
| 266 | son_mismatches.count_weighted(base, PT_QU, son_height); |
|---|
| 267 | if (!son_mismatches.accepted()) break; |
|---|
| 268 | |
|---|
| 269 | ++son_height; |
|---|
| 270 | } |
|---|
| 271 | } |
|---|
| 272 | else { |
|---|
| 273 | enough = collect_hits_for(probe, son, son_mismatches, height+1); |
|---|
| 274 | } |
|---|
| 275 | } |
|---|
| 276 | } |
|---|
| 277 | } |
|---|
| 278 | break; |
|---|
| 279 | } |
|---|
| 280 | } |
|---|
| 281 | |
|---|
| 282 | return enough; |
|---|
| 283 | } |
|---|
| 284 | |
|---|
| 285 | static int pt_sort_compare_match(const void *PT_probematch_ptr1, const void *PT_probematch_ptr2, void *) { |
|---|
| 286 | const PT_probematch *mach1 = (const PT_probematch*)PT_probematch_ptr1; |
|---|
| 287 | const PT_probematch *mach2 = (const PT_probematch*)PT_probematch_ptr2; |
|---|
| 288 | |
|---|
| 289 | if (psg.sort_by != PT_MATCH_TYPE_INTEGER) { |
|---|
| 290 | if (mach1->wmismatches > mach2->wmismatches) return 1; |
|---|
| 291 | if (mach1->wmismatches < mach2->wmismatches) return -1; |
|---|
| 292 | } |
|---|
| 293 | |
|---|
| 294 | int cmp = mach1->mismatches - mach2->mismatches; |
|---|
| 295 | if (!cmp) { |
|---|
| 296 | cmp = mach1->N_mismatches - mach2->N_mismatches; |
|---|
| 297 | if (!cmp) { |
|---|
| 298 | if (mach1->wmismatches < mach2->wmismatches) cmp = -1; |
|---|
| 299 | else if (mach1->wmismatches > mach2->wmismatches) cmp = 1; |
|---|
| 300 | else { |
|---|
| 301 | cmp = mach1->b_pos - mach2->b_pos; |
|---|
| 302 | if (!cmp) { |
|---|
| 303 | cmp = mach1->name - mach2->name; |
|---|
| 304 | } |
|---|
| 305 | } |
|---|
| 306 | } |
|---|
| 307 | } |
|---|
| 308 | |
|---|
| 309 | return cmp; |
|---|
| 310 | } |
|---|
| 311 | |
|---|
| 312 | static void pt_sort_match_list(PT_local * locs) { |
|---|
| 313 | if (locs->pm) { |
|---|
| 314 | psg.sort_by = locs->sort_by; |
|---|
| 315 | |
|---|
| 316 | int list_len = locs->pm->get_count(); |
|---|
| 317 | if (list_len > 1) { |
|---|
| 318 | PT_probematch **my_list; ARB_calloc(my_list, list_len); |
|---|
| 319 | { |
|---|
| 320 | PT_probematch *match = locs->pm; |
|---|
| 321 | for (int i=0; match; i++) { |
|---|
| 322 | my_list[i] = match; |
|---|
| 323 | match = match->next; |
|---|
| 324 | } |
|---|
| 325 | } |
|---|
| 326 | GB_sort((void **)my_list, 0, list_len, pt_sort_compare_match, NULp); |
|---|
| 327 | for (int i=0; i<list_len; i++) { |
|---|
| 328 | aisc_unlink((dllheader_ext*)my_list[i]); |
|---|
| 329 | aisc_link(&locs->ppm, my_list[i]); |
|---|
| 330 | } |
|---|
| 331 | free(my_list); |
|---|
| 332 | } |
|---|
| 333 | } |
|---|
| 334 | } |
|---|
| 335 | char *create_reversed_probe(char *probe, int len) { |
|---|
| 336 | //! reverse order of bases in a probe |
|---|
| 337 | char *rev_probe = ARB_strduplen(probe, len); |
|---|
| 338 | reverse_probe(rev_probe, len); |
|---|
| 339 | return rev_probe; |
|---|
| 340 | } |
|---|
| 341 | |
|---|
// Position dependent weight of a mismatch at 'pos' inside a sequence of length 'seq_len'.
//
// The weight follows a parabola over the sequence:
// - 'y1' at both ends (pos == 0 and pos == seq_len-1)
// - 'y1'+'y2' in the center
//
// For 'seq_len' <= 1 there is only an "end" position, so 'y1' is returned
// (the formula itself would divide zero by zero there and yield NaN).
//
// The products are calculated as double to avoid int overflow for long sequences.
// Kept as a single return statement (required for C++11 constexpr functions).
CONSTEXPR_INLINE double calc_position_wmis(int pos, int seq_len, double y1, double y2) {
    return seq_len <= 1
        ? y1
        : ((double)pos * (double)(seq_len - 1 - pos)) / ((double)(seq_len - 1) * (double)(seq_len - 1)) * (y2*4.0) + y1;
}
|---|
| 345 | |
|---|
| 346 | static void pt_build_pos_to_weight(PT_MATCH_TYPE type, const char *sequence) { |
|---|
| 347 | delete [] psg.pos_to_weight; |
|---|
| 348 | int slen = strlen(sequence); |
|---|
| 349 | psg.pos_to_weight = new double[slen+1]; |
|---|
| 350 | int p; |
|---|
| 351 | for (p=0; p<slen; p++) { // LOOP_VECTORIZED =4[<8] =2 (no idea why this is instantiated 4 times for gcc<8.1. inline would/does cause 2 instantiations, as encountered with gcc 8.1) |
|---|
| 352 | if (type == PT_MATCH_TYPE_WEIGHTED_PLUS_POS) { |
|---|
| 353 | psg.pos_to_weight[p] = calc_position_wmis(p, slen, 0.3, 1.0); |
|---|
| 354 | } |
|---|
| 355 | else { |
|---|
| 356 | psg.pos_to_weight[p] = 1.0; |
|---|
| 357 | } |
|---|
| 358 | } |
|---|
| 359 | psg.pos_to_weight[slen] = 0; |
|---|
| 360 | } |
|---|
| 361 | |
|---|
| 362 | static std::map<PT_local*,Splits> splits_for_match_overlay; // initialized by probe-match, used by match-retrieval (one entry for each match-request); @@@ leaks.. 1 entry for each request |
|---|
| 363 | |
|---|
| 364 | int probe_match(PT_local *locs, aisc_string probestring) { |
|---|
| 365 | //! find out where a given probe matches |
|---|
| 366 | |
|---|
| 367 | freedup(locs->pm_sequence, probestring); |
|---|
| 368 | psg.main_probe = locs->pm_sequence; |
|---|
| 369 | |
|---|
| 370 | compress_data(probestring); |
|---|
| 371 | while (PT_probematch *ml = locs->pm) destroy_PT_probematch(ml); |
|---|
| 372 | locs->matches_truncated = 0; |
|---|
| 373 | |
|---|
| 374 | #if defined(DEBUG) && 0 |
|---|
| 375 | printf("Current bond values:\n"); |
|---|
| 376 | for (int y = 0; y<4; y++) { |
|---|
| 377 | for (int x = 0; x<4; x++) { |
|---|
| 378 | printf("%5.2f", locs->bond[y*4+x].val); |
|---|
| 379 | } |
|---|
| 380 | printf("\n"); |
|---|
| 381 | } |
|---|
| 382 | #endif // DEBUG |
|---|
| 383 | |
|---|
| 384 | int probe_len = strlen(probestring); |
|---|
| 385 | bool failed = false; |
|---|
| 386 | if (probe_len<MIN_PROBE_LENGTH) { |
|---|
| 387 | pt_export_error(locs, GBS_global_string("Min. probe length is %i", MIN_PROBE_LENGTH)); |
|---|
| 388 | failed = true; |
|---|
| 389 | } |
|---|
| 390 | else { |
|---|
| 391 | int max_poss_mismatches = probe_len/2; |
|---|
| 392 | pt_assert(max_poss_mismatches>0); |
|---|
| 393 | if (locs->pm_max > max_poss_mismatches) { |
|---|
| 394 | pt_export_error(locs, GBS_global_string("Max. %i mismatch%s are allowed for probes of length %i", |
|---|
| 395 | max_poss_mismatches, |
|---|
| 396 | max_poss_mismatches == 1 ? "" : "es", |
|---|
| 397 | probe_len)); |
|---|
| 398 | failed = true; |
|---|
| 399 | } |
|---|
| 400 | } |
|---|
| 401 | |
|---|
| 402 | if (!failed) { |
|---|
| 403 | if (locs->pm_complement_first) { |
|---|
| 404 | complement_probe(probestring, probe_len); |
|---|
| 405 | } |
|---|
| 406 | psg.reversed = 0; |
|---|
| 407 | |
|---|
| 408 | freedup(locs->pm_sequence, probestring); |
|---|
| 409 | psg.main_probe = locs->pm_sequence; |
|---|
| 410 | |
|---|
| 411 | pt_build_pos_to_weight((PT_MATCH_TYPE)locs->sort_by, probestring); |
|---|
| 412 | |
|---|
| 413 | MatchRequest req(*locs, probe_len); |
|---|
| 414 | |
|---|
| 415 | pt_assert(req.allowed_mismatches() >= 0); // till [8011] value<0 was used to trigger "new match" (feature unused) |
|---|
| 416 | Mismatches mismatch(req); |
|---|
| 417 | req.collect_hits_for(probestring, psg.TREE_ROOT2(), mismatch, 0); |
|---|
| 418 | |
|---|
| 419 | if (locs->pm_also_revcomp) { |
|---|
| 420 | psg.reversed = 1; |
|---|
| 421 | char *rev_pro = create_reversed_probe(probestring, probe_len); |
|---|
| 422 | complement_probe(rev_pro, probe_len); |
|---|
| 423 | freeset(locs->pm_csequence, psg.main_probe = ARB_strdup(rev_pro)); |
|---|
| 424 | |
|---|
| 425 | Mismatches rev_mismatch(req); |
|---|
| 426 | req.collect_hits_for(rev_pro, psg.TREE_ROOT2(), rev_mismatch, 0); |
|---|
| 427 | free(rev_pro); |
|---|
| 428 | } |
|---|
| 429 | pt_sort_match_list(locs); |
|---|
| 430 | splits_for_match_overlay[locs] = Splits(locs); |
|---|
| 431 | } |
|---|
| 432 | free(probestring); |
|---|
| 433 | |
|---|
| 434 | return 0; |
|---|
| 435 | } |
|---|
| 436 | |
|---|
// Layout of the formatted match list: which columns are shown and how wide they are.
struct format_props {
    // optional columns
    bool show_mismatches; // whether to show 'mis' and 'N_mis'
    bool show_ecoli;      // whether to show 'ecoli' column
    bool show_gpos;       // whether to show 'gpos' column

    // columns with variable width
    int name_width;         // width of 'name' column
    int gene_or_full_width; // width of 'genename' or 'fullname' column
    int pos_width;          // max. width of pos column
    int gpos_width;         // max. width of gpos column
    int ecoli_width;        // max. width of ecoli column

    // columns with fixed width
    int rev_width() const {
        return 3;
    }
    int mis_width() const {
        return 3;
    }
    int N_mis_width() const {
        return 5;
    }
    int wmis_width() const {
        return 4;
    }
};
|---|
| 453 | |
|---|
// Raises 'curr_max' to the length of 'str', if 'str' is longer.
// A NULL 'str' leaves 'curr_max' untouched.
inline void set_max(const char *str, int &curr_max) {
    if (!str) return;

    const int len = strlen(str);
    curr_max = len>curr_max ? len : curr_max;
}
|---|
| 460 | |
|---|
| 461 | static format_props detect_format_props(const PT_local *locs, bool show_gpos) { |
|---|
| 462 | PT_probematch *ml = locs->pm; // probe matches |
|---|
| 463 | format_props format; |
|---|
| 464 | |
|---|
| 465 | format.show_mismatches = (ml->N_mismatches >= 0); |
|---|
| 466 | format.show_ecoli = psg.ecoli; // display only if there is ecoli |
|---|
| 467 | format.show_gpos = show_gpos; // display only for gene probe matches |
|---|
| 468 | |
|---|
| 469 | // minimum values (caused by header widths) : |
|---|
| 470 | format.name_width = gene_flag ? 8 : 4; // 'organism' or 'name' |
|---|
| 471 | format.gene_or_full_width = 8; // 'genename' or 'fullname' |
|---|
| 472 | format.pos_width = 3; // 'pos' |
|---|
| 473 | format.gpos_width = 4; // 'gpos' |
|---|
| 474 | format.ecoli_width = 5; // 'ecoli' |
|---|
| 475 | |
|---|
| 476 | for (; ml; ml = ml->next) { |
|---|
| 477 | set_max(virt_name(ml), format.name_width); |
|---|
| 478 | set_max(virt_fullname(ml), format.gene_or_full_width); |
|---|
| 479 | set_max(GBS_global_string("%i", info2bio(ml->b_pos)), format.pos_width); |
|---|
| 480 | if (show_gpos) set_max(GBS_global_string("%i", info2bio(ml->g_pos)), format.gpos_width); |
|---|
| 481 | if (format.show_ecoli) set_max(GBS_global_string("%li", PT_abs_2_ecoli_rel(ml->b_pos+1)), format.ecoli_width); |
|---|
| 482 | } |
|---|
| 483 | |
|---|
| 484 | return format; |
|---|
| 485 | } |
|---|
| 486 | |
|---|
| 487 | inline void cat_internal(GBS_strstruct& memfile, int len, const char *text, int width, char spacer, bool align_left) { |
|---|
| 488 | if (len == width) { |
|---|
| 489 | memfile.cat(text); // text has exact len |
|---|
| 490 | } |
|---|
| 491 | else if (len > width) { // text to long |
|---|
| 492 | // @@@ use ncat here? |
|---|
| 493 | char buf[width+1]; |
|---|
| 494 | memcpy(buf, text, width); |
|---|
| 495 | buf[width] = 0; |
|---|
| 496 | memfile.cat(buf); |
|---|
| 497 | } |
|---|
| 498 | else { // text is too short -> insert spaces |
|---|
| 499 | int spaces = width-len; |
|---|
| 500 | pt_assert(spaces>0); |
|---|
| 501 | char sp[spaces+1]; |
|---|
| 502 | memset(sp, spacer, spaces); |
|---|
| 503 | sp[spaces] = 0; |
|---|
| 504 | |
|---|
| 505 | if (align_left) { |
|---|
| 506 | memfile.cat(text); |
|---|
| 507 | memfile.cat(sp); // @@@ use nput? |
|---|
| 508 | } |
|---|
| 509 | else { |
|---|
| 510 | memfile.cat(sp); // @@@ use nput? |
|---|
| 511 | memfile.cat(text); |
|---|
| 512 | } |
|---|
| 513 | } |
|---|
| 514 | memfile.put(' '); // one space behind each column |
|---|
| 515 | } |
|---|
| 516 | inline void cat_spaced_left (GBS_strstruct& memfile, const char *text, int width) { cat_internal(memfile, strlen(text), text, width, ' ', true); } |
|---|
| 517 | inline void cat_spaced_right(GBS_strstruct& memfile, const char *text, int width) { cat_internal(memfile, strlen(text), text, width, ' ', false); } |
|---|
| 518 | inline void cat_dashed_left (GBS_strstruct& memfile, const char *text, int width) { cat_internal(memfile, strlen(text), text, width, '-', true); } |
|---|
| 519 | inline void cat_dashed_right(GBS_strstruct& memfile, const char *text, int width) { cat_internal(memfile, strlen(text), text, width, '-', false); } |
|---|
| 520 | |
|---|
| 521 | const char *get_match_overlay(const PT_probematch *ml) { |
|---|
| 522 | int pr_len = strlen(ml->sequence); |
|---|
| 523 | PT_local *locs = (PT_local *)ml->mh.parent->parent; |
|---|
| 524 | |
|---|
| 525 | const int CONTEXT_SIZE = 9; |
|---|
| 526 | |
|---|
| 527 | char *ref = ARB_calloc<char>(CONTEXT_SIZE+1+pr_len+1+CONTEXT_SIZE+1); |
|---|
| 528 | memset(ref, '.', CONTEXT_SIZE+1); |
|---|
| 529 | |
|---|
| 530 | SmartCharPtr seqPtr = psg.data[ml->name].get_dataPtr(); |
|---|
| 531 | const char *seq = &*seqPtr; |
|---|
| 532 | |
|---|
| 533 | const Splits& splits = splits_for_match_overlay[locs]; |
|---|
| 534 | |
|---|
| 535 | for (int pr_pos = CONTEXT_SIZE-1, al_pos = ml->rpos-1; |
|---|
| 536 | pr_pos >= 0 && al_pos >= 0; |
|---|
| 537 | pr_pos--, al_pos--) |
|---|
| 538 | { |
|---|
| 539 | if (!seq[al_pos]) break; |
|---|
| 540 | ref[pr_pos] = base_2_readable(seq[al_pos]); |
|---|
| 541 | } |
|---|
| 542 | ref[CONTEXT_SIZE] = '-'; |
|---|
| 543 | |
|---|
| 544 | pt_build_pos_to_weight((PT_MATCH_TYPE)locs->sort_by, ml->sequence); |
|---|
| 545 | |
|---|
| 546 | bool display_right_context = true; |
|---|
| 547 | { |
|---|
| 548 | char *pref = ref+CONTEXT_SIZE+1; |
|---|
| 549 | |
|---|
| 550 | for (int pr_pos = 0, al_pos = ml->rpos; |
|---|
| 551 | pr_pos < pr_len && al_pos < psg.data[ml->name].get_size(); |
|---|
| 552 | pr_pos++, al_pos++) |
|---|
| 553 | { |
|---|
| 554 | int ps = ml->sequence[pr_pos]; // probe sequence |
|---|
| 555 | int ts = seq[al_pos]; // target sequence (hit) |
|---|
| 556 | if (ps == ts) { |
|---|
| 557 | pref[pr_pos] = '='; |
|---|
| 558 | } |
|---|
| 559 | else { |
|---|
| 560 | if (ts) { |
|---|
| 561 | int r = base_2_readable(ts); |
|---|
| 562 | if (is_std_base(ps) && is_std_base(ts)) { |
|---|
| 563 | double h = splits.check(ml->sequence[pr_pos], ts); |
|---|
| 564 | if (h>=0.0) r = tolower(r); // if mismatch does not split probe into two domains -> show as lowercase |
|---|
| 565 | } |
|---|
| 566 | pref[pr_pos] = r; |
|---|
| 567 | } |
|---|
| 568 | else { |
|---|
| 569 | // end of sequence or missing data (dots inside sequence) reached |
|---|
| 570 | // (rest of probe was accepted by N-matches) |
|---|
| 571 | display_right_context = false; |
|---|
| 572 | for (; pr_pos < pr_len; pr_pos++) { // LOOP_VECTORIZED[!<7,!>=910<11] // only worked with 8.x series |
|---|
| 573 | pref[pr_pos] = '.'; |
|---|
| 574 | } |
|---|
| 575 | } |
|---|
| 576 | } |
|---|
| 577 | |
|---|
| 578 | } |
|---|
| 579 | } |
|---|
| 580 | |
|---|
| 581 | { |
|---|
| 582 | char *cref = ref+CONTEXT_SIZE+1+pr_len+1; |
|---|
| 583 | cref[-1] = '-'; |
|---|
| 584 | |
|---|
| 585 | int al_size = psg.data[ml->name].get_size(); |
|---|
| 586 | int al_pos = ml->rpos+pr_len; |
|---|
| 587 | |
|---|
| 588 | if (display_right_context) { |
|---|
| 589 | for (int pr_pos = 0; |
|---|
| 590 | pr_pos < CONTEXT_SIZE && al_pos < al_size; |
|---|
| 591 | pr_pos++, al_pos++) |
|---|
| 592 | { |
|---|
| 593 | cref[pr_pos] = base_2_readable(seq[al_pos]); |
|---|
| 594 | } |
|---|
| 595 | } |
|---|
| 596 | else { |
|---|
| 597 | if (al_pos < al_size) strcpy(cref, "<more>"); |
|---|
| 598 | } |
|---|
| 599 | } |
|---|
| 600 | |
|---|
| 601 | static char *result = NULp; |
|---|
| 602 | freeset(result, ref); |
|---|
| 603 | return result; |
|---|
| 604 | } |
|---|
| 605 | |
|---|
| 606 | const char* get_match_acc(const PT_probematch *ml) { |
|---|
| 607 | return psg.data[ml->name].get_acc(); |
|---|
| 608 | } |
|---|
| 609 | int get_match_start(const PT_probematch *ml) { |
|---|
| 610 | return psg.data[ml->name].get_start(); |
|---|
| 611 | } |
|---|
| 612 | int get_match_stop(const PT_probematch *ml) { |
|---|
| 613 | return psg.data[ml->name].get_stop(); |
|---|
| 614 | } |
|---|
| 615 | |
|---|
| 616 | static const char *get_match_info_formatted(PT_probematch *ml, const format_props& format) { |
|---|
| 617 | GBS_strstruct memfile(256); |
|---|
| 618 | memfile.cat(" "); |
|---|
| 619 | |
|---|
| 620 | cat_spaced_left(memfile, virt_name(ml), format.name_width); |
|---|
| 621 | cat_spaced_left(memfile, virt_fullname(ml), format.gene_or_full_width); |
|---|
| 622 | |
|---|
| 623 | if (format.show_mismatches) { |
|---|
| 624 | cat_spaced_right(memfile, GBS_global_string("%i", ml->mismatches), format.mis_width()); |
|---|
| 625 | cat_spaced_right(memfile, GBS_global_string("%i", ml->N_mismatches), format.N_mis_width()); |
|---|
| 626 | } |
|---|
| 627 | cat_spaced_right(memfile, GBS_global_string("%.1f", ml->wmismatches), format.wmis_width()); |
|---|
| 628 | cat_spaced_right(memfile, GBS_global_string("%i", info2bio(ml->b_pos)), format.pos_width); |
|---|
| 629 | if (format.show_gpos) { |
|---|
| 630 | cat_spaced_right(memfile, GBS_global_string("%i", info2bio(ml->g_pos)), format.gpos_width); |
|---|
| 631 | } |
|---|
| 632 | if (format.show_ecoli) { |
|---|
| 633 | cat_spaced_right(memfile, GBS_global_string("%li", PT_abs_2_ecoli_rel(ml->b_pos+1)), format.ecoli_width); |
|---|
| 634 | } |
|---|
| 635 | cat_spaced_left(memfile, GBS_global_string("%i", ml->reversed), format.rev_width()); |
|---|
| 636 | |
|---|
| 637 | memfile.cat(get_match_overlay(ml)); |
|---|
| 638 | |
|---|
| 639 | static char *result = NULp; // @@@ instead make memfile static and erase before use. |
|---|
| 640 | freeset(result, memfile.release()); |
|---|
| 641 | return result; |
|---|
| 642 | } |
|---|
| 643 | |
|---|
| 644 | static const char *get_match_hinfo_formatted(PT_probematch *ml, const format_props& format) { |
|---|
| 645 | if (ml) { |
|---|
| 646 | GBS_strstruct memfile(500); |
|---|
| 647 | memfile.cat(" "); // one space more than in get_match_info_formatted() |
|---|
| 648 | |
|---|
| 649 | cat_dashed_left(memfile, gene_flag ? "organism" : "name", format.name_width); |
|---|
| 650 | cat_dashed_left(memfile, gene_flag ? "genename" : "fullname", format.gene_or_full_width); |
|---|
| 651 | |
|---|
| 652 | if (format.show_mismatches) { |
|---|
| 653 | cat_dashed_right(memfile, "mis", format.mis_width()); |
|---|
| 654 | cat_dashed_right(memfile, "N_mis", format.N_mis_width()); |
|---|
| 655 | } |
|---|
| 656 | cat_dashed_right(memfile, "wmis", format.wmis_width()); |
|---|
| 657 | cat_dashed_right(memfile, "pos", format.pos_width); |
|---|
| 658 | if (format.show_gpos) { |
|---|
| 659 | cat_dashed_right(memfile, "gpos", format.gpos_width); |
|---|
| 660 | } |
|---|
| 661 | if (format.show_ecoli) { |
|---|
| 662 | cat_dashed_right(memfile, "ecoli", format.ecoli_width); |
|---|
| 663 | } |
|---|
| 664 | cat_dashed_left(memfile, "rev", format.rev_width()); |
|---|
| 665 | |
|---|
| 666 | if (ml->N_mismatches >= 0) { // |
|---|
| 667 | char *seq = ARB_strdup(ml->sequence); |
|---|
| 668 | probe_2_readable(seq, strlen(ml->sequence)); // @@@ maybe wrong if match contains PT_QU (see [9070]) |
|---|
| 669 | |
|---|
| 670 | memfile.cat(" '"); |
|---|
| 671 | memfile.cat(seq); |
|---|
| 672 | memfile.put('\''); |
|---|
| 673 | |
|---|
| 674 | free(seq); |
|---|
| 675 | } |
|---|
| 676 | |
|---|
| 677 | static char *result = NULp; // @@@ instead make memfile static and erase before use. |
|---|
| 678 | freeset(result, memfile.release()); |
|---|
| 679 | return result; |
|---|
| 680 | } |
|---|
| 681 | // Else set header of result |
|---|
| 682 | return "There are no targets"; |
|---|
| 683 | } |
|---|
| 684 | |
|---|
| 685 | static void gene_rel_2_abs(PT_probematch *ml) { |
|---|
| 686 | /*! after gene probe match all positions are gene-relative. |
|---|
| 687 | * gene_rel_2_abs() makes them genome-absolute. |
|---|
| 688 | */ |
|---|
| 689 | |
|---|
| 690 | GB_transaction ta(psg.gb_main); |
|---|
| 691 | |
|---|
| 692 | for (; ml; ml = ml->next) { |
|---|
| 693 | long gene_pos = psg.data[ml->name].get_geneabspos(); |
|---|
| 694 | if (gene_pos >= 0) { |
|---|
| 695 | ml->g_pos = ml->b_pos; |
|---|
| 696 | ml->b_pos += gene_pos; |
|---|
| 697 | } |
|---|
| 698 | else { |
|---|
| 699 | fprintf(stderr, "Error in gene-pt-server: gene w/o position info\n"); |
|---|
| 700 | pt_assert(0); |
|---|
| 701 | } |
|---|
| 702 | } |
|---|
| 703 | } |
|---|
| 704 | |
|---|
| 705 | bytestring *match_string(const PT_local *locs) { |
|---|
| 706 | /*! Create list of species where probe matches. |
|---|
| 707 | * |
|---|
| 708 | * header^1name^1info^1name^1info....^0 |
|---|
| 709 | * (where ^0 and ^1 are ASCII 0 and 1) |
|---|
| 710 | * |
|---|
| 711 | * Implements server function 'MATCH_STRING' |
|---|
| 712 | */ |
|---|
| 713 | |
|---|
| 714 | GBS_strstruct memfile(50000); |
|---|
| 715 | |
|---|
| 716 | if (locs->pm) { |
|---|
| 717 | if (gene_flag) gene_rel_2_abs(locs->pm); |
|---|
| 718 | |
|---|
| 719 | format_props format = detect_format_props(locs, gene_flag); |
|---|
| 720 | |
|---|
| 721 | memfile.cat(get_match_hinfo_formatted(locs->pm, format)); |
|---|
| 722 | memfile.put(char(1)); |
|---|
| 723 | |
|---|
| 724 | for (PT_probematch *ml = locs->pm; ml; ml = ml->next) { |
|---|
| 725 | memfile.cat(virt_name(ml)); |
|---|
| 726 | memfile.put(char(1)); |
|---|
| 727 | memfile.cat(get_match_info_formatted(ml, format)); |
|---|
| 728 | memfile.put(char(1)); |
|---|
| 729 | } |
|---|
| 730 | } |
|---|
| 731 | |
|---|
| 732 | static bytestring bs = { NULp, 0 }; |
|---|
| 733 | bs.size = memfile.get_position()+1; |
|---|
| 734 | freeset(bs.data, memfile.release()); |
|---|
| 735 | return &bs; |
|---|
| 736 | } |
|---|
| 737 | |
|---|
| 738 | |
|---|
| 739 | |
|---|
| 740 | |
|---|
| 741 | bytestring *MP_match_string(const PT_local *locs) { |
|---|
| 742 | /*! Create list of species where probe matches and append number of mismatches and weighted mismatches (used by multiprobe) |
|---|
| 743 | * |
|---|
| 744 | * Format: "header^1name^1#mismatches^1#wmismatches^1name^1#mismatches^1#wmismatches....^0" |
|---|
| 745 | * (where ^0 and ^1 are ASCII 0 and 1) |
|---|
| 746 | * |
|---|
| 747 | * Implements server function 'MP_MATCH_STRING' |
|---|
| 748 | */ |
|---|
| 749 | |
|---|
| 750 | static GBS_strstruct memfile(25000); |
|---|
| 751 | memfile.erase(); |
|---|
| 752 | |
|---|
| 753 | for (PT_probematch *ml = locs->pm; ml; ml = ml->next) { |
|---|
| 754 | memfile.cat(virt_name(ml)); |
|---|
| 755 | memfile.put((char)1); |
|---|
| 756 | memfile.nprintf(30, "%2i", ml->mismatches); |
|---|
| 757 | memfile.put((char)1); |
|---|
| 758 | memfile.nprintf(30, "%1.1f", ml->wmismatches); |
|---|
| 759 | memfile.put((char)1); |
|---|
| 760 | } |
|---|
| 761 | |
|---|
| 762 | static bytestring bs = { NULp, 0 }; |
|---|
| 763 | bs.size = memfile.get_position()+1; |
|---|
| 764 | bs.data = (char*)memfile.get_data(); |
|---|
| 765 | return &bs; |
|---|
| 766 | } |
|---|
| 767 | |
|---|
| 768 | |
|---|
| 769 | bytestring *MP_all_species_string(const PT_local *) { |
|---|
| 770 | /*! Create list of all species known to PT server |
|---|
| 771 | * |
|---|
| 772 | * Format: ^1name^1name....^0 |
|---|
| 773 | * (where ^0 and ^1 are ASCII 0 and 1) |
|---|
| 774 | * |
|---|
| 775 | * Implements server function 'MP_ALL_SPECIES_STRING' |
|---|
| 776 | */ |
|---|
| 777 | |
|---|
| 778 | static GBS_strstruct memfile(10000); |
|---|
| 779 | memfile.erase(); |
|---|
| 780 | |
|---|
| 781 | for (int i = 0; i < psg.data_count; i++) { |
|---|
| 782 | memfile.cat(psg.data[i].get_shortname()); |
|---|
| 783 | memfile.put((char)1); |
|---|
| 784 | } |
|---|
| 785 | |
|---|
| 786 | static bytestring bs = { NULp, 0 }; |
|---|
| 787 | bs.size = memfile.get_position()+1; |
|---|
| 788 | bs.data = (char*)memfile.get_data(); |
|---|
| 789 | return &bs; |
|---|
| 790 | } |
|---|
| 791 | |
|---|
| 792 | int MP_count_all_species(const PT_local *) { |
|---|
| 793 | return psg.data_count; |
|---|
| 794 | } |
|---|
| 795 | |
|---|
| 796 | // -------------------------------------------------------------------------------- |
|---|
| 797 | |
|---|
| 798 | #ifdef UNIT_TESTS |
|---|
| 799 | #ifndef TEST_UNIT_H |
|---|
| 800 | #include <test_unit.h> |
|---|
| 801 | #endif |
|---|
| 802 | |
|---|
| 803 | struct EnterStage2 { |
|---|
| 804 | EnterStage2() { |
|---|
| 805 | PT_init_psg(); |
|---|
| 806 | psg.enter_stage(STAGE2); |
|---|
| 807 | } |
|---|
| 808 | ~EnterStage2() { |
|---|
| 809 | PT_exit_psg(); |
|---|
| 810 | } |
|---|
| 811 | }; |
|---|
| 812 | |
|---|
| 813 | #define TEST_WEIGHTED_MISMATCH(probe,seq,expected) TEST_EXPECT_SIMILAR(weights.get(probe,seq), expected, EPS) |
|---|
| 814 | |
|---|
| 815 | void TEST_weighted_mismatches() { |
|---|
| 816 | EnterStage2 stage2; |
|---|
| 817 | PT_bond bonds[16] = { |
|---|
| 818 | { 0.0 }, { 0.0 }, { 0.5 }, { 1.1 }, |
|---|
| 819 | { 0.0 }, { 0.0 }, { 1.5 }, { 0.0 }, |
|---|
| 820 | { 0.5 }, { 1.5 }, { 0.4 }, { 0.9 }, |
|---|
| 821 | { 1.1 }, { 0.0 }, { 0.9 }, { 0.0 }, |
|---|
| 822 | }; |
|---|
| 823 | |
|---|
| 824 | MismatchWeights weights(bonds); |
|---|
| 825 | |
|---|
| 826 | double EPS = 0.0001; |
|---|
| 827 | |
|---|
| 828 | TEST_WEIGHTED_MISMATCH(PT_A, PT_A, 0.0); |
|---|
| 829 | TEST_WEIGHTED_MISMATCH(PT_A, PT_C, 1.1); // (T~A = 1.1) - (T~C = 0) |
|---|
| 830 | TEST_WEIGHTED_MISMATCH(PT_A, PT_G, 0.2); // (T~A = 1.1) - (T~G = 0.9) |
|---|
| 831 | TEST_WEIGHTED_MISMATCH(PT_A, PT_T, 1.1); |
|---|
| 832 | |
|---|
| 833 | TEST_WEIGHTED_MISMATCH(PT_C, PT_A, 1.0); |
|---|
| 834 | TEST_WEIGHTED_MISMATCH(PT_C, PT_C, 0.0); |
|---|
| 835 | TEST_WEIGHTED_MISMATCH(PT_C, PT_G, 1.1); |
|---|
| 836 | TEST_WEIGHTED_MISMATCH(PT_C, PT_T, 0.6); // (G~C = 1.5) - (G~T = 0.9) |
|---|
| 837 | |
|---|
| 838 | TEST_WEIGHTED_MISMATCH(PT_G, PT_A, 1.5); |
|---|
| 839 | TEST_WEIGHTED_MISMATCH(PT_G, PT_C, 1.5); |
|---|
| 840 | TEST_WEIGHTED_MISMATCH(PT_G, PT_G, 0.0); |
|---|
| 841 | TEST_WEIGHTED_MISMATCH(PT_G, PT_T, 1.5); |
|---|
| 842 | |
|---|
| 843 | TEST_WEIGHTED_MISMATCH(PT_T, PT_A, 1.1); |
|---|
| 844 | TEST_WEIGHTED_MISMATCH(PT_T, PT_C, 1.1); |
|---|
| 845 | TEST_WEIGHTED_MISMATCH(PT_T, PT_G, 0.6); |
|---|
| 846 | TEST_WEIGHTED_MISMATCH(PT_T, PT_T, 0.0); |
|---|
| 847 | |
|---|
| 848 | |
|---|
| 849 | TEST_WEIGHTED_MISMATCH(PT_N, PT_A, 0.9); |
|---|
| 850 | TEST_WEIGHTED_MISMATCH(PT_N, PT_C, 0.925); |
|---|
| 851 | TEST_WEIGHTED_MISMATCH(PT_N, PT_G, 0.475); |
|---|
| 852 | TEST_WEIGHTED_MISMATCH(PT_N, PT_T, 0.8); |
|---|
| 853 | |
|---|
| 854 | TEST_WEIGHTED_MISMATCH(PT_A, PT_N, 0.6); |
|---|
| 855 | TEST_WEIGHTED_MISMATCH(PT_C, PT_N, 0.675); |
|---|
| 856 | TEST_WEIGHTED_MISMATCH(PT_G, PT_N, 1.125); |
|---|
| 857 | TEST_WEIGHTED_MISMATCH(PT_T, PT_N, 0.7); |
|---|
| 858 | |
|---|
| 859 | TEST_WEIGHTED_MISMATCH(PT_N, PT_N, 0.775); |
|---|
| 860 | TEST_WEIGHTED_MISMATCH(PT_QU, PT_QU, 0.775); |
|---|
| 861 | TEST_WEIGHTED_MISMATCH(PT_QU, PT_N, 0.775); |
|---|
| 862 | } |
|---|
| 863 | |
|---|
| 864 | #endif // UNIT_TESTS |
|---|
| 865 | |
|---|
| 866 | // -------------------------------------------------------------------------------- |
|---|
| 867 | |
|---|
| 868 | |
|---|
| 869 | |
|---|