| 1 | |
|---|
| 2 | #include <stdio.h> |
|---|
| 3 | #include <stdlib.h> |
|---|
| 4 | #include <string.h> |
|---|
| 5 | #include <PT_server.h> |
|---|
| 6 | #include <PT_server_prototypes.h> |
|---|
| 7 | #include <struct_man.h> |
|---|
| 8 | #include "ptpan.h" |
|---|
| 9 | #include "pt_prototypes.h" |
|---|
| 10 | #include <arbdbt.h> |
|---|
| 11 | #include <math.h> |
|---|
| 12 | |
|---|
| 13 | /* /// "SearchPartition()" */ |
|---|
| 14 | void SearchPartition(struct PTPanPartition *pp, struct SearchQuery *sq) |
|---|
| 15 | { |
|---|
| 16 | struct PTPanGlobal *pg = pp->pp_PTPanGlobal; |
|---|
| 17 | struct SearchQuery *tmpsq; |
|---|
| 18 | |
|---|
| 19 | /* do search on this partition */ |
|---|
| 20 | tmpsq = CloneSearchQuery(sq); |
|---|
| 21 | tmpsq->sq_PTPanPartition = pp; |
|---|
| 22 | tmpsq->sq_SourceSeq = pp->pp_PrefixSeq; |
|---|
| 23 | |
|---|
| 24 | if (PTPanGlobalPtr->pg_verbose >0) |
|---|
| 25 | printf("== SearchPartition: for %s\n", sq->sq_Query); |
|---|
| 26 | |
|---|
| 27 | if(MatchSequence(tmpsq)) |
|---|
| 28 | { |
|---|
| 29 | if(!(pp->pp_CacheNode = CacheLoadData(pg->pg_PartitionCache, pp->pp_CacheNode, pp))) |
|---|
| 30 | { |
|---|
| 31 | return; /* something went wrong while loading */ |
|---|
| 32 | } |
|---|
| 33 | SearchTree(tmpsq); |
|---|
| 34 | PostFilterQueryHits(tmpsq); |
|---|
| 35 | MergeQueryHits(sq, tmpsq); /* needs semaphore protection on parallel runs */ |
|---|
| 36 | } |
|---|
| 37 | pp->pp_Done = TRUE; |
|---|
| 38 | FreeSearchQuery(tmpsq); |
|---|
| 39 | } |
|---|
| 40 | /* \\\ */ |
|---|
| 41 | |
|---|
| 42 | #ifdef BENCHMARK |
|---|
| 43 | /* /// "QueryTests()" */ |
|---|
| 44 | void QueryTests(struct PTPanGlobal *pg) |
|---|
| 45 | { |
|---|
| 46 | PT_local *locs; |
|---|
| 47 | STRPTR ecoli; |
|---|
| 48 | ULONG ecolilen; |
|---|
| 49 | ULONG pos; |
|---|
| 50 | ULONG qlen; |
|---|
| 51 | char buf[32]; |
|---|
| 52 | |
|---|
| 53 | locs = (PT_local *) calloc(1, sizeof(PT_local)); |
|---|
| 54 | ecoli = FilterSequence(pg, pg->pg_EcoliSeq); |
|---|
| 55 | ecolilen = strlen(ecoli); |
|---|
| 56 | locs->pm_max = 0; /* exact search */ |
|---|
| 57 | locs->pm_complement = 0; |
|---|
| 58 | locs->pm_reversed = 0; |
|---|
| 59 | locs->sort_by = SORT_HITS_WEIGHTED; |
|---|
| 60 | |
|---|
| 61 | //qlen = 18; |
|---|
| 62 | for(locs->pm_max = 4; locs->pm_max < 5; locs->pm_max++) |
|---|
| 63 | { |
|---|
| 64 | // for(qlen = 20; qlen - locs->pm_max >= 10; qlen--) |
|---|
| 65 | for(qlen = 9 + locs->pm_max; qlen >= 10; qlen--) |
|---|
| 66 | // for(qlen = 31; qlen >= 16; qlen--) |
|---|
| 67 | { |
|---|
| 68 | pg->pg_Bench.ts_Hits = 0; |
|---|
| 69 | pg->pg_Bench.ts_UnsafeHits = 0; |
|---|
| 70 | pg->pg_Bench.ts_UnsafeKilled = 0; |
|---|
| 71 | pg->pg_Bench.ts_DupsKilled = 0; |
|---|
| 72 | pg->pg_Bench.ts_CrossBoundKilled = 0; |
|---|
| 73 | pg->pg_Bench.ts_DotsKilled = 0; |
|---|
| 74 | pg->pg_Bench.ts_OutHits = 0; |
|---|
| 75 | pg->pg_Bench.ts_CandSetTime= 0; |
|---|
| 76 | pg->pg_Bench.ts_OutputTime = 0; |
|---|
| 77 | |
|---|
| 78 | for(pos = 0; pos < ecolilen - qlen; pos += 2) |
|---|
| 79 | { |
|---|
| 80 | strncpy(buf, &ecoli[pos], qlen); |
|---|
| 81 | buf[qlen] = 0; |
|---|
| 82 | probe_match(locs, strdup(buf)); |
|---|
| 83 | } |
|---|
| 84 | printf("qDAT: (queries qlen err hits gentime unsafe unkill dupskill crosskill dotskill outhits outtime)\n"); |
|---|
| 85 | printf("%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %s QDAT\n", |
|---|
| 86 | pos, qlen, locs->pm_max, |
|---|
| 87 | pg->pg_Bench.ts_Hits, |
|---|
| 88 | pg->pg_Bench.ts_CandSetTime, |
|---|
| 89 | pg->pg_Bench.ts_UnsafeHits, |
|---|
| 90 | pg->pg_Bench.ts_UnsafeKilled, |
|---|
| 91 | pg->pg_Bench.ts_DupsKilled, |
|---|
| 92 | pg->pg_Bench.ts_CrossBoundKilled, |
|---|
| 93 | pg->pg_Bench.ts_DotsKilled, |
|---|
| 94 | pg->pg_Bench.ts_OutHits, |
|---|
| 95 | pg->pg_Bench.ts_OutputTime, |
|---|
| 96 | pg->pg_DBName); |
|---|
| 97 | fflush(stdout); |
|---|
| 98 | } |
|---|
| 99 | } |
|---|
| 100 | printf("Done\n"); |
|---|
| 101 | free(ecoli); |
|---|
| 102 | free(locs); |
|---|
| 103 | } |
|---|
| 104 | /* \\\ */ |
|---|
| 105 | #endif |
|---|
| 106 | |
|---|
| 107 | |
|---|
| 108 | void PP_convertBondMatrix(PT_pdc *pdc, PTPanGlobal *pg) |
|---|
| 109 | { |
|---|
| 110 | for (int query = SEQCODE_A; query <= SEQCODE_T; ++query) { |
|---|
| 111 | for (int species = SEQCODE_A; species <= SEQCODE_T; ++species) { |
|---|
| 112 | int rowIdx = (pg->pg_ComplementTable[query] - SEQCODE_A)*4; |
|---|
| 113 | int maxIdx = rowIdx + query - SEQCODE_A; |
|---|
| 114 | int newIdx = rowIdx + species - SEQCODE_A; |
|---|
| 115 | |
|---|
| 116 | double max_bind = pdc->bond[maxIdx].val; |
|---|
| 117 | double new_bind = pdc->bond[newIdx].val; |
|---|
| 118 | |
|---|
| 119 | pg->pg_MismatchWeights.mw_Replace[query * ALPHASIZE + species] = max_bind - new_bind; |
|---|
| 120 | } |
|---|
| 121 | } |
|---|
| 122 | #if defined(DEBUG) |
|---|
| 123 | printf("Current bond values:\n"); |
|---|
| 124 | for (int y = 0; y<4; y++) { |
|---|
| 125 | for (int x = 0; x<4; x++) { |
|---|
| 126 | printf("%5.2f", pdc->bond[y*4+x].val); |
|---|
| 127 | } |
|---|
| 128 | printf("\n"); |
|---|
| 129 | } |
|---|
| 130 | printf("Current Replace Matrix:\n"); |
|---|
| 131 | for (int query = SEQCODE_A; query <= SEQCODE_T; ++query) { |
|---|
| 132 | for (int species = SEQCODE_A; species <= SEQCODE_T; ++species) { |
|---|
| 133 | printf("%5.2f", pg->pg_MismatchWeights.mw_Replace[query * ALPHASIZE + species]); |
|---|
| 134 | } |
|---|
| 135 | printf("\n"); |
|---|
| 136 | } |
|---|
| 137 | #endif // DEBUG |
|---|
| 138 | } |
|---|
| 139 | |
|---|
| 140 | |
|---|
| 141 | static double PP_calc_position_wmis(int pos, int seq_len, double y1, double y2) |
|---|
| 142 | { |
|---|
| 143 | return (double)(((double)(pos * (seq_len - 1 - pos)) / (double)((seq_len - 1) * (seq_len - 1)))* (double)(y2*4.0) + y1); |
|---|
| 144 | } |
|---|
| 145 | |
|---|
| 146 | |
|---|
| 147 | void PP_buildPosWeight(SearchQuery *sq) |
|---|
| 148 | { |
|---|
| 149 | if (sq->sq_PosWeight) delete[] sq->sq_PosWeight; |
|---|
| 150 | //printf("buildPosWeight: ...new double[%i];\n", sq->sq_QueryLen+1); |
|---|
| 151 | sq->sq_PosWeight = new double[sq->sq_QueryLen+1]; // TODO: check if +1 is necessary |
|---|
| 152 | |
|---|
| 153 | for (int pos=0; pos < sq->sq_QueryLen; ++pos) { |
|---|
| 154 | if (sq->sq_SortMode == SORT_HITS_WEIGHTED) { |
|---|
| 155 | sq->sq_PosWeight[pos] = PP_calc_position_wmis(pos, sq->sq_QueryLen, 0.3, 1.0); |
|---|
| 156 | }else{ |
|---|
| 157 | sq->sq_PosWeight[pos] = 1.0; |
|---|
| 158 | } |
|---|
| 159 | } |
|---|
| 160 | sq->sq_PosWeight[sq->sq_QueryLen] = 0.0; // TODO: check if last pos is necessary |
|---|
| 161 | #if defined(DEBUG) |
|---|
| 162 | printf("sq_Posweight[]: "); |
|---|
| 163 | for (int pos=0; pos < sq->sq_QueryLen; ++pos) { |
|---|
| 164 | printf("%f, ", sq->sq_PosWeight[pos]); |
|---|
| 165 | } |
|---|
| 166 | printf("%f\n", sq->sq_PosWeight[sq->sq_QueryLen]); |
|---|
| 167 | #endif |
|---|
| 168 | } |
|---|
| 169 | |
|---|
| 170 | |
|---|
| 171 | /* /// "probe_match()" */ |
|---|
| 172 | extern "C" int probe_match(PT_local *locs, aisc_string probestring) |
|---|
| 173 | { |
|---|
| 174 | struct PTPanGlobal *pg = PTPanGlobalPtr; |
|---|
| 175 | struct PTPanPartition *pp; |
|---|
| 176 | struct SearchQuery *sq; |
|---|
| 177 | struct SearchQuery *compsq = NULL; |
|---|
| 178 | PT_probematch *ml; |
|---|
| 179 | |
|---|
| 180 | pg->pg_SearchPrefs = locs; |
|---|
| 181 | |
|---|
| 182 | PP_convertBondMatrix(locs->pdc, pg); |
|---|
| 183 | |
|---|
| 184 | /* find out where a given probe matches */ |
|---|
| 185 | if (PTPanGlobalPtr->pg_verbose >0) { |
|---|
| 186 | printf("Search request for %s (errs = %d, compl = %d, rev = %d, weight = %d)\n", |
|---|
| 187 | probestring, |
|---|
| 188 | pg->pg_SearchPrefs->pm_max, |
|---|
| 189 | pg->pg_SearchPrefs->pm_complement, |
|---|
| 190 | pg->pg_SearchPrefs->pm_reversed, |
|---|
| 191 | pg->pg_SearchPrefs->sort_by); |
|---|
| 192 | } |
|---|
| 193 | |
|---|
| 194 | /* free the old sequence */ |
|---|
| 195 | if(pg->pg_SearchPrefs->pm_sequence) |
|---|
| 196 | { |
|---|
| 197 | free(pg->pg_SearchPrefs->pm_sequence); |
|---|
| 198 | } |
|---|
| 199 | pg->pg_SearchPrefs->pm_sequence = FilterSequence(pg, probestring); |
|---|
| 200 | |
|---|
| 201 | /* do we need to check the complement instead of the normal one? */ |
|---|
| 202 | if(pg->pg_SearchPrefs->pm_complement) |
|---|
| 203 | { |
|---|
| 204 | ComplementSequence(pg, pg->pg_SearchPrefs->pm_sequence); |
|---|
| 205 | } |
|---|
| 206 | |
|---|
| 207 | /* do we need to look at the reversed sequence as well? */ |
|---|
| 208 | if(pg->pg_SearchPrefs->pm_reversed) |
|---|
| 209 | { |
|---|
| 210 | if(pg->pg_SearchPrefs->pm_csequence) |
|---|
| 211 | { |
|---|
| 212 | free(pg->pg_SearchPrefs->pm_csequence); |
|---|
| 213 | } |
|---|
| 214 | pg->pg_SearchPrefs->pm_csequence = strdup(pg->pg_SearchPrefs->pm_sequence); |
|---|
| 215 | ReverseSequence(pg, pg->pg_SearchPrefs->pm_csequence); |
|---|
| 216 | ComplementSequence(pg, pg->pg_SearchPrefs->pm_csequence); |
|---|
| 217 | } |
|---|
| 218 | |
|---|
| 219 | //psg.main_probe = strdup(probestring); |
|---|
| 220 | |
|---|
| 221 | /* clear all old matches */ |
|---|
| 222 | while((ml = pg->pg_SearchPrefs->pm)) |
|---|
| 223 | { |
|---|
| 224 | destroy_PT_probematch(ml); |
|---|
| 225 | } |
|---|
| 226 | |
|---|
| 227 | #if 1 |
|---|
| 228 | /* check, if the probe string is too short */ |
|---|
| 229 | if(strlen(pg->pg_SearchPrefs->pm_sequence) + |
|---|
| 230 | (2 * pg->pg_SearchPrefs->pm_max) < MIN_PROBE_LENGTH) |
|---|
| 231 | { |
|---|
| 232 | SetARBErrorMsg(pg->pg_SearchPrefs, (STRPTR) "error: probe too short!!\n"); |
|---|
| 233 | free(probestring); |
|---|
| 234 | return(0); |
|---|
| 235 | } |
|---|
| 236 | #endif |
|---|
| 237 | |
|---|
| 238 | /* allocate query that configures and holds all the merged results */ |
|---|
| 239 | sq = AllocSearchQuery(pg); |
|---|
| 240 | |
|---|
| 241 | /* prefs */ |
|---|
| 242 | sq->sq_Query = (STRPTR) pg->pg_SearchPrefs->pm_sequence; |
|---|
| 243 | sq->sq_QueryLen = strlen(sq->sq_Query); |
|---|
| 244 | sq->sq_MaxErrors = (float) pg->pg_SearchPrefs->pm_max; |
|---|
| 245 | sq->sq_Reversed = FALSE; |
|---|
| 246 | sq->sq_AllowReplace = TRUE; |
|---|
| 247 | sq->sq_AllowInsert = TRUE; |
|---|
| 248 | sq->sq_AllowDelete = TRUE; |
|---|
| 249 | sq->sq_KillNSeqsAt = strlen(sq->sq_Query) / 3; |
|---|
| 250 | sq->sq_MinorMisThres = pg->pg_SearchPrefs->pdc->split; |
|---|
| 251 | sq->sq_SortMode = pg->pg_SearchPrefs->sort_by; |
|---|
| 252 | |
|---|
| 253 | /* init */ |
|---|
| 254 | sq->sq_PTPanPartition = NULL; |
|---|
| 255 | PP_buildPosWeight(sq); |
|---|
| 256 | if(pg->pg_SearchPrefs->sort_by) |
|---|
| 257 | { |
|---|
| 258 | /* user requested weighted searching */ |
|---|
| 259 | sq->sq_MismatchWeights = &sq->sq_PTPanGlobal->pg_MismatchWeights; |
|---|
| 260 | } else { |
|---|
| 261 | /* user wants unified searching */ |
|---|
| 262 | sq->sq_MismatchWeights = &sq->sq_PTPanGlobal->pg_NoWeights; |
|---|
| 263 | } |
|---|
| 264 | |
|---|
| 265 | /* do we need to do a second query on the complement? */ |
|---|
| 266 | if(pg->pg_SearchPrefs->pm_reversed) |
|---|
| 267 | { |
|---|
| 268 | compsq = CloneSearchQuery(sq); |
|---|
| 269 | compsq->sq_Query = (STRPTR) pg->pg_SearchPrefs->pm_csequence; |
|---|
| 270 | compsq->sq_Reversed = TRUE; |
|---|
| 271 | } |
|---|
| 272 | |
|---|
| 273 | /* start time here */ |
|---|
| 274 | #ifdef BENCHMARK |
|---|
| 275 | if (PTPanGlobalPtr->pg_verbose >0) |
|---|
| 276 | BenchTimePassed(pg); |
|---|
| 277 | #endif |
|---|
| 278 | |
|---|
| 279 | /* search over partitions that are still in cache */ |
|---|
| 280 | pp = (struct PTPanPartition *) pg->pg_Partitions.lh_Head; |
|---|
| 281 | while(pp->pp_Node.ln_Succ) |
|---|
| 282 | { |
|---|
| 283 | pp->pp_Done = FALSE; |
|---|
| 284 | if(CacheDataLoaded(pp->pp_CacheNode)) |
|---|
| 285 | { |
|---|
| 286 | /* search normal */ |
|---|
| 287 | SearchPartition(pp, sq); |
|---|
| 288 | /* and optionally, search complement */ |
|---|
| 289 | if(compsq) |
|---|
| 290 | { |
|---|
| 291 | SearchPartition(pp, compsq); |
|---|
| 292 | } |
|---|
| 293 | } |
|---|
| 294 | pp = (struct PTPanPartition *) pp->pp_Node.ln_Succ; |
|---|
| 295 | } |
|---|
| 296 | |
|---|
| 297 | /* search over all partitions not done yet */ |
|---|
| 298 | pp = (struct PTPanPartition *) pg->pg_Partitions.lh_Head; |
|---|
| 299 | while(pp->pp_Node.ln_Succ) |
|---|
| 300 | { |
|---|
| 301 | if(!pp->pp_Done) |
|---|
| 302 | { |
|---|
| 303 | /* search normal */ |
|---|
| 304 | SearchPartition(pp, sq); |
|---|
| 305 | /* and optionally, search complement */ |
|---|
| 306 | if(compsq) |
|---|
| 307 | { |
|---|
| 308 | SearchPartition(pp, compsq); |
|---|
| 309 | } |
|---|
| 310 | } |
|---|
| 311 | pp = (struct PTPanPartition *) pp->pp_Node.ln_Succ; |
|---|
| 312 | } |
|---|
| 313 | |
|---|
| 314 | #ifdef BENCHMARK |
|---|
| 315 | if (PTPanGlobalPtr->pg_verbose >0) |
|---|
| 316 | pg->pg_Bench.ts_CandSetTime += BenchTimePassed(pg); |
|---|
| 317 | #endif |
|---|
| 318 | |
|---|
| 319 | SortHitsList(sq); |
|---|
| 320 | CreateHitsGUIList(sq); |
|---|
| 321 | FreeSearchQuery(sq); |
|---|
| 322 | if(compsq) |
|---|
| 323 | { |
|---|
| 324 | SortHitsList(compsq); |
|---|
| 325 | CreateHitsGUIList(compsq); |
|---|
| 326 | FreeSearchQuery(compsq); |
|---|
| 327 | } |
|---|
| 328 | |
|---|
| 329 | #ifdef BENCHMARK |
|---|
| 330 | if (PTPanGlobalPtr->pg_verbose >0) |
|---|
| 331 | pg->pg_Bench.ts_OutputTime += BenchTimePassed(pg); |
|---|
| 332 | #endif |
|---|
| 333 | |
|---|
| 334 | free(probestring); /* I actually don't know, if this is required */ |
|---|
| 335 | return 0; |
|---|
| 336 | } |
|---|
| 337 | /* \\\ */ |
|---|
| 338 | |
|---|
| 339 | /* /// "SortHitsList()" */ |
|---|
| 340 | void SortHitsList(struct SearchQuery *sq) |
|---|
| 341 | { |
|---|
| 342 | //struct PTPanGlobal *pg = sq->sq_PTPanGlobal; |
|---|
| 343 | struct QueryHit *qh; |
|---|
| 344 | |
|---|
| 345 | /* enter priority and sort */ |
|---|
| 346 | qh = (struct QueryHit *) sq->sq_Hits.lh_Head; |
|---|
| 347 | if(sq->sq_SortMode == SORT_HITS_NOWEIGHT) |
|---|
| 348 | { |
|---|
| 349 | /* sorting criteria: |
|---|
| 350 | - normal/composite (1 bit) |
|---|
| 351 | - replace only or insert/delete (1 bit) |
|---|
| 352 | - mismatch count (5 bits) |
|---|
| 353 | - error count (8 bits) |
|---|
| 354 | - species (20 bits) |
|---|
| 355 | - absolute position (28 bits) |
|---|
| 356 | */ |
|---|
| 357 | //printf("Sort no weight...\n"); |
|---|
| 358 | while(qh->qh_Node.ln_Succ) |
|---|
| 359 | { |
|---|
| 360 | arb_assert(((LLONG) (qh->qh_ReplaceCount + qh->qh_InsertCount + qh->qh_DeleteCount)) <= 0x1f); // 5 bit |
|---|
| 361 | arb_assert(((LLONG) round(qh->qh_ErrorCount * 10.0)) <= 0xff); // 8 bit |
|---|
| 362 | arb_assert(((LLONG) qh->qh_Species->ps_Num) <= 0xfffff); // 20 bit |
|---|
| 363 | arb_assert(((LLONG) (qh->qh_AbsPos - qh->qh_Species->ps_AbsOffset)) <= 0xfffffff); // 28 bit |
|---|
| 364 | qh->qh_Node.ln_Pri = (LLONG) |
|---|
| 365 | ((qh->qh_Flags & QHF_REVERSED) ? (1LL << 62) : 0LL) + |
|---|
| 366 | ((qh->qh_InsertCount | qh->qh_DeleteCount) ? (1LL << 61) : 0LL) + |
|---|
| 367 | (((LLONG) (qh->qh_ReplaceCount + qh->qh_InsertCount + qh->qh_DeleteCount)) << 56) + |
|---|
| 368 | (((LLONG) round(qh->qh_ErrorCount * 10.0)) << 48) + |
|---|
| 369 | (((LLONG) qh->qh_Species->ps_Num) << 28) + |
|---|
| 370 | ((LLONG) (qh->qh_AbsPos - qh->qh_Species->ps_AbsOffset)); |
|---|
| 371 | qh = (struct QueryHit *) qh->qh_Node.ln_Succ; |
|---|
| 372 | } |
|---|
| 373 | } else { |
|---|
| 374 | //printf("Sort with weight...\n"); |
|---|
| 375 | while(qh->qh_Node.ln_Succ) |
|---|
| 376 | { |
|---|
| 377 | arb_assert(((LLONG) (qh->qh_ReplaceCount + qh->qh_InsertCount + qh->qh_DeleteCount)) <= 0x1f); // 5 bit |
|---|
| 378 | arb_assert(((LLONG) round(qh->qh_ErrorCount * 10.0)) <= 0xff); // 8 bit |
|---|
| 379 | arb_assert(((LLONG) qh->qh_Species->ps_Num) <= 0xfffff); // 20 bit |
|---|
| 380 | arb_assert(((LLONG) (qh->qh_AbsPos - qh->qh_Species->ps_AbsOffset)) <= 0xfffffff); // 28 bit |
|---|
| 381 | qh->qh_Node.ln_Pri = (LLONG) |
|---|
| 382 | ((LLONG) (qh->qh_Flags & QHF_REVERSED) ? (1LL << 62) : 0LL) + |
|---|
| 383 | ((LLONG) (qh->qh_InsertCount | qh->qh_DeleteCount) ? (1LL << 61) : 0LL) + |
|---|
| 384 | (((LLONG) round(qh->qh_ErrorCount * 10.0)) << 53) + |
|---|
| 385 | (((LLONG) (qh->qh_ReplaceCount + qh->qh_InsertCount + qh->qh_DeleteCount)) << 48) + |
|---|
| 386 | ((LLONG) qh->qh_Species->ps_Num << 28) + |
|---|
| 387 | ((LLONG) (qh->qh_AbsPos - qh->qh_Species->ps_AbsOffset)); |
|---|
| 388 | //printf("%16llx\n", qh->qh_Node.ln_Pri); |
|---|
| 389 | qh = (struct QueryHit *) qh->qh_Node.ln_Succ; |
|---|
| 390 | } |
|---|
| 391 | } |
|---|
| 392 | SortList(&sq->sq_Hits); |
|---|
| 393 | #if 0 |
|---|
| 394 | qh = (struct QueryHit *) sq->sq_Hits.lh_Head; |
|---|
| 395 | //printf("... and after\n"); |
|---|
| 396 | while(qh->qh_Node.ln_Succ) |
|---|
| 397 | { |
|---|
| 398 | //printf("%16llx\n", qh->qh_Node.ln_Pri); |
|---|
| 399 | qh = (struct QueryHit *) qh->qh_Node.ln_Succ; |
|---|
| 400 | } |
|---|
| 401 | #endif |
|---|
| 402 | } |
|---|
| 403 | /* \\\ */ |
|---|
| 404 | |
|---|
| 405 | |
|---|
| 406 | /* /// "CreateHitsGUIList()" */ |
|---|
| 407 | void CreateHitsGUIList(struct SearchQuery *sq) |
|---|
| 408 | { |
|---|
| 409 | struct PTPanGlobal *pg = sq->sq_PTPanGlobal; |
|---|
| 410 | struct PTPanSpecies *ps; |
|---|
| 411 | struct QueryHit *qh; |
|---|
| 412 | STRPTR srcptr; |
|---|
| 413 | STRPTR tarptr; |
|---|
| 414 | ULONG maxlen; |
|---|
| 415 | float minweight; |
|---|
| 416 | ULONG cnt; |
|---|
| 417 | ULONG numhits; |
|---|
| 418 | ULONG tarlen; |
|---|
| 419 | |
|---|
| 420 | if (PTPanGlobalPtr->pg_verbose >0) printf(">> CreateHitsGUIList\n"); |
|---|
| 421 | |
|---|
| 422 | /* calculate maximum size of string that we have to examine */ |
|---|
| 423 | minweight = sq->sq_MismatchWeights->mw_Delete[0]; |
|---|
| 424 | for(cnt = 1; cnt < pg->pg_AlphaSize; cnt++) |
|---|
| 425 | { |
|---|
| 426 | if(sq->sq_MismatchWeights->mw_Delete[cnt] < minweight) |
|---|
| 427 | { |
|---|
| 428 | minweight = sq->sq_MismatchWeights->mw_Delete[cnt]; |
|---|
| 429 | } |
|---|
| 430 | } |
|---|
| 431 | maxlen = sq->sq_QueryLen + (ULONG) ((sq->sq_MaxErrors + minweight) / minweight); |
|---|
| 432 | sq->sq_SourceSeq = (STRPTR) malloc(maxlen + 1); |
|---|
| 433 | |
|---|
| 434 | numhits = 0; |
|---|
| 435 | pg->pg_SpeciesCache->ch_SwapCount = 0; |
|---|
| 436 | |
|---|
| 437 | qh = (struct QueryHit *) sq->sq_Hits.lh_Head; |
|---|
| 438 | while(qh->qh_Node.ln_Succ) |
|---|
| 439 | { |
|---|
| 440 | LONG relpos; |
|---|
| 441 | BOOL good; |
|---|
| 442 | ULONG nmismatch; |
|---|
| 443 | UBYTE code; |
|---|
| 444 | UBYTE seqcode; |
|---|
| 445 | |
|---|
| 446 | STRPTR seqout; |
|---|
| 447 | STRPTR seqptr; |
|---|
| 448 | LONG relposcnt; |
|---|
| 449 | PT_probematch *ml; |
|---|
| 450 | |
|---|
| 451 | good = TRUE; |
|---|
| 452 | ps = qh->qh_Species; |
|---|
| 453 | char prefix[10], postfix[10]; |
|---|
| 454 | for (int i = 0; i < 9; ++i) |
|---|
| 455 | { |
|---|
| 456 | prefix[i] = '>'; |
|---|
| 457 | postfix[i] = '<'; |
|---|
| 458 | } |
|---|
| 459 | prefix[9] = postfix[9] = 0x00; |
|---|
| 460 | |
|---|
| 461 | relpos = 0; |
|---|
| 462 | nmismatch = 0; |
|---|
| 463 | ULONG abspos = qh->qh_AbsPos - ps->ps_AbsOffset; |
|---|
| 464 | ULONG bitpos = 0; |
|---|
| 465 | ULONG count; |
|---|
| 466 | /* given an absolute sequence position, search for the relative one, |
|---|
| 467 | e.g. abspos 2 on "-----UU-C-C" will yield 8 |
|---|
| 468 | abspos: 01 2 3 |
|---|
| 469 | relpos: 0123456789a */ |
|---|
| 470 | /* |
|---|
| 471 | if (strcmp(ps->ps_Name, "BclSp114") == 0) |
|---|
| 472 | { |
|---|
| 473 | printf("qh_AbsPos: %li\t\tps_AbsOffset: %li\t\tabspos:%li\n", |
|---|
| 474 | qh->qh_AbsPos, ps->ps_AbsOffset, abspos); |
|---|
| 475 | while ((code = GetNextCharacter(pg, ps->ps_SeqDataCompressed, bitpos, count)) != 0xff) |
|---|
| 476 | { |
|---|
| 477 | if (count > 1) printf("%li%c", count, code); |
|---|
| 478 | else printf("%c", code); |
|---|
| 479 | } |
|---|
| 480 | printf("\n"); |
|---|
| 481 | bitpos = 0; |
|---|
| 482 | } |
|---|
| 483 | */ |
|---|
| 484 | while (bitpos < ps->ps_SeqDataCompressedSize) // get relpos and store prefix |
|---|
| 485 | { |
|---|
| 486 | code = GetNextCharacter(pg, ps->ps_SeqDataCompressed, bitpos, count); |
|---|
| 487 | |
|---|
| 488 | if (pg->pg_SeqCodeValidTable[code]) |
|---|
| 489 | { // it's a valid char |
|---|
| 490 | if (!(abspos--)) break; // position found |
|---|
| 491 | if (abspos <= 8) prefix[8-abspos] = code; // store prefix |
|---|
| 492 | ++relpos; |
|---|
| 493 | } else |
|---|
| 494 | { // it's not a valid char |
|---|
| 495 | arb_assert((code == '.') || (code == '-')); |
|---|
| 496 | |
|---|
| 497 | #ifdef ALLOWDOTSINMATCH |
|---|
| 498 | if ((code == '.') && (count == 1)) // check for dots in match |
|---|
| 499 | { |
|---|
| 500 | if (!(abspos--)) break; // position found |
|---|
| 501 | if (abspos <= 8) prefix[8-abspos] = code; // store prefix |
|---|
| 502 | } |
|---|
| 503 | #endif |
|---|
| 504 | relpos += count; |
|---|
| 505 | if ((code == '.') && (abspos <= 9)) // fill prefix with '.' |
|---|
| 506 | { // TODO: decide if we really want to fill the |
|---|
| 507 | for (int i = 0; i < (9 - abspos); ++i) // whole prefix or just 'count' dots |
|---|
| 508 | { |
|---|
| 509 | prefix[i] = '.'; |
|---|
| 510 | } |
|---|
| 511 | } |
|---|
| 512 | } |
|---|
| 513 | } |
|---|
| 514 | arb_assert(bitpos < ps->ps_SeqDataCompressedSize); |
|---|
| 515 | bitpos -= 3; // bitpos now points to the first character of found seq |
|---|
| 516 | //printf("%c, %li, %li, %li\n", code, ((unsigned long)code), count, bitpos); |
|---|
| 517 | tarlen = sq->sq_QueryLen - qh->qh_DeleteCount + qh->qh_InsertCount; |
|---|
| 518 | for (cnt = 0; cnt < tarlen;) // copy found string into sq->sq_SourceSeq[] |
|---|
| 519 | { |
|---|
| 520 | if (bitpos >= ps->ps_SeqDataCompressedSize) |
|---|
| 521 | { |
|---|
| 522 | arb_assert(false); |
|---|
| 523 | good = FALSE; |
|---|
| 524 | break; |
|---|
| 525 | } |
|---|
| 526 | |
|---|
| 527 | code = GetNextCharacter(pg, ps->ps_SeqDataCompressed, bitpos, count); |
|---|
| 528 | if (pg->pg_SeqCodeValidTable[code]) // valid character |
|---|
| 529 | { |
|---|
| 530 | sq->sq_SourceSeq[cnt++] = code; |
|---|
| 531 | if (code == 'N') nmismatch++; |
|---|
| 532 | } else |
|---|
| 533 | { |
|---|
| 534 | if (code == '.') // if we got a dot in sequence |
|---|
| 535 | { // the hit is bogus |
|---|
| 536 | #ifdef ALLOWDOTSINMATCH |
|---|
| 537 | if (count == 1) |
|---|
| 538 | { |
|---|
| 539 | sq->sq_SourceSeq[cnt++] = '.'; |
|---|
| 540 | } else |
|---|
| 541 | #endif |
|---|
| 542 | { |
|---|
| 543 | pg->pg_Bench.ts_DotsKilled++; |
|---|
| 544 | good = FALSE; |
|---|
| 545 | break; |
|---|
| 546 | } |
|---|
| 547 | } |
|---|
| 548 | } |
|---|
| 549 | } |
|---|
| 550 | sq->sq_SourceSeq[tarlen] = 0; |
|---|
| 551 | if(nmismatch == tarlen) good = FALSE; |
|---|
| 552 | |
|---|
| 553 | if(good) |
|---|
| 554 | { |
|---|
| 555 | /* we need to verify the hit? */ |
|---|
| 556 | if(qh->qh_Flags & QHF_UNSAFE) |
|---|
| 557 | { |
|---|
| 558 | pg->pg_Bench.ts_UnsafeHits++; |
|---|
| 559 | |
|---|
| 560 | good = MatchSequence(sq); |
|---|
| 561 | if(!good) |
|---|
| 562 | { |
|---|
| 563 | pg->pg_Bench.ts_UnsafeKilled++; |
|---|
| 564 | //printf("Verify failed on %s != %s\n", sq->sq_SourceSeq, sq->sq_Query); |
|---|
| 565 | qh->qh_Flags &= ~QHF_ISVALID; |
|---|
| 566 | } else { |
|---|
| 567 | /* fill in correct match */ |
|---|
| 568 | qh->qh_ErrorCount = sq->sq_State.sqs_ErrorCount; |
|---|
| 569 | qh->qh_ReplaceCount = sq->sq_State.sqs_ReplaceCount; |
|---|
| 570 | qh->qh_InsertCount = sq->sq_State.sqs_InsertCount; |
|---|
| 571 | qh->qh_DeleteCount = sq->sq_State.sqs_DeleteCount; |
|---|
| 572 | } |
|---|
| 573 | //qh->qh_Flags &= ~QHF_UNSAFE; |
|---|
| 574 | } |
|---|
| 575 | } //else printf("'.'-Sequence!\n"); |
|---|
| 576 | |
|---|
| 577 | if(good) |
|---|
| 578 | { |
|---|
| 579 | seqout = (STRPTR) calloc(9 + 1 + sq->sq_QueryLen + 1 + 9 + 1, 0x01); |
|---|
| 580 | strncpy(seqout, prefix, 0x09); // copy prefix |
|---|
| 581 | seqout[9] = '-'; // 1st delimiter |
|---|
| 582 | good = FindSequenceMatch(sq, qh, &seqout[10]); // generate mismatch sequence */ |
|---|
| 583 | seqout[10 + sq->sq_QueryLen] = '-'; // 2nd delimiter |
|---|
| 584 | if (!good) free(seqout); |
|---|
| 585 | } |
|---|
| 586 | |
|---|
| 587 | if (good) |
|---|
| 588 | { |
|---|
| 589 | for (cnt = 0; cnt < 9;) // generate postfix |
|---|
| 590 | { |
|---|
| 591 | code = GetNextCharacter(pg, ps->ps_SeqDataCompressed, bitpos, count); |
|---|
| 592 | if (code == 0xff) break; |
|---|
| 593 | if (pg->pg_SeqCodeValidTable[code]) // valid character |
|---|
| 594 | { |
|---|
| 595 | postfix[cnt++] = code; |
|---|
| 596 | } else if (code == '.') // '.' found |
|---|
| 597 | { |
|---|
| 598 | for (; cnt < 9; ++cnt) // fill postfix with '.' |
|---|
| 599 | { // TODO: decide if we really want to fill the |
|---|
| 600 | postfix[cnt] = '.'; // whole postfix or just 'count' dots |
|---|
| 601 | } |
|---|
| 602 | } |
|---|
| 603 | } |
|---|
| 604 | |
|---|
| 605 | strncpy(&seqout[11 + sq->sq_QueryLen], postfix, 0x09); // copy postfix |
|---|
| 606 | |
|---|
| 607 | ml = create_PT_probematch(); |
|---|
| 608 | ml->name = qh->qh_Species->ps_Num; |
|---|
| 609 | ml->b_pos = relpos; |
|---|
| 610 | ml->rpos = qh->qh_AbsPos - ps->ps_AbsOffset; |
|---|
| 611 | ml->wmismatches = (double) qh->qh_ErrorCount; |
|---|
| 612 | ml->mismatches = qh->qh_ReplaceCount + qh->qh_InsertCount + qh->qh_DeleteCount; |
|---|
| 613 | ml->N_mismatches = nmismatch; |
|---|
| 614 | ml->sequence = seqout; /* warning! potentional memory leak -- FIX destroy_PT_probematch(ml) */ |
|---|
| 615 | ml->reversed = (qh->qh_Flags & QHF_REVERSED) ? 1 : 0; |
|---|
| 616 | |
|---|
| 617 | aisc_link((struct_dllpublic_ext *) &(pg->pg_SearchPrefs->ppm), (struct_dllheader_ext *) ml); |
|---|
| 618 | numhits++; |
|---|
| 619 | |
|---|
| 620 | if (PTPanGlobalPtr->pg_verbose >0) printf("SeqOut: '%s'\n", seqout); |
|---|
| 621 | } |
|---|
| 622 | |
|---|
| 623 | RemQueryHit(qh); |
|---|
| 624 | qh = (struct QueryHit *) sq->sq_Hits.lh_Head; |
|---|
| 625 | |
|---|
| 626 | } // while(qh->qh_Node.ln_Succ) |
|---|
| 627 | free(sq->sq_SourceSeq); |
|---|
| 628 | |
|---|
| 629 | if (PTPanGlobalPtr->pg_verbose >0) { |
|---|
| 630 | pg->pg_Bench.ts_OutHits += numhits; |
|---|
| 631 | printf("<< CreateHitsGUIList: Number of hits %ld (SwapCount %ld)\n", |
|---|
| 632 | numhits, pg->pg_SpeciesCache->ch_SwapCount); |
|---|
| 633 | } |
|---|
| 634 | } |
|---|
| 635 | /* \\\ */ |
|---|
| 636 | |
|---|
| 637 | /* /// "get_match_info()" */ |
|---|
| 638 | extern "C" STRPTR get_match_info(PT_probematch *ml) |
|---|
| 639 | { |
|---|
| 640 | struct PTPanGlobal *pg = PTPanGlobalPtr; |
|---|
| 641 | struct PTPanSpecies *ps; |
|---|
| 642 | ULONG ecolipos = 0; |
|---|
| 643 | |
|---|
| 644 | /* calculate ecoli position in O(1) */ |
|---|
| 645 | if(pg->pg_EcoliBaseTable) |
|---|
| 646 | { |
|---|
| 647 | if((ULONG) ml->b_pos < pg->pg_EcoliSeqSize) |
|---|
| 648 | { |
|---|
| 649 | ecolipos = pg->pg_EcoliBaseTable[ml->b_pos]; |
|---|
| 650 | } else { |
|---|
| 651 | ecolipos = pg->pg_EcoliBaseTable[pg->pg_EcoliSeqSize]; |
|---|
| 652 | } |
|---|
| 653 | } |
|---|
| 654 | ps = pg->pg_SpeciesMap[ml->name]; |
|---|
| 655 | sprintf(pg->pg_TempBuffer, "%10s %-30.30s %2d %2d %1.1f %7d %4ld %1d %s", |
|---|
| 656 | ps->ps_Name, ps->ps_FullName, |
|---|
| 657 | ml->mismatches, ml->N_mismatches, ml->wmismatches, |
|---|
| 658 | ml->b_pos, ecolipos, |
|---|
| 659 | ml->reversed, ml->sequence); |
|---|
| 660 | |
|---|
| 661 | if (PTPanGlobalPtr->pg_verbose >0) printf("== get_match_info: %s\n", pg->pg_TempBuffer); |
|---|
| 662 | |
|---|
| 663 | return(pg->pg_TempBuffer); |
|---|
| 664 | } |
|---|
| 665 | /* \\\ */ |
|---|
| 666 | |
|---|
| 667 | /* /// "GetMatchListHeader()" */ |
|---|
| 668 | STRPTR GetMatchListHeader(STRPTR seq) |
|---|
| 669 | { |
|---|
| 670 | STRPTR res; |
|---|
| 671 | |
|---|
| 672 | if(seq) |
|---|
| 673 | { |
|---|
| 674 | res = (STRPTR) GBS_global_string(" name fullname " |
|---|
| 675 | "mis N_mis wmis pos ecoli rev '%s'", seq); |
|---|
| 676 | } else { |
|---|
| 677 | res = (STRPTR) " name fullname " |
|---|
| 678 | "mis N_mis wmis pos ecoli rev"; |
|---|
| 679 | } |
|---|
| 680 | |
|---|
| 681 | if (PTPanGlobalPtr->pg_verbose >0) printf("== GetMatchListHeader: %s\n", res); |
|---|
| 682 | |
|---|
| 683 | return(res); |
|---|
| 684 | } |
|---|
| 685 | /* \\\ */ |
|---|
| 686 | |
|---|
| 687 | /* /// "get_match_hinfo()" */ |
|---|
| 688 | extern "C" STRPTR get_match_hinfo(PT_probematch *) |
|---|
| 689 | { |
|---|
| 690 | return(GetMatchListHeader(NULL)); |
|---|
| 691 | } |
|---|
| 692 | /* \\\ */ |
|---|
| 693 | |
|---|
| 694 | /* /// "c_get_match_hinfo()" */ |
|---|
| 695 | extern "C" STRPTR c_get_match_hinfo(PT_probematch *) |
|---|
| 696 | { |
|---|
| 697 | printf("EXTERN: c_get_match_hinfo\n"); |
|---|
| 698 | return(GetMatchListHeader(NULL)); |
|---|
| 699 | } |
|---|
| 700 | /* \\\ */ |
|---|
| 701 | |
|---|
| 702 | /* /// "match_string()" */ |
|---|
| 703 | /* Create a big output string: header\001name\001info\001name\001info....\000 */ |
|---|
| 704 | extern "C" bytestring * match_string(PT_local *locs) |
|---|
| 705 | { |
|---|
| 706 | struct PTPanGlobal *pg = PTPanGlobalPtr; |
|---|
| 707 | struct GBS_strstruct *outstr; |
|---|
| 708 | PT_probematch *ml; |
|---|
| 709 | STRPTR srcptr; |
|---|
| 710 | LONG entryCount = 0; |
|---|
| 711 | |
|---|
| 712 | printf("EXTERN: match_string\n"); |
|---|
| 713 | free(pg->pg_ResultString.data); // free old memory |
|---|
| 714 | for(ml = locs->pm; ml; ml = ml->next) // count number of entries |
|---|
| 715 | ++entryCount; |
|---|
| 716 | |
|---|
| 717 | outstr = GBS_stropen(entryCount * 150); // 150 bytes per entry seemes to be a good estimation |
|---|
| 718 | |
|---|
| 719 | if(locs->pm) // add header |
|---|
| 720 | { |
|---|
| 721 | srcptr = GetMatchListHeader(locs->pm->reversed ? locs->pm_csequence : locs->pm_sequence); |
|---|
| 722 | GBS_strcat(outstr, srcptr); |
|---|
| 723 | GBS_chrcat(outstr, 1); |
|---|
| 724 | } |
|---|
| 725 | |
|---|
| 726 | for(ml = locs->pm; ml; ml = ml->next) // add each entry to the list |
|---|
| 727 | { |
|---|
| 728 | srcptr = virt_name(ml); // add the name |
|---|
| 729 | GBS_strcat(outstr, srcptr); |
|---|
| 730 | GBS_chrcat(outstr, 1); |
|---|
| 731 | |
|---|
| 732 | srcptr = get_match_info(ml); // and the info |
|---|
| 733 | GBS_strcat(outstr, srcptr); |
|---|
| 734 | GBS_chrcat(outstr, 1); |
|---|
| 735 | } |
|---|
| 736 | |
|---|
| 737 | pg->pg_ResultString.data = GBS_strclose(outstr); |
|---|
| 738 | pg->pg_ResultString.size = strlen(pg->pg_ResultString.data) + 1; |
|---|
| 739 | |
|---|
| 740 | if (PTPanGlobalPtr->pg_verbose >0) printf("== match_string: %s\n", pg->pg_ResultString.data); |
|---|
| 741 | |
|---|
| 742 | #ifdef DEBUG |
|---|
| 743 | printf("%li entries used %li bytes (%li MB) of buffer: %5.2f byte per entry\n", |
|---|
| 744 | entryCount, pg->pg_ResultString.size, pg->pg_ResultString.size >> 20, |
|---|
| 745 | (double)pg->pg_ResultString.size/(double)entryCount); |
|---|
| 746 | #endif |
|---|
| 747 | return(&pg->pg_ResultString); |
|---|
| 748 | } |
|---|
| 749 | /* \\\ */ |
|---|
| 750 | |
|---|
| 751 | /* /// "MP_match_string()" */ |
|---|
| 752 | /* Create a big output string: header\001name\001#mismatch\001name\001#mismatch....\000 */ |
|---|
| 753 | extern "C" bytestring * MP_match_string(PT_local *locs) |
|---|
| 754 | { |
|---|
| 755 | struct PTPanGlobal *pg = PTPanGlobalPtr; |
|---|
| 756 | PT_probematch *ml; |
|---|
| 757 | STRPTR outptr; |
|---|
| 758 | STRPTR srcptr; |
|---|
| 759 | LONG buflen = 100000000; // TODO: calculate buflen instead of using hard coded value |
|---|
| 760 | |
|---|
| 761 | printf("EXTERN: MP_match_string\n"); |
|---|
| 762 | /* free old memory */ |
|---|
| 763 | free(pg->pg_ResultMString.data); |
|---|
| 764 | |
|---|
| 765 | outptr = (STRPTR) malloc(buflen); |
|---|
| 766 | pg->pg_ResultMString.data = outptr; |
|---|
| 767 | |
|---|
| 768 | buflen--; /* space for termination byte */ |
|---|
| 769 | |
|---|
| 770 | LONG entryCount = 0; |
|---|
| 771 | /* add each entry to the list */ |
|---|
| 772 | for(ml = locs->pm; ml; ml = ml->next) |
|---|
| 773 | { |
|---|
| 774 | ++entryCount; |
|---|
| 775 | /* add the name */ |
|---|
| 776 | srcptr = virt_name(ml); |
|---|
| 777 | while((--buflen > 0) && (*outptr++ = *srcptr++)); |
|---|
| 778 | if(buflen <= 0) |
|---|
| 779 | { |
|---|
| 780 | printf("ERROR: buffer too small - see function MP_match_string(...) in file PT_match.cxx\n"); |
|---|
| 781 | break; |
|---|
| 782 | } |
|---|
| 783 | outptr[-1] = 1; |
|---|
| 784 | |
|---|
| 785 | /* and and the mismatch and wmismatch count */ |
|---|
| 786 | sprintf(pg->pg_TempBuffer, "%2d\001%1.1f", ml->mismatches, ml->wmismatches); |
|---|
| 787 | srcptr = pg->pg_TempBuffer; |
|---|
| 788 | while((--buflen > 0) && (*outptr++ = *srcptr++)); |
|---|
| 789 | if(buflen <= 0) |
|---|
| 790 | { |
|---|
| 791 | printf("ERROR: buffer too small - see function MP_match_string(...) in file PT_match.cxx\n"); |
|---|
| 792 | break; |
|---|
| 793 | } |
|---|
| 794 | outptr[-1] = 1; |
|---|
| 795 | } |
|---|
| 796 | /* terminate string */ |
|---|
| 797 | *outptr++ = 0; |
|---|
| 798 | |
|---|
| 799 | pg->pg_ResultMString.size = (ULONG) outptr - (ULONG) pg->pg_ResultMString.data; |
|---|
| 800 | /* free unused memory */ |
|---|
| 801 | pg->pg_ResultMString.data = (STRPTR) realloc(pg->pg_ResultMString.data, |
|---|
| 802 | pg->pg_ResultMString.size); |
|---|
| 803 | |
|---|
| 804 | if (PTPanGlobalPtr->pg_verbose >0) printf("== MP_match_string: %s\n", pg->pg_ResultString.data); |
|---|
| 805 | |
|---|
| 806 | printf("%li entries used %li bytes (%li MB) of buffer: %5.2f byte per entry\n", |
|---|
| 807 | entryCount, (100000000-buflen), (100000000-buflen) >> 20, (double)(100000000-buflen)/(double)entryCount); |
|---|
| 808 | return(&pg->pg_ResultMString); |
|---|
| 809 | } |
|---|
| 810 | /* \\\ */ |
|---|
| 811 | |
|---|
| 812 | /* /// "MP_all_species_string()" */ |
|---|
| 813 | /* Create a big output string: 001name\001name\....\000 */ |
|---|
| 814 | extern "C" bytestring * MP_all_species_string(PT_local *) |
|---|
| 815 | { |
|---|
| 816 | struct PTPanGlobal *pg = PTPanGlobalPtr; |
|---|
| 817 | struct PTPanSpecies *ps; |
|---|
| 818 | STRPTR outptr; |
|---|
| 819 | STRPTR srcptr; |
|---|
| 820 | // LONG buflen = 500000; /* enough for about 50000 species */ |
|---|
| 821 | LONG buflen = 100000000; // TODO: calculate buflen instead of using hard coded value |
|---|
| 822 | |
|---|
| 823 | printf("EXTERN: MP_all_species_string\n"); |
|---|
| 824 | /* free old memory */ |
|---|
| 825 | free(pg->pg_SpeciesString.data); |
|---|
| 826 | |
|---|
| 827 | outptr = (STRPTR) malloc(buflen); |
|---|
| 828 | pg->pg_SpeciesString.data = outptr; |
|---|
| 829 | |
|---|
| 830 | buflen--; /* space for termination byte */ |
|---|
| 831 | |
|---|
| 832 | LONG entryCount = 0; |
|---|
| 833 | /* add each entry to the list */ |
|---|
| 834 | ps = (struct PTPanSpecies *) pg->pg_Species.lh_Head; |
|---|
| 835 | while(ps->ps_Node.ln_Succ) |
|---|
| 836 | { |
|---|
| 837 | ++entryCount; |
|---|
| 838 | /* add the name */ |
|---|
| 839 | srcptr = ps->ps_Name; |
|---|
| 840 | while((--buflen > 0) && (*outptr++ = *srcptr++)); |
|---|
| 841 | if(buflen <= 0) |
|---|
| 842 | { |
|---|
| 843 | printf("ERROR: buffer too small - see function MP_all_species_string(...) in file PT_match.cxx\n"); |
|---|
| 844 | break; |
|---|
| 845 | } |
|---|
| 846 | outptr[-1] = 1; |
|---|
| 847 | ps = (struct PTPanSpecies *) ps->ps_Node.ln_Succ; |
|---|
| 848 | } |
|---|
| 849 | /* terminate string */ |
|---|
| 850 | *outptr++ = 0; |
|---|
| 851 | |
|---|
| 852 | pg->pg_SpeciesString.size = (ULONG) outptr - (ULONG) pg->pg_SpeciesString.data; |
|---|
| 853 | /* free unused memory */ |
|---|
| 854 | pg->pg_SpeciesString.data = (STRPTR) realloc(pg->pg_SpeciesString.data, |
|---|
| 855 | pg->pg_SpeciesString.size); |
|---|
| 856 | printf("%li entries used %li bytes (%li MB) of buffer: %5.2f byte per entry\n", |
|---|
| 857 | entryCount, (100000000-buflen), (100000000-buflen) >> 20, (double)(100000000-buflen)/(double)entryCount); |
|---|
| 858 | return(&pg->pg_SpeciesString); |
|---|
| 859 | } |
|---|
| 860 | /* \\\ */ |
|---|
| 861 | |
|---|
| 862 | /* /// "MP_count_all_species()" */ |
|---|
| 863 | extern "C" int MP_count_all_species(PT_local *) |
|---|
| 864 | { |
|---|
| 865 | struct PTPanGlobal *pg = PTPanGlobalPtr; |
|---|
| 866 | printf("EXTERN: MP_count_all_species\n"); |
|---|
| 867 | return(pg->pg_NumSpecies); |
|---|
| 868 | } |
|---|
| 869 | /* \\\ */ |
|---|
| 870 | |
|---|
| 871 | |
|---|