// =============================================================== // // // // File : seq_search.cxx // // Purpose : // // // // Coded by Ralf Westram (coder@reallysoft.de) // // Institute of Microbiology (Technical University Munich) // // http://www.arb-home.de/ // // // // =============================================================== // #include "seq_search.hxx" #include #include #include #include #define MESSAGE_BUFFERSIZE 300 #if defined(DEBUG) inline int check_equal(int o, int n) { if (o != n) { fprintf(stderr, "o=%i\nn=%i\n", o, n); fflush(stderr); } fa_assert(o == n); return n; } #endif __ATTR__FORMAT(1) static void messagef(const char *format, ...) { va_list argp; char buffer[MESSAGE_BUFFERSIZE]; va_start(argp, format); IF_ASSERTION_USED(int chars =) vsprintf(buffer, format, argp); fa_assert(charsappend(neu); else Next = neu; } static CharPredicate pred_is_ali_gap(is_ali_gap); CompactedSequence::CompactedSequence(const char *Text, int Length, const char *name, int start_offset) : basepos(Text, Length, pred_is_ali_gap), myName(strdup(name)), myStartOffset(0), // corrected at end of ctor (otherwise calculations below go wrong) dots(NULp) // referred(1) { fa_assert(Length>0); { long firstBase = basepos.first_base_abspos(); long lastBase = basepos.last_base_abspos(); int dotOffset = firstBase + strcspn(Text+firstBase, "."); // skip non-dots while (dotOffset <= lastBase) { fa_assert(Text[dotOffset] == '.'); storeDots(basepos.abs_2_rel(dotOffset)); dotOffset += strspn(Text+dotOffset, "."); // skip dots dotOffset += strcspn(Text+dotOffset, "."); // skip non-dots } } int cLen = length(); myText = new char[cLen+1]; myText[cLen] = 0; gapsBeforePosition = new int[cLen+1]; for (int cPos = 0; cPos0) { gapsBeforePosition[cLen] = no_of_gaps_before(cLen); // gaps before end of sequence } myStartOffset += start_offset; } CompactedSequence::~CompactedSequence() { delete [] myText; delete[] gapsBeforePosition; free(myName); delete dots; } 
// Deletes the insertion referenced by myNext (if any).
// NOTE(review): the null check is redundant - 'delete' on a null pointer is
// a no-op.
FastAlignInsertion::~FastAlignInsertion() {
    if (myNext) delete myNext;
}

// Builds the triple lookup table for 'seq'.
//
// myOffset (MAX_TRIPLES entries) is zeroed first. Then, for every position
// of 'seq' with at least 3 characters of text to its right, a new
// TripleOffset is created from the position's leftOf() value and the current
// table entry for that triple's index; the new object then becomes the table
// entry. Only the address of 'seq' is stored in mySequence.
FastSearchSequence::FastSearchSequence(const CompactedSubSequence& seq) {
    memset((char*)myOffset, 0, MAX_TRIPLES*sizeof(*myOffset));
    SequencePosition triple(seq);

    mySequence = &seq;

    while (triple.rightOf()>=3) { // enough text for triple?
        int tidx = triple_index(triple.text());
        TripleOffset *top = new TripleOffset(triple.leftOf(), myOffset[tidx]);

        myOffset[tidx] = top;
        ++triple;
    }
}

// NOTE(review): the source text is damaged from the loop condition below
// onwards: the body of this destructor and the heads of one or more
// following definitions are missing. What remains are fragments of code that
// turns runs of '?' in a quality buffer into '+', counts free positions
// before/after such a run (isGlobalGap) and fills part of myBuffer with '.'.
// Further tokens are missing inside the 'for (after=...' and 'if (before...'
// statements. All damaged tokens are kept exactly as found - restore this
// region from version control.
FastSearchSequence::~FastSearchSequence() {
    for (int tidx = 0; tidx= rest) break;

    long cnt;
    for (cnt=0; found[cnt]=='?'; cnt++) found[cnt] = '+'; // count # of unaligned positions and change them to '+'

    long from  = found-myQuality;
    long b_off = from-1; // position before unaligned positions
    long a_off = from+cnt; // positions after unaligned positions

    long before, after;
    for (before=0; b_off>=0 && isGlobalGap(b_off); before++, b_off--) ; // count free positions before unaligned positions
    for (after=0; a_off=used) after = LONG_MAX;

    if (before=0; --i) myBuffer[i] = '.';
}

// --------------------------------------------------------------------------------

#ifdef UNIT_TESTS
// NOTE(review): the header names of the following two includes are missing
// in this copy of the file.
#include
#include

// Test helper: calls css.firstDotPosition() once and then
// css.nextDotPosition() 'i' times; returns the last position obtained.
static int get_dotpos(const CompactedSubSequence& css, int i) {
    int pos = css.firstDotPosition();
    while (i) {
        --i;
        pos = css.nextDotPosition();
    }
    return pos;
}

// Test helper: counts the positions delivered by firstDotPosition() /
// nextDotPosition() until -1 is returned.
static int count_dotpos(const CompactedSubSequence& css) {
    int pos   = css.firstDotPosition();
    int count = 0;

    while (pos != -1) {
        ++count;
        pos = css.nextDotPosition();
    }
    return count;
}

// Test functor: forwards 'i' to one of several CompactedSubSequence
// accessors (or to get_dotpos), selected by 'test_mode'.
struct bind_css {
    CompactedSubSequence& css;
    int test_mode; // selects the accessor used by operator(); starts at -1 (no accessor selected)

    bind_css(CompactedSubSequence& css_) : css(css_), test_mode(-1) {}

    int operator()(int i) const {
        switch (test_mode) {
            case 0: return css.compPosition(i);
            case 1: return css.expdPosition(i);
            case 2: return css[i];
            case 3: return css.no_of_gaps_before(i);
            case 4: return css.no_of_gaps_after(i);
            case 5: return get_dotpos(css, i);
        }
        // Any other test_mode (including the initial -1) is a test bug.
        fa_assert(0);
        return -666;
    }
};

// NOTE(review): the macro below is cut off at the end of the visible source;
// its remaining lines are not shown here.
#define TEST_EXPECT_CSS_SELF_REFLEXIVE(css) do {                        \
        for (int b = 0; b