| 1 | #include "AP_seq_simple_pro.hxx" |
|---|
| 2 | #include <AP_pro_a_nucs.hxx> |
|---|
| 3 | #include <AP_filter.hxx> |
|---|
| 4 | #include <ARB_Tree.hxx> |
|---|
| 5 | |
|---|
| 6 | |
|---|
| 7 | // #define ap_assert(bed) arb_assert(bed) |
|---|
| 8 | |
|---|
| 9 | AP_sequence_simple_protein::AP_sequence_simple_protein(const AliView *aliview) : |
|---|
| 10 | AP_sequence(aliview) |
|---|
| 11 | { |
|---|
| 12 | sequence = NULp; |
|---|
| 13 | } |
|---|
| 14 | |
|---|
| 15 | AP_sequence_simple_protein::~AP_sequence_simple_protein() { |
|---|
| 16 | delete [] sequence; |
|---|
| 17 | } |
|---|
| 18 | |
|---|
| 19 | AP_sequence *AP_sequence_simple_protein::dup() const { |
|---|
| 20 | return new AP_sequence_simple_protein(get_aliview()); |
|---|
| 21 | } |
|---|
| 22 | |
|---|
| 23 | |
|---|
| 24 | |
|---|
| 25 | void AP_sequence_simple_protein::set(const char *isequence) { |
|---|
| 26 | AWT_translator *translator = AWT_get_user_translator(get_aliview()->get_gb_main()); |
|---|
| 27 | |
|---|
| 28 | const struct arb_r2a_pro_2_nuc * const *s2str = translator->S2strArray(); |
|---|
| 29 | |
|---|
| 30 | size_t sequence_len = get_sequence_length(); |
|---|
| 31 | sequence = new ap_pro[sequence_len+1]; |
|---|
| 32 | memset(sequence, s2str['.']->index, (size_t)(sizeof(ap_pro) * sequence_len)); |
|---|
| 33 | |
|---|
| 34 | const char *s = isequence; |
|---|
| 35 | ap_pro *d = sequence; |
|---|
| 36 | |
|---|
| 37 | const AP_filter *filt = get_filter(); |
|---|
| 38 | const uchar *simplify = filt->get_simplify_table(); |
|---|
| 39 | int sindex = s2str['s']->index; |
|---|
| 40 | |
|---|
| 41 | if (filt->does_bootstrap()) { |
|---|
| 42 | int iseqlen = strlen(isequence); |
|---|
| 43 | for (int i = filt->get_filtered_length()-1; i>=0; i--) { |
|---|
| 44 | int pos = filt->bootstrapped_seqpos(i); |
|---|
| 45 | if (pos >= iseqlen) continue; |
|---|
| 46 | unsigned char c = s[pos]; |
|---|
| 47 | if (! (s2str[c])) { // unknown character |
|---|
| 48 | continue; |
|---|
| 49 | } |
|---|
| 50 | int ind = s2str[simplify[c]]->index; |
|---|
| 51 | if (ind >= sindex) ind --; |
|---|
| 52 | d[i] = ind; |
|---|
| 53 | } |
|---|
| 54 | } |
|---|
| 55 | else { |
|---|
| 56 | size_t i, j; |
|---|
| 57 | size_t flen = filt->get_length(); |
|---|
| 58 | for (i = j = 0; i < flen; ++i) { |
|---|
| 59 | unsigned char c = s[i]; |
|---|
| 60 | if (!c) break; |
|---|
| 61 | if (filt->use_position(i)) { |
|---|
| 62 | if (s2str[c]) { |
|---|
| 63 | int ind = s2str[simplify[c]]->index; |
|---|
| 64 | if (ind >= sindex) ind--; |
|---|
| 65 | d[j] = ind; |
|---|
| 66 | } |
|---|
| 67 | j++; |
|---|
| 68 | } |
|---|
| 69 | } |
|---|
| 70 | } |
|---|
| 71 | mark_sequence_set(true); |
|---|
| 72 | } |
|---|
| 73 | |
|---|
| 74 | void AP_sequence_simple_protein::unset() { |
|---|
| 75 | delete [] sequence; |
|---|
| 76 | sequence = NULp; |
|---|
| 77 | mark_sequence_set(false); |
|---|
| 78 | } |
|---|
| 79 | |
|---|
| 80 | |
|---|
| 81 | |
|---|