| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : SEC_helix.cxx // |
|---|
| 4 | // Purpose : helix related things // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in August 2007 // |
|---|
| 7 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 8 | // http://www.arb-home.de/ // |
|---|
| 9 | // // |
|---|
| 10 | // =============================================================== // |
|---|
| 11 | |
|---|
| 12 | #include "SEC_root.hxx" |
|---|
| 13 | #include "SEC_helix.hxx" |
|---|
| 14 | #include <arbdbt.h> |
|---|
| 15 | #include <arb_strbuf.h> |
|---|
| 16 | |
|---|
| 17 | using namespace std; |
|---|
| 18 | |
|---|
| 19 | const size_t *SEC_root::getHelixPositions(const char *helixNr) const { |
|---|
| 20 | sec_assert(helixNr); |
|---|
| 21 | const BI_helix *helix = get_helixDef(); |
|---|
| 22 | long start1 = helix->first_position(helixNr); |
|---|
| 23 | |
|---|
| 24 | if (start1 == -1) return 0; |
|---|
| 25 | |
|---|
| 26 | size_t start2 = helix->last_position(helixNr); |
|---|
| 27 | static size_t pos[4]; |
|---|
| 28 | |
|---|
| 29 | if (size_t(start1)<start2) { |
|---|
| 30 | pos[0] = size_t(start1); |
|---|
| 31 | pos[1] = start2; |
|---|
| 32 | } |
|---|
| 33 | else { |
|---|
| 34 | pos[0] = start2; |
|---|
| 35 | pos[1] = size_t(start1); |
|---|
| 36 | } |
|---|
| 37 | |
|---|
| 38 | pos[2] = helix->opposite_position(pos[1]); |
|---|
| 39 | pos[3] = helix->opposite_position(pos[0]); |
|---|
| 40 | |
|---|
| 41 | return pos; |
|---|
| 42 | } |
|---|
| 43 | |
|---|
| 44 | // ------------------------------------------- |
|---|
| 45 | // current way to save folded helices: |
|---|
| 46 | // |
|---|
| 47 | // save a list of helix-numbers (with sign) separated by ';' |
|---|
| 48 | // strand start-positions are saved as 'num' |
|---|
| 49 | // segment start-positions are saved as '!num' |
|---|
| 50 | |
|---|
| 51 | char *SEC_xstring_to_foldedHelixList(const char *x_string, size_t xlength, const BI_helix *helix, GB_ERROR& error) { |
|---|
| 52 | GBS_strstruct *out = GBS_stropen(1000); |
|---|
| 53 | bool next_is_start = true; |
|---|
| 54 | |
|---|
| 55 | sec_assert(!error); |
|---|
| 56 | error = NULL; |
|---|
| 57 | |
|---|
| 58 | for (size_t pos = 0; pos<xlength && !error; ++pos) { |
|---|
| 59 | if (x_string[pos] == 'x') { |
|---|
| 60 | BI_PAIR_TYPE pairType = helix->pairtype(pos); |
|---|
| 61 | const char *helixNr = helix->helixNr(pos); |
|---|
| 62 | |
|---|
| 63 | if (next_is_start) { |
|---|
| 64 | sec_assert(pairType != HELIX_NONE); |
|---|
| 65 | GBS_strcat(out, helixNr); |
|---|
| 66 | } |
|---|
| 67 | else { |
|---|
| 68 | bool is_end_and_start = pairType != HELIX_NONE; |
|---|
| 69 | |
|---|
| 70 | if (helix->pairtype(pos-1) == HELIX_NONE) { |
|---|
| 71 | // hackaround: xstring got out of sync (e.g. by insert/delete columns) |
|---|
| 72 | error = GBS_global_string("xstring out of sync at position %zu (approximately)\nRefold helices around position!", pos); |
|---|
| 73 | } |
|---|
| 74 | else { |
|---|
| 75 | GBS_strnprintf(out, 20, "!%s", helix->helixNr(pos-1)); |
|---|
| 76 | |
|---|
| 77 | if (is_end_and_start) { |
|---|
| 78 | GBS_chrcat(out, ';'); |
|---|
| 79 | GBS_strcat(out, helixNr); |
|---|
| 80 | next_is_start = !next_is_start; |
|---|
| 81 | } |
|---|
| 82 | } |
|---|
| 83 | } |
|---|
| 84 | next_is_start = !next_is_start; |
|---|
| 85 | GBS_chrcat(out, ';'); |
|---|
| 86 | } |
|---|
| 87 | } |
|---|
| 88 | |
|---|
| 89 | return error ? (GBS_strforget(out), (char*)NULL) : GBS_strclose(out); |
|---|
| 90 | } |
|---|
| 91 | |
|---|
| 92 | char *SEC_foldedHelixList_to_xstring(const char *foldedHelices, size_t xlength, const BI_helix *helix, GB_ERROR& error) { |
|---|
| 93 | // example of foldedHelices: '-5;!-5;-8;!-8;-9;!-9;9;!9;8;!8;5;!5;-18;!-18;18;!18;-19a;!-19a;19a;!19a;-21;!-21;-24;!-24;24;!24;21;!21;-27;!-27;27;!27;-31;!-31;-43;!-43;43;!43;-46;!-46;46;!46;31;!31;-50;!-50;50;!50;' |
|---|
| 94 | char *xstring = (char*)malloc(xlength+1); |
|---|
| 95 | memset(xstring, '.', xlength); |
|---|
| 96 | xstring[xlength] = 0; |
|---|
| 97 | |
|---|
| 98 | sec_assert(error == 0); |
|---|
| 99 | error = 0; |
|---|
| 100 | |
|---|
| 101 | while (!error) { |
|---|
| 102 | const char *semi = strchr(foldedHelices, ';'); |
|---|
| 103 | if (!semi) break; |
|---|
| 104 | |
|---|
| 105 | char *taggedHelixNr = GB_strpartdup(foldedHelices, semi-1); |
|---|
| 106 | const char *helixNr = 0; |
|---|
| 107 | long xpos = -1; |
|---|
| 108 | |
|---|
| 109 | if (taggedHelixNr[0] == '!') { // position behind end of helix |
|---|
| 110 | helixNr = taggedHelixNr+1; |
|---|
| 111 | xpos = helix->last_position(helixNr); |
|---|
| 112 | if (xpos >= 0) ++xpos; |
|---|
| 113 | } |
|---|
| 114 | else { // position at start of helix |
|---|
| 115 | helixNr = taggedHelixNr; |
|---|
| 116 | xpos = helix->first_position(helixNr); |
|---|
| 117 | } |
|---|
| 118 | |
|---|
| 119 | if (xpos == -1) { |
|---|
| 120 | error = GBS_global_string("Can't find helix '%s'", helixNr); |
|---|
| 121 | } |
|---|
| 122 | else { |
|---|
| 123 | sec_assert(xpos >= 0 && size_t(xpos) < xlength); |
|---|
| 124 | xstring[xpos] = 'x'; |
|---|
| 125 | } |
|---|
| 126 | |
|---|
| 127 | free(taggedHelixNr); |
|---|
| 128 | |
|---|
| 129 | foldedHelices = semi+1; |
|---|
| 130 | } |
|---|
| 131 | |
|---|
| 132 | return xstring; |
|---|
| 133 | } |
|---|
| 134 | |
|---|
| 135 | // ------------------------------------------------------------------------- |
|---|
| 136 | // xstring was made helix-relative, when saved to old version file |
|---|
| 137 | |
|---|
| 138 | |
|---|
| 139 | char *old_decode_xstring_rel_helix(GB_CSTR rel_helix, size_t xlength, const BI_helix *helix, int *no_of_helices_ptr) |
|---|
| 140 | { |
|---|
| 141 | const char *start_helix_nr = 0; |
|---|
| 142 | int no_of_helices = 0; |
|---|
| 143 | int start_helix = 0; |
|---|
| 144 | int end_helix = -1; |
|---|
| 145 | int rel_pos = 0; |
|---|
| 146 | size_t lastpos = helix->size()-1; |
|---|
| 147 | |
|---|
| 148 | char *x_buffer = (char*)malloc(xlength+1); |
|---|
| 149 | memset(x_buffer, '.', xlength); |
|---|
| 150 | x_buffer[xlength] = 0; |
|---|
| 151 | |
|---|
| 152 | for (size_t pos=0; ; pos++) { |
|---|
| 153 | const char *helix_nr = 0; |
|---|
| 154 | BI_PAIR_TYPE pairType = helix->pairtype(pos); |
|---|
| 155 | |
|---|
| 156 | if (pairType!=HELIX_NONE) { |
|---|
| 157 | helix_nr = helix->helixNr(pos); |
|---|
| 158 | |
|---|
| 159 | if (helix_nr==start_helix_nr) { // same helix as last |
|---|
| 160 | end_helix = pos; |
|---|
| 161 | } |
|---|
| 162 | else { // new helix nr |
|---|
| 163 | if (start_helix_nr) { // not first helix -> write last to xstring |
|---|
| 164 | insert_helix : |
|---|
| 165 | helix_nr = helix->helixNr(pos); // re-init (needed in case of goto insert_helix) |
|---|
| 166 | char flag = rel_helix[rel_pos++]; |
|---|
| 167 | |
|---|
| 168 | no_of_helices++; |
|---|
| 169 | if (flag=='1') { |
|---|
| 170 | sec_assert(end_helix!=-1); |
|---|
| 171 | sec_assert(size_t(start_helix)<xlength); |
|---|
| 172 | sec_assert(size_t(end_helix+1)<xlength); |
|---|
| 173 | |
|---|
| 174 | x_buffer[start_helix] = 'x'; |
|---|
| 175 | x_buffer[end_helix+1] = 'x'; |
|---|
| 176 | } |
|---|
| 177 | else if (flag!='0') { |
|---|
| 178 | if (flag==0) break; // eos |
|---|
| 179 | |
|---|
| 180 | sec_assert(0); // illegal character |
|---|
| 181 | |
|---|
| 182 | break; |
|---|
| 183 | } |
|---|
| 184 | if (pos==lastpos) { |
|---|
| 185 | break; |
|---|
| 186 | } |
|---|
| 187 | } |
|---|
| 188 | start_helix = pos; |
|---|
| 189 | end_helix = pos; |
|---|
| 190 | start_helix_nr = helix_nr; |
|---|
| 191 | } |
|---|
| 192 | } |
|---|
| 193 | if (pos==lastpos) { |
|---|
| 194 | if (start_helix_nr) { |
|---|
| 195 | goto insert_helix; |
|---|
| 196 | } |
|---|
| 197 | } |
|---|
| 198 | } |
|---|
| 199 | |
|---|
| 200 | *no_of_helices_ptr = no_of_helices; |
|---|
| 201 | return x_buffer; |
|---|
| 202 | } |
|---|