| 76 | | this->exit(); |
| 77 | | GB_transaction dummy(this->gb_main); |
| 78 | | long alignment_length_l = GBT_get_alignment_len(this->gb_main, alignment_name); |
| 79 | | GB_ERROR error = 0; |
| 80 | | if (alignment_length_l <= 1) { |
| 81 | | return GB_export_errorf("Unknown Alignment Size: Name '%s'\n" |
| 82 | | " Select a Valid Alignment",alignment_name); |
| 83 | | } |
| 84 | | size_t alignment_length = alignment_length_l; |
| 85 | | if (filter && filter->get_length() != alignment_length) { |
| 86 | | return GB_export_error( "Incompatible filter_len" ); |
| | 76 | exit(); // delete previously calculated stats |
| | 77 | |
| | 78 | GB_transaction dummy(gb_main); |
| | 79 | GB_ERROR error = 0; |
| | 80 | size_t alignment_length; |
| | 81 | { |
| | 82 | long alignment_length_l = GBT_get_alignment_len(gb_main, alignment_name); |
| | 83 | if (alignment_length_l <= 1) { |
| | 84 | error = GBS_global_string("Unknown size for alignment '%s'", alignment_name); |
| | 85 | } |
| | 86 | else { |
| | 87 | alignment_length = alignment_length_l; |
| | 88 | if (filter && filter->get_length() != alignment_length) { |
| | 89 | error = GBS_global_string("Incompatible filter (filter=%zu bp, alignment=%zu bp)", |
| | 90 | filter->get_length(), alignment_length); |
| | 91 | } |
| | 92 | } |
| 97 | | gb_ali = GB_entry(gb_sai,alignment_name); |
| 98 | | if (!gb_ali) error = GB_export_error("Please select a valid Column Statistic"); |
| 99 | | } |
| 100 | | if (!error) { |
| 101 | | gb_freqs = GB_entry(gb_ali,"FREQUENCIES"); |
| 102 | | if (!gb_ali) error = GB_export_error("Please select a valid Column Statistic"); |
| 103 | | } |
| 104 | | if (error) { |
| | 97 | char *sai_name = awr->awar(awar_name)->read_string(); |
| | 98 | GBDATA *gb_sai = GBT_find_SAI(gb_main, sai_name); |
| | 99 | |
| | 100 | if (!gb_sai) error = "Please select a column statistic"; |
| | 101 | |
| | 102 | GBDATA *gb_ali = 0; |
| | 103 | GBDATA *gb_freqs = 0; |
| | 104 | if (!error) { |
| | 105 | gb_ali = GB_entry(gb_sai, alignment_name); |
| | 106 | if (!gb_ali) error = GBS_global_string("SAI '%s' does not exist in alignment '%s'", sai_name, alignment_name); |
| | 107 | } |
| | 108 | if (!error) { |
| | 109 | gb_freqs = GB_entry(gb_ali,"FREQUENCIES"); |
| | 110 | if (!gb_ali) error = GBS_global_string("Column statistic '%s' is invalid\n(has no FREQUENCIES)", sai_name); |
| | 111 | } |
| | 112 | |
| | 113 | if (!error) { |
| | 114 | seq_len = filter ? filter->get_filtered_length() : alignment_length; |
| | 115 | |
| | 116 | delete [] weights; weights = new GB_UINT4[seq_len]; |
| | 117 | delete [] rates; rates = new float[seq_len]; |
| | 118 | delete [] ttratio; ttratio = new float[seq_len]; |
| | 119 | delete [] is_helix;is_helix = new bool[seq_len]; |
| | 120 | delete [] mut_sum; mut_sum = new GB_UINT4[seq_len]; |
| | 121 | delete [] freq_sum;freq_sum = new GB_UINT4[seq_len]; |
| | 122 | |
| | 123 | delete desc; desc = 0; |
| | 124 | |
| | 125 | size_t i; |
| | 126 | size_t j; |
| | 127 | for (i=0;i<256;i++) { delete frequency[i];frequency[i]=0;} |
| | 128 | |
| | 129 | long use_helix = awr->awar(awar_enable_helix)->read_int(); |
| | 130 | |
| | 131 | for (j=i=0;i<seq_len;i++) { |
| | 132 | is_helix[i] = false; |
| | 133 | weights[i] = 1; |
| | 134 | } |
| | 135 | |
| | 136 | if (!error && use_helix) { // calculate weights and helix filter |
| | 137 | BI_helix helix; |
| | 138 | error = helix.init(this->gb_main,alignment_name); |
| | 139 | if (error){ |
| | 140 | aw_message(error); |
| | 141 | error = 0; |
| | 142 | goto no_helix; |
| | 143 | } |
| | 144 | error = 0; |
| | 145 | for (j=i=0;i<alignment_length;i++) { |
| | 146 | if (!filter || filter->use_position(i)) { |
| | 147 | if (helix.pairtype(i) == HELIX_PAIR) { |
| | 148 | is_helix[j] = true; |
| | 149 | weights[j] = 1; |
| | 150 | } |
| | 151 | else{ |
| | 152 | is_helix[j] = false; |
| | 153 | weights[j] = 2; |
| | 154 | } |
| | 155 | j++; |
| | 156 | } |
| | 157 | } |
| | 158 | } |
| | 159 | no_helix: |
| | 160 | |
| | 161 | for (i=0;i<seq_len;i++) freq_sum[i] = 0; |
| | 162 | |
| | 163 | GBDATA *gb_freq; |
| | 164 | GB_UINT4 *freqi[256]; |
| | 165 | for (i=0;i<256; i++) freqi[i] = 0; |
| | 166 | int wf; // ********* read the frequency statistic |
| | 167 | for (gb_freq = GB_child(gb_freqs); gb_freq; gb_freq = GB_nextChild(gb_freq)) { |
| | 168 | char *key = GB_read_key(gb_freq); |
| | 169 | if (key[0] == 'N' && key[1] && !key[2]) { |
| | 170 | wf = key[1]; |
| | 171 | freqi[wf] = GB_read_ints(gb_freq); |
| | 172 | } |
| | 173 | free(key); |
| | 174 | } |
| | 175 | |
| | 176 | GB_UINT4 *minmut = 0; |
| | 177 | GBDATA *gb_minmut = GB_entry(gb_freqs,"TRANSITIONS"); |
| | 178 | if (gb_minmut) minmut = GB_read_ints(gb_minmut); |
| | 179 | |
| | 180 | GB_UINT4 *transver = 0; |
| | 181 | GBDATA *gb_transver = GB_entry(gb_freqs,"TRANSVERSIONS"); |
| | 182 | if (gb_transver) transver = GB_read_ints(gb_transver); |
| | 183 | unsigned long max_freq_sum = 0; |
| | 184 | for (wf = 0; wf<256;wf++) { // ********* calculate sum of mutations |
| | 185 | if (!freqi[wf]) continue; // ********* calculate nbases for each col. |
| | 186 | for (j=i=0;i<alignment_length;i++) { |
| | 187 | if (filter && !filter->use_position(i)) continue; |
| | 188 | freq_sum[j] += freqi[wf][i]; |
| | 189 | mut_sum[j] = minmut[i]; |
| | 190 | j++; |
| | 191 | } |
| | 192 | } |
| | 193 | for (i=0;i<seq_len;i++) |
| | 194 | if (freq_sum[i] > max_freq_sum) |
| | 195 | max_freq_sum = freq_sum[i]; |
| | 196 | unsigned long min_freq_sum = DIST_MIN_SEQ(max_freq_sum); |
| | 197 | |
| | 198 | for (wf = 0; wf<256;wf++) { |
| | 199 | if (!freqi[wf]) continue; |
| | 200 | frequency[wf] = new float[seq_len]; |
| | 201 | for (j=i=0;i<alignment_length;i++) { |
| | 202 | if (filter && !filter->use_position(i)) continue; |
| | 203 | if (freq_sum[j] > min_freq_sum) { |
| | 204 | frequency[wf][j] = freqi[wf][i]/(float)freq_sum[j]; |
| | 205 | }else{ |
| | 206 | frequency[wf][j] = 0; |
| | 207 | weights[j] = 0; |
| | 208 | } |
| | 209 | j++; |
| | 210 | } |
| | 211 | } |
| | 212 | |
| | 213 | for (j=i=0;i<alignment_length;i++) { // ******* calculate rates |
| | 214 | if (filter && !filter->use_position(i)) continue; |
| | 215 | if (!weights[j]) { |
| | 216 | rates[j] = 1.0; |
| | 217 | ttratio[j] = 0.5; |
| | 218 | j++; |
| | 219 | continue; |
| | 220 | } |
| | 221 | rates[j] = (mut_sum[j] / (float)freq_sum[j]); |
| | 222 | if (transver[i] > 0) { |
| | 223 | ttratio[j] = (minmut[i] - transver[i]) / (float)transver[i]; |
| | 224 | }else{ |
| | 225 | ttratio[j] = 2.0; |
| | 226 | } |
| | 227 | if (ttratio[j] < 0.05) ttratio[j] = 0.05; |
| | 228 | if (ttratio[j] > 5.0) ttratio[j] = 5.0; |
| | 229 | j++; |
| | 230 | } |
| | 231 | |
| | 232 | // ****** normalize rates |
| | 233 | |
| | 234 | double sum_rates = 0; |
| | 235 | for (i=0;i<seq_len;i++) sum_rates += rates[i]; |
| | 236 | sum_rates /= seq_len; |
| | 237 | for (i=0;i<seq_len;i++) rates[i] /= sum_rates; |
| | 238 | |
| | 239 | free(transver); |
| | 240 | free(minmut); |
| | 241 | for (i=0;i<256;i++) free(freqi[i]); |
| | 242 | } |
| 106 | | return error; |
| 107 | | } |
| 108 | | |
| 109 | | seq_len = filter ? filter->get_filtered_length() : alignment_length; |
| 110 | | |
| 111 | | delete [] weights; weights = new GB_UINT4[seq_len]; |
| 112 | | delete [] rates; rates = new float[seq_len]; |
| 113 | | delete [] ttratio; ttratio = new float[seq_len]; |
| 114 | | delete [] is_helix;is_helix = new bool[seq_len]; |
| 115 | | delete [] mut_sum; mut_sum = new GB_UINT4[seq_len]; |
| 116 | | delete [] freq_sum;freq_sum = new GB_UINT4[seq_len]; |
| 117 | | |
| 118 | | delete desc; desc = 0; |
| 119 | | |
| 120 | | size_t i; |
| 121 | | size_t j; |
| 122 | | for (i=0;i<256;i++) { delete frequency[i];frequency[i]=0;} |
| 123 | | |
| 124 | | long use_helix = awr->awar(awar_enable_helix)->read_int(); |
| 125 | | |
| 126 | | for (j=i=0;i<seq_len;i++) { |
| 127 | | is_helix[i] = false; |
| 128 | | weights[i] = 1; |
| 129 | | } |
| 130 | | |
| 131 | | if (!error && use_helix) { // calculate weights and helix filter |
| 132 | | BI_helix helix; |
| 133 | | error = helix.init(this->gb_main,alignment_name); |
| 134 | | if (error){ |
| 135 | | aw_message(error); |
| 136 | | error = 0; |
| 137 | | goto no_helix; |
| 138 | | } |
| 139 | | error = 0; |
| 140 | | for (j=i=0;i<alignment_length;i++) { |
| 141 | | if (!filter || filter->use_position(i)) { |
| 142 | | if (helix.pairtype(i) == HELIX_PAIR) { |
| 143 | | is_helix[j] = true; |
| 144 | | weights[j] = 1; |
| 145 | | } |
| 146 | | else{ |
| 147 | | is_helix[j] = false; |
| 148 | | weights[j] = 2; |
| 149 | | } |
| 150 | | j++; |
| 151 | | } |
| 152 | | } |
| 153 | | } |
| 154 | | no_helix: |
| 155 | | |
| 156 | | for (i=0;i<seq_len;i++) freq_sum[i] = 0; |
| 157 | | |
| 158 | | GBDATA *gb_freq; |
| 159 | | GB_UINT4 *freqi[256]; |
| 160 | | for (i=0;i<256; i++) freqi[i] = 0; |
| 161 | | int wf; // ********* read the frequency statistic |
| 162 | | for (gb_freq = GB_child(gb_freqs); gb_freq; gb_freq = GB_nextChild(gb_freq)) { |
| 163 | | char *key = GB_read_key(gb_freq); |
| 164 | | if (key[0] == 'N' && key[1] && !key[2]) { |
| 165 | | wf = key[1]; |
| 166 | | freqi[wf] = GB_read_ints(gb_freq); |
| 167 | | } |
| 168 | | free(key); |
| 169 | | } |
| 170 | | |
| 171 | | GB_UINT4 *minmut = 0; |
| 172 | | GBDATA *gb_minmut = GB_entry(gb_freqs,"TRANSITIONS"); |
| 173 | | if (gb_minmut) minmut = GB_read_ints(gb_minmut); |
| 174 | | |
| 175 | | GB_UINT4 *transver = 0; |
| 176 | | GBDATA *gb_transver = GB_entry(gb_freqs,"TRANSVERSIONS"); |
| 177 | | if (gb_transver) transver = GB_read_ints(gb_transver); |
| 178 | | unsigned long max_freq_sum = 0; |
| 179 | | for (wf = 0; wf<256;wf++) { // ********* calculate sum of mutations |
| 180 | | if (!freqi[wf]) continue; // ********* calculate nbases for each col. |
| 181 | | for (j=i=0;i<alignment_length;i++) { |
| 182 | | if (filter && !filter->use_position(i)) continue; |
| 183 | | freq_sum[j] += freqi[wf][i]; |
| 184 | | mut_sum[j] = minmut[i]; |
| 185 | | j++; |
| 186 | | } |
| 187 | | } |
| 188 | | for (i=0;i<seq_len;i++) |
| 189 | | if (freq_sum[i] > max_freq_sum) |
| 190 | | max_freq_sum = freq_sum[i]; |
| 191 | | unsigned long min_freq_sum = DIST_MIN_SEQ(max_freq_sum); |
| 192 | | |
| 193 | | for (wf = 0; wf<256;wf++) { |
| 194 | | if (!freqi[wf]) continue; |
| 195 | | frequency[wf] = new float[seq_len]; |
| 196 | | for (j=i=0;i<alignment_length;i++) { |
| 197 | | if (filter && !filter->use_position(i)) continue; |
| 198 | | if (freq_sum[j] > min_freq_sum) { |
| 199 | | frequency[wf][j] = freqi[wf][i]/(float)freq_sum[j]; |
| 200 | | }else{ |
| 201 | | frequency[wf][j] = 0; |
| 202 | | weights[j] = 0; |
| 203 | | } |
| 204 | | j++; |
| 205 | | } |
| 206 | | } |
| 207 | | |
| 208 | | for (j=i=0;i<alignment_length;i++) { // ******* calculate rates |
| 209 | | if (filter && !filter->use_position(i)) continue; |
| 210 | | if (!weights[j]) { |
| 211 | | rates[j] = 1.0; |
| 212 | | ttratio[j] = 0.5; |
| 213 | | j++; |
| 214 | | continue; |
| 215 | | } |
| 216 | | rates[j] = (mut_sum[j] / (float)freq_sum[j]); |
| 217 | | if (transver[i] > 0) { |
| 218 | | ttratio[j] = (minmut[i] - transver[i]) / (float)transver[i]; |
| 219 | | }else{ |
| 220 | | ttratio[j] = 2.0; |
| 221 | | } |
| 222 | | if (ttratio[j] < 0.05) ttratio[j] = 0.05; |
| 223 | | if (ttratio[j] > 5.0) ttratio[j] = 5.0; |
| 224 | | j++; |
| 225 | | } |
| 226 | | |
| 227 | | // ****** normalize rates |
| 228 | | |
| 229 | | double sum_rates = 0; |
| 230 | | for (i=0;i<seq_len;i++) sum_rates += rates[i]; |
| 231 | | sum_rates /= seq_len; |
| 232 | | for (i=0;i<seq_len;i++) rates[i] /= sum_rates; |
| 233 | | |
| 234 | | free(transver); |
| 235 | | free(minmut); |
| 236 | | for (i=0;i<256;i++) free(freqi[i]); |
| 237 | | free(sai_name); |
| | 244 | } |