| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : ali_prealigner.hxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // =============================================================== // |
|---|
| 10 | |
|---|
| 11 | #ifndef ALI_PREALIGNER_HXX |
|---|
| 12 | #define ALI_PREALIGNER_HXX |
|---|
| 13 | |
|---|
| 14 | #ifndef ALI_PATHMAP_HXX |
|---|
| 15 | #include "ali_pathmap.hxx" |
|---|
| 16 | #endif |
|---|
| 17 | #ifndef ALI_TSTACK_HXX |
|---|
| 18 | #include "ali_tstack.hxx" |
|---|
| 19 | #endif |
|---|
| 20 | #ifndef ALI_SOLUTION_HXX |
|---|
| 21 | #include "ali_solution.hxx" |
|---|
| 22 | #endif |
|---|
| 23 | |
|---|
| 24 | #define ALI_PREALIGNER_INS 1 |
|---|
| 25 | #define ALI_PREALIGNER_SUB 2 |
|---|
| 26 | #define ALI_PREALIGNER_DEL 3 |
|---|
| 27 | #define ALI_PREALIGNER_MULTI_FLAG 4 |
|---|
| 28 | |
|---|
| 29 | |
|---|
| 30 | |
|---|
| 31 | struct ALI_PREALIGNER_CONTEXT { |
|---|
| 32 | long max_number_of_maps; |
|---|
| 33 | long max_number_of_maps_aligner; |
|---|
| 34 | int interval_border; |
|---|
| 35 | int interval_center; |
|---|
| 36 | float max_cost_of_sub_percent; |
|---|
| 37 | float max_cost_of_helix; |
|---|
| 38 | unsigned long error_count; |
|---|
| 39 | }; |
|---|
| 40 | |
|---|
| 41 | // Structure for a cell in the (simple) distance matrix |
|---|
| 42 | struct ali_prealigner_cell { |
|---|
| 43 | float d; |
|---|
| 44 | |
|---|
| 45 | ali_prealigner_cell() { |
|---|
| 46 | d = 0.0; |
|---|
| 47 | } |
|---|
| 48 | }; |
|---|
| 49 | |
|---|
| 50 | // Structure for a column in the (simple) distance matrix |
|---|
| 51 | struct ali_prealigner_column : virtual Noncopyable { |
|---|
| 52 | unsigned long column_length; |
|---|
| 53 | ali_prealigner_cell **cells; |
|---|
| 54 | |
|---|
| 55 | ali_prealigner_column(unsigned long length) { |
|---|
| 56 | column_length = length; |
|---|
| 57 | cells = (ali_prealigner_cell **) calloc((unsigned int) column_length, sizeof(ali_prealigner_cell)); |
|---|
| 58 | ali_out_of_memory_if(!cells); |
|---|
| 59 | } |
|---|
| 60 | ~ali_prealigner_column() { |
|---|
| 61 | if (cells) |
|---|
| 62 | free((char *) cells); |
|---|
| 63 | } |
|---|
| 64 | }; |
|---|
| 65 | |
|---|
| 66 | // Structure for the intersection of maps |
|---|
| 67 | struct ali_prealigner_mask { |
|---|
| 68 | ALI_MAP *map; |
|---|
| 69 | float cost_of_binding; |
|---|
| 70 | |
|---|
| 71 | unsigned long calls, joins, last_new, last_joins; |
|---|
| 72 | |
|---|
| 73 | ali_prealigner_mask() { |
|---|
| 74 | last_new = last_joins = 0; |
|---|
| 75 | map = NULp; |
|---|
| 76 | calls = joins = 0; |
|---|
| 77 | } |
|---|
| 78 | |
|---|
| 79 | void insert(ALI_MAP *in_map, float costs); |
|---|
| 80 | void delete_expensive(ALI_PREALIGNER_CONTEXT *context, ALI_PROFILE *profile); |
|---|
| 81 | void clear() { |
|---|
| 82 | last_new = last_joins = 0; |
|---|
| 83 | delete map; |
|---|
| 84 | calls = joins = 0; |
|---|
| 85 | } |
|---|
| 86 | }; |
|---|
| 87 | |
|---|
| 88 | // Structure for a approximation of a map |
|---|
| 89 | struct ali_prealigner_approx_element : virtual Noncopyable { |
|---|
| 90 | ALI_MAP *map; |
|---|
| 91 | char *ins_marker; |
|---|
| 92 | |
|---|
| 93 | ali_prealigner_approx_element(ALI_MAP *m = NULp, char *im = NULp) { |
|---|
| 94 | map = m; |
|---|
| 95 | ins_marker = im; |
|---|
| 96 | } |
|---|
| 97 | ~ali_prealigner_approx_element() { |
|---|
| 98 | /* |
|---|
| 99 | if (map) |
|---|
| 100 | delete map; |
|---|
| 101 | if (ins_marker) |
|---|
| 102 | free((char *) ins_marker); |
|---|
| 103 | */ |
|---|
| 104 | } |
|---|
| 105 | void print() { |
|---|
| 106 | printf("map = %p, ins_marker = %p\n", map, ins_marker); |
|---|
| 107 | map->print(); |
|---|
| 108 | printf("<%20s>\n", ins_marker); |
|---|
| 109 | } |
|---|
| 110 | }; |
|---|
| 111 | |
|---|
| 112 | // Structure for a list of approximated maps |
|---|
| 113 | struct ali_prealigner_approximation : virtual Noncopyable { |
|---|
| 114 | ALI_TLIST<ali_prealigner_approx_element *> *approx_list; |
|---|
| 115 | float cost_of_binding; |
|---|
| 116 | |
|---|
| 117 | ali_prealigner_approximation() { |
|---|
| 118 | cost_of_binding = 0.0; |
|---|
| 119 | approx_list = new ALI_TLIST<ali_prealigner_approx_element *>; |
|---|
| 120 | } |
|---|
| 121 | ~ali_prealigner_approximation() { |
|---|
| 122 | if (approx_list) { |
|---|
| 123 | clear(); |
|---|
| 124 | delete approx_list; |
|---|
| 125 | } |
|---|
| 126 | } |
|---|
| 127 | |
|---|
| 128 | // Insert a new approximation (dependence of the costs) |
|---|
| 129 | void insert(ALI_MAP *in_map, char *in_insert_marker, float costs) { |
|---|
| 130 | ali_prealigner_approx_element *approx_elem; |
|---|
| 131 | |
|---|
| 132 | if (!approx_list) |
|---|
| 133 | approx_list = new ALI_TLIST<ali_prealigner_approx_element *>; |
|---|
| 134 | |
|---|
| 135 | if (costs < cost_of_binding || approx_list->is_empty()) { |
|---|
| 136 | cost_of_binding = costs; |
|---|
| 137 | clear(); |
|---|
| 138 | } |
|---|
| 139 | |
|---|
| 140 | if (costs == cost_of_binding) { |
|---|
| 141 | if (approx_list->cardinality() < 20) { |
|---|
| 142 | approx_elem = new ali_prealigner_approx_element(in_map, |
|---|
| 143 | in_insert_marker); |
|---|
| 144 | |
|---|
| 145 | approx_list->append_end(approx_elem); |
|---|
| 146 | } |
|---|
| 147 | else { |
|---|
| 148 | delete in_map; |
|---|
| 149 | delete in_insert_marker; |
|---|
| 150 | } |
|---|
| 151 | } |
|---|
| 152 | else { |
|---|
| 153 | delete in_map; |
|---|
| 154 | delete in_insert_marker; |
|---|
| 155 | } |
|---|
| 156 | } |
|---|
| 157 | ALI_TLIST<ali_prealigner_approx_element *> *list() { |
|---|
| 158 | ALI_TLIST<ali_prealigner_approx_element *> *ret_list; |
|---|
| 159 | ret_list = approx_list; |
|---|
| 160 | approx_list = NULp; |
|---|
| 161 | return ret_list; |
|---|
| 162 | } |
|---|
| 163 | void clear() { |
|---|
| 164 | if (approx_list) { |
|---|
| 165 | if (!approx_list->is_empty()) { |
|---|
| 166 | delete approx_list->first(); |
|---|
| 167 | while (approx_list->has_next()) |
|---|
| 168 | delete approx_list->next(); |
|---|
| 169 | approx_list->make_empty(); |
|---|
| 170 | } |
|---|
| 171 | } |
|---|
| 172 | } |
|---|
| 173 | void print() { |
|---|
| 174 | ali_prealigner_approx_element *approx_elem; |
|---|
| 175 | printf("\nList of Approximations\n"); |
|---|
| 176 | printf("cost_of_binding = %f\n", cost_of_binding); |
|---|
| 177 | if (approx_list) { |
|---|
| 178 | if (!approx_list->is_empty()) { |
|---|
| 179 | approx_elem = approx_list->first(); |
|---|
| 180 | approx_elem->print(); |
|---|
| 181 | while (approx_list->has_next()) { |
|---|
| 182 | approx_elem = approx_list->next(); |
|---|
| 183 | approx_elem->print(); |
|---|
| 184 | } |
|---|
| 185 | } |
|---|
| 186 | else { |
|---|
| 187 | printf("List is empty\n"); |
|---|
| 188 | } |
|---|
| 189 | } |
|---|
| 190 | else { |
|---|
| 191 | printf("List not existent\n"); |
|---|
| 192 | } |
|---|
| 193 | } |
|---|
| 194 | }; |
|---|
| 195 | |
|---|
| 196 | |
|---|
| 197 | |
|---|
| 198 | // Class of the prealigner |
|---|
| 199 | class ALI_PREALIGNER : virtual Noncopyable { |
|---|
| 200 | ALI_PROFILE *profile; |
|---|
| 201 | ALI_PATHMAP *path_map; |
|---|
| 202 | ALI_SUB_SOLUTION *sub_solution; |
|---|
| 203 | |
|---|
| 204 | ali_prealigner_mask result_mask; |
|---|
| 205 | unsigned long result_mask_counter; |
|---|
| 206 | unsigned long start_x, end_x, start_y, end_y; |
|---|
| 207 | ali_prealigner_approximation result_approx; |
|---|
| 208 | |
|---|
| 209 | float minimum2(float a, float b); |
|---|
| 210 | float minimum3(float a, float b, float c); |
|---|
| 211 | |
|---|
| 212 | void calculate_first_column_first_cell(ali_prealigner_cell *akt_cell); |
|---|
| 213 | void calculate_first_column_cell(ali_prealigner_cell *up_cell, |
|---|
| 214 | ali_prealigner_cell *akt_cell, |
|---|
| 215 | unsigned long pos_y); |
|---|
| 216 | void calculate_first_column(ali_prealigner_column *akt_column); |
|---|
| 217 | |
|---|
| 218 | void calculate_first_cell(ali_prealigner_cell *left_cell, |
|---|
| 219 | ali_prealigner_cell *akt_cell, |
|---|
| 220 | unsigned long pos_x); |
|---|
| 221 | void calculate_cell( |
|---|
| 222 | ali_prealigner_cell *diag_cell, ali_prealigner_cell *left_cell, |
|---|
| 223 | ali_prealigner_cell *up_cell, ali_prealigner_cell *akt_cell, |
|---|
| 224 | unsigned long pos_x, unsigned long pos_y); |
|---|
| 225 | void calculate_column(ali_prealigner_column *prev_col, |
|---|
| 226 | ali_prealigner_column *akt_col, |
|---|
| 227 | unsigned long pos_x); |
|---|
| 228 | |
|---|
| 229 | void calculate_last_column_first_cell(ali_prealigner_cell *left_cell, |
|---|
| 230 | ali_prealigner_cell *akt_cell, |
|---|
| 231 | unsigned long pos_x); |
|---|
| 232 | void calculate_last_column_cell( |
|---|
| 233 | ali_prealigner_cell *diag_cell, ali_prealigner_cell *left_cell, |
|---|
| 234 | ali_prealigner_cell *up_cell, ali_prealigner_cell *akt_cell, |
|---|
| 235 | unsigned long pos_x, unsigned long pos_y); |
|---|
| 236 | void calculate_last_column(ali_prealigner_column *prev_col, |
|---|
| 237 | ali_prealigner_column *akt_col, |
|---|
| 238 | unsigned long pos_x); |
|---|
| 239 | |
|---|
| 240 | void calculate_matrix(); |
|---|
| 241 | |
|---|
| 242 | void generate_solution(ALI_MAP *map); |
|---|
| 243 | void generate_result_mask(ALI_TSTACK<unsigned char> *stack); |
|---|
| 244 | void mapper_pre(ALI_TSTACK<unsigned char> *stack, |
|---|
| 245 | unsigned long pos_x, unsigned long pos_y); |
|---|
| 246 | void mapper_post(ALI_TSTACK<unsigned char> *stack, |
|---|
| 247 | unsigned long pos_x, unsigned long pos_y); |
|---|
| 248 | void mapper_post_multi(ALI_TSTACK<unsigned char> *stack, |
|---|
| 249 | unsigned long pos_x, unsigned long pos_y); |
|---|
| 250 | void mapper_random(ALI_TSTACK<unsigned char> *stack, |
|---|
| 251 | unsigned long pos_x, unsigned long pos_y); |
|---|
| 252 | void mapper(ALI_TSTACK<unsigned char> *stack, |
|---|
| 253 | unsigned long pos_x, unsigned long pos_y); |
|---|
| 254 | void make_map(); |
|---|
| 255 | |
|---|
| 256 | void generate_approximation(ALI_SUB_SOLUTION *work_sol); |
|---|
| 257 | void mapper_approximation(unsigned long area_number, |
|---|
| 258 | ALI_TARRAY<ALI_TLIST<ALI_MAP *> *> *map_lists, |
|---|
| 259 | ALI_SUB_SOLUTION *work_sol); |
|---|
| 260 | void make_approximation(ALI_PREALIGNER_CONTEXT *context); |
|---|
| 261 | |
|---|
| 262 | unsigned long number_of_solutions(); |
|---|
| 263 | |
|---|
| 264 | public: |
|---|
| 265 | |
|---|
| 266 | ALI_PREALIGNER(ALI_PREALIGNER_CONTEXT *context, ALI_PROFILE *profile, |
|---|
| 267 | unsigned long start_x, unsigned long end_x, |
|---|
| 268 | unsigned long start_y, unsigned long end_y); |
|---|
| 269 | ~ALI_PREALIGNER() { |
|---|
| 270 | if (sub_solution) |
|---|
| 271 | delete sub_solution; |
|---|
| 272 | } |
|---|
| 273 | |
|---|
| 274 | ALI_SEQUENCE *sequence() { |
|---|
| 275 | return result_mask.map->sequence(profile->sequence()); |
|---|
| 276 | } |
|---|
| 277 | ALI_SEQUENCE *sequence_without_inserts() { |
|---|
| 278 | return result_mask.map->sequence_without_inserts(profile->sequence()); |
|---|
| 279 | } |
|---|
| 280 | ALI_SUB_SOLUTION *solution() { |
|---|
| 281 | ALI_SUB_SOLUTION *ret_sol = sub_solution; |
|---|
| 282 | sub_solution = NULp; |
|---|
| 283 | return ret_sol; |
|---|
| 284 | } |
|---|
| 285 | ALI_TLIST<ali_prealigner_approx_element *> *approximation() { |
|---|
| 286 | return result_approx.list(); |
|---|
| 287 | } |
|---|
| 288 | }; |
|---|
| 289 | |
|---|
| 290 | #else |
|---|
| 291 | #error ali_prealigner.hxx included twice |
|---|
| 292 | #endif // ALI_PREALIGNER_HXX |
|---|