| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : ColumnStat_2_gnuplot.cxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // =============================================================== // |
|---|
| 10 | |
|---|
| 11 | #include "NT_local.h" |
|---|
| 12 | |
|---|
| 13 | #include <ColumnStat.hxx> |
|---|
| 14 | #include <AP_filter.hxx> |
|---|
| 15 | #include <awt_filter.hxx> |
|---|
| 16 | #include <aw_awars.hxx> |
|---|
| 17 | #include <aw_file.hxx> |
|---|
| 18 | #include <aw_msg.hxx> |
|---|
| 19 | #include <aw_root.hxx> |
|---|
| 20 | #include <aw_select.hxx> |
|---|
| 21 | #include <arbdbt.h> |
|---|
| 22 | #include <arb_file.h> |
|---|
| 23 | |
|---|
| 24 | #include <unistd.h> |
|---|
| 25 | #include <xcmd.hxx> |
|---|
| 26 | |
|---|
| 27 | #define AWAR_CS2GP "tmp/ntree/colstat_2_gnuplot" |
|---|
| 28 | |
|---|
| 29 | #define AWAR_CS2GP_NAME AWAR_CS2GP "/name" |
|---|
| 30 | #define AWAR_CS2GP_SUFFIX AWAR_CS2GP "/filter" |
|---|
| 31 | #define AWAR_CS2GP_DIRECTORY AWAR_CS2GP "/directory" |
|---|
| 32 | #define AWAR_CS2GP_FILENAME AWAR_CS2GP "/file_name" |
|---|
| 33 | |
|---|
| 34 | #define AWAR_CS2GP_SMOOTH_VALUES AWAR_CS2GP "/smooth_values" |
|---|
| 35 | #define AWAR_CS2GP_SMOOTH_GNUPLOT AWAR_CS2GP "/smooth_gnuplot" |
|---|
| 36 | #define AWAR_CS2GP_GNUPLOT_OVERLAY_PREFIX AWAR_CS2GP "/gnuplot_overlay_prefix" |
|---|
| 37 | #define AWAR_CS2GP_GNUPLOT_OVERLAY_POSTFIX AWAR_CS2GP "/gnuplot_overlay_postfix" |
|---|
| 38 | #define AWAR_CS2GP_FILTER_NAME AWAR_CS2GP "/ap_filter/name" |
|---|
| 39 | |
|---|
| 40 | static GB_ERROR split_stat_filename(const char *fname, char **dirPtr, char **name_prefixPtr, char **name_postfixPtr) { |
|---|
| 41 | // 'fname' is sth like 'directory/prefix.sth_gnu' |
|---|
| 42 | // 'dirPtr' is set to a malloc-copy of 'directory' |
|---|
| 43 | // 'name_prefixPtr' is set to a malloc-copy of 'prefix' (defaults to '*') |
|---|
| 44 | // 'name_postfixPtr' is set to a malloc-copy of 'sth_gnu' (defaults to '*_gnu') |
|---|
| 45 | |
|---|
| 46 | *dirPtr = NULp; |
|---|
| 47 | *name_prefixPtr = NULp; |
|---|
| 48 | *name_postfixPtr = NULp; |
|---|
| 49 | |
|---|
| 50 | const char *lslash = strrchr(fname, '/'); |
|---|
| 51 | if (!lslash) return GB_export_errorf("'%s' has to contain a '/'", fname); |
|---|
| 52 | |
|---|
| 53 | char *dir = ARB_strdup(fname); |
|---|
| 54 | dir[lslash-fname] = 0; // cut off at last '/' |
|---|
| 55 | |
|---|
| 56 | char *name_prefix = ARB_strdup(lslash+1); |
|---|
| 57 | char *name_postfix = NULp; |
|---|
| 58 | char *ldot = strrchr(name_prefix, '.'); |
|---|
| 59 | |
|---|
| 60 | if (ldot) { |
|---|
| 61 | ldot[0] = 0; |
|---|
| 62 | name_postfix = ARB_strdup(ldot+1); |
|---|
| 63 | } |
|---|
| 64 | if (!ldot || name_prefix[0] == 0) freedup(name_prefix, "*"); // no dot or empty name_prefix |
|---|
| 65 | if (!name_postfix || name_postfix[0] == 0) freedup(name_postfix, "*_gnu"); |
|---|
| 66 | |
|---|
| 67 | nt_assert(name_prefix); |
|---|
| 68 | nt_assert(name_postfix); |
|---|
| 69 | |
|---|
| 70 | *dirPtr = dir; |
|---|
| 71 | *name_prefixPtr = name_prefix; |
|---|
| 72 | *name_postfixPtr = name_postfix; |
|---|
| 73 | |
|---|
| 74 | return NULp; |
|---|
| 75 | } |
|---|
| 76 | |
|---|
| 77 | static char * get_overlay_files(AW_root *awr, const char *fname, GB_ERROR& error) { |
|---|
| 78 | nt_assert(!error); |
|---|
| 79 | |
|---|
| 80 | bool overlay_prefix = awr->awar(AWAR_CS2GP_GNUPLOT_OVERLAY_PREFIX)->read_int(); |
|---|
| 81 | bool overlay_postfix = awr->awar(AWAR_CS2GP_GNUPLOT_OVERLAY_POSTFIX)->read_int(); |
|---|
| 82 | |
|---|
| 83 | char *dir, *name_prefix, *name_postfix; |
|---|
| 84 | error = split_stat_filename(fname, &dir, &name_prefix, &name_postfix); |
|---|
| 85 | |
|---|
| 86 | char *found_files = NULp; |
|---|
| 87 | if (!error) { |
|---|
| 88 | char *found_prefix_files = NULp; |
|---|
| 89 | char *found_postfix_files = NULp; |
|---|
| 90 | |
|---|
| 91 | if (overlay_prefix || overlay_postfix) { |
|---|
| 92 | char *mask = GBS_global_string_copy("%s.*_gnu", name_prefix); |
|---|
| 93 | if (overlay_prefix) { |
|---|
| 94 | // @@@ change error handling for GB_find_all_files() - globally! |
|---|
| 95 | found_prefix_files = GB_find_all_files(dir, mask, false); |
|---|
| 96 | if (!found_prefix_files) error = GB_get_error(); |
|---|
| 97 | } |
|---|
| 98 | free(mask); |
|---|
| 99 | |
|---|
| 100 | if (!error) { |
|---|
| 101 | mask = GBS_global_string_copy("*.%s", name_postfix); |
|---|
| 102 | if (overlay_postfix) { |
|---|
| 103 | found_postfix_files = GB_find_all_files(dir, mask, false); |
|---|
| 104 | if (!found_postfix_files) error = GB_get_error(); |
|---|
| 105 | } |
|---|
| 106 | free(mask); |
|---|
| 107 | } |
|---|
| 108 | } |
|---|
| 109 | |
|---|
| 110 | if (!error) { |
|---|
| 111 | if (found_prefix_files) { |
|---|
| 112 | if (found_postfix_files) { |
|---|
| 113 | found_files = GBS_global_string_copy("%s*%s", found_prefix_files, found_postfix_files); |
|---|
| 114 | } |
|---|
| 115 | else { // only found_prefix_files |
|---|
| 116 | found_files = found_prefix_files; |
|---|
| 117 | found_prefix_files = NULp; |
|---|
| 118 | } |
|---|
| 119 | } |
|---|
| 120 | else { |
|---|
| 121 | found_files = found_postfix_files; |
|---|
| 122 | found_postfix_files = NULp; |
|---|
| 123 | } |
|---|
| 124 | } |
|---|
| 125 | |
|---|
| 126 | free(found_postfix_files); |
|---|
| 127 | free(found_prefix_files); |
|---|
| 128 | } |
|---|
| 129 | |
|---|
| 130 | free(name_postfix); |
|---|
| 131 | free(name_prefix); |
|---|
| 132 | free(dir); |
|---|
| 133 | |
|---|
| 134 | return found_files; |
|---|
| 135 | } |
|---|
| 136 | |
|---|
| 137 | enum PlotType { |
|---|
| 138 | PT_GC_RATIO, |
|---|
| 139 | PT_GA_RATIO, |
|---|
| 140 | PT_RATE, |
|---|
| 141 | PT_TT_RATIO, |
|---|
| 142 | PT_MOST_FREQUENT_BASE, |
|---|
| 143 | PT_SECOND_FREQUENT_BASE, |
|---|
| 144 | PT_THIRD_FREQUENT_BASE, |
|---|
| 145 | PT_LEAST_FREQUENT_BASE, |
|---|
| 146 | PT_BASE_A, |
|---|
| 147 | PT_BASE_C, |
|---|
| 148 | PT_BASE_G, |
|---|
| 149 | PT_BASE_TU, |
|---|
| 150 | PT_HELIX, |
|---|
| 151 | |
|---|
| 152 | PT_PLOT_TYPES, |
|---|
| 153 | PT_UNKNOWN |
|---|
| 154 | }; |
|---|
| 155 | |
|---|
| 156 | static const char *plotTypeName[PT_PLOT_TYPES] = { |
|---|
| 157 | "gc_gnu", |
|---|
| 158 | "ga_gnu", |
|---|
| 159 | "rate_gnu", |
|---|
| 160 | "tt_gnu", |
|---|
| 161 | "f1_gnu", |
|---|
| 162 | "f2_gnu", |
|---|
| 163 | "f3_gnu", |
|---|
| 164 | "f4_gnu", |
|---|
| 165 | "a_gnu", |
|---|
| 166 | "c_gnu", |
|---|
| 167 | "g_gnu", |
|---|
| 168 | "tu_gnu", |
|---|
| 169 | "helix_gnu" |
|---|
| 170 | }; |
|---|
| 171 | |
|---|
| 172 | static const char *plotTypeDescription[PT_PLOT_TYPES] = { |
|---|
| 173 | "G+C ratio", |
|---|
| 174 | "G+A ratio", |
|---|
| 175 | "Rate", |
|---|
| 176 | "TT Ratio", |
|---|
| 177 | "Most frequent base", |
|---|
| 178 | "2nd frequent base", |
|---|
| 179 | "3rd frequent base", |
|---|
| 180 | "Least frequent base", |
|---|
| 181 | "A ratio", |
|---|
| 182 | "C ratio", |
|---|
| 183 | "G ratio", |
|---|
| 184 | "T/U ratio", |
|---|
| 185 | "Helix" |
|---|
| 186 | }; |
|---|
| 187 | |
|---|
| 188 | static PlotType string2PlotType(const char *type) { |
|---|
| 189 | for (int pt = 0; pt<PT_PLOT_TYPES; ++pt) { |
|---|
| 190 | if (strcmp(type, plotTypeName[pt]) == 0) { |
|---|
| 191 | return PlotType(pt); |
|---|
| 192 | } |
|---|
| 193 | } |
|---|
| 194 | |
|---|
| 195 | return PT_UNKNOWN; |
|---|
| 196 | } |
|---|
| 197 | |
|---|
| 198 | static const char *makeTitle(const char *fname) { |
|---|
| 199 | const char *rslash = strrchr(fname, '/'); |
|---|
| 200 | if (rslash) rslash++; |
|---|
| 201 | else rslash = fname; |
|---|
| 202 | |
|---|
| 203 | char *name = ARB_strdup(rslash); |
|---|
| 204 | char *rdot = strrchr(name, '.'); |
|---|
| 205 | |
|---|
| 206 | PlotType pt = PT_UNKNOWN; |
|---|
| 207 | if (rdot) pt = string2PlotType(rdot+1); |
|---|
| 208 | |
|---|
| 209 | static char *title = NULp; |
|---|
| 210 | freenull(title); |
|---|
| 211 | |
|---|
| 212 | if (pt == PT_UNKNOWN) { |
|---|
| 213 | title = GBS_global_string_copy("%s (unknown type)", name); |
|---|
| 214 | } |
|---|
| 215 | else { |
|---|
| 216 | rdot[0] = 0; |
|---|
| 217 | title = GBS_global_string_copy("%s (%s)", name, plotTypeDescription[pt]); |
|---|
| 218 | } |
|---|
| 219 | |
|---|
| 220 | free(name); |
|---|
| 221 | |
|---|
| 222 | return title; |
|---|
| 223 | } |
|---|
| 224 | |
|---|
| 225 | // ------------------- |
|---|
| 226 | // SortedFreq |
|---|
| 227 | |
|---|
| 228 | class SortedFreq : virtual Noncopyable { |
|---|
| 229 | float *freq[4]; |
|---|
| 230 | |
|---|
| 231 | public: |
|---|
| 232 | SortedFreq(const ColumnStat *column_stat); |
|---|
| 233 | ~SortedFreq(); |
|---|
| 234 | |
|---|
| 235 | float get(PlotType plot_type, size_t pos) const { |
|---|
| 236 | float f; |
|---|
| 237 | switch (plot_type) { |
|---|
| 238 | case PT_MOST_FREQUENT_BASE: f = freq[0][pos]; break; |
|---|
| 239 | case PT_SECOND_FREQUENT_BASE: f = freq[1][pos]; break; |
|---|
| 240 | case PT_THIRD_FREQUENT_BASE: f = freq[2][pos]; break; |
|---|
| 241 | case PT_LEAST_FREQUENT_BASE: f = freq[3][pos]; break; |
|---|
| 242 | default: nt_assert(0); f = 0; break; |
|---|
| 243 | } |
|---|
| 244 | return f; |
|---|
| 245 | } |
|---|
| 246 | }; |
|---|
| 247 | |
|---|
| 248 | SortedFreq::SortedFreq(const ColumnStat *column_stat) { |
|---|
| 249 | size_t len = column_stat->get_length(); |
|---|
| 250 | for (int i = 0; i<4; ++i) { // 4 best frequencies |
|---|
| 251 | freq[i] = new float[len]; |
|---|
| 252 | for (size_t p = 0; p<len; ++p) freq[i][p] = 0.0; // clear |
|---|
| 253 | } |
|---|
| 254 | |
|---|
| 255 | for (unsigned int c = 0; c<256; ++c) { // all character stats |
|---|
| 256 | const float *cfreq = column_stat->get_frequencies((unsigned char)c); |
|---|
| 257 | if (cfreq) { |
|---|
| 258 | for (size_t p = 0; p<len; ++p) { // all positions |
|---|
| 259 | if (freq[3][p] < cfreq[p]) { |
|---|
| 260 | freq[3][p] = cfreq[p]; // found higher freq |
|---|
| 261 | |
|---|
| 262 | for (int i = 3; i > 0; --i) { // bubble upwards to sort |
|---|
| 263 | if (freq[i-1][p] >= freq[i][p]) break; // sorted! |
|---|
| 264 | |
|---|
| 265 | float f = freq[i][p]; |
|---|
| 266 | freq[i][p] = freq[i-1][p]; |
|---|
| 267 | freq[i-1][p] = f; |
|---|
| 268 | } |
|---|
| 269 | } |
|---|
| 270 | } |
|---|
| 271 | } |
|---|
| 272 | } |
|---|
| 273 | |
|---|
| 274 | #if defined(DEBUG) |
|---|
| 275 | for (size_t p = 0; p<len; ++p) { // all positions |
|---|
| 276 | nt_assert(freq[0][p] >= freq[1][p]); |
|---|
| 277 | nt_assert(freq[1][p] >= freq[2][p]); |
|---|
| 278 | nt_assert(freq[2][p] >= freq[3][p]); |
|---|
| 279 | } |
|---|
| 280 | #endif // DEBUG |
|---|
| 281 | } |
|---|
| 282 | SortedFreq::~SortedFreq() { |
|---|
| 283 | for (int i = 0; i<4; ++i) delete [] freq[i]; |
|---|
| 284 | } |
|---|
| 285 | |
|---|
| 286 | class Smoother : virtual Noncopyable { |
|---|
| 287 | double *value; |
|---|
| 288 | size_t *index; |
|---|
| 289 | size_t size; |
|---|
| 290 | size_t halfsize; |
|---|
| 291 | size_t next; |
|---|
| 292 | size_t added; |
|---|
| 293 | double sum; |
|---|
| 294 | |
|---|
| 295 | size_t wrap(size_t idx) { return idx >= size ? idx-size : idx; } |
|---|
| 296 | |
|---|
| 297 | public: |
|---|
| 298 | Smoother(size_t smooth_range) |
|---|
| 299 | : size(smooth_range), |
|---|
| 300 | halfsize(size/2), |
|---|
| 301 | next(0), |
|---|
| 302 | added(0), |
|---|
| 303 | sum(0.0) |
|---|
| 304 | { |
|---|
| 305 | nt_assert(size>0); |
|---|
| 306 | |
|---|
| 307 | value = new double[size]; |
|---|
| 308 | index = new size_t[size]; |
|---|
| 309 | |
|---|
| 310 | for (size_t i = 0; i<size; ++i) value[i] = 0.0; |
|---|
| 311 | } |
|---|
| 312 | ~Smoother() { |
|---|
| 313 | delete [] value; |
|---|
| 314 | delete [] index; |
|---|
| 315 | } |
|---|
| 316 | |
|---|
| 317 | void print(FILE *out, size_t i, double v) { |
|---|
| 318 | sum = sum+v-value[next]; |
|---|
| 319 | |
|---|
| 320 | index[next] = i; |
|---|
| 321 | value[next] = v; |
|---|
| 322 | |
|---|
| 323 | next = wrap(next+1); |
|---|
| 324 | |
|---|
| 325 | if (added<size) ++added; |
|---|
| 326 | if (added == size) { |
|---|
| 327 | size_t printNext = wrap(next+halfsize); |
|---|
| 328 | fprintf(out, "%zu %f\n", index[printNext], sum/size); |
|---|
| 329 | } |
|---|
| 330 | } |
|---|
| 331 | }; |
|---|
| 332 | |
|---|
| 333 | enum PlotMode { |
|---|
| 334 | PLOT, // write file |
|---|
| 335 | PLOT_AND_VIEW, // write file and run gnuplot |
|---|
| 336 | PLOT_CLEANUP, // delete all files with same prefix |
|---|
| 337 | }; |
|---|
| 338 | |
|---|
| 339 | struct PlotParam { |
|---|
| 340 | ColumnStat *colstat; |
|---|
| 341 | adfiltercbstruct *filterdef; |
|---|
| 342 | |
|---|
| 343 | PlotParam(ColumnStat *colstat_, adfiltercbstruct *filterdef_) |
|---|
| 344 | : colstat(colstat_), |
|---|
| 345 | filterdef(filterdef_) |
|---|
| 346 | {} |
|---|
| 347 | }; |
|---|
| 348 | |
|---|
| 349 | static void colstat_2_gnuplot_cb(AW_window *aww, PlotParam *param, PlotMode mode) { |
|---|
| 350 | GB_transaction ta(GLOBAL.gb_main); |
|---|
| 351 | GB_ERROR error = NULp; |
|---|
| 352 | ColumnStat *column_stat = param->colstat; |
|---|
| 353 | AW_root *awr = aww->get_root(); |
|---|
| 354 | |
|---|
| 355 | if (mode == PLOT || mode == PLOT_AND_VIEW) { |
|---|
| 356 | char *filterstring = awr->awar(param->filterdef->def_filter)->read_string(); |
|---|
| 357 | char *alignment_name = awr->awar(param->filterdef->def_alignment)->read_string(); |
|---|
| 358 | long alignment_length = GBT_get_alignment_len(GLOBAL.gb_main, alignment_name); |
|---|
| 359 | |
|---|
| 360 | if (alignment_length<=0) { |
|---|
| 361 | GB_clear_error(); |
|---|
| 362 | error = "Please select a valid alignment"; |
|---|
| 363 | } |
|---|
| 364 | else { |
|---|
| 365 | AP_filter filter(filterstring, "0", alignment_length); |
|---|
| 366 | |
|---|
| 367 | error = column_stat->calculate(&filter); |
|---|
| 368 | |
|---|
| 369 | if (!error && !column_stat->get_length()) error = "Please select column statistic"; |
|---|
| 370 | } |
|---|
| 371 | |
|---|
| 372 | free(alignment_name); |
|---|
| 373 | free(filterstring); |
|---|
| 374 | } |
|---|
| 375 | |
|---|
| 376 | if (!error) { |
|---|
| 377 | char *fname = awr->awar(AWAR_CS2GP_FILENAME)->read_string(); |
|---|
| 378 | |
|---|
| 379 | if (!strchr(fname, '/')) freeset(fname, GBS_global_string_copy("./%s", fname)); |
|---|
| 380 | if (strlen(fname) < 1) error = "Please enter file name"; |
|---|
| 381 | |
|---|
| 382 | if (mode == PLOT_CLEANUP) { // delete overlay files |
|---|
| 383 | if (!error) { |
|---|
| 384 | char *found_files = get_overlay_files(awr, fname, error); |
|---|
| 385 | |
|---|
| 386 | if (found_files) { |
|---|
| 387 | for (char *name = strtok(found_files, "*"); name; name = strtok(NULp, "*")) { |
|---|
| 388 | printf("Deleting gnuplot file '%s'\n", name); |
|---|
| 389 | if (unlink(name) != 0) printf("Can't delete '%s'\n", name); |
|---|
| 390 | } |
|---|
| 391 | free(found_files); |
|---|
| 392 | awr->awar(AWAR_CS2GP_DIRECTORY)->touch(); // reload file selection box |
|---|
| 393 | } |
|---|
| 394 | } |
|---|
| 395 | } |
|---|
| 396 | else { |
|---|
| 397 | FILE *out = NULp; |
|---|
| 398 | if (!error) { |
|---|
| 399 | out = fopen(fname, "w"); |
|---|
| 400 | if (!out) error = GB_export_errorf("Cannot write to file '%s'", fname); |
|---|
| 401 | } |
|---|
| 402 | |
|---|
| 403 | nt_assert(out || error); |
|---|
| 404 | |
|---|
| 405 | if (!error) { |
|---|
| 406 | char *type = awr->awar(AWAR_CS2GP_SUFFIX)->read_string(); |
|---|
| 407 | long smoothSize = awr->awar(AWAR_CS2GP_SMOOTH_VALUES)->read_int(); // 1 = discrete values |
|---|
| 408 | size_t columns = column_stat->get_length(); |
|---|
| 409 | |
|---|
| 410 | enum { |
|---|
| 411 | STAT_AMOUNT, |
|---|
| 412 | STAT_SIMPLE_FLOAT, |
|---|
| 413 | STAT_SIMPLE_BOOL, |
|---|
| 414 | STAT_SORT, |
|---|
| 415 | STAT_UNKNOWN |
|---|
| 416 | } stat_type = STAT_UNKNOWN; |
|---|
| 417 | union { |
|---|
| 418 | struct { |
|---|
| 419 | const float *A; |
|---|
| 420 | const float *C; |
|---|
| 421 | const float *G; |
|---|
| 422 | const float *TU; |
|---|
| 423 | } amount; // STAT_AMOUNT |
|---|
| 424 | const float *floatVals; // STAT_SIMPLE_FLOAT |
|---|
| 425 | const bool *boolVals; // STAT_SIMPLE_BOOL |
|---|
| 426 | SortedFreq *sorted; // STAT_SORT |
|---|
| 427 | } data; |
|---|
| 428 | |
|---|
| 429 | data.amount.A = NULp; // silence warnings |
|---|
| 430 | data.amount.C = NULp; |
|---|
| 431 | data.amount.G = NULp; |
|---|
| 432 | data.amount.TU = NULp; |
|---|
| 433 | |
|---|
| 434 | PlotType plot_type = string2PlotType(type); |
|---|
| 435 | switch (plot_type) { |
|---|
| 436 | case PT_GC_RATIO: |
|---|
| 437 | case PT_GA_RATIO: |
|---|
| 438 | case PT_BASE_A: |
|---|
| 439 | case PT_BASE_C: |
|---|
| 440 | case PT_BASE_G: |
|---|
| 441 | case PT_BASE_TU: { |
|---|
| 442 | stat_type = STAT_AMOUNT; |
|---|
| 443 | |
|---|
| 444 | data.amount.A = column_stat->get_frequencies('A'); |
|---|
| 445 | data.amount.C = column_stat->get_frequencies('C'); |
|---|
| 446 | data.amount.G = column_stat->get_frequencies('G'); |
|---|
| 447 | data.amount.TU = column_stat->get_frequencies('U'); |
|---|
| 448 | break; |
|---|
| 449 | } |
|---|
| 450 | case PT_RATE: |
|---|
| 451 | stat_type = STAT_SIMPLE_FLOAT; |
|---|
| 452 | data.floatVals = column_stat->get_rates(); |
|---|
| 453 | break; |
|---|
| 454 | |
|---|
| 455 | case PT_TT_RATIO: |
|---|
| 456 | stat_type = STAT_SIMPLE_FLOAT; |
|---|
| 457 | data.floatVals = column_stat->get_ttratio(); |
|---|
| 458 | break; |
|---|
| 459 | |
|---|
| 460 | case PT_HELIX: { |
|---|
| 461 | stat_type = STAT_SIMPLE_BOOL; |
|---|
| 462 | data.boolVals = column_stat->get_is_helix(); |
|---|
| 463 | break; |
|---|
| 464 | } |
|---|
| 465 | case PT_MOST_FREQUENT_BASE: |
|---|
| 466 | case PT_SECOND_FREQUENT_BASE: |
|---|
| 467 | case PT_THIRD_FREQUENT_BASE: |
|---|
| 468 | case PT_LEAST_FREQUENT_BASE: { |
|---|
| 469 | stat_type = STAT_SORT; |
|---|
| 470 | data.sorted = new SortedFreq(column_stat); |
|---|
| 471 | break; |
|---|
| 472 | } |
|---|
| 473 | case PT_PLOT_TYPES: |
|---|
| 474 | case PT_UNKNOWN: |
|---|
| 475 | error = "Please select what to plot"; |
|---|
| 476 | break; |
|---|
| 477 | } |
|---|
| 478 | |
|---|
| 479 | const GB_UINT4 *weights = column_stat->get_weights(); |
|---|
| 480 | |
|---|
| 481 | if (!error) { |
|---|
| 482 | Smoother smoother(smoothSize); |
|---|
| 483 | |
|---|
| 484 | for (size_t j=0; j<columns; ++j) { |
|---|
| 485 | if (!weights[j]) continue; |
|---|
| 486 | |
|---|
| 487 | double val = 0; |
|---|
| 488 | switch (stat_type) { |
|---|
| 489 | case STAT_AMOUNT: { |
|---|
| 490 | float A = data.amount.A[j]; |
|---|
| 491 | float C = data.amount.C[j]; |
|---|
| 492 | float G = data.amount.G[j]; |
|---|
| 493 | float TU = data.amount.TU[j]; |
|---|
| 494 | |
|---|
| 495 | float amount = A+C+G+TU; |
|---|
| 496 | |
|---|
| 497 | switch (plot_type) { |
|---|
| 498 | case PT_GC_RATIO: val = (G+C)/amount; break; |
|---|
| 499 | case PT_GA_RATIO: val = (G+A)/amount; break; |
|---|
| 500 | case PT_BASE_A: val = A/amount; break; |
|---|
| 501 | case PT_BASE_C: val = C/amount; break; |
|---|
| 502 | case PT_BASE_G: val = G/amount; break; |
|---|
| 503 | case PT_BASE_TU: val = TU/amount; break; |
|---|
| 504 | |
|---|
| 505 | default: nt_assert(0); break; |
|---|
| 506 | } |
|---|
| 507 | break; |
|---|
| 508 | } |
|---|
| 509 | case STAT_SIMPLE_FLOAT: val = data.floatVals[j]; break; |
|---|
| 510 | case STAT_SIMPLE_BOOL: val = data.boolVals[j]; break; |
|---|
| 511 | case STAT_SORT: val = data.sorted->get(plot_type, j); break; |
|---|
| 512 | |
|---|
| 513 | case STAT_UNKNOWN: nt_assert(0); break; |
|---|
| 514 | } |
|---|
| 515 | |
|---|
| 516 | smoother.print(out, j, val); |
|---|
| 517 | } |
|---|
| 518 | } |
|---|
| 519 | |
|---|
| 520 | if (stat_type == STAT_SORT) delete data.sorted; |
|---|
| 521 | free(type); |
|---|
| 522 | } |
|---|
| 523 | |
|---|
| 524 | if (out) { |
|---|
| 525 | fclose(out); |
|---|
| 526 | out = NULp; |
|---|
| 527 | } |
|---|
| 528 | |
|---|
| 529 | if (!error) { |
|---|
| 530 | awr->awar(AWAR_CS2GP_DIRECTORY)->touch(); // reload file selection box |
|---|
| 531 | |
|---|
| 532 | if (mode == PLOT_AND_VIEW) { // run gnuplot? |
|---|
| 533 | char *command_file; |
|---|
| 534 | char *command_name = GB_unique_filename("arb", "gnuplot"); |
|---|
| 535 | |
|---|
| 536 | out = GB_fopen_tempfile(command_name, "wt", &command_file); |
|---|
| 537 | if (!out) error = GB_await_error(); |
|---|
| 538 | else { |
|---|
| 539 | char *smooth = awr->awar(AWAR_CS2GP_SMOOTH_GNUPLOT)->read_string(); |
|---|
| 540 | char *found_files = get_overlay_files(awr, fname, error); |
|---|
| 541 | |
|---|
| 542 | fprintf(out, "set samples 1000\n"); |
|---|
| 543 | |
|---|
| 544 | bool plotted = false; // set to true after first 'plot' command (other plots use 'replot') |
|---|
| 545 | const char *plot_command[] = { "plot", "replot" }; |
|---|
| 546 | |
|---|
| 547 | if (found_files) { |
|---|
| 548 | for (char *name = strtok(found_files, "*"); name; name = strtok(NULp, "*")) { |
|---|
| 549 | if (strcmp(name, fname) != 0) { // latest data file is done below |
|---|
| 550 | fprintf(out, "%s \"%s\" %s title \"%s\"\n", plot_command[int(plotted)], name, smooth, makeTitle(name)); |
|---|
| 551 | plotted = true; |
|---|
| 552 | } |
|---|
| 553 | } |
|---|
| 554 | free(found_files); |
|---|
| 555 | } |
|---|
| 556 | |
|---|
| 557 | fprintf(out, "%s \"%s\" %s title \"%s\"\n", plot_command[int(plotted)], fname, smooth, makeTitle(fname)); |
|---|
| 558 | fprintf(out, "pause mouse any \"Any key or button will terminate gnuplot\"\n"); |
|---|
| 559 | fclose(out); |
|---|
| 560 | out = NULp; |
|---|
| 561 | |
|---|
| 562 | char *script = GBS_global_string_copy("gnuplot %s && rm -f %s", command_file, command_file); |
|---|
| 563 | ARB_system(script, XCmdType(XCMD_ASYNC_WAIT_ON_ERROR, GLOBAL.gb_main)); |
|---|
| 564 | free(script); |
|---|
| 565 | free(smooth); |
|---|
| 566 | } |
|---|
| 567 | free(command_file); |
|---|
| 568 | free(command_name); |
|---|
| 569 | } |
|---|
| 570 | } |
|---|
| 571 | } |
|---|
| 572 | free(fname); |
|---|
| 573 | } |
|---|
| 574 | |
|---|
| 575 | if (error) aw_message(error); |
|---|
| 576 | } |
|---|
| 577 | |
|---|
| 578 | static void colstat_ali_changed_cb(AW_root *root, AW_awar *awar_ali) { |
|---|
| 579 | if (GLOBAL.gb_main) { |
|---|
| 580 | long smooth_max = GBT_get_alignment_len(GLOBAL.gb_main, awar_ali->read_char_pntr()); |
|---|
| 581 | if (smooth_max<0) { // ali not found |
|---|
| 582 | GB_clear_error(); |
|---|
| 583 | smooth_max = INT_MAX; |
|---|
| 584 | } |
|---|
| 585 | root->awar(AWAR_CS2GP_SMOOTH_VALUES)->set_minmax(1, smooth_max); |
|---|
| 586 | } |
|---|
| 587 | } |
|---|
| 588 | |
|---|
| 589 | AW_window *NT_create_colstat_2_gnuplot_window(AW_root *root) { |
|---|
| 590 | GB_transaction ta(GLOBAL.gb_main); |
|---|
| 591 | |
|---|
| 592 | AW_awar *awar_default_alignment = root->awar_string(AWAR_DEFAULT_ALIGNMENT, "", GLOBAL.gb_main); |
|---|
| 593 | |
|---|
| 594 | ColumnStat *column_stat = new ColumnStat(GLOBAL.gb_main, root, AWAR_CS2GP_NAME, awar_default_alignment); |
|---|
| 595 | AW_window_simple *aws = new AW_window_simple; |
|---|
| 596 | |
|---|
| 597 | aws->init(root, "EXPORT_CSP_TO_GNUPLOT", "Export Column statistic to GnuPlot"); |
|---|
| 598 | aws->load_xfig("cpro/csp_2_gnuplot.fig"); |
|---|
| 599 | |
|---|
| 600 | root->awar_int(AWAR_CS2GP_SMOOTH_VALUES, 1); |
|---|
| 601 | root->awar_int(AWAR_CS2GP_GNUPLOT_OVERLAY_POSTFIX); |
|---|
| 602 | root->awar_int(AWAR_CS2GP_GNUPLOT_OVERLAY_PREFIX); |
|---|
| 603 | root->awar_string(AWAR_CS2GP_SMOOTH_GNUPLOT); |
|---|
| 604 | |
|---|
| 605 | AW_create_fileselection_awars(root, AWAR_CS2GP, "", ".gc_gnu", "noname.gc_gnu"); |
|---|
| 606 | |
|---|
| 607 | aws->at("close"); |
|---|
| 608 | aws->callback(AW_POPDOWN); |
|---|
| 609 | aws->create_button("CLOSE", "CLOSE", "C"); |
|---|
| 610 | |
|---|
| 611 | aws->at("help"); aws->callback(makeHelpCallback("csp_2_gnuplot.hlp")); |
|---|
| 612 | aws->create_button("HELP", "HELP", "H"); |
|---|
| 613 | |
|---|
| 614 | AW_create_standard_fileselection(aws, AWAR_CS2GP); |
|---|
| 615 | |
|---|
| 616 | aws->at("csp"); |
|---|
| 617 | COLSTAT_create_selection_list(aws, column_stat); |
|---|
| 618 | |
|---|
| 619 | aws->at("what"); |
|---|
| 620 | AW_selection_list *plotTypeList = aws->create_selection_list(AWAR_CS2GP_SUFFIX); |
|---|
| 621 | for (int pt = 0; pt<PT_PLOT_TYPES; ++pt) { |
|---|
| 622 | plotTypeList->insert(plotTypeDescription[pt], plotTypeName[pt]); |
|---|
| 623 | } |
|---|
| 624 | plotTypeList->insert_default("<select one>", ""); |
|---|
| 625 | plotTypeList->update(); |
|---|
| 626 | |
|---|
| 627 | awt_create_filter_awars(root, AW_ROOT_DEFAULT, AWAR_CS2GP_FILTER_NAME, AWAR_DEFAULT_ALIGNMENT); |
|---|
| 628 | adfiltercbstruct *filter = awt_create_select_filter(root, GLOBAL.gb_main, AWAR_CS2GP_FILTER_NAME); |
|---|
| 629 | { |
|---|
| 630 | AW_awar *awar_ali = root->awar(filter->def_alignment); |
|---|
| 631 | awar_ali->add_callback(makeRootCallback(colstat_ali_changed_cb, awar_ali)); |
|---|
| 632 | awar_ali->touch(); |
|---|
| 633 | } |
|---|
| 634 | |
|---|
| 635 | aws->at("ap_filter"); |
|---|
| 636 | aws->callback(makeCreateWindowCallback(awt_create_select_filter_win, filter)); |
|---|
| 637 | aws->create_button("SELECT_FILTER", AWAR_CS2GP_FILTER_NAME); |
|---|
| 638 | |
|---|
| 639 | aws->at("smooth"); |
|---|
| 640 | aws->create_input_field(AWAR_CS2GP_SMOOTH_VALUES, 8); |
|---|
| 641 | |
|---|
| 642 | aws->at("smooth2"); |
|---|
| 643 | aws->create_toggle_field(AWAR_CS2GP_SMOOTH_GNUPLOT, AW_HORIZONTAL); |
|---|
| 644 | aws->insert_default_toggle("None", "N", ""); |
|---|
| 645 | aws->insert_toggle("Unique", "U", "smooth unique"); |
|---|
| 646 | aws->insert_toggle("CSpline", "S", "smooth cspline"); |
|---|
| 647 | aws->insert_toggle("Bezier", "B", "smooth bezier"); |
|---|
| 648 | aws->update_toggle_field(); |
|---|
| 649 | |
|---|
| 650 | aws->auto_space(10, 10); |
|---|
| 651 | aws->button_length(13); |
|---|
| 652 | |
|---|
| 653 | PlotParam *param = new PlotParam(column_stat, filter); // bound to CB, dont free |
|---|
| 654 | |
|---|
| 655 | aws->at("save"); |
|---|
| 656 | aws->callback(makeWindowCallback(colstat_2_gnuplot_cb, param, PLOT)); |
|---|
| 657 | aws->create_button("SAVE", "Save"); |
|---|
| 658 | |
|---|
| 659 | aws->highlight(); |
|---|
| 660 | aws->callback(makeWindowCallback(colstat_2_gnuplot_cb, param, PLOT_AND_VIEW)); |
|---|
| 661 | aws->create_button("SAVE_AND_VIEW", "Save & View"); |
|---|
| 662 | |
|---|
| 663 | aws->at("overlay1"); |
|---|
| 664 | aws->label("Overlay statistics with same prefix?"); |
|---|
| 665 | aws->create_toggle(AWAR_CS2GP_GNUPLOT_OVERLAY_PREFIX); |
|---|
| 666 | |
|---|
| 667 | aws->at("overlay2"); |
|---|
| 668 | aws->label("Overlay statistics with same postfix?"); |
|---|
| 669 | aws->create_toggle(AWAR_CS2GP_GNUPLOT_OVERLAY_POSTFIX); |
|---|
| 670 | |
|---|
| 671 | aws->at("del_overlays"); |
|---|
| 672 | aws->callback(makeWindowCallback(colstat_2_gnuplot_cb, param, PLOT_CLEANUP)); |
|---|
| 673 | aws->create_autosize_button("DEL_OVERLAYS", "Delete currently overlayed files", "D", 2); |
|---|
| 674 | |
|---|
| 675 | return aws; |
|---|
| 676 | } |
|---|
| 677 | |
|---|
| 678 | |
|---|