| 1 | // ========================================================= // |
|---|
| 2 | // // |
|---|
| 3 | // File : xferset.c // |
|---|
| 4 | // Purpose : field transfer sets // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in Mar 19 // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // ========================================================= // |
|---|
| 10 | |
|---|
| 11 | #include "xferset.h" |
|---|
| 12 | |
|---|
| 13 | #include <ConfigMapping.h> |
|---|
| 14 | #include <BufferedFileReader.h> |
|---|
| 15 | #include <arbdbt.h> |
|---|
| 16 | #include <arb_str.h> |
|---|
| 17 | #include <arb_stdstr.h> |
|---|
| 18 | |
|---|
| 19 | #include <set> |
|---|
| 20 | #include <gb_aci.h> |
|---|
| 21 | |
|---|
| 22 | using namespace std; |
|---|
| 23 | |
|---|
| 24 | namespace FieldTransfer { |
|---|
| 25 | |
|---|
| 26 | typedef set<string, NoCaseCmp> StrSet; |
|---|
| 27 | |
|---|
| 28 | static void StrSet2StrArray(const StrSet& src, StrArray& dst) { |
|---|
| 29 | for (StrSet::const_iterator i = src.begin(); i != src.end(); ++i) { |
|---|
| 30 | dst.put(strdup(i->c_str())); |
|---|
| 31 | } |
|---|
| 32 | } |
|---|
| 33 | static void StrArray2StrSet(const StrArray& src, StrSet& dst) { |
|---|
| 34 | for (unsigned i = 0; i<src.size(); ++i) { |
|---|
| 35 | dst.insert(src[i]); |
|---|
| 36 | } |
|---|
| 37 | } |
|---|
| 38 | |
|---|
| 39 | void RuleSet::extractUsedFields(StrArray& input, StrArray& output) const { // @@@ want flavor just filling 2 StrSets |
|---|
| 40 | StrSet in, out; |
|---|
| 41 | for (unsigned i = 0; i<size(); ++i) { |
|---|
| 42 | const Rule& rule = get(i); |
|---|
| 43 | const string& srcFields = rule.getSourceFields(); |
|---|
| 44 | if (rule.multiple_source_fields()) { |
|---|
| 45 | ConstStrArray ifield; |
|---|
| 46 | GBT_split_string(ifield, srcFields.c_str(), ';'); |
|---|
| 47 | for (unsigned f = 0; f<ifield.size(); ++f) { |
|---|
| 48 | const char *source = ifield[f]; |
|---|
| 49 | if (source[0]) in.insert(source); |
|---|
| 50 | } |
|---|
| 51 | } |
|---|
| 52 | else { |
|---|
| 53 | if (!srcFields.empty()) in.insert(srcFields); |
|---|
| 54 | } |
|---|
| 55 | const string& target = rule.targetField(); |
|---|
| 56 | if (!target.empty()) out.insert(target); |
|---|
| 57 | } |
|---|
| 58 | StrSet2StrArray(in, input); |
|---|
| 59 | StrSet2StrArray(out, output); |
|---|
| 60 | } |
|---|
| 61 | |
|---|
| 62 | TransportedData ReadRule::readTypedFromField(GB_TYPES readAsType, GBDATA *gb_field) const { |
|---|
| 63 | xf_assert(GB_read_type(gb_field) != GB_DB); // fails e.g. if rule defined for a name used by a container |
|---|
| 64 | |
|---|
| 65 | switch (readAsType) { |
|---|
| 66 | case GB_INT: { |
|---|
| 67 | int asInt = GB_read_int(gb_field); |
|---|
| 68 | return TransportedData(asInt); |
|---|
| 69 | } |
|---|
| 70 | case GB_FLOAT: { |
|---|
| 71 | float asFloat = GB_read_float(gb_field); |
|---|
| 72 | return TransportedData(asFloat); |
|---|
| 73 | } |
|---|
| 74 | case GB_STRING: { |
|---|
| 75 | char *asStr = GB_read_as_string(gb_field); |
|---|
| 76 | string data(asStr); |
|---|
| 77 | free(asStr); |
|---|
| 78 | return TransportedData(data); |
|---|
| 79 | } |
|---|
| 80 | default: xf_assert(0); break; // invalid type |
|---|
| 81 | } |
|---|
| 82 | xf_assert(0); // should be never reached. |
|---|
| 83 | return TransportedData::none(); |
|---|
| 84 | } |
|---|
| 85 | |
|---|
| 86 | TransportedData ReadRule::aciAppliedTo(const string& toStr, GBDATA *gb_main, GBDATA *gb_dest_item) const { |
|---|
| 87 | // We can not generally provide a meaningful item for ACI here. |
|---|
| 88 | // Currently it always uses the destination item, but this item may be some dummy item, |
|---|
| 89 | // e.g. a species clone used only during transfer. |
|---|
| 90 | |
|---|
| 91 | GBL_env env(gb_main, NULp); |
|---|
| 92 | GBL_call_env callEnv(gb_dest_item, env); |
|---|
| 93 | |
|---|
| 94 | char *result = GB_command_interpreter_in_env(toStr.c_str(), aci.c_str(), callEnv); |
|---|
| 95 | if (result) { |
|---|
| 96 | string converted(result); |
|---|
| 97 | free(result); |
|---|
| 98 | return TransportedData(converted); |
|---|
| 99 | } |
|---|
| 100 | return TransportedData::makeError(GB_await_error()); |
|---|
| 101 | } |
|---|
| 102 | |
|---|
| 103 | inline TransportedData cannotReadContainer(const char *containerName) { |
|---|
| 104 | return TransportedData::makeError(GBS_global_string("cannot read as data ('%s' is a container)", containerName)); |
|---|
| 105 | } |
|---|
| 106 | |
|---|
| 107 | TransportedData ReadRule::readFrom(GBDATA *gb_item, GBDATA *gb_dest_item) const { |
|---|
| 108 | // 'gb_dest_item' only used for ACI |
|---|
| 109 | |
|---|
| 110 | if (!gb_item) { // got no item -> can't read |
|---|
| 111 | return TransportedData::makeError("lacking item to readFrom"); |
|---|
| 112 | } |
|---|
| 113 | |
|---|
| 114 | if (fields.empty()) { |
|---|
| 115 | return TransportedData::makeError("no source field(s) specified"); |
|---|
| 116 | } |
|---|
| 117 | |
|---|
| 118 | if (multiple_source_fields()) { |
|---|
| 119 | ConstStrArray field; |
|---|
| 120 | GBT_split_string(field, fields.c_str(), ';'); |
|---|
| 121 | |
|---|
| 122 | string concat; |
|---|
| 123 | bool gotData = false; // at least one input field found? |
|---|
| 124 | for (size_t f = 0; f<field.size(); ++f) { |
|---|
| 125 | GBDATA *gb_field = GB_search(gb_item, field[f], GB_FIND); |
|---|
| 126 | if (gb_field) { |
|---|
| 127 | GB_TYPES sourceType = GB_read_type(gb_field); |
|---|
| 128 | if (sourceType == GB_DB) { |
|---|
| 129 | return cannotReadContainer(field[f]); |
|---|
| 130 | } |
|---|
| 131 | |
|---|
| 132 | TransportedData plain = readTypedFromField(GB_STRING, gb_field); // ignores sourceType |
|---|
| 133 | if (plain.failed()) return plain; |
|---|
| 134 | |
|---|
| 135 | xf_assert(plain.exists()); |
|---|
| 136 | if (!concat.empty()) concat += separator; |
|---|
| 137 | concat += plain.getString(); |
|---|
| 138 | |
|---|
| 139 | gotData = true; |
|---|
| 140 | } |
|---|
| 141 | else if (GB_have_error()) { |
|---|
| 142 | return TransportedData::makeError(GB_await_error()); |
|---|
| 143 | } |
|---|
| 144 | } |
|---|
| 145 | |
|---|
| 146 | if (gotData) { |
|---|
| 147 | if (!aci.empty()) { |
|---|
| 148 | return aciAppliedTo(concat, GB_get_root(gb_item), gb_dest_item); |
|---|
| 149 | } |
|---|
| 150 | return TransportedData(concat); |
|---|
| 151 | } |
|---|
| 152 | // otherwise: do not transport if all source fields are missing |
|---|
| 153 | } |
|---|
| 154 | else { |
|---|
| 155 | GBDATA *gb_field = GB_search(gb_item, fields.c_str(), GB_FIND); |
|---|
| 156 | if (gb_field) { |
|---|
| 157 | GB_TYPES sourceType = GB_read_type(gb_field); |
|---|
| 158 | if (sourceType == GB_DB) { |
|---|
| 159 | return cannotReadContainer(fields.c_str()); |
|---|
| 160 | } |
|---|
| 161 | if (!aci.empty()) { |
|---|
| 162 | TransportedData plain = readTypedFromField(GB_STRING, gb_field); // ignores sourceType |
|---|
| 163 | // @@@ store sourceType if no dest-type deduced? |
|---|
| 164 | return aciAppliedTo(plain.getString(), GB_get_root(gb_item), gb_dest_item); |
|---|
| 165 | } |
|---|
| 166 | return readTypedFromField(sourceType, gb_field); |
|---|
| 167 | } |
|---|
| 168 | else if (GB_have_error()) { |
|---|
| 169 | return TransportedData::makeError(GB_await_error()); |
|---|
| 170 | } |
|---|
| 171 | // otherwise: do not transport if source field is missing |
|---|
| 172 | } |
|---|
| 173 | // if field does not exist -> report "no type" |
|---|
| 174 | return TransportedData::none(); |
|---|
| 175 | } |
|---|
| 176 | |
|---|
| 177 | static GB_ERROR unconvertedWrite(const TransportedData& data, GBDATA *gb_field) { |
|---|
| 178 | GB_ERROR error = NULp; |
|---|
| 179 | switch (data.getType()) { |
|---|
| 180 | case GB_STRING: { |
|---|
| 181 | const char *str = data.getString().c_str(); |
|---|
| 182 | error = GB_write_string(gb_field, str); |
|---|
| 183 | break; |
|---|
| 184 | } |
|---|
| 185 | case GB_INT: { |
|---|
| 186 | int num = data.getInt(); |
|---|
| 187 | error = GB_write_int(gb_field, num); |
|---|
| 188 | break; |
|---|
| 189 | } |
|---|
| 190 | case GB_FLOAT: { |
|---|
| 191 | float fnum = data.getFloat(); |
|---|
| 192 | error = GB_write_float(gb_field, fnum); |
|---|
| 193 | break; |
|---|
| 194 | } |
|---|
| 195 | default: { // unhandled type |
|---|
| 196 | xf_assert(0); |
|---|
| 197 | break; |
|---|
| 198 | } |
|---|
| 199 | } |
|---|
| 200 | return error; |
|---|
| 201 | } |
|---|
| 202 | static GB_ERROR convertAndWrite(const TransportedData& data, GBDATA *gb_field, GB_TYPES wantedTargetType, bool acceptLossyConversion) { |
|---|
| 203 | // perform conversion to 'wantedTargetType' and write to 'gb_field' |
|---|
| 204 | GB_ERROR error = NULp; |
|---|
| 205 | |
|---|
| 206 | switch (data.getType()) { |
|---|
| 207 | case GB_INT: |
|---|
| 208 | if (wantedTargetType == GB_FLOAT) { |
|---|
| 209 | // convert int -> float |
|---|
| 210 | float f = data.getInt(); |
|---|
| 211 | int32_t i = int(f+.5); // round (just in case some underflow happened, causing sth like 4711.9999999) |
|---|
| 212 | |
|---|
| 213 | if (i != data.getInt() && !acceptLossyConversion) { |
|---|
| 214 | error = GBS_global_string("lossy int->float type conversion (%i->%i)", data.getInt(), i); |
|---|
| 215 | } |
|---|
| 216 | else { |
|---|
| 217 | error = GB_write_float(gb_field, f); |
|---|
| 218 | } |
|---|
| 219 | } |
|---|
| 220 | else { |
|---|
| 221 | error = GB_write_lossless_int(gb_field, data.getInt()); |
|---|
| 222 | } |
|---|
| 223 | break; |
|---|
| 224 | |
|---|
| 225 | case GB_FLOAT: |
|---|
| 226 | if (wantedTargetType == GB_INT) { |
|---|
| 227 | // convert float -> int |
|---|
| 228 | double d = data.getFloat(); |
|---|
| 229 | int i = d>0 ? (int)(d+0.5) : (int)(d-0.5); |
|---|
| 230 | // @@@ increment a round-counter in RuleSet? |
|---|
| 231 | double d2 = i; |
|---|
| 232 | |
|---|
| 233 | if (d != d2 && !acceptLossyConversion) { // precision loss |
|---|
| 234 | error = GBS_global_string("lossy float->int type conversion (%e->%e)", d, d2); |
|---|
| 235 | } |
|---|
| 236 | else { |
|---|
| 237 | error = GB_write_int(gb_field, i); |
|---|
| 238 | } |
|---|
| 239 | } |
|---|
| 240 | else { |
|---|
| 241 | error = GB_write_lossless_float(gb_field, data.getFloat()); |
|---|
| 242 | } |
|---|
| 243 | break; |
|---|
| 244 | |
|---|
| 245 | case GB_STRING: |
|---|
| 246 | error = GB_write_autoconv_string(gb_field, data.getString().c_str()); // @@@ avoid silent data loss |
|---|
| 247 | // @@@ use GBT_write_float_converted / GBT_write_int_converted here! |
|---|
| 248 | break; |
|---|
| 249 | |
|---|
| 250 | default: |
|---|
| 251 | xf_assert(0); // unhandled type |
|---|
| 252 | break; |
|---|
| 253 | } |
|---|
| 254 | |
|---|
| 255 | return error; |
|---|
| 256 | } |
|---|
| 257 | |
|---|
| 258 | GB_ERROR WriteRule::writeTo(const TransportedData& data, GBDATA *gb_item, bool acceptLossyConversion) const { |
|---|
| 259 | if (!gb_item) return "lacking item to writeTo"; |
|---|
| 260 | |
|---|
| 261 | // @@@ overwrite existing target field? should it be allowed or denied? optional? |
|---|
| 262 | // @@@ try GBT_searchOrCreate_itemfield_according_to_changekey to create a field |
|---|
| 263 | xf_assert(data.exists()); |
|---|
| 264 | |
|---|
| 265 | GB_TYPES usedTargetType = forcesType() ? getTargetType() : data.getType(); |
|---|
| 266 | |
|---|
| 267 | GB_ERROR error = check_hkey(); |
|---|
| 268 | if (!error) { |
|---|
| 269 | GBDATA *gb_field = GB_search(gb_item, name.c_str(), usedTargetType); // Note: works with hierarchical keys |
|---|
| 270 | if (!gb_field) { |
|---|
| 271 | error = GB_await_error(); // field not created -> report why |
|---|
| 272 | } |
|---|
| 273 | else { |
|---|
| 274 | if (data.getType() == usedTargetType) { // data and target have same type -> no conversion needed |
|---|
| 275 | error = unconvertedWrite(data, gb_field); |
|---|
| 276 | } |
|---|
| 277 | else { // type differs -> perform conversion (act like field converter reachable from info-window) |
|---|
| 278 | error = convertAndWrite(data, gb_field, usedTargetType, acceptLossyConversion); |
|---|
| 279 | } |
|---|
| 280 | } |
|---|
| 281 | } |
|---|
| 282 | return error; |
|---|
| 283 | } |
|---|
| 284 | |
|---|
| 285 | GB_ERROR Rule::transferBy(GBDATA *gb_source, GBDATA *gb_dest) const { |
|---|
| 286 | /*! apply one rule (as part of transfer). */ |
|---|
| 287 | |
|---|
| 288 | // @@@ detect target field type. has to be done before starting transfer (do only once for each rule!) |
|---|
| 289 | // @@@ pass target field type to reader (to select best read method)! |
|---|
| 290 | |
|---|
| 291 | GB_ERROR error = NULp; |
|---|
| 292 | TransportedData tdata = readFrom(gb_source, gb_dest); |
|---|
| 293 | if (tdata.failed()) { |
|---|
| 294 | error = tdata.getError(); |
|---|
| 295 | } |
|---|
| 296 | else if (tdata.exists()) { |
|---|
| 297 | error = writeTo(tdata, gb_dest, precisionLossPermitted()); |
|---|
| 298 | } |
|---|
| 299 | // else source missing -> do nothing. |
|---|
| 300 | // Note: if target field exists and source field is missing -> target field remains intact. |
|---|
| 301 | |
|---|
| 302 | xf_assert(!GB_have_error()); // invalid to export an error (should get returned) |
|---|
| 303 | return error; |
|---|
| 304 | } |
|---|
| 305 | |
|---|
| 306 | GB_ERROR RuleSet::transferBy(GBDATA *gb_source, GBDATA *gb_dest) const { |
|---|
| 307 | /*! transfer field data by applying all rules. */ |
|---|
| 308 | |
|---|
| 309 | GB_ERROR error = NULp; |
|---|
| 310 | size_t r; |
|---|
| 311 | for (r = 0; r<size() && !error; ++r) { |
|---|
| 312 | const Rule& rule = get(r); |
|---|
| 313 | error = rule.transferBy(gb_source, gb_dest); |
|---|
| 314 | } |
|---|
| 315 | if (error) { |
|---|
| 316 | error = GBS_global_string("%s (in rule #%zu)", error, r); |
|---|
| 317 | } |
|---|
| 318 | |
|---|
| 319 | xf_assert(!GB_have_error()); // invalid to export an error (should get returned) |
|---|
| 320 | return error; |
|---|
| 321 | } |
|---|
| 322 | |
|---|
| 323 | GB_ERROR RuleSet::saveTo(const char *filename) const { |
|---|
| 324 | GB_ERROR error = NULp; |
|---|
| 325 | FILE *out = fopen(filename, "wt"); |
|---|
| 326 | if (!out) { |
|---|
| 327 | error = GB_IO_error("saving", filename); |
|---|
| 328 | } |
|---|
| 329 | else { |
|---|
| 330 | // print header: |
|---|
| 331 | fputs("# arb field transfer set; version 1.0\n", out); |
|---|
| 332 | fputc('\n', out); |
|---|
| 333 | |
|---|
| 334 | // print global RuleSet data: |
|---|
| 335 | { |
|---|
| 336 | ConstStrArray clines; |
|---|
| 337 | GBT_split_string(clines, comment.c_str(), '\n'); |
|---|
| 338 | |
|---|
| 339 | for (int c = 0; clines[c]; ++c) { |
|---|
| 340 | fprintf(out, "desc:%s\n", clines[c]); |
|---|
| 341 | } |
|---|
| 342 | fputc('\n', out); |
|---|
| 343 | } |
|---|
| 344 | fprintf(out, "transferUndef:%i\n", int(transferUndefFields)); |
|---|
| 345 | |
|---|
| 346 | // print rules: |
|---|
| 347 | for (size_t r = 0; r<size(); ++r) { |
|---|
| 348 | const Rule& rule = get(r); |
|---|
| 349 | string cfg = rule.getConfig(); |
|---|
| 350 | fprintf(out, "rule:%s\n", cfg.c_str()); |
|---|
| 351 | } |
|---|
| 352 | fputc('\n', out); |
|---|
| 353 | |
|---|
| 354 | fclose(out); |
|---|
| 355 | } |
|---|
| 356 | return error; |
|---|
| 357 | } |
|---|
| 358 | |
|---|
inline bool isCommentLine(const std::string& line) {
    // A comment line has '#' as its first character that is neither space nor tab.
    // Empty and whitespace-only lines contain no such character (find_first_not_of
    // returns npos) and are NOT comment lines; indexing the line with npos has to be avoided.
    const size_t firstNonBlank = line.find_first_not_of(" \t");
    return firstNonBlank != std::string::npos && line[firstNonBlank] == '#';
}
|---|
| 363 | inline bool shallIgnore(const string& line) { |
|---|
| 364 | // decide whether to ignore a line loaded from .fts file. |
|---|
| 365 | return line.empty() || isCommentLine(line); |
|---|
| 366 | } |
|---|
| 367 | |
|---|
| 368 | ErrorOrRuleSetPtr RuleSet::loadFrom(const char *filename) { |
|---|
| 369 | ARB_ERROR error; |
|---|
| 370 | RuleSetPtr ruleset; |
|---|
| 371 | |
|---|
| 372 | FILE *in = fopen(filename, "rt"); |
|---|
| 373 | if (!in) { |
|---|
| 374 | error = GB_IO_error("loading", filename); |
|---|
| 375 | } |
|---|
| 376 | else { |
|---|
| 377 | ruleset = new RuleSet(); |
|---|
| 378 | BufferedFileReader reader(filename, in); |
|---|
| 379 | |
|---|
| 380 | string line; |
|---|
| 381 | while (!error && reader.getLine(line)) { |
|---|
| 382 | if (shallIgnore(line)) continue; |
|---|
| 383 | |
|---|
| 384 | size_t pos = line.find(':'); |
|---|
| 385 | if (pos == string::npos) { |
|---|
| 386 | error = GBS_global_string("expected ':' while parsing line '%s'", line.c_str()); |
|---|
| 387 | } |
|---|
| 388 | else { |
|---|
| 389 | string tag = line.substr(0, pos); |
|---|
| 390 | string content = line.substr(pos+1); |
|---|
| 391 | |
|---|
| 392 | if (tag == "rule") { |
|---|
| 393 | ErrorOrRulePtr rule = Rule::makeFromConfig(content.c_str()); |
|---|
| 394 | if (rule.hasError()) { |
|---|
| 395 | error = GBS_global_string("while reading rule from '%s': %s", |
|---|
| 396 | content.c_str(), |
|---|
| 397 | rule.getError().deliver()); |
|---|
| 398 | } |
|---|
| 399 | else { |
|---|
| 400 | ruleset->add(rule.getValue()); |
|---|
| 401 | } |
|---|
| 402 | } |
|---|
| 403 | else if (tag == "desc") { |
|---|
| 404 | const string& existing = ruleset->getComment(); |
|---|
| 405 | ruleset->setComment(existing.empty() ? content : existing+'\n'+content); |
|---|
| 406 | } |
|---|
| 407 | else if (tag == "transferUndef") { |
|---|
| 408 | ruleset->set_transferUndefFields(bool(atoi(content.c_str()))); |
|---|
| 409 | } |
|---|
| 410 | else { |
|---|
| 411 | error = GBS_global_string("unknown tag '%s' while parsing line '%s'", |
|---|
| 412 | tag.c_str(), |
|---|
| 413 | line.c_str()); |
|---|
| 414 | } |
|---|
| 415 | } |
|---|
| 416 | } |
|---|
| 417 | |
|---|
| 418 | if (error) ruleset.setNull(); |
|---|
| 419 | } |
|---|
| 420 | |
|---|
| 421 | return ErrorOrRuleSetPtr(error, ruleset); |
|---|
| 422 | } |
|---|
| 423 | |
|---|
| 424 | // -------------------------------- |
|---|
| 425 | // configuration of rules |
|---|
| 426 | |
|---|
// keys of the entries stored in the config string of a rule:
#define SOURCE "source" // source field(s) of the rule
#define ACI "aci"       // ACI applied to the data read (only stored if not empty)
#define TARGET "target" // name of the target field
#define SEP "sep"       // separator (only stored if it differs from NOSEP)
#define TYPE "type"     // type id of the target field (only stored if the rule forces a type)
#define LOSS "loss"     // only stored if precision loss is permitted

// value stored under key LOSS:
#define PERMITTED "permitted"
|---|
| 435 | |
|---|
| 436 | inline const char *type2str(GB_TYPES type) { |
|---|
| 437 | const char *str = NULp; |
|---|
| 438 | switch (type) { |
|---|
| 439 | case GB_STRING: str = "text"; break; |
|---|
| 440 | case GB_INT: str = "int"; break; |
|---|
| 441 | case GB_FLOAT: str = "float"; break; |
|---|
| 442 | case GB_BITS: str = "bits"; break; |
|---|
| 443 | case GB_NONE: str = "auto"; break; |
|---|
| 444 | default: break; |
|---|
| 445 | } |
|---|
| 446 | return str; |
|---|
| 447 | } |
|---|
| 448 | inline GB_TYPES str2type(const char *str) { |
|---|
| 449 | GB_TYPES type = GB_TYPE_MAX; // invalid |
|---|
| 450 | switch (str[0]) { |
|---|
| 451 | case 't': if (strcmp(str, "text") == 0) type = GB_STRING; break; |
|---|
| 452 | case 'i': if (strcmp(str, "int") == 0) type = GB_INT; break; |
|---|
| 453 | case 'f': if (strcmp(str, "float") == 0) type = GB_FLOAT; break; |
|---|
| 454 | case 'b': if (strcmp(str, "bits") == 0) type = GB_BITS; break; |
|---|
| 455 | case 'a': if (strcmp(str, "auto") == 0) type = GB_NONE; break; |
|---|
| 456 | } |
|---|
| 457 | return type; |
|---|
| 458 | } |
|---|
| 459 | |
|---|
| 460 | void ReadRule::saveReadConfig(ConfigMapping& cfgmap) const { |
|---|
| 461 | cfgmap.set_entry(SOURCE, fields); |
|---|
| 462 | if (separator != NOSEP) cfgmap.set_entry(SEP, separator); |
|---|
| 463 | if (!aci.empty()) cfgmap.set_entry(ACI, aci); |
|---|
| 464 | } |
|---|
| 465 | |
|---|
| 466 | void WriteRule::saveWriteConfig(ConfigMapping& cfgmap) const { |
|---|
| 467 | cfgmap.set_entry(TARGET, name); |
|---|
| 468 | if (forcesType()) { |
|---|
| 469 | cfgmap.set_entry(TYPE, type2str(getTargetType())); |
|---|
| 470 | } |
|---|
| 471 | } |
|---|
| 472 | string Rule::getConfig() const { |
|---|
| 473 | ConfigMapping cfgmap; |
|---|
| 474 | |
|---|
| 475 | saveReadConfig(cfgmap); |
|---|
| 476 | saveWriteConfig(cfgmap); |
|---|
| 477 | |
|---|
| 478 | if (precisionLossPermitted()) { |
|---|
| 479 | cfgmap.set_entry(LOSS, PERMITTED); |
|---|
| 480 | } |
|---|
| 481 | |
|---|
| 482 | return cfgmap.config_string(); |
|---|
| 483 | } |
|---|
| 484 | |
|---|
| 485 | ErrorOrRulePtr Rule::makeFromConfig(const char *config) { |
|---|
| 486 | RulePtr rule; |
|---|
| 487 | ConfigMapping cfgmap; |
|---|
| 488 | GB_ERROR error = cfgmap.parseFrom(config); |
|---|
| 489 | |
|---|
| 490 | if (!error) { |
|---|
| 491 | const char *source = cfgmap.get_entry(SOURCE); |
|---|
| 492 | const char *target = cfgmap.get_entry(TARGET); |
|---|
| 493 | const char *sep = cfgmap.get_entry(SEP); |
|---|
| 494 | |
|---|
| 495 | if (!source) error = "missing " SOURCE " entry"; |
|---|
| 496 | if (!target) error = "missing " TARGET " entry"; |
|---|
| 497 | |
|---|
| 498 | if (!sep) sep = NOSEP; // default to 'no separator' |
|---|
| 499 | |
|---|
| 500 | if (!error) { |
|---|
| 501 | const char *aci = cfgmap.get_entry(ACI); |
|---|
| 502 | if (aci) { |
|---|
| 503 | rule = makeAciConverter(source, sep, aci, target); |
|---|
| 504 | } |
|---|
| 505 | else { |
|---|
| 506 | rule = makeSimple(source, sep, target); |
|---|
| 507 | } |
|---|
| 508 | |
|---|
| 509 | const char *typeID = cfgmap.get_entry(TYPE); |
|---|
| 510 | if (typeID) { |
|---|
| 511 | GB_TYPES type = str2type(typeID); |
|---|
| 512 | if (type == GB_TYPE_MAX) { // = unknown type ID |
|---|
| 513 | error = GBS_global_string("invalid type id '%s'", typeID); |
|---|
| 514 | rule.setNull(); |
|---|
| 515 | } |
|---|
| 516 | else { |
|---|
| 517 | xf_assert(GB_TYPE_readable_as_string(type)); |
|---|
| 518 | rule->setTargetType(type); |
|---|
| 519 | } |
|---|
| 520 | } |
|---|
| 521 | |
|---|
| 522 | if (!error) { |
|---|
| 523 | const char *loss = cfgmap.get_entry(LOSS); |
|---|
| 524 | if (loss && strcmp(loss, PERMITTED) == 0) { |
|---|
| 525 | rule->permitPrecisionLoss(); |
|---|
| 526 | } |
|---|
| 527 | } |
|---|
| 528 | } |
|---|
| 529 | } |
|---|
| 530 | |
|---|
| 531 | return ErrorOrRulePtr(error, rule); |
|---|
| 532 | } |
|---|
| 533 | |
|---|
| 534 | |
|---|
| 535 | // ------------------------ |
|---|
| 536 | // describe rules |
|---|
| 537 | string ReadRule::describe() const { |
|---|
| 538 | if (aci.empty()) return fields; |
|---|
| 539 | return fields+"|ACI"; |
|---|
| 540 | } |
|---|
| 541 | string WriteRule::describe() const { |
|---|
| 542 | return name; |
|---|
| 543 | } |
|---|
| 544 | string Rule::getShortDescription() const { |
|---|
| 545 | return ReadRule::describe() + " -> " + WriteRule::describe(); |
|---|
| 546 | } |
|---|
| 547 | |
|---|
| 548 | // ----------------------------- |
|---|
| 549 | // ItemClonedByRuleSet |
|---|
// definition of the static class member (default-constructed, i.e. initially empty)
string ItemClonedByRuleSet::lastReportedError;
|---|
| 551 | |
|---|
| 552 | GB_ERROR ItemClonedByRuleSet::overlayOrCloneSub(const char *subName, GBDATA *gb_sub) { |
|---|
| 553 | GBDATA *gb_existing = GB_entry(gb_clone, subName); |
|---|
| 554 | GB_ERROR error; |
|---|
| 555 | if (gb_existing) { // if target entry exists .. |
|---|
| 556 | error = GB_copy_overlay(gb_existing, gb_sub); // .. overwrite its content. |
|---|
| 557 | } |
|---|
| 558 | else { // otherwise .. |
|---|
| 559 | error = GB_incur_error_if(!GB_clone(gb_clone, gb_sub)); // .. clone source entry |
|---|
| 560 | } |
|---|
| 561 | return error; |
|---|
| 562 | } |
|---|
| 563 | |
|---|
| 564 | GB_ERROR ItemClonedByRuleSet::cloneMissingSub(const char *subName, GBDATA *gb_sub) { |
|---|
| 565 | GBDATA *gb_existing = GB_entry(gb_clone, subName); |
|---|
| 566 | GB_ERROR error; |
|---|
| 567 | if (gb_existing) { // if target entry exists .. |
|---|
| 568 | error = NULp; // .. keep it |
|---|
| 569 | } |
|---|
| 570 | else { // otherwise .. |
|---|
| 571 | error = GB_incur_error_if(!GB_clone(gb_clone, gb_sub)); // .. clone source entry |
|---|
| 572 | } |
|---|
| 573 | return error; |
|---|
| 574 | } |
|---|
| 575 | |
|---|
| 576 | GB_ERROR ItemClonedByRuleSet::copySubIfMissing(const char *subName) { |
|---|
| 577 | // copy sub-field (or -container) if it doesn't exist in target |
|---|
| 578 | GB_ERROR error = NULp; |
|---|
| 579 | GBDATA *gb_sub = GB_entry(gb_source, subName); |
|---|
| 580 | if (!gb_sub) { |
|---|
| 581 | error = GBS_global_string("no such entry '%s' (in source)", subName); |
|---|
| 582 | UNCOVERED(); |
|---|
| 583 | } |
|---|
| 584 | else { |
|---|
| 585 | error = cloneMissingSub(subName, gb_sub); // sub = passed field or container |
|---|
| 586 | } |
|---|
| 587 | return error; |
|---|
| 588 | } |
|---|
| 589 | |
|---|
| 590 | GB_ERROR ItemClonedByRuleSet::copyAlignments() { |
|---|
| 591 | GB_ERROR error = NULp; |
|---|
| 592 | for (GBDATA *gb_ali = GB_child(gb_source); gb_ali; gb_ali = GB_nextChild(gb_ali)) { |
|---|
| 593 | if (GB_is_container(gb_ali)) { |
|---|
| 594 | const char *aliname = GB_read_key_pntr(gb_ali); |
|---|
| 595 | if (ARB_strBeginsWith(aliname, "ali_")) { |
|---|
| 596 | GBDATA *gb_data = GB_entry(gb_ali, "data"); |
|---|
| 597 | if (gb_data) { |
|---|
| 598 | bool dataIsSTRING = GB_read_type(gb_data) == GB_STRING; |
|---|
| 599 | xf_assert(dataIsSTRING); |
|---|
| 600 | if (dataIsSTRING) { |
|---|
| 601 | error = overlayOrCloneSub(aliname, gb_ali); // sub = whole alignment container |
|---|
| 602 | } |
|---|
| 603 | } |
|---|
| 604 | else { |
|---|
| 605 | error = GB_incur_error(); |
|---|
| 606 | UNCOVERED(); |
|---|
| 607 | } |
|---|
| 608 | } |
|---|
| 609 | } |
|---|
| 610 | } |
|---|
| 611 | return error; |
|---|
| 612 | } |
|---|
| 613 | |
|---|
| 614 | const char *ItemClonedByRuleSet::get_id_field() const { |
|---|
| 615 | const char *field = NULp; |
|---|
| 616 | switch (itemtype) { |
|---|
| 617 | case CLONE_ITEM_SPECIES: field = "name"; break; |
|---|
| 618 | default: xf_assert(0); break; |
|---|
| 619 | } |
|---|
| 620 | return field; |
|---|
| 621 | } |
|---|
| 622 | |
|---|
| 623 | ItemClonedByRuleSet::ItemClonedByRuleSet(GBDATA*& gb_item, ClonableItemType itemtype_, RuleSetPtr ruleset, ItemCloneType type_, GBDATA *gb_refItem, const AlignmentTransporter *aliTransporter) : |
|---|
| 624 | itemtype(itemtype_), |
|---|
| 625 | gb_source(gb_item), |
|---|
| 626 | type(type_) |
|---|
| 627 | { |
|---|
| 628 | /*! clone or update item using ruleset. |
|---|
| 629 | * |
|---|
| 630 | * @param gb_item the source item (will be set to NULp if type_ is REPLACE_ITEM_BY_CLONE). |
|---|
| 631 | * @param itemtype_ currently always CLONE_ITEM_SPECIES. |
|---|
| 632 | * @param ruleset ruleset used to transfer fields from source item to cloned item |
|---|
| 633 | * @param type_ type of clone (see ItemCloneType for details). |
|---|
| 634 | * @param gb_refItem CLONE_INTO_EXISTING: target species, REAL_CLONE: target item container, otherwise: NULp |
|---|
| 635 | * @param aliTransporter allows to overide how alignment gets copied (default: copy all alignment sub-containers) |
|---|
| 636 | */ |
|---|
| 637 | |
|---|
| 638 | // @@@ method is far too long -> split |
|---|
| 639 | |
|---|
| 640 | GB_ERROR error = NULp; |
|---|
| 641 | GB_transaction ta(gb_source); |
|---|
| 642 | |
|---|
| 643 | #if defined(ASSERTION_USED) |
|---|
| 644 | checked4error = false; |
|---|
| 645 | userCallbackUsed = false; |
|---|
| 646 | #endif |
|---|
| 647 | |
|---|
| 648 | if (type == CLONE_INTO_EXISTING) { |
|---|
| 649 | if (gb_refItem) { |
|---|
| 650 | gb_clone = gb_refItem; // use passed clone as target |
|---|
| 651 | } |
|---|
| 652 | else { |
|---|
| 653 | error = "no target species specified (logic error)"; |
|---|
| 654 | UNCOVERED(); |
|---|
| 655 | } |
|---|
| 656 | } |
|---|
| 657 | else { |
|---|
| 658 | GBDATA *gb_item_container; |
|---|
| 659 | { |
|---|
| 660 | GBDATA *gb_src_item_container = GB_get_father(gb_source); |
|---|
| 661 | if (type == REAL_CLONE) { |
|---|
| 662 | gb_item_container = gb_refItem; |
|---|
| 663 | if (!gb_item_container) { |
|---|
| 664 | error = "no target item container specified (logic error)"; |
|---|
| 665 | } |
|---|
| 666 | else if (gb_item_container == gb_src_item_container) { |
|---|
| 667 | error = "source and target item containers need to differ (logic error)"; |
|---|
| 668 | } |
|---|
| 669 | } |
|---|
| 670 | else { |
|---|
| 671 | xf_assert(!gb_refItem); // passed value is ignored (please pass NULp) |
|---|
| 672 | gb_item_container = gb_src_item_container; |
|---|
| 673 | } |
|---|
| 674 | } |
|---|
| 675 | |
|---|
| 676 | if (!error) { |
|---|
| 677 | xf_assert(itemtype_ == CLONE_ITEM_SPECIES); // next command only works for species |
|---|
| 678 | gb_clone = GB_create_container(gb_item_container, "species"); // create separate species |
|---|
| 679 | if (!gb_clone) { |
|---|
| 680 | error = GB_await_error(); |
|---|
| 681 | UNCOVERED(); |
|---|
| 682 | } |
|---|
| 683 | } |
|---|
| 684 | } |
|---|
| 685 | |
|---|
| 686 | if (!error) { |
|---|
| 687 | // apply ruleset: |
|---|
| 688 | error = ruleset->transferBy(gb_source, gb_clone); |
|---|
| 689 | |
|---|
| 690 | // perform some standard transfers: |
|---|
| 691 | const char *IDFIELD = get_id_field(); |
|---|
| 692 | if (!error) error = copySubIfMissing(IDFIELD); // transfer IDFIELD for any itemtype |
|---|
| 693 | |
|---|
| 694 | switch (itemtype) { |
|---|
| 695 | case CLONE_ITEM_SPECIES: |
|---|
| 696 | if (!error) error = copySubIfMissing("acc"); |
|---|
| 697 | if (!error) { |
|---|
| 698 | if (aliTransporter) { // use user callback if given |
|---|
| 699 | if (aliTransporter->shallCopyBefore()) { |
|---|
| 700 | error = copyAlignments(); |
|---|
| 701 | } |
|---|
| 702 | if (!error) { |
|---|
| 703 | error = aliTransporter->transport(gb_source, gb_clone); // e.g. used to adapt alignment in mergetool |
|---|
| 704 | } |
|---|
| 705 | #if defined(ASSERTION_USED) |
|---|
| 706 | userCallbackUsed = true; |
|---|
| 707 | #endif |
|---|
| 708 | } |
|---|
| 709 | else { |
|---|
| 710 | error = copyAlignments(); |
|---|
| 711 | } |
|---|
| 712 | } |
|---|
| 713 | break; |
|---|
| 714 | default: xf_assert(0); break; |
|---|
| 715 | } |
|---|
| 716 | |
|---|
| 717 | if (!error && ruleset->shallTransferUndefFields()) { |
|---|
| 718 | |
|---|
| 719 | StrSet defined; |
|---|
| 720 | // extract used fields: |
|---|
| 721 | { |
|---|
| 722 | StrArray in, out; |
|---|
| 723 | ruleset->extractUsedFields(in, out); |
|---|
| 724 | // @@@ do extraction only once (not for each item transfer) |
|---|
| 725 | |
|---|
| 726 | StrArray2StrSet(in, defined); |
|---|
| 727 | StrArray2StrSet(out, defined); |
|---|
| 728 | } |
|---|
| 729 | { |
|---|
| 730 | // exclude parent containers: |
|---|
| 731 | StrSet parents; |
|---|
| 732 | for (StrSet::const_iterator field = defined.begin(); field != defined.end(); ++field) { |
|---|
| 733 | size_t slashpos = field->find_first_of('/'); |
|---|
| 734 | if (slashpos != string::npos) { // fieldname contains a slash |
|---|
| 735 | string parentname = field->substr(0, slashpos); // name of top-level parent container inside species |
|---|
| 736 | parents.insert(parentname); |
|---|
| 737 | } |
|---|
| 738 | } |
|---|
| 739 | defined.insert(parents.begin(), parents.end()); |
|---|
| 740 | } |
|---|
| 741 | |
|---|
| 742 | // transfer rest of fields (i.e. those neighter used by ruleset nor as standard field): |
|---|
| 743 | for (GBDATA *gb_field = GB_child(gb_source); gb_field && !error; gb_field = GB_nextChild(gb_field)) { |
|---|
| 744 | const char *key = GB_read_key_pntr(gb_field); |
|---|
| 745 | bool keyUsed = defined.find(key) != defined.end(); // key was read or written by ruleset |
|---|
| 746 | |
|---|
| 747 | if (!keyUsed) { |
|---|
| 748 | error = copySubIfMissing(key); |
|---|
| 749 | } |
|---|
| 750 | } |
|---|
| 751 | } |
|---|
| 752 | |
|---|
| 753 | // @@@ do we need to preserve security etc of cloned species? (security of sub-fields is preserved; e.g. see r17967) |
|---|
| 754 | |
|---|
| 755 | if (!error) { |
|---|
| 756 | xf_assert(correlated(aliTransporter, userCallbackUsed)); // custom transporter was not used (logic error?) |
|---|
| 757 | |
|---|
| 758 | switch (type) { |
|---|
| 759 | case REPLACE_ITEM_BY_CLONE: |
|---|
| 760 | error = GB_delete(gb_source); // will be replaced by clone |
|---|
| 761 | if (!error) { |
|---|
| 762 | gb_item = NULp; |
|---|
| 763 | gb_source = NULp; |
|---|
| 764 | } |
|---|
| 765 | break; |
|---|
| 766 | |
|---|
| 767 | case RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS: { |
|---|
| 768 | GBDATA *gb_id = GB_entry(gb_source, IDFIELD); |
|---|
| 769 | if (!gb_id) { |
|---|
| 770 | error = GBS_global_string("expected field '%s' not found", IDFIELD); |
|---|
| 771 | } |
|---|
| 772 | else { |
|---|
| 773 | const char *name = GB_read_char_pntr(gb_id); |
|---|
| 774 | xf_assert(name); |
|---|
| 775 | orgName = name; // store name (written back in dtor) |
|---|
| 776 | |
|---|
| 777 | error = GB_write_string(gb_id, "fake"); // change name // @@@ use different name |
|---|
| 778 | } |
|---|
| 779 | break; |
|---|
| 780 | } |
|---|
| 781 | case CLONE_INTO_EXISTING: |
|---|
| 782 | case REAL_CLONE: |
|---|
| 783 | // nothing to do here |
|---|
| 784 | break; |
|---|
| 785 | } |
|---|
| 786 | } |
|---|
| 787 | } |
|---|
| 788 | |
|---|
| 789 | error = ta.close(error); |
|---|
| 790 | if (error) { |
|---|
| 791 | errorCopy = error; // store copy of error in string |
|---|
| 792 | gb_clone = NULp; |
|---|
| 793 | } |
|---|
| 794 | } |
|---|
| 795 | |
|---|
| 796 | ItemClonedByRuleSet::~ItemClonedByRuleSet() { // undo for RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS: write back original id of source item + delete temp. clone |
|---|
| 797 | if (!has_error()) { // if error occurred during construction -> TA was aborted -> nothing to undo |
|---|
| 798 | if (type == RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS) { // no other clone type is handled by this dtor |
|---|
| 799 | GB_transaction ta(gb_source); |
|---|
| 800 | GB_ERROR error = NULp; |
|---|
| 801 | |
|---|
| 802 | GBDATA *gb_id = GB_entry(gb_source, get_id_field()); |
|---|
| 803 | xf_assert(gb_id); |
|---|
| 804 | |
|---|
| 805 | if (gb_id) { // write back the name stored in 'orgName' during construction |
|---|
| 806 | xf_assert(!orgName.empty()); |
|---|
| 807 | error = GB_write_string(gb_id, orgName.c_str()); |
|---|
| 808 | if (error) { |
|---|
| 809 | fprintf(stderr, "Failed to rename original item after temp. clone (Reason: %s)\n", error); // newline-terminated (otherwise following output gets glued to the message) |
|---|
| 810 | xf_assert(0); // should not happen |
|---|
| 811 | } |
|---|
| 812 | } |
|---|
| 813 | |
|---|
| 814 | // delete temp clone (skipped if writing back the name failed): |
|---|
| 815 | if (!error) { |
|---|
| 816 | error = GB_delete(gb_clone); |
|---|
| 817 | if (error) { |
|---|
| 818 | fprintf(stderr, "Failed to delete temp. clone (Reason: %s)\n", error); // newline-terminated (see above) |
|---|
| 819 | xf_assert(0); // should not happen |
|---|
| 820 | } |
|---|
| 821 | } |
|---|
| 822 | } |
|---|
| 823 | } |
|---|
| 824 | } |
|---|
| 825 | |
|---|
| 826 | }; |
|---|
| 827 | |
|---|
| 828 | // -------------------------------------------------------------------------------- |
|---|
| 829 | |
|---|
| 830 | #ifdef UNIT_TESTS |
|---|
| 831 | |
|---|
| 832 | #include <arb_diff.h> |
|---|
| 833 | #include <arb_file.h> |
|---|
| 834 | #include <arb_defs.h> |
|---|
| 835 | |
|---|
| 836 | #ifndef TEST_UNIT_H |
|---|
| 837 | #include <test_unit.h> |
|---|
| 838 | #endif |
|---|
| 839 | |
|---|
| 840 | void TEST_type2id() { // round trip check: type -> id string -> type |
|---|
| 841 | using namespace FieldTransfer; |
|---|
| 842 | |
|---|
| 843 | for (GB_TYPES t = GB_NONE; t<=GB_TYPE_MAX; t = GB_TYPES(t+1)) { |
|---|
| 844 | const char *id = type2str(t); |
|---|
| 845 | if (!id) continue; // types for which type2str yields no id are not checked |
|---|
| 846 | |
|---|
| 847 | TEST_ANNOTATE(id); |
|---|
| 848 | TEST_EXPECT_EQUAL(str2type(id), t); // id has to map back to the type it was derived from |
|---|
| 849 | } |
|---|
| 850 | } |
|---|
| 851 | void TEST_transportedData() { /* checks the 'none' and 'error' states of TransportedData plus string/int/float payloads */ |
|---|
| 852 | using namespace FieldTransfer; |
|---|
| 853 | |
|---|
| 854 | GB_ERROR error; |
|---|
| 855 | { |
|---|
| 856 | TransportedData noData = TransportedData::none(); |
|---|
| 857 | TransportedData errorData = TransportedData::makeError("the error msg"); |
|---|
| 858 | /* 'none' neither reports a failure nor existing data: */ |
|---|
| 859 | TEST_REJECT(noData.failed()); |
|---|
| 860 | TEST_REJECT(noData.exists()); |
|---|
| 861 | /* an error instance reports failure and hands out its message: */ |
|---|
| 862 | TEST_EXPECT(errorData.failed()); |
|---|
| 863 | error = errorData.getError(); |
|---|
| 864 | TEST_EXPECT_EQUAL(error, "the error msg"); |
|---|
| 865 | } |
|---|
| 866 | TEST_EXPECT_EQUAL(error, "the error msg"); // error has to survive destruction of TransportedData |
|---|
| 867 | /* instances carrying data (constructed from string, int and float): */ |
|---|
| 868 | TransportedData greet("hello"); |
|---|
| 869 | TransportedData num(4711); |
|---|
| 870 | TransportedData fnum(0.815f); |
|---|
| 871 | |
|---|
| 872 | TEST_REJECT(greet.failed()); |
|---|
| 873 | TEST_EXPECT(greet.exists()); |
|---|
| 874 | TEST_EXPECT_EQUAL(greet.getString(), "hello"); |
|---|
| 875 | |
|---|
| 876 | TEST_REJECT(num.failed()); |
|---|
| 877 | TEST_EXPECT(num.exists()); |
|---|
| 878 | TEST_EXPECT_EQUAL(num.getInt(), 4711); |
|---|
| 879 | |
|---|
| 880 | TEST_REJECT(fnum.failed()); |
|---|
| 881 | TEST_EXPECT(fnum.exists()); |
|---|
| 882 | TEST_EXPECT_SIMILAR(fnum.getFloat(), 0.815, 0.000001); |
|---|
| 883 | } |
|---|
| 884 | |
|---|
| 885 | void TEST_xferset() { /* exercises RuleSet as container: add, get, replace, insertBefore and erase */ |
|---|
| 886 | FieldTransfer::RuleSet fts; |
|---|
| 887 | /* a newly constructed RuleSet is expected to be empty: */ |
|---|
| 888 | TEST_EXPECT_ZERO(fts.size()); |
|---|
| 889 | TEST_EXPECT(fts.empty()); |
|---|
| 890 | TEST_EXPECT_EQUAL(fts.size(), 0); |
|---|
| 891 | |
|---|
| 892 | // -------------------- |
|---|
| 893 | // add rules: |
|---|
| 894 | fts.add(new FieldTransfer::Rule(FieldTransfer::ReadRule("location", NOSEP), // add a simple rule (one source, no ACI) |
|---|
| 895 | FieldTransfer::WriteRule("geolocation"))); |
|---|
| 896 | TEST_EXPECT(!fts.empty()); |
|---|
| 897 | TEST_EXPECT_EQUAL(fts.size(), 1); |
|---|
| 898 | |
|---|
| 899 | fts.add(FieldTransfer::Rule::permitPrecisionLoss(new FieldTransfer::Rule(FieldTransfer::ReadRule("isolation", NOSEP, "upper"), // add an ACI rule (one source) |
|---|
| 900 | FieldTransfer::WriteRule("isolation_source", GB_INT)))); // force int type |
|---|
| 901 | TEST_EXPECT_EQUAL(fts.size(), 2); |
|---|
| 902 | |
|---|
| 903 | // @@@ add multisource rules (with and w/o ACI)! |
|---|
| 904 | |
|---|
| 905 | // --------------------- |
|---|
| 906 | // query rules |
|---|
| 907 | for (size_t r = 0; r<fts.size(); ++r) { |
|---|
| 908 | TEST_ANNOTATE(GBS_global_string("r=%zu", r)); |
|---|
| 909 | |
|---|
| 910 | const FieldTransfer::Rule& rule = fts.get(r); |
|---|
| 911 | switch (r) { |
|---|
| 912 | // @@@ add tests for source field(s) |
|---|
| 913 | |
|---|
| 914 | case 0: // simple rule |
|---|
| 915 | |
|---|
| 916 | TEST_EXPECT_EQUAL(rule.targetField(), "geolocation"); |
|---|
| 917 | TEST_REJECT(rule.forcesType()); |
|---|
| 918 | TEST_REJECT(rule.precisionLossPermitted()); |
|---|
| 919 | break; |
|---|
| 920 | |
|---|
| 921 | case 1: // basic ACI rule |
|---|
| 922 | |
|---|
| 923 | TEST_EXPECT_EQUAL(rule.targetField(), "isolation_source"); |
|---|
| 924 | TEST_EXPECT(rule.forcesType()); // type is forced .. |
|---|
| 925 | TEST_EXPECT_EQUAL(rule.getTargetType(), GB_INT); // .. to int |
|---|
| 926 | TEST_EXPECT(rule.precisionLossPermitted()); |
|---|
| 927 | break; |
|---|
| 928 | |
|---|
| 929 | default: |
|---|
| 930 | xf_assert(0); // untested rule |
|---|
| 931 | break; |
|---|
| 932 | } |
|---|
| 933 | } |
|---|
| 934 | TEST_ANNOTATE(NULp); |
|---|
| 935 | TEST_EXPECT_EQUAL(fts.size(), 2); |
|---|
| 936 | |
|---|
| 937 | // ------------------------------------------- |
|---|
| 938 | // test rule replacement and removal |
|---|
| 939 | { |
|---|
| 940 | // order=01 (digits in the 'order' comments denote the rules initially stored at index 0 and 1, i.e. cfg0 and cfg1) |
|---|
| 941 | string cfg0 = fts.get(0).getConfig(); |
|---|
| 942 | string cfg1 = fts.get(1).getConfig(); |
|---|
| 943 | |
|---|
| 944 | TEST_EXPECT_DIFFERENT(cfg0, cfg1); // otherwise test below does not test replacement |
|---|
| 945 | |
|---|
| 946 | { |
|---|
| 947 | // swap 2 rules |
|---|
| 948 | FieldTransfer::RulePtr tmp = fts.getPtr(0); |
|---|
| 949 | fts.replace(0, fts.getPtr(1)); |
|---|
| 950 | fts.replace(1, tmp); |
|---|
| 951 | // order=10 |
|---|
| 952 | } |
|---|
| 953 | |
|---|
| 954 | string newcfg0 = fts.get(0).getConfig(); |
|---|
| 955 | string newcfg1 = fts.get(1).getConfig(); |
|---|
| 956 | |
|---|
| 957 | TEST_EXPECT_EQUAL(newcfg0, cfg1); |
|---|
| 958 | TEST_EXPECT_EQUAL(newcfg1, cfg0); |
|---|
| 959 | /* insertBefore() returns the index at which the rule was stored (checked after each insert): */ |
|---|
| 960 | { |
|---|
| 961 | int insertedAt; |
|---|
| 962 | |
|---|
| 963 | insertedAt = fts.insertBefore(0, fts.getPtr(1)); // insert before first -> order = 010 |
|---|
| 964 | TEST_EXPECT_EQUAL(fts.size(), 3); |
|---|
| 965 | TEST_EXPECT_EQUAL(insertedAt, 0); |
|---|
| 966 | TEST_EXPECT_EQUAL(fts.get(insertedAt).getConfig(), cfg0); |
|---|
| 967 | |
|---|
| 968 | insertedAt = fts.insertBefore(2, fts.getPtr(1)); // insert before last -> order = 0110 |
|---|
| 969 | TEST_EXPECT_EQUAL(fts.size(), 4); |
|---|
| 970 | TEST_EXPECT_EQUAL(insertedAt, 2); |
|---|
| 971 | TEST_EXPECT_EQUAL(fts.get(insertedAt).getConfig(), cfg1); |
|---|
| 972 | |
|---|
| 973 | insertedAt = fts.insertBefore(7, fts.getPtr(1)); // insert before invalid position = append -> order = 01101 |
|---|
| 974 | TEST_EXPECT_EQUAL(fts.size(), 5); |
|---|
| 975 | TEST_EXPECT_EQUAL(insertedAt, 4); |
|---|
| 976 | TEST_EXPECT_EQUAL(fts.get(insertedAt).getConfig(), cfg1); |
|---|
| 977 | |
|---|
| 978 | // "undo" inserts |
|---|
| 979 | fts.erase(1); // -> order = 0101 |
|---|
| 980 | fts.erase(3); // erase at end -> order = 010 |
|---|
| 981 | fts.erase(0); // -> order = 10 |
|---|
| 982 | } |
|---|
| 983 | |
|---|
| 984 | fts.erase(0); // erase 1st rule -> order = 0 |
|---|
| 985 | TEST_EXPECT_EQUAL(fts.size(), 1); |
|---|
| 986 | string finalcfg = fts.get(0).getConfig(); |
|---|
| 987 | TEST_EXPECT_EQUAL(finalcfg, cfg0); |
|---|
| 988 | } |
|---|
| 989 | } |
|---|
| 990 | |
|---|
| 991 | class FailingRule: public FieldTransfer::Rule { // a Rule bundled with a text expected to be part of its failure message |
|---|
| 992 | string failReasonPart; // returned by expectedPartOfFailure() |
|---|
| 993 | public: |
|---|
| 994 | FailingRule(const Rule& rule, string reasonPart) : Rule(rule), failReasonPart(reasonPart) {} |
|---|
| 995 | FailingRule(FieldTransfer::RulePtr rule, string reasonPart) : Rule(*rule), failReasonPart(reasonPart) {} // copies the Rule the pointer refers to |
|---|
| 996 | const char *expectedPartOfFailure() const { return failReasonPart.c_str(); } |
|---|
| 997 | }; |
|---|
| 998 | |
|---|
| 999 | |
|---|
| 1000 | struct XferEnv : virtual Noncopyable { // provides test environment for transfer tests: ctor opens source DB (mode "r") and target DB (mode "wc"); dtor unlinks the target file and closes both DBs |
|---|
| 1001 | GB_shell shell; |
|---|
| 1002 | |
|---|
| 1003 | const char *target_ascii; /* filename of target DB (opened by ctor, written by save(), unlinked by dtor) */ |
|---|
| 1004 | |
|---|
| 1005 | GBDATA *gb_src; /* DB to transfer from */ |
|---|
| 1006 | GBDATA *gb_dest; /* DB to transfer into */ |
|---|
| 1007 | |
|---|
| 1008 | XferEnv() : |
|---|
| 1009 | target_ascii("TEST_fields_xferred.arb") |
|---|
| 1010 | { |
|---|
| 1011 | gb_src = GB_open("TEST_fields_ascii.arb", "r"); // ../../UNIT_TESTER/run/TEST_fields_ascii.arb |
|---|
| 1012 | gb_dest = GB_open(target_ascii, "wc"); |
|---|
| 1013 | } |
|---|
| 1014 | ~XferEnv() { |
|---|
| 1015 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(target_ascii)); |
|---|
| 1016 | |
|---|
| 1017 | GB_close(gb_dest); |
|---|
| 1018 | GB_close(gb_src); |
|---|
| 1019 | } |
|---|
| 1020 | |
|---|
| 1021 | void transferAllSpeciesBy(const FieldTransfer::RuleSet& ruleset) { // transfer all species according to Ruleset (target species are searched by name and created if missing) |
|---|
| 1022 | // @@@ transferAllSpeciesBy is quite similar to what has to happen in merge-tool |
|---|
| 1023 | GB_transaction tas(gb_src); |
|---|
| 1024 | GB_transaction tad(gb_dest); |
|---|
| 1025 | |
|---|
| 1026 | for (GBDATA *gb_src_species = GBT_first_species(gb_src); |
|---|
| 1027 | gb_src_species; |
|---|
| 1028 | gb_src_species = GBT_next_species(gb_src_species)) |
|---|
| 1029 | { |
|---|
| 1030 | const char *name = GBT_get_name(gb_src_species); |
|---|
| 1031 | TEST_REJECT_NULL(name); |
|---|
| 1032 | |
|---|
| 1033 | GBDATA *gb_dest_species = GBT_find_or_create_species(gb_dest, name, false); |
|---|
| 1034 | TEST_REJECT_NULL(gb_dest_species); |
|---|
| 1035 | |
|---|
| 1036 | // @@@ transferAllSpeciesBy could allow overwrites (keep existing fields; allow overwrite of fields): |
|---|
| 1037 | // -> try to use ItemClonedByRuleSet with CLONE_INTO_EXISTING instead of direct call to transferBy below! |
|---|
| 1038 | |
|---|
| 1039 | TEST_EXPECT_ZERO(GB_read_flag(gb_dest_species)); // (was previously done by GBT_find_or_create_species) |
|---|
| 1040 | TEST_EXPECT_NO_ERROR(ruleset.transferBy(gb_src_species, gb_dest_species)); |
|---|
| 1041 | } |
|---|
| 1042 | } |
|---|
| 1043 | |
|---|
| 1044 | // ---------------------------------------------------------------------------- |
|---|
| 1045 | |
|---|
| 1046 | void copyAllSpecies() { /* copies each source species into a newly created 'species' container of the target DB (no RuleSet involved) */ |
|---|
| 1047 | GB_transaction tas(gb_src); |
|---|
| 1048 | GB_transaction tad(gb_dest); |
|---|
| 1049 | |
|---|
| 1050 | GBDATA *gb_dest_species_data = GBT_get_species_data(gb_dest); |
|---|
| 1051 | TEST_REJECT_NULL(gb_dest_species_data); |
|---|
| 1052 | |
|---|
| 1053 | for (GBDATA *gb_src_species = GBT_first_species(gb_src); |
|---|
| 1054 | gb_src_species; |
|---|
| 1055 | gb_src_species = GBT_next_species(gb_src_species)) |
|---|
| 1056 | { |
|---|
| 1057 | const char *name = GBT_get_name(gb_src_species); |
|---|
| 1058 | TEST_REJECT_NULL(name); |
|---|
| 1059 | |
|---|
| 1060 | GBDATA *gb_dest_exists = GBT_find_species(gb_dest, name); |
|---|
| 1061 | TEST_EXPECT_NULL(gb_dest_exists); // this method cannot handle overwrites |
|---|
| 1062 | |
|---|
| 1063 | GBDATA *gb_dest_species = GB_create_container(gb_dest_species_data, "species"); |
|---|
| 1064 | TEST_REJECT_NULL(gb_dest_species); |
|---|
| 1065 | |
|---|
| 1066 | TEST_EXPECT_NO_ERROR(GB_copy_dropProtectMarksAndTempstate(gb_dest_species, gb_src_species)); |
|---|
| 1067 | TEST_EXPECT_ZERO(GB_read_flag(gb_dest_species)); |
|---|
| 1068 | } |
|---|
| 1069 | } |
|---|
| 1070 | |
|---|
| 1071 | // ---------------------------------------------------------------------------- |
|---|
| 1072 | // write transferred data to ascii db + compare with expected result: |
|---|
| 1073 | void save() { /* saves target DB under 'target_ascii' (save mode "a") */ |
|---|
| 1074 | TEST_EXPECT_NO_ERROR(GB_save_as(gb_dest, target_ascii, "a")); |
|---|
| 1075 | } |
|---|
| 1076 | void saveAndCompare(const char *expected_ascii, bool allowAutoUpdate) { /* save() + textfile comparison against 'expected_ascii'; the auto-update copy is only compiled in if TEST_AUTO_UPDATE is defined */ |
|---|
| 1077 | save(); |
|---|
| 1078 | if (allowAutoUpdate) { |
|---|
| 1079 | // #define TEST_AUTO_UPDATE // uncomment to update expected result |
|---|
| 1080 | #if defined(TEST_AUTO_UPDATE) |
|---|
| 1081 | TEST_COPY_FILE(target_ascii, expected_ascii); |
|---|
| 1082 | #endif |
|---|
| 1083 | } |
|---|
| 1084 | TEST_EXPECT_TEXTFILE_DIFFLINES(target_ascii, expected_ascii, 0); |
|---|
| 1085 | } |
|---|
| 1086 | }; |
|---|
| 1087 | |
|---|
| 1088 | static const char *expRuleConfig[] = { /* expected Rule::getConfig() results for the rules added to 'ruleset' in TEST_xferBySet (same order); parsed back in TEST_LATE_ruleConfigsReadable */ |
|---|
| 1089 | "source='lat_lon';target='geolocation'", |
|---|
| 1090 | "source='seq_quality_slv';target='seq/slv_quality'", |
|---|
| 1091 | "source='homop_slv';target='slv_homop'", |
|---|
| 1092 | /* source field missing in test data: */ |
|---|
| 1093 | "source='no1';target='notTransferred'", |
|---|
| 1094 | /* forced target types: */ |
|---|
| 1095 | "source='pubmed_id';target='str2int';type='int'", |
|---|
| 1096 | "source='pubmed_id';target='str2flt';type='float'", |
|---|
| 1097 | "source='stop';target='int2flt';type='float'", |
|---|
| 1098 | "source='stop';target='int2str';type='text'", |
|---|
| 1099 | "source='align_ident_slv';target='flt2str';type='text'", |
|---|
| 1100 | "loss='permitted';source='align_ident_slv';target='flt2int';type='int'", |
|---|
| 1101 | /* ACI rules (single source field): */ |
|---|
| 1102 | "aci='|lower|contains(partial)|isAbove(0)';source='description';target='describedAsPartial'", |
|---|
| 1103 | |
|---|
| 1104 | "aci='|fdiv(2.0)';source='align_bp_score_slv';target='halfBPscoreStr'", |
|---|
| 1105 | "aci='|fdiv(2.0)';source='align_bp_score_slv';target='halfBPscore';type='int'", |
|---|
| 1106 | "aci='|fdiv(2.0)';source='align_bp_score_slv';target='halfBPscoreFlt';type='float'", |
|---|
| 1107 | |
|---|
| 1108 | "aci='|fmult(3.5)';source='homop_slv';target='multiHomopStr'", |
|---|
| 1109 | "aci='|fmult(3.5)';source='homop_slv';target='multiHomopInt';type='int'", |
|---|
| 1110 | "aci='|fmult(3.5)';source='homop_slv';target='multiHomop';type='float'", |
|---|
| 1111 | /* concatenating rules (multiple source fields + separator): */ |
|---|
| 1112 | "sep='/';source='embl_class;embl_division';target='embl_class_division'", |
|---|
| 1113 | "sep='-';source='align_startpos_slv;align_stoppos_slv';target='align_range_slv'", |
|---|
| 1114 | "sep='\\'';source='no1;align_bp_score_slv;no2;rel_ltp;no3';target='missing'", |
|---|
| 1115 | |
|---|
| 1116 | "sep=';';source='NO1;no2;no3';target='skipped'", |
|---|
| 1117 | /* concatenation + ACI: */ |
|---|
| 1118 | "aci='|upper';sep=':';source='embl_class;embl_division';target='emblClassDivision'", |
|---|
| 1119 | "aci='|\"<\";dd;\">\"';sep=';';source='no1;no2;no3';target='skipped2'", |
|---|
| 1120 | }; |
|---|
| 1121 | |
|---|
| 1122 | static const char *EXPECTED_ASCII = "TEST_fields_xferred_expected.arb"; // ../../UNIT_TESTER/run/TEST_fields_xferred_expected.arb (expected result of TEST_xferBySet) |
|---|
| 1123 | static const char *EXPECTED_ASCII_CLONED = "TEST_fields_cloned_expected.arb"; // ../../UNIT_TESTER/run/TEST_fields_cloned_expected.arb (usage not visible in this part of the file) |
|---|
| 1124 | |
|---|
| 1125 | void TEST_xferBySet() { |
|---|
| 1126 | // tests data transfer between items using RuleSet|s |
|---|
| 1127 | using namespace FieldTransfer; |
|---|
| 1128 | XferEnv env; |
|---|
| 1129 | |
|---|
| 1130 | // -------------------------------------------------------------- |
|---|
| 1131 | // create rules and transfer item data using RuleSet|s: |
|---|
| 1132 | |
|---|
| 1133 | typedef std::vector<FailingRule> FailingRuleCont; |
|---|
| 1134 | |
|---|
| 1135 | RuleSet ruleset; |
|---|
| 1136 | FailingRuleCont failing; // failing rules should go here |
|---|
| 1137 | /* FAILING_add stores a rule together with a text expected to be contained in the error it produces: */ |
|---|
| 1138 | #define FAILING_add(rule,msg) failing.push_back(FailingRule(rule, msg)) |
|---|
| 1139 | |
|---|
| 1140 | ruleset.add(Rule::makeSimple("lat_lon", NOSEP, "geolocation")); // STRING->STRING |
|---|
| 1141 | ruleset.add(Rule::makeSimple("seq_quality_slv", NOSEP, "seq/slv_quality")); // INT ->INT (generate hierarchical target key) |
|---|
| 1142 | ruleset.add(Rule::makeSimple("homop_slv", NOSEP, "slv_homop")); // FLOAT ->FLOAT |
|---|
| 1143 | |
|---|
| 1144 | ruleset.add(Rule::makeSimple("no1", NOSEP, "notTransferred")); // missing fields are skipped |
|---|
| 1145 | |
|---|
| 1146 | // force target types |
|---|
| 1147 | ruleset.add(Rule::forceTargetType(GB_INT, Rule::makeSimple("pubmed_id", NOSEP, "str2int"))); // STRING->INT |
|---|
| 1148 | ruleset.add(Rule::forceTargetType(GB_FLOAT, Rule::makeSimple("pubmed_id", NOSEP, "str2flt"))); // STRING->FLOAT |
|---|
| 1149 | ruleset.add(Rule::forceTargetType(GB_FLOAT, Rule::makeSimple("stop", NOSEP, "int2flt"))); // INT->FLOAT |
|---|
| 1150 | ruleset.add(Rule::forceTargetType(GB_STRING, Rule::makeSimple("stop", NOSEP, "int2str"))); // INT->STRING |
|---|
| 1151 | ruleset.add(Rule::forceTargetType(GB_STRING, Rule::makeSimple("align_ident_slv", NOSEP, "flt2str"))); // FLOAT->STRING |
|---|
| 1152 | FAILING_add(Rule::forceTargetType(GB_INT, Rule::makeSimple("align_ident_slv", NOSEP, "dummy")), "lossy float->int type conversion (9.484605e+01->9.500000e+01)"); // FLOAT->INT |
|---|
| 1153 | ruleset.add(Rule::permitPrecisionLoss(Rule::forceTargetType(GB_INT, Rule::makeSimple("align_ident_slv", NOSEP, "flt2int")))); // FLOAT->INT |
|---|
| 1154 | // @@@ test forcedTargetType(GB_BITS) |
|---|
| 1155 | |
|---|
| 1156 | // @@@ test transfer with existing target keys (and mismatching i.e. not default types) |
|---|
| 1157 | // -> shall force type conversion (e.g. int->float, float->string, string->int) |
|---|
| 1158 | |
|---|
| 1159 | ruleset.add(Rule::makeAciConverter("description", NOSEP, "|lower|contains(partial)|isAbove(0)", "describedAsPartial")); // transports STRING through ACI to STRING |
|---|
| 1160 | |
|---|
| 1161 | // INT | ACI -> STRING|INT|FLOAT: |
|---|
| 1162 | ruleset.add(Rule::makeAciConverter("align_bp_score_slv", NOSEP, "|fdiv(2.0)", "halfBPscoreStr")); // transports INT through ACI to STRING |
|---|
| 1163 | ruleset.add(Rule::forceTargetType(GB_INT, Rule::makeAciConverter("align_bp_score_slv", NOSEP, "|fdiv(2.0)", "halfBPscore"))); // transports INT through ACI to INT // @@@ why does this not complain about conversion loss? (e.g. PurGergo 58.5 -> 58). examine!!! |
|---|
| 1164 | ruleset.add(Rule::forceTargetType(GB_FLOAT, Rule::makeAciConverter("align_bp_score_slv", NOSEP, "|fdiv(2.0)", "halfBPscoreFlt"))); // transports INT through ACI to FLOAT |
|---|
| 1165 | |
|---|
| 1166 | // FLOAT | ACI -> STRING: |
|---|
| 1167 | ruleset.add(Rule::makeAciConverter("homop_slv", NOSEP, "|fmult(3.5)", "multiHomopStr")); // transports FLOAT through ACI to STRING |
|---|
| 1168 | ruleset.add(Rule::forceTargetType(GB_INT, Rule::makeAciConverter("homop_slv", NOSEP, "|fmult(3.5)", "multiHomopInt"))); // transports FLOAT through ACI to INT // @@@ conversion loss happens (for all species?) // @@@ examine! |
|---|
| 1169 | ruleset.add(Rule::forceTargetType(GB_FLOAT, Rule::makeAciConverter("homop_slv", NOSEP, "|fmult(3.5)", "multiHomop"))); // transports FLOAT through ACI to FLOAT |
|---|
| 1170 | |
|---|
| 1171 | // @@@ test ACIs containing the following chars: ['"\\ = ] |
|---|
| 1172 | |
|---|
| 1173 | // test concatenating rules: |
|---|
| 1174 | ruleset.add(Rule::makeSimple("embl_class;embl_division", "/", "embl_class_division")); // concat 2 STRINGs |
|---|
| 1175 | ruleset.add(Rule::makeSimple("align_startpos_slv;align_stoppos_slv", "-", "align_range_slv")); // concat 2 INTs |
|---|
| 1176 | ruleset.add(Rule::makeSimple("no1;align_bp_score_slv;no2;rel_ltp;no3", "'", "missing")); // concat INT + STRING (plus 3 non-existing fields) |
|---|
| 1177 | |
|---|
| 1178 | ruleset.add(Rule::makeSimple("NO1;no2;no3", ";", "skipped")); // concat 3 non-existing fields -> field 'skipped' is NOT written to result DB |
|---|
| 1179 | |
|---|
| 1180 | // test concatenation + ACI: |
|---|
| 1181 | ruleset.add(Rule::makeAciConverter("embl_class;embl_division", ":", "|upper", "emblClassDivision")); // concat 2 STRINGs |
|---|
| 1182 | ruleset.add(Rule::makeAciConverter("no1;no2;no3", ";", "|\"<\";dd;\">\"", "skipped2")); // concat 3 non-existing fields and apply ACI -> field 'skipped2' is NOT written to result DB + ACI not applied |
|---|
| 1183 | |
|---|
| 1184 | // ---------------------------------------------------------------------------- |
|---|
| 1185 | // please do not change 'ruleset' below this point |
|---|
| 1186 | // ---------------------------------------------------------------------------- |
|---|
| 1187 | |
|---|
| 1188 | // test input/output field extraction |
|---|
| 1189 | { |
|---|
| 1190 | StrArray input; |
|---|
| 1191 | StrArray output; |
|---|
| 1192 | |
|---|
| 1193 | ruleset.extractUsedFields(input, output); |
|---|
| 1194 | |
|---|
| 1195 | TEST_EXPECT_STRARRAY_CONTAINS(input, ';', "align_bp_score_slv;align_ident_slv;align_startpos_slv;align_stoppos_slv;description;embl_class;embl_division;homop_slv;lat_lon;no1;no2;no3;pubmed_id;rel_ltp;seq_quality_slv;stop"); |
|---|
| 1196 | TEST_EXPECT_STRARRAY_CONTAINS(output, ';', "align_range_slv;describedAsPartial;emblClassDivision;embl_class_division;flt2int;flt2str;geolocation;halfBPscore;halfBPscoreFlt;halfBPscoreStr;int2flt;int2str;missing;multiHomop;multiHomopInt;multiHomopStr;notTransferred;seq/slv_quality;skipped;skipped2;slv_homop;str2flt;str2int"); |
|---|
| 1197 | } |
|---|
| 1198 | |
|---|
| 1199 | // convert all rules in 'ruleset' into string and test versus expRuleConfig: |
|---|
| 1200 | const size_t cfgs = ARRAY_ELEMS(expRuleConfig); |
|---|
| 1201 | const size_t rulz = ruleset.size(); |
|---|
| 1202 | { |
|---|
| 1203 | const size_t testableRepr = min(cfgs, rulz); |
|---|
| 1204 | for (size_t r = 0; r<testableRepr; ++r) { |
|---|
| 1205 | TEST_ANNOTATE(GBS_global_string("r=%zu", r)); |
|---|
| 1206 | const Rule& rule = ruleset.get(r); |
|---|
| 1207 | string rep = rule.getConfig(); |
|---|
| 1208 | TEST_EXPECT_EQUAL(expRuleConfig[r], rep.c_str()); |
|---|
| 1209 | } |
|---|
| 1210 | } |
|---|
| 1211 | /* number of rules has to match number of expected configs: */ |
|---|
| 1212 | TEST_EXPECT_EQUAL(cfgs, rulz); |
|---|
| 1213 | |
|---|
| 1214 | // test no 2 rules have equal config: |
|---|
| 1215 | for (size_t r1 = 0; r1<rulz; ++r1) { |
|---|
| 1216 | for (size_t r2 = r1+1; r2<rulz; ++r2) { |
|---|
| 1217 | TEST_ANNOTATE(GBS_global_string("r1/r2=%zu/%zu", r1, r2)); |
|---|
| 1218 | TEST_EXPECT_DIFFERENT(expRuleConfig[r1], expRuleConfig[r2]); |
|---|
| 1219 | } |
|---|
| 1220 | } |
|---|
| 1221 | TEST_ANNOTATE(NULp); |
|---|
| 1222 | /* apply 'ruleset' to all species (target species get created on demand): */ |
|---|
| 1223 | env.transferAllSpeciesBy(ruleset); |
|---|
| 1224 | |
|---|
| 1225 | // ------------------------------------------- |
|---|
| 1226 | // test missing source-/target-item: |
|---|
| 1227 | { |
|---|
| 1228 | GB_transaction tas(env.gb_src); |
|---|
| 1229 | GB_transaction tad(env.gb_dest); |
|---|
| 1230 | |
|---|
| 1231 | GBDATA *gb_src_species = GBT_first_species(env.gb_src); |
|---|
| 1232 | TEST_REJECT_NULL(gb_src_species); |
|---|
| 1233 | |
|---|
| 1234 | const char *name = GBT_get_name(gb_src_species); |
|---|
| 1235 | TEST_REJECT_NULL(name); |
|---|
| 1236 | |
|---|
| 1237 | GBDATA *gb_dest_species = GBT_find_species(env.gb_dest, name); // already has been created by 'transferAllSpeciesBy' above |
|---|
| 1238 | TEST_REJECT_NULL(gb_dest_species); |
|---|
| 1239 | |
|---|
| 1240 | TEST_EXPECT_ERROR_CONTAINS(ruleset.transferBy(NULp, gb_dest_species), "lacking item to readFrom"); |
|---|
| 1241 | TEST_EXPECT_ERROR_CONTAINS(ruleset.transferBy(gb_src_species, NULp), "lacking item to writeTo"); |
|---|
| 1242 | } |
|---|
| 1243 | |
|---|
| 1244 | // --------------------------------------------- |
|---|
| 1245 | // test rules failing during transfer: |
|---|
| 1246 | FAILING_add(Rule::forceTargetType(GB_FLOAT, Rule::makeSimple("nuc_region", NOSEP, "str2flt")), "cannot convert '1..1494' to float"); // test conversion errors (e.g. non-numeric string -> int or float) |
|---|
| 1247 | FAILING_add(Rule::makeAciConverter("homop_slv", NOSEP, "|fmult(3.5, ooo)", "dummy"), "Unknown command '3.5'"); |
|---|
| 1248 | FAILING_add(Rule::makeSimple("stop", NOSEP, "xx*xx"), "Invalid character '*' in 'xx*xx'"); |
|---|
| 1249 | FAILING_add(Rule::makeSimple("ali_16s", NOSEP, "whatever"), "cannot read as data ('ali_16s' is a container)"); |
|---|
| 1250 | /* each failing rule is applied alone (in a RuleSet of its own); the resulting error has to contain the expected text: */ |
|---|
| 1251 | for (FailingRuleCont::const_iterator failRule = failing.begin(); failRule != failing.end(); ++failRule) { |
|---|
| 1252 | const FailingRule& testableRule = *failRule; |
|---|
| 1253 | RuleSet separated; |
|---|
| 1254 | separated.add(new Rule(testableRule)); |
|---|
| 1255 | |
|---|
| 1256 | // apply rule: |
|---|
| 1257 | { |
|---|
| 1258 | GB_transaction tas(env.gb_src); |
|---|
| 1259 | GB_transaction tad(env.gb_dest); |
|---|
| 1260 | |
|---|
| 1261 | GB_ERROR error = NULp; |
|---|
| 1262 | |
|---|
| 1263 | for (GBDATA *gb_src_species = GBT_first_species(env.gb_src); |
|---|
| 1264 | gb_src_species && !error; |
|---|
| 1265 | gb_src_species = GBT_next_species(gb_src_species)) |
|---|
| 1266 | { |
|---|
| 1267 | const char *name = GBT_get_name(gb_src_species); |
|---|
| 1268 | if (!name) { |
|---|
| 1269 | error = "cannot search for unnamed species"; |
|---|
| 1270 | } |
|---|
| 1271 | else { |
|---|
| 1272 | GBDATA *gb_dest_species = GBT_find_species(env.gb_dest, name); // already has been created by 'transferBy' above |
|---|
| 1273 | error = separated.transferBy(gb_src_species, gb_dest_species); |
|---|
| 1274 | } |
|---|
| 1275 | } |
|---|
| 1276 | tad.close(error); // aborts transaction (if error occurs, which is expected here) |
|---|
| 1277 | TEST_EXPECT_ERROR_CONTAINS(error, testableRule.expectedPartOfFailure()); |
|---|
| 1278 | } |
|---|
| 1279 | } |
|---|
| 1280 | |
|---|
| 1281 | // ---------------------------------------------------------------- |
|---|
| 1282 | // test type of each field is same across all items of DB |
|---|
| 1283 | { |
|---|
| 1284 | GB_transaction tad(env.gb_dest); |
|---|
| 1285 | GBDATA *gb_fake_species_data = GB_create_container(env.gb_dest, "tmp"); // necessary because GBT_scan_db never scans DIRECT children |
|---|
| 1286 | /* maps field name -> type found when the field was seen first: */ |
|---|
| 1287 | typedef map<string,GB_TYPES> TypedField; |
|---|
| 1288 | TypedField seen; |
|---|
| 1289 | |
|---|
| 1290 | GB_ERROR error = NULp; |
|---|
| 1291 | for (GBDATA *gb_dest_species = GBT_first_species(env.gb_dest); |
|---|
| 1292 | gb_dest_species && !error; |
|---|
| 1293 | gb_dest_species = GBT_next_species(gb_dest_species)) |
|---|
| 1294 | { |
|---|
| 1295 | TEST_ANNOTATE(GBS_global_string("name=%s", GBT_get_name_or_description(gb_dest_species))); |
|---|
| 1296 | /* scan a temporary copy of the species (deleted again at end of this iteration): */ |
|---|
| 1297 | GBDATA *gb_specCopy = GB_create_container(gb_fake_species_data, "tmp"); |
|---|
| 1298 | error = GB_copy_dropProtectMarksAndTempstate(gb_specCopy, gb_dest_species); |
|---|
| 1299 | |
|---|
| 1300 | if (error) break; |
|---|
| 1301 | |
|---|
| 1302 | StrArray curr; |
|---|
| 1303 | GBT_scan_db(curr, gb_fake_species_data, NULp); |
|---|
| 1304 | TEST_REJECT_ZERO(curr.size()); // expect fields |
|---|
| 1305 | |
|---|
| 1306 | for (int i = 0; curr[i]; ++i) { |
|---|
| 1307 | const char *scanned = curr[i]; // 1st char is type |
|---|
| 1308 | const char *field = scanned+1; |
|---|
| 1309 | GB_TYPES type = GB_TYPES(scanned[0]); |
|---|
| 1310 | |
|---|
| 1311 | TypedField::iterator found = seen.find(field); |
|---|
| 1312 | if (found != seen.end()) { |
|---|
| 1313 | if (type != found->second) { |
|---|
| 1314 | TEST_ANNOTATE(field); |
|---|
| 1315 | TEST_EXPECT_EQUAL(type, found->second); // existing field has to have same type (in all species) |
|---|
| 1316 | } |
|---|
| 1317 | } |
|---|
| 1318 | else { |
|---|
| 1319 | fprintf(stderr, "field='%s' type='%i'\n", field, type); |
|---|
| 1320 | seen[field] = type; // insert new field |
|---|
| 1321 | } |
|---|
| 1322 | } |
|---|
| 1323 | |
|---|
| 1324 | if (!error) error = GB_delete(gb_specCopy); |
|---|
| 1325 | } |
|---|
| 1326 | if (!error) error = GB_delete(gb_fake_species_data); |
|---|
| 1327 | TEST_EXPECT_NO_ERROR(error); |
|---|
| 1328 | } |
|---|
| 1329 | |
|---|
| 1330 | // ---------------------------------------------------------------------------- |
|---|
| 1331 | xf_assert(rulz == ruleset.size()); // please do not change 'ruleset' after 'rulz' has been set! |
|---|
| 1332 | |
|---|
| 1333 | env.saveAndCompare(EXPECTED_ASCII, true); |
|---|
| 1334 | } |
|---|
| 1335 | |
|---|
| 1336 | void TEST_LATE_ruleConfigsReadable() { |
|---|
| 1337 | // run this test later than TEST_xferBySet |
|---|
| 1338 | |
|---|
| 1339 | using namespace FieldTransfer; |
|---|
| 1340 | |
|---|
| 1341 | { |
|---|
| 1342 | // test failing Rule configs: |
|---|
| 1343 | struct InvalidConfig { |
|---|
| 1344 | const char *config; |
|---|
| 1345 | GB_ERROR failure; |
|---|
| 1346 | }; |
|---|
| 1347 | InvalidConfig invalidCfg[] = { |
|---|
| 1348 | { TARGET "='xxx'", "missing source entry" }, |
|---|
| 1349 | { SOURCE "='xxx'", "missing target entry" }, |
|---|
| 1350 | { "tag='halfquot;", "could not find matching quote" }, |
|---|
| 1351 | { TARGET "='xxx';" SOURCE "='xxx';type='bizarre'", "invalid type id 'bizarre'" }, |
|---|
| 1352 | }; |
|---|
| 1353 | |
|---|
| 1354 | for (size_t i = 0; i<ARRAY_ELEMS(invalidCfg); ++i) { |
|---|
| 1355 | InvalidConfig& CFG = invalidCfg[i]; |
|---|
| 1356 | TEST_ANNOTATE(GBS_global_string("invalidCfg='%s'", CFG.config)); |
|---|
| 1357 | |
|---|
| 1358 | ErrorOrRulePtr result = Rule::makeFromConfig(CFG.config); |
|---|
| 1359 | TEST_EXPECT(result.hasError()); |
|---|
| 1360 | TEST_EXPECT_ERROR_CONTAINS(result.getError(), CFG.failure); |
|---|
| 1361 | } |
|---|
| 1362 | TEST_ANNOTATE(NULp); |
|---|
| 1363 | } |
|---|
| 1364 | |
|---|
| 1365 | const size_t cfgs = ARRAY_ELEMS(expRuleConfig); |
|---|
| 1366 | RuleSet ruleset; |
|---|
| 1367 | |
|---|
| 1368 | // convert config->Rule + Rule->config + compare configs: |
|---|
| 1369 | for (size_t r = 0; r<cfgs; ++r) { |
|---|
| 1370 | const char *config = expRuleConfig[r]; |
|---|
| 1371 | |
|---|
| 1372 | ErrorOrRulePtr result = Rule::makeFromConfig(config); |
|---|
| 1373 | if (result.hasError()) { |
|---|
| 1374 | TEST_EXPECT_NO_ERROR(result.getError()); |
|---|
| 1375 | } |
|---|
| 1376 | else { |
|---|
| 1377 | RulePtr rule = result.getValue(); |
|---|
| 1378 | string reloadedConfig = rule->getConfig(); |
|---|
| 1379 | TEST_EXPECT_EQUAL(reloadedConfig, config); |
|---|
| 1380 | |
|---|
| 1381 | ruleset.add(rule); |
|---|
| 1382 | } |
|---|
| 1383 | } |
|---|
| 1384 | |
|---|
| 1385 | // test RuleSet comment: |
|---|
| 1386 | const char *COMMENT = "A multi-\nline-\ntest-\ncomment."; |
|---|
| 1387 | ruleset.setComment(COMMENT); |
|---|
| 1388 | TEST_EXPECT_EQUAL(ruleset.getComment(), COMMENT); |
|---|
| 1389 | |
|---|
| 1390 | ruleset.set_transferUndefFields(true); |
|---|
| 1391 | TEST_EXPECT(ruleset.shallTransferUndefFields()); |
|---|
| 1392 | |
|---|
| 1393 | // save RuleSet + reload it + compare: |
|---|
| 1394 | RuleSet reloaded_ruleset; |
|---|
| 1395 | { |
|---|
| 1396 | const char *rulesetSaved = "impexp/rulesetCurr.fts"; |
|---|
| 1397 | const char *rulesetExpected = "impexp/ruleset.fts"; |
|---|
| 1398 | |
|---|
| 1399 | TEST_EXPECT_NO_ERROR(ruleset.saveTo(rulesetSaved)); |
|---|
| 1400 | // #define TEST_AUTO_UPDATE_RS // uncomment to update expected result |
|---|
| 1401 | #if defined(TEST_AUTO_UPDATE_RS) |
|---|
| 1402 | TEST_COPY_FILE(rulesetSaved, rulesetExpected); |
|---|
| 1403 | #endif |
|---|
| 1404 | TEST_EXPECT_TEXTFILE_DIFFLINES(rulesetSaved, rulesetExpected, 0); |
|---|
| 1405 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(rulesetSaved)); |
|---|
| 1406 | |
|---|
| 1407 | // reload RuleSet: |
|---|
| 1408 | { |
|---|
| 1409 | ErrorOrRuleSetPtr loaded = RuleSet::loadFrom(rulesetExpected); |
|---|
| 1410 | if (loaded.hasError()) TEST_EXPECT_NO_ERROR(loaded.getError()); // if error -> dump+fail |
|---|
| 1411 | |
|---|
| 1412 | const RuleSet& loadedSet = *loaded.getValue(); |
|---|
| 1413 | TEST_EXPECT_EQUAL(loadedSet.size(), ruleset.size()); |
|---|
| 1414 | |
|---|
| 1415 | // compare reloaded rules configs vs. array of expected configs. |
|---|
| 1416 | // tests: |
|---|
| 1417 | // - save+load is correct and complete |
|---|
| 1418 | // - Rule order is stable |
|---|
| 1419 | for (size_t r = 0; r<loadedSet.size(); ++r) { |
|---|
| 1420 | const Rule& rule = loadedSet.get(r); |
|---|
| 1421 | string cfg = rule.getConfig(); |
|---|
| 1422 | TEST_EXPECT_EQUAL(cfg.c_str(), expRuleConfig[r]); |
|---|
| 1423 | } |
|---|
| 1424 | |
|---|
| 1425 | // test comment survives reload: |
|---|
| 1426 | TEST_EXPECT_EQUAL(loadedSet.getComment(), COMMENT); |
|---|
| 1427 | |
|---|
| 1428 | // test transferUndefFields survives save/load: |
|---|
| 1429 | TEST_EXPECT(loadedSet.shallTransferUndefFields()); |
|---|
| 1430 | |
|---|
| 1431 | // use reloaded Ruleset for tests below: |
|---|
| 1432 | reloaded_ruleset = loadedSet; // also tests RuleSet-copy-ctor works. |
|---|
| 1433 | |
|---|
| 1434 | // test comment gets copied: |
|---|
| 1435 | TEST_EXPECT_EQUAL(reloaded_ruleset.getComment(), loadedSet.getComment()); |
|---|
| 1436 | } |
|---|
| 1437 | } |
|---|
| 1438 | |
|---|
| 1439 | // test RuleSet load/save errors: |
|---|
| 1440 | { |
|---|
| 1441 | const char *noSuchFile = "nosuch.fts"; |
|---|
| 1442 | ErrorOrRuleSetPtr loaded = RuleSet::loadFrom(noSuchFile); |
|---|
| 1443 | |
|---|
| 1444 | TEST_EXPECT(loaded.hasError()); |
|---|
| 1445 | TEST_EXPECT_ERROR_CONTAINS(loaded.getError(), "No such file or directory"); |
|---|
| 1446 | |
|---|
| 1447 | const char *unsavable = "noSuchDir/whatever.fts"; |
|---|
| 1448 | TEST_EXPECT_ERROR_CONTAINS(ruleset.saveTo(unsavable), "No such file or directory"); |
|---|
| 1449 | } |
|---|
| 1450 | |
|---|
| 1451 | // load empty file -> empty RuleSet |
|---|
| 1452 | { |
|---|
| 1453 | const char *emptyFile = "general/empty.input"; |
|---|
| 1454 | |
|---|
| 1455 | ErrorOrRuleSetPtr empty = RuleSet::loadFrom(emptyFile); |
|---|
| 1456 | TEST_REJECT(empty.hasError()); |
|---|
| 1457 | |
|---|
| 1458 | const RuleSet& emptySet = *empty.getValue(); |
|---|
| 1459 | TEST_EXPECT_ZERO(emptySet.size()); // test emptySet has no rules |
|---|
| 1460 | TEST_EXPECT_EQUAL(emptySet.getComment(), ""); // test emptySet has no comment |
|---|
| 1461 | } |
|---|
| 1462 | |
|---|
| 1463 | // use 'reloaded_ruleset' to modify same DB (as above in TEST_xferBySet): |
|---|
| 1464 | { |
|---|
| 1465 | XferEnv env; |
|---|
| 1466 | env.transferAllSpeciesBy(reloaded_ruleset); |
|---|
| 1467 | env.saveAndCompare(EXPECTED_ASCII, false); // if this fails -> saving/reloading config looses Rule information |
|---|
| 1468 | } |
|---|
| 1469 | } |
|---|
| 1470 | |
|---|
| 1471 | #define CUSTOM_ALI_TRANSPORT_ERROR "custom ali transport error" |
|---|
| 1472 | |
|---|
| 1473 | struct TestAlignmentTransporter FINAL_TYPE : public FieldTransfer::AlignmentTransporter { |
|---|
| 1474 | int mode; |
|---|
| 1475 | TestAlignmentTransporter(int mode_) : mode(mode_) {} |
|---|
| 1476 | bool shallCopyBefore() const OVERRIDE { |
|---|
| 1477 | return false; // do not call copyAlignments() b4 calling transport() |
|---|
| 1478 | } |
|---|
| 1479 | GB_ERROR transport(GBDATA*gb_src_item, GBDATA *gb_dst_item) const OVERRIDE { |
|---|
| 1480 | GB_ERROR error = NULp; |
|---|
| 1481 | switch (mode) { |
|---|
| 1482 | case 1: // custom error |
|---|
| 1483 | error = CUSTOM_ALI_TRANSPORT_ERROR; |
|---|
| 1484 | break; |
|---|
| 1485 | |
|---|
| 1486 | case 2: // do nothing -> sequence still has old value (or is not copied) |
|---|
| 1487 | break; |
|---|
| 1488 | |
|---|
| 1489 | case 3: { // write reverse sequence data |
|---|
| 1490 | GBDATA *gb_src_data = GBT_find_sequence(gb_src_item, "ali_16s"); |
|---|
| 1491 | GBDATA *gb_dst_data = GBT_find_sequence(gb_dst_item, "ali_16s"); |
|---|
| 1492 | |
|---|
| 1493 | if (!gb_dst_data) { // destination has no 'ali_16s' -> clone whole container |
|---|
| 1494 | GBDATA *gb_src_ali = GB_get_father(gb_src_data); |
|---|
| 1495 | |
|---|
| 1496 | error = GB_incur_error_if(!GB_clone(gb_dst_item, gb_src_ali)); |
|---|
| 1497 | if (!error) { |
|---|
| 1498 | gb_dst_data = GBT_find_sequence(gb_dst_item, "ali_16s"); |
|---|
| 1499 | xf_assert(gb_dst_data); |
|---|
| 1500 | } |
|---|
| 1501 | } |
|---|
| 1502 | |
|---|
| 1503 | if (!error) { |
|---|
| 1504 | const char *seq = GB_read_char_pntr(gb_src_data); |
|---|
| 1505 | char *rev = GBT_reverseNucSequence(seq, strlen(seq)); |
|---|
| 1506 | |
|---|
| 1507 | error = GB_write_string(gb_dst_data, rev); |
|---|
| 1508 | free(rev); |
|---|
| 1509 | } |
|---|
| 1510 | break; |
|---|
| 1511 | } |
|---|
| 1512 | default: xf_assert(0); break; // unsupported mode |
|---|
| 1513 | } |
|---|
| 1514 | return error; |
|---|
| 1515 | } |
|---|
| 1516 | }; |
|---|
| 1517 | |
|---|
| 1518 | void TEST_clone_by_ruleset() { |
|---|
| 1519 | using namespace FieldTransfer; |
|---|
| 1520 | |
|---|
| 1521 | RuleSetPtr ruleset; |
|---|
| 1522 | { |
|---|
| 1523 | const char *rulesetExpected = "impexp/ruleset.fts"; // same ruleset as used in tests above |
|---|
| 1524 | |
|---|
| 1525 | ErrorOrRuleSetPtr loaded = RuleSet::loadFrom(rulesetExpected); |
|---|
| 1526 | if (loaded.hasError()) TEST_EXPECT_NO_ERROR(loaded.getError()); // if RuleSet load error -> dump+fail. see .@loadFrom |
|---|
| 1527 | |
|---|
| 1528 | ruleset = loaded.getValue(); |
|---|
| 1529 | } |
|---|
| 1530 | |
|---|
| 1531 | // use 'ruleset' to modify same DB (but use ItemClonedByRuleSet here) |
|---|
| 1532 | { |
|---|
| 1533 | XferEnv env; |
|---|
| 1534 | env.copyAllSpecies(); // copy species of input DB -> output DB |
|---|
| 1535 | |
|---|
| 1536 | GBDATA *gb_overwritten_species = NULp; |
|---|
| 1537 | char *overwrittenName = NULp; |
|---|
| 1538 | |
|---|
| 1539 | // clone some species (inside output db): |
|---|
| 1540 | { |
|---|
| 1541 | GB_transaction ta(env.gb_dest); |
|---|
| 1542 | |
|---|
| 1543 | GBDATA *gb_next_species = NULp; |
|---|
| 1544 | GBDATA *gb_first_clone = NULp; |
|---|
| 1545 | int count = 0; |
|---|
| 1546 | |
|---|
| 1547 | for (GBDATA *gb_species = GBT_first_species(env.gb_dest); |
|---|
| 1548 | gb_species && gb_species != gb_first_clone; |
|---|
| 1549 | gb_species = gb_next_species, ++count) |
|---|
| 1550 | { |
|---|
| 1551 | gb_next_species = GBT_next_species(gb_species); |
|---|
| 1552 | |
|---|
| 1553 | TEST_EXPECT_EQUAL(GB_countEntries(gb_species, "name"), 1); // safety-belt (had problems with duplicate name-entries) |
|---|
| 1554 | |
|---|
| 1555 | char *orgName = nulldup(GBT_get_name(gb_species)); |
|---|
| 1556 | TEST_REJECT_NULL(orgName); |
|---|
| 1557 | |
|---|
| 1558 | GBDATA *gb_clone = NULp; |
|---|
| 1559 | ItemCloneType cloneHow = (count == 3 || count == 7) ? RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS : REPLACE_ITEM_BY_CLONE; |
|---|
| 1560 | |
|---|
| 1561 | ruleset->set_transferUndefFields(count == 4); // test transfer of undefined fields for species #4 |
|---|
| 1562 | |
|---|
| 1563 | { |
|---|
| 1564 | ItemClonedByRuleSet clone(gb_species, CLONE_ITEM_SPECIES, ruleset, cloneHow, NULp, NULp); |
|---|
| 1565 | |
|---|
| 1566 | if (clone.has_error()) TEST_EXPECT_NO_ERROR(clone.get_error()); |
|---|
| 1567 | gb_clone = clone.get_clone(); |
|---|
| 1568 | if (!gb_first_clone) { |
|---|
| 1569 | xf_assert(cloneHow == REPLACE_ITEM_BY_CLONE); // limit will not work otherwise |
|---|
| 1570 | gb_first_clone = gb_clone; |
|---|
| 1571 | } |
|---|
| 1572 | |
|---|
| 1573 | switch (cloneHow) { |
|---|
| 1574 | case REPLACE_ITEM_BY_CLONE: |
|---|
| 1575 | TEST_EXPECT_NULL(gb_species); |
|---|
| 1576 | break; |
|---|
| 1577 | case RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS: |
|---|
| 1578 | TEST_EXPECT_EQUAL(GB_countEntries(gb_species, "name"), 1); |
|---|
| 1579 | TEST_EXPECT_EQUAL(GBT_get_name(gb_species), "fake"); // @@@ need a temporary name which cannot clash with existing names |
|---|
| 1580 | break; |
|---|
| 1581 | default: |
|---|
| 1582 | xf_assert(0); // not tested here |
|---|
| 1583 | break; |
|---|
| 1584 | } |
|---|
| 1585 | |
|---|
| 1586 | TEST_EXPECT_EQUAL(GB_countEntries(gb_clone, "name"), 1); |
|---|
| 1587 | TEST_EXPECT_EQUAL(GBT_get_name(gb_clone), orgName); |
|---|
| 1588 | } |
|---|
| 1589 | // 'clone' has been destroyed now! |
|---|
| 1590 | |
|---|
| 1591 | if (cloneHow == RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS) { |
|---|
| 1592 | TEST_EXPECT_EQUAL(GBT_get_name(gb_species), orgName); // rename back worked (RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS) |
|---|
| 1593 | } |
|---|
| 1594 | int orgNameCount = 0; |
|---|
| 1595 | for (GBDATA *gb_peek = GBT_first_species(env.gb_dest); gb_peek; gb_peek = GBT_next_species(gb_peek)) { |
|---|
| 1596 | bool hasOrgName = strcmp(GBT_get_name(gb_peek), orgName) == 0; |
|---|
| 1597 | orgNameCount += hasOrgName; |
|---|
| 1598 | switch (cloneHow) { |
|---|
| 1599 | case REPLACE_ITEM_BY_CLONE: |
|---|
| 1600 | if (hasOrgName) TEST_EXPECT(gb_peek == gb_clone); // orgName only used in persisting clone |
|---|
| 1601 | TEST_EXPECT(gb_peek != gb_species); // species has been removed |
|---|
| 1602 | break; |
|---|
| 1603 | case RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS: |
|---|
| 1604 | if (hasOrgName) TEST_EXPECT(gb_peek == gb_species); // orgName only used in original species (i.e. temp. clone did vanish) |
|---|
| 1605 | TEST_EXPECT(gb_peek != gb_clone); // clone has been removed |
|---|
| 1606 | break; |
|---|
| 1607 | default: |
|---|
| 1608 | xf_assert(0); // not tested here |
|---|
| 1609 | break; |
|---|
| 1610 | } |
|---|
| 1611 | // @@@ also test against "fake" names? |
|---|
| 1612 | } |
|---|
| 1613 | TEST_EXPECT_EQUAL(orgNameCount, 1); // species with duplicate names unwanted |
|---|
| 1614 | |
|---|
| 1615 | if (count == 3) { |
|---|
| 1616 | gb_overwritten_species = gb_species; // = copy of original |
|---|
| 1617 | overwrittenName = ARB_strdup(orgName); |
|---|
| 1618 | } |
|---|
| 1619 | free(orgName); |
|---|
| 1620 | } |
|---|
| 1621 | } |
|---|
| 1622 | |
|---|
| 1623 | // test merging one species from source DB onto existing (cloned) species in dest DB: |
|---|
| 1624 | { |
|---|
| 1625 | GBDATA *gb_source_species; |
|---|
| 1626 | |
|---|
| 1627 | { |
|---|
| 1628 | GB_transaction ta1(env.gb_src); |
|---|
| 1629 | GB_transaction ta2(env.gb_dest); |
|---|
| 1630 | |
|---|
| 1631 | gb_source_species = GBT_find_species(env.gb_src, overwrittenName); |
|---|
| 1632 | TEST_REJECT_NULL(gb_source_species); // (in gb_src) |
|---|
| 1633 | TEST_REJECT_NULL(gb_overwritten_species); // (in gb_dest) |
|---|
| 1634 | |
|---|
| 1635 | { |
|---|
| 1636 | GBDATA *gb_name = GB_entry(gb_overwritten_species, "name"); |
|---|
| 1637 | TEST_EXPECT_NO_ERROR(GB_write_string(gb_name, "notOverwritten")); // prepare to test overwrite by data. |
|---|
| 1638 | |
|---|
| 1639 | // modify "data" in "ali_16s" to prove overwrite later: |
|---|
| 1640 | GBDATA *gb_seq = GBT_find_sequence(gb_overwritten_species, "ali_16s"); |
|---|
| 1641 | TEST_REJECT_NULL(gb_seq); |
|---|
| 1642 | |
|---|
| 1643 | const char *seq = GB_read_char_pntr(gb_seq); |
|---|
| 1644 | char *seqMod = GBS_string_eval(seq, ":U=T"); |
|---|
| 1645 | |
|---|
| 1646 | TEST_EXPECT_NO_ERROR(GB_write_string(gb_seq, seqMod)); |
|---|
| 1647 | free(seqMod); |
|---|
| 1648 | } |
|---|
| 1649 | } |
|---|
| 1650 | |
|---|
| 1651 | SmartPtr<TestAlignmentTransporter> reverseAliTransporter; |
|---|
| 1652 | |
|---|
| 1653 | // overwrite with result of ruleset (=> mix original and clone) |
|---|
| 1654 | for (int pass = 1; pass<=4; ++pass) { |
|---|
| 1655 | TEST_ANNOTATE(GBS_global_string("pass %i", pass)); |
|---|
| 1656 | |
|---|
| 1657 | GB_transaction ta1(env.gb_src); |
|---|
| 1658 | GB_transaction ta2(env.gb_dest); |
|---|
| 1659 | |
|---|
| 1660 | SmartPtr<TestAlignmentTransporter> aliTransporter; |
|---|
| 1661 | if (pass<4) { |
|---|
| 1662 | aliTransporter = new TestAlignmentTransporter(pass); |
|---|
| 1663 | if (pass == 3) reverseAliTransporter = aliTransporter; // keep for later |
|---|
| 1664 | } |
|---|
| 1665 | |
|---|
| 1666 | ItemClonedByRuleSet overclone(gb_source_species, CLONE_ITEM_SPECIES, ruleset, CLONE_INTO_EXISTING, gb_overwritten_species, aliTransporter.content()); |
|---|
| 1667 | |
|---|
| 1668 | if (pass == 1) { |
|---|
| 1669 | TEST_EXPECT(overclone.has_error()); |
|---|
| 1670 | TEST_EXPECT_ERROR_CONTAINS(overclone.get_error(), CUSTOM_ALI_TRANSPORT_ERROR); |
|---|
| 1671 | ta2.close(overclone.get_error()); |
|---|
| 1672 | ta1.close(overclone.get_error()); |
|---|
| 1673 | } |
|---|
| 1674 | else { |
|---|
| 1675 | if (overclone.has_error()) TEST_EXPECT_NO_ERROR(overclone.get_error()); // expect no error, but show message if expectation fails |
|---|
| 1676 | TEST_EXPECT(overclone.get_clone() == gb_overwritten_species); // test get_clone reports gb_overwritten_species here |
|---|
| 1677 | TEST_EXPECT_EQUAL(GBT_get_name(gb_overwritten_species), "notOverwritten"); // test name of clone does not get overwritten |
|---|
| 1678 | TEST_EXPECT_EQUAL(GB_countEntries(gb_overwritten_species, "name"), 1); |
|---|
| 1679 | |
|---|
| 1680 | { |
|---|
| 1681 | GBDATA *gb_seq = GBT_find_sequence(gb_overwritten_species, "ali_16s"); |
|---|
| 1682 | TEST_REJECT_NULL(gb_seq); |
|---|
| 1683 | |
|---|
| 1684 | const char *seq = GB_read_char_pntr(gb_seq); |
|---|
| 1685 | |
|---|
| 1686 | switch (pass) { |
|---|
| 1687 | case 2: TEST_EXPECT_CONTAINS(seq, "GAAGTAGCTTGCTACTTTGCCGGCGAGCGGCGGAC"); break; // custom transporter: do nothing |
|---|
| 1688 | case 3: TEST_EXPECT_CONTAINS(seq, "CAGGCGGCGAGCGGCCGUUUCAUCGUUCGAUGAAG"); break; // custom transporter: writes reversed sequence data |
|---|
| 1689 | case 4: TEST_EXPECT_CONTAINS(seq, "GAAGUAGCUUGCUACUUUGCCGGCGAGCGGCGGAC"); break; // default behavior (=copy sequence over) |
|---|
| 1690 | default: xf_assert(0); break; // unexpected 'pass' |
|---|
| 1691 | } |
|---|
| 1692 | |
|---|
| 1693 | |
|---|
| 1694 | GBDATA *gb_ali = GB_get_father(gb_seq); |
|---|
| 1695 | TEST_EXPECT_EQUAL(GB_countEntries(gb_ali, "data"), 1); |
|---|
| 1696 | } |
|---|
| 1697 | |
|---|
| 1698 | TEST_EXPECT_EQUAL(GB_countEntries(gb_overwritten_species, "ali_16s"), 1); |
|---|
| 1699 | } |
|---|
| 1700 | } |
|---|
| 1701 | |
|---|
| 1702 | // "test" REAL_CLONE mode |
|---|
| 1703 | |
|---|
| 1704 | { |
|---|
| 1705 | GB_transaction ta1(env.gb_src); |
|---|
| 1706 | GB_transaction ta2(env.gb_dest); |
|---|
| 1707 | |
|---|
| 1708 | ItemClonedByRuleSet realClone(gb_source_species, CLONE_ITEM_SPECIES, ruleset, REAL_CLONE, GBT_get_species_data(env.gb_dest), &*reverseAliTransporter); |
|---|
| 1709 | |
|---|
| 1710 | if (realClone.has_error()) TEST_EXPECT_NO_ERROR(realClone.get_error()); // expect no error, but show message if expectation fails |
|---|
| 1711 | |
|---|
| 1712 | GBDATA *gb_clone = realClone.get_clone(); |
|---|
| 1713 | |
|---|
| 1714 | TEST_REJECT_NULL(gb_clone); |
|---|
| 1715 | TEST_REJECT_NULL(gb_source_species); |
|---|
| 1716 | TEST_EXPECT(gb_clone != gb_source_species); |
|---|
| 1717 | TEST_EXPECT_EQUAL(GBT_get_name(gb_clone), GBT_get_name(gb_source_species)); |
|---|
| 1718 | |
|---|
| 1719 | TEST_REJECT(GB_get_father(gb_clone) == GB_get_father(gb_source_species)); |
|---|
| 1720 | |
|---|
| 1721 | { |
|---|
| 1722 | GBDATA *gb_seq = GBT_find_sequence(gb_clone, "ali_16s"); |
|---|
| 1723 | TEST_REJECT_NULL(gb_seq); |
|---|
| 1724 | |
|---|
| 1725 | const char *seq = GB_read_char_pntr(gb_seq); |
|---|
| 1726 | |
|---|
| 1727 | // TEST_EXPECT_CONTAINS(seq, "GAAGUAGCUUGCUACUUUGCCGGCGAGCGGCGGAC"); // default behavior (=copy sequence over) |
|---|
| 1728 | TEST_EXPECT_CONTAINS(seq, "CAGGCGGCGAGCGGCCGUUUCAUCGUUCGAUGAAG"); // custom transporter: writes reversed sequence data |
|---|
| 1729 | |
|---|
| 1730 | GBDATA *gb_ali = GB_get_father(gb_seq); |
|---|
| 1731 | TEST_EXPECT_EQUAL(GB_countEntries(gb_ali, "data"), 1); |
|---|
| 1732 | } |
|---|
| 1733 | } |
|---|
| 1734 | } |
|---|
| 1735 | |
|---|
| 1736 | env.saveAndCompare(EXPECTED_ASCII_CLONED, true); |
|---|
| 1737 | free(overwrittenName); |
|---|
| 1738 | } |
|---|
| 1739 | } |
|---|
| 1740 | |
|---|
| 1741 | #endif // UNIT_TESTS |
|---|
| 1742 | |
|---|
| 1743 | // -------------------------------------------------------------------------------- |
|---|