1 | // ========================================================= // |
---|
2 | // // |
---|
3 | // File : xferset.c // |
---|
4 | // Purpose : field transfer sets // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in Mar 19 // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // ========================================================= // |
---|
10 | |
---|
11 | #include "xferset.h" |
---|
12 | |
---|
13 | #include <ConfigMapping.h> |
---|
14 | #include <BufferedFileReader.h> |
---|
15 | #include <arbdbt.h> |
---|
16 | #include <arb_str.h> |
---|
17 | #include <arb_stdstr.h> |
---|
18 | |
---|
19 | #include <set> |
---|
20 | #include <gb_aci.h> |
---|
21 | |
---|
22 | using namespace std; |
---|
23 | |
---|
24 | namespace FieldTransfer { |
---|
25 | |
---|
26 | typedef set<string, NoCaseCmp> StrSet; |
---|
27 | |
---|
28 | static void StrSet2StrArray(const StrSet& src, StrArray& dst) { |
---|
29 | for (StrSet::const_iterator i = src.begin(); i != src.end(); ++i) { |
---|
30 | dst.put(strdup(i->c_str())); |
---|
31 | } |
---|
32 | } |
---|
33 | static void StrArray2StrSet(const StrArray& src, StrSet& dst) { |
---|
34 | for (unsigned i = 0; i<src.size(); ++i) { |
---|
35 | dst.insert(src[i]); |
---|
36 | } |
---|
37 | } |
---|
38 | |
---|
39 | void RuleSet::extractUsedFields(StrArray& input, StrArray& output) const { // @@@ want flavor just filling 2 StrSets |
---|
40 | StrSet in, out; |
---|
41 | for (unsigned i = 0; i<size(); ++i) { |
---|
42 | const Rule& rule = get(i); |
---|
43 | const string& srcFields = rule.getSourceFields(); |
---|
44 | if (rule.multiple_source_fields()) { |
---|
45 | ConstStrArray ifield; |
---|
46 | GBT_split_string(ifield, srcFields.c_str(), ';'); |
---|
47 | for (unsigned f = 0; f<ifield.size(); ++f) { |
---|
48 | const char *source = ifield[f]; |
---|
49 | if (source[0]) in.insert(source); |
---|
50 | } |
---|
51 | } |
---|
52 | else { |
---|
53 | if (!srcFields.empty()) in.insert(srcFields); |
---|
54 | } |
---|
55 | const string& target = rule.targetField(); |
---|
56 | if (!target.empty()) out.insert(target); |
---|
57 | } |
---|
58 | StrSet2StrArray(in, input); |
---|
59 | StrSet2StrArray(out, output); |
---|
60 | } |
---|
61 | |
---|
62 | TransportedData ReadRule::readTypedFromField(GB_TYPES readAsType, GBDATA *gb_field) const { |
---|
63 | xf_assert(GB_read_type(gb_field) != GB_DB); // fails e.g. if rule defined for a name used by a container |
---|
64 | |
---|
65 | switch (readAsType) { |
---|
66 | case GB_INT: { |
---|
67 | int asInt = GB_read_int(gb_field); |
---|
68 | return TransportedData(asInt); |
---|
69 | } |
---|
70 | case GB_FLOAT: { |
---|
71 | float asFloat = GB_read_float(gb_field); |
---|
72 | return TransportedData(asFloat); |
---|
73 | } |
---|
74 | case GB_STRING: { |
---|
75 | char *asStr = GB_read_as_string(gb_field); |
---|
76 | string data(asStr); |
---|
77 | free(asStr); |
---|
78 | return TransportedData(data); |
---|
79 | } |
---|
80 | default: xf_assert(0); break; // invalid type |
---|
81 | } |
---|
82 | xf_assert(0); // should be never reached. |
---|
83 | return TransportedData::none(); |
---|
84 | } |
---|
85 | |
---|
86 | TransportedData ReadRule::aciAppliedTo(const string& toStr, GBDATA *gb_main, GBDATA *gb_dest_item) const { |
---|
87 | // We can not generally provide a meaningful item for ACI here. |
---|
88 | // Currently it always uses the destination item, but this item may be some dummy item, |
---|
89 | // e.g. a species clone used only during transfer. |
---|
90 | |
---|
91 | GBL_env env(gb_main, NULp); |
---|
92 | GBL_call_env callEnv(gb_dest_item, env); |
---|
93 | |
---|
94 | char *result = GB_command_interpreter_in_env(toStr.c_str(), aci.c_str(), callEnv); |
---|
95 | if (result) { |
---|
96 | string converted(result); |
---|
97 | free(result); |
---|
98 | return TransportedData(converted); |
---|
99 | } |
---|
100 | return TransportedData::makeError(GB_await_error()); |
---|
101 | } |
---|
102 | |
---|
103 | inline TransportedData cannotReadContainer(const char *containerName) { |
---|
104 | return TransportedData::makeError(GBS_global_string("cannot read as data ('%s' is a container)", containerName)); |
---|
105 | } |
---|
106 | |
---|
107 | TransportedData ReadRule::readFrom(GBDATA *gb_item, GBDATA *gb_dest_item) const { |
---|
108 | // 'gb_dest_item' only used for ACI |
---|
109 | |
---|
110 | if (!gb_item) { // got no item -> can't read |
---|
111 | return TransportedData::makeError("lacking item to readFrom"); |
---|
112 | } |
---|
113 | |
---|
114 | if (fields.empty()) { |
---|
115 | return TransportedData::makeError("no source field(s) specified"); |
---|
116 | } |
---|
117 | |
---|
118 | if (multiple_source_fields()) { |
---|
119 | ConstStrArray field; |
---|
120 | GBT_split_string(field, fields.c_str(), ';'); |
---|
121 | |
---|
122 | string concat; |
---|
123 | bool gotData = false; // at least one input field found? |
---|
124 | for (size_t f = 0; f<field.size(); ++f) { |
---|
125 | GBDATA *gb_field = GB_search(gb_item, field[f], GB_FIND); |
---|
126 | if (gb_field) { |
---|
127 | GB_TYPES sourceType = GB_read_type(gb_field); |
---|
128 | if (sourceType == GB_DB) { |
---|
129 | return cannotReadContainer(field[f]); |
---|
130 | } |
---|
131 | |
---|
132 | TransportedData plain = readTypedFromField(GB_STRING, gb_field); // ignores sourceType |
---|
133 | if (plain.failed()) return plain; |
---|
134 | |
---|
135 | xf_assert(plain.exists()); |
---|
136 | if (!concat.empty()) concat += separator; |
---|
137 | concat += plain.getString(); |
---|
138 | |
---|
139 | gotData = true; |
---|
140 | } |
---|
141 | else if (GB_have_error()) { |
---|
142 | return TransportedData::makeError(GB_await_error()); |
---|
143 | } |
---|
144 | } |
---|
145 | |
---|
146 | if (gotData) { |
---|
147 | if (!aci.empty()) { |
---|
148 | return aciAppliedTo(concat, GB_get_root(gb_item), gb_dest_item); |
---|
149 | } |
---|
150 | return TransportedData(concat); |
---|
151 | } |
---|
152 | // otherwise: do not transport if all source fields are missing |
---|
153 | } |
---|
154 | else { |
---|
155 | GBDATA *gb_field = GB_search(gb_item, fields.c_str(), GB_FIND); |
---|
156 | if (gb_field) { |
---|
157 | GB_TYPES sourceType = GB_read_type(gb_field); |
---|
158 | if (sourceType == GB_DB) { |
---|
159 | return cannotReadContainer(fields.c_str()); |
---|
160 | } |
---|
161 | if (!aci.empty()) { |
---|
162 | TransportedData plain = readTypedFromField(GB_STRING, gb_field); // ignores sourceType |
---|
163 | // @@@ store sourceType if no dest-type deduced? |
---|
164 | return aciAppliedTo(plain.getString(), GB_get_root(gb_item), gb_dest_item); |
---|
165 | } |
---|
166 | return readTypedFromField(sourceType, gb_field); |
---|
167 | } |
---|
168 | else if (GB_have_error()) { |
---|
169 | return TransportedData::makeError(GB_await_error()); |
---|
170 | } |
---|
171 | // otherwise: do not transport if source field is missing |
---|
172 | } |
---|
173 | // if field does not exist -> report "no type" |
---|
174 | return TransportedData::none(); |
---|
175 | } |
---|
176 | |
---|
177 | static GB_ERROR unconvertedWrite(const TransportedData& data, GBDATA *gb_field) { |
---|
178 | GB_ERROR error = NULp; |
---|
179 | switch (data.getType()) { |
---|
180 | case GB_STRING: { |
---|
181 | const char *str = data.getString().c_str(); |
---|
182 | error = GB_write_string(gb_field, str); |
---|
183 | break; |
---|
184 | } |
---|
185 | case GB_INT: { |
---|
186 | int num = data.getInt(); |
---|
187 | error = GB_write_int(gb_field, num); |
---|
188 | break; |
---|
189 | } |
---|
190 | case GB_FLOAT: { |
---|
191 | float fnum = data.getFloat(); |
---|
192 | error = GB_write_float(gb_field, fnum); |
---|
193 | break; |
---|
194 | } |
---|
195 | default: { // unhandled type |
---|
196 | xf_assert(0); |
---|
197 | break; |
---|
198 | } |
---|
199 | } |
---|
200 | return error; |
---|
201 | } |
---|
202 | static GB_ERROR convertAndWrite(const TransportedData& data, GBDATA *gb_field, GB_TYPES wantedTargetType, bool acceptLossyConversion) { |
---|
203 | // perform conversion to 'wantedTargetType' and write to 'gb_field' |
---|
204 | GB_ERROR error = NULp; |
---|
205 | |
---|
206 | switch (data.getType()) { |
---|
207 | case GB_INT: |
---|
208 | if (wantedTargetType == GB_FLOAT) { |
---|
209 | // convert int -> float |
---|
210 | float f = data.getInt(); |
---|
211 | int32_t i = int(f+.5); // round (just in case some underflow happened, causing sth like 4711.9999999) |
---|
212 | |
---|
213 | if (i != data.getInt() && !acceptLossyConversion) { |
---|
214 | error = GBS_global_string("lossy int->float type conversion (%i->%i)", data.getInt(), i); |
---|
215 | } |
---|
216 | else { |
---|
217 | error = GB_write_float(gb_field, f); |
---|
218 | } |
---|
219 | } |
---|
220 | else { |
---|
221 | error = GB_write_lossless_int(gb_field, data.getInt()); |
---|
222 | } |
---|
223 | break; |
---|
224 | |
---|
225 | case GB_FLOAT: |
---|
226 | if (wantedTargetType == GB_INT) { |
---|
227 | // convert float -> int |
---|
228 | double d = data.getFloat(); |
---|
229 | int i = d>0 ? (int)(d+0.5) : (int)(d-0.5); |
---|
230 | // @@@ increment a round-counter in RuleSet? |
---|
231 | double d2 = i; |
---|
232 | |
---|
233 | if (d != d2 && !acceptLossyConversion) { // precision loss |
---|
234 | error = GBS_global_string("lossy float->int type conversion (%e->%e)", d, d2); |
---|
235 | } |
---|
236 | else { |
---|
237 | error = GB_write_int(gb_field, i); |
---|
238 | } |
---|
239 | } |
---|
240 | else { |
---|
241 | error = GB_write_lossless_float(gb_field, data.getFloat()); |
---|
242 | } |
---|
243 | break; |
---|
244 | |
---|
245 | case GB_STRING: |
---|
246 | error = GB_write_autoconv_string(gb_field, data.getString().c_str()); // @@@ avoid silent data loss |
---|
247 | // @@@ use GBT_write_float_converted / GBT_write_int_converted here! |
---|
248 | break; |
---|
249 | |
---|
250 | default: |
---|
251 | xf_assert(0); // unhandled type |
---|
252 | break; |
---|
253 | } |
---|
254 | |
---|
255 | return error; |
---|
256 | } |
---|
257 | |
---|
258 | GB_ERROR WriteRule::writeTo(const TransportedData& data, GBDATA *gb_item, bool acceptLossyConversion) const { |
---|
259 | if (!gb_item) return "lacking item to writeTo"; |
---|
260 | |
---|
261 | // @@@ overwrite existing target field? should it be allowed or denied? optional? |
---|
262 | // @@@ try GBT_searchOrCreate_itemfield_according_to_changekey to create a field |
---|
263 | xf_assert(data.exists()); |
---|
264 | |
---|
265 | GB_TYPES usedTargetType = forcesType() ? getTargetType() : data.getType(); |
---|
266 | |
---|
267 | GB_ERROR error = check_hkey(); |
---|
268 | if (!error) { |
---|
269 | GBDATA *gb_field = GB_search(gb_item, name.c_str(), usedTargetType); // Note: works with hierarchical keys |
---|
270 | if (!gb_field) { |
---|
271 | error = GB_await_error(); // field not created -> report why |
---|
272 | } |
---|
273 | else { |
---|
274 | if (data.getType() == usedTargetType) { // data and target have same type -> no conversion needed |
---|
275 | error = unconvertedWrite(data, gb_field); |
---|
276 | } |
---|
277 | else { // type differs -> perform conversion (act like field converter reachable from info-window) |
---|
278 | error = convertAndWrite(data, gb_field, usedTargetType, acceptLossyConversion); |
---|
279 | } |
---|
280 | } |
---|
281 | } |
---|
282 | return error; |
---|
283 | } |
---|
284 | |
---|
285 | GB_ERROR Rule::transferBy(GBDATA *gb_source, GBDATA *gb_dest) const { |
---|
286 | /*! apply one rule (as part of transfer). */ |
---|
287 | |
---|
288 | // @@@ detect target field type. has to be done before starting transfer (do only once for each rule!) |
---|
289 | // @@@ pass target field type to reader (to select best read method)! |
---|
290 | |
---|
291 | GB_ERROR error = NULp; |
---|
292 | TransportedData tdata = readFrom(gb_source, gb_dest); |
---|
293 | if (tdata.failed()) { |
---|
294 | error = tdata.getError(); |
---|
295 | } |
---|
296 | else if (tdata.exists()) { |
---|
297 | error = writeTo(tdata, gb_dest, precisionLossPermitted()); |
---|
298 | } |
---|
299 | // else source missing -> do nothing. |
---|
300 | // Note: if target field exists and source field is missing -> target field remains intact. |
---|
301 | |
---|
302 | xf_assert(!GB_have_error()); // invalid to export an error (should get returned) |
---|
303 | return error; |
---|
304 | } |
---|
305 | |
---|
306 | GB_ERROR RuleSet::transferBy(GBDATA *gb_source, GBDATA *gb_dest) const { |
---|
307 | /*! transfer field data by applying all rules. */ |
---|
308 | |
---|
309 | GB_ERROR error = NULp; |
---|
310 | size_t r; |
---|
311 | for (r = 0; r<size() && !error; ++r) { |
---|
312 | const Rule& rule = get(r); |
---|
313 | error = rule.transferBy(gb_source, gb_dest); |
---|
314 | } |
---|
315 | if (error) { |
---|
316 | error = GBS_global_string("%s (in rule #%zu)", error, r); |
---|
317 | } |
---|
318 | |
---|
319 | xf_assert(!GB_have_error()); // invalid to export an error (should get returned) |
---|
320 | return error; |
---|
321 | } |
---|
322 | |
---|
323 | GB_ERROR RuleSet::saveTo(const char *filename) const { |
---|
324 | GB_ERROR error = NULp; |
---|
325 | FILE *out = fopen(filename, "wt"); |
---|
326 | if (!out) { |
---|
327 | error = GB_IO_error("saving", filename); |
---|
328 | } |
---|
329 | else { |
---|
330 | // print header: |
---|
331 | fputs("# arb field transfer set; version 1.0\n", out); |
---|
332 | fputc('\n', out); |
---|
333 | |
---|
334 | // print global RuleSet data: |
---|
335 | { |
---|
336 | ConstStrArray clines; |
---|
337 | GBT_split_string(clines, comment.c_str(), '\n'); |
---|
338 | |
---|
339 | for (int c = 0; clines[c]; ++c) { |
---|
340 | fprintf(out, "desc:%s\n", clines[c]); |
---|
341 | } |
---|
342 | fputc('\n', out); |
---|
343 | } |
---|
344 | fprintf(out, "transferUndef:%i\n", int(transferUndefFields)); |
---|
345 | |
---|
346 | // print rules: |
---|
347 | for (size_t r = 0; r<size(); ++r) { |
---|
348 | const Rule& rule = get(r); |
---|
349 | string cfg = rule.getConfig(); |
---|
350 | fprintf(out, "rule:%s\n", cfg.c_str()); |
---|
351 | } |
---|
352 | fputc('\n', out); |
---|
353 | |
---|
354 | fclose(out); |
---|
355 | } |
---|
356 | return error; |
---|
357 | } |
---|
358 | |
---|
// Returns true if the first character of 'line' that is neither space nor tab is a '#'.
// Empty lines and lines consisting of whitespace only are no comment lines.
inline bool isCommentLine(const std::string& line) {
    const size_t firstUsed = line.find_first_not_of(" \t");
    // npos means "whitespace only" (or empty) -> must not be used as index
    return firstUsed != std::string::npos && line[firstUsed] == '#';
}
// Decide whether to ignore a line loaded from .fts file:
// blank lines (empty or whitespace only) and comment lines are ignored.
inline bool shallIgnore(const std::string& line) {
    const bool blank = line.find_first_not_of(" \t") == std::string::npos;
    return blank || isCommentLine(line);
}
367 | |
---|
368 | ErrorOrRuleSetPtr RuleSet::loadFrom(const char *filename) { |
---|
369 | ARB_ERROR error; |
---|
370 | RuleSetPtr ruleset; |
---|
371 | |
---|
372 | FILE *in = fopen(filename, "rt"); |
---|
373 | if (!in) { |
---|
374 | error = GB_IO_error("loading", filename); |
---|
375 | } |
---|
376 | else { |
---|
377 | ruleset = new RuleSet(); |
---|
378 | BufferedFileReader reader(filename, in); |
---|
379 | |
---|
380 | string line; |
---|
381 | while (!error && reader.getLine(line)) { |
---|
382 | if (shallIgnore(line)) continue; |
---|
383 | |
---|
384 | size_t pos = line.find(':'); |
---|
385 | if (pos == string::npos) { |
---|
386 | error = GBS_global_string("expected ':' while parsing line '%s'", line.c_str()); |
---|
387 | } |
---|
388 | else { |
---|
389 | string tag = line.substr(0, pos); |
---|
390 | string content = line.substr(pos+1); |
---|
391 | |
---|
392 | if (tag == "rule") { |
---|
393 | ErrorOrRulePtr rule = Rule::makeFromConfig(content.c_str()); |
---|
394 | if (rule.hasError()) { |
---|
395 | error = GBS_global_string("while reading rule from '%s': %s", |
---|
396 | content.c_str(), |
---|
397 | rule.getError().deliver()); |
---|
398 | } |
---|
399 | else { |
---|
400 | ruleset->add(rule.getValue()); |
---|
401 | } |
---|
402 | } |
---|
403 | else if (tag == "desc") { |
---|
404 | const string& existing = ruleset->getComment(); |
---|
405 | ruleset->setComment(existing.empty() ? content : existing+'\n'+content); |
---|
406 | } |
---|
407 | else if (tag == "transferUndef") { |
---|
408 | ruleset->set_transferUndefFields(bool(atoi(content.c_str()))); |
---|
409 | } |
---|
410 | else { |
---|
411 | error = GBS_global_string("unknown tag '%s' while parsing line '%s'", |
---|
412 | tag.c_str(), |
---|
413 | line.c_str()); |
---|
414 | } |
---|
415 | } |
---|
416 | } |
---|
417 | |
---|
418 | if (error) ruleset.setNull(); |
---|
419 | } |
---|
420 | |
---|
421 | return ErrorOrRuleSetPtr(error, ruleset); |
---|
422 | } |
---|
423 | |
---|
424 | // -------------------------------- |
---|
425 | // configuration of rules |
---|
426 | |
---|
427 | #define SOURCE "source" |
---|
428 | #define ACI "aci" |
---|
429 | #define TARGET "target" |
---|
430 | #define SEP "sep" |
---|
431 | #define TYPE "type" |
---|
432 | #define LOSS "loss" |
---|
433 | |
---|
434 | #define PERMITTED "permitted" |
---|
435 | |
---|
436 | inline const char *type2str(GB_TYPES type) { |
---|
437 | const char *str = NULp; |
---|
438 | switch (type) { |
---|
439 | case GB_STRING: str = "text"; break; |
---|
440 | case GB_INT: str = "int"; break; |
---|
441 | case GB_FLOAT: str = "float"; break; |
---|
442 | case GB_BITS: str = "bits"; break; |
---|
443 | case GB_NONE: str = "auto"; break; |
---|
444 | default: break; |
---|
445 | } |
---|
446 | return str; |
---|
447 | } |
---|
448 | inline GB_TYPES str2type(const char *str) { |
---|
449 | GB_TYPES type = GB_TYPE_MAX; // invalid |
---|
450 | switch (str[0]) { |
---|
451 | case 't': if (strcmp(str, "text") == 0) type = GB_STRING; break; |
---|
452 | case 'i': if (strcmp(str, "int") == 0) type = GB_INT; break; |
---|
453 | case 'f': if (strcmp(str, "float") == 0) type = GB_FLOAT; break; |
---|
454 | case 'b': if (strcmp(str, "bits") == 0) type = GB_BITS; break; |
---|
455 | case 'a': if (strcmp(str, "auto") == 0) type = GB_NONE; break; |
---|
456 | } |
---|
457 | return type; |
---|
458 | } |
---|
459 | |
---|
460 | void ReadRule::saveReadConfig(ConfigMapping& cfgmap) const { |
---|
461 | cfgmap.set_entry(SOURCE, fields); |
---|
462 | if (separator != NOSEP) cfgmap.set_entry(SEP, separator); |
---|
463 | if (!aci.empty()) cfgmap.set_entry(ACI, aci); |
---|
464 | } |
---|
465 | |
---|
466 | void WriteRule::saveWriteConfig(ConfigMapping& cfgmap) const { |
---|
467 | cfgmap.set_entry(TARGET, name); |
---|
468 | if (forcesType()) { |
---|
469 | cfgmap.set_entry(TYPE, type2str(getTargetType())); |
---|
470 | } |
---|
471 | } |
---|
472 | string Rule::getConfig() const { |
---|
473 | ConfigMapping cfgmap; |
---|
474 | |
---|
475 | saveReadConfig(cfgmap); |
---|
476 | saveWriteConfig(cfgmap); |
---|
477 | |
---|
478 | if (precisionLossPermitted()) { |
---|
479 | cfgmap.set_entry(LOSS, PERMITTED); |
---|
480 | } |
---|
481 | |
---|
482 | return cfgmap.config_string(); |
---|
483 | } |
---|
484 | |
---|
485 | ErrorOrRulePtr Rule::makeFromConfig(const char *config) { |
---|
486 | RulePtr rule; |
---|
487 | ConfigMapping cfgmap; |
---|
488 | GB_ERROR error = cfgmap.parseFrom(config); |
---|
489 | |
---|
490 | if (!error) { |
---|
491 | const char *source = cfgmap.get_entry(SOURCE); |
---|
492 | const char *target = cfgmap.get_entry(TARGET); |
---|
493 | const char *sep = cfgmap.get_entry(SEP); |
---|
494 | |
---|
495 | if (!source) error = "missing " SOURCE " entry"; |
---|
496 | if (!target) error = "missing " TARGET " entry"; |
---|
497 | |
---|
498 | if (!sep) sep = NOSEP; // default to 'no separator' |
---|
499 | |
---|
500 | if (!error) { |
---|
501 | const char *aci = cfgmap.get_entry(ACI); |
---|
502 | if (aci) { |
---|
503 | rule = makeAciConverter(source, sep, aci, target); |
---|
504 | } |
---|
505 | else { |
---|
506 | rule = makeSimple(source, sep, target); |
---|
507 | } |
---|
508 | |
---|
509 | const char *typeID = cfgmap.get_entry(TYPE); |
---|
510 | if (typeID) { |
---|
511 | GB_TYPES type = str2type(typeID); |
---|
512 | if (type == GB_TYPE_MAX) { // = unknown type ID |
---|
513 | error = GBS_global_string("invalid type id '%s'", typeID); |
---|
514 | rule.setNull(); |
---|
515 | } |
---|
516 | else { |
---|
517 | xf_assert(GB_TYPE_readable_as_string(type)); |
---|
518 | rule->setTargetType(type); |
---|
519 | } |
---|
520 | } |
---|
521 | |
---|
522 | if (!error) { |
---|
523 | const char *loss = cfgmap.get_entry(LOSS); |
---|
524 | if (loss && strcmp(loss, PERMITTED) == 0) { |
---|
525 | rule->permitPrecisionLoss(); |
---|
526 | } |
---|
527 | } |
---|
528 | } |
---|
529 | } |
---|
530 | |
---|
531 | return ErrorOrRulePtr(error, rule); |
---|
532 | } |
---|
533 | |
---|
534 | |
---|
535 | // ------------------------ |
---|
536 | // describe rules |
---|
537 | string ReadRule::describe() const { |
---|
538 | if (aci.empty()) return fields; |
---|
539 | return fields+"|ACI"; |
---|
540 | } |
---|
541 | string WriteRule::describe() const { |
---|
542 | return name; |
---|
543 | } |
---|
544 | string Rule::getShortDescription() const { |
---|
545 | return ReadRule::describe() + " -> " + WriteRule::describe(); |
---|
546 | } |
---|
547 | |
---|
548 | // ----------------------------- |
---|
549 | // ItemClonedByRuleSet |
---|
550 | string ItemClonedByRuleSet::lastReportedError; |
---|
551 | |
---|
552 | GB_ERROR ItemClonedByRuleSet::overlayOrCloneSub(const char *subName, GBDATA *gb_sub) { |
---|
553 | GBDATA *gb_existing = GB_entry(gb_clone, subName); |
---|
554 | GB_ERROR error; |
---|
555 | if (gb_existing) { // if target entry exists .. |
---|
556 | error = GB_copy_overlay(gb_existing, gb_sub); // .. overwrite its content. |
---|
557 | } |
---|
558 | else { // otherwise .. |
---|
559 | error = GB_incur_error_if(!GB_clone(gb_clone, gb_sub)); // .. clone source entry |
---|
560 | } |
---|
561 | return error; |
---|
562 | } |
---|
563 | |
---|
564 | GB_ERROR ItemClonedByRuleSet::cloneMissingSub(const char *subName, GBDATA *gb_sub) { |
---|
565 | GBDATA *gb_existing = GB_entry(gb_clone, subName); |
---|
566 | GB_ERROR error; |
---|
567 | if (gb_existing) { // if target entry exists .. |
---|
568 | error = NULp; // .. keep it |
---|
569 | } |
---|
570 | else { // otherwise .. |
---|
571 | error = GB_incur_error_if(!GB_clone(gb_clone, gb_sub)); // .. clone source entry |
---|
572 | } |
---|
573 | return error; |
---|
574 | } |
---|
575 | |
---|
576 | GB_ERROR ItemClonedByRuleSet::copySubIfMissing(const char *subName) { |
---|
577 | // copy sub-field (or -container) if it doesn't exist in target |
---|
578 | GB_ERROR error = NULp; |
---|
579 | GBDATA *gb_sub = GB_entry(gb_source, subName); |
---|
580 | if (!gb_sub) { |
---|
581 | error = GBS_global_string("no such entry '%s' (in source)", subName); |
---|
582 | UNCOVERED(); |
---|
583 | } |
---|
584 | else { |
---|
585 | error = cloneMissingSub(subName, gb_sub); // sub = passed field or container |
---|
586 | } |
---|
587 | return error; |
---|
588 | } |
---|
589 | |
---|
590 | GB_ERROR ItemClonedByRuleSet::copyAlignments() { |
---|
591 | GB_ERROR error = NULp; |
---|
592 | for (GBDATA *gb_ali = GB_child(gb_source); gb_ali; gb_ali = GB_nextChild(gb_ali)) { |
---|
593 | if (GB_is_container(gb_ali)) { |
---|
594 | const char *aliname = GB_read_key_pntr(gb_ali); |
---|
595 | if (ARB_strBeginsWith(aliname, "ali_")) { |
---|
596 | GBDATA *gb_data = GB_entry(gb_ali, "data"); |
---|
597 | if (gb_data) { |
---|
598 | bool dataIsSTRING = GB_read_type(gb_data) == GB_STRING; |
---|
599 | xf_assert(dataIsSTRING); |
---|
600 | if (dataIsSTRING) { |
---|
601 | error = overlayOrCloneSub(aliname, gb_ali); // sub = whole alignment container |
---|
602 | } |
---|
603 | } |
---|
604 | else { |
---|
605 | error = GB_incur_error(); |
---|
606 | UNCOVERED(); |
---|
607 | } |
---|
608 | } |
---|
609 | } |
---|
610 | } |
---|
611 | return error; |
---|
612 | } |
---|
613 | |
---|
614 | const char *ItemClonedByRuleSet::get_id_field() const { |
---|
615 | const char *field = NULp; |
---|
616 | switch (itemtype) { |
---|
617 | case CLONE_ITEM_SPECIES: field = "name"; break; |
---|
618 | default: xf_assert(0); break; |
---|
619 | } |
---|
620 | return field; |
---|
621 | } |
---|
622 | |
---|
623 | ItemClonedByRuleSet::ItemClonedByRuleSet(GBDATA*& gb_item, ClonableItemType itemtype_, RuleSetPtr ruleset, ItemCloneType type_, GBDATA *gb_refItem, const AlignmentTransporter *aliTransporter) : |
---|
624 | itemtype(itemtype_), |
---|
625 | gb_source(gb_item), |
---|
626 | type(type_) |
---|
627 | { |
---|
628 | /*! clone or update item using ruleset. |
---|
629 | * |
---|
630 | * @param gb_item the source item (will be set to NULp if type_ is REPLACE_ITEM_BY_CLONE). |
---|
631 | * @param itemtype_ currently always CLONE_ITEM_SPECIES. |
---|
632 | * @param ruleset ruleset used to transfer fields from source item to cloned item |
---|
633 | * @param type_ type of clone (see ItemCloneType for details). |
---|
634 | * @param gb_refItem CLONE_INTO_EXISTING: target species, REAL_CLONE: target item container, otherwise: NULp |
---|
635 | * @param aliTransporter allows to overide how alignment gets copied (default: copy all alignment sub-containers) |
---|
636 | */ |
---|
637 | |
---|
638 | // @@@ method is far too long -> split |
---|
639 | |
---|
640 | GB_ERROR error = NULp; |
---|
641 | GB_transaction ta(gb_source); |
---|
642 | |
---|
643 | #if defined(ASSERTION_USED) |
---|
644 | checked4error = false; |
---|
645 | userCallbackUsed = false; |
---|
646 | #endif |
---|
647 | |
---|
648 | if (type == CLONE_INTO_EXISTING) { |
---|
649 | if (gb_refItem) { |
---|
650 | gb_clone = gb_refItem; // use passed clone as target |
---|
651 | } |
---|
652 | else { |
---|
653 | error = "no target species specified (logic error)"; |
---|
654 | UNCOVERED(); |
---|
655 | } |
---|
656 | } |
---|
657 | else { |
---|
658 | GBDATA *gb_item_container; |
---|
659 | { |
---|
660 | GBDATA *gb_src_item_container = GB_get_father(gb_source); |
---|
661 | if (type == REAL_CLONE) { |
---|
662 | gb_item_container = gb_refItem; |
---|
663 | if (!gb_item_container) { |
---|
664 | error = "no target item container specified (logic error)"; |
---|
665 | } |
---|
666 | else if (gb_item_container == gb_src_item_container) { |
---|
667 | error = "source and target item containers need to differ (logic error)"; |
---|
668 | } |
---|
669 | } |
---|
670 | else { |
---|
671 | xf_assert(!gb_refItem); // passed value is ignored (please pass NULp) |
---|
672 | gb_item_container = gb_src_item_container; |
---|
673 | } |
---|
674 | } |
---|
675 | |
---|
676 | if (!error) { |
---|
677 | xf_assert(itemtype_ == CLONE_ITEM_SPECIES); // next command only works for species |
---|
678 | gb_clone = GB_create_container(gb_item_container, "species"); // create separate species |
---|
679 | if (!gb_clone) { |
---|
680 | error = GB_await_error(); |
---|
681 | UNCOVERED(); |
---|
682 | } |
---|
683 | } |
---|
684 | } |
---|
685 | |
---|
686 | if (!error) { |
---|
687 | // apply ruleset: |
---|
688 | error = ruleset->transferBy(gb_source, gb_clone); |
---|
689 | |
---|
690 | // perform some standard transfers: |
---|
691 | const char *IDFIELD = get_id_field(); |
---|
692 | if (!error) error = copySubIfMissing(IDFIELD); // transfer IDFIELD for any itemtype |
---|
693 | |
---|
694 | switch (itemtype) { |
---|
695 | case CLONE_ITEM_SPECIES: |
---|
696 | if (!error) error = copySubIfMissing("acc"); |
---|
697 | if (!error) { |
---|
698 | if (aliTransporter) { // use user callback if given |
---|
699 | if (aliTransporter->shallCopyBefore()) { |
---|
700 | error = copyAlignments(); |
---|
701 | } |
---|
702 | if (!error) { |
---|
703 | error = aliTransporter->transport(gb_source, gb_clone); // e.g. used to adapt alignment in mergetool |
---|
704 | } |
---|
705 | #if defined(ASSERTION_USED) |
---|
706 | userCallbackUsed = true; |
---|
707 | #endif |
---|
708 | } |
---|
709 | else { |
---|
710 | error = copyAlignments(); |
---|
711 | } |
---|
712 | } |
---|
713 | break; |
---|
714 | default: xf_assert(0); break; |
---|
715 | } |
---|
716 | |
---|
717 | if (!error && ruleset->shallTransferUndefFields()) { |
---|
718 | |
---|
719 | StrSet defined; |
---|
720 | // extract used fields: |
---|
721 | { |
---|
722 | StrArray in, out; |
---|
723 | ruleset->extractUsedFields(in, out); |
---|
724 | // @@@ do extraction only once (not for each item transfer) |
---|
725 | |
---|
726 | StrArray2StrSet(in, defined); |
---|
727 | StrArray2StrSet(out, defined); |
---|
728 | } |
---|
729 | { |
---|
730 | // exclude parent containers: |
---|
731 | StrSet parents; |
---|
732 | for (StrSet::const_iterator field = defined.begin(); field != defined.end(); ++field) { |
---|
733 | size_t slashpos = field->find_first_of('/'); |
---|
734 | if (slashpos != string::npos) { // fieldname contains a slash |
---|
735 | string parentname = field->substr(0, slashpos); // name of top-level parent container inside species |
---|
736 | parents.insert(parentname); |
---|
737 | } |
---|
738 | } |
---|
739 | defined.insert(parents.begin(), parents.end()); |
---|
740 | } |
---|
741 | |
---|
742 | // transfer rest of fields (i.e. those neighter used by ruleset nor as standard field): |
---|
743 | for (GBDATA *gb_field = GB_child(gb_source); gb_field && !error; gb_field = GB_nextChild(gb_field)) { |
---|
744 | const char *key = GB_read_key_pntr(gb_field); |
---|
745 | bool keyUsed = defined.find(key) != defined.end(); // key was read or written by ruleset |
---|
746 | |
---|
747 | if (!keyUsed) { |
---|
748 | error = copySubIfMissing(key); |
---|
749 | } |
---|
750 | } |
---|
751 | } |
---|
752 | |
---|
753 | // @@@ do we need to preserve security etc of cloned species? (security of sub-fields is preserved; e.g. see r17967) |
---|
754 | |
---|
755 | if (!error) { |
---|
756 | xf_assert(correlated(aliTransporter, userCallbackUsed)); // custom transporter was not used (logic error?) |
---|
757 | |
---|
758 | switch (type) { |
---|
759 | case REPLACE_ITEM_BY_CLONE: |
---|
760 | error = GB_delete(gb_source); // will be replaced by clone |
---|
761 | if (!error) { |
---|
762 | gb_item = NULp; |
---|
763 | gb_source = NULp; |
---|
764 | } |
---|
765 | break; |
---|
766 | |
---|
767 | case RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS: { |
---|
768 | GBDATA *gb_id = GB_entry(gb_source, IDFIELD); |
---|
769 | if (!gb_id) { |
---|
770 | error = GBS_global_string("expected field '%s' not found", IDFIELD); |
---|
771 | } |
---|
772 | else { |
---|
773 | const char *name = GB_read_char_pntr(gb_id); |
---|
774 | xf_assert(name); |
---|
775 | orgName = name; // store name (written back in dtor) |
---|
776 | |
---|
777 | error = GB_write_string(gb_id, "fake"); // change name // @@@ use different name |
---|
778 | } |
---|
779 | break; |
---|
780 | } |
---|
781 | case CLONE_INTO_EXISTING: |
---|
782 | case REAL_CLONE: |
---|
783 | // nothing to do here |
---|
784 | break; |
---|
785 | } |
---|
786 | } |
---|
787 | } |
---|
788 | |
---|
789 | error = ta.close(error); |
---|
790 | if (error) { |
---|
791 | errorCopy = error; // store copy of error in string |
---|
792 | gb_clone = NULp; |
---|
793 | } |
---|
794 | } |
---|
795 | |
---|
796 | ItemClonedByRuleSet::~ItemClonedByRuleSet() { |
---|
797 | if (!has_error()) { // if error occurred during construction -> TA was aborted -> nothing to undo |
---|
798 | if (type == RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS) { |
---|
799 | GB_transaction ta(gb_source); |
---|
800 | GB_ERROR error = NULp; |
---|
801 | |
---|
802 | GBDATA *gb_id = GB_entry(gb_source, get_id_field()); |
---|
803 | xf_assert(gb_id); |
---|
804 | |
---|
805 | if (gb_id) { |
---|
806 | xf_assert(!orgName.empty()); |
---|
807 | error = GB_write_string(gb_id, orgName.c_str()); |
---|
808 | if (error) { |
---|
809 | fprintf(stderr, "Failed to rename original item after temp. clone (Reason: %s)", error); |
---|
810 | xf_assert(0); // should not happen |
---|
811 | } |
---|
812 | } |
---|
813 | |
---|
814 | // delete temp clone: |
---|
815 | if (!error) { |
---|
816 | error = GB_delete(gb_clone); |
---|
817 | if (error) { |
---|
818 | fprintf(stderr, "Failed to delete temp. clone (Reason: %s)", error); |
---|
819 | xf_assert(0); // should not happen |
---|
820 | } |
---|
821 | } |
---|
822 | } |
---|
823 | } |
---|
824 | } |
---|
825 | |
---|
826 | }; |
---|
827 | |
---|
828 | // -------------------------------------------------------------------------------- |
---|
829 | |
---|
830 | #ifdef UNIT_TESTS |
---|
831 | |
---|
832 | #include <arb_diff.h> |
---|
833 | #include <arb_file.h> |
---|
834 | #include <arb_defs.h> |
---|
835 | |
---|
836 | #ifndef TEST_UNIT_H |
---|
837 | #include <test_unit.h> |
---|
838 | #endif |
---|
839 | |
---|
840 | void TEST_type2id() { |
---|
841 | using namespace FieldTransfer; |
---|
842 | |
---|
843 | for (GB_TYPES t = GB_NONE; t<=GB_TYPE_MAX; t = GB_TYPES(t+1)) { |
---|
844 | const char *id = type2str(t); |
---|
845 | if (id) { |
---|
846 | TEST_ANNOTATE(id); |
---|
847 | TEST_EXPECT_EQUAL(str2type(id), t); |
---|
848 | } |
---|
849 | } |
---|
850 | } |
---|
851 | void TEST_transportedData() { |
---|
852 | using namespace FieldTransfer; |
---|
853 | |
---|
854 | GB_ERROR error; |
---|
855 | { |
---|
856 | TransportedData noData = TransportedData::none(); |
---|
857 | TransportedData errorData = TransportedData::makeError("the error msg"); |
---|
858 | |
---|
859 | TEST_REJECT(noData.failed()); |
---|
860 | TEST_REJECT(noData.exists()); |
---|
861 | |
---|
862 | TEST_EXPECT(errorData.failed()); |
---|
863 | error = errorData.getError(); |
---|
864 | TEST_EXPECT_EQUAL(error, "the error msg"); |
---|
865 | } |
---|
866 | TEST_EXPECT_EQUAL(error, "the error msg"); // error has to survive destruction of TransportedData |
---|
867 | |
---|
868 | TransportedData greet("hello"); |
---|
869 | TransportedData num(4711); |
---|
870 | TransportedData fnum(0.815f); |
---|
871 | |
---|
872 | TEST_REJECT(greet.failed()); |
---|
873 | TEST_EXPECT(greet.exists()); |
---|
874 | TEST_EXPECT_EQUAL(greet.getString(), "hello"); |
---|
875 | |
---|
876 | TEST_REJECT(num.failed()); |
---|
877 | TEST_EXPECT(num.exists()); |
---|
878 | TEST_EXPECT_EQUAL(num.getInt(), 4711); |
---|
879 | |
---|
880 | TEST_REJECT(fnum.failed()); |
---|
881 | TEST_EXPECT(fnum.exists()); |
---|
882 | TEST_EXPECT_SIMILAR(fnum.getFloat(), 0.815, 0.000001); |
---|
883 | } |
---|
884 | |
---|
885 | void TEST_xferset() { |
---|
886 | FieldTransfer::RuleSet fts; |
---|
887 | |
---|
888 | TEST_EXPECT_ZERO(fts.size()); |
---|
889 | TEST_EXPECT(fts.empty()); |
---|
890 | TEST_EXPECT_EQUAL(fts.size(), 0); |
---|
891 | |
---|
892 | // -------------------- |
---|
893 | // add rules: |
---|
894 | fts.add(new FieldTransfer::Rule(FieldTransfer::ReadRule("location", NOSEP), // add a simple rule (one source, no ACI) |
---|
895 | FieldTransfer::WriteRule("geolocation"))); |
---|
896 | TEST_EXPECT(!fts.empty()); |
---|
897 | TEST_EXPECT_EQUAL(fts.size(), 1); |
---|
898 | |
---|
899 | fts.add(FieldTransfer::Rule::permitPrecisionLoss(new FieldTransfer::Rule(FieldTransfer::ReadRule("isolation", NOSEP, "upper"), // add an ACI rule (one source) |
---|
900 | FieldTransfer::WriteRule("isolation_source", GB_INT)))); // force int type |
---|
901 | TEST_EXPECT_EQUAL(fts.size(), 2); |
---|
902 | |
---|
903 | // @@@ add multisource rules (with and w/o ACI)! |
---|
904 | |
---|
905 | // --------------------- |
---|
906 | // query rules |
---|
907 | for (size_t r = 0; r<fts.size(); ++r) { |
---|
908 | TEST_ANNOTATE(GBS_global_string("r=%zu", r)); |
---|
909 | |
---|
910 | const FieldTransfer::Rule& rule = fts.get(r); |
---|
911 | switch (r) { |
---|
912 | // @@@ add tests for source field(s) |
---|
913 | |
---|
914 | case 0: // simple rule |
---|
915 | |
---|
916 | TEST_EXPECT_EQUAL(rule.targetField(), "geolocation"); |
---|
917 | TEST_REJECT(rule.forcesType()); |
---|
918 | TEST_REJECT(rule.precisionLossPermitted()); |
---|
919 | break; |
---|
920 | |
---|
921 | case 1: // basic ACI rule |
---|
922 | |
---|
923 | TEST_EXPECT_EQUAL(rule.targetField(), "isolation_source"); |
---|
924 | TEST_EXPECT(rule.forcesType()); // type is forced .. |
---|
925 | TEST_EXPECT_EQUAL(rule.getTargetType(), GB_INT); // .. to int |
---|
926 | TEST_EXPECT(rule.precisionLossPermitted()); |
---|
927 | break; |
---|
928 | |
---|
929 | default: |
---|
930 | xf_assert(0); // untested rule |
---|
931 | break; |
---|
932 | } |
---|
933 | } |
---|
934 | TEST_ANNOTATE(NULp); |
---|
935 | TEST_EXPECT_EQUAL(fts.size(), 2); |
---|
936 | |
---|
937 | // ------------------------------------------- |
---|
938 | // test rule replacement and removal |
---|
939 | { |
---|
940 | // order=01 |
---|
941 | string cfg0 = fts.get(0).getConfig(); |
---|
942 | string cfg1 = fts.get(1).getConfig(); |
---|
943 | |
---|
944 | TEST_EXPECT_DIFFERENT(cfg0, cfg1); // otherwise test below does not test replacement |
---|
945 | |
---|
946 | { |
---|
947 | // swap 2 rules |
---|
948 | FieldTransfer::RulePtr tmp = fts.getPtr(0); |
---|
949 | fts.replace(0, fts.getPtr(1)); |
---|
950 | fts.replace(1, tmp); |
---|
951 | // order=10 |
---|
952 | } |
---|
953 | |
---|
954 | string newcfg0 = fts.get(0).getConfig(); |
---|
955 | string newcfg1 = fts.get(1).getConfig(); |
---|
956 | |
---|
957 | TEST_EXPECT_EQUAL(newcfg0, cfg1); |
---|
958 | TEST_EXPECT_EQUAL(newcfg1, cfg0); |
---|
959 | |
---|
960 | { |
---|
961 | int insertedAt; |
---|
962 | |
---|
963 | insertedAt = fts.insertBefore(0, fts.getPtr(1)); // insert before first -> order = 010 |
---|
964 | TEST_EXPECT_EQUAL(fts.size(), 3); |
---|
965 | TEST_EXPECT_EQUAL(insertedAt, 0); |
---|
966 | TEST_EXPECT_EQUAL(fts.get(insertedAt).getConfig(), cfg0); |
---|
967 | |
---|
968 | insertedAt = fts.insertBefore(2, fts.getPtr(1)); // insert before last -> order = 0110 |
---|
969 | TEST_EXPECT_EQUAL(fts.size(), 4); |
---|
970 | TEST_EXPECT_EQUAL(insertedAt, 2); |
---|
971 | TEST_EXPECT_EQUAL(fts.get(insertedAt).getConfig(), cfg1); |
---|
972 | |
---|
973 | insertedAt = fts.insertBefore(7, fts.getPtr(1)); // insert before invalid position = append -> order = 01101 |
---|
974 | TEST_EXPECT_EQUAL(fts.size(), 5); |
---|
975 | TEST_EXPECT_EQUAL(insertedAt, 4); |
---|
976 | TEST_EXPECT_EQUAL(fts.get(insertedAt).getConfig(), cfg1); |
---|
977 | |
---|
978 | // "undo" inserts |
---|
979 | fts.erase(1); // -> order = 0101 |
---|
980 | fts.erase(3); // erase at end -> order = 010 |
---|
981 | fts.erase(0); // -> order = 10 |
---|
982 | } |
---|
983 | |
---|
984 | fts.erase(0); // erase 1st rule -> order = 0 |
---|
985 | TEST_EXPECT_EQUAL(fts.size(), 1); |
---|
986 | string finalcfg = fts.get(0).getConfig(); |
---|
987 | TEST_EXPECT_EQUAL(finalcfg, cfg0); |
---|
988 | } |
---|
989 | } |
---|
990 | |
---|
991 | class FailingRule: public FieldTransfer::Rule { |
---|
992 | string partOfFailReason; |
---|
993 | public: |
---|
994 | FailingRule(const Rule& failing, string part) : Rule(failing), partOfFailReason(part) {} |
---|
995 | FailingRule(FieldTransfer::RulePtr failing, string part) : Rule(*failing), partOfFailReason(part) {} |
---|
996 | const char *expectedPartOfFailure() const { return partOfFailReason.c_str(); } |
---|
997 | }; |
---|
998 | |
---|
999 | |
---|
1000 | struct XferEnv : virtual Noncopyable { // provides test environment for transfer tests |
---|
1001 | GB_shell shell; |
---|
1002 | |
---|
1003 | const char *target_ascii; |
---|
1004 | |
---|
1005 | GBDATA *gb_src; |
---|
1006 | GBDATA *gb_dest; |
---|
1007 | |
---|
1008 | XferEnv() : |
---|
1009 | target_ascii("TEST_fields_xferred.arb") |
---|
1010 | { |
---|
1011 | gb_src = GB_open("TEST_fields_ascii.arb", "r"); // ../../UNIT_TESTER/run/TEST_fields_ascii.arb |
---|
1012 | gb_dest = GB_open(target_ascii, "wc"); |
---|
1013 | } |
---|
1014 | ~XferEnv() { |
---|
1015 | TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(target_ascii)); |
---|
1016 | |
---|
1017 | GB_close(gb_dest); |
---|
1018 | GB_close(gb_src); |
---|
1019 | } |
---|
1020 | |
---|
1021 | void transferAllSpeciesBy(const FieldTransfer::RuleSet& ruleset) { // transfer all species according to Ruleset |
---|
1022 | // @@@ transferAllSpeciesBy is quite similar to what has to happen in merge-tool |
---|
1023 | GB_transaction tas(gb_src); |
---|
1024 | GB_transaction tad(gb_dest); |
---|
1025 | |
---|
1026 | for (GBDATA *gb_src_species = GBT_first_species(gb_src); |
---|
1027 | gb_src_species; |
---|
1028 | gb_src_species = GBT_next_species(gb_src_species)) |
---|
1029 | { |
---|
1030 | const char *name = GBT_get_name(gb_src_species); |
---|
1031 | TEST_REJECT_NULL(name); |
---|
1032 | |
---|
1033 | GBDATA *gb_dest_species = GBT_find_or_create_species(gb_dest, name, false); |
---|
1034 | TEST_REJECT_NULL(gb_dest_species); |
---|
1035 | |
---|
1036 | // @@@ transferAllSpeciesBy could allow overwrites (keep existing fields; allow overwrite of fields): |
---|
1037 | // -> try to use ItemClonedByRuleSet with CLONE_INTO_EXISTING instead of direct call to transferBy below! |
---|
1038 | |
---|
1039 | TEST_EXPECT_ZERO(GB_read_flag(gb_dest_species)); // (was previously done by GBT_find_or_create_species) |
---|
1040 | TEST_EXPECT_NO_ERROR(ruleset.transferBy(gb_src_species, gb_dest_species)); |
---|
1041 | } |
---|
1042 | } |
---|
1043 | |
---|
1044 | // ---------------------------------------------------------------------------- |
---|
1045 | |
---|
1046 | void copyAllSpecies() { |
---|
1047 | GB_transaction tas(gb_src); |
---|
1048 | GB_transaction tad(gb_dest); |
---|
1049 | |
---|
1050 | GBDATA *gb_dest_species_data = GBT_get_species_data(gb_dest); |
---|
1051 | TEST_REJECT_NULL(gb_dest_species_data); |
---|
1052 | |
---|
1053 | for (GBDATA *gb_src_species = GBT_first_species(gb_src); |
---|
1054 | gb_src_species; |
---|
1055 | gb_src_species = GBT_next_species(gb_src_species)) |
---|
1056 | { |
---|
1057 | const char *name = GBT_get_name(gb_src_species); |
---|
1058 | TEST_REJECT_NULL(name); |
---|
1059 | |
---|
1060 | GBDATA *gb_dest_exists = GBT_find_species(gb_dest, name); |
---|
1061 | TEST_EXPECT_NULL(gb_dest_exists); // this method cannot handle overwrites |
---|
1062 | |
---|
1063 | GBDATA *gb_dest_species = GB_create_container(gb_dest_species_data, "species"); |
---|
1064 | TEST_REJECT_NULL(gb_dest_species); |
---|
1065 | |
---|
1066 | TEST_EXPECT_NO_ERROR(GB_copy_dropProtectMarksAndTempstate(gb_dest_species, gb_src_species)); |
---|
1067 | TEST_EXPECT_ZERO(GB_read_flag(gb_dest_species)); |
---|
1068 | } |
---|
1069 | } |
---|
1070 | |
---|
1071 | // ---------------------------------------------------------------------------- |
---|
1072 | // write transferred data to ascii db + compare with expected result: |
---|
1073 | void save() { |
---|
1074 | TEST_EXPECT_NO_ERROR(GB_save_as(gb_dest, target_ascii, "a")); |
---|
1075 | } |
---|
1076 | void saveAndCompare(const char *expected_ascii, bool allowAutoUpdate) { |
---|
1077 | save(); |
---|
1078 | if (allowAutoUpdate) { |
---|
1079 | // #define TEST_AUTO_UPDATE // uncomment to update expected result |
---|
1080 | #if defined(TEST_AUTO_UPDATE) |
---|
1081 | TEST_COPY_FILE(target_ascii, expected_ascii); |
---|
1082 | #endif |
---|
1083 | } |
---|
1084 | TEST_EXPECT_TEXTFILE_DIFFLINES(target_ascii, expected_ascii, 0); |
---|
1085 | } |
---|
1086 | }; |
---|
1087 | |
---|
// Expected config strings (as returned by Rule::getConfig()) of the rules
// added to 'ruleset' in TEST_xferBySet, listed in the order in which the rules
// are added there. TEST_LATE_ruleConfigsReadable builds rules from these
// strings again (Rule::makeFromConfig()).
static const char *expRuleConfig[] = {
    // simple rules:
    "source='lat_lon';target='geolocation'",
    "source='seq_quality_slv';target='seq/slv_quality'",
    "source='homop_slv';target='slv_homop'",

    // rule reading a field that is missing:
    "source='no1';target='notTransferred'",

    // rules forcing the target type:
    "source='pubmed_id';target='str2int';type='int'",
    "source='pubmed_id';target='str2flt';type='float'",
    "source='stop';target='int2flt';type='float'",
    "source='stop';target='int2str';type='text'",
    "source='align_ident_slv';target='flt2str';type='text'",
    "loss='permitted';source='align_ident_slv';target='flt2int';type='int'",

    // ACI rules:
    "aci='|lower|contains(partial)|isAbove(0)';source='description';target='describedAsPartial'",

    "aci='|fdiv(2.0)';source='align_bp_score_slv';target='halfBPscoreStr'",
    "aci='|fdiv(2.0)';source='align_bp_score_slv';target='halfBPscore';type='int'",
    "aci='|fdiv(2.0)';source='align_bp_score_slv';target='halfBPscoreFlt';type='float'",

    "aci='|fmult(3.5)';source='homop_slv';target='multiHomopStr'",
    "aci='|fmult(3.5)';source='homop_slv';target='multiHomopInt';type='int'",
    "aci='|fmult(3.5)';source='homop_slv';target='multiHomop';type='float'",

    // rules concatenating multiple source fields (joined by 'sep'):
    "sep='/';source='embl_class;embl_division';target='embl_class_division'",
    "sep='-';source='align_startpos_slv;align_stoppos_slv';target='align_range_slv'",
    "sep='\\'';source='no1;align_bp_score_slv;no2;rel_ltp;no3';target='missing'",

    "sep=';';source='NO1;no2;no3';target='skipped'",

    // concatenation + ACI:
    "aci='|upper';sep=':';source='embl_class;embl_division';target='emblClassDivision'",
    "aci='|\"<\";dd;\">\"';sep=';';source='no1;no2;no3';target='skipped2'",
};
---|
1121 | |
---|
// Names of the ascii databases containing expected test results.
// EXPECTED_ASCII is passed to XferEnv::saveAndCompare() by TEST_xferBySet and TEST_LATE_ruleConfigsReadable.
// EXPECTED_ASCII_CLONED is not used in the tests above (presumably used by a clone test further below).
static const char *EXPECTED_ASCII = "TEST_fields_xferred_expected.arb"; // ../../UNIT_TESTER/run/TEST_fields_xferred_expected.arb
static const char *EXPECTED_ASCII_CLONED = "TEST_fields_cloned_expected.arb"; // ../../UNIT_TESTER/run/TEST_fields_cloned_expected.arb
---|
1124 | |
---|
// Tests data transfer between items using RuleSet|s:
// 1. builds 'ruleset' (simple, type-forcing, ACI and concatenating rules) and
//    collects rules which are expected to fail in 'failing'
// 2. checks extraction of used input/output fields
// 3. compares the config of each rule with 'expRuleConfig'
// 4. transfers all species from source DB into destination DB
// 5. checks errors for missing source/target item and for each failing rule
// 6. checks that each field has the same type in all destination species
// 7. saves the destination DB and compares it with EXPECTED_ASCII
void TEST_xferBySet() {
    // tests data transfer between items using RuleSet|s
    using namespace FieldTransfer;
    XferEnv env;

    // --------------------------------------------------------------
    //      create rules and transfer item data using RuleSet|s:

    typedef std::vector<FailingRule> FailingRuleCont;

    RuleSet ruleset;
    FailingRuleCont failing; // failing rules should go here

    // stores a rule together with (part of) the error message it is expected to cause:
#define FAILING_add(rule,msg) failing.push_back(FailingRule(rule, msg))

    ruleset.add(Rule::makeSimple("lat_lon", NOSEP, "geolocation")); // STRING->STRING
    ruleset.add(Rule::makeSimple("seq_quality_slv", NOSEP, "seq/slv_quality")); // INT ->INT (generate hierarchical target key)
    ruleset.add(Rule::makeSimple("homop_slv", NOSEP, "slv_homop")); // FLOAT ->FLOAT

    ruleset.add(Rule::makeSimple("no1", NOSEP, "notTransferred")); // missing fields are skipped

    // force target types
    ruleset.add(Rule::forceTargetType(GB_INT, Rule::makeSimple("pubmed_id", NOSEP, "str2int"))); // STRING->INT
    ruleset.add(Rule::forceTargetType(GB_FLOAT, Rule::makeSimple("pubmed_id", NOSEP, "str2flt"))); // STRING->FLOAT
    ruleset.add(Rule::forceTargetType(GB_FLOAT, Rule::makeSimple("stop", NOSEP, "int2flt"))); // INT->FLOAT
    ruleset.add(Rule::forceTargetType(GB_STRING, Rule::makeSimple("stop", NOSEP, "int2str"))); // INT->STRING
    ruleset.add(Rule::forceTargetType(GB_STRING, Rule::makeSimple("align_ident_slv", NOSEP, "flt2str"))); // FLOAT->STRING
    FAILING_add(Rule::forceTargetType(GB_INT, Rule::makeSimple("align_ident_slv", NOSEP, "dummy")), "lossy float->int type conversion (9.484605e+01->9.500000e+01)"); // FLOAT->INT
    ruleset.add(Rule::permitPrecisionLoss(Rule::forceTargetType(GB_INT, Rule::makeSimple("align_ident_slv", NOSEP, "flt2int")))); // FLOAT->INT
    // @@@ test forcedTargetType(GB_BITS)

    // @@@ test transfer with existing target keys (and mismatching i.e. not default types)
    // -> shall force type conversion (e.g. int->float, float->string, string->int)

    ruleset.add(Rule::makeAciConverter("description", NOSEP, "|lower|contains(partial)|isAbove(0)", "describedAsPartial")); // transports STRING through ACI to STRING

    // INT | ACI -> STRING|INT|FLOAT:
    ruleset.add(Rule::makeAciConverter("align_bp_score_slv", NOSEP, "|fdiv(2.0)", "halfBPscoreStr")); // transports INT through ACI to STRING
    ruleset.add(Rule::forceTargetType(GB_INT, Rule::makeAciConverter("align_bp_score_slv", NOSEP, "|fdiv(2.0)", "halfBPscore"))); // transports INT through ACI to INT // @@@ why does this not complain about conversion loss? (e.g. PurGergo 58.5 -> 58). examine!!!
    ruleset.add(Rule::forceTargetType(GB_FLOAT, Rule::makeAciConverter("align_bp_score_slv", NOSEP, "|fdiv(2.0)", "halfBPscoreFlt"))); // transports INT through ACI to FLOAT

    // FLOAT | ACI -> STRING:
    ruleset.add(Rule::makeAciConverter("homop_slv", NOSEP, "|fmult(3.5)", "multiHomopStr")); // transports FLOAT through ACI to STRING
    ruleset.add(Rule::forceTargetType(GB_INT, Rule::makeAciConverter("homop_slv", NOSEP, "|fmult(3.5)", "multiHomopInt"))); // transports FLOAT through ACI to INT // @@@ conversion loss happens (for all species?) // @@@ examine!
    ruleset.add(Rule::forceTargetType(GB_FLOAT, Rule::makeAciConverter("homop_slv", NOSEP, "|fmult(3.5)", "multiHomop"))); // transports FLOAT through ACI to FLOAT

    // @@@ test ACIs containing the following chars: ['"\\ = ]

    // test concatenating rules:
    ruleset.add(Rule::makeSimple("embl_class;embl_division", "/", "embl_class_division")); // concat 2 STRINGs
    ruleset.add(Rule::makeSimple("align_startpos_slv;align_stoppos_slv", "-", "align_range_slv")); // concat 2 INTs
    ruleset.add(Rule::makeSimple("no1;align_bp_score_slv;no2;rel_ltp;no3", "'", "missing")); // concat INT + STRING (plus 3 non-existing fields)

    ruleset.add(Rule::makeSimple("NO1;no2;no3", ";", "skipped")); // concat 3 non-existing fields -> field 'skipped' is NOT written to result DB

    // test concatenation + ACI:
    ruleset.add(Rule::makeAciConverter("embl_class;embl_division", ":", "|upper", "emblClassDivision")); // concat 2 STRINGs
    ruleset.add(Rule::makeAciConverter("no1;no2;no3", ";", "|\"<\";dd;\">\"", "skipped2")); // concat 3 non-existing fields and apply ACI -> field 'skipped2' is NOT written to result DB + ACI not applied

    // ----------------------------------------------------------------------------
    //      please do not change 'ruleset' below this point
    // ----------------------------------------------------------------------------

    // test input/output field extraction
    {
        StrArray input;
        StrArray output;

        ruleset.extractUsedFields(input, output);

        TEST_EXPECT_STRARRAY_CONTAINS(input, ';', "align_bp_score_slv;align_ident_slv;align_startpos_slv;align_stoppos_slv;description;embl_class;embl_division;homop_slv;lat_lon;no1;no2;no3;pubmed_id;rel_ltp;seq_quality_slv;stop");
        TEST_EXPECT_STRARRAY_CONTAINS(output, ';', "align_range_slv;describedAsPartial;emblClassDivision;embl_class_division;flt2int;flt2str;geolocation;halfBPscore;halfBPscoreFlt;halfBPscoreStr;int2flt;int2str;missing;multiHomop;multiHomopInt;multiHomopStr;notTransferred;seq/slv_quality;skipped;skipped2;slv_homop;str2flt;str2int");
    }

    // convert all rules in 'ruleset' into string and test versus expRuleConfig:
    const size_t cfgs = ARRAY_ELEMS(expRuleConfig);
    const size_t rulz = ruleset.size();
    {
        // only compare as many entries as exist in both (a size mismatch is reported below)
        const size_t testableRepr = min(cfgs, rulz);
        for (size_t r = 0; r<testableRepr; ++r) {
            TEST_ANNOTATE(GBS_global_string("r=%zu", r));
            const Rule& rule = ruleset.get(r);
            string rep = rule.getConfig();
            TEST_EXPECT_EQUAL(expRuleConfig[r], rep.c_str());
        }
    }

    TEST_EXPECT_EQUAL(cfgs, rulz);

    // test no 2 rules have equal config:
    for (size_t r1 = 0; r1<rulz; ++r1) {
        for (size_t r2 = r1+1; r2<rulz; ++r2) {
            TEST_ANNOTATE(GBS_global_string("r1/r2=%zu/%zu", r1, r2));
            TEST_EXPECT_DIFFERENT(expRuleConfig[r1], expRuleConfig[r2]);
        }
    }
    TEST_ANNOTATE(NULp);

    env.transferAllSpeciesBy(ruleset);

    // -------------------------------------------
    //      test missing source-/target-item:
    {
        GB_transaction tas(env.gb_src);
        GB_transaction tad(env.gb_dest);

        GBDATA *gb_src_species = GBT_first_species(env.gb_src);
        TEST_REJECT_NULL(gb_src_species);

        const char *name = GBT_get_name(gb_src_species);
        TEST_REJECT_NULL(name);

        GBDATA *gb_dest_species = GBT_find_species(env.gb_dest, name); // already has been created by 'transferAllSpeciesBy' above
        TEST_REJECT_NULL(gb_dest_species);

        TEST_EXPECT_ERROR_CONTAINS(ruleset.transferBy(NULp, gb_dest_species), "lacking item to readFrom");
        TEST_EXPECT_ERROR_CONTAINS(ruleset.transferBy(gb_src_species, NULp), "lacking item to writeTo");
    }

    // ---------------------------------------------
    //      test rules failing during transfer:
    FAILING_add(Rule::forceTargetType(GB_FLOAT, Rule::makeSimple("nuc_region", NOSEP, "str2flt")), "cannot convert '1..1494' to float"); // test conversion errors (e.g. non-numeric string -> int or float)
    FAILING_add(Rule::makeAciConverter("homop_slv", NOSEP, "|fmult(3.5, ooo)", "dummy"), "Unknown command '3.5'");
    FAILING_add(Rule::makeSimple("stop", NOSEP, "xx*xx"), "Invalid character '*' in 'xx*xx'");
    FAILING_add(Rule::makeSimple("ali_16s", NOSEP, "whatever"), "cannot read as data ('ali_16s' is a container)");

    // each failing rule is tested separately (in a RuleSet containing only that rule):
    for (FailingRuleCont::const_iterator failRule = failing.begin(); failRule != failing.end(); ++failRule) {
        const FailingRule& testableRule = *failRule;
        RuleSet separated;
        separated.add(new Rule(testableRule));

        // apply rule:
        {
            GB_transaction tas(env.gb_src);
            GB_transaction tad(env.gb_dest);

            GB_ERROR error = NULp;

            // loop stops at the first species for which the transfer fails:
            for (GBDATA *gb_src_species = GBT_first_species(env.gb_src);
                 gb_src_species && !error;
                 gb_src_species = GBT_next_species(gb_src_species))
            {
                const char *name = GBT_get_name(gb_src_species);
                if (!name) {
                    error = "cannot search for unnamed species";
                }
                else {
                    GBDATA *gb_dest_species = GBT_find_species(env.gb_dest, name); // already has been created by 'transferBy' above
                    error = separated.transferBy(gb_src_species, gb_dest_species);
                }
            }
            tad.close(error); // aborts transaction (if error occurs, which is expected here)
            TEST_EXPECT_ERROR_CONTAINS(error, testableRule.expectedPartOfFailure());
        }
    }

    // ----------------------------------------------------------------
    //      test type of each field is same across all items of DB
    {
        GB_transaction tad(env.gb_dest);
        GBDATA *gb_fake_species_data = GB_create_container(env.gb_dest, "tmp"); // necessary because GBT_scan_db never scans DIRECT children

        typedef map<string,GB_TYPES> TypedField;
        TypedField seen; // maps field name -> type seen first

        GB_ERROR error = NULp;
        for (GBDATA *gb_dest_species = GBT_first_species(env.gb_dest);
             gb_dest_species && !error;
             gb_dest_species = GBT_next_species(gb_dest_species))
        {
            TEST_ANNOTATE(GBS_global_string("name=%s", GBT_get_name_or_description(gb_dest_species)));

            // scan a temporary copy of the species (copy gets deleted at end of loop body):
            GBDATA *gb_specCopy = GB_create_container(gb_fake_species_data, "tmp");
            error = GB_copy_dropProtectMarksAndTempstate(gb_specCopy, gb_dest_species);

            if (error) break;

            StrArray curr;
            GBT_scan_db(curr, gb_fake_species_data, NULp);
            TEST_REJECT_ZERO(curr.size()); // expect fields

            for (int i = 0; curr[i]; ++i) {
                const char *scanned = curr[i]; // 1st char is type
                const char *field = scanned+1;
                GB_TYPES type = GB_TYPES(scanned[0]);

                TypedField::iterator found = seen.find(field);
                if (found != seen.end()) {
                    if (type != found->second) {
                        TEST_ANNOTATE(field);
                        TEST_EXPECT_EQUAL(type, found->second); // existing field has to have same type (in all species)
                    }
                }
                else {
                    fprintf(stderr, "field='%s' type='%i'\n", field, type);
                    seen[field] = type; // insert new field
                }
            }

            if (!error) error = GB_delete(gb_specCopy);
        }
        if (!error) error = GB_delete(gb_fake_species_data);
        TEST_EXPECT_NO_ERROR(error);
    }

    // ----------------------------------------------------------------------------
    xf_assert(rulz == ruleset.size()); // please do not change 'ruleset' after 'rulz' has been set!

    env.saveAndCompare(EXPECTED_ASCII, true);
}
---|
1335 | |
---|
// Tests conversion between config strings and Rule|s / RuleSet|s:
// 1. invalid config strings have to be rejected with the expected error
// 2. each entry of 'expRuleConfig' has to survive config->Rule->config conversion
// 3. a RuleSet (incl. comment and transferUndefFields flag) has to survive save + reload
// 4. loading a missing file / saving to an unwritable location has to fail;
//    loading an empty file results in an empty RuleSet
// 5. transferring with the reloaded RuleSet has to produce EXPECTED_ASCII
void TEST_LATE_ruleConfigsReadable() {
    // run this test later than TEST_xferBySet

    using namespace FieldTransfer;

    {
        // test failing Rule configs:
        struct InvalidConfig {
            const char *config;  // config string which is expected to be rejected
            GB_ERROR failure;    // expected part of the resulting error message
        };
        InvalidConfig invalidCfg[] = {
            { TARGET "='xxx'", "missing source entry" },
            { SOURCE "='xxx'", "missing target entry" },
            { "tag='halfquot;", "could not find matching quote" },
            { TARGET "='xxx';" SOURCE "='xxx';type='bizarre'", "invalid type id 'bizarre'" },
        };

        for (size_t i = 0; i<ARRAY_ELEMS(invalidCfg); ++i) {
            InvalidConfig& CFG = invalidCfg[i];
            TEST_ANNOTATE(GBS_global_string("invalidCfg='%s'", CFG.config));

            ErrorOrRulePtr result = Rule::makeFromConfig(CFG.config);
            TEST_EXPECT(result.hasError());
            TEST_EXPECT_ERROR_CONTAINS(result.getError(), CFG.failure);
        }
        TEST_ANNOTATE(NULp);
    }

    const size_t cfgs = ARRAY_ELEMS(expRuleConfig);
    RuleSet ruleset;

    // convert config->Rule + Rule->config + compare configs:
    for (size_t r = 0; r<cfgs; ++r) {
        const char *config = expRuleConfig[r];

        ErrorOrRulePtr result = Rule::makeFromConfig(config);
        if (result.hasError()) {
            TEST_EXPECT_NO_ERROR(result.getError()); // dump error + fail
        }
        else {
            RulePtr rule = result.getValue();
            string reloadedConfig = rule->getConfig();
            TEST_EXPECT_EQUAL(reloadedConfig, config);

            ruleset.add(rule);
        }
    }

    // test RuleSet comment:
    const char *COMMENT = "A multi-\nline-\ntest-\ncomment.";
    ruleset.setComment(COMMENT);
    TEST_EXPECT_EQUAL(ruleset.getComment(), COMMENT);

    ruleset.set_transferUndefFields(true);
    TEST_EXPECT(ruleset.shallTransferUndefFields());

    // save RuleSet + reload it + compare:
    RuleSet reloaded_ruleset;
    {
        const char *rulesetSaved = "impexp/rulesetCurr.fts";
        const char *rulesetExpected = "impexp/ruleset.fts";

        TEST_EXPECT_NO_ERROR(ruleset.saveTo(rulesetSaved));
// #define TEST_AUTO_UPDATE_RS // uncomment to update expected result
#if defined(TEST_AUTO_UPDATE_RS)
        TEST_COPY_FILE(rulesetSaved, rulesetExpected);
#endif
        TEST_EXPECT_TEXTFILE_DIFFLINES(rulesetSaved, rulesetExpected, 0);
        TEST_EXPECT_ZERO_OR_SHOW_ERRNO(GB_unlink(rulesetSaved));

        // reload RuleSet:
        {
            // note: the expected file is loaded (the saved file has been unlinked above)
            ErrorOrRuleSetPtr loaded = RuleSet::loadFrom(rulesetExpected);
            if (loaded.hasError()) TEST_EXPECT_NO_ERROR(loaded.getError()); // if error -> dump+fail

            const RuleSet& loadedSet = *loaded.getValue();
            TEST_EXPECT_EQUAL(loadedSet.size(), ruleset.size());

            // compare reloaded rules configs vs. array of expected configs.
            // tests:
            // - save+load is correct and complete
            // - Rule order is stable
            for (size_t r = 0; r<loadedSet.size(); ++r) {
                const Rule& rule = loadedSet.get(r);
                string cfg = rule.getConfig();
                TEST_EXPECT_EQUAL(cfg.c_str(), expRuleConfig[r]);
            }

            // test comment survives reload:
            TEST_EXPECT_EQUAL(loadedSet.getComment(), COMMENT);

            // test transferUndefFields survives save/load:
            TEST_EXPECT(loadedSet.shallTransferUndefFields());

            // use reloaded Ruleset for tests below:
            reloaded_ruleset = loadedSet; // also tests RuleSet-copy-ctor works.

            // test comment gets copied:
            TEST_EXPECT_EQUAL(reloaded_ruleset.getComment(), loadedSet.getComment());
        }
    }

    // test RuleSet load/save errors:
    {
        const char *noSuchFile = "nosuch.fts";
        ErrorOrRuleSetPtr loaded = RuleSet::loadFrom(noSuchFile);

        TEST_EXPECT(loaded.hasError());
        TEST_EXPECT_ERROR_CONTAINS(loaded.getError(), "No such file or directory");

        const char *unsavable = "noSuchDir/whatever.fts";
        TEST_EXPECT_ERROR_CONTAINS(ruleset.saveTo(unsavable), "No such file or directory");
    }

    // load empty file -> empty RuleSet
    {
        const char *emptyFile = "general/empty.input";

        ErrorOrRuleSetPtr empty = RuleSet::loadFrom(emptyFile);
        TEST_REJECT(empty.hasError());

        const RuleSet& emptySet = *empty.getValue();
        TEST_EXPECT_ZERO(emptySet.size()); // test emptySet has no rules
        TEST_EXPECT_EQUAL(emptySet.getComment(), ""); // test emptySet has no comment
    }

    // use 'reloaded_ruleset' to modify same DB (as above in TEST_xferBySet):
    {
        XferEnv env;
        env.transferAllSpeciesBy(reloaded_ruleset);
        env.saveAndCompare(EXPECTED_ASCII, false); // if this fails -> saving/reloading config loses Rule information
    }
}
---|
1470 | |
---|
1471 | #define CUSTOM_ALI_TRANSPORT_ERROR "custom ali transport error" |
---|
1472 | |
---|
1473 | struct TestAlignmentTransporter FINAL_TYPE : public FieldTransfer::AlignmentTransporter { |
---|
1474 | int mode; |
---|
1475 | TestAlignmentTransporter(int mode_) : mode(mode_) {} |
---|
1476 | bool shallCopyBefore() const OVERRIDE { |
---|
1477 | return false; // do not call copyAlignments() b4 calling transport() |
---|
1478 | } |
---|
1479 | GB_ERROR transport(GBDATA*gb_src_item, GBDATA *gb_dst_item) const OVERRIDE { |
---|
1480 | GB_ERROR error = NULp; |
---|
1481 | switch (mode) { |
---|
1482 | case 1: // custom error |
---|
1483 | error = CUSTOM_ALI_TRANSPORT_ERROR; |
---|
1484 | break; |
---|
1485 | |
---|
1486 | case 2: // do nothing -> sequence still has old value (or is not copied) |
---|
1487 | break; |
---|
1488 | |
---|
1489 | case 3: { // write reverse sequence data |
---|
1490 | GBDATA *gb_src_data = GBT_find_sequence(gb_src_item, "ali_16s"); |
---|
1491 | GBDATA *gb_dst_data = GBT_find_sequence(gb_dst_item, "ali_16s"); |
---|
1492 | |
---|
1493 | if (!gb_dst_data) { // destination has no 'ali_16s' -> clone whole container |
---|
1494 | GBDATA *gb_src_ali = GB_get_father(gb_src_data); |
---|
1495 | |
---|
1496 | error = GB_incur_error_if(!GB_clone(gb_dst_item, gb_src_ali)); |
---|
1497 | if (!error) { |
---|
1498 | gb_dst_data = GBT_find_sequence(gb_dst_item, "ali_16s"); |
---|
1499 | xf_assert(gb_dst_data); |
---|
1500 | } |
---|
1501 | } |
---|
1502 | |
---|
1503 | if (!error) { |
---|
1504 | const char *seq = GB_read_char_pntr(gb_src_data); |
---|
1505 | char *rev = GBT_reverseNucSequence(seq, strlen(seq)); |
---|
1506 | |
---|
1507 | error = GB_write_string(gb_dst_data, rev); |
---|
1508 | free(rev); |
---|
1509 | } |
---|
1510 | break; |
---|
1511 | } |
---|
1512 | default: xf_assert(0); break; // unsupported mode |
---|
1513 | } |
---|
1514 | return error; |
---|
1515 | } |
---|
1516 | }; |
---|
1517 | |
---|
1518 | void TEST_clone_by_ruleset() { |
---|
1519 | using namespace FieldTransfer; |
---|
1520 | |
---|
1521 | RuleSetPtr ruleset; |
---|
1522 | { |
---|
1523 | const char *rulesetExpected = "impexp/ruleset.fts"; // same ruleset as used in tests above |
---|
1524 | |
---|
1525 | ErrorOrRuleSetPtr loaded = RuleSet::loadFrom(rulesetExpected); |
---|
1526 | if (loaded.hasError()) TEST_EXPECT_NO_ERROR(loaded.getError()); // if RuleSet load error -> dump+fail. see .@loadFrom |
---|
1527 | |
---|
1528 | ruleset = loaded.getValue(); |
---|
1529 | } |
---|
1530 | |
---|
1531 | // use 'ruleset' to modify same DB (but use ItemClonedByRuleSet here) |
---|
1532 | { |
---|
1533 | XferEnv env; |
---|
1534 | env.copyAllSpecies(); // copy species of input DB -> output DB |
---|
1535 | |
---|
1536 | GBDATA *gb_overwritten_species = NULp; |
---|
1537 | char *overwrittenName = NULp; |
---|
1538 | |
---|
1539 | // clone some species (inside output db): |
---|
1540 | { |
---|
1541 | GB_transaction ta(env.gb_dest); |
---|
1542 | |
---|
1543 | GBDATA *gb_next_species = NULp; |
---|
1544 | GBDATA *gb_first_clone = NULp; |
---|
1545 | int count = 0; |
---|
1546 | |
---|
1547 | for (GBDATA *gb_species = GBT_first_species(env.gb_dest); |
---|
1548 | gb_species && gb_species != gb_first_clone; |
---|
1549 | gb_species = gb_next_species, ++count) |
---|
1550 | { |
---|
1551 | gb_next_species = GBT_next_species(gb_species); |
---|
1552 | |
---|
1553 | TEST_EXPECT_EQUAL(GB_countEntries(gb_species, "name"), 1); // safety-belt (had problems with duplicate name-entries) |
---|
1554 | |
---|
1555 | char *orgName = nulldup(GBT_get_name(gb_species)); |
---|
1556 | TEST_REJECT_NULL(orgName); |
---|
1557 | |
---|
1558 | GBDATA *gb_clone = NULp; |
---|
1559 | ItemCloneType cloneHow = (count == 3 || count == 7) ? RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS : REPLACE_ITEM_BY_CLONE; |
---|
1560 | |
---|
1561 | ruleset->set_transferUndefFields(count == 4); // test transfer of undefined fields for species #4 |
---|
1562 | |
---|
1563 | { |
---|
1564 | ItemClonedByRuleSet clone(gb_species, CLONE_ITEM_SPECIES, ruleset, cloneHow, NULp, NULp); |
---|
1565 | |
---|
1566 | if (clone.has_error()) TEST_EXPECT_NO_ERROR(clone.get_error()); |
---|
1567 | gb_clone = clone.get_clone(); |
---|
1568 | if (!gb_first_clone) { |
---|
1569 | xf_assert(cloneHow == REPLACE_ITEM_BY_CLONE); // limit will not work otherwise |
---|
1570 | gb_first_clone = gb_clone; |
---|
1571 | } |
---|
1572 | |
---|
1573 | switch (cloneHow) { |
---|
1574 | case REPLACE_ITEM_BY_CLONE: |
---|
1575 | TEST_EXPECT_NULL(gb_species); |
---|
1576 | break; |
---|
1577 | case RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS: |
---|
1578 | TEST_EXPECT_EQUAL(GB_countEntries(gb_species, "name"), 1); |
---|
1579 | TEST_EXPECT_EQUAL(GBT_get_name(gb_species), "fake"); // @@@ need a temporary name which cannot clash with existing names |
---|
1580 | break; |
---|
1581 | default: |
---|
1582 | xf_assert(0); // not tested here |
---|
1583 | break; |
---|
1584 | } |
---|
1585 | |
---|
1586 | TEST_EXPECT_EQUAL(GB_countEntries(gb_clone, "name"), 1); |
---|
1587 | TEST_EXPECT_EQUAL(GBT_get_name(gb_clone), orgName); |
---|
1588 | } |
---|
1589 | // 'clone' has been destroyed now! |
---|
1590 | |
---|
1591 | if (cloneHow == RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS) { |
---|
1592 | TEST_EXPECT_EQUAL(GBT_get_name(gb_species), orgName); // rename back worked (RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS) |
---|
1593 | } |
---|
1594 | int orgNameCount = 0; |
---|
1595 | for (GBDATA *gb_peek = GBT_first_species(env.gb_dest); gb_peek; gb_peek = GBT_next_species(gb_peek)) { |
---|
1596 | bool hasOrgName = strcmp(GBT_get_name(gb_peek), orgName) == 0; |
---|
1597 | orgNameCount += hasOrgName; |
---|
1598 | switch (cloneHow) { |
---|
1599 | case REPLACE_ITEM_BY_CLONE: |
---|
1600 | if (hasOrgName) TEST_EXPECT(gb_peek == gb_clone); // orgName only used in persisting clone |
---|
1601 | TEST_EXPECT(gb_peek != gb_species); // species has been removed |
---|
1602 | break; |
---|
1603 | case RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS: |
---|
1604 | if (hasOrgName) TEST_EXPECT(gb_peek == gb_species); // orgName only used in original species (i.e. temp. clone did vanish) |
---|
1605 | TEST_EXPECT(gb_peek != gb_clone); // clone has been removed |
---|
1606 | break; |
---|
1607 | default: |
---|
1608 | xf_assert(0); // not tested here |
---|
1609 | break; |
---|
1610 | } |
---|
1611 | // @@@ also test against "fake" names? |
---|
1612 | } |
---|
1613 | TEST_EXPECT_EQUAL(orgNameCount, 1); // species with duplicate names unwanted |
---|
1614 | |
---|
1615 | if (count == 3) { |
---|
1616 | gb_overwritten_species = gb_species; // = copy of original |
---|
1617 | overwrittenName = ARB_strdup(orgName); |
---|
1618 | } |
---|
1619 | free(orgName); |
---|
1620 | } |
---|
1621 | } |
---|
1622 | |
---|
1623 | // test merging one species from source DB onto existing (cloned) species in dest DB: |
---|
1624 | { |
---|
1625 | GBDATA *gb_source_species; |
---|
1626 | |
---|
1627 | { |
---|
1628 | GB_transaction ta1(env.gb_src); |
---|
1629 | GB_transaction ta2(env.gb_dest); |
---|
1630 | |
---|
1631 | gb_source_species = GBT_find_species(env.gb_src, overwrittenName); |
---|
1632 | TEST_REJECT_NULL(gb_source_species); // (in gb_src) |
---|
1633 | TEST_REJECT_NULL(gb_overwritten_species); // (in gb_dest) |
---|
1634 | |
---|
1635 | { |
---|
1636 | GBDATA *gb_name = GB_entry(gb_overwritten_species, "name"); |
---|
1637 | TEST_EXPECT_NO_ERROR(GB_write_string(gb_name, "notOverwritten")); // prepare to test overwrite by data. |
---|
1638 | |
---|
1639 | // modify "data" in "ali_16s" to prove overwrite later: |
---|
1640 | GBDATA *gb_seq = GBT_find_sequence(gb_overwritten_species, "ali_16s"); |
---|
1641 | TEST_REJECT_NULL(gb_seq); |
---|
1642 | |
---|
1643 | const char *seq = GB_read_char_pntr(gb_seq); |
---|
1644 | char *seqMod = GBS_string_eval(seq, ":U=T"); |
---|
1645 | |
---|
1646 | TEST_EXPECT_NO_ERROR(GB_write_string(gb_seq, seqMod)); |
---|
1647 | free(seqMod); |
---|
1648 | } |
---|
1649 | } |
---|
1650 | |
---|
1651 | SmartPtr<TestAlignmentTransporter> reverseAliTransporter; |
---|
1652 | |
---|
1653 | // overwrite with result of ruleset (=> mix original and clone) |
---|
1654 | for (int pass = 1; pass<=4; ++pass) { |
---|
1655 | TEST_ANNOTATE(GBS_global_string("pass %i", pass)); |
---|
1656 | |
---|
1657 | GB_transaction ta1(env.gb_src); |
---|
1658 | GB_transaction ta2(env.gb_dest); |
---|
1659 | |
---|
1660 | SmartPtr<TestAlignmentTransporter> aliTransporter; |
---|
1661 | if (pass<4) { |
---|
1662 | aliTransporter = new TestAlignmentTransporter(pass); |
---|
1663 | if (pass == 3) reverseAliTransporter = aliTransporter; // keep for later |
---|
1664 | } |
---|
1665 | |
---|
1666 | ItemClonedByRuleSet overclone(gb_source_species, CLONE_ITEM_SPECIES, ruleset, CLONE_INTO_EXISTING, gb_overwritten_species, aliTransporter.content()); |
---|
1667 | |
---|
1668 | if (pass == 1) { |
---|
1669 | TEST_EXPECT(overclone.has_error()); |
---|
1670 | TEST_EXPECT_ERROR_CONTAINS(overclone.get_error(), CUSTOM_ALI_TRANSPORT_ERROR); |
---|
1671 | ta2.close(overclone.get_error()); |
---|
1672 | ta1.close(overclone.get_error()); |
---|
1673 | } |
---|
1674 | else { |
---|
1675 | if (overclone.has_error()) TEST_EXPECT_NO_ERROR(overclone.get_error()); // expect no error, but show message if expectation fails |
---|
1676 | TEST_EXPECT(overclone.get_clone() == gb_overwritten_species); // test get_clone reports gb_overwritten_species here |
---|
1677 | TEST_EXPECT_EQUAL(GBT_get_name(gb_overwritten_species), "notOverwritten"); // test name of clone does not get overwritten |
---|
1678 | TEST_EXPECT_EQUAL(GB_countEntries(gb_overwritten_species, "name"), 1); |
---|
1679 | |
---|
1680 | { |
---|
1681 | GBDATA *gb_seq = GBT_find_sequence(gb_overwritten_species, "ali_16s"); |
---|
1682 | TEST_REJECT_NULL(gb_seq); |
---|
1683 | |
---|
1684 | const char *seq = GB_read_char_pntr(gb_seq); |
---|
1685 | |
---|
1686 | switch (pass) { |
---|
1687 | case 2: TEST_EXPECT_CONTAINS(seq, "GAAGTAGCTTGCTACTTTGCCGGCGAGCGGCGGAC"); break; // custom transporter: do nothing |
---|
1688 | case 3: TEST_EXPECT_CONTAINS(seq, "CAGGCGGCGAGCGGCCGUUUCAUCGUUCGAUGAAG"); break; // custom transporter: writes reversed sequence data |
---|
1689 | case 4: TEST_EXPECT_CONTAINS(seq, "GAAGUAGCUUGCUACUUUGCCGGCGAGCGGCGGAC"); break; // default behavior (=copy sequence over) |
---|
1690 | default: xf_assert(0); break; // unexpected 'pass' |
---|
1691 | } |
---|
1692 | |
---|
1693 | |
---|
1694 | GBDATA *gb_ali = GB_get_father(gb_seq); |
---|
1695 | TEST_EXPECT_EQUAL(GB_countEntries(gb_ali, "data"), 1); |
---|
1696 | } |
---|
1697 | |
---|
1698 | TEST_EXPECT_EQUAL(GB_countEntries(gb_overwritten_species, "ali_16s"), 1); |
---|
1699 | } |
---|
1700 | } |
---|
1701 | |
---|
1702 | // "test" REAL_CLONE mode |
---|
1703 | |
---|
1704 | { |
---|
1705 | GB_transaction ta1(env.gb_src); |
---|
1706 | GB_transaction ta2(env.gb_dest); |
---|
1707 | |
---|
1708 | ItemClonedByRuleSet realClone(gb_source_species, CLONE_ITEM_SPECIES, ruleset, REAL_CLONE, GBT_get_species_data(env.gb_dest), &*reverseAliTransporter); |
---|
1709 | |
---|
1710 | if (realClone.has_error()) TEST_EXPECT_NO_ERROR(realClone.get_error()); // expect no error, but show message if expectation fails |
---|
1711 | |
---|
1712 | GBDATA *gb_clone = realClone.get_clone(); |
---|
1713 | |
---|
1714 | TEST_REJECT_NULL(gb_clone); |
---|
1715 | TEST_REJECT_NULL(gb_source_species); |
---|
1716 | TEST_EXPECT(gb_clone != gb_source_species); |
---|
1717 | TEST_EXPECT_EQUAL(GBT_get_name(gb_clone), GBT_get_name(gb_source_species)); |
---|
1718 | |
---|
1719 | TEST_REJECT(GB_get_father(gb_clone) == GB_get_father(gb_source_species)); |
---|
1720 | |
---|
1721 | { |
---|
1722 | GBDATA *gb_seq = GBT_find_sequence(gb_clone, "ali_16s"); |
---|
1723 | TEST_REJECT_NULL(gb_seq); |
---|
1724 | |
---|
1725 | const char *seq = GB_read_char_pntr(gb_seq); |
---|
1726 | |
---|
1727 | // TEST_EXPECT_CONTAINS(seq, "GAAGUAGCUUGCUACUUUGCCGGCGAGCGGCGGAC"); // default behavior (=copy sequence over) |
---|
1728 | TEST_EXPECT_CONTAINS(seq, "CAGGCGGCGAGCGGCCGUUUCAUCGUUCGAUGAAG"); // custom transporter: writes reversed sequence data |
---|
1729 | |
---|
1730 | GBDATA *gb_ali = GB_get_father(gb_seq); |
---|
1731 | TEST_EXPECT_EQUAL(GB_countEntries(gb_ali, "data"), 1); |
---|
1732 | } |
---|
1733 | } |
---|
1734 | } |
---|
1735 | |
---|
1736 | env.saveAndCompare(EXPECTED_ASCII_CLONED, true); |
---|
1737 | free(overwrittenName); |
---|
1738 | } |
---|
1739 | } |
---|
1740 | |
---|
1741 | #endif // UNIT_TESTS |
---|
1742 | |
---|
1743 | // -------------------------------------------------------------------------------- |
---|