| 1 | // ========================================================= // |
|---|
| 2 | // // |
|---|
| 3 | // File : xferset.h // |
|---|
| 4 | // Purpose : field transfer sets // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in Mar 19 // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // ========================================================= // |
|---|
| 10 | |
|---|
| 11 | #ifndef XFERSET_H |
|---|
| 12 | #define XFERSET_H |
|---|
| 13 | |
|---|
| 14 | #ifndef ERRORORTYPE_H |
|---|
| 15 | #include <ErrorOrType.h> |
|---|
| 16 | #endif |
|---|
| 17 | #ifndef ARBDB_H |
|---|
| 18 | #include <arbdb.h> |
|---|
| 19 | #endif |
|---|
| 20 | |
|---|
| 21 | #ifndef _GLIBCXX_VECTOR |
|---|
| 22 | #include <vector> |
|---|
| 23 | #endif |
|---|
| 24 | #ifndef _GLIBCXX_STRING |
|---|
| 25 | #include <string> |
|---|
| 26 | #endif |
|---|
| 27 | #ifndef _GLIBCXX_CSTDLIB |
|---|
| 28 | #include <cstdlib> |
|---|
| 29 | #endif |
|---|
| 30 | |
|---|
| 31 | #define xf_assert(cond) arb_assert(cond) |
|---|
| 32 | |
|---|
| 33 | #define NOSEP "" |
|---|
| 34 | |
|---|
| 35 | struct ConfigMapping; |
|---|
| 36 | |
|---|
| 37 | namespace FieldTransfer { |
|---|
| 38 | using std::string; |
|---|
| 39 | |
|---|
| 40 | class TransportedData { |
|---|
| 41 | GB_TYPES sourceType; // found type (GB_NONE -> field does not exist) |
|---|
| 42 | |
|---|
| 43 | // members to store data of different types: |
|---|
| 44 | string data_s; |
|---|
| 45 | int data_i; |
|---|
| 46 | float data_f; |
|---|
| 47 | |
|---|
| 48 | string err; // if non-empty -> something went wrong |
|---|
| 49 | |
|---|
| 50 | static bool state_valid() { return !GB_have_error(); } |
|---|
| 51 | |
|---|
| 52 | explicit TransportedData(GB_TYPES t, GB_ERROR error) : sourceType(t), err(error) { xf_assert(t == GB_NONE); xf_assert(error); xf_assert(state_valid()); } // otherwise use GB_TYPES-ctor! |
|---|
| 53 | explicit TransportedData(GB_TYPES); // dangerous & forbidden |
|---|
| 54 | explicit TransportedData() : sourceType(GB_NONE) { xf_assert(state_valid()); } // no data and no error (e.g. missing source field). called by none() |
|---|
| 55 | |
|---|
| 56 | public: |
|---|
| 57 | explicit TransportedData(const string& s) : sourceType(GB_STRING), data_s(s) { xf_assert(state_valid()); } |
|---|
| 58 | explicit TransportedData(int i) : sourceType(GB_INT), data_i(i) { xf_assert(state_valid()); } |
|---|
| 59 | explicit TransportedData(float f) : sourceType(GB_FLOAT), data_f(f) { xf_assert(state_valid()); } |
|---|
| 60 | |
|---|
| 61 | static TransportedData none() { return TransportedData(); } |
|---|
| 62 | static TransportedData makeError(GB_ERROR error) { return TransportedData(GB_NONE, error); } |
|---|
| 63 | |
|---|
| 64 | bool failed() const { return sourceType == GB_NONE && !err.empty(); } |
|---|
| 65 | bool exists() const { |
|---|
| 66 | xf_assert(!failed()); // failed transports can never have existing data! |
|---|
| 67 | return sourceType != GB_NONE; |
|---|
| 68 | } |
|---|
| 69 | GB_TYPES getType() const { xf_assert(exists()); return sourceType; } |
|---|
| 70 | |
|---|
| 71 | const string& getString() const { xf_assert(getType() == GB_STRING); return data_s; } |
|---|
| 72 | int getInt() const { xf_assert(getType() == GB_INT); return data_i; } |
|---|
| 73 | float getFloat() const { xf_assert(getType() == GB_FLOAT); return data_f; } |
|---|
| 74 | |
|---|
| 75 | GB_ERROR getError() const { |
|---|
| 76 | xf_assert(failed()); |
|---|
| 77 | static string kept; |
|---|
| 78 | kept = err; // avoid 'err' gets destroyed together with 'this' |
|---|
| 79 | return kept.c_str(); |
|---|
| 80 | } |
|---|
| 81 | }; |
|---|
| 82 | |
|---|
| 83 | class ReadRule { |
|---|
| 84 | string fields; // single field or list of fields (separated by ';'). field may be hierarchical (i.e. contain '/'). |
|---|
| 85 | string separator; |
|---|
| 86 | string aci; // empty = > do not use ACI |
|---|
| 87 | bool single_field; // true if 'fields' contains only 1 field |
|---|
| 88 | |
|---|
| 89 | TransportedData readTypedFromField(GB_TYPES readAsType, GBDATA *gb_field) const; |
|---|
| 90 | TransportedData aciAppliedTo(const string& toStr, GBDATA *gb_main, GBDATA *gb_dest_item) const; |
|---|
| 91 | |
|---|
| 92 | void detectSingleField() { single_field = fields.find_first_of(';') == string::npos; } |
|---|
| 93 | |
|---|
| 94 | protected: |
|---|
| 95 | void saveReadConfig(ConfigMapping& cfgmap) const; |
|---|
| 96 | string describe() const; |
|---|
| 97 | |
|---|
| 98 | public: |
|---|
| 99 | ReadRule(const string& fields_, const string& separator_) : // simple rule |
|---|
| 100 | fields(fields_), |
|---|
| 101 | separator(separator_) |
|---|
| 102 | { |
|---|
| 103 | detectSingleField(); |
|---|
| 104 | } |
|---|
| 105 | ReadRule(const char *fields_, const char *separator_, const char *aci_) : // ACI rule |
|---|
| 106 | fields(fields_), |
|---|
| 107 | separator(separator_), |
|---|
| 108 | aci(aci_) |
|---|
| 109 | { |
|---|
| 110 | detectSingleField(); |
|---|
| 111 | } |
|---|
| 112 | |
|---|
| 113 | TransportedData readFrom(GBDATA *gb_item, GBDATA *gb_dest_item) const; |
|---|
| 114 | const string& getACI() const { return aci; } |
|---|
| 115 | const string& getSourceFields() const { return fields; } |
|---|
| 116 | const string& getSeparator() const { return separator; } |
|---|
| 117 | |
|---|
| 118 | bool multiple_source_fields() const { return !single_field; } |
|---|
| 119 | }; |
|---|
| 120 | |
|---|
| 121 | class WriteRule { |
|---|
| 122 | string name; |
|---|
| 123 | GB_TYPES targetType; // @@@ need to distinguish between forced type by-config and by-existing-target-field |
|---|
| 124 | |
|---|
| 125 | mutable bool isValidKey; |
|---|
| 126 | |
|---|
| 127 | protected: |
|---|
| 128 | void saveWriteConfig(ConfigMapping& cfgmap) const; |
|---|
| 129 | string describe() const; |
|---|
| 130 | |
|---|
| 131 | public: |
|---|
| 132 | explicit WriteRule(const string& name_) : |
|---|
| 133 | name(name_), |
|---|
| 134 | targetType(GB_NONE), |
|---|
| 135 | isValidKey(false) |
|---|
| 136 | {} |
|---|
| 137 | WriteRule(const char *name_, GB_TYPES forceType) : |
|---|
| 138 | name(name_), |
|---|
| 139 | targetType(forceType), |
|---|
| 140 | isValidKey(false) |
|---|
| 141 | {} |
|---|
| 142 | |
|---|
| 143 | GB_ERROR check_hkey() const { |
|---|
| 144 | /*! similar to GB_check_hkey, but checked only once for each VALID key, i.e. for each rule */ |
|---|
| 145 | GB_ERROR error = NULp; |
|---|
| 146 | if (!isValidKey) { |
|---|
| 147 | error = GB_check_hkey(name.c_str()); |
|---|
| 148 | if (error) error = GBS_global_string("in target field: %s", error); |
|---|
| 149 | else isValidKey = true; |
|---|
| 150 | } |
|---|
| 151 | return error; |
|---|
| 152 | } |
|---|
| 153 | const string& targetField() const { return name; } |
|---|
| 154 | |
|---|
| 155 | bool forcesType() const { return targetType != GB_NONE; } |
|---|
| 156 | GB_TYPES getTargetType() const { xf_assert(forcesType()); return targetType; } |
|---|
| 157 | void setTargetType(GB_TYPES forceType) { |
|---|
| 158 | xf_assert(!forcesType()); // already has been forced |
|---|
| 159 | targetType = forceType; |
|---|
| 160 | } |
|---|
| 161 | |
|---|
| 162 | __ATTR__USERESULT GB_ERROR writeTo(const TransportedData& data, GBDATA *gb_item, bool acceptLossyConversion) const; |
|---|
| 163 | |
|---|
| 164 | }; |
|---|
| 165 | |
|---|
| 166 | class Rule; |
|---|
| 167 | typedef SmartPtr<Rule> RulePtr; |
|---|
| 168 | typedef ErrorOr<RulePtr> ErrorOrRulePtr; |
|---|
| 169 | |
|---|
| 170 | class Rule : public ReadRule, public WriteRule { |
|---|
| 171 | bool precision_loss_permitted; // @@@ move to WriteRule? |
|---|
| 172 | |
|---|
| 173 | public: |
|---|
| 174 | Rule(const ReadRule& howRead, const WriteRule& howWrite) : |
|---|
| 175 | ReadRule(howRead), |
|---|
| 176 | WriteRule(howWrite), |
|---|
| 177 | precision_loss_permitted(false) |
|---|
| 178 | {} |
|---|
| 179 | |
|---|
| 180 | __ATTR__USERESULT GB_ERROR transferBy(GBDATA *gb_source, GBDATA *gb_dest) const; |
|---|
| 181 | |
|---|
| 182 | static RulePtr makeSimple(const string& src, const string& sep, const string& dest) { return new Rule(ReadRule(src, sep), WriteRule(dest)); } |
|---|
| 183 | static RulePtr makeAciConverter(const char *src, const char *sep, const char *aci, const char *dest) { return new Rule(ReadRule(src, sep, aci), WriteRule(dest)); } |
|---|
| 184 | static ErrorOrRulePtr makeFromConfig(const char *config); |
|---|
| 185 | |
|---|
| 186 | static RulePtr forceTargetType(GB_TYPES forceType, RulePtr rule) { |
|---|
| 187 | rule->setTargetType(forceType); |
|---|
| 188 | return rule; |
|---|
| 189 | } |
|---|
| 190 | |
|---|
| 191 | bool precisionLossPermitted() const { |
|---|
| 192 | return precision_loss_permitted; |
|---|
| 193 | } |
|---|
| 194 | void permitPrecisionLoss() { |
|---|
| 195 | xf_assert(!precisionLossPermitted()); // why do you multi-permit? |
|---|
| 196 | precision_loss_permitted = true; |
|---|
| 197 | } |
|---|
| 198 | static RulePtr permitPrecisionLoss(RulePtr rule) { |
|---|
| 199 | rule->permitPrecisionLoss(); |
|---|
| 200 | return rule; |
|---|
| 201 | } |
|---|
| 202 | |
|---|
| 203 | string getConfig() const; |
|---|
| 204 | string getShortDescription() const; |
|---|
| 205 | }; |
|---|
| 206 | |
|---|
| 207 | typedef std::vector<RulePtr> RuleContainer; |
|---|
| 208 | typedef RuleContainer::const_iterator RuleIterator; |
|---|
| 209 | |
|---|
| 210 | class RuleSet; |
|---|
| 211 | typedef SmartPtr<RuleSet> RuleSetPtr; |
|---|
| 212 | typedef ErrorOr<RuleSetPtr> ErrorOrRuleSetPtr; |
|---|
| 213 | |
|---|
| 214 | class RuleSet { |
|---|
| 215 | // defines a transfer from item A to item B. |
|---|
| 216 | // items may e.g. be species, genes, ... |
|---|
| 217 | |
|---|
| 218 | RuleContainer rules; // rules to transfer single fields |
|---|
| 219 | string comment; // description of RuleSet |
|---|
| 220 | bool transferUndefFields; |
|---|
| 221 | |
|---|
| 222 | public: |
|---|
| 223 | RuleSet() : transferUndefFields(false) {} |
|---|
| 224 | |
|---|
| 225 | bool empty() const { return rules.empty(); } |
|---|
| 226 | size_t size() const { return rules.size(); } |
|---|
| 227 | bool validIdx(int idx) const { return idx>=0 && size_t(idx)<size(); } |
|---|
| 228 | |
|---|
| 229 | void add(RulePtr rule) { rules.push_back(rule); } |
|---|
| 230 | |
|---|
| 231 | RulePtr getPtr(int idx) { |
|---|
| 232 | xf_assert(validIdx(idx)); |
|---|
| 233 | return rules[idx]; |
|---|
| 234 | } |
|---|
| 235 | const Rule& get(int idx) const { |
|---|
| 236 | xf_assert(validIdx(idx)); |
|---|
| 237 | return *rules[idx]; |
|---|
| 238 | } |
|---|
| 239 | void replace(int idx, RulePtr rule) { |
|---|
| 240 | xf_assert(validIdx(idx)); |
|---|
| 241 | xf_assert(rule.isSet()); |
|---|
| 242 | rules[idx] = rule; |
|---|
| 243 | } |
|---|
| 244 | void erase(int idx) { |
|---|
| 245 | xf_assert(validIdx(idx)); |
|---|
| 246 | rules.erase(rules.begin()+idx); |
|---|
| 247 | } |
|---|
| 248 | int insertBefore(int idx, RulePtr rule) { |
|---|
| 249 | if (validIdx(idx)) { |
|---|
| 250 | rules.insert(rules.begin()+idx, rule); |
|---|
| 251 | return idx; |
|---|
| 252 | } |
|---|
| 253 | // invalid index -> silently append: |
|---|
| 254 | add(rule); |
|---|
| 255 | return int(size()-1); |
|---|
| 256 | } |
|---|
| 257 | |
|---|
| 258 | void setComment(const string& newComment) { comment = newComment; } |
|---|
| 259 | const string& getComment() const { return comment; } |
|---|
| 260 | |
|---|
| 261 | bool shallTransferUndefFields() const { return transferUndefFields; } |
|---|
| 262 | void set_transferUndefFields(bool transferThem) { transferUndefFields = transferThem; } |
|---|
| 263 | |
|---|
| 264 | __ATTR__USERESULT GB_ERROR transferBy(GBDATA *gb_source, GBDATA *gb_dest) const; |
|---|
| 265 | |
|---|
| 266 | __ATTR__USERESULT GB_ERROR saveTo(const char *filename) const; |
|---|
| 267 | static ErrorOrRuleSetPtr loadFrom(const char *filename); |
|---|
| 268 | |
|---|
| 269 | void extractUsedFields(StrArray& input, StrArray& output) const; |
|---|
| 270 | }; |
|---|
| 271 | |
|---|
// Ways to clone an item.
// (all types but CLONE_INTO_EXISTING create a cloned species)
enum ItemCloneType {
    // delete source item, create + keep cloned item (used by IMPORT).
    REPLACE_ITEM_BY_CLONE,

    // temp. rename source. create clone.
    // on destruction: delete clone + restore source item (used by EXPORT).
    RENAME_ITEM_WHILE_TEMP_CLONE_EXISTS,

    // keep source, update existing target by overwriting target fields (used by MERGE).
    CLONE_INTO_EXISTING,

    // keep source, create + keep clone (will be used by MERGE).
    // problematic if source+clone reside in same parent container!
    REAL_CLONE,
};
|---|
| 278 | |
|---|
// Kinds of items that can be cloned (currently only species are listed).
enum ClonableItemType {
    CLONE_ITEM_SPECIES,
};
|---|
| 282 | |
|---|
| 283 | struct AlignmentTransporter { |
|---|
| 284 | virtual ~AlignmentTransporter() {} |
|---|
| 285 | virtual bool shallCopyBefore() const = 0; // shall ItemClonedByRuleSet::copyAlignments() be called b4 calling transport()? |
|---|
| 286 | virtual GB_ERROR transport(GBDATA*gb_src_item, GBDATA *gb_dst_item) const = 0; |
|---|
| 287 | }; |
|---|
| 288 | typedef SmartPtr<AlignmentTransporter> AlignmentTransporterPtr; |
|---|
| 289 | |
|---|
| 290 | class ItemClonedByRuleSet : virtual Noncopyable { |
|---|
| 291 | ClonableItemType itemtype; |
|---|
| 292 | |
|---|
| 293 | GBDATA *gb_source; |
|---|
| 294 | GBDATA *gb_clone; |
|---|
| 295 | |
|---|
| 296 | ItemCloneType type; |
|---|
| 297 | |
|---|
| 298 | string errorCopy; |
|---|
| 299 | string orgName; |
|---|
| 300 | |
|---|
| 301 | #if defined(ASSERTION_USED) |
|---|
| 302 | mutable bool checked4error; |
|---|
| 303 | mutable bool userCallbackUsed; |
|---|
| 304 | #endif |
|---|
| 305 | |
|---|
| 306 | static string lastReportedError; |
|---|
| 307 | |
|---|
| 308 | GB_ERROR overlayOrCloneSub(const char *subName, GBDATA *gb_sub); |
|---|
| 309 | GB_ERROR cloneMissingSub(const char *subName, GBDATA *gb_sub); |
|---|
| 310 | GB_ERROR copySubIfMissing(const char *subName); |
|---|
| 311 | GB_ERROR copyAlignments(); |
|---|
| 312 | const char *get_id_field() const; |
|---|
| 313 | |
|---|
| 314 | public: |
|---|
| 315 | ItemClonedByRuleSet(GBDATA*& gb_item, ClonableItemType itemtype_, RuleSetPtr ruleset, ItemCloneType type_, GBDATA *gb_refItem, const AlignmentTransporter *aliTransporter); |
|---|
| 316 | ~ItemClonedByRuleSet(); |
|---|
| 317 | |
|---|
| 318 | // access result: |
|---|
| 319 | bool has_error() const { |
|---|
| 320 | #if defined(ASSERTION_USED) |
|---|
| 321 | checked4error = true; |
|---|
| 322 | #endif |
|---|
| 323 | return !errorCopy.empty(); |
|---|
| 324 | } |
|---|
| 325 | GB_ERROR get_error() const { |
|---|
| 326 | // result persists until next call of this function |
|---|
| 327 | xf_assert(checked4error); // use has_error()! |
|---|
| 328 | xf_assert(has_error()); // logic error |
|---|
| 329 | lastReportedError = errorCopy; |
|---|
| 330 | return lastReportedError.c_str(); |
|---|
| 331 | } |
|---|
| 332 | GBDATA *get_clone() { |
|---|
| 333 | xf_assert(checked4error); // use has_error()! |
|---|
| 334 | xf_assert(!has_error()); // logic error |
|---|
| 335 | xf_assert(gb_clone); |
|---|
| 336 | return gb_clone; |
|---|
| 337 | } |
|---|
| 338 | }; |
|---|
| 339 | |
|---|
| 340 | }; |
|---|
| 341 | |
|---|
| 342 | #else |
|---|
| 343 | #error xferset.h included twice |
|---|
| 344 | #endif // XFERSET_H |
|---|