| 1 | // ================================================================ // |
|---|
| 2 | // // |
|---|
| 3 | // File : Feature.cxx // |
|---|
| 4 | // Purpose : // |
|---|
| 5 | // // |
|---|
| 6 | // Coded by Ralf Westram (coder@reallysoft.de) in November 2006 // |
|---|
| 7 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 8 | // http://www.arb-home.de/ // |
|---|
| 9 | // // |
|---|
| 10 | // ================================================================ // |
|---|
| 11 | |
|---|
| 12 | #include "Feature.h" |
|---|
| 13 | #include "types.h" |
|---|
| 14 | #include <cctype> |
|---|
| 15 | |
|---|
| 16 | |
|---|
| 17 | using namespace std; |
|---|
| 18 | |
|---|
| 19 | |
|---|
| 20 | Feature::Feature(const string& Type, const string& locationString) : |
|---|
| 21 | type(Type), |
|---|
| 22 | location(parseLocation(locationString)) |
|---|
| 23 | {} |
|---|
| 24 | |
|---|
| 25 | inline void setOrAppendQualifiedEntry(stringMap& qualifiers, const string& qualifier, const string& value) { |
|---|
| 26 | stringMapIter existing = qualifiers.find(qualifier); |
|---|
| 27 | if (existing != qualifiers.end()) { // existing qualifier |
|---|
| 28 | existing->second.append(1, '\n'); // append separated by LF |
|---|
| 29 | existing->second.append(value); |
|---|
| 30 | } |
|---|
| 31 | else { |
|---|
| 32 | qualifiers[qualifier] = value; |
|---|
| 33 | } |
|---|
| 34 | } |
|---|
| 35 | |
|---|
| 36 | void Feature::addQualifiedEntry(const string& qualifier, const string& value) { |
|---|
| 37 | // search for quotes |
|---|
| 38 | size_t vlen = value.length(); |
|---|
| 39 | |
|---|
| 40 | gi_assert(vlen>0); |
|---|
| 41 | |
|---|
| 42 | stringCIter start = value.begin(); |
|---|
| 43 | stringCIter end = start+vlen-1; |
|---|
| 44 | |
|---|
| 45 | if (*start == '"') { |
|---|
| 46 | if (vlen == 1 || *end != '"') { |
|---|
| 47 | throw GBS_global_string("Unclosed quotes at qualifier '%s'", qualifier.c_str()); |
|---|
| 48 | } |
|---|
| 49 | // skip quotes : |
|---|
| 50 | ++start; |
|---|
| 51 | // end points to '"' |
|---|
| 52 | } |
|---|
| 53 | else { |
|---|
| 54 | ++end; // point behind last character |
|---|
| 55 | } |
|---|
| 56 | |
|---|
| 57 | setOrAppendQualifiedEntry(qualifiers, qualifier, string(start, end)); |
|---|
| 58 | } |
|---|
| 59 | |
|---|
// Append '_' plus the leading alphanumeric text portion of 'data' to 'id'.
//
// - At most 'maxAppend' characters are appended (including the '_').
//   Nothing is appended if 'maxAppend' is smaller than 2.
// - Whitespace and '-' are dropped; the alphanumeric character following them
//   (and the very first one) is converted to upper case.
// - Scanning stops at the first character that is neither alphanumeric,
//   whitespace nor '-'.
// - If the scanned portion contained no letter (i.e. only digits or nothing),
//   'id' is left unchanged.
static void appendData(std::string& id, const std::string& data, int maxAppend) {
    if (maxAppend < 2) return; // no room for '_' plus at least one character

    const std::string::size_type old_id_len = id.length();
    const std::string::size_type data_len   = data.length();

    id.append(1, '_');
    --maxAppend;

    bool insideWord   = false;
    bool seenNonDigit = false;

    for (std::string::size_type pos = 0; maxAppend>0 && pos < data_len; ++pos) {
        // The <cctype> functions require a value representable as unsigned char;
        // passing a plain (possibly negative) char is undefined for non-ASCII bytes.
        const unsigned char uc = static_cast<unsigned char>(data[pos]);

        if (std::isalnum(uc)) {
            const char out = insideWord ? data[pos] : static_cast<char>(std::toupper(uc));
            id.append(1, out);
            --maxAppend;
            insideWord = true;
            if (std::isalpha(uc)) seenNonDigit = true;
        }
        else if (std::isspace(uc) || uc == '-') { // ignore space and '-'
            insideWord = false;
        }
        else {
            break; // anything else -> abort
        }
    }

    if (!seenNonDigit) {       // scanned part of data contained no letter
        id.resize(old_id_len); // undo changes (including the '_')
    }
}
|---|
| 96 | |
|---|
| 97 | string Feature::createGeneName() const { |
|---|
| 98 | stringMapCIter not_found = qualifiers.end(); |
|---|
| 99 | stringMapCIter product = qualifiers.find("product"); |
|---|
| 100 | stringMapCIter gene = qualifiers.find("gene"); |
|---|
| 101 | |
|---|
| 102 | const size_t maxidlen = 30; // just an approx. limit |
|---|
| 103 | string id = type; // use gene type |
|---|
| 104 | |
|---|
| 105 | id.reserve(maxidlen+10); |
|---|
| 106 | if (gene != not_found) { // append gene name |
|---|
| 107 | appendData(id, gene->second, maxidlen-id.length()); |
|---|
| 108 | } |
|---|
| 109 | |
|---|
| 110 | if (product != not_found) { |
|---|
| 111 | appendData(id, product->second, maxidlen-id.length()); |
|---|
| 112 | } |
|---|
| 113 | |
|---|
| 114 | // now ensure that id doesn't end with digit |
|---|
| 115 | // (if it would, creating unique gene names gets too complicated) |
|---|
| 116 | if (isdigit(id[id.length()-1])) { |
|---|
| 117 | if (id.length() == maxidlen) id.resize(maxidlen-1); |
|---|
| 118 | id.append(1, 'X'); |
|---|
| 119 | } |
|---|
| 120 | |
|---|
| 121 | return id; |
|---|
| 122 | } |
|---|
| 123 | |
|---|
| 124 | void Feature::expectLocationInSequence(long seqLength) const { |
|---|
| 125 | // test whether feature location is inside sequence |
|---|
| 126 | // throw error otherwise |
|---|
| 127 | |
|---|
| 128 | if (!location->isInRange(1, seqLength)) { |
|---|
| 129 | throw GBS_global_string("Illegal feature location (outside sequence 1..%li)", seqLength); |
|---|
| 130 | } |
|---|
| 131 | } |
|---|
| 132 | |
|---|
| 133 | void Feature::fixEmptyQualifiers() { |
|---|
| 134 | // some qualifiers in feature table may be empty |
|---|
| 135 | |
|---|
| 136 | stringMapIter e = qualifiers.end(); |
|---|
| 137 | for (stringMapIter i = qualifiers.begin(); i != e; ++i) { |
|---|
| 138 | if (i->second.empty()) { // with all qualifiers, that have no content, do.. |
|---|
| 139 | if (i->first == "replace") { |
|---|
| 140 | // ARB cannot store empty strings! |
|---|
| 141 | // Since '/replace=""' means 'delete location', we need to store this |
|---|
| 142 | // this information differently. |
|---|
| 143 | i->second = "<empty>"; // |
|---|
| 144 | } |
|---|
| 145 | } |
|---|
| 146 | } |
|---|
| 147 | } |
|---|