1 | // ================================================================ // |
---|
2 | // // |
---|
3 | // File : Feature.cxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in November 2006 // |
---|
7 | // Institute of Microbiology (Technical University Munich) // |
---|
8 | // http://www.arb-home.de/ // |
---|
9 | // // |
---|
10 | // ================================================================ // |
---|
11 | |
---|
12 | #include "Feature.h" |
---|
13 | #include "types.h" |
---|
14 | #include <cctype> |
---|
15 | |
---|
16 | |
---|
17 | using namespace std; |
---|
18 | |
---|
19 | |
---|
20 | Feature::Feature(const string& Type, const string& locationString) : |
---|
21 | type(Type), |
---|
22 | location(parseLocation(locationString)) |
---|
23 | {} |
---|
24 | |
---|
25 | inline void setOrAppendQualifiedEntry(stringMap& qualifiers, const string& qualifier, const string& value) { |
---|
26 | stringMapIter existing = qualifiers.find(qualifier); |
---|
27 | if (existing != qualifiers.end()) { // existing qualifier |
---|
28 | existing->second.append(1, '\n'); // append separated by LF |
---|
29 | existing->second.append(value); |
---|
30 | } |
---|
31 | else { |
---|
32 | qualifiers[qualifier] = value; |
---|
33 | } |
---|
34 | } |
---|
35 | |
---|
36 | void Feature::addQualifiedEntry(const string& qualifier, const string& value) { |
---|
37 | // search for quotes |
---|
38 | size_t vlen = value.length(); |
---|
39 | |
---|
40 | gi_assert(vlen>0); |
---|
41 | |
---|
42 | stringCIter start = value.begin(); |
---|
43 | stringCIter end = start+vlen-1; |
---|
44 | |
---|
45 | if (*start == '"') { |
---|
46 | if (vlen == 1 || *end != '"') { |
---|
47 | throw GBS_global_string("Unclosed quotes at qualifier '%s'", qualifier.c_str()); |
---|
48 | } |
---|
49 | // skip quotes : |
---|
50 | ++start; |
---|
51 | // end points to '"' |
---|
52 | } |
---|
53 | else { |
---|
54 | ++end; // point behind last character |
---|
55 | } |
---|
56 | |
---|
57 | setOrAppendQualifiedEntry(qualifiers, qualifier, string(start, end)); |
---|
58 | } |
---|
59 | |
---|
// Append an identifier fragment derived from 'data' to 'id'.
//
// The fragment consists of '_' followed by the alphanumeric characters
// found at the start of 'data':
//   - the first character of each word is converted to upper case,
//   - whitespace and '-' are dropped and start a new word,
//   - any other character ends the scan.
//
// 'maxAppend' is the maximum number of characters (including the '_')
// that may be added to 'id'. Nothing is done if it is smaller than 2.
//
// If the scanned part of 'data' contains no letter (i.e. only digits or
// nothing at all), 'id' is restored to its original content.
static void appendData(std::string& id, const std::string& data, int maxAppend) {
    if (maxAppend < 2) return; // no room for '_' plus at least one character

    const std::string::size_type old_id_len = id.length();

    id.append(1, '_');
    --maxAppend;

    bool insideWord   = false;
    bool seenNonDigit = false;

    const std::string::const_iterator end = data.end();
    for (std::string::const_iterator i = data.begin(); maxAppend>0 && i != end; ++i) {
        // The <cctype> functions are only defined for values representable
        // as unsigned char (or EOF). Passing a plain (possibly negative)
        // char, e.g. a byte of an UTF-8 sequence, is undefined behavior.
        const unsigned char uc = static_cast<unsigned char>(*i);

        if (std::isalnum(uc)) {
            char c = *i;
            if (!insideWord) c = static_cast<char>(std::toupper(uc));
            id.append(1, c);
            --maxAppend;
            insideWord = true;
            if (!seenNonDigit && std::isalpha(uc)) seenNonDigit = true;
        }
        else if (std::isspace(uc) || uc == '-') { // ignore space and '-'
            insideWord = false;
        }
        else {
            break; // anything else -> abort
        }
    }

    if (!seenNonDigit) {       // data only contained digits (as far as data has been scanned)
        id.resize(old_id_len); // undo changes
    }
}
---|
96 | |
---|
97 | string Feature::createGeneName() const { |
---|
98 | stringMapCIter not_found = qualifiers.end(); |
---|
99 | stringMapCIter product = qualifiers.find("product"); |
---|
100 | stringMapCIter gene = qualifiers.find("gene"); |
---|
101 | |
---|
102 | const size_t maxidlen = 30; // just an approx. limit |
---|
103 | string id = type; // use gene type |
---|
104 | |
---|
105 | id.reserve(maxidlen+10); |
---|
106 | if (gene != not_found) { // append gene name |
---|
107 | appendData(id, gene->second, maxidlen-id.length()); |
---|
108 | } |
---|
109 | |
---|
110 | if (product != not_found) { |
---|
111 | appendData(id, product->second, maxidlen-id.length()); |
---|
112 | } |
---|
113 | |
---|
114 | // now ensure that id doesn't end with digit |
---|
115 | // (if it would, creating unique gene names gets too complicated) |
---|
116 | if (isdigit(id[id.length()-1])) { |
---|
117 | if (id.length() == maxidlen) id.resize(maxidlen-1); |
---|
118 | id.append(1, 'X'); |
---|
119 | } |
---|
120 | |
---|
121 | return id; |
---|
122 | } |
---|
123 | |
---|
124 | void Feature::expectLocationInSequence(long seqLength) const { |
---|
125 | // test whether feature location is inside sequence |
---|
126 | // throw error otherwise |
---|
127 | |
---|
128 | if (!location->isInRange(1, seqLength)) { |
---|
129 | throw GBS_global_string("Illegal feature location (outside sequence 1..%li)", seqLength); |
---|
130 | } |
---|
131 | } |
---|
132 | |
---|
133 | void Feature::fixEmptyQualifiers() { |
---|
134 | // some qualifiers in feature table may be empty |
---|
135 | |
---|
136 | stringMapIter e = qualifiers.end(); |
---|
137 | for (stringMapIter i = qualifiers.begin(); i != e; ++i) { |
---|
138 | if (i->second.empty()) { // with all qualifiers, that have no content, do.. |
---|
139 | if (i->first == "replace") { |
---|
140 | // ARB cannot store empty strings! |
---|
141 | // Since '/replace=""' means 'delete location', we need to store this |
---|
142 | // this information differently. |
---|
143 | i->second = "<empty>"; // |
---|
144 | } |
---|
145 | } |
---|
146 | } |
---|
147 | } |
---|