1 | // ================================================================ // |
---|
2 | // // |
---|
3 | // File : Feature.cxx // |
---|
4 | // Purpose : // |
---|
5 | // // |
---|
6 | // Coded by Ralf Westram (coder@reallysoft.de) in November 2006 // |
---|
7 | // Institute of Microbiology (Technical University Munich) // |
---|
8 | // http://www.arb-home.de/ // |
---|
9 | // // |
---|
10 | // ================================================================ // |
---|
11 | |
---|
12 | #include "Feature.h" |
---|
13 | #include "types.h" |
---|
14 | #include <cctype> |
---|
15 | |
---|
16 | |
---|
17 | using namespace std; |
---|
18 | |
---|
19 | |
---|
20 | Feature::Feature(const string& Type, const string& locationString) |
---|
21 | : type(Type) |
---|
22 | , location(parseLocation(locationString)) |
---|
23 | { |
---|
24 | } |
---|
25 | |
---|
26 | inline void setOrAppendQualifiedEntry(stringMap& qualifiers, const string& qualifier, const string& value) { |
---|
27 | stringMapIter existing = qualifiers.find(qualifier); |
---|
28 | if (existing != qualifiers.end()) { // existing qualifier |
---|
29 | existing->second.append(1, '\n'); // append separated by LF |
---|
30 | existing->second.append(value); |
---|
31 | } |
---|
32 | else { |
---|
33 | qualifiers[qualifier] = value; |
---|
34 | } |
---|
35 | } |
---|
36 | |
---|
37 | void Feature::addQualifiedEntry(const string& qualifier, const string& value) { |
---|
38 | // search for quotes |
---|
39 | size_t vlen = value.length(); |
---|
40 | |
---|
41 | gi_assert(vlen>0); |
---|
42 | |
---|
43 | stringCIter start = value.begin(); |
---|
44 | stringCIter end = start+vlen-1; |
---|
45 | |
---|
46 | if (*start == '"') { |
---|
47 | if (vlen == 1 || *end != '"') { |
---|
48 | throw GBS_global_string("Unclosed quotes at qualifier '%s'", qualifier.c_str()); |
---|
49 | } |
---|
50 | // skip quotes : |
---|
51 | ++start; |
---|
52 | // end points to '"' |
---|
53 | } |
---|
54 | else { |
---|
55 | ++end; // point behind last character |
---|
56 | } |
---|
57 | |
---|
58 | setOrAppendQualifiedEntry(qualifiers, qualifier, string(start, end)); |
---|
59 | } |
---|
60 | |
---|
// Append an identifier fragment derived from the start of 'data' to 'id'.
//
// First a '_' separator is appended, then the alphanumeric characters found
// at the start of 'data'. Whitespace and '-' are not copied; they only end
// the current word. The first character of each word is converted to upper
// case. Scanning stops at the first character that is neither alphanumeric,
// whitespace nor '-', at the end of 'data', or as soon as 'maxAppend'
// characters (the '_' included) have been appended.
//
// 'id' is left untouched if 'maxAppend' is below 2. It is restored to its
// previous content if the scanned part of 'data' did not contain any letter
// (e.g. it consisted of digits only).
static void appendData(std::string& id, const std::string& data, int maxAppend) {
    if (maxAppend < 2) return; // no room for '_' plus at least one character

    const std::string::size_type old_id_len = id.length();

    id.append(1, '_');
    --maxAppend;

    bool insideWord   = false;
    bool seenNonDigit = false;

    const std::string::const_iterator end = data.end();
    for (std::string::const_iterator i = data.begin(); maxAppend>0 && i != end; ++i) {
        // The <cctype> functions are only defined for values representable as
        // unsigned char (or EOF); a plain (possibly negative) char must not be
        // passed directly.
        const unsigned char raw = static_cast<unsigned char>(*i);

        if (std::isalnum(raw)) {
            int ch = raw;
            if (!insideWord) ch = std::toupper(ch); // capitalize first character of each word

            id.append(1, static_cast<char>(ch));
            --maxAppend;
            insideWord = true;

            if (!seenNonDigit && std::isalpha(ch)) seenNonDigit = true;
        }
        else if (std::isspace(raw) || raw == '-') {
            insideWord = false; // separators are skipped, next character starts a new word
        }
        else {
            break; // anything else ends the usable part of 'data'
        }
    }

    if (!seenNonDigit) {
        // only digits (or nothing) were found as far as 'data' has been scanned
        id.resize(old_id_len); // undo changes
    }
}
---|
97 | |
---|
98 | string Feature::createGeneName() const |
---|
99 | { |
---|
100 | stringMapCIter not_found = qualifiers.end(); |
---|
101 | stringMapCIter product = qualifiers.find("product"); |
---|
102 | stringMapCIter gene = qualifiers.find("gene"); |
---|
103 | |
---|
104 | const size_t maxidlen = 30; // just an approx. limit |
---|
105 | string id = type; // use gene type |
---|
106 | |
---|
107 | id.reserve(maxidlen+10); |
---|
108 | if (gene != not_found) { // append gene name |
---|
109 | appendData(id, gene->second, maxidlen-id.length()); |
---|
110 | } |
---|
111 | |
---|
112 | if (product != not_found) { |
---|
113 | appendData(id, product->second, maxidlen-id.length()); |
---|
114 | } |
---|
115 | |
---|
116 | // now ensure that id doesn't end with digit |
---|
117 | // (if it would, creating unique gene names gets too complicated) |
---|
118 | if (isdigit(id[id.length()-1])) { |
---|
119 | if (id.length() == maxidlen) id.resize(maxidlen-1); |
---|
120 | id.append(1, 'X'); |
---|
121 | } |
---|
122 | |
---|
123 | return id; |
---|
124 | } |
---|
125 | |
---|
126 | void Feature::expectLocationInSequence(long seqLength) const |
---|
127 | { |
---|
128 | // test whether feature location is inside sequence |
---|
129 | // throw error otherwise |
---|
130 | |
---|
131 | if (!location->isInRange(1, seqLength)) { |
---|
132 | throw GBS_global_string("Illegal feature location (outside sequence 1..%li)", seqLength); |
---|
133 | } |
---|
134 | } |
---|
135 | |
---|
136 | void Feature::fixEmptyQualifiers() { |
---|
137 | // some qualifiers in feature table may be empty |
---|
138 | |
---|
139 | stringMapIter e = qualifiers.end(); |
---|
140 | for (stringMapIter i = qualifiers.begin(); i != e; ++i) { |
---|
141 | if (i->second.empty()) { // with all qualifiers, that have no content, do.. |
---|
142 | if (i->first == "replace") { |
---|
143 | // ARB cannot store empty strings! |
---|
144 | // Since '/replace=""' means 'delete location', we need to store this |
---|
145 | // this information differently. |
---|
146 | i->second = "<empty>"; // |
---|
147 | } |
---|
148 | } |
---|
149 | } |
---|
150 | } |
---|