| 1 | /* |
|---|
| 2 | * Definition of all objects belonging to this version of |
|---|
| 3 | * the valid names text file |
|---|
| 4 | * |
|---|
| 5 | * 29. November 2002 |
|---|
| 6 | * |
|---|
| 7 | * coded by Lothar Richter |
|---|
| 8 | * |
|---|
| 9 | * Copyright (C) 2002 Department of Microbiology (Technical University Munich) |
|---|
| 10 | */ |
|---|
| 11 | |
|---|
| 12 | #if defined(DEVEL_LOTHAR) |
|---|
| 13 | #define DUMP |
|---|
| 14 | #endif // DEVEL_LOTHAR |
|---|
| 15 | |
|---|
| 16 | #include "NT_validNameParser.h" |
|---|
| 17 | #include "NT_local.h" |
|---|
| 18 | |
|---|
| 19 | #include <cstdlib> |
|---|
| 20 | #include <cstdlib> |
|---|
| 21 | #include <iostream> |
|---|
| 22 | #include <fstream> |
|---|
| 23 | |
|---|
| 24 | using namespace std; |
|---|
| 25 | |
|---|
| 26 | namespace validNames { |
|---|
| 27 | |
|---|
| 28 | |
|---|
| 29 | TokLPtr tokenize(const std::string& description, TokLPtr tokenLP) { |
|---|
| 30 | size_t tokenEnd = 0; |
|---|
| 31 | size_t tokenBegin = 0; |
|---|
| 32 | |
|---|
| 33 | while (tokenEnd != description.size()) { // CC : warning: comparison between signed and unsigned (tokenEnd sollte nicht 'int' sondern 'unsigned' sein) |
|---|
| 34 | tokenEnd = description.find_first_of(' ', tokenBegin); |
|---|
| 35 | if (tokenEnd == string::npos) tokenEnd = description.size(); |
|---|
| 36 | int tokLength = tokenEnd - tokenBegin; |
|---|
| 37 | if (tokLength != 0) { |
|---|
| 38 | tokenLP->push_back(description.substr(tokenBegin, tokenEnd - tokenBegin)); |
|---|
| 39 | } |
|---|
| 40 | tokenBegin = tokenEnd + 1; |
|---|
| 41 | |
|---|
| 42 | } |
|---|
| 43 | return tokenLP; |
|---|
| 44 | } |
|---|
| 45 | |
|---|
| 46 | |
|---|
| 47 | |
|---|
| 48 | |
|---|
| 49 | |
|---|
| 50 | Desco determineType(const string& descriptionString) |
|---|
| 51 | { // begin determineType |
|---|
| 52 | |
|---|
| 53 | DESCT actType = NOTYPE; |
|---|
| 54 | TokLPtr tokenLP = new TokL; |
|---|
| 55 | tokenLP = tokenize(descriptionString, tokenLP); |
|---|
| 56 | // remove all tokens in parentheses |
|---|
| 57 | { |
|---|
| 58 | TokL::iterator it = tokenLP->begin(); |
|---|
| 59 | while (it != tokenLP->end()) { |
|---|
| 60 | if (((*it).at(0) == '(') && *it != string("(corrig.)")) it = tokenLP->erase(it); |
|---|
| 61 | else ++it; |
|---|
| 62 | } |
|---|
| 63 | } |
|---|
| 64 | |
|---|
| 65 | // check first word for upper case letters |
|---|
| 66 | string descNames[6]; // first the valid genus, species, subsp. then the other names |
|---|
| 67 | // stores occurrence of subsp. which is needed to retrieve the right tokens later on and status flags |
|---|
| 68 | int sspPos[2] = { 0, 0 }; // token subsp. occurs maximum twice |
|---|
| 69 | int ssp = 0; |
|---|
| 70 | bool isValid = true; |
|---|
| 71 | bool isRenamed = false; |
|---|
| 72 | bool isHetero = false; |
|---|
| 73 | bool isHomo = false; |
|---|
| 74 | bool isGenus = false; |
|---|
| 75 | // bool isSee = false; |
|---|
| 76 | bool isCorr = false; |
|---|
| 77 | |
|---|
| 78 | |
|---|
| 79 | |
|---|
| 80 | for (TokL::iterator it = tokenLP->begin(); it != tokenLP->end(); ++it, ++ssp) { |
|---|
| 81 | if (isUpperCase(*it)) { |
|---|
| 82 | isGenus = true; |
|---|
| 83 | #if defined(DUMP) |
|---|
| 84 | std::cout << "genus detected" << std::endl; |
|---|
| 85 | #endif // DUMP |
|---|
| 86 | } |
|---|
| 87 | |
|---|
| 88 | |
|---|
| 89 | else { // begin operators |
|---|
| 90 | if (*it == string("->")) { |
|---|
| 91 | nt_assert(!isHetero); |
|---|
| 92 | nt_assert(!isHomo); |
|---|
| 93 | nt_assert(isValid); // only one operator per line allowed |
|---|
| 94 | isRenamed = true; |
|---|
| 95 | isValid = false; |
|---|
| 96 | #if defined(DUMP) |
|---|
| 97 | std::cout << "renaming detected" << std::endl; |
|---|
| 98 | #endif // DUMP |
|---|
| 99 | } |
|---|
| 100 | else { |
|---|
| 101 | if (*it == string("=>")) { |
|---|
| 102 | nt_assert(!isRenamed); |
|---|
| 103 | nt_assert(!isHomo); |
|---|
| 104 | nt_assert(isValid); |
|---|
| 105 | isHetero = true; |
|---|
| 106 | isValid = false; |
|---|
| 107 | #if defined(DUMP) |
|---|
| 108 | std::cout << "heteronym detected" << std::endl; |
|---|
| 109 | #endif // DUMP |
|---|
| 110 | } |
|---|
| 111 | else { |
|---|
| 112 | if (*it == string("=")) { |
|---|
| 113 | nt_assert(!isRenamed); |
|---|
| 114 | nt_assert(!isHetero); |
|---|
| 115 | nt_assert(isValid); |
|---|
| 116 | isHomo = true; |
|---|
| 117 | isValid = false; |
|---|
| 118 | #if defined(DUMP) |
|---|
| 119 | std::cout << "homonym detected" << std::endl; |
|---|
| 120 | #endif // DUMP |
|---|
| 121 | } |
|---|
| 122 | else { |
|---|
| 123 | if (*it == string("(corrig.)")) { |
|---|
| 124 | isCorr = true; |
|---|
| 125 | #if defined(DUMP) |
|---|
| 126 | std::cout << "correction" << std::endl; |
|---|
| 127 | #endif // DUMP |
|---|
| 128 | } |
|---|
| 129 | else { |
|---|
| 130 | if (*it == string("see:")) { |
|---|
| 131 | // isSee = true; |
|---|
| 132 | isValid = false; |
|---|
| 133 | #if defined(DUMP) |
|---|
| 134 | std::cout << "reference" << std::endl; |
|---|
| 135 | #endif // DUMP |
|---|
| 136 | } |
|---|
| 137 | else { |
|---|
| 138 | if (*it == string("subsp.")) { |
|---|
| 139 | #if defined(DUMP) |
|---|
| 140 | std::cout << "subspecies detected at position: >>>" << ssp << "<<<" << std::endl; |
|---|
| 141 | #endif // DUMP |
|---|
| 142 | ssp == 2 ? sspPos[0] = ssp : sspPos[1] = ssp; |
|---|
| 143 | // max. one subsp. on each operator side |
|---|
| 144 | #if defined(DUMP) |
|---|
| 145 | std::cout << "position of subsp.: " << sspPos[0] << "\tand: " << sspPos[1] << std::endl; |
|---|
| 146 | #endif // DUMP |
|---|
| 147 | } |
|---|
| 148 | } |
|---|
| 149 | } |
|---|
| 150 | } |
|---|
| 151 | } |
|---|
| 152 | } |
|---|
| 153 | } |
|---|
| 154 | } |
|---|
| 155 | |
|---|
| 156 | |
|---|
| 157 | |
|---|
| 158 | if (isGenus) { |
|---|
| 159 | #if defined(DUMP) |
|---|
| 160 | std::cout << " GENUS description found " << std::endl; |
|---|
| 161 | #endif // DUMP |
|---|
| 162 | if (isValid) { |
|---|
| 163 | descNames[0] = (*tokenLP)[0]; |
|---|
| 164 | actType = VALGEN; |
|---|
| 165 | #if defined(DUMP) |
|---|
| 166 | std::cout << "VALIDGEN type set to: " << actType << std::endl; |
|---|
| 167 | #endif// DUMP |
|---|
| 168 | } |
|---|
| 169 | else { |
|---|
| 170 | if (isHetero) { |
|---|
| 171 | descNames[0] = (*tokenLP)[2]; |
|---|
| 172 | descNames[3] = (*tokenLP)[0]; |
|---|
| 173 | actType = HETGEN; |
|---|
| 174 | #if defined(DUMP) |
|---|
| 175 | std::cout << "HETERONYMGEN type set to: " << actType << std::endl; |
|---|
| 176 | #endif // DUMP |
|---|
| 177 | } |
|---|
| 178 | else { |
|---|
| 179 | if (isHomo) { |
|---|
| 180 | descNames[0] = (*tokenLP)[2]; |
|---|
| 181 | descNames[3] = (*tokenLP)[0]; |
|---|
| 182 | actType = HOMGEN; |
|---|
| 183 | #if defined(DUMP) |
|---|
| 184 | std::cout << "HOMONYMGEN type set to: " << actType << std::endl; |
|---|
| 185 | #endif // DUMP |
|---|
| 186 | |
|---|
| 187 | } |
|---|
| 188 | else { |
|---|
| 189 | |
|---|
| 190 | if (isRenamed) { |
|---|
| 191 | descNames[0] = (*tokenLP)[2]; |
|---|
| 192 | descNames[3] = (*tokenLP)[0]; |
|---|
| 193 | actType = RENGEN; |
|---|
| 194 | #if defined(DUMP) |
|---|
| 195 | std::cout << "RENAMEDGEN type set to: " << actType << std::endl; |
|---|
| 196 | #endif // DUMP |
|---|
| 197 | } |
|---|
| 198 | else { |
|---|
| 199 | #if defined(DUMP) |
|---|
| 200 | std::cout << "no meaningful combination of conditions reached" << std::endl |
|---|
| 201 | << "for line: " << descriptionString << std::endl; |
|---|
| 202 | std::cout << "description type is set to NOTYPE: " << NOTYPE << std::endl; |
|---|
| 203 | #endif // DUMP |
|---|
| 204 | isValid = false; |
|---|
| 205 | #if defined(DUMP) |
|---|
| 206 | std::cout << "isValid set to false " << std::endl; |
|---|
| 207 | #endif // DUMP |
|---|
| 208 | actType = NOTYPE; |
|---|
| 209 | } |
|---|
| 210 | } |
|---|
| 211 | } |
|---|
| 212 | } |
|---|
| 213 | } |
|---|
| 214 | else { |
|---|
| 215 | |
|---|
| 216 | // just fancy experimental , maybe not 100% correct but looks good |
|---|
| 217 | if (!(((sspPos[0] == 0) || (sspPos[0] == 2)) && (((sspPos[1] > 4)&&(sspPos[1]< 9))||(sspPos[1]==0)))) { |
|---|
| 218 | #if defined(DUMP) |
|---|
| 219 | std::cout << "subsp. at strange position found in line:" << std::endl << descriptionString << endl; |
|---|
| 220 | std::cout << "description type is set to NOTYPE: " << NOTYPE << std::endl; |
|---|
| 221 | #endif // DUMP |
|---|
| 222 | isValid = false; |
|---|
| 223 | #if defined(DUMP) |
|---|
| 224 | std::cout << "isValid set to false " << std::endl; |
|---|
| 225 | #endif // DUMP |
|---|
| 226 | actType = NOTYPE; |
|---|
| 227 | } |
|---|
| 228 | |
|---|
| 229 | if (isValid) { |
|---|
| 230 | descNames[0] = (*tokenLP)[0]; |
|---|
| 231 | descNames[1] = (*tokenLP)[1]; |
|---|
| 232 | if (sspPos[0] != 0) { descNames[2] = (*tokenLP)[sspPos[0]+1]; } // only if subsp. exists |
|---|
| 233 | actType = VALSPEC; |
|---|
| 234 | } |
|---|
| 235 | else { // begin else isHetero |
|---|
| 236 | if (isHetero) { |
|---|
| 237 | descNames[0] = (*tokenLP)[3 + sspPos[0]]; |
|---|
| 238 | descNames[1] = (*tokenLP)[4 + sspPos[0]]; |
|---|
| 239 | if (sspPos[1]!=0) { descNames[2]=(*tokenLP)[6 + sspPos[0]]; } // only if subsp. exists |
|---|
| 240 | |
|---|
| 241 | descNames[3] = (*tokenLP)[0]; |
|---|
| 242 | descNames[4] = (*tokenLP)[1]; |
|---|
| 243 | if (sspPos[0]!=0) { descNames[5]=(*tokenLP)[sspPos[0]+1]; } // only if subsp. exists |
|---|
| 244 | |
|---|
| 245 | actType = HETSPEC; |
|---|
| 246 | } |
|---|
| 247 | else { |
|---|
| 248 | if (isHomo) { |
|---|
| 249 | descNames[0] = (*tokenLP)[3 + sspPos[0]]; |
|---|
| 250 | descNames[1] = (*tokenLP)[4 + sspPos[0]]; |
|---|
| 251 | if (sspPos[1]!=0) { descNames[2]=(*tokenLP)[6 + sspPos[0]]; } // only if subsp. exists |
|---|
| 252 | |
|---|
| 253 | descNames[3] = (*tokenLP)[0]; |
|---|
| 254 | descNames[4] = (*tokenLP)[1]; |
|---|
| 255 | if (sspPos[0]!=0) { descNames[5]=(*tokenLP)[sspPos[0]+1]; } // only if subsp. exists |
|---|
| 256 | |
|---|
| 257 | actType = HOMSPEC; |
|---|
| 258 | |
|---|
| 259 | } |
|---|
| 260 | else { // else branch isHomo |
|---|
| 261 | if (isRenamed) { |
|---|
| 262 | descNames[0] = (*tokenLP)[3 + sspPos[0]]; |
|---|
| 263 | descNames[1] = (*tokenLP)[4 + sspPos[0]]; |
|---|
| 264 | if (sspPos[1]!=0) { descNames[2]=(*tokenLP)[6 + sspPos[0]]; } // only if subsp. exists |
|---|
| 265 | |
|---|
| 266 | descNames[3] = (*tokenLP)[0]; |
|---|
| 267 | descNames[4] = (*tokenLP)[1]; |
|---|
| 268 | if (sspPos[0]!=0) { descNames[5]=(*tokenLP)[sspPos[0]+1]; } // only if subsp. exists |
|---|
| 269 | |
|---|
| 270 | actType = RENSPEC; |
|---|
| 271 | |
|---|
| 272 | } |
|---|
| 273 | else { // species remaining cases |
|---|
| 274 | #if defined(DUMP) |
|---|
| 275 | std::cout << "not a valid description line detected" << std::endl; |
|---|
| 276 | std::cout << "isValid: " << isValid << std::endl; |
|---|
| 277 | std::cout << "isRenamed: " << isRenamed << std::endl; |
|---|
| 278 | std::cout << "isHetero: " << isHetero << std::endl; |
|---|
| 279 | std::cout << "isHomo: " << isHomo << std::endl; |
|---|
| 280 | std::cout << "isGenus: " << isGenus << std::endl; |
|---|
| 281 | std::cout << "isSee: " << isSee << std::endl; |
|---|
| 282 | std::cout << "isCorr: " << isCorr << std::endl; |
|---|
| 283 | std::cout << "sspPos: " << sspPos[0] << " and " << sspPos[1] << std::endl; |
|---|
| 284 | std::cout << descriptionString << std::endl; |
|---|
| 285 | #endif // DUMP |
|---|
| 286 | actType = NOTYPE; |
|---|
| 287 | } |
|---|
| 288 | |
|---|
| 289 | } |
|---|
| 290 | } |
|---|
| 291 | } |
|---|
| 292 | } |
|---|
| 293 | |
|---|
| 294 | |
|---|
| 295 | #if defined(DUMP) |
|---|
| 296 | std::cout << descriptionString << std::endl; |
|---|
| 297 | std::cout << "classified as " << actType << std::endl; |
|---|
| 298 | #endif // DUMP |
|---|
| 299 | |
|---|
| 300 | Desco actDesc(actType, isCorr, descNames[0], descNames[1], descNames[2], descNames[3], descNames[4], descNames[5]); |
|---|
| 301 | delete tokenLP; |
|---|
| 302 | return actDesc; |
|---|
| 303 | } |
|---|
| 304 | |
|---|
| 305 | |
|---|
| 306 | string Desco::getFirstName() { |
|---|
| 307 | string tmp = firstgen; |
|---|
| 308 | if (!firstspec.empty()) { |
|---|
| 309 | tmp = tmp + " " + firstspec; |
|---|
| 310 | if (!firstsub.empty()) { |
|---|
| 311 | tmp = tmp + " " + "subsp." + " " + firstsub; |
|---|
| 312 | } |
|---|
| 313 | } |
|---|
| 314 | |
|---|
| 315 | |
|---|
| 316 | return tmp; |
|---|
| 317 | } |
|---|
| 318 | |
|---|
| 319 | string Desco::getSecondName() { |
|---|
| 320 | string tmp = secondgen; |
|---|
| 321 | if (!secondspec.empty()) { |
|---|
| 322 | tmp = tmp + " " + firstspec; |
|---|
| 323 | if (!secondsub.empty()) { |
|---|
| 324 | tmp = tmp + " " + "subsp." + " " + secondsub; |
|---|
| 325 | } |
|---|
| 326 | } |
|---|
| 327 | return tmp; |
|---|
| 328 | } |
|---|
| 329 | |
|---|
| 330 | |
|---|
/* Tells whether 'input' consists solely of the letters 'A' to 'Z'.
 * Any other character (lower case, digit, blank, punctuation) yields false.
 * An empty string yields true, because no character violates the rule.
 */
bool isUpperCase(const string& input) {
    string::const_iterator       pos  = input.begin();
    const string::const_iterator stop = input.end();

    while (pos != stop) {
        const char c = *pos;
        if (!(c >= 'A' && c <= 'Z')) return false;
        ++pos;
    }
    return true;
}
|---|
| 337 | } |
|---|