Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

NT_validNameParser.cxx

Visit:

Last change on this file was 16766, checked in by westram, 8 years ago
Reintegrates 'gcc' into 'trunk' (mostly cosmetic changes). Adds: log:branches/gcc@16655,16741:16743,16752:16765
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 11.9 KB

Line
1	/*
2	* Definition of all objects belonging to this version of
3	* the valid names text file
4	*
5	* 29. November 2002
6	*
7	* coded by Lothar Richter
8	*
9	* Copyright (C) 2002 Department of Microbiology (Technical University Munich)
10	*/
11
12	#if defined(DEVEL_LOTHAR)
13	#define DUMP
14	#endif // DEVEL_LOTHAR
15
16	#include "NT_validNameParser.h"
17	#include "NT_local.h"
18
19	#include <cstdlib>
20	#include <cstdlib>
21	#include <iostream>
22	#include <fstream>
23
24	using namespace std;
25
26	namespace validNames {
27
28
29	TokLPtr tokenize(const std::string& description, TokLPtr tokenLP) {
30	size_t tokenEnd = 0;
31	size_t tokenBegin = 0;
32
33	while (tokenEnd != description.size()) { // CC : warning: comparison between signed and unsigned (tokenEnd sollte nicht 'int' sondern 'unsigned' sein)
34	tokenEnd = description.find_first_of(' ', tokenBegin);
35	if (tokenEnd == string::npos) tokenEnd = description.size();
36	int tokLength = tokenEnd - tokenBegin;
37	if (tokLength != 0) {
38	tokenLP->push_back(description.substr(tokenBegin, tokenEnd - tokenBegin));
39	}
40	tokenBegin = tokenEnd + 1;
41
42	}
43	return tokenLP;
44	}
45
46
47
48
49
50	Desco determineType(const string& descriptionString)
51	{ // begin determineType
52
53	DESCT actType = NOTYPE;
54	TokLPtr tokenLP = new TokL;
55	tokenLP = tokenize(descriptionString, tokenLP);
56	// remove all tokens in parentheses
57	{
58	TokL::iterator it = tokenLP->begin();
59	while (it != tokenLP->end()) {
60	if (((it).at(0) == '(') && it != string("(corrig.)")) it = tokenLP->erase(it);
61	else ++it;
62	}
63	}
64
65	// check first word for upper case letters
66	string descNames[6]; // first the valid genus, species, subsp. then the other names
67	// stores occurrence of subsp. which is needed to retrieve the right tokens later on and status flags
68	int sspPos[2] = { 0, 0 }; // token subsp. occurs maximum twice
69	int ssp = 0;
70	bool isValid = true;
71	bool isRenamed = false;
72	bool isHetero = false;
73	bool isHomo = false;
74	bool isGenus = false;
75	// bool isSee = false;
76	bool isCorr = false;
77
78
79
80	for (TokL::iterator it = tokenLP->begin(); it != tokenLP->end(); ++it, ++ssp) {
81	if (isUpperCase(*it)) {
82	isGenus = true;
83	#if defined(DUMP)
84	std::cout << "genus detected" << std::endl;
85	#endif // DUMP
86	}
87
88
89	else { // begin operators
90	if (*it == string("->")) {
91	nt_assert(!isHetero);
92	nt_assert(!isHomo);
93	nt_assert(isValid); // only one operator per line allowed
94	isRenamed = true;
95	isValid = false;
96	#if defined(DUMP)
97	std::cout << "renaming detected" << std::endl;
98	#endif // DUMP
99	}
100	else {
101	if (*it == string("=>")) {
102	nt_assert(!isRenamed);
103	nt_assert(!isHomo);
104	nt_assert(isValid);
105	isHetero = true;
106	isValid = false;
107	#if defined(DUMP)
108	std::cout << "heteronym detected" << std::endl;
109	#endif // DUMP
110	}
111	else {
112	if (*it == string("=")) {
113	nt_assert(!isRenamed);
114	nt_assert(!isHetero);
115	nt_assert(isValid);
116	isHomo = true;
117	isValid = false;
118	#if defined(DUMP)
119	std::cout << "homonym detected" << std::endl;
120	#endif // DUMP
121	}
122	else {
123	if (*it == string("(corrig.)")) {
124	isCorr = true;
125	#if defined(DUMP)
126	std::cout << "correction" << std::endl;
127	#endif // DUMP
128	}
129	else {
130	if (*it == string("see:")) {
131	// isSee = true;
132	isValid = false;
133	#if defined(DUMP)
134	std::cout << "reference" << std::endl;
135	#endif // DUMP
136	}
137	else {
138	if (*it == string("subsp.")) {
139	#if defined(DUMP)
140	std::cout << "subspecies detected at position: >>>" << ssp << "<<<" << std::endl;
141	#endif // DUMP
142	ssp == 2 ? sspPos[0] = ssp : sspPos[1] = ssp;
143	// max. one subsp. on each operator side
144	#if defined(DUMP)
145	std::cout << "position of subsp.: " << sspPos[0] << "\tand: " << sspPos[1] << std::endl;
146	#endif // DUMP
147	}
148	}
149	}
150	}
151	}
152	}
153	}
154	}
155
156
157
158	if (isGenus) {
159	#if defined(DUMP)
160	std::cout << " GENUS description found " << std::endl;
161	#endif // DUMP
162	if (isValid) {
163	descNames[0] = (*tokenLP)[0];
164	actType = VALGEN;
165	#if defined(DUMP)
166	std::cout << "VALIDGEN type set to: " << actType << std::endl;
167	#endif// DUMP
168	}
169	else {
170	if (isHetero) {
171	descNames[0] = (*tokenLP)[2];
172	descNames[3] = (*tokenLP)[0];
173	actType = HETGEN;
174	#if defined(DUMP)
175	std::cout << "HETERONYMGEN type set to: " << actType << std::endl;
176	#endif // DUMP
177	}
178	else {
179	if (isHomo) {
180	descNames[0] = (*tokenLP)[2];
181	descNames[3] = (*tokenLP)[0];
182	actType = HOMGEN;
183	#if defined(DUMP)
184	std::cout << "HOMONYMGEN type set to: " << actType << std::endl;
185	#endif // DUMP
186
187	}
188	else {
189
190	if (isRenamed) {
191	descNames[0] = (*tokenLP)[2];
192	descNames[3] = (*tokenLP)[0];
193	actType = RENGEN;
194	#if defined(DUMP)
195	std::cout << "RENAMEDGEN type set to: " << actType << std::endl;
196	#endif // DUMP
197	}
198	else {
199	#if defined(DUMP)
200	std::cout << "no meaningful combination of conditions reached" << std::endl
201	<< "for line: " << descriptionString << std::endl;
202	std::cout << "description type is set to NOTYPE: " << NOTYPE << std::endl;
203	#endif // DUMP
204	isValid = false;
205	#if defined(DUMP)
206	std::cout << "isValid set to false " << std::endl;
207	#endif // DUMP
208	actType = NOTYPE;
209	}
210	}
211	}
212	}
213	}
214	else {
215
216	// just fancy experimental , maybe not 100% correct but looks good
217	if (!(((sspPos[0] == 0) \|\| (sspPos[0] == 2)) && (((sspPos[1] > 4)&&(sspPos[1]< 9))\|\|(sspPos[1]==0)))) {
218	#if defined(DUMP)
219	std::cout << "subsp. at strange position found in line:" << std::endl << descriptionString << endl;
220	std::cout << "description type is set to NOTYPE: " << NOTYPE << std::endl;
221	#endif // DUMP
222	isValid = false;
223	#if defined(DUMP)
224	std::cout << "isValid set to false " << std::endl;
225	#endif // DUMP
226	actType = NOTYPE;
227	}
228
229	if (isValid) {
230	descNames[0] = (*tokenLP)[0];
231	descNames[1] = (*tokenLP)[1];
232	if (sspPos[0] != 0) { descNames[2] = (*tokenLP)[sspPos[0]+1]; } // only if subsp. exists
233	actType = VALSPEC;
234	}
235	else { // begin else isHetero
236	if (isHetero) {
237	descNames[0] = (*tokenLP)[3 + sspPos[0]];
238	descNames[1] = (*tokenLP)[4 + sspPos[0]];
239	if (sspPos[1]!=0) { descNames[2]=(*tokenLP)[6 + sspPos[0]]; } // only if subsp. exists
240
241	descNames[3] = (*tokenLP)[0];
242	descNames[4] = (*tokenLP)[1];
243	if (sspPos[0]!=0) { descNames[5]=(*tokenLP)[sspPos[0]+1]; } // only if subsp. exists
244
245	actType = HETSPEC;
246	}
247	else {
248	if (isHomo) {
249	descNames[0] = (*tokenLP)[3 + sspPos[0]];
250	descNames[1] = (*tokenLP)[4 + sspPos[0]];
251	if (sspPos[1]!=0) { descNames[2]=(*tokenLP)[6 + sspPos[0]]; } // only if subsp. exists
252
253	descNames[3] = (*tokenLP)[0];
254	descNames[4] = (*tokenLP)[1];
255	if (sspPos[0]!=0) { descNames[5]=(*tokenLP)[sspPos[0]+1]; } // only if subsp. exists
256
257	actType = HOMSPEC;
258
259	}
260	else { // else branch isHomo
261	if (isRenamed) {
262	descNames[0] = (*tokenLP)[3 + sspPos[0]];
263	descNames[1] = (*tokenLP)[4 + sspPos[0]];
264	if (sspPos[1]!=0) { descNames[2]=(*tokenLP)[6 + sspPos[0]]; } // only if subsp. exists
265
266	descNames[3] = (*tokenLP)[0];
267	descNames[4] = (*tokenLP)[1];
268	if (sspPos[0]!=0) { descNames[5]=(*tokenLP)[sspPos[0]+1]; } // only if subsp. exists
269
270	actType = RENSPEC;
271
272	}
273	else { // species remaining cases
274	#if defined(DUMP)
275	std::cout << "not a valid description line detected" << std::endl;
276	std::cout << "isValid: " << isValid << std::endl;
277	std::cout << "isRenamed: " << isRenamed << std::endl;
278	std::cout << "isHetero: " << isHetero << std::endl;
279	std::cout << "isHomo: " << isHomo << std::endl;
280	std::cout << "isGenus: " << isGenus << std::endl;
281	std::cout << "isSee: " << isSee << std::endl;
282	std::cout << "isCorr: " << isCorr << std::endl;
283	std::cout << "sspPos: " << sspPos[0] << " and " << sspPos[1] << std::endl;
284	std::cout << descriptionString << std::endl;
285	#endif // DUMP
286	actType = NOTYPE;
287	}
288
289	}
290	}
291	}
292	}
293
294
295	#if defined(DUMP)
296	std::cout << descriptionString << std::endl;
297	std::cout << "classified as " << actType << std::endl;
298	#endif // DUMP
299
300	Desco actDesc(actType, isCorr, descNames[0], descNames[1], descNames[2], descNames[3], descNames[4], descNames[5]);
301	delete tokenLP;
302	return actDesc;
303	}
304
305
306	string Desco::getFirstName() {
307	string tmp = firstgen;
308	if (!firstspec.empty()) {
309	tmp = tmp + " " + firstspec;
310	if (!firstsub.empty()) {
311	tmp = tmp + " " + "subsp." + " " + firstsub;
312	}
313	}
314
315
316	return tmp;
317	}
318
319	string Desco::getSecondName() {
320	string tmp = secondgen;
321	if (!secondspec.empty()) {
322	tmp = tmp + " " + firstspec;
323	if (!secondsub.empty()) {
324	tmp = tmp + " " + "subsp." + " " + secondsub;
325	}
326	}
327	return tmp;
328	}
329
330
/* Return true if every character of 'input' is one of the ASCII capital
 * letters 'A'..'Z'. Digits, punctuation and lower-case letters make the
 * result false. An empty string contains no offending character and
 * therefore yields true.
 */
bool isUpperCase(const std::string& input) {
    for (std::string::const_iterator ch = input.begin(); ch != input.end(); ++ch) {
        if (*ch < 'A' || *ch > 'Z') return false;
    }
    return true;
}
337	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/NTREE/NT_validNameParser.cxx

Download in other formats: