Context Navigation

source: branches/port5/NTREE/NT_validNameParser.cxx

Visit:

Last change on this file was 5400, checked in by westram, 17 years ago
64bit: use size_t instead of unsigned int
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 13.4 KB

Line
1
2	/**
3	* Definition of all objects belonging to this version of
4	* the valid names text file
5	*
6	* 29. November 2002
7	*
8	* coded by Lothar Richter
9	*
10	* Copyright (C) 2002 Department of Microbiology (Technical University Munich)
11	*/
12
13	#if defined(DEVEL_LOTHAR)
14	#define DUMP
15	#endif // DEVEL_LOTHAR
16
17	#include <stdlib.h>
18	#include <stdlib.h>
19	#include <iostream>
20	#include <fstream>
21	#include <string>
22	#include <list>
23	#include <vector>
24	#include <cassert>
25
26	#include "nt_validNameParser.hxx"
27
28	using namespace std;
29
30	// Hi Lothar,
31	// I added a few code comments (marked "CC :") for you that refer to the compiler warnings.
32
33	namespace validNames{
34
35
36	TokLPtr tokenize(const string& description, TokLPtr tokenLP)
37	{
38	// TokLPtr tokenLP = new TokL;
39	size_t tokenEnd = 0;
40	size_t tokenBegin = 0;
41	// std::cout <<description.size() << std::endl;
42
43	while (tokenEnd != description.size()){ // CC : warning: comparison between signed and unsigned (tokenEnd sollte nicht 'int' sondern 'unsigned' sein)
44	tokenEnd = description.find_first_of(' ', tokenBegin);
45	// std::cout << string("TokenBegin:\t") << tokenBegin << "\t" <<
46	// string("TokenEnd:\t") << tokenEnd << std::endl;
47	if (tokenEnd == string::npos) tokenEnd = description.size();
48	int tokLength = tokenEnd - tokenBegin;
49	if (tokLength != 0){
50	tokenLP->push_back(description.substr(tokenBegin,tokenEnd - tokenBegin));
51	// std::cout << string("pushedToken:\t") << description.substr(tokenBegin,tokenEnd - tokenBegin) << std::endl;
52	}
53	// std::cout << description.substr(tokenBegin,tokenEnd - tokenBegin) << std::endl;
54	tokenBegin = tokenEnd + 1;
55
56	}
57	return tokenLP;
58	} // end tokenize
59
60
61
62
63
64	Desco determineType(const string& descriptionString)
65	{// begin determineType
66
67	DESCT actType = NOTYPE;
68	TokLPtr tokenLP = new TokL;
69	tokenLP = tokenize(descriptionString, tokenLP);
70	// remove all tokens in parentheses
71	{
72	TokL::iterator it = tokenLP->begin();
73	while( it != tokenLP->end()){
74	if(((it).at(0) == '(') && it != string("(corrig.)")) it = tokenLP->erase(it);
75	else ++it;
76	}
77	}
78
79	// check first word for upper case letters
80	string descNames[6]; // first the valid genus, species, subsp. then the other names
81	// stores occurrence of subsp. which is needed to retrieve the right tokens later on and status flags
82	int sspPos[2] = {0, 0}; // token subsp. occurs maximum twice
83	int ssp = 0;
84	bool isValid = true;
85	bool isRenamed = false;
86	bool isHetero = false;
87	bool isHomo = false;
88	bool isGenus = false;
89	bool isSee = false;
90	// bool isSpecies = false;
91	bool isCorr = false;
92
93
94
95	for( TokL::iterator it = tokenLP->begin();it != tokenLP->end(); it ++, ssp++)
96	{//begin of token iteration
97	// std::cout << "entering analysis loop" << std::endl;
98	if(isUpperCase(*it)) {
99	isGenus = true;
100	#if defined(DUMP)
101	std::cout << "genus detected" << std::endl;
102	#endif // DUMP
103	} // for genus descriptions
104
105
106	else{ // begin operators
107	if(*it == string("->")) {
108	assert(isHetero == false);
109	assert(isHomo == false);
110	assert(isValid == true); // only one operator per line allowed
111	isRenamed = true;
112	isValid = false;
113	#if defined(DUMP)
114	std::cout << "renaming detected" << std::endl;
115	#endif // DUMP
116	}
117	else {
118	if(*it == string("=>")){
119	assert(isRenamed == false);
120	assert(isHomo == false);
121	assert(isValid == true);
122	isHetero = true;
123	isValid = false;
124	#if defined(DUMP)
125	std::cout << "heteronym detected" << std::endl;
126	#endif // DUMP
127	}
128	else {
129	if(*it == string("=")){
130	assert(isRenamed == false);
131	assert(isHetero == false);
132	assert(isValid == true);
133	isHomo = true;
134	isValid = false;
135	#if defined(DUMP)
136	std::cout << "homonym detected" << std::endl;
137	#endif // DUMP
138	}
139	else{
140	if(*it == string("(corrig.)")) {
141	isCorr = true;
142	#if defined(DUMP)
143	std::cout << "correction" << std::endl;
144	#endif // DUMP
145	}
146	else{
147	if(*it == string("see:")){
148	isSee = true;
149	isValid = false;
150	#if defined(DUMP)
151	std::cout << "reference" << std::endl;
152	#endif // DUMP
153	}
154	else{
155	if(*it == string("subsp.")){
156	#if defined(DUMP)
157	std::cout << "subspecies detected at position: >>>"<< ssp << "<<<" << std::endl;
158	#endif // DUMP
159	ssp == 2 ? sspPos[0] = ssp : sspPos[1] = ssp;
160	// : sspPos[1] == 0 ? sspPos[1] = ssp
161	// : 10000,exit (102); // bad hack to fake return value
162	// max. one subsp. on each operator side
163	#if defined(DUMP)
164	std::cout << "position of subsp.: " << sspPos[0] << "\tand: " << sspPos[1] << std::endl;
165	#endif // DUMP
166	}
167	}
168	}
169	}
170	}
171	}
172	} // end operators
173	} // end of token iteration
174
175
176
177	if(isGenus) { // for genus descriptions
178	#if defined(DUMP)
179	std::cout << " GENUS description found " << std::endl;
180	#endif // DUMP
181	if(isValid){
182	descNames[0] = (*tokenLP)[0];
183	actType = VALGEN;
184	#if defined(DUMP)
185	std::cout << "VALIDGEN type set to: " << actType << std::endl;
186	#endif// DUMP
187	}
188	else{
189	if(isHetero){
190	descNames[0] = (*tokenLP)[2];
191	descNames[3] = (*tokenLP)[0];
192	actType = HETGEN;
193	#if defined(DUMP)
194	std::cout << "HETERONYMGEN type set to: " << actType << std::endl;
195	#endif // DUMP
196	}
197	else{
198	if(isHomo){
199	descNames[0] = (*tokenLP)[2];
200	descNames[3] = (*tokenLP)[0];
201	actType = HOMGEN;
202	#if defined(DUMP)
203	std::cout << "HOMONYMGEN type set to: " << actType << std::endl;
204	#endif // DUMP
205
206	}
207	else{
208
209	if(isRenamed){
210	descNames[0] = (*tokenLP)[2];
211	descNames[3] = (*tokenLP)[0];
212	actType = RENGEN;
213	#if defined(DUMP)
214	std::cout << "RENAMEDGEN type set to: " << actType << std::endl;
215	#endif // DUMP
216	}
217	else{
218	#if defined(DUMP)
219	std::cout << "no meaningful combination of conditions reached" << std::endl
220	<< "for line: " << descriptionString << std::endl;
221	std::cout << "description type is set to NOTYPE: " << NOTYPE << std::endl;
222	#endif // DUMP
223	isValid = false;
224	#if defined(DUMP)
225	std::cout << "isValid set to false " << std::endl;
226	#endif // DUMP
227	actType = NOTYPE;
228	// exit(25);
229	}
230	}
231	}
232	}
233
234	} // end of genus part
235	else{ // begin of species part
236
237	// just fancy experimental , maybe not 100% correct but looks good
238	if (!(((sspPos[0] == 0) \|\| (sspPos[0] == 2)) && (((sspPos[1] > 4)&&(sspPos[1]< 9))\|\|(sspPos[1]==0))))
239	{
240	#if defined(DUMP)
241	std::cout << "subsp. at strange position found in line:" << std::endl<< descriptionString << endl;
242	std::cout << "description type is set to NOTYPE: " << NOTYPE << std::endl;
243	#endif // DUMP
244	isValid = false;
245	#if defined(DUMP)
246	std::cout << "isValid set to false " << std::endl;
247	#endif // DUMP
248	actType = NOTYPE;
249	}
250
251	if(isValid){
252	descNames[0] = (*tokenLP)[0];
253	descNames[1] = (*tokenLP)[1];
254	if(sspPos[0] != 0) {descNames[2] = (*tokenLP)[sspPos[0]+1];} // only if subsp. exists
255	actType = VALSPEC;
256	}
257	else{// begin else isHetero
258	if(isHetero){
259	descNames[0] = (*tokenLP)[3 + sspPos[0]];
260	descNames[1] = (*tokenLP)[4 + sspPos[0]];
261	if(sspPos[1]!=0){descNames[2]=(*tokenLP)[6 + sspPos[0]];} // only if subsp. exists
262
263	descNames[3] = (*tokenLP)[0];
264	descNames[4] = (*tokenLP)[1];
265	if(sspPos[0]!=0){descNames[5]=(*tokenLP)[sspPos[0]+1];} // only if subsp. exists
266
267	actType = HETSPEC;
268	}
269	else{
270	if(isHomo){
271	descNames[0] = (*tokenLP)[3 + sspPos[0]];
272	descNames[1] = (*tokenLP)[4 + sspPos[0]];
273	if(sspPos[1]!=0){descNames[2]=(*tokenLP)[6 + sspPos[0]];} // only if subsp. exists
274
275	descNames[3] = (*tokenLP)[0];
276	descNames[4] = (*tokenLP)[1];
277	if(sspPos[0]!=0){descNames[5]=(*tokenLP)[sspPos[0]+1];} // only if subsp. exists
278
279	actType = HOMSPEC;
280
281	}
282	else{// else branch isHomo
283	if(isRenamed){
284	descNames[0] = (*tokenLP)[3 + sspPos[0]];
285	descNames[1] = (*tokenLP)[4 + sspPos[0]];
286	if(sspPos[1]!=0){descNames[2]=(*tokenLP)[6 + sspPos[0]];} // only if subsp. exists
287
288	descNames[3] = (*tokenLP)[0];
289	descNames[4] = (*tokenLP)[1];
290	if(sspPos[0]!=0){descNames[5]=(*tokenLP)[sspPos[0]+1];} // only if subsp. exists
291
292	actType = RENSPEC;
293
294	}
295	else{// species remaining cases
296	#if defined(DUMP)
297	std::cout << "not a valid description line detected" << std::endl;
298	std::cout << "isValid: " << isValid << std::endl;
299	std::cout << "isRenamed: " << isRenamed << std::endl;
300	std::cout << "isHetero: " << isHetero << std::endl;
301	std::cout << "isHomo: " << isHomo << std::endl;
302	std::cout << "isGenus: " << isGenus << std::endl;
303	std::cout << "isSee: " << isSee << std::endl;
304	std::cout << "isCorr: " << isCorr << std::endl;
305	std::cout << "sspPos: " << sspPos[0]<< " and " << sspPos[1] << std::endl;
306	std::cout << descriptionString << std::endl;
307	#endif // DUMP
308	actType = NOTYPE;
309	}
310
311	}// end else isHomo
312	}// end else isHetero
313	}//end else is Valid
314	}// end of species part
315
316
317	// check line for -> or => or =
318	//}
319
320	#if defined(DUMP)
321	std::cout << descriptionString << std::endl;
322	std::cout << "classified as " << actType << std::endl ;
323	#endif // DUMP
324
325	//Desco actDesc(actType);
326	Desco actDesc(actType, isCorr, descNames[0],descNames[1],descNames[2],descNames[3],descNames[4],descNames[5] );
327	delete tokenLP;
328	return actDesc;
329	// return actType;
330	}; // end determineType
331
332
333	string Desco::getFirstName(){
334	// return nameParts[0] + nameParts[1] + nameParts[2];
335	string tmp = firstgen;
336	if (!firstspec.empty()){
337	tmp = tmp + " " + firstspec;
338	if(!firstsub.empty()){
339	tmp = tmp + " " + "subsp." + " " + firstsub;
340	}
341	}
342
343
344	return tmp;
345	} // end Desco::getFirstName()
346
347	string Desco::getSecondName(){
348	string tmp = secondgen;
349	if (!secondspec.empty()){
350	tmp = tmp + " " + firstspec;
351	if(!secondsub.empty()){
352	tmp = tmp + " " + "subsp." + " " + secondsub;
353	}
354	}
355	return tmp;
356	// return nameParts[3] + nameParts[4] + nameParts[5];
357	} // Desco::getSecondName()
358
359
/**
 * Tells whether a string consists solely of the letters 'A' to 'Z'.
 *
 * @param input  the string to examine
 * @return false as soon as one character lies outside 'A'..'Z', true
 *         otherwise. An empty string contains no such character and
 *         therefore yields true.
 */
bool isUpperCase(const std::string& input)
{
    for (std::string::const_iterator c = input.begin(); c != input.end(); ++c) {
        if (*c < 'A' || *c > 'Z') return false;
    }
    return true;
}
368
369
370	} // end namespace
371	// test

Note: See TracBrowser for help on using the repository browser.

Download in other formats: