Context Navigation

phyfromfile.cpp

Visit:

Last change on this file was 10390, checked in by aboeckma, 12 years ago
added muscle sources and makefile
File size: 5.8 KB

Line
1	#include "muscle.h"
2	#include "tree.h"
3	#include "textfile.h"
4
5	#define TRACE 0
6
7	// Tokens in Newick files are:
8	// ( ) : , ;
9	// string
10	// 'string'
11	// "string"
12	// [ comment ]
13	//
14	// We can't safely distinguish between identifiers and floating point
15	// numbers at the lexical level (because identifiers may be numeric,
16	// or start with digits), so both edge lengths and identifiers are
17	// returned as strings.
18
19	const char *Tree::NTTStr(NEWICK_TOKEN_TYPE NTT) const
20	{
21	switch (NTT)
22	{
23	#define c(x) case NTT_##x: return #x;
24	c(Unknown)
25	c(Lparen)
26	c(Rparen)
27	c(Colon)
28	c(Comma)
29	c(Semicolon)
30	c(String)
31	c(SingleQuotedString)
32	c(DoubleQuotedString)
33	c(Comment)
34	#undef c
35	}
36	return "??";
37	}
38
39	NEWICK_TOKEN_TYPE Tree::GetToken(TextFile &File, char szToken[], unsigned uBytes) const
40	{
41	// Skip leading white space
42	File.SkipWhite();
43
44	char c;
45	File.GetCharX(c);
46
47	// In case a single-character token
48	szToken[0] = c;
49	szToken[1] = 0;
50
51	unsigned uBytesCopied = 0;
52	NEWICK_TOKEN_TYPE TT;
53	switch (c)
54	{
55	case '(':
56	return NTT_Lparen;
57
58	case ')':
59	return NTT_Rparen;
60
61	case ':':
62	return NTT_Colon;
63
64	case ';':
65	return NTT_Semicolon;
66
67	case ',':
68	return NTT_Comma;
69
70	case '\'':
71	TT = NTT_SingleQuotedString;
72	File.GetCharX(c);
73	break;
74
75	case '"':
76	TT = NTT_DoubleQuotedString;
77	File.GetCharX(c);
78	break;
79
80	case '[':
81	TT = NTT_Comment;
82	break;
83
84	default:
85	TT = NTT_String;
86	break;
87	}
88
89	for (;;)
90	{
91	if (TT != NTT_Comment)
92	{
93	if (uBytesCopied < uBytes - 2)
94	{
95	szToken[uBytesCopied++] = c;
96	szToken[uBytesCopied] = 0;
97	}
98	else
99	Quit("Tree::GetToken: input buffer too small, token so far='%s'", szToken);
100	}
101	bool bEof = File.GetChar(c);
102	if (bEof)
103	return TT;
104
105	switch (TT)
106	{
107	case NTT_String:
108	if (0 != strchr("():;,", c))
109	{
110	File.PushBack(c);
111	return NTT_String;
112	}
113	if (isspace(c))
114	return NTT_String;
115	break;
116
117	case NTT_SingleQuotedString:
118	if ('\'' == c)
119	return NTT_String;
120	break;
121
122	case NTT_DoubleQuotedString:
123	if ('"' == c)
124	return NTT_String;
125	break;
126
127	case NTT_Comment:
128	if (']' == c)
129	return GetToken(File, szToken, uBytes);
130	break;
131
132	default:
133	Quit("Tree::GetToken, invalid TT=%u", TT);
134	}
135	}
136	}
137
138	// NOTE: this hack must come after definition of Tree::GetToken.
139	#if TRACE
140	#define GetToken GetTokenVerbose
141	#endif
142
143	void Tree::FromFile(TextFile &File)
144	{
145	// Assume rooted.
146	// If we discover that it is unrooted, will convert on the fly.
147	CreateRooted();
148
149	double dEdgeLength;
150	bool bEdgeLength = GetGroupFromFile(File, 0, &dEdgeLength);
151
152	// Next token should be either ';' for rooted tree or ',' for unrooted.
153	char szToken[16];
154	NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));
155
156	// If rooted, all done.
157	if (NTT_Semicolon == NTT)
158	{
159	if (bEdgeLength)
160	Log(" * Warning * edge length on root group in Newick file %s\n",
161	File.GetFileName());
162	Validate();
163	return;
164	}
165
166	if (NTT_Comma != NTT)
167	Quit("Tree::FromFile, expected ';' or ',', got '%s'", szToken);
168
169	const unsigned uThirdNode = UnrootFromFile();
170	bEdgeLength = GetGroupFromFile(File, uThirdNode, &dEdgeLength);
171	if (bEdgeLength)
172	SetEdgeLength(0, uThirdNode, dEdgeLength);
173	Validate();
174	}
175
176	// Return true if edge length for this group.
177	bool Tree::GetGroupFromFile(TextFile &File, unsigned uNodeIndex,
178	double *ptrdEdgeLength)
179	{
180	char szToken[1024];
181	NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));
182
183	// Group is either leaf name or (left, right).
184	if (NTT_String == NTT)
185	{
186	SetLeafName(uNodeIndex, szToken);
187	#if TRACE
188	Log("Group is leaf '%s'\n", szToken);
189	#endif
190	}
191	else if (NTT_Lparen == NTT)
192	{
193	const unsigned uLeft = AppendBranch(uNodeIndex);
194	const unsigned uRight = uLeft + 1;
195
196	// Left sub-group...
197	#if TRACE
198	Log("Got '(', group is compound, expect left sub-group\n");
199	#endif
200	double dEdgeLength;
201	bool bLeftLength = GetGroupFromFile(File, uLeft, &dEdgeLength);
202	#if TRACE
203	if (bLeftLength)
204	Log("Edge length for left sub-group: %.3g\n", dEdgeLength);
205	else
206	Log("No edge length for left sub-group\n");
207	#endif
208	if (bLeftLength)
209	SetEdgeLength(uNodeIndex, uLeft, dEdgeLength);
210
211	// ... then comma ...
212	#if TRACE
213	Log("Expect comma\n");
214	#endif
215	NTT = GetToken(File, szToken, sizeof(szToken));
216	if (NTT_Comma != NTT)
217	Quit("Tree::GetGroupFromFile, expected ',', got '%s'", szToken);
218
219	// ...then right sub-group...
220	#if TRACE
221	Log("Expect right sub-group\n");
222	#endif
223	bool bRightLength = GetGroupFromFile(File, uRight, &dEdgeLength);
224	if (bRightLength)
225	SetEdgeLength(uNodeIndex, uRight, dEdgeLength);
226
227	#if TRACE
228	if (bRightLength)
229	Log("Edge length for right sub-group: %.3g\n", dEdgeLength);
230	else
231	Log("No edge length for right sub-group\n");
232	#endif
233
234	// ... then closing parenthesis.
235	#if TRACE
236	Log("Expect closing parenthesis (or comma if > 2-ary)\n");
237	#endif
238	NTT = GetToken(File, szToken, sizeof(szToken));
239	if (NTT_Rparen == NTT)
240	;
241	else if (NTT_Comma == NTT)
242	{
243	File.PushBack(',');
244	return false;
245	}
246	else
247	Quit("Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);
248	}
249	else
250	Quit("Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",
251	szToken);
252
253	// Group may optionally be followed by edge length.
254	bool bEof = File.SkipWhiteX();
255	if (bEof)
256	return false;
257	char c;
258	File.GetCharX(c);
259	#if TRACE
260	Log("Character following group, could be colon, is '%c'\n", c);
261	#endif
262	if (':' == c)
263	{
264	NTT = GetToken(File, szToken, sizeof(szToken));
265	if (NTT_String != NTT)
266	Quit("Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);
267	*ptrdEdgeLength = atof(szToken);
268	return true;
269	}
270	File.PushBack(c);
271	return false;
272	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/GDE/MUSCLE/src/phyfromfile.cpp

Download in other formats: