Context Navigation

source: tags/ms_r17q3/ARBDB/adstring.cxx

Visit:

Last change on this file was 16425, checked in by westram, 8 years ago
reintegrates 'aci' into 'trunk' extends ACI language (implementing #707) boolean operators: `And`,`Or`,`Not` numeric comparison: `isAbove`,`isBelow`,`isEqual` floating point arithmetic: `fplus`,`fminus`,`fmult`,`fdiv` misc: `round`,`inRange`,`isEmpty` enforce parameter checks in ACI function code case of commands completely ignored adds: log:branches/aci@16385,16391:16424
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 64.4 KB

Line
1	// =============================================================== //
2	// //
3	// File : adstring.cxx //
4	// Purpose : various string functions //
5	// //
6	// Institute of Microbiology (Technical University Munich) //
7	// http://www.arb-home.de/ //
8	// //
9	// =============================================================== //
10
11	#include <arb_backtrace.h>
12	#include <arb_strbuf.h>
13	#include <arb_defs.h>
14	#include <arb_str.h>
15
16	#include "gb_key.h"
17	#include "gb_aci.h"
18
19	#include <SigHandler.h>
20
21	#include <execinfo.h>
22
23	#include <cstdarg>
24	#include <cctype>
25	#include <cerrno>
26	#include <ctime>
27	#include <setjmp.h>
28
29	#include <valgrind.h>
30
31	static char GBS_string_2_key_with_exclusions(const char str, const char *additional) {
32	// converts any string to a valid key (all chars in 'additional' are additionally allowed)
33	char buf[GB_KEY_LEN_MAX+1];
34	int i;
35	int c;
36	for (i=0; i<GB_KEY_LEN_MAX;) {
37	c = *(str++);
38	if (!c) break;
39
40	if (c==' ' \|\| c == '_') {
41	buf[i++] = '_';
42	}
43	else if (isalnum(c) \|\| strchr(additional, c) != 0) {
44	buf[i++] = c;
45	}
46	}
47	for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_';
48	buf[i] = 0;
49	return ARB_strdup(buf);
50	}
51
52	char GBS_string_2_key(const char str) // converts any string to a valid key
53	{
54	return GBS_string_2_key_with_exclusions(str, "");
55	}
56
57	char GB_memdup(const char source, size_t len) {
58	char *dest = ARB_alloc<char>(len);
59	memcpy(dest, source, len);
60	return dest;
61	}
62
63	GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT
64	// test whether all characters are letters, numbers or _
65	int i;
66	long len;
67
68	if (!key \|\| key[0] == 0) return "Empty key is not allowed";
69	len = strlen(key);
70	if (len>GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key);
71	if (len < GB_KEY_LEN_MIN) return GBS_global_string("Invalid key '%s': too short", key);
72
73	for (i = 0; key[i]; ++i) {
74	char c = key[i];
75	if ((c>='a') && (c<='z')) continue;
76	if ((c>='A') && (c<='Z')) continue;
77	if ((c>='0') && (c<='9')) continue;
78	if (c=='_') continue;
79	return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key);
80	}
81
82	return 0;
83	}
84
85	GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT
86	// test whether all characters are letters, numbers or _
87	// additionally allow '/' and '->' for hierarchical keys
88	GB_ERROR err = 0;
89
90	if (!key \|\| key[0] == 0) {
91	err = "Empty key is not allowed";
92	}
93	else if (!strpbrk(key, "/-")) {
94	err = GB_check_key(key);
95	}
96	else {
97	char *key_copy = ARB_strdup(key);
98	char *start = key_copy;
99
100	if (start[0] == '/') ++start;
101
102	while (start && !err) {
103	char *key_end = strpbrk(start, "/-");
104
105	if (key_end) {
106	char c = *key_end;
107	*key_end = 0;
108	err = GB_check_key(start);
109	*key_end = c;
110
111	if (c == '-') {
112	if (key_end[1] != '>') {
113	err = GBS_global_string("'>' expected after '-' in '%s'", key);
114	}
115	start = key_end+2;
116	}
117	else {
118	gb_assert(c == '/');
119	start = key_end+1;
120	}
121	}
122	else {
123	err = GB_check_key(start);
124	start = 0;
125	}
126	}
127
128	free(key_copy);
129	}
130
131	return err;
132	}
133
134	// ----------------------------------------------
135	// escape/unescape characters in strings
136
137	char GBS_escape_string(const char str, const char *chars_to_escape, char escape_char) {
138	/*! escape characters in 'str'
139	*
140	* uses a special escape-method, which eliminates all 'chars_to_escape' completely
141	* from str (this makes further processing of the string more easy)
142	*
143	* @param str string to escape
144	*
145	* @param escape_char is the character used for escaping. For performance reasons it
146	* should be a character rarely used in 'str'.
147	*
148	* @param chars_to_escape may not contain 'A'-'Z' (these are used for escaping)
149	* and it may not be longer than 26 bytes
150	*
151	* @return heap copy of escaped string
152	*
153	* Inverse of GBS_unescape_string()
154	*/
155
156	int len = strlen(str);
157	char buffer = ARB_alloc<char>(2len+1);
158	int j = 0;
159	int i;
160
161	gb_assert(strlen(chars_to_escape) <= 26);
162	gb_assert(strchr(chars_to_escape, escape_char) == 0); // escape_char may not be included in chars_to_escape
163
164	for (i = 0; str[i]; ++i) {
165	if (str[i] == escape_char) {
166	buffer[j++] = escape_char;
167	buffer[j++] = escape_char;
168	}
169	else {
170	const char *found = strchr(chars_to_escape, str[i]);
171	if (found) {
172	buffer[j++] = escape_char;
173	buffer[j++] = (found-chars_to_escape+'A');
174
175	gb_assert(found[0]<'A' \|\| found[0]>'Z'); // illegal character in chars_to_escape
176	}
177	else {
178
179	buffer[j++] = str[i];
180	}
181	}
182	}
183	buffer[j] = 0;
184
185	return buffer;
186	}
187
188	char GBS_unescape_string(const char str, const char *escaped_chars, char escape_char) {
189	//! inverse of GB_escape_string() - for params see there
190
191	int len = strlen(str);
192	char *buffer = ARB_alloc<char>(len+1);
193	int j = 0;
194	int i;
195
196	#if defined(ASSERTION_USED)
197	int escaped_chars_len = strlen(escaped_chars);
198	#endif // ASSERTION_USED
199
200	gb_assert(strlen(escaped_chars) <= 26);
201	gb_assert(strchr(escaped_chars, escape_char) == 0); // escape_char may not be included in chars_to_escape
202
203	for (i = 0; str[i]; ++i) {
204	if (str[i] == escape_char) {
205	if (str[i+1] == escape_char) {
206	buffer[j++] = escape_char;
207	}
208	else {
209	int idx = str[i+1]-'A';
210
211	gb_assert(idx >= 0 && idx<escaped_chars_len);
212	buffer[j++] = escaped_chars[idx];
213	}
214	++i;
215	}
216	else {
217	buffer[j++] = str[i];
218	}
219	}
220	buffer[j] = 0;
221
222	return buffer;
223	}
224
225	char *GBS_eval_env(GB_CSTR p) {
226	GB_ERROR error = 0;
227	GB_CSTR ka;
228	GBS_strstruct *out = GBS_stropen(1000);
229
230	while ((ka = GBS_find_string(p, "$(", 0))) {
231	GB_CSTR kz = strchr(ka, ')');
232	if (!kz) {
233	error = GBS_global_string("missing ')' for envvar '%s'", p);
234	break;
235	}
236	else {
237	char *envvar = ARB_strpartdup(ka+2, kz-1);
238	int len = ka-p;
239
240	if (len) GBS_strncat(out, p, len);
241
242	GB_CSTR genv = GB_getenv(envvar);
243	if (genv) GBS_strcat(out, genv);
244
245	p = kz+1;
246	free(envvar);
247	}
248	}
249
250	if (error) {
251	GB_export_error(error);
252	GBS_strforget(out);
253	return 0;
254	}
255
256	GBS_strcat(out, p); // copy rest
257	return GBS_strclose(out);
258	}
259
long GBS_gcgchecksum(const char *seq)
// GCGchecksum
{
    /* Sums up the uppercased characters of 'seq', each multiplied with a
     * weight cycling through 1..57, and returns that sum modulo 10000.
     *
     * Bytes with negative 'char' value are added unmodified: passing them to
     * toupper() would be undefined behavior.
     */
    long check = 0;
    long count = 0;

    for (const char *p = seq; *p; ++p) {
        int c = *p;
        if (c >= 0) c = toupper(c);

        count++;
        check += count * c;
        if (count == 57) count = 0; // restart weights
    }

    return check % 10000;
}
277
278	// Table of CRC-32's of all single byte values (made by makecrc.c of ZIP source)
// crctab[n] is the value obtained from n by 8 rounds of
// "shift right by one bit; XOR with 0xedb88320 if the bit shifted out was set"
uint32_t crctab[] = {
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
    0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
    0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
    0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
    0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
    0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
    0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
    0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
    0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
    0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
    0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
    0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
    0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
    0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
    0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
    0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
    0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
    0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
    0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
    0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
    0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
    0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
    0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
    0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
    0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
    0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
    0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
    0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
    0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
    0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
    0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
    0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
    0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
    0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
    0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
    0x2d02ef8d
};
333
334	uint32_t GB_checksum(const char seq, long length, int ignore_case, const char exclude) // RALF: 02-12-96
335	{
336	/* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source
337	* if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too)
338	*/
339
340	unsigned long c = 0xffffffffL;
341	long n = length;
342	int i;
343	int tab[256];
344
345	for (i=0; i<256; i++) {
346	tab[i] = ignore_case ? toupper(i) : i;
347	}
348
349	if (exclude) {
350	while (1) {
351	int k = (unsigned char )exclude++;
352	if (!k) break;
353	tab[k] = 0;
354	if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0;
355	}
356	}
357
358	while (n--) {
359	i = tab[(const unsigned char )seq++];
360	if (i) {
361	c = crctab[((int) c ^ i) & 0xff] ^ (c >> 8);
362	}
363	}
364	c = c ^ 0xffffffffL;
365	return c;
366	}
367
368	uint32_t GBS_checksum(const char seq, int ignore_case, const char exclude)
369	// if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too)
370	{
371	return GB_checksum(seq, strlen(seq), ignore_case, exclude);
372	}
373
size_t GBS_shorten_repeated_data(char *data) {
    /* shortens repeats in 'data' (runs of 5 or more identical characters)
     * This function modifies 'data'!!
     * e.g. "..............................ACGT....................TGCA"
     * ->   ".{30}ACGT.{20}TGCA"
     *
     * returns the length of the shortened string
     */

#if defined(DEBUG)
    size_t orgLen = strlen(data);
#endif // DEBUG
    char *const  start   = data;
    char        *out     = data;   // write position (never overtakes 'in')
    const char  *in      = data;   // read position
    char         run_chr = *in++;  // character of current run
    size_t       run_len = 1;

    while (run_chr) {
        char next = *in++;
        if (next == run_chr) {
            ++run_len;
            continue;
        }

        // run has ended -> write it
        if (run_len<5) {
            memset(out, run_chr, run_len); // insert plain
            out += run_len;
        }
        else {
            out += sprintf(out, "%c{%zu}", run_chr, run_len); // insert repeat count
        }
        run_chr = next;
        run_len = 1;
    }

    *out = 0;

#if defined(DEBUG)
    gb_assert(strlen(start) <= orgLen);
#endif // DEBUG
    return out-start;
}
414
415
416	// ------------------------------------------
417	// helper classes for tagged fields
418
419	class TextRef {
420	const char *data; // has no terminal zero-byte!
421	int length;
422
423	public:
424	TextRef() : data(0), length(-1) {}
425	TextRef(const char *data_, int length_) : data(data_), length(length_) {}
426	explicit TextRef(const char *zeroTerminated) : data(zeroTerminated), length(strlen(data)) {}
427
428	bool defined() const { return data && length>0; }
429	const char *get_data() const { return data; }
430	int get_length() const { return length; }
431
432	const char *get_following() const { return data ? data+length : NULL; }
433
434	int compare(const char *str) const {
435	gb_assert(defined());
436	int cmp = strncmp(get_data(), str, get_length());
437	if (!cmp) {
438	if (str[get_length()]) {
439	cmp = -1; // right side contains more content
440	}
441	}
442	return cmp;
443	}
444	int icompare(const char *str) const {
445	gb_assert(defined());
446	int cmp = strncasecmp(get_data(), str, get_length());
447	if (!cmp) {
448	if (str[get_length()]) {
449	cmp = -1; // right side contains more content
450	}
451	}
452	return cmp;
453	}
454	char *copy() const { return ARB_strndup(get_data(), get_length()); }
455
456	char head() const { return defined() ? data[0] : 0; }
457	char tail() const { return defined() ? data[length-1] : 0; }
458
459	TextRef headTrimmed() const {
460	if (defined()) {
461	for (int s = 0; s<length; ++s) {
462	if (!isspace(data[s])) {
463	return TextRef(data+s, length-s);
464	}
465	}
466	}
467	return TextRef();
468	}
469	TextRef tailTrimmed() const {
470	if (defined()) {
471	for (int s = length-1; s>=0; --s) {
472	if (!isspace(data[s])) {
473	return TextRef(data, s+1);
474	}
475	}
476	}
477	return TextRef();
478	}
479
480	TextRef trimmed() const {
481	return headTrimmed().tailTrimmed();
482	}
483
484	inline TextRef partBefore(const TextRef& subref) const;
485	inline TextRef partBehind(const TextRef& subref) const;
486
487	bool is_part_of(const TextRef& other) const {
488	gb_assert(defined() && other.defined());
489	return get_data()>=other.get_data() && get_following()<=other.get_following();
490	}
491
492	const char find(char c) const { return reinterpret_cast<const char>(memchr(get_data(), c, get_length())); }
493	};
494
495	TextRef textBetween(const TextRef& t1, const TextRef& t2) {
496	const char *behind_d1 = t1.get_following();
497	const char *d2 = t2.get_data();
498
499	if (behind_d1 && d2 && behind_d1<d2) {
500	return TextRef(behind_d1, d2-behind_d1);
501	}
502	return TextRef();
503	}
504
505	inline TextRef TextRef::partBefore(const TextRef& subref) const {
506	gb_assert(subref.is_part_of(*this));
507	return textBetween(TextRef(get_data(), 0), subref);
508	}
509	inline TextRef TextRef::partBehind(const TextRef& subref) const {
510	gb_assert(subref.is_part_of(*this));
511	return TextRef(subref.get_following(), get_following()-subref.get_following());
512	}
513
514	class TaggedContentParser {
515	TextRef wholeInput;
516	TextRef tag, content; // current position
517	TextRef restTags; // store (rest of) multiple tags (e.g. from "[t1,t2]")
518	TextRef nextBrackets; // next "[..]" part (behind current tag)
519
520	void findBrackets(const char *in) {
521	nextBrackets = TextRef();
522	const char *tag_start = strchr(in, '[');
523	if (tag_start) {
524	const char *tag_end = strchr(tag_start, ']');
525	if (tag_end) {
526	if (tag_end == tag_start+1) { // empty tag -> use as content
527	findBrackets(tag_end+1);
528	}
529	else {
530	const char unwanted_bracket = reinterpret_cast<const char>(memchr(tag_start+1, '[', tag_end-tag_start-1));
531	if (unwanted_bracket) { // tagname contains '[' -> step to next bracket
532	findBrackets(unwanted_bracket);
533	}
534	else {
535	TextRef name = TextRef(tag_start+1, tag_end-tag_start-1).trimmed();
536	if (name.defined()) { // not only whitespace inside brackets
537	nextBrackets = TextRef(tag_start, tag_end-tag_start+1);
538	}
539	else {
540	findBrackets(tag_end+1);
541	}
542	}
543	}
544	}
545	}
546	}
547
548	void parse_next_multi_tag() {
549	gb_assert(restTags.defined());
550	TextRef comma(restTags.find(','), 1);
551	if (comma.defined()) {
552	tag = restTags.partBefore(comma).tailTrimmed();
553	restTags = restTags.partBehind(comma).headTrimmed();
554	}
555	else {
556	tag = restTags;
557	restTags = TextRef();
558	}
559	}
560	void parse_next() {
561	if (restTags.defined()) {
562	parse_next_multi_tag();
563	}
564	else if (nextBrackets.defined()) {
565	TextRef brackets = nextBrackets;
566	findBrackets(brackets.get_following());
567
568	content = (nextBrackets.defined() ? textBetween(brackets, nextBrackets) : wholeInput.partBehind(brackets)).trimmed();
569
570	gb_assert(brackets.head() == '[' && brackets.tail() == ']');
571
572	TextRef tags = TextRef(brackets.get_data()+1, brackets.get_length()-2).trimmed();
573	gb_assert(tags.defined());
574
575	restTags = tags;
576	parse_next_multi_tag();
577	}
578	else {
579	tag = content = TextRef();
580	gb_assert(!has_part());
581	}
582	}
583	void parse_first() {
584	gb_assert(!has_part());
585	findBrackets(wholeInput.get_data());
586	content = (nextBrackets.defined() ? wholeInput.partBefore(nextBrackets) : wholeInput).trimmed();
587	if (!content.defined()) parse_next(); // no untagged prefix seen -> directly goto first tag
588	}
589
590	public:
591	TaggedContentParser(const char *input_) : wholeInput(input_) { parse_first(); }
592
593	bool has_tag() const { return tag.defined(); }
594	bool has_content() const { return content.defined(); }
595
596	void next() { parse_next(); }
597	bool has_part() const { return has_tag() \|\| has_content(); } // false -> parser has finished
598
599	const TextRef& get_tag() const { return tag; }
600	const TextRef& get_content() const { return content; }
601	};
602
603
604	// -------------------------------------------
605	// helper function for tagged fields
606
607	static void g_bs_add_value_tag_to_hash(GB_HASH hash, const char tag, char *value) {
608	if (!value[0]) return; // ignore empty values
609
610	{
611	char *p;
612	p = value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{'
613	p = value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}'
614	}
615
616	GB_HASH sh = (GB_HASH )GBS_read_hash(hash, value);
617	if (!sh) {
618	sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent
619	GBS_write_hash(hash, value, (long)sh);
620	}
621	GBS_write_hash(sh, tag, 1);
622	}
623
624	static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH hash, char s, char default_tag, const char del) {
625	TaggedContentParser parser(s);
626	while (parser.has_part()) {
627	if (parser.has_content()) {
628	char *content = parser.get_content().copy();
629	if (parser.has_tag()) {
630	char *tag = parser.get_tag().copy();
631	if (!del \|\| ARB_stricmp(tag, del) != 0) {
632	g_bs_add_value_tag_to_hash(hash, tag, content);
633	}
634	free(tag);
635	}
636	else {
637	g_bs_add_value_tag_to_hash(hash, default_tag, content); // no tag found, use default tag
638	}
639	free(content);
640	}
641	parser.next();
642	}
643	}
644
645	static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH hash, char s, char default_tag, const char rtag, const char *aci, GBL_call_env& env) {
646	GB_ERROR error = 0;
647
648	TaggedContentParser parser(s);
649	while (parser.has_part() && !error) {
650	if (parser.has_content()) {
651	char *value = parser.get_content().copy();
652	char *tag = parser.has_tag() ? parser.get_tag().copy() : strdup(default_tag);
653
654	if (rtag && ARB_stricmp(tag, rtag) == 0) {
655	freeset(value, GB_command_interpreter_in_env(value, aci, env));
656	if (!value) error = GB_await_error();
657	}
658
659	if (!error) g_bs_add_value_tag_to_hash(hash, tag, value);
660
661	free(tag);
662	free(value);
663	}
664	parser.next();
665	}
666
667	return error;
668	}
669
670	static long g_bs_merge_tags(const char tag, long val, void cd_sub_result) {
671	GBS_strstruct sub_result = (GBS_strstruct)cd_sub_result;
672
673	GBS_strcat(sub_result, tag);
674	GBS_strcat(sub_result, ",");
675
676	return val;
677	}
678
679	static long g_bs_read_tagged_hash(const char value, long subhash, void cd_g_bs_collect_tags_hash) {
680	static int counter = 0;
681
682	GBS_strstruct *sub_result = GBS_stropen(100);
683	GBS_hash_do_sorted_loop((GB_HASH *)subhash, g_bs_merge_tags, GBS_HCF_sortedByKey, sub_result);
684	GBS_intcat(sub_result, counter++); // create a unique number
685
686	char *str = ARB_strupper(GBS_strclose(sub_result));
687
688	GB_HASH g_bs_collect_tags_hash = (GB_HASH)cd_g_bs_collect_tags_hash;
689	GBS_write_hash(g_bs_collect_tags_hash, str, (long)ARB_strdup(value)); // send output to new hash for sorting
690
691	free(str);
692	return subhash;
693	}
694
695	static long g_bs_read_final_hash(const char tag, long value, void cd_merge_result) {
696	GBS_strstruct merge_result = (GBS_strstruct)cd_merge_result;
697
698	char lk = const_cast<char>(strrchr(tag, ','));
699	if (lk) { // remove number at end
700	*lk = 0;
701
702	if (!merge_result->empty()) merge_result->put(' '); // skip trailing space
703	merge_result->put('[');
704	merge_result->cat(tag);
705	merge_result->put(']');
706	merge_result->put(' ');
707	}
708	merge_result->cat((char*)value);
709	return value;
710	}
711
712	static char g_bs_get_string_of_tag_hash(GB_HASH tag_hash) {
713	GBS_strstruct *merge_result = GBS_stropen(256);
714	GB_HASH *collect_tags_hash = GBS_create_dynaval_hash(512, GB_IGNORE_CASE, GBS_dynaval_free);
715
716	GBS_hash_do_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash
717	GBS_hash_do_sorted_loop(collect_tags_hash, g_bs_read_final_hash, GBS_HCF_sortedByKey, merge_result);
718
719	GBS_free_hash(collect_tags_hash);
720	return GBS_strclose(merge_result);
721	}
722
723	static long g_bs_free_hash_of_hashes_elem(const char /key/, long val, void ) {
724	GB_HASH hash = (GB_HASH)val;
725	if (hash) GBS_free_hash(hash);
726	return 0;
727	}
728	static void g_bs_free_hash_of_hashes(GB_HASH *hash) {
729	GBS_hash_do_loop(hash, g_bs_free_hash_of_hashes_elem, NULL);
730	GBS_free_hash(hash);
731	}
732
733	char GBS_merge_tagged_strings(const char s1, const char tag1, const char replace1, const char s2, const char tag2, const char *replace2) {
734	/* Create a tagged string from two tagged strings:
735	* a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string'
736	*
737	* if 's2' is not empty, then delete tag 'replace1' in 's1'
738	* if 's1' is not empty, then delete tag 'replace2' in 's2'
739	*
740	* (result should never be NULL)
741	*/
742
743	char *str1 = ARB_strdup(s1);
744	char *str2 = ARB_strdup(s2);
745	char *t1 = GBS_string_2_key(tag1);
746	char *t2 = GBS_string_2_key(tag2);
747	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
748
749	if (!s1[0]) replace2 = NULL;
750	if (!s2[0]) replace1 = NULL;
751
752	if (replace1 && !replace1[0]) replace1 = NULL;
753	if (replace2 && !replace2[0]) replace2 = NULL;
754
755	g_bs_convert_string_to_tagged_hash_with_delete(hash, str1, t1, replace1);
756	g_bs_convert_string_to_tagged_hash_with_delete(hash, str2, t2, replace2);
757
758	char *result = g_bs_get_string_of_tag_hash(hash);
759
760	g_bs_free_hash_of_hashes(hash);
761
762	free(t2);
763	free(t1);
764	free(str2);
765	free(str1);
766
767	return result;
768	}
769
770	char GBS_modify_tagged_string_with_ACI(const char s, const char dt, const char tag, const char *aci, GBL_call_env& env) {
771	/* if 's' is untagged, tag it with default tag 'dt'.
772	* if 'tag' is != NULL -> apply 'aci' to that part of the content of 's', which is tagged with 'tag' (i.e. look for '[tag]')
773	*
774	* if result is NULL, an error has been exported.
775	*/
776
777	char *str = ARB_strdup(s);
778	char *default_tag = GBS_string_2_key(dt);
779	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
780	char *result = 0;
781
782	GB_ERROR error = g_bs_convert_string_to_tagged_hash_with_rewrite(hash, str, default_tag, tag, aci, env);
783
784	if (!error) {
785	result = g_bs_get_string_of_tag_hash(hash);
786	}
787	else {
788	GB_export_error(error);
789	}
790
791	g_bs_free_hash_of_hashes(hash);
792
793	free(default_tag);
794	free(str);
795
796	return result;
797	}
798
799	char GB_read_as_tagged_string(GBDATA gbd, const char *tagi) {
800	char *buf = GB_read_as_string(gbd);
801	if (buf && tagi && tagi[0]) {
802	TaggedContentParser parser(buf);
803
804	char *wantedTag = GBS_string_2_key(tagi);
805	char *contentFound = NULL;
806
807	while (parser.has_part() && !contentFound) {
808	if (parser.has_tag() && parser.get_tag().icompare(wantedTag) == 0) {
809	contentFound = parser.get_content().copy();
810	}
811	parser.next();
812	}
813	free(wantedTag);
814	free(buf);
815
816	return contentFound;
817	}
818	return buf;
819	}
820
821
822	/* be CAREFUL : this function is used to save ARB ASCII database (i.e. properties)
823	* used as well to save perl macros
824	*
825	* when changing GBS_fwrite_string -> GBS_fread_string needs to be fixed as well
826	*
827	* always keep in mind that many users have databases/macros written with older
828	* versions of this function. They MUST still load properly!!!
829	*/
void GBS_fwrite_string(const char *strngi, FILE *out) {
    /* writes 'strngi' enclosed in double quotes to 'out' using the following encoding:
     *   '\n'                 -> \n
     *   '\t'                 -> \t
     *   other ASCII  1..24   -> \A..\X
     *   ASCII 25..31         -> \0..\6
     *   '"' and '\'          -> prefixed with '\'
     *   anything else        -> written unchanged
     */
    const unsigned char *in = (const unsigned char *)strngi;

    putc('"', out);

    for (int c = *in++; c; c = *in++) {
        if (c >= 32) {
            if (c == '"' || c == '\\') putc('\\', out); // escape quote and backslash
            putc(c, out);
            continue;
        }

        // control character
        putc('\\', out);
        if      (c == '\n') putc('n', out);
        else if (c == '\t') putc('t', out);
        else if (c<25)      putc(c+'@', out);       // characters ASCII 0..24 encoded as \@..\X (\n and \t are done above)
        else                putc(c+('0'-25), out);  // characters ASCII 25..31 encoded as \0..\6
    }

    putc('"', out);
}
864
865	/* Read a string from a file written by GBS_fwrite_string,
866	* Searches first '"'
867	*
868	* WARNING : changing this function affects perl-macro execution (read warnings for GBS_fwrite_string)
869	* any changes should be done in GBS_fconvert_string too.
870	*/
871
872	static char GBS_fread_string(FILE in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused!
873	GBS_strstruct *strstr = GBS_stropen(1024);
874	int x;
875
876	while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"'
877
878	if (x != EOF) {
879	while ((x = getc(in)) != '"') {
880	if (x == EOF) break;
881	if (x == '\\') {
882	x = getc(in); if (x==EOF) break;
883	if (x == 'n') {
884	GBS_chrcat(strstr, '\n');
885	continue;
886	}
887	if (x == 't') {
888	GBS_chrcat(strstr, '\t');
889	continue;
890	}
891	if (x>='@' && x <= '@' + 25) {
892	GBS_chrcat(strstr, x-'@');
893	continue;
894	}
895	if (x>='0' && x <= '9') {
896	GBS_chrcat(strstr, x-('0'-25));
897	continue;
898	}
899	// all other backslashes are simply skipped
900	}
901	GBS_chrcat(strstr, x);
902	}
903	}
904	return GBS_strclose(strstr);
905	}
906
907	/* does similar decoding as GBS_fread_string but works directly on an existing buffer
908	* (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!)
909	*
910	* inserts \0 behind decoded string (removes the closing '"')
911	* returns a pointer behind the end (") of the _encoded_ string
912	* returns NULL if a 0-character is found
913	*/
914	char GBS_fconvert_string(char buffer) {
915	char *t = buffer;
916	char *f = buffer;
917	int x;
918
919	gb_assert(f[-1] == '"');
920	// the opening " has already been read
921
922	while ((x = *f++) != '"') {
923	if (!x) break;
924
925	if (x == '\\') {
926	x = *f++;
927	if (!x) break;
928
929	if (x == 'n') {
930	*t++ = '\n';
931	continue;
932	}
933	if (x == 't') {
934	*t++ = '\t';
935	continue;
936	}
937	if (x>='@' && x <= '@' + 25) {
938	*t++ = x-'@';
939	continue;
940	}
941	if (x>='0' && x <= '9') {
942	*t++ = x-('0'-25);
943	continue;
944	}
945	// all other backslashes are simply skipped
946	}
947	*t++ = x;
948	}
949
950	if (!x) return 0; // error (string should not contain 0-character)
951	gb_assert(x == '"');
952
953	t[0] = 0;
954	return f;
955	}
956
957	char GBS_replace_tabs_by_spaces(const char text) {
958	int tlen = strlen(text);
959	GBS_strstruct mfile = GBS_stropen(tlen 3/2 + 1);
960	int tabpos = 0;
961	int c;
962
963	while ((c=*(text++))) {
964	if (c == '\t') {
965	int ntab = (tabpos + 8) & 0xfffff8;
966	while (tabpos < ntab) {
967	GBS_chrcat(mfile, ' ');
968	tabpos++;
969	}
970	continue;
971	}
972	tabpos ++;
973	if (c == '\n') {
974	tabpos = 0;
975	}
976	GBS_chrcat(mfile, c);
977	}
978	return GBS_strclose(mfile);
979	}
980
981	char GBS_trim(const char str) {
982	// trim whitespace at beginning and end of 'str'
983	const char *whitespace = " \t\n";
984	while (str[0] && strchr(whitespace, str[0])) str++;
985
986	const char *end = strchr(str, 0)-1;
987	while (end >= str && strchr(whitespace, end[0])) end--;
988
989	return ARB_strpartdup(str, end);
990	}
991
992	static char dated_info(const char info) {
993	char *dated_info = 0;
994	time_t date;
995	if (time(&date) != -1) {
996	char *dstr = ctime(&date);
997	char *nl = strchr(dstr, '\n');
998
999	if (nl) nl[0] = 0; // cut off LF
1000
1001	dated_info = GBS_global_string_copy("%s: %s", dstr, info);
1002	}
1003	else {
1004	dated_info = ARB_strdup(info);
1005	}
1006	return dated_info;
1007	}
1008
1009	char GBS_log_action_to(const char comment, const char *action, bool stamp) {
1010	/*! concatenates 'comment' and 'action'.
1011	* '\n' is appended to existing 'comment' and/or 'action' (if missing).
1012	* @param comment may be NULL (=> result is 'action')
1013	* @param action may NOT be NULL
1014	* @param stamp true -> prefix current timestamp in front of 'action'
1015	* @return heap copy of concatenation
1016	*/
1017	size_t clen = comment ? strlen(comment) : 0;
1018	size_t alen = strlen(action);
1019
1020	GBS_strstruct new_comment = GBS_stropen(clen+1+(stamp ? 100 : 0)+alen+1+1); // + 2\n + \0 + space for stamp
1021
1022	if (comment) {
1023	GBS_strcat(new_comment, comment);
1024	if (clen == 0 \|\| comment[clen-1] != '\n') GBS_chrcat(new_comment, '\n');
1025	}
1026
1027	if (stamp) {
1028	char *dated_action = dated_info(action);
1029	GBS_strcat(new_comment, dated_action);
1030	free(dated_action);
1031	}
1032	else {
1033	GBS_strcat(new_comment, action);
1034	}
1035	if (alen == 0 \|\| action[alen-1] != '\n') GBS_chrcat(new_comment, '\n');
1036
1037	return GBS_strclose(new_comment);
1038	}
1039
1040	const char GBS_funptr2readable(void funptr, bool stripARBHOME) {
1041	// only returns module and offset for static functions :-(
1042	char **funNames = backtrace_symbols(&funptr, 1);
1043	const char *readable_fun = funNames[0];
1044
1045	if (stripARBHOME) {
1046	const char *ARBHOME = GB_getenvARBHOME();
1047	if (ARB_strBeginsWith(readable_fun, ARBHOME)) {
1048	readable_fun += strlen(ARBHOME)+1; // +1 hides slash behind ARBHOME
1049	}
1050	}
1051	return readable_fun;
1052	}
1053
1054	// --------------------------------------------------------------------------------
1055
1056	#ifdef UNIT_TESTS
1057
1058	#include <test_unit.h>
1059
1060	// #define TEST_TEST_MACROS
1061
// Helper functions passed to the TEST_EXPECT_..SEGFAULT / ..ASSERTION_FAILS macros below.
// They are only compiled if ENABLE_CRASH_TESTS (and partly ASSERTION_USED / TEST_TEST_MACROS) is defined.
1062	#ifdef ENABLE_CRASH_TESTS
1063	static void provokesegv() { raise(SIGSEGV); } // deliberately raises SIGSEGV
1064	static void dont_provokesegv() {} // does nothing (raises no signal)
1065	# if defined(ASSERTION_USED)
1066	static void failassertion() { gb_assert(0); } // deliberately fails an assertion
1067	# if defined(TEST_TEST_MACROS)
1068	static void dont_failassertion() {} // does nothing (fails no assertion)
1069	# endif
1070	static void provokesegv_does_not_fail_assertion() {
1071	// provokesegv does not raise assertion
1072	// -> the following assertion fails
1073	TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv);
1074	}
1075	# endif
1076	#endif
1077
// Self-test of the crash-detection test macros: checks that SEGV and assertion failures
// are detected, can be detected repeatedly and can be told apart.
// NOTE(review): the helper functions used here are only defined under ENABLE_CRASH_TESTS
// (resp. ASSERTION_USED); presumably the macros ignore their argument otherwise - confirm in test_unit.h.
1078	void TEST_signal_tests() {
1079	// check whether we can test that no SEGV or assertion failure happened
1080	TEST_EXPECT_NO_SEGFAULT(dont_provokesegv);
1081
1082	// check whether we can test for SEGV and assertion failures
1083	TEST_EXPECT_SEGFAULT(provokesegv);
1084	TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1085
1086	// tests whether signal suppression works multiple times (by repeating tests)
1087	TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1088	TEST_EXPECT_SEGFAULT(provokesegv);
1089
1090	// test whether SEGV can be distinguished from assertion
1091	TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion);
1092
1093	// The following section is disabled, because it will
1094	// provoke test warnings (to test these warnings).
1095	// (enable it when changing any of these TEST_..-macros used here)
1096	#if defined(TEST_TEST_MACROS)
1097	TEST_EXPECT_NO_SEGFAULT__WANTED(provokesegv);
1098
1099	TEST_EXPECT_SEGFAULT__WANTED(dont_provokesegv);
1100	TEST_EXPECT_SEGFAULT__UNWANTED(provokesegv);
1101	#if defined(ASSERTION_USED)
1102	TEST_EXPECT_SEGFAULT__UNWANTED(failassertion);
1103	#endif
1104
1105	TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(dont_failassertion);
1106	TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(failassertion);
1107	TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(provokesegv_does_not_fail_assertion);
1108	#endif
1109	}
1110
// Compares the current content of the local variable 'strstr' (a GBS_strstruct*) with 'content'.
1111	#define EXPECT_CONTENT(content) TEST_EXPECT_EQUAL(GBS_mempntr(strstr), content)
1112
// Exercises the GBS_strstruct append/cut functions and checks the buffer content after each step.
1113	void TEST_GBS_strstruct() {
1114	{
1115	GBS_strstruct *strstr = GBS_stropen(1000); EXPECT_CONTENT("");
1116
1117	GBS_chrncat(strstr, 'b', 3); EXPECT_CONTENT("bbb");
1118	GBS_intcat(strstr, 17); EXPECT_CONTENT("bbb17");
1119	GBS_chrcat(strstr, '_'); EXPECT_CONTENT("bbb17_");
1120	GBS_floatcat(strstr, 3.5); EXPECT_CONTENT("bbb17_3.500000");
1121
1122	TEST_EXPECT_EQUAL(GBS_memoffset(strstr), 14);
1123	GBS_str_cut_tail(strstr, 13); EXPECT_CONTENT("b");
1124	GBS_strcat(strstr, "utter"); EXPECT_CONTENT("butter");
1125	GBS_strncat(strstr, "flying", 3); EXPECT_CONTENT("butterfly");
1126
1127	GBS_strnprintf(strstr, 200, "%c%s", ' ', "flutters");
1128	EXPECT_CONTENT("butterfly flutters");
1129
1130	GBS_strforget(strstr);
1131	}
1132	{
1133	// re-alloc smaller
1134	GBS_strstruct *strstr = GBS_stropen(500); EXPECT_CONTENT("");
1135	GBS_strforget(strstr);
1136	}
1137
1138	// trigger downsize of oversized block
1139	for (int i = 0; i<12; ++i) {
1140	GBS_strstruct *strstr = GBS_stropen(10);
1141	GBS_strforget(strstr);
1142	}
1143
1144	{
1145	GBS_strstruct *strstr = GBS_stropen(10);
1146	size_t oldbufsize = strstr->get_buffer_size();
1147	GBS_chrncat(strstr, 'x', 20); // trigger reallocation of buffer
1148
1149	TEST_EXPECT_DIFFERENT(oldbufsize, strstr->get_buffer_size()); // did we reallocate?
1150	EXPECT_CONTENT("xxxxxxxxxxxxxxxxxxxx");
1151	GBS_strforget(strstr);
1152	}
1153	}
1154
// Applies GBS_shorten_repeated_data() to a heap copy of 'Long' and expects the copy to equal 'Short' afterwards.
1155	#define TEST_SHORTENED_EQUALS(Long,Short) do { \
1156	char *buf = ARB_strdup(Long); \
1157	GBS_shorten_repeated_data(buf); \
1158	TEST_EXPECT_EQUAL(buf, Short); \
1159	free(buf); \
1160	} while(0)
1161
// The expectations below show: runs of 5 or more equal characters are replaced by "c{count}",
// shorter runs are left untouched.
1162	void TEST_GBS_shorten_repeated_data() {
1163	TEST_SHORTENED_EQUALS("12345", "12345");
1164	TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc");
1165	TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc");
1166	TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc");
1167	TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc");
1168	TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc");
1169	TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc");
1170	TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc");
1171	TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc");
1172	TEST_SHORTENED_EQUALS("aaaabc", "aaaabc");
1173	TEST_SHORTENED_EQUALS("aaabc", "aaabc");
1174	TEST_SHORTENED_EQUALS("aabc", "aabc");
1175	TEST_SHORTENED_EQUALS("", "");
1176	}
1177
1178	static const char *hkey_format[] = {
1179	"/%s/bbb/ccc",
1180	"/aaa/%s/ccc",
1181	"/aaa/bbb/%s",
1182	};
1183
1184	inline const char useInHkey(const char fragment, size_t pos) {
1185	return GBS_global_string(hkey_format[pos], fragment);
1186	}
1187
// Inserts 'use' at every position offered by hkey_format and expects GB_check_hkey() to accept each resulting key.
1188	#define TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(use) do { \
1189	for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1190	TEST_EXPECT_NO_ERROR(GB_check_hkey(useInHkey(use, i))); \
1191	} \
1192	} while(0)
1193
// Inserts 'use' at every position offered by hkey_format and expects GB_check_hkey() to report
// an error containing 'contains' for each resulting key.
1194	#define TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(use,contains) do { \
1195	for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1196	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(useInHkey(use, i)), contains); \
1197	} \
1198	} while(0)
1199
1200
// Checks GB_check_key() (plain keys) and GB_check_hkey() (hierarchical keys)
// against keys of boundary lengths and keys containing characters like '/', ',' and '-'.
1201	void TEST_DB_key_checks() {
1202	// plain keys
1203	const char *shortest = "ab";
1204	const char *too_long = "ab345678901234567890123456789012345678901234567890123456789012345";
1205	const char *too_short = shortest+1; // 'shortest' w/o its first character
1206	const char *longest = too_long+1; // 'too_long' w/o its first character
1207
1208	const char *empty = "";
1209	const char *slash = "sub/key";
1210	const char *comma = "no,key";
1211	const char *minus = "no-key";
1212
1213	TEST_EXPECT_NO_ERROR(GB_check_key(shortest));
1214	TEST_EXPECT_NO_ERROR(GB_check_key(longest));
1215
1216	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_short), "too short");
1217	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_long), "too long");
1218	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(empty), "not allowed");
1219
1220	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(slash), "Invalid character");
1221	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(comma), "Invalid character");
1222	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(minus), "Invalid character");
1223
1224	// hierarchical keys
1225	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(shortest);
1226	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(longest);
1227
1228	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_short, "too short");
1229	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_long, "too long");
1230	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(empty, "not allowed");
1231
1232	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(slash); // unlike in plain keys, '/' is accepted here
1233	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(comma, "Invalid character ','");
1234	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(minus, "'>' expected after '-'");
1235	}
1236
// Converts 'str' with GBS_string_2_key(), expects the result to equal 'expected'
// and to be accepted by GB_check_key(); frees the generated key.
1237	#define TEST_STRING2KEY(str,expected) do { \
1238	char *as_key = GBS_string_2_key(str); \
1239	TEST_EXPECT_EQUAL(as_key, expected); \
1240	TEST_EXPECT_NO_ERROR(GB_check_key(as_key)); \
1241	free(as_key); \
1242	} while(0)
1243
// Checks key generation for plain input, input with invalid characters and input of invalid length.
1244	void TEST_DB_key_generation() {
1245	TEST_STRING2KEY("abc", "abc");
1246	TEST_STRING2KEY("a b c", "a_b_c");
1247
1248	// invalid chars
1249	TEST_STRING2KEY("string containing \"double-quotes\", 'quotes' and other:shit!*&^@!%@(",
1250	"string_containing_doublequotes_quotes_and_othershit");
1251
1252	// length tests
1253	TEST_STRING2KEY("a", "a_"); // too short
1254	TEST_STRING2KEY("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // too long
1255	"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1256	}
1257
// Tests the helper class TextRef and then TaggedContentParser on tagged, untagged,
// empty, content-less and multi-tag input.
1258	void TEST_TaggedContentParser() {
1259	// test helper class TextRef:
1260	TEST_REJECT(TextRef().defined()); // default to undefined
1261	{
1262	TextRef bla("blakjahd", 3);
1263	TEST_EXPECT(bla.defined());
1264	TEST_EXPECT_EQUAL(bla.get_length(), 3);
1265
1266	TEST_EXPECT(bla.compare("bl") > 0);
1267	TEST_EXPECT(bla.compare("bla") == 0);
1268	TEST_EXPECT(bla.compare("blase") < 0);
1269
// NOTE(review): offset 1 + length 10 exceeds the literal as shown here; whitespace inside
// the literals of this block was presumably lost - verify against the repository.
1270	TextRef spaced(" spaced "+1, 10);
1271	TEST_EXPECT(spaced.headTrimmed().compare("spaced ") == 0);
1272	TEST_EXPECT(spaced.tailTrimmed().compare(" spaced") == 0);
1273	TEST_EXPECT(spaced.trimmed ().compare("spaced") == 0);
1274	}
1275
1276	const char *text = " untagged [tag] tagged [empty] ";
1277
1278	TextRef cr_untagged(strstr(text, "untagged"), 8);
1279	TextRef cr_tagged (strstr(text, "tagged"), 6);
1280	TextRef tr_tag (strstr(text, "tag"), 3);
1281	TextRef tr_empty (strstr(text, "empty"), 5);
1282
1283	// test TaggedContentParser:
1284	{
1285	TaggedContentParser parser(text);
1286
1287	TEST_EXPECT(parser.has_part());
1288	TEST_REJECT(parser.has_tag());
1289	TEST_EXPECT(parser.get_content().compare("untagged") == 0);
1290
1291	parser.next();
1292
1293	TEST_EXPECT(parser.has_part());
1294	TEST_EXPECT(parser.get_tag ().compare("tag") == 0);
1295	TEST_EXPECT(parser.get_content().compare("tagged") == 0);
1296
1297	parser.next();
1298
1299	TEST_EXPECT(parser.has_part());
1300	TEST_EXPECT(parser.get_tag().compare("empty") == 0);
1301	TEST_REJECT(parser.has_content());
1302
1303	parser.next();
1304
1305	TEST_REJECT(parser.has_part());
1306	}
1307	{ // parse untagged input
1308	TaggedContentParser parser("hi");
1309	TEST_EXPECT(parser.has_part());
1310	TEST_REJECT(parser.has_tag());
1311	TEST_EXPECT(parser.get_content().compare("hi") == 0);
1312	parser.next();
1313	TEST_REJECT(parser.has_part());
1314	}
1315	{ // parse empty input
1316	TaggedContentParser empty(""); TEST_REJECT(empty.has_part());
1317	TaggedContentParser white(" \t\n "); TEST_REJECT(white.has_part());
1318	}
1319	{ // parse single tag w/o content
1320	TaggedContentParser parser(" [hello] ");
1321	TEST_EXPECT(parser.has_part());
1322	TEST_EXPECT(parser.get_tag().compare("hello") == 0);
1323	TEST_REJECT(parser.has_content());
1324	parser.next();
1325	TEST_REJECT(parser.has_part());
1326	}
1327	{ // parse multi-tags
1328	TaggedContentParser parser(" [ t1 , t2 ] t");
1329	TEST_EXPECT(parser.has_part());
1330	TEST_EXPECT(parser.get_tag().compare("t1") == 0);
1331	TEST_EXPECT(parser.get_content().compare("t") == 0);
1332	parser.next();
1333	TEST_EXPECT(parser.has_part());
1334	TEST_EXPECT(parser.get_tag().compare("t2") == 0);
1335	TEST_EXPECT(parser.get_content().compare("t") == 0);
1336	parser.next();
1337	TEST_REJECT(parser.has_part());
1338	}
1339	}
1340
// Calls GBS_merge_tagged_strings() and expects its result to equal 'expected'; frees the result.
// Mind the argument order: the macro takes (t1,t2,r1,r2,s1,s2), the function is called with (s1,t1,r1,s2,t2,r2).
1341	#define TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected) do { \
1342	char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1343	TEST_EXPECT_EQUAL(result, expected); \
1344	free(result); \
1345	} while(0)
1346
// Same call as TEST_MERGE_TAGGED, but checked via TEST_EXPECT_EQUAL__BROKEN using 'expected' and 'got'.
1347	#define TEST_MERGE_TAGGED__BROKEN(t1,t2,r1,r2,s1,s2,expected,got) do { \
1348	char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1349	TEST_EXPECT_EQUAL__BROKEN(result, expected, got); \
1350	free(result); \
1351	} while(0)
1352
// Tests GBS_merge_tagged_strings() via TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected):
// merging, updating, appending and deleting of tagged content.
1353	void TEST_merge_tagged_strings() {
1354	// merge two fields:
1355	TEST_MERGE_TAGGED("S", "D", "", "", "source", "dest", "[D_] dest [S_] source");
1356	TEST_MERGE_TAGGED("SRC", "DST", "", 0, "source", "dest", "[DST] dest [SRC] source");
1357	TEST_MERGE_TAGGED("SRC", "DST", 0, "", "source", "dest", "[DST] dest [SRC] source");
1358	TEST_MERGE_TAGGED("SRC", "DST", 0, 0, "sth", "sth", "[DST,SRC] sth");
1359
1360	TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth", "sth", "[DST,SRC] sth"); // show default tags do not get deleted
1361	TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth [SRC] del", "sth [DST] del", "[DST,SRC] sth"); // exception: already present default tags
1362
1363	// update fields:
1364	TEST_MERGE_TAGGED("SRC", "DST", 0, "SRC", "newsource", " [DST] dest [SRC] source", "[DST] dest [SRC] newsource");
1365	TEST_MERGE_TAGGED("SRC", "DST", 0, "SRC", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1366	TEST_MERGE_TAGGED("SRC", "DST", 0, "SRC", "newsource", " [DST,src] sth", "[DST] sth [SRC] newsource");
1367	TEST_MERGE_TAGGED("SRC", "DST", 0, "src", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1368	TEST_MERGE_TAGGED("SRC", "DST", 0, "SRC", "sth", " [DST] sth [SRC] source", "[DST,SRC] sth");
1369
1370	// append (opposed to update this keeps old entries with same tag; useless?)
1371	TEST_MERGE_TAGGED("SRC", "DST", 0, 0, "newsource", "[DST] dest [SRC] source", "[DST] dest [SRC] newsource [SRC] source");
1372	TEST_MERGE_TAGGED("SRC", "DST", 0, 0, "newsource", "[DST,SRC] sth", "[DST,SRC] sth [SRC] newsource");
1373	TEST_MERGE_TAGGED("SRC", "DST", 0, 0, "sth", "[DST] sth [SRC] source", "[DST,SRC] sth [SRC] source");
1374
1375	// merge three fields:
1376	TEST_MERGE_TAGGED("OTH", "DST", 0, 0, "oth", " [DST] dest [SRC] source", "[DST] dest [OTH] oth [SRC] source");
1377	TEST_MERGE_TAGGED("OTH", "DST", 0, 0, "oth", " [DST,SRC] sth", "[DST,SRC] sth [OTH] oth");
1378	TEST_MERGE_TAGGED("OTH", "DST", 0, 0, "sth", " [DST,SRC] sth", "[DST,OTH,SRC] sth");
1379	TEST_MERGE_TAGGED("OTH", "DST", 0, 0, "dest", " [DST] dest [SRC] source", "[DST,OTH] dest [SRC] source");
1380	TEST_MERGE_TAGGED("OTH", "DST", 0, 0, "source", " [DST] dest [SRC] source", "[DST] dest [OTH,SRC] source");
1381
1382	// same tests as in section above, but vv:
1383	TEST_MERGE_TAGGED("DST", "OTH", 0, 0, " [DST] dest [SRC] source", "oth", "[DST] dest [OTH] oth [SRC] source");
1384	TEST_MERGE_TAGGED("DST", "OTH", 0, 0, " [DST,SRC] sth", "oth", "[DST,SRC] sth [OTH] oth");
1385	TEST_MERGE_TAGGED("DST", "OTH", 0, 0, " [DST,SRC] sth", "sth", "[DST,OTH,SRC] sth");
1386	TEST_MERGE_TAGGED("DST", "OTH", 0, 0, " [DST] dest [SRC] source", "dest", "[DST,OTH] dest [SRC] source");
1387	TEST_MERGE_TAGGED("DST", "OTH", 0, 0, " [DST] dest [SRC] source", "source", "[DST] dest [OTH,SRC] source");
1388
1389	// test real-merges (content existing in both strings):
1390	TEST_MERGE_TAGGED("P1", "P2", 0, 0, " pre1 [C1] c1 [C2] c2", "pre2[C2]c2[C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1] pre1 [P2] pre2");
1391	TEST_MERGE_TAGGED("P1", "P2", 0, 0, " pre [C1] c1 [C2] c2", "pre [C2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content for [C2]
1392	TEST_MERGE_TAGGED("P1", "P2", 0, 0, " pre [C1] c1 [C2] c2", "pre [c2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content + different tag-case for [C2] (tests that tags are case-insensitive!)
1393	TEST_MERGE_TAGGED("P1", "P2", 0, 0, " pre [C1] c1 [C2] c2a", "pre [C2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content for [C2] -> inserts that tag multiple times
1394	TEST_MERGE_TAGGED("P1", "P2", 0, 0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // continue processing last result (multiple tags with same name are handled)
1395	TEST_MERGE_TAGGED("P1", "P2", 0, 0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C2] c2b [C3]c3 [C2] c2a", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // merge multiple tags with same name
1396	TEST_MERGE_TAGGED("P1", "P2", 0, 0, " pre [C1] c1 [C2] c2a", "pre [c2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content and different tag-case for [C2]
1397	TEST_MERGE_TAGGED("P1", "P2", 0, 0, " pre [C1,C4] c1 [C2] c2a ", "pre [c2] c2b [C4,C3]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // multitags
1398	TEST_MERGE_TAGGED("P1", "P2", 0, 0, " pre [ C1, C4] c1 [C2 ] c2a ", "pre [ c2] c2b [C4, C3 ]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // spaced-multitags
1399
1400	// merge two tagged string with deleting
1401	#define DSTSRC1 "[DST] dest1 [SRC] src1"
1402	#define DSTSRC2 "[DST] dest2 [SRC] src2"
1403	#define DSTSRC2LOW "[dst] dest2 [src] src2"
1404
1405	TEST_MERGE_TAGGED("O1", "O2", 0, 0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2");
1406	TEST_MERGE_TAGGED("O1", "O2", "SRC", 0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src2");
1407	TEST_MERGE_TAGGED("O1", "O2", 0, "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src1 [SRC] src2");
1408	TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1409	TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1410	TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1411	TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1412	TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1413	TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1414	TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1415	TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1416	TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1417	TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1418	TEST_MERGE_TAGGED("O1", "O2", "SRC,DST", "DST,SRC", DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2"); // delete does not handle multiple tags (yet)
1419	}
1420
// Tests GB_read_as_tagged_string() on entries created in a fresh database:
// non-string entries, a string with many tag variants, incomplete tags and pathological tags.
// NOTE(review): several checks below look identical or contradictory (e.g. rejecting and
// expecting content for the same whitespace-only tag); whitespace inside the literals was
// presumably lost - verify against the repository.
1421	void TEST_read_tagged() {
1422	GB_shell shell;
1423	GBDATA *gb_main = GB_open("new.arb", "c");
1424	{
1425	GB_transaction ta(gb_main);
1426
1427	{
1428	GBDATA *gb_int_entry = GB_create(gb_main, "int", GB_INT);
1429	TEST_EXPECT_NO_ERROR(GB_write_int(gb_int_entry, 4711));
1430	TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_int_entry, "USELESS")); // reading from GB_INT doesn't make sense, but has to work w/o error
1431
1432	GBDATA *gb_ints_entry = GB_create(gb_main, "int", GB_INTS);
1433	GB_UINT4 ints[] = { 1, 2 };
1434	TEST_EXPECT_NO_ERROR(GB_write_ints(gb_ints_entry, ints, 2));
1435	TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_ints_entry, "USELESS")); // reading from GB_INTS doesn't make sense, but has to work w/o error
1436	}
1437
// The following helper macros operate on the local variables 'gb_entry' and 'tagged_string'.
1438	#define TEST_EXPECT_TAG_CONTENT(tag,expected) TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag), expected)
1439	#define TEST_REJECT_TAG_CONTENT(tag) TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag))
1440	#define TEST_EXPECT_FULL_CONTENT(tag) TEST_EXPECT_TAG_CONTENT(tag,tagged_string)
1441
1442	GBDATA *gb_entry = GB_create(gb_main, "str", GB_STRING);
1443	const char *tagged_string = "[T1,T2] t12 [T3] t3[T4]t4[][]xxx[AA]aa[WW]w1 [WW]w2 [BB]bb [XX]x1 [XX]x2 [yy] yy [Y] y [EMPTY][FAKE,EMPTY]fake[ SP1ST, SPACED, PADDED ,UNSPACED,_SCORED_,FOLLOWED ,FOLLAST ] spaced [LAST] last ";
1444	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1445
1446	TEST_EXPECT_FULL_CONTENT(NULL);
1447	TEST_EXPECT_FULL_CONTENT("");
1448	TEST_REJECT_TAG_CONTENT(" "); // searches for tag '_' (no such tag)
1449
1450	TEST_EXPECT_TAG_CONTENT("T1", "t12");
1451	TEST_EXPECT_TAG_CONTENT("T2", "t12");
1452	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1453	TEST_EXPECT_TAG_CONTENT("T4", "t4[][]xxx");
1454
1455	TEST_EXPECT_TAG_CONTENT("AA", "aa");
1456	TEST_EXPECT_TAG_CONTENT("BB", "bb");
1457	TEST_EXPECT_TAG_CONTENT("WW", "w1"); // now finds 1st occurrence of [WW]
1458	TEST_EXPECT_TAG_CONTENT("XX", "x1");
1459	TEST_EXPECT_TAG_CONTENT("YY", "yy");
1460	TEST_EXPECT_TAG_CONTENT("yy", "yy");
1461
1462	TEST_REJECT_TAG_CONTENT("Y");
1463	// TEST_EXPECT_TAG_CONTENT("Y", "y"); // @@@ tags with length == 1 are never found -> should be handled when used via GUI
1464
1465	TEST_EXPECT_TAG_CONTENT("EMPTY", "fake"); // now reports 1st non-empty content
1466	TEST_EXPECT_TAG_CONTENT("FAKE", "fake");
1467	TEST_EXPECT_TAG_CONTENT("fake", "fake");
1468
1469	TEST_REJECT_TAG_CONTENT("NOSUCHTAG");
1470	TEST_EXPECT_TAG_CONTENT("SPACED", "spaced");
1471	TEST_EXPECT_TAG_CONTENT("SP1ST", "spaced");
1472	TEST_REJECT_TAG_CONTENT(" SPACED"); // ditto (specified space is converted into '_' before searching tag)
1473	TEST_REJECT_TAG_CONTENT("_SPACED"); // not found (tag stored with space, search performed for '_SPACED')
1474	TEST_EXPECT_TAG_CONTENT("PADDED", "spaced");
1475	TEST_EXPECT_TAG_CONTENT("FOLLOWED", "spaced");
1476	TEST_EXPECT_TAG_CONTENT("FOLLAST", "spaced");
1477
1478	TEST_EXPECT_TAG_CONTENT("_SCORED_", "spaced");
1479	TEST_EXPECT_TAG_CONTENT(" SCORED ", "spaced");
1480	TEST_EXPECT_TAG_CONTENT("UNSPACED", "spaced");
1481	TEST_EXPECT_TAG_CONTENT("LAST", "last");
1482
1483	// test incomplete tags
1484	tagged_string = "bla [WHATEVER hello";
1485	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1486	TEST_REJECT_TAG_CONTENT("WHATEVER");
1487
1488	tagged_string = "bla [T1] t1 [T2 t2 [T3] t3";
1489	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1490	TEST_EXPECT_TAG_CONTENT("T1", "t1 [T2 t2");
1491	TEST_REJECT_TAG_CONTENT("T2"); // tag is unclosed
1492	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1493
1494	// test pathological tags
1495	tagged_string = "bla [T1] t1 [ ] sp1 [ ] sp2 [___] us [T3] t3 [_a] a";
1496	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1497	TEST_EXPECT_TAG_CONTENT("T1", "t1 [ ] sp1 [ ] sp2");
1498	TEST_EXPECT_FULL_CONTENT("");
1499	TEST_REJECT_TAG_CONTENT(" ");
1500	TEST_REJECT_TAG_CONTENT(" ");
1501	TEST_REJECT_TAG_CONTENT(",");
1502	TEST_EXPECT_TAG_CONTENT(", a", "a"); // searches for tag '_a'
1503	TEST_EXPECT_TAG_CONTENT(", a,", "a"); // ditto
1504	TEST_EXPECT_TAG_CONTENT(", ,a,", "a"); // ditto
1505	TEST_EXPECT_TAG_CONTENT(" ", "us");
1506	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1507	}
1508	GB_close(gb_main);
1509	}
1510
// Calls GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv) and expects a result equal
// to 'expected' without exported error. Requires a variable 'callEnv' in the scope of use.
1511	#define TEST_EXPECT_EVAL_TAGGED(in,dtag,tag,aci,expected) do{ \
1512	TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED( \
1513	GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1514	expected); \
1515	}while(0)
1516
// Same call, but expects no result and an exported error containing 'expectedErrorPart'.
1517	#define TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(in,dtag,tag,aci,expectedErrorPart) do{ \
1518	TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS( \
1519	GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1520	expectedErrorPart); \
1521	}while(0)
1522
// Tests GBS_modify_tagged_string_with_ACI() via TEST_EXPECT_EVAL_TAGGED(in,dtag,tag,aci,expected)
// in the context of two species loaded from "TEST_loadsave.arb".
// NOTE(review): some consecutive checks below are textually identical; whitespace inside
// the literals was presumably lost - verify against the repository.
1523	void TEST_tagged_eval() {
1524	GB_shell shell;
1525	GBDATA *gb_main = GB_open("TEST_loadsave.arb", "r");
1526	{
1527	GB_transaction ta(gb_main);
1528	GBL_env env(gb_main, "tree_missing");
1529
1530	{
1531	GBDATA *gb_species = GBT_find_species(gb_main, "MhcBurto");
1532	TEST_REJECT_NULL(gb_species);
1533	GBL_call_env callEnv(gb_species, env);
1534
1535	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", "", "[DEF] bla");
1536	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", NULL, "[DEF] bla");
1537	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", ":bla=blub", "[DEF] bla");
1538	TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", ":bla=blub", "[TAG] blub");
1539	TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", "len", "[TAG] 3");
1540
1541	// empty tags:
1542	TEST_EXPECT_EVAL_TAGGED("[empty] ", "def", "empty", NULL, "");
1543	TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULL, "[FILLED] xxx");
1544	TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULL, "[FILLED] xxx");
1545	TEST_EXPECT_EVAL_TAGGED("[empty][filled] xxx", "def", "empty", NULL, "[FILLED] xxx");
1546	TEST_EXPECT_EVAL_TAGGED("[filled] xxx [empty]", "def", "empty", NULL, "[FILLED] xxx");
1547
1548	#define THREE_TAGS "[TAG] tag [tip] tip [top] top"
1549	#define THREE_TAGS_UPCASE "[TAG] tag [TIP] tip [TOP] top"
1550
1551	// dont eval:
1552	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", NULL, THREE_TAGS_UPCASE);
1553	// eval SRT:
1554	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", ":=<>", "[TAG] <tag> [TIP] tip [TOP] top");
1555	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tag", ":=<>", "[TAG] <tag> [TIP] tip [TOP] top");
1556	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":=()", "[TAG] tag [TIP] (tip) [TOP] top");
1557	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TIP", ":=()", "[TAG] tag [TIP] (tip) [TOP] top");
1558	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=", "[TAG] tag [TOP] top"); // tag emptied by SRT was removed from result
1559	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", ":=-*1", "[TAG] tag [TIP] tip [TOP] top-top");
1560	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":i=o", "[TAG] tag [TIP,TOP] top"); // merge tags
1561	// eval ACI:
1562	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", "len", "[TAG] tag [TIP] 3 [TOP] top");
1563	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", "len", "[TAG] tag [TIP] tip [TOP] 3");
1564
1565	// test SRT/ACI errors:
1566	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", ":*", "no '=' found");
1567	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("untagged", "def", "def", ":*", "no '=' found");
1568	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", "illcmd", "Unknown command 'illcmd'");
1569	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("un [tagged", "def", "def", "illcmd", "Unknown command 'illcmd'");
1570
1571	// no error raised, if expression not applied:
1572	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "no", "illcmd", THREE_TAGS_UPCASE);
1573
1574	// incomplete tags
1575	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":=<>", "[DEF] <{no tag>");
1576	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":* =<2,*1>", "[DEF] <tag,{no>");
1577	TEST_EXPECT_EVAL_TAGGED("[no [tag", "def", "def", ":* =<2,*1>", "[DEF] <{tag,{no>");
1578	TEST_EXPECT_EVAL_TAGGED("[no [tag] xx", "def", "def", ":* =<2,*1>", "[DEF] {no [TAG] xx"); // SRT changes nothing here (no match)
1579	TEST_EXPECT_EVAL_TAGGED("[no [tag[]", "def", "def", ":* =<2,*1>", "[DEF] <{tag{},{no>");
1580	TEST_EXPECT_EVAL_TAGGED("[no [tag[] xx","def", "def", ":* =<2,*1>", "[DEF] <{tag{} xx,{no>");
1581	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":* =<2,*1>", "[DEF] <tag,no>");
1582	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":no=yes", "[DEF] {yes tag");
1583	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":no=yes", "[DEF] yes tag");
1584	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "DEF", ":no=yes", "[DEF] yes tag");
1585	TEST_EXPECT_EVAL_TAGGED("no tag", "DEF", "def", ":no=yes", "[DEF] yes tag");
1586	TEST_EXPECT_EVAL_TAGGED("kept [trunk", "def", "def", ":=<>", "[DEF] <kept {trunk>");
1587	TEST_EXPECT_EVAL_TAGGED("kept", "def", "def", ":=<>", "[DEF] <kept>");
1588	}
1589
1590	{
1591	GBDATA *gb_species = GBT_find_species(gb_main, "MetMazei");
1592	TEST_REJECT_NULL(gb_species);
1593	GBL_call_env callEnv(gb_species, env);
1594
1595	// run scripts using context:
1596	TEST_EXPECT_EVAL_TAGGED("[T1,T2] name='$n'", "def", "T1", ":$n=*(name)", "[T1] name='MetMazei' [T2] name='$n'");
1597	TEST_EXPECT_EVAL_TAGGED("[T1,T2] seqlen=$l", "def", "T2", ":$l=*(\|sequence\|len)", "[T1] seqlen=$l [T2] seqlen=165");
1598	TEST_EXPECT_EVAL_TAGGED("[T1,T2] nuc", "def", "T1", "dd;\"=\";command(sequence\|count(ACGTUN))", "[T1] nuc=66 [T2] nuc");
1599
1600	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax='$t'", "def", "def", ":$t=*(\|taxonomy(2))", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1601	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax", "def", "def", "dd;\"=\";taxonomy(2)", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1602
1603	// content before 1st tag:
1604	TEST_EXPECT_EVAL_TAGGED("untagged [tag] tagged", "def", "tag", ":g=G", "[DEF] untagged [TAG] taGGed");
1605	TEST_EXPECT_EVAL_TAGGED(" [tag] tagged", "def", "tag", ":g=G", "[TAG] taGGed");
1606
1607	// test elimination of leading/trailing whitespace:
1608	TEST_EXPECT_EVAL_TAGGED(" untagged ", "def", "def", ":g=G", "[DEF] untaGGed"); // untagged content
1609	TEST_EXPECT_EVAL_TAGGED("[tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed");
1610	TEST_EXPECT_EVAL_TAGGED(" [trail] trail [tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed [TRAIL] trail");
1611
1612	#define MIXED_TAGS "[tag] tag [tip,top] tiptop [xx,yy,zz] zzz"
1613
1614	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "tip", ":tip=top", "[TAG] tag [TIP] toptop [TOP] tiptop [XX,YY,ZZ] zzz");
1615	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "yy", ":zzz=tiptop", "[TAG] tag [TIP,TOP,YY] tiptop [XX,ZZ] zzz");
1616	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "top", ":tiptop=zzz", "[TAG] tag [TIP] tiptop [TOP,XX,YY,ZZ] zzz");
1617	}
1618	}
1619	GB_close(gb_main);
1620	}
1621
1622	void TEST_log_action() {
1623	for (int stamped = 0; stamped<=1; ++stamped) {
1624	TEST_ANNOTATE(GBS_global_string("stamped=%i", stamped));
1625	{
1626	char *logged = GBS_log_action_to("comment", "action", stamped);
1627	if (stamped) {
1628	TEST_EXPECT_CONTAINS(logged, "comment\n");
1629	TEST_EXPECT_CONTAINS(logged, "action\n");
1630	}
1631	else {
1632	TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1633	}
1634	free(logged);
1635	}
1636	{
1637	char *logged = GBS_log_action_to("comment\n", "action", stamped);
1638	if (stamped) {
1639	TEST_EXPECT_CONTAINS(logged, "comment\n");
1640	TEST_EXPECT_CONTAINS(logged, "action\n");
1641	}
1642	else {
1643	TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1644	}
1645	free(logged);
1646	}
1647	{
1648	char *logged = GBS_log_action_to("", "action", stamped);
1649	if (stamped) {
1650	TEST_EXPECT_EQUAL(logged[0], '\n');
1651	TEST_EXPECT_CONTAINS(logged, "action\n");
1652	}
1653	else {
1654	TEST_EXPECT_EQUAL(logged, "\naction\n");
1655	}
1656	free(logged);
1657	}
1658	{
1659	char *logged = GBS_log_action_to(NULL, "action\n", stamped); // test action with trailing LF
1660	if (stamped) {
1661	TEST_EXPECT_DIFFERENT(logged[0], '\n');
1662	TEST_EXPECT_CONTAINS(logged, "action\n");
1663	}
1664	else {
1665	TEST_EXPECT_EQUAL(logged, "action\n");
1666	}
1667	free(logged);
1668	}
1669	}
1670	}
1671	TEST_PUBLISH(TEST_log_action);
1672
1673	#endif // UNIT_TESTS
1674

Note: See TracBrowser for help on using the repository browser.

Download in other formats: