Context Navigation

source: tags/arb-7.0/ARBDB/adstring.cxx

Visit:

Last change on this file was 18319, checked in by westram, 6 years ago
fix whitespace.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 65.0 KB

Line
1	// =============================================================== //
2	// //
3	// File : adstring.cxx //
4	// Purpose : various string functions //
5	// //
6	// Institute of Microbiology (Technical University Munich) //
7	// http://www.arb-home.de/ //
8	// //
9	// =============================================================== //
10
11	#include <arb_backtrace.h>
12	#include <arb_strbuf.h>
13	#include <arb_defs.h>
14	#include <arb_str.h>
15
16	#include "gb_key.h"
17	#include "gb_aci.h"
18
19	#include <SigHandler.h>
20
21	#include <execinfo.h>
22
23	#include <cstdarg>
24	#include <cctype>
25	#include <cerrno>
26	#include <ctime>
27	#include <setjmp.h>
28
29	#include <valgrind.h>
30
31	static char GBS_string_2_key_with_exclusions(const char str, const char *additional) {
32	// converts any string to a valid key (all chars in 'additional' are additionally allowed)
33	char buf[GB_KEY_LEN_MAX+1];
34	int i;
35	int c;
36	for (i=0; i<GB_KEY_LEN_MAX;) {
37	c = *(str++);
38	if (!c) break;
39
40	if (c==' ' \|\| c == '_') {
41	buf[i++] = '_';
42	}
43	else if (isalnum(c) \|\| strchr(additional, c)) {
44	buf[i++] = c;
45	}
46	}
47	for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_';
48	buf[i] = 0;
49	return ARB_strdup(buf);
50	}
51
52	char GBS_string_2_key(const char str) { // converts any string to a valid key
53	return GBS_string_2_key_with_exclusions(str, "");
54	}
55
56	char GB_memdup(const char source, size_t len) {
57	char *dest = ARB_alloc<char>(len);
58	memcpy(dest, source, len);
59	return dest;
60	}
61
62	static const char *EMPTY_KEY_NOT_ALLOWED = "Empty key is not allowed";
63
64	inline __ATTR__USERESULT GB_ERROR check_key(const char *key, int len) {
65	// test if 'key' is a valid non-hierarchical database key.
66	// i.e. contains only letters, numbers and '_' and
67	// is inside length constraints GB_KEY_LEN_MIN/GB_KEY_LEN_MAX.
68
69	if (len < GB_KEY_LEN_MIN) {
70	if (!len) return EMPTY_KEY_NOT_ALLOWED;
71	return GBS_global_string("Invalid key '%s': too short", key);
72	}
73	if (len > GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key);
74
75	for (int i = 0; i<len; ++i) {
76	char c = key[i];
77	bool validChar = isalnum(c) \|\| c == '_';
78	if (!validChar) {
79	return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key);
80	}
81	}
82
83	return NULp;
84	}
85	GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT
86	// test if 'key' is a valid non-hierarchical database key
87	// (i.e. a valid name for a container or field).
88
89	return check_key(key, key ? strlen(key) : 0);
90	}
91
92	GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT
93	// test whether 'key' is a hierarchical key,
94	// i.e. consists of subkeys (accepted by GB_check_key), separated by '/'.
95
96	GB_ERROR err = NULp;
97
98	if (key && key[0] == '/') ++key; // accept + remove leading '/'
99	if (!key \|\| !key[0]) err = EMPTY_KEY_NOT_ALLOWED; // reject NULp, empty (or single slash)
100
101	while (!err && key[0]) {
102	int nonSlashPart = strcspn(key, "/");
103
104	err = check_key(key, nonSlashPart);
105	if (!err) {
106	key += nonSlashPart;
107	if (key[0] == '/') {
108	++key;
109	if (key[0] == 0) { // nothing after slash
110	err = EMPTY_KEY_NOT_ALLOWED;
111	}
112	}
113	else {
114	gb_assert(key[0] == 0);
115	}
116	}
117	}
118	return err;
119	}
120
121	// ----------------------------------------------
122	// escape/unescape characters in strings
123
124	char GBS_escape_string(const char str, const char *chars_to_escape, char escape_char) {
125	/*! escape characters in 'str'
126	*
127	* uses a special escape-method, which eliminates all 'chars_to_escape' completely
128	* from str (this makes further processing of the string more easy)
129	*
130	* @param str string to escape
131	*
132	* @param escape_char is the character used for escaping. For performance reasons it
133	* should be a character rarely used in 'str'.
134	*
135	* @param chars_to_escape may not contain 'A'-'Z' (these are used for escaping)
136	* and it may not be longer than 26 bytes
137	*
138	* @return heap copy of escaped string
139	*
140	* Inverse of GBS_unescape_string()
141	*/
142
143	int len = strlen(str);
144	char buffer = ARB_alloc<char>(2len+1);
145	int j = 0;
146	int i;
147
148	gb_assert(strlen(chars_to_escape) <= 26);
149	gb_assert(!strchr(chars_to_escape, escape_char)); // escape_char may not be included in chars_to_escape
150
151	for (i = 0; str[i]; ++i) {
152	if (str[i] == escape_char) {
153	buffer[j++] = escape_char;
154	buffer[j++] = escape_char;
155	}
156	else {
157	const char *found = strchr(chars_to_escape, str[i]);
158	if (found) {
159	buffer[j++] = escape_char;
160	buffer[j++] = (found-chars_to_escape+'A');
161
162	gb_assert(found[0]<'A' \|\| found[0]>'Z'); // illegal character in chars_to_escape
163	}
164	else {
165
166	buffer[j++] = str[i];
167	}
168	}
169	}
170	buffer[j] = 0;
171
172	return buffer;
173	}
174
175	char GBS_unescape_string(const char str, const char *escaped_chars, char escape_char) {
176	//! inverse of GB_escape_string() - for params see there
177
178	int len = strlen(str);
179	char *buffer = ARB_alloc<char>(len+1);
180	int j = 0;
181	int i;
182
183	#if defined(ASSERTION_USED)
184	int escaped_chars_len = strlen(escaped_chars);
185	#endif // ASSERTION_USED
186
187	gb_assert(strlen(escaped_chars) <= 26);
188	gb_assert(!strchr(escaped_chars, escape_char)); // escape_char may not be included in chars_to_escape
189
190	for (i = 0; str[i]; ++i) {
191	if (str[i] == escape_char) {
192	if (str[i+1] == escape_char) {
193	buffer[j++] = escape_char;
194	}
195	else {
196	int idx = str[i+1]-'A';
197
198	gb_assert(idx >= 0 && idx<escaped_chars_len);
199	buffer[j++] = escaped_chars[idx];
200	}
201	++i;
202	}
203	else {
204	buffer[j++] = str[i];
205	}
206	}
207	buffer[j] = 0;
208
209	return buffer;
210	}
211
212	char *GBS_eval_env(GB_CSTR p) {
213	GB_ERROR error = NULp;
214	GB_CSTR ka;
215	GBS_strstruct *out = GBS_stropen(1000);
216
217	while ((ka = GBS_find_string(p, "$(", 0))) {
218	GB_CSTR kz = strchr(ka, ')');
219	if (!kz) {
220	error = GBS_global_string("missing ')' for envvar '%s'", p);
221	break;
222	}
223	else {
224	char *envvar = ARB_strpartdup(ka+2, kz-1);
225	int len = ka-p;
226
227	if (len) GBS_strncat(out, p, len);
228
229	GB_CSTR genv = GB_getenv(envvar);
230	if (genv) GBS_strcat(out, genv);
231
232	p = kz+1;
233	free(envvar);
234	}
235	}
236
237	if (error) {
238	GB_export_error(error);
239	GBS_strforget(out);
240	return NULp;
241	}
242
243	GBS_strcat(out, p); // copy rest
244	return GBS_strclose(out);
245	}
246
long GBS_gcgchecksum(const char *seq) {
    // GCGchecksum:
    // sums up the uppercased characters of 'seq', each multiplied by a
    // weight cycling through 1..57; the result is that sum modulo 10000.
    long sum    = 0;
    long weight = 0;
    long len    = strlen(seq);

    for (long pos = 0; pos < len; ++pos) {
        ++weight;
        sum += weight * toupper(seq[pos]);
        if (weight == 57) weight = 0; // restart weight cycle
    }

    return sum % 10000;
}
263
264	// Table of CRC-32's of all single byte values (made by makecrc.c of ZIP source)
265	uint32_t crctab[] = {
266	0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
267	0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
268	0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
269	0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
270	0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
271	0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
272	0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
273	0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
274	0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
275	0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
276	0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
277	0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
278	0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
279	0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
280	0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
281	0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
282	0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
283	0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
284	0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
285	0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
286	0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
287	0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
288	0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
289	0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
290	0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
291	0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
292	0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
293	0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
294	0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
295	0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
296	0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
297	0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
298	0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
299	0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
300	0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
301	0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
302	0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
303	0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
304	0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
305	0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
306	0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
307	0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
308	0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
309	0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
310	0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
311	0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
312	0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
313	0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
314	0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
315	0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
316	0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
317	0x2d02ef8dL
318	};
319
320	uint32_t GB_checksum(const char seq, long length, int ignore_case, const char exclude) {
321	/* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source
322	* if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too)
323	*/
324
325	unsigned long c = 0xffffffffL;
326	long n = length;
327	int i;
328	int tab[256]; // @@@ avoid recalc for each call
329
330	for (i=0; i<256; i++) { // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
331	tab[i] = ignore_case ? toupper(i) : i;
332	}
333
334	if (exclude) {
335	while (1) {
336	int k = (unsigned char )exclude++;
337	if (!k) break;
338	tab[k] = 0;
339	if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0;
340	}
341	}
342
343	while (n--) {
344	i = tab[(const unsigned char )seq++];
345	if (i) {
346	c = crctab[((int) c ^ i) & 0xff] ^ (c >> 8);
347	}
348	}
349	c = c ^ 0xffffffffL;
350	return c;
351	}
352
353	uint32_t GBS_checksum(const char seq, int ignore_case, const char exclude) {
354	// if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too)
355	return GB_checksum(seq, strlen(seq), ignore_case, exclude);
356	}
357
size_t GBS_shorten_repeated_data(char *data) {
    // Compresses runs of identical characters inside 'data' (in place!).
    // Runs of 5 or more characters are written as "<char>{<count>}",
    // shorter runs are kept unchanged, e.g.
    //    "..............................ACGT....................TGCA"
    // -> ".{30}ACGT.{20}TGCA"
    //
    // Returns the length of the shortened string.

#if defined(DEBUG)
    size_t orgLen = strlen(data);
#endif // DEBUG

    char *const  begin   = data;
    char        *out     = data;
    const char  *in      = data;
    char         runChar = *in++;
    size_t       runLen  = 1;

    while (runChar) {
        char next = *in++;
        if (next == runChar) {
            ++runLen;
            continue;
        }

        // run of 'runChar' has ended -> write it
        if (runLen >= 5) {
            out += sprintf(out, "%c{%zu}", runChar, runLen); // insert repeat count
        }
        else {
            for (size_t k = runLen; k>0; --k) *out++ = runChar; // insert plain
        }

        runChar = next;
        runLen  = 1;
    }

    *out = 0;

#if defined(DEBUG)
    gb_assert(strlen(begin) <= orgLen);
#endif // DEBUG

    return out-begin;
}
398
399
400	// ------------------------------------------
401	// helper classes for tagged fields
402
403	class TextRef {
404	const char *data; // has no terminal zero-byte!
405	int length;
406
407	public:
408	TextRef() : data(NULp), length(-1) {}
409	TextRef(const char *data_, int length_) : data(data_), length(length_) {}
410	explicit TextRef(const char *zeroTerminated) : data(zeroTerminated), length(strlen(data)) {}
411
412	bool defined() const { return data && length>0; }
413	const char *get_data() const { return data; }
414	int get_length() const { return length; }
415
416	const char *get_following() const { return data ? data+length : NULp; }
417
418	int compare(const char *str) const {
419	gb_assert(defined());
420	int cmp = strncmp(get_data(), str, get_length());
421	if (!cmp) {
422	if (str[get_length()]) {
423	cmp = -1; // right side contains more content
424	}
425	}
426	return cmp;
427	}
428	int icompare(const char *str) const {
429	gb_assert(defined());
430	int cmp = strncasecmp(get_data(), str, get_length());
431	if (!cmp) {
432	if (str[get_length()]) {
433	cmp = -1; // right side contains more content
434	}
435	}
436	return cmp;
437	}
438	char *copy() const { return ARB_strndup(get_data(), get_length()); }
439
440	char head() const { return defined() ? data[0] : 0; }
441	char tail() const { return defined() ? data[length-1] : 0; }
442
443	TextRef headTrimmed() const {
444	if (defined()) {
445	for (int s = 0; s<length; ++s) {
446	if (!isspace(data[s])) {
447	return TextRef(data+s, length-s);
448	}
449	}
450	}
451	return TextRef();
452	}
453	TextRef tailTrimmed() const {
454	if (defined()) {
455	for (int s = length-1; s>=0; --s) {
456	if (!isspace(data[s])) {
457	return TextRef(data, s+1);
458	}
459	}
460	}
461	return TextRef();
462	}
463
464	TextRef trimmed() const {
465	return headTrimmed().tailTrimmed();
466	}
467
468	inline TextRef partBefore(const TextRef& subref) const;
469	inline TextRef partBehind(const TextRef& subref) const;
470
471	bool is_part_of(const TextRef& other) const {
472	gb_assert(defined() && other.defined());
473	return get_data()>=other.get_data() && get_following()<=other.get_following();
474	}
475
476	const char find(char c) const { return reinterpret_cast<const char>(memchr(get_data(), c, get_length())); }
477	};
478
479	static TextRef textBetween(const TextRef& t1, const TextRef& t2) {
480	const char *behind_d1 = t1.get_following();
481	const char *d2 = t2.get_data();
482
483	if (behind_d1 && d2 && behind_d1<d2) {
484	return TextRef(behind_d1, d2-behind_d1);
485	}
486	return TextRef();
487	}
488
489	inline TextRef TextRef::partBefore(const TextRef& subref) const {
490	gb_assert(subref.is_part_of(*this));
491	return textBetween(TextRef(get_data(), 0), subref);
492	}
493	inline TextRef TextRef::partBehind(const TextRef& subref) const {
494	gb_assert(subref.is_part_of(*this));
495	return TextRef(subref.get_following(), get_following()-subref.get_following());
496	}
497
498	class TaggedContentParser {
499	TextRef wholeInput;
500	TextRef tag, content; // current position
501	TextRef restTags; // store (rest of) multiple tags (e.g. from "[t1,t2]")
502	TextRef nextBrackets; // next "[..]" part (behind current tag)
503
504	void findBrackets(const char *in) {
505	nextBrackets = TextRef();
506	const char *tag_start = strchr(in, '[');
507	if (tag_start) {
508	const char *tag_end = strchr(tag_start, ']');
509	if (tag_end) {
510	if (tag_end == tag_start+1) { // empty tag -> use as content
511	findBrackets(tag_end+1);
512	}
513	else {
514	const char unwanted_bracket = reinterpret_cast<const char>(memchr(tag_start+1, '[', tag_end-tag_start-1));
515	if (unwanted_bracket) { // tagname contains '[' -> step to next bracket
516	findBrackets(unwanted_bracket);
517	}
518	else {
519	TextRef name = TextRef(tag_start+1, tag_end-tag_start-1).trimmed();
520	if (name.defined()) { // not only whitespace inside brackets
521	nextBrackets = TextRef(tag_start, tag_end-tag_start+1);
522	}
523	else {
524	findBrackets(tag_end+1);
525	}
526	}
527	}
528	}
529	}
530	}
531
532	void parse_next_multi_tag() {
533	gb_assert(restTags.defined());
534	TextRef comma(restTags.find(','), 1);
535	if (comma.defined()) {
536	tag = restTags.partBefore(comma).tailTrimmed();
537	restTags = restTags.partBehind(comma).headTrimmed();
538	}
539	else {
540	tag = restTags;
541	restTags = TextRef();
542	}
543	}
544	void parse_next() {
545	if (restTags.defined()) {
546	parse_next_multi_tag();
547	}
548	else if (nextBrackets.defined()) {
549	TextRef brackets = nextBrackets;
550	findBrackets(brackets.get_following());
551
552	content = (nextBrackets.defined() ? textBetween(brackets, nextBrackets) : wholeInput.partBehind(brackets)).trimmed();
553
554	gb_assert(brackets.head() == '[' && brackets.tail() == ']');
555
556	TextRef tags = TextRef(brackets.get_data()+1, brackets.get_length()-2).trimmed();
557	gb_assert(tags.defined());
558
559	restTags = tags;
560	parse_next_multi_tag();
561	}
562	else {
563	tag = content = TextRef();
564	gb_assert(!has_part());
565	}
566	}
567	void parse_first() {
568	gb_assert(!has_part());
569	findBrackets(wholeInput.get_data());
570	content = (nextBrackets.defined() ? wholeInput.partBefore(nextBrackets) : wholeInput).trimmed();
571	if (!content.defined()) parse_next(); // no untagged prefix seen -> directly goto first tag
572	}
573
574	public:
575	TaggedContentParser(const char *input_) : wholeInput(input_) { parse_first(); }
576
577	bool has_tag() const { return tag.defined(); }
578	bool has_content() const { return content.defined(); }
579
580	void next() { parse_next(); }
581	bool has_part() const { return has_tag() \|\| has_content(); } // false -> parser has finished
582
583	const TextRef& get_tag() const { return tag; }
584	const TextRef& get_content() const { return content; }
585	};
586
587
588	// -------------------------------------------
589	// helper function for tagged fields
590
591	static void g_bs_add_value_tag_to_hash(GB_HASH hash, const char tag, char *value) {
592	if (!value[0]) return; // ignore empty values
593
594	{
595	char *p;
596	p = value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{'
597	p = value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}'
598	}
599
600	GB_HASH sh = (GB_HASH )GBS_read_hash(hash, value);
601	if (!sh) {
602	sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent
603	GBS_write_hash(hash, value, (long)sh);
604	}
605	GBS_write_hash(sh, tag, 1);
606	}
607
608	static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH hash, char s, char default_tag, const char del) {
609	TaggedContentParser parser(s);
610	while (parser.has_part()) {
611	if (parser.has_content()) {
612	char *content = parser.get_content().copy();
613	if (parser.has_tag()) {
614	char *tag = parser.get_tag().copy();
615	if (!del \|\| ARB_stricmp(tag, del) != 0) {
616	g_bs_add_value_tag_to_hash(hash, tag, content);
617	}
618	free(tag);
619	}
620	else {
621	g_bs_add_value_tag_to_hash(hash, default_tag, content); // no tag found, use default tag
622	}
623	free(content);
624	}
625	parser.next();
626	}
627	}
628
629	static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH hash, char s, char default_tag, const char rtag, const char *aci, GBL_call_env& env) {
630	GB_ERROR error = NULp;
631
632	TaggedContentParser parser(s);
633	while (parser.has_part() && !error) {
634	if (parser.has_content()) {
635	char *value = parser.get_content().copy();
636	char *tag = parser.has_tag() ? parser.get_tag().copy() : strdup(default_tag);
637
638	if (rtag && ARB_stricmp(tag, rtag) == 0) {
639	freeset(value, GB_command_interpreter_in_env(value, aci, env));
640	if (!value) error = GB_await_error();
641	}
642
643	if (!error) g_bs_add_value_tag_to_hash(hash, tag, value);
644
645	free(tag);
646	free(value);
647	}
648	parser.next();
649	}
650
651	return error;
652	}
653
654	static void g_bs_merge_tags(const char tag, long /val/, void cd_sub_result) {
655	GBS_strstruct sub_result = (GBS_strstruct)cd_sub_result;
656
657	GBS_strcat(sub_result, tag);
658	GBS_strcat(sub_result, ",");
659	}
660
661	static void g_bs_read_tagged_hash(const char value, long subhash, void cd_g_bs_collect_tags_hash) {
662	static int counter = 0;
663
664	GBS_strstruct *sub_result = GBS_stropen(100);
665	GBS_hash_do_const_sorted_loop((GB_HASH *)subhash, g_bs_merge_tags, GBS_HCF_sortedByKey, sub_result);
666	GBS_intcat(sub_result, counter++); // create a unique number
667
668	char *str = ARB_strupper(GBS_strclose(sub_result));
669
670	GB_HASH g_bs_collect_tags_hash = (GB_HASH)cd_g_bs_collect_tags_hash;
671	GBS_write_hash(g_bs_collect_tags_hash, str, (long)ARB_strdup(value)); // send output to new hash for sorting
672
673	free(str);
674	}
675
676	static void g_bs_read_final_hash(const char tag, long value, void cd_merge_result) {
677	GBS_strstruct merge_result = (GBS_strstruct)cd_merge_result;
678
679	char lk = const_cast<char>(strrchr(tag, ','));
680	if (lk) { // remove number at end
681	*lk = 0;
682
683	if (!merge_result->empty()) merge_result->put(' '); // skip trailing space
684	merge_result->put('[');
685	merge_result->cat(tag);
686	merge_result->put(']');
687	merge_result->put(' ');
688	}
689	merge_result->cat((char*)value);
690	}
691
692	static char g_bs_get_string_of_tag_hash(GB_HASH tag_hash) {
693	GBS_strstruct *merge_result = GBS_stropen(256);
694	GB_HASH *collect_tags_hash = GBS_create_dynaval_hash(512, GB_IGNORE_CASE, GBS_dynaval_free);
695
696	GBS_hash_do_const_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash
697	GBS_hash_do_const_sorted_loop(collect_tags_hash, g_bs_read_final_hash, GBS_HCF_sortedByKey, merge_result);
698
699	GBS_free_hash(collect_tags_hash);
700	return GBS_strclose(merge_result);
701	}
702
703	static long g_bs_free_hash_of_hashes_elem(const char /key/, long val, void ) {
704	GB_HASH hash = (GB_HASH)val;
705	if (hash) GBS_free_hash(hash);
706	return 0;
707	}
708	static void g_bs_free_hash_of_hashes(GB_HASH *hash) {
709	GBS_hash_do_loop(hash, g_bs_free_hash_of_hashes_elem, NULp);
710	GBS_free_hash(hash);
711	}
712
713	char GBS_merge_tagged_strings(const char s1, const char tag1, const char replace1, const char s2, const char tag2, const char *replace2) {
714	/* Create a tagged string from two tagged strings:
715	* a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string'
716	*
717	* if 's2' is not empty, then delete tag 'replace1' in 's1'
718	* if 's1' is not empty, then delete tag 'replace2' in 's2'
719	*
720	* (result should never be NULp)
721	*/
722
723	char *str1 = ARB_strdup(s1);
724	char *str2 = ARB_strdup(s2);
725	char *t1 = GBS_string_2_key(tag1);
726	char *t2 = GBS_string_2_key(tag2);
727	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
728
729	if (!s1[0]) replace2 = NULp;
730	if (!s2[0]) replace1 = NULp;
731
732	if (replace1 && !replace1[0]) replace1 = NULp;
733	if (replace2 && !replace2[0]) replace2 = NULp;
734
735	g_bs_convert_string_to_tagged_hash_with_delete(hash, str1, t1, replace1);
736	g_bs_convert_string_to_tagged_hash_with_delete(hash, str2, t2, replace2);
737
738	char *result = g_bs_get_string_of_tag_hash(hash);
739
740	g_bs_free_hash_of_hashes(hash);
741
742	free(t2);
743	free(t1);
744	free(str2);
745	free(str1);
746
747	return result;
748	}
749
750	char GBS_modify_tagged_string_with_ACI(const char s, const char dt, const char tag, const char *aci, GBL_call_env& env) {
751	/* if 's' is untagged, tag it with default tag 'dt'.
752	* if 'tag' is specified -> apply 'aci' to that part of the content of 's', which is tagged with 'tag' (i.e. look for '[tag]')
753	*
754	* if result is NULp, an error has been exported.
755	*/
756
757	char *str = ARB_strdup(s);
758	char *default_tag = GBS_string_2_key(dt);
759	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
760	char *result = NULp;
761
762	GB_ERROR error = g_bs_convert_string_to_tagged_hash_with_rewrite(hash, str, default_tag, tag, aci, env);
763
764	if (!error) {
765	result = g_bs_get_string_of_tag_hash(hash);
766	}
767	else {
768	GB_export_error(error);
769	}
770
771	g_bs_free_hash_of_hashes(hash);
772
773	free(default_tag);
774	free(str);
775
776	return result;
777	}
778
779	char GB_read_as_tagged_string(GBDATA gbd, const char *tagi) {
780	char *buf = GB_read_as_string(gbd);
781	if (buf && tagi && tagi[0]) {
782	TaggedContentParser parser(buf);
783
784	char *wantedTag = GBS_string_2_key(tagi);
785	char *contentFound = NULp;
786
787	while (parser.has_part() && !contentFound) {
788	if (parser.has_tag() && parser.get_tag().icompare(wantedTag) == 0) {
789	contentFound = parser.get_content().copy();
790	}
791	parser.next();
792	}
793	free(wantedTag);
794	free(buf);
795
796	return contentFound;
797	}
798	return buf;
799	}
800
801
802	/* be CAREFUL : this function is used to save ARB ASCII database (i.e. properties)
803	* used as well to save perl macros
804	*
805	* when changing GBS_fwrite_string -> GBS_fread_string needs to be fixed as well
806	*
807	* always keep in mind, that many users have databases/macros written with older
808	* versions of this function. They MUST load proper!!!
809	*/
void GBS_fwrite_string(const char *strngi, FILE *out) {
    // Writes 'strngi' to 'out', enclosed in double quotes.
    // Characters below ASCII 32, '"' and '\\' are written as backslash sequences.
    const unsigned char *strng = (const unsigned char *)strngi;
    int                  c;

    putc('"', out);

    while ((c = *strng++)) {
        if (c < 32) {
            putc('\\', out);
            if (c == '\n')
                putc('n', out);
            else if (c == '\t')
                putc('t', out);
            else if (c<25) {
                putc(c+'@', out); // characters ASCII 0..24 encoded as \@..\X (\n and \t are done above)
            }
            else {
                putc(c+('0'-25), out); // characters ASCII 25..31 encoded as \0..\6
            }
        }
        else if (c == '"') {
            putc('\\', out);
            putc('"', out);
        }
        else if (c == '\\') {
            putc('\\', out);
            putc('\\', out);
        }
        else {
            putc(c, out);
        }
    }
    putc('"', out);
}
844
845	/* Read a string from a file written by GBS_fwrite_string,
846	* Searches first '"'
847	*
848	* WARNING : changing this function affects perl-macro execution (read warnings for GBS_fwrite_string)
849	* any changes should be done in GBS_fconvert_string too.
850	*/
851
852	static char GBS_fread_string(FILE in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused!
853	GBS_strstruct *strstr = GBS_stropen(1024);
854	int x;
855
856	while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"'
857
858	if (x != EOF) {
859	while ((x = getc(in)) != '"') {
860	if (x == EOF) break;
861	if (x == '\\') {
862	x = getc(in); if (x==EOF) break;
863	if (x == 'n') {
864	GBS_chrcat(strstr, '\n');
865	continue;
866	}
867	if (x == 't') {
868	GBS_chrcat(strstr, '\t');
869	continue;
870	}
871	if (x>='@' && x <= '@' + 25) {
872	GBS_chrcat(strstr, x-'@');
873	continue;
874	}
875	if (x>='0' && x <= '9') {
876	GBS_chrcat(strstr, x-('0'-25));
877	continue;
878	}
879	// all other backslashes are simply skipped
880	}
881	GBS_chrcat(strstr, x);
882	}
883	}
884	return GBS_strclose(strstr);
885	}
886
887	/* does similar decoding as GBS_fread_string but works directly on an existing buffer
888	* (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!)
889	*
890	* inserts \0 behind decoded string (removes the closing '"')
891	* returns a pointer behind the end (") of the _encoded_ string
892	* returns NULp if a 0-character is found
893	*/
894	char GBS_fconvert_string(char buffer) {
895	char *t = buffer;
896	char *f = buffer;
897	int x;
898
899	gb_assert(f[-1] == '"');
900	// the opening " has already been read
901
902	while ((x = *f++) != '"') {
903	if (!x) break;
904
905	if (x == '\\') {
906	x = *f++;
907	if (!x) break;
908
909	if (x == 'n') {
910	*t++ = '\n';
911	continue;
912	}
913	if (x == 't') {
914	*t++ = '\t';
915	continue;
916	}
917	if (x>='@' && x <= '@' + 25) {
918	*t++ = x-'@';
919	continue;
920	}
921	if (x>='0' && x <= '9') {
922	*t++ = x-('0'-25);
923	continue;
924	}
925	// all other backslashes are simply skipped
926	}
927	*t++ = x;
928	}
929
930	if (!x) return NULp; // error (string should not contain 0-character)
931	gb_assert(x == '"');
932
933	t[0] = 0;
934	return f;
935	}
936
937	char GBS_replace_tabs_by_spaces(const char text) {
938	int tlen = strlen(text);
939	GBS_strstruct mfile = GBS_stropen(tlen 3/2 + 1);
940	int tabpos = 0;
941	int c;
942
943	while ((c=*(text++))) {
944	if (c == '\t') {
945	int ntab = (tabpos + 8) & 0xfffff8;
946	while (tabpos < ntab) {
947	GBS_chrcat(mfile, ' ');
948	tabpos++;
949	}
950	continue;
951	}
952	tabpos ++;
953	if (c == '\n') {
954	tabpos = 0;
955	}
956	GBS_chrcat(mfile, c);
957	}
958	return GBS_strclose(mfile);
959	}
960
961	char GBS_trim(const char str) {
962	// trim whitespace at beginning and end of 'str'
963	const char *whitespace = " \t\n";
964	while (str[0] && strchr(whitespace, str[0])) str++;
965
966	const char *end = strchr(str, 0)-1;
967	while (end >= str && strchr(whitespace, end[0])) end--;
968
969	return ARB_strpartdup(str, end);
970	}
971
972	static char dated_info(const char info) {
973	char *dated_info = NULp;
974	time_t date;
975
976	if (time(&date) != -1) {
977	char *dstr = ctime(&date);
978	char *nl = strchr(dstr, '\n');
979
980	if (nl) nl[0] = 0; // cut off LF
981
982	dated_info = GBS_global_string_copy("%s: %s", dstr, info);
983	}
984	else {
985	dated_info = ARB_strdup(info);
986	}
987	return dated_info;
988	}
989
990	char GBS_log_action_to(const char comment, const char *action, bool stamp) {
991	/*! concatenates 'comment' and 'action'.
992	* '\n' is appended to existing 'comment' and/or 'action' (if missing).
993	* @param comment may be NULp (=> result is 'action')
994	* @param action may NOT be NULp
995	* @param stamp true -> prefix current timestamp in front of 'action'
996	* @return heap copy of concatenation
997	*/
998	size_t clen = comment ? strlen(comment) : 0;
999	size_t alen = strlen(action);
1000
1001	GBS_strstruct new_comment = GBS_stropen(clen+1+(stamp ? 100 : 0)+alen+1+1); // + 2\n + \0 + space for stamp
1002
1003	if (comment) {
1004	GBS_strcat(new_comment, comment);
1005	if (clen == 0 \|\| comment[clen-1] != '\n') GBS_chrcat(new_comment, '\n');
1006	}
1007
1008	if (stamp) {
1009	char *dated_action = dated_info(action);
1010	GBS_strcat(new_comment, dated_action);
1011	free(dated_action);
1012	}
1013	else {
1014	GBS_strcat(new_comment, action);
1015	}
1016	if (alen == 0 \|\| action[alen-1] != '\n') GBS_chrcat(new_comment, '\n');
1017
1018	return GBS_strclose(new_comment);
1019	}
1020
1021	const char GBS_funptr2readable(void funptr, bool stripARBHOME) {
1022	// only returns module and offset for static functions :-(
1023	char **funNames = backtrace_symbols(&funptr, 1);
1024	const char *readable_fun = funNames[0];
1025
1026	if (stripARBHOME) {
1027	const char *ARBHOME = GB_getenvARBHOME();
1028	if (ARB_strBeginsWith(readable_fun, ARBHOME)) {
1029	readable_fun += strlen(ARBHOME)+1; // +1 hides slash behind ARBHOME
1030	}
1031	}
1032	return readable_fun;
1033	}
1034
1035	// --------------------------------------------------------------------------------
1036
1037	#ifdef UNIT_TESTS
1038
1039	#include <test_unit.h>
1040
1041	// #define TEST_TEST_MACROS
1042
// Helper functions handed to the crash-test macros used in TEST_signal_tests__crashtest.
// They are only compiled if ENABLE_CRASH_TESTS is defined.
1043	#ifdef ENABLE_CRASH_TESTS
// deliberately raises SIGSEGV
1044	static void provokesegv() { raise(SIGSEGV); }
// counterpart doing nothing (i.e. raising no signal)
1045	static void dont_provokesegv() {}
1046	# if defined(ASSERTION_USED)
// deliberately fails an assertion
1047	static void failassertion() { gb_assert(0); }
1048	# if defined(TEST_TEST_MACROS)
// counterpart doing nothing; only needed by the section testing the test-macros themselves
1049	static void dont_failassertion() {}
1050	# endif
// expects an assertion failure from a function that raises SIGSEGV instead
1051	static void provokesegv_does_not_fail_assertion() {
1052	// provokesegv does not raise assertion
1053	// -> the following assertion fails
1054	TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv);
1055	}
1056	# endif
1057	#endif
1058
// Self-test of the crash-test macros: verifies that SIGSEGV and assertion failures
// can be detected, detected repeatedly, and told apart.
// NOTE(review): the helper functions passed here only exist if ENABLE_CRASH_TESTS
// (and partly ASSERTION_USED) is defined - presumably the macros ignore their
// argument otherwise; confirm in test_unit.h.
1059	void TEST_signal_tests__crashtest() {
1060	// check whether we can test that no SEGV or assertion failure happened
1061	TEST_EXPECT_NO_SEGFAULT(dont_provokesegv);
1062
1063	// check whether we can test for SEGV and assertion failures
1064	TEST_EXPECT_SEGFAULT(provokesegv);
1065	TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1066
1067	// tests whether signal suppression works multiple times (by repeating tests)
1068	TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1069	TEST_EXPECT_SEGFAULT(provokesegv);
1070
1071	// test whether SEGV can be distinguished from assertion
1072	TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion);
1073
1074	// The following section is disabled, because it will
1075	// provoke test warnings (to test these warnings).
1076	// (enable it when changing any of these TEST_..-macros used here)
1077	#if defined(TEST_TEST_MACROS)
1078	TEST_EXPECT_NO_SEGFAULT__WANTED(provokesegv);
1079
1080	TEST_EXPECT_SEGFAULT__WANTED(dont_provokesegv);
1081	TEST_EXPECT_SEGFAULT__UNWANTED(provokesegv);
1082	#if defined(ASSERTION_USED)
1083	TEST_EXPECT_SEGFAULT__UNWANTED(failassertion);
1084	#endif
1085
1086	TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(dont_failassertion);
1087	TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(failassertion);
1088	TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(provokesegv_does_not_fail_assertion);
1089	#endif
1090	}
1091
// Runs GBS_shorten_repeated_data() on a heap copy of 'Long' (the function modifies
// the passed buffer) and expects the buffer to equal 'Short' afterwards.
1092	#define TEST_SHORTENED_EQUALS(Long,Short) do { \
1093	char *buf = ARB_strdup(Long); \
1094	GBS_shorten_repeated_data(buf); \
1095	TEST_EXPECT_EQUAL(buf, Short); \
1096	free(buf); \
1097	} while(0)
1098
// Checks GBS_shorten_repeated_data(): according to the expectations below a run of
// 5 or more identical characters is rewritten as "<char>{<count>}", whereas shorter
// runs (and the empty string) are left untouched.
1099	void TEST_GBS_shorten_repeated_data() {
1100	TEST_SHORTENED_EQUALS("12345", "12345");
1101	TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc");
1102	TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc");
1103	TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc");
1104	TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc");
1105	TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc");
1106	TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc");
1107	TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc");
1108	TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc");
1109	TEST_SHORTENED_EQUALS("aaaabc", "aaaabc");
1110	TEST_SHORTENED_EQUALS("aaabc", "aaabc");
1111	TEST_SHORTENED_EQUALS("aabc", "aabc");
1112	TEST_SHORTENED_EQUALS("", "");
1113	}
1114
// printf-style templates for hierarchical keys; each places the tested key
// fragment ('%s') at a different level (first, middle, last) of a 3-level path.
1115	static const char *hkey_format[] = {
1116	"/%s/bbb/ccc",
1117	"/aaa/%s/ccc",
1118	"/aaa/bbb/%s",
1119	};
1120
1121	inline const char useInHkey(const char fragment, size_t pos) {
1122	return GBS_global_string(hkey_format[pos], fragment);
1123	}
1124
// Inserts 'use' at each position defined by hkey_format and expects GB_check_hkey()
// to accept every resulting hierarchical key. The currently tested key is annotated
// for failure reports; the annotation is reset afterwards.
1125	#define TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(use) do { \
1126	for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1127	const char *hkey = useInHkey(use, i); \
1128	TEST_ANNOTATE(hkey); \
1129	TEST_EXPECT_NO_ERROR(GB_check_hkey(hkey)); \
1130	} \
1131	TEST_ANNOTATE(NULp); \
1132	} while(0)
1133
// Inserts 'use' at each position defined by hkey_format and expects GB_check_hkey()
// to reject every resulting hierarchical key with an error containing 'contains'.
// The currently tested key is annotated for failure reports; the annotation is reset afterwards.
1134	#define TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(use,contains) do { \
1135	for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1136	const char *hkey = useInHkey(use, i); \
1137	TEST_ANNOTATE(hkey); \
1138	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(hkey), contains); \
1139	} \
1140	TEST_ANNOTATE(NULp); \
1141	} while(0)
1142
1143
// Tests the key validators: GB_check_key() (plain keys) and GB_check_hkey()
// (hierarchical keys, i.e. plain keys separated by '/').
1144	void TEST_DB_key_checks() {
1145	// plain keys
1146	const char *shortest = "ab";
1147	const char *too_long = "ab345678901234567890123456789012345678901234567890123456789012345";
// skipping the first character yields a key that is one character shorter:
1148	const char *too_short = shortest+1;
1149	const char *longest = too_long+1;
1150
1151	const char *empty = "";
1152	const char *slash = "sub/key";
1153	const char *dslash = "sub//key";
1154	const char *comma = "no,key";
1155	const char *minus = "no-key";
1156
1157	// obsolete GB_LINK syntax:
1158	const char *link = "link->syntax";
1159	const char *nowhere = "link->";
1160	const char *fromNw = "->syntax";
1161
1162	TEST_EXPECT_NO_ERROR(GB_check_key(shortest));
1163	TEST_EXPECT_NO_ERROR(GB_check_key(longest));
1164
1165	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_short), "too short");
1166	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_long), "too long");
1167	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(empty), "not allowed");
1168
// a plain key may not contain any '/' (neither single nor doubled):
1169	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(slash), "Invalid character '/'");
1170	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(dslash), "Invalid character '/'");
1171	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(comma), "Invalid character ','");
1172	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(minus), "Invalid character '-'");
1173	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(link), "Invalid character '-'");
1174	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(nowhere), "Invalid character '-'");
1175	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(fromNw), "Invalid character '-'");
1176
1177	// hierarchical keys
1178	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(shortest);
1179	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(longest);
1180
1181	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_short, "too short");
1182	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_long, "too long");
1183	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(empty, "not allowed");
1184
// inside a hierarchical key a single '/' is a valid separator, whereas "//" encloses an empty key:
1185	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(slash);
1186	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(dslash, "Empty key is not allowed");
1187	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(comma, "Invalid character ','");
1188	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(minus, "Invalid character '-'");
1189	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(link, "Invalid character '-'");
1190	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(nowhere, "Invalid character '-'");
1191	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(fromNw, "Invalid character '-'");
1192
1193	// test NULp keys:
1194	TEST_EXPECT_ERROR_CONTAINS(GB_check_key (NULp), "Empty key is not allowed");
1195	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(NULp), "Empty key is not allowed");
1196
1197	// some edge cases for hierarchical keys:
1198	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//"), "Empty key is not allowed");
1199	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//key"), "Empty key is not allowed"); // @@@ is double slash compensated by GB_search etc? if yes -> accept here as well!
1200	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key//"), "Empty key is not allowed");
1201	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("/"), "Empty key is not allowed");
1202	TEST_EXPECT_NO_ERROR (GB_check_hkey("/key"));
1203	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key/"), "Empty key is not allowed"); // @@@ use better message? e.g. "invalid trailing '/'"
1204	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(""), "Empty key is not allowed");
1205	}
1206
// Converts 'str' using GBS_string_2_key(), expects the result to equal 'expected'
// and to be accepted by GB_check_key(); frees the generated key afterwards.
1207	#define TEST_STRING2KEY(str,expected) do { \
1208	char *as_key = GBS_string_2_key(str); \
1209	TEST_EXPECT_EQUAL(as_key, expected); \
1210	TEST_EXPECT_NO_ERROR(GB_check_key(as_key)); \
1211	free(as_key); \
1212	} while(0)
1213
// Tests GBS_string_2_key(): spaces become '_', characters not allowed in keys are
// dropped, too short results get padded with '_' and too long input is truncated.
1214	void TEST_DB_key_generation() {
1215	TEST_STRING2KEY("abc", "abc");
1216	TEST_STRING2KEY("a b c", "a_b_c");
1217
1218	// invalid chars
1219	TEST_STRING2KEY("string containing \"double-quotes\", 'quotes' and other:shit!*&^@!%@(",
1220	"string_containing_doublequotes_quotes_and_othershit");
1221
1222	// length tests
1223	TEST_STRING2KEY("a", "a_"); // too short
1224	TEST_STRING2KEY("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // too long
1225	"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1226	}
1227
1228	void TEST_TaggedContentParser() {
1229	// test helper class TextRef:
1230	TEST_REJECT(TextRef().defined()); // default to undefined
1231	{
1232	TextRef bla("blakjahd", 3);
1233	TEST_EXPECT(bla.defined());
1234	TEST_EXPECT_EQUAL(bla.get_length(), 3);
1235
1236	TEST_EXPECT(bla.compare("bl") > 0);
1237	TEST_EXPECT(bla.compare("bla") == 0);
1238	TEST_EXPECT(bla.compare("blase") < 0);
1239
1240	TextRef spaced(" spaced "+1, 10);
1241	TEST_EXPECT(spaced.headTrimmed().compare("spaced ") == 0);
1242	TEST_EXPECT(spaced.tailTrimmed().compare(" spaced") == 0);
1243	TEST_EXPECT(spaced.trimmed ().compare("spaced") == 0);
1244	}
1245
1246	const char *text = " untagged [tag] tagged [empty] ";
1247
1248	TextRef cr_untagged(strstr(text, "untagged"), 8);
1249	TextRef cr_tagged (strstr(text, "tagged"), 6);
1250	TextRef tr_tag (strstr(text, "tag"), 3);
1251	TextRef tr_empty (strstr(text, "empty"), 5);
1252
1253	// test TaggedContentParser:
1254	{
1255	TaggedContentParser parser(text);
1256
1257	TEST_EXPECT(parser.has_part());
1258	TEST_REJECT(parser.has_tag());
1259	TEST_EXPECT(parser.get_content().compare("untagged") == 0);
1260
1261	parser.next();
1262
1263	TEST_EXPECT(parser.has_part());
1264	TEST_EXPECT(parser.get_tag ().compare("tag") == 0);
1265	TEST_EXPECT(parser.get_content().compare("tagged") == 0);
1266
1267	parser.next();
1268
1269	TEST_EXPECT(parser.has_part());
1270	TEST_EXPECT(parser.get_tag().compare("empty") == 0);
1271	TEST_REJECT(parser.has_content());
1272
1273	parser.next();
1274
1275	TEST_REJECT(parser.has_part());
1276	}
1277	{ // parse untagged input
1278	TaggedContentParser parser("hi");
1279	TEST_EXPECT(parser.has_part());
1280	TEST_REJECT(parser.has_tag());
1281	TEST_EXPECT(parser.get_content().compare("hi") == 0);
1282	parser.next();
1283	TEST_REJECT(parser.has_part());
1284	}
1285	{ // parse empty input
1286	TaggedContentParser empty(""); TEST_REJECT(empty.has_part());
1287	TaggedContentParser white(" \t\n "); TEST_REJECT(white.has_part());
1288	}
1289	{ // parse single tag w/o content
1290	TaggedContentParser parser(" [hello] ");
1291	TEST_EXPECT(parser.has_part());
1292	TEST_EXPECT(parser.get_tag().compare("hello") == 0);
1293	TEST_REJECT(parser.has_content());
1294	parser.next();
1295	TEST_REJECT(parser.has_part());
1296	}
1297	{ // parse multi-tags
1298	TaggedContentParser parser(" [ t1 , t2 ] t");
1299	TEST_EXPECT(parser.has_part());
1300	TEST_EXPECT(parser.get_tag().compare("t1") == 0);
1301	TEST_EXPECT(parser.get_content().compare("t") == 0);
1302	parser.next();
1303	TEST_EXPECT(parser.has_part());
1304	TEST_EXPECT(parser.get_tag().compare("t2") == 0);
1305	TEST_EXPECT(parser.get_content().compare("t") == 0);
1306	parser.next();
1307	TEST_REJECT(parser.has_part());
1308	}
1309	}
1310
// Calls GBS_merge_tagged_strings() and expects the (heap-allocated) result to equal 'expected'.
// Note the differing argument order: the macro takes (t1,t2,r1,r2,s1,s2), whereas the
// function is called with (s1,t1,r1, s2,t2,r2).
1311	#define TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected) do { \
1312	char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1313	TEST_EXPECT_EQUAL(result, expected); \
1314	free(result); \
1315	} while(0)
1316
// Variant of TEST_MERGE_TAGGED for known-broken behavior: passes the wanted result
// ('expected') and the currently delivered result ('got') to TEST_EXPECT_EQUAL__BROKEN.
1317	#define TEST_MERGE_TAGGED__BROKEN(t1,t2,r1,r2,s1,s2,expected,got) do { \
1318	char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1319	TEST_EXPECT_EQUAL__BROKEN(result, expected, got); \
1320	free(result); \
1321	} while(0)
1322
// Tests GBS_merge_tagged_strings() via TEST_MERGE_TAGGED.
// Macro arguments: (default tag 1, default tag 2, replace tag 1, replace tag 2, string 1, string 2, expected result).
// NOTE(review): the meaning of the 'replace' arguments is deduced from the section
// comments and expectations below ("update", "deleting") - confirm against GBS_merge_tagged_strings.
1323	void TEST_merge_tagged_strings() {
1324	// merge two fields:
// _0 is used as shortcut for "no tag specified"
1325	const char *_0 = NULp;
1326
1327	TEST_MERGE_TAGGED("S", "D", "", "", "source", "dest", "[D_] dest [S_] source");
1328	TEST_MERGE_TAGGED("SRC", "DST", "", _0, "source", "dest", "[DST] dest [SRC] source");
1329	TEST_MERGE_TAGGED("SRC", "DST", _0, "", "source", "dest", "[DST] dest [SRC] source");
1330	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "sth", "[DST,SRC] sth");
1331
1332	TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth", "sth", "[DST,SRC] sth"); // show default tags do not get deleted
1333	TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth [SRC] del", "sth [DST] del", "[DST,SRC] sth"); // exception: already present default tags
1334
1335	// update fields:
1336	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST] dest [SRC] source", "[DST] dest [SRC] newsource");
1337	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1338	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,src] sth", "[DST] sth [SRC] newsource");
1339	TEST_MERGE_TAGGED("SRC", "DST", _0, "src", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1340	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "sth", " [DST] sth [SRC] source", "[DST,SRC] sth");
1341
1342	// append (opposed to update this keeps old entries with same tag; useless?)
1343	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST] dest [SRC] source", "[DST] dest [SRC] newsource [SRC] source");
1344	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST,SRC] sth", "[DST,SRC] sth [SRC] newsource");
1345	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "[DST] sth [SRC] source", "[DST,SRC] sth [SRC] source");
1346
1347	// merge three fields:
1348	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST] dest [SRC] source", "[DST] dest [OTH] oth [SRC] source");
1349	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST,SRC] sth", "[DST,SRC] sth [OTH] oth");
1350	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "sth", " [DST,SRC] sth", "[DST,OTH,SRC] sth");
1351	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "dest", " [DST] dest [SRC] source", "[DST,OTH] dest [SRC] source");
1352	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "source", " [DST] dest [SRC] source", "[DST] dest [OTH,SRC] source");
1353
1354	// same tests as in section above, but vv:
1355	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "oth", "[DST] dest [OTH] oth [SRC] source");
1356	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "oth", "[DST,SRC] sth [OTH] oth");
1357	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "sth", "[DST,OTH,SRC] sth");
1358	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "dest", "[DST,OTH] dest [SRC] source");
1359	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "source", "[DST] dest [OTH,SRC] source");
1360
1361	// test real-merges (content existing in both strings):
1362	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre1 [C1] c1 [C2] c2", "pre2[C2]c2[C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1] pre1 [P2] pre2");
1363	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [C2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content for [C2]
1364	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [c2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content + different tag-case for [C2] (tests that tags are case-insensitive!)
1365	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [C2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content for [C2] -> inserts that tag multiple times
1366	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // continue processing last result (multiple tags with same name are handled)
1367	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C2] c2b [C3]c3 [C2] c2a", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // merge multiple tags with same name
1368	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [c2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content and different tag-case for [C2]
1369	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1,C4] c1 [C2] c2a ", "pre [c2] c2b [C4,C3]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // multitags
1370	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [ C1, C4] c1 [C2 ] c2a ", "pre [ c2] c2b [C4, C3 ]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // spaced-multitags
1371
1372	// merge two tagged string with deleting
1373	#define DSTSRC1 "[DST] dest1 [SRC] src1"
1374	#define DSTSRC2 "[DST] dest2 [SRC] src2"
1375	#define DSTSRC2LOW "[dst] dest2 [src] src2"
1376
1377	TEST_MERGE_TAGGED("O1", "O2", _0, _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2");
1378	TEST_MERGE_TAGGED("O1", "O2", "SRC", _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src2");
1379	TEST_MERGE_TAGGED("O1", "O2", _0, "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src1 [SRC] src2");
1380	TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1381	TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1382	TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1383	TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1384	TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1385	TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1386	TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1387	TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1388	TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1389	TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1390	TEST_MERGE_TAGGED("O1", "O2", "SRC,DST", "DST,SRC", DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2"); // delete does not handle multiple tags (yet)
1391	}
1392
1393	__ATTR__REDUCED_OPTIMIZE void TEST_read_tagged() {
1394	GB_shell shell;
1395	GBDATA *gb_main = GB_open("new.arb", "c");
1396	{
1397	GB_transaction ta(gb_main);
1398
1399	{
1400	GBDATA *gb_int_entry = GB_create(gb_main, "int", GB_INT);
1401	TEST_EXPECT_NO_ERROR(GB_write_int(gb_int_entry, 4711));
1402	TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_int_entry, "USELESS")); // reading from GB_INT doesn't make sense, but has to work w/o error
1403
1404	GBDATA *gb_ints_entry = GB_create(gb_main, "int", GB_INTS);
1405	GB_UINT4 ints[] = { 1, 2 };
1406	TEST_EXPECT_NO_ERROR(GB_write_ints(gb_ints_entry, ints, 2));
1407	TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_ints_entry, "USELESS")); // reading from GB_INTS doesn't make sense, but has to work w/o error
1408	}
1409
1410	#define TEST_EXPECT_TAG_CONTENT(tag,expected) TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag), expected)
1411	#define TEST_REJECT_TAG_CONTENT(tag) TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag))
1412	#define TEST_EXPECT_FULL_CONTENT(tag) TEST_EXPECT_TAG_CONTENT(tag,tagged_string)
1413
1414	GBDATA *gb_entry = GB_create(gb_main, "str", GB_STRING);
1415	const char *tagged_string = "[T1,T2] t12 [T3] t3[T4]t4[][]xxx[AA]aa[WW]w1 [WW]w2 [BB]bb [XX]x1 [XX]x2 [yy] yy [Y] y [EMPTY][FAKE,EMPTY]fake[ SP1ST, SPACED, PADDED ,UNSPACED,_SCORED_,FOLLOWED ,FOLLAST ] spaced [LAST] last ";
1416	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1417
1418	TEST_EXPECT_FULL_CONTENT(NULp);
1419	TEST_EXPECT_FULL_CONTENT("");
1420	TEST_REJECT_TAG_CONTENT(" "); // searches for tag '_' (no such tag)
1421
1422	TEST_EXPECT_TAG_CONTENT("T1", "t12");
1423	TEST_EXPECT_TAG_CONTENT("T2", "t12");
1424	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1425	TEST_EXPECT_TAG_CONTENT("T4", "t4[][]xxx");
1426
1427	TEST_EXPECT_TAG_CONTENT("AA", "aa");
1428	TEST_EXPECT_TAG_CONTENT("BB", "bb");
1429	TEST_EXPECT_TAG_CONTENT("WW", "w1"); // now finds 1st occurrence of [WW]
1430	TEST_EXPECT_TAG_CONTENT("XX", "x1");
1431	TEST_EXPECT_TAG_CONTENT("YY", "yy");
1432	TEST_EXPECT_TAG_CONTENT("yy", "yy");
1433
1434	TEST_REJECT_TAG_CONTENT("Y");
1435	// TEST_EXPECT_TAG_CONTENT("Y", "y"); // @@@ tags with length == 1 are never found -> should be handled when used via GUI
1436
1437	TEST_EXPECT_TAG_CONTENT("EMPTY", "fake"); // now reports 1st non-empty content
1438	TEST_EXPECT_TAG_CONTENT("FAKE", "fake");
1439	TEST_EXPECT_TAG_CONTENT("fake", "fake");
1440
1441	TEST_REJECT_TAG_CONTENT("NOSUCHTAG");
1442	TEST_EXPECT_TAG_CONTENT("SPACED", "spaced");
1443	TEST_EXPECT_TAG_CONTENT("SP1ST", "spaced");
1444	TEST_REJECT_TAG_CONTENT(" SPACED"); // dito (specified space is converted into '_' before searching tag)
1445	TEST_REJECT_TAG_CONTENT("_SPACED"); // not found (tag stored with space, search performed for '_SPACED')
1446	TEST_EXPECT_TAG_CONTENT("PADDED", "spaced");
1447	TEST_EXPECT_TAG_CONTENT("FOLLOWED", "spaced");
1448	TEST_EXPECT_TAG_CONTENT("FOLLAST", "spaced");
1449
1450	TEST_EXPECT_TAG_CONTENT("_SCORED_", "spaced");
1451	TEST_EXPECT_TAG_CONTENT(" SCORED ", "spaced");
1452	TEST_EXPECT_TAG_CONTENT("UNSPACED", "spaced");
1453	TEST_EXPECT_TAG_CONTENT("LAST", "last");
1454
1455	// test incomplete tags
1456	tagged_string = "bla [WHATEVER hello";
1457	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1458	TEST_REJECT_TAG_CONTENT("WHATEVER");
1459
1460	tagged_string = "bla [T1] t1 [T2 t2 [T3] t3";
1461	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1462	TEST_EXPECT_TAG_CONTENT("T1", "t1 [T2 t2");
1463	TEST_REJECT_TAG_CONTENT("T2"); // tag is unclosed
1464	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1465
1466	// test pathological tags
1467	tagged_string = "bla [T1] t1 [ ] sp1 [ ] sp2 [___] us [T3] t3 [_a] a";
1468	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1469	TEST_EXPECT_TAG_CONTENT("T1", "t1 [ ] sp1 [ ] sp2");
1470	TEST_EXPECT_FULL_CONTENT("");
1471	TEST_REJECT_TAG_CONTENT(" ");
1472	TEST_REJECT_TAG_CONTENT(" ");
1473	TEST_REJECT_TAG_CONTENT(",");
1474	TEST_EXPECT_TAG_CONTENT(", a", "a"); // searches for tag '_a'
1475	TEST_EXPECT_TAG_CONTENT(", a,", "a"); // dito
1476	TEST_EXPECT_TAG_CONTENT(", ,a,", "a"); // dito
1477	TEST_EXPECT_TAG_CONTENT(" ", "us");
1478	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1479	}
1480	GB_close(gb_main);
1481	}
1482
// Calls GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv) and expects the
// resulting string copy to equal 'expected' (without any error being exported).
// Requires a variable named 'callEnv' in the scope where the macro is used.
1483	#define TEST_EXPECT_EVAL_TAGGED(in,dtag,tag,aci,expected) do{ \
1484	TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED( \
1485	GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1486	expected); \
1487	}while(0)
1488
// Calls GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv) and expects it to
// deliver no result, while exporting an error that contains 'expectedErrorPart'.
// Requires a variable named 'callEnv' in the scope where the macro is used.
1489	#define TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(in,dtag,tag,aci,expectedErrorPart) do{ \
1490	TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS( \
1491	GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1492	expectedErrorPart); \
1493	}while(0)
1494
1495	__ATTR__REDUCED_OPTIMIZE void TEST_tagged_eval() {
1496	GB_shell shell;
1497	GBDATA *gb_main = GB_open("TEST_loadsave.arb", "r");
1498	{
1499	GB_transaction ta(gb_main);
1500	GBL_env env(gb_main, "tree_missing");
1501
1502	{
1503	GBDATA *gb_species = GBT_find_species(gb_main, "MhcBurto");
1504	TEST_REJECT_NULL(gb_species);
1505	GBL_call_env callEnv(gb_species, env);
1506
1507	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", "", "[DEF] bla");
1508	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", NULp, "[DEF] bla");
1509	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", ":bla=blub", "[DEF] bla");
1510	TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", ":bla=blub", "[TAG] blub");
1511	TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", "len", "[TAG] 3");
1512
1513	// empty tags:
1514	TEST_EXPECT_EVAL_TAGGED("[empty] ", "def", "empty", NULp, "");
1515	TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1516	TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1517	TEST_EXPECT_EVAL_TAGGED("[empty][filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1518	TEST_EXPECT_EVAL_TAGGED("[filled] xxx [empty]", "def", "empty", NULp, "[FILLED] xxx");
1519
1520	#define THREE_TAGS "[TAG] tag [tip] tip [top] top"
1521	#define THREE_TAGS_UPCASE "[TAG] tag [TIP] tip [TOP] top"
1522
1523	// dont eval:
1524	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", NULp, THREE_TAGS_UPCASE);
1525	// eval SRT:
1526	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", ":=<>", "[TAG] <tag> [TIP] tip [TOP] top");
1527	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tag", ":=<>", "[TAG] <tag> [TIP] tip [TOP] top");
1528	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":=()", "[TAG] tag [TIP] (tip) [TOP] top");
1529	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TIP", ":=()", "[TAG] tag [TIP] (tip) [TOP] top");
1530	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=", "[TAG] tag [TOP] top"); // tag emptied by SRT was removed from result
1531	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", ":=-*1", "[TAG] tag [TIP] tip [TOP] top-top");
1532	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":i=o", "[TAG] tag [TIP,TOP] top"); // merge tags
1533	// eval ACI:
1534	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", "len", "[TAG] tag [TIP] 3 [TOP] top");
1535	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", "len", "[TAG] tag [TIP] tip [TOP] 3");
1536
1537	// test SRT/ACI errors:
1538	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", ":*", "no '=' found");
1539	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("untagged", "def", "def", ":*", "no '=' found");
1540	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", "illcmd", "Unknown command 'illcmd'");
1541	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("un [tagged", "def", "def", "illcmd", "Unknown command 'illcmd'");
1542
1543	// no error raised, if expression not applied:
1544	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "no", "illcmd", THREE_TAGS_UPCASE);
1545
1546	// incomplete tags
1547	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":=<>", "[DEF] <{no tag>");
1548	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":* =<2,*1>", "[DEF] <tag,{no>");
1549	TEST_EXPECT_EVAL_TAGGED("[no [tag", "def", "def", ":* =<2,*1>", "[DEF] <{tag,{no>");
1550	TEST_EXPECT_EVAL_TAGGED("[no [tag] xx", "def", "def", ":* =<2,*1>", "[DEF] {no [TAG] xx"); // SRT changes nothing here (no match)
1551	TEST_EXPECT_EVAL_TAGGED("[no [tag[]", "def", "def", ":* =<2,*1>", "[DEF] <{tag{},{no>");
1552	TEST_EXPECT_EVAL_TAGGED("[no [tag[] xx","def", "def", ":* =<2,*1>", "[DEF] <{tag{} xx,{no>");
1553	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":* =<2,*1>", "[DEF] <tag,no>");
1554	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":no=yes", "[DEF] {yes tag");
1555	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":no=yes", "[DEF] yes tag");
1556	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "DEF", ":no=yes", "[DEF] yes tag");
1557	TEST_EXPECT_EVAL_TAGGED("no tag", "DEF", "def", ":no=yes", "[DEF] yes tag");
1558	TEST_EXPECT_EVAL_TAGGED("kept [trunk", "def", "def", ":=<>", "[DEF] <kept {trunk>");
1559	TEST_EXPECT_EVAL_TAGGED("kept", "def", "def", ":=<>", "[DEF] <kept>");
1560	}
1561
1562	{
1563	GBDATA *gb_species = GBT_find_species(gb_main, "MetMazei");
1564	TEST_REJECT_NULL(gb_species);
1565	GBL_call_env callEnv(gb_species, env);
1566
1567	// run scripts using context:
1568	TEST_EXPECT_EVAL_TAGGED("[T1,T2] name='$n'", "def", "T1", ":$n=*(name)", "[T1] name='MetMazei' [T2] name='$n'");
1569	TEST_EXPECT_EVAL_TAGGED("[T1,T2] seqlen=$l", "def", "T2", ":$l=*(\|sequence\|len)", "[T1] seqlen=$l [T2] seqlen=165");
1570	TEST_EXPECT_EVAL_TAGGED("[T1,T2] nuc", "def", "T1", "dd;\"=\";command(sequence\|count(ACGTUN))", "[T1] nuc=66 [T2] nuc");
1571
1572	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax='$t'", "def", "def", ":$t=*(\|taxonomy(2))", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1573	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax", "def", "def", "dd;\"=\";taxonomy(2)", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1574
1575	// content before 1st tag:
1576	TEST_EXPECT_EVAL_TAGGED("untagged [tag] tagged", "def", "tag", ":g=G", "[DEF] untagged [TAG] taGGed");
1577	TEST_EXPECT_EVAL_TAGGED(" [tag] tagged", "def", "tag", ":g=G", "[TAG] taGGed");
1578
1579	// test elimination of leading/trailing whitespace:
1580	TEST_EXPECT_EVAL_TAGGED(" untagged ", "def", "def", ":g=G", "[DEF] untaGGed"); // untagged content
1581	TEST_EXPECT_EVAL_TAGGED("[tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed");
1582	TEST_EXPECT_EVAL_TAGGED(" [trail] trail [tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed [TRAIL] trail");
1583
1584	#define MIXED_TAGS "[tag] tag [tip,top] tiptop [xx,yy,zz] zzz"
1585
1586	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "tip", ":tip=top", "[TAG] tag [TIP] toptop [TOP] tiptop [XX,YY,ZZ] zzz");
1587	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "yy", ":zzz=tiptop", "[TAG] tag [TIP,TOP,YY] tiptop [XX,ZZ] zzz");
1588	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "top", ":tiptop=zzz", "[TAG] tag [TIP] tiptop [TOP,XX,YY,ZZ] zzz");
1589	}
1590	}
1591	GB_close(gb_main);
1592	}
1593
1594	void TEST_log_action() {
1595	for (int stamped = 0; stamped<=1; ++stamped) {
1596	TEST_ANNOTATE(GBS_global_string("stamped=%i", stamped));
1597	{
1598	char *logged = GBS_log_action_to("comment", "action", stamped);
1599	if (stamped) {
1600	TEST_EXPECT_CONTAINS(logged, "comment\n");
1601	TEST_EXPECT_CONTAINS(logged, "action\n");
1602	}
1603	else {
1604	TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1605	}
1606	free(logged);
1607	}
1608	{
1609	char *logged = GBS_log_action_to("comment\n", "action", stamped);
1610	if (stamped) {
1611	TEST_EXPECT_CONTAINS(logged, "comment\n");
1612	TEST_EXPECT_CONTAINS(logged, "action\n");
1613	}
1614	else {
1615	TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1616	}
1617	free(logged);
1618	}
1619	{
1620	char *logged = GBS_log_action_to("", "action", stamped);
1621	if (stamped) {
1622	TEST_EXPECT_EQUAL(logged[0], '\n');
1623	TEST_EXPECT_CONTAINS(logged, "action\n");
1624	}
1625	else {
1626	TEST_EXPECT_EQUAL(logged, "\naction\n");
1627	}
1628	free(logged);
1629	}
1630	{
1631	char *logged = GBS_log_action_to(NULp, "action\n", stamped); // test action with trailing LF
1632	if (stamped) {
1633	TEST_EXPECT_DIFFERENT(logged[0], '\n');
1634	TEST_EXPECT_CONTAINS(logged, "action\n");
1635	}
1636	else {
1637	TEST_EXPECT_EQUAL(logged, "action\n");
1638	}
1639	free(logged);
1640	}
1641	}
1642	}
1643	TEST_PUBLISH(TEST_log_action);
1644
1645	#endif // UNIT_TESTS
1646

Note: See TracBrowser for help on using the repository browser.

Download in other formats: