Context Navigation

source: tags/ms_ra2q56/ARBDB/adstring.cxx

Visit:

Last change on this file was 17899, checked in by westram, 7 years ago
refactored GB_check_hkey: avoids strdup; using check_key() avoids multiple calls to strlen in former calls to GB_check_key
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 66.6 KB

Line
1	// =============================================================== //
2	// //
3	// File : adstring.cxx //
4	// Purpose : various string functions //
5	// //
6	// Institute of Microbiology (Technical University Munich) //
7	// http://www.arb-home.de/ //
8	// //
9	// =============================================================== //
10
11	#include <arb_backtrace.h>
12	#include <arb_strbuf.h>
13	#include <arb_defs.h>
14	#include <arb_str.h>
15
16	#include "gb_key.h"
17	#include "gb_aci.h"
18
19	#include <SigHandler.h>
20
21	#include <execinfo.h>
22
23	#include <cstdarg>
24	#include <cctype>
25	#include <cerrno>
26	#include <ctime>
27	#include <setjmp.h>
28
29	#include <valgrind.h>
30
31	static char GBS_string_2_key_with_exclusions(const char str, const char *additional) {
32	// converts any string to a valid key (all chars in 'additional' are additionally allowed)
33	char buf[GB_KEY_LEN_MAX+1];
34	int i;
35	int c;
36	for (i=0; i<GB_KEY_LEN_MAX;) {
37	c = *(str++);
38	if (!c) break;
39
40	if (c==' ' \|\| c == '_') {
41	buf[i++] = '_';
42	}
43	else if (isalnum(c) \|\| strchr(additional, c)) {
44	buf[i++] = c;
45	}
46	}
47	for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_';
48	buf[i] = 0;
49	return ARB_strdup(buf);
50	}
51
52	char GBS_string_2_key(const char str) { // converts any string to a valid key
53	return GBS_string_2_key_with_exclusions(str, "");
54	}
55
56	char GB_memdup(const char source, size_t len) {
57	char *dest = ARB_alloc<char>(len);
58	memcpy(dest, source, len);
59	return dest;
60	}
61
// error message shared by check_key() and GB_check_hkey() (the pointer itself never changes)
static const char *const EMPTY_KEY_NOT_ALLOWED = "Empty key is not allowed";
63
64	inline __ATTR__USERESULT GB_ERROR check_key(const char *key, int len) {
65	// test if 'key' is a valid non-hierarchical database key.
66	// i.e. contains only letters, numbers and '_' and
67	// is inside length constraints GB_KEY_LEN_MIN/GB_KEY_LEN_MAX.
68
69	if (len < GB_KEY_LEN_MIN) {
70	if (!len) return EMPTY_KEY_NOT_ALLOWED;
71	return GBS_global_string("Invalid key '%s': too short", key);
72	}
73	if (len > GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key);
74
75	for (int i = 0; i<len; ++i) {
76	char c = key[i];
77	bool validChar = isalnum(c) \|\| c == '_';
78	if (!validChar) {
79	return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key);
80	}
81	}
82
83	return NULp;
84	}
85	GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT
86	// test if 'key' is a valid non-hierarchical database key
87	// (i.e. a valid name for a container or field).
88
89	return check_key(key, key ? strlen(key) : 0);
90	}
91
92	GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT
93	// test whether 'key' is a hierarchical key,
94	// i.e. consists of subkeys (accepted by GB_check_key), separated by '/'.
95
96	GB_ERROR err = NULp;
97
98	if (key && key[0] == '/') ++key; // accept + remove leading '/'
99	if (!key \|\| !key[0]) err = EMPTY_KEY_NOT_ALLOWED; // reject NULp, empty (or single slash)
100
101	while (!err && key[0]) {
102	int nonSlashPart = strcspn(key, "/");
103
104	err = check_key(key, nonSlashPart);
105	if (!err) {
106	key += nonSlashPart;
107	if (key[0] == '/') {
108	++key;
109	if (key[0] == 0) { // nothing after slash
110	err = EMPTY_KEY_NOT_ALLOWED;
111	}
112	}
113	else {
114	gb_assert(key[0] == 0);
115	}
116	}
117	}
118	return err;
119	}
120
121	// ----------------------------------------------
122	// escape/unescape characters in strings
123
124	char GBS_escape_string(const char str, const char *chars_to_escape, char escape_char) {
125	/*! escape characters in 'str'
126	*
127	* uses a special escape-method, which eliminates all 'chars_to_escape' completely
128	* from str (this makes further processing of the string more easy)
129	*
130	* @param str string to escape
131	*
132	* @param escape_char is the character used for escaping. For performance reasons it
133	* should be a character rarely used in 'str'.
134	*
135	* @param chars_to_escape may not contain 'A'-'Z' (these are used for escaping)
136	* and it may not be longer than 26 bytes
137	*
138	* @return heap copy of escaped string
139	*
140	* Inverse of GBS_unescape_string()
141	*/
142
143	int len = strlen(str);
144	char buffer = ARB_alloc<char>(2len+1);
145	int j = 0;
146	int i;
147
148	gb_assert(strlen(chars_to_escape) <= 26);
149	gb_assert(!strchr(chars_to_escape, escape_char)); // escape_char may not be included in chars_to_escape
150
151	for (i = 0; str[i]; ++i) {
152	if (str[i] == escape_char) {
153	buffer[j++] = escape_char;
154	buffer[j++] = escape_char;
155	}
156	else {
157	const char *found = strchr(chars_to_escape, str[i]);
158	if (found) {
159	buffer[j++] = escape_char;
160	buffer[j++] = (found-chars_to_escape+'A');
161
162	gb_assert(found[0]<'A' \|\| found[0]>'Z'); // illegal character in chars_to_escape
163	}
164	else {
165
166	buffer[j++] = str[i];
167	}
168	}
169	}
170	buffer[j] = 0;
171
172	return buffer;
173	}
174
175	char GBS_unescape_string(const char str, const char *escaped_chars, char escape_char) {
176	//! inverse of GB_escape_string() - for params see there
177
178	int len = strlen(str);
179	char *buffer = ARB_alloc<char>(len+1);
180	int j = 0;
181	int i;
182
183	#if defined(ASSERTION_USED)
184	int escaped_chars_len = strlen(escaped_chars);
185	#endif // ASSERTION_USED
186
187	gb_assert(strlen(escaped_chars) <= 26);
188	gb_assert(!strchr(escaped_chars, escape_char)); // escape_char may not be included in chars_to_escape
189
190	for (i = 0; str[i]; ++i) {
191	if (str[i] == escape_char) {
192	if (str[i+1] == escape_char) {
193	buffer[j++] = escape_char;
194	}
195	else {
196	int idx = str[i+1]-'A';
197
198	gb_assert(idx >= 0 && idx<escaped_chars_len);
199	buffer[j++] = escaped_chars[idx];
200	}
201	++i;
202	}
203	else {
204	buffer[j++] = str[i];
205	}
206	}
207	buffer[j] = 0;
208
209	return buffer;
210	}
211
212	char *GBS_eval_env(GB_CSTR p) {
213	GB_ERROR error = NULp;
214	GB_CSTR ka;
215	GBS_strstruct *out = GBS_stropen(1000);
216
217	while ((ka = GBS_find_string(p, "$(", 0))) {
218	GB_CSTR kz = strchr(ka, ')');
219	if (!kz) {
220	error = GBS_global_string("missing ')' for envvar '%s'", p);
221	break;
222	}
223	else {
224	char *envvar = ARB_strpartdup(ka+2, kz-1);
225	int len = ka-p;
226
227	if (len) GBS_strncat(out, p, len);
228
229	GB_CSTR genv = GB_getenv(envvar);
230	if (genv) GBS_strcat(out, genv);
231
232	p = kz+1;
233	free(envvar);
234	}
235	}
236
237	if (error) {
238	GB_export_error(error);
239	GBS_strforget(out);
240	return NULp;
241	}
242
243	GBS_strcat(out, p); // copy rest
244	return GBS_strclose(out);
245	}
246
long GBS_gcgchecksum(const char *seq) {
    // GCGchecksum
    //
    // Sums up toupper(character) * weight over the zero-terminated 'seq'.
    // The weight counts 1..57 and then restarts at 1.
    // The result is the sum modulo 10000.

    long check = 0;
    long count = 0;

    // iterate as unsigned char: toupper() is undefined for negative values
    for (const unsigned char *s = (const unsigned char *)seq; *s; ++s) {
        count++;
        check += count * toupper(*s);
        if (count == 57) count = 0;
    }

    return check % 10000;
}
263
// CRC-32 lookup table: one entry for each possible byte value
// (made by makecrc.c of ZIP source). Used by GB_checksum().
uint32_t crctab[] = {
    0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
    0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
    0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
    0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
    0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
    0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
    0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
    0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
    0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
    0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
    0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
    0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
    0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
    0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
    0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
    0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
    0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
    0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
    0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
    0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
    0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
    0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
    0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
    0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
    0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
    0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
    0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
    0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
    0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
    0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
    0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
    0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
    0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
    0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
    0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
    0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
    0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
    0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
    0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
    0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
    0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
    0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
    0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
    0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
    0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
    0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
    0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
    0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
    0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
    0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
    0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
    0x2d02ef8dL
};
319
320	uint32_t GB_checksum(const char seq, long length, int ignore_case, const char exclude) {
321	/* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source
322	* if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too)
323	*/
324
325	unsigned long c = 0xffffffffL;
326	long n = length;
327	int i;
328	int tab[256]; // @@@ avoid recalc for each call
329
330	for (i=0; i<256; i++) { // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
331	tab[i] = ignore_case ? toupper(i) : i;
332	}
333
334	if (exclude) {
335	while (1) {
336	int k = (unsigned char )exclude++;
337	if (!k) break;
338	tab[k] = 0;
339	if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0;
340	}
341	}
342
343	while (n--) {
344	i = tab[(const unsigned char )seq++];
345	if (i) {
346	c = crctab[((int) c ^ i) & 0xff] ^ (c >> 8);
347	}
348	}
349	c = c ^ 0xffffffffL;
350	return c;
351	}
352
353	uint32_t GBS_checksum(const char seq, int ignore_case, const char exclude) {
354	// if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too)
355	return GB_checksum(seq, strlen(seq), ignore_case, exclude);
356	}
357
size_t GBS_shorten_repeated_data(char *data) {
    // Compresses runs of identical characters inside 'data' (IN PLACE!).
    // Runs of 5 or more characters are written as 'c{count}', shorter runs stay as they are,
    // e.g. "..............................ACGT....................TGCA"
    //   -> ".{30}ACGT.{20}TGCA"
    //
    // Returns the length of the shortened string.

#if defined(DEBUG)
    size_t orgLen = strlen(data);
#endif // DEBUG

    char *const  start  = data;
    char        *write  = data; // next position to write (never overtakes 'read')
    const char  *read   = data;
    char         last   = *read++;
    size_t       repeat = 1;

    while (last) {
        char curr = *read++;
        if (curr == last) {
            ++repeat;
            continue;
        }

        // run of 'last' has ended -> store it
        if (repeat<5) {
            memset(write, last, repeat); // insert plain
            write += repeat;
        }
        else {
            write += sprintf(write, "%c{%zu}", last, repeat); // insert repeat count
        }

        last   = curr;
        repeat = 1;
    }

    *write = 0;

#if defined(DEBUG)
    gb_assert(strlen(start) <= orgLen);
#endif // DEBUG

    return write-start;
}
398
399
400	// ------------------------------------------
401	// helper classes for tagged fields
402
403	class TextRef {
404	const char *data; // has no terminal zero-byte!
405	int length;
406
407	public:
408	TextRef() : data(NULp), length(-1) {}
409	TextRef(const char *data_, int length_) : data(data_), length(length_) {}
410	explicit TextRef(const char *zeroTerminated) : data(zeroTerminated), length(strlen(data)) {}
411
412	bool defined() const { return data && length>0; }
413	const char *get_data() const { return data; }
414	int get_length() const { return length; }
415
416	const char *get_following() const { return data ? data+length : NULp; }
417
418	int compare(const char *str) const {
419	gb_assert(defined());
420	int cmp = strncmp(get_data(), str, get_length());
421	if (!cmp) {
422	if (str[get_length()]) {
423	cmp = -1; // right side contains more content
424	}
425	}
426	return cmp;
427	}
428	int icompare(const char *str) const {
429	gb_assert(defined());
430	int cmp = strncasecmp(get_data(), str, get_length());
431	if (!cmp) {
432	if (str[get_length()]) {
433	cmp = -1; // right side contains more content
434	}
435	}
436	return cmp;
437	}
438	char *copy() const { return ARB_strndup(get_data(), get_length()); }
439
440	char head() const { return defined() ? data[0] : 0; }
441	char tail() const { return defined() ? data[length-1] : 0; }
442
443	TextRef headTrimmed() const {
444	if (defined()) {
445	for (int s = 0; s<length; ++s) {
446	if (!isspace(data[s])) {
447	return TextRef(data+s, length-s);
448	}
449	}
450	}
451	return TextRef();
452	}
453	TextRef tailTrimmed() const {
454	if (defined()) {
455	for (int s = length-1; s>=0; --s) {
456	if (!isspace(data[s])) {
457	return TextRef(data, s+1);
458	}
459	}
460	}
461	return TextRef();
462	}
463
464	TextRef trimmed() const {
465	return headTrimmed().tailTrimmed();
466	}
467
468	inline TextRef partBefore(const TextRef& subref) const;
469	inline TextRef partBehind(const TextRef& subref) const;
470
471	bool is_part_of(const TextRef& other) const {
472	gb_assert(defined() && other.defined());
473	return get_data()>=other.get_data() && get_following()<=other.get_following();
474	}
475
476	const char find(char c) const { return reinterpret_cast<const char>(memchr(get_data(), c, get_length())); }
477	};
478
479	static TextRef textBetween(const TextRef& t1, const TextRef& t2) {
480	const char *behind_d1 = t1.get_following();
481	const char *d2 = t2.get_data();
482
483	if (behind_d1 && d2 && behind_d1<d2) {
484	return TextRef(behind_d1, d2-behind_d1);
485	}
486	return TextRef();
487	}
488
489	inline TextRef TextRef::partBefore(const TextRef& subref) const {
490	gb_assert(subref.is_part_of(*this));
491	return textBetween(TextRef(get_data(), 0), subref);
492	}
493	inline TextRef TextRef::partBehind(const TextRef& subref) const {
494	gb_assert(subref.is_part_of(*this));
495	return TextRef(subref.get_following(), get_following()-subref.get_following());
496	}
497
498	class TaggedContentParser {
499	TextRef wholeInput;
500	TextRef tag, content; // current position
501	TextRef restTags; // store (rest of) multiple tags (e.g. from "[t1,t2]")
502	TextRef nextBrackets; // next "[..]" part (behind current tag)
503
504	void findBrackets(const char *in) {
505	nextBrackets = TextRef();
506	const char *tag_start = strchr(in, '[');
507	if (tag_start) {
508	const char *tag_end = strchr(tag_start, ']');
509	if (tag_end) {
510	if (tag_end == tag_start+1) { // empty tag -> use as content
511	findBrackets(tag_end+1);
512	}
513	else {
514	const char unwanted_bracket = reinterpret_cast<const char>(memchr(tag_start+1, '[', tag_end-tag_start-1));
515	if (unwanted_bracket) { // tagname contains '[' -> step to next bracket
516	findBrackets(unwanted_bracket);
517	}
518	else {
519	TextRef name = TextRef(tag_start+1, tag_end-tag_start-1).trimmed();
520	if (name.defined()) { // not only whitespace inside brackets
521	nextBrackets = TextRef(tag_start, tag_end-tag_start+1);
522	}
523	else {
524	findBrackets(tag_end+1);
525	}
526	}
527	}
528	}
529	}
530	}
531
532	void parse_next_multi_tag() {
533	gb_assert(restTags.defined());
534	TextRef comma(restTags.find(','), 1);
535	if (comma.defined()) {
536	tag = restTags.partBefore(comma).tailTrimmed();
537	restTags = restTags.partBehind(comma).headTrimmed();
538	}
539	else {
540	tag = restTags;
541	restTags = TextRef();
542	}
543	}
544	void parse_next() {
545	if (restTags.defined()) {
546	parse_next_multi_tag();
547	}
548	else if (nextBrackets.defined()) {
549	TextRef brackets = nextBrackets;
550	findBrackets(brackets.get_following());
551
552	content = (nextBrackets.defined() ? textBetween(brackets, nextBrackets) : wholeInput.partBehind(brackets)).trimmed();
553
554	gb_assert(brackets.head() == '[' && brackets.tail() == ']');
555
556	TextRef tags = TextRef(brackets.get_data()+1, brackets.get_length()-2).trimmed();
557	gb_assert(tags.defined());
558
559	restTags = tags;
560	parse_next_multi_tag();
561	}
562	else {
563	tag = content = TextRef();
564	gb_assert(!has_part());
565	}
566	}
567	void parse_first() {
568	gb_assert(!has_part());
569	findBrackets(wholeInput.get_data());
570	content = (nextBrackets.defined() ? wholeInput.partBefore(nextBrackets) : wholeInput).trimmed();
571	if (!content.defined()) parse_next(); // no untagged prefix seen -> directly goto first tag
572	}
573
574	public:
575	TaggedContentParser(const char *input_) : wholeInput(input_) { parse_first(); }
576
577	bool has_tag() const { return tag.defined(); }
578	bool has_content() const { return content.defined(); }
579
580	void next() { parse_next(); }
581	bool has_part() const { return has_tag() \|\| has_content(); } // false -> parser has finished
582
583	const TextRef& get_tag() const { return tag; }
584	const TextRef& get_content() const { return content; }
585	};
586
587
588	// -------------------------------------------
589	// helper function for tagged fields
590
591	static void g_bs_add_value_tag_to_hash(GB_HASH hash, const char tag, char *value) {
592	if (!value[0]) return; // ignore empty values
593
594	{
595	char *p;
596	p = value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{'
597	p = value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}'
598	}
599
600	GB_HASH sh = (GB_HASH )GBS_read_hash(hash, value);
601	if (!sh) {
602	sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent
603	GBS_write_hash(hash, value, (long)sh);
604	}
605	GBS_write_hash(sh, tag, 1);
606	}
607
608	static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH hash, char s, char default_tag, const char del) {
609	TaggedContentParser parser(s);
610	while (parser.has_part()) {
611	if (parser.has_content()) {
612	char *content = parser.get_content().copy();
613	if (parser.has_tag()) {
614	char *tag = parser.get_tag().copy();
615	if (!del \|\| ARB_stricmp(tag, del) != 0) {
616	g_bs_add_value_tag_to_hash(hash, tag, content);
617	}
618	free(tag);
619	}
620	else {
621	g_bs_add_value_tag_to_hash(hash, default_tag, content); // no tag found, use default tag
622	}
623	free(content);
624	}
625	parser.next();
626	}
627	}
628
629	static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH hash, char s, char default_tag, const char rtag, const char *aci, GBL_call_env& env) {
630	GB_ERROR error = NULp;
631
632	TaggedContentParser parser(s);
633	while (parser.has_part() && !error) {
634	if (parser.has_content()) {
635	char *value = parser.get_content().copy();
636	char *tag = parser.has_tag() ? parser.get_tag().copy() : strdup(default_tag);
637
638	if (rtag && ARB_stricmp(tag, rtag) == 0) {
639	freeset(value, GB_command_interpreter_in_env(value, aci, env));
640	if (!value) error = GB_await_error();
641	}
642
643	if (!error) g_bs_add_value_tag_to_hash(hash, tag, value);
644
645	free(tag);
646	free(value);
647	}
648	parser.next();
649	}
650
651	return error;
652	}
653
654	static void g_bs_merge_tags(const char tag, long /val/, void cd_sub_result) {
655	GBS_strstruct sub_result = (GBS_strstruct)cd_sub_result;
656
657	GBS_strcat(sub_result, tag);
658	GBS_strcat(sub_result, ",");
659	}
660
661	static void g_bs_read_tagged_hash(const char value, long subhash, void cd_g_bs_collect_tags_hash) {
662	static int counter = 0;
663
664	GBS_strstruct *sub_result = GBS_stropen(100);
665	GBS_hash_do_const_sorted_loop((GB_HASH *)subhash, g_bs_merge_tags, GBS_HCF_sortedByKey, sub_result);
666	GBS_intcat(sub_result, counter++); // create a unique number
667
668	char *str = ARB_strupper(GBS_strclose(sub_result));
669
670	GB_HASH g_bs_collect_tags_hash = (GB_HASH)cd_g_bs_collect_tags_hash;
671	GBS_write_hash(g_bs_collect_tags_hash, str, (long)ARB_strdup(value)); // send output to new hash for sorting
672
673	free(str);
674	}
675
676	static void g_bs_read_final_hash(const char tag, long value, void cd_merge_result) {
677	GBS_strstruct merge_result = (GBS_strstruct)cd_merge_result;
678
679	char lk = const_cast<char>(strrchr(tag, ','));
680	if (lk) { // remove number at end
681	*lk = 0;
682
683	if (!merge_result->empty()) merge_result->put(' '); // skip trailing space
684	merge_result->put('[');
685	merge_result->cat(tag);
686	merge_result->put(']');
687	merge_result->put(' ');
688	}
689	merge_result->cat((char*)value);
690	}
691
692	static char g_bs_get_string_of_tag_hash(GB_HASH tag_hash) {
693	GBS_strstruct *merge_result = GBS_stropen(256);
694	GB_HASH *collect_tags_hash = GBS_create_dynaval_hash(512, GB_IGNORE_CASE, GBS_dynaval_free);
695
696	GBS_hash_do_const_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash
697	GBS_hash_do_const_sorted_loop(collect_tags_hash, g_bs_read_final_hash, GBS_HCF_sortedByKey, merge_result);
698
699	GBS_free_hash(collect_tags_hash);
700	return GBS_strclose(merge_result);
701	}
702
703	static long g_bs_free_hash_of_hashes_elem(const char /key/, long val, void ) {
704	GB_HASH hash = (GB_HASH)val;
705	if (hash) GBS_free_hash(hash);
706	return 0;
707	}
708	static void g_bs_free_hash_of_hashes(GB_HASH *hash) {
709	GBS_hash_do_loop(hash, g_bs_free_hash_of_hashes_elem, NULp);
710	GBS_free_hash(hash);
711	}
712
713	char GBS_merge_tagged_strings(const char s1, const char tag1, const char replace1, const char s2, const char tag2, const char *replace2) {
714	/* Create a tagged string from two tagged strings:
715	* a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string'
716	*
717	* if 's2' is not empty, then delete tag 'replace1' in 's1'
718	* if 's1' is not empty, then delete tag 'replace2' in 's2'
719	*
720	* (result should never be NULp)
721	*/
722
723	char *str1 = ARB_strdup(s1);
724	char *str2 = ARB_strdup(s2);
725	char *t1 = GBS_string_2_key(tag1);
726	char *t2 = GBS_string_2_key(tag2);
727	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
728
729	if (!s1[0]) replace2 = NULp;
730	if (!s2[0]) replace1 = NULp;
731
732	if (replace1 && !replace1[0]) replace1 = NULp;
733	if (replace2 && !replace2[0]) replace2 = NULp;
734
735	g_bs_convert_string_to_tagged_hash_with_delete(hash, str1, t1, replace1);
736	g_bs_convert_string_to_tagged_hash_with_delete(hash, str2, t2, replace2);
737
738	char *result = g_bs_get_string_of_tag_hash(hash);
739
740	g_bs_free_hash_of_hashes(hash);
741
742	free(t2);
743	free(t1);
744	free(str2);
745	free(str1);
746
747	return result;
748	}
749
750	char GBS_modify_tagged_string_with_ACI(const char s, const char dt, const char tag, const char *aci, GBL_call_env& env) {
751	/* if 's' is untagged, tag it with default tag 'dt'.
752	* if 'tag' is specified -> apply 'aci' to that part of the content of 's', which is tagged with 'tag' (i.e. look for '[tag]')
753	*
754	* if result is NULp, an error has been exported.
755	*/
756
757	char *str = ARB_strdup(s);
758	char *default_tag = GBS_string_2_key(dt);
759	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
760	char *result = NULp;
761
762	GB_ERROR error = g_bs_convert_string_to_tagged_hash_with_rewrite(hash, str, default_tag, tag, aci, env);
763
764	if (!error) {
765	result = g_bs_get_string_of_tag_hash(hash);
766	}
767	else {
768	GB_export_error(error);
769	}
770
771	g_bs_free_hash_of_hashes(hash);
772
773	free(default_tag);
774	free(str);
775
776	return result;
777	}
778
779	char GB_read_as_tagged_string(GBDATA gbd, const char *tagi) {
780	char *buf = GB_read_as_string(gbd);
781	if (buf && tagi && tagi[0]) {
782	TaggedContentParser parser(buf);
783
784	char *wantedTag = GBS_string_2_key(tagi);
785	char *contentFound = NULp;
786
787	while (parser.has_part() && !contentFound) {
788	if (parser.has_tag() && parser.get_tag().icompare(wantedTag) == 0) {
789	contentFound = parser.get_content().copy();
790	}
791	parser.next();
792	}
793	free(wantedTag);
794	free(buf);
795
796	return contentFound;
797	}
798	return buf;
799	}
800
801
/* be CAREFUL : this function is used to save ARB ASCII database (i.e. properties)
 * used as well to save perl macros
 *
 * when changing GBS_fwrite_string -> GBS_fread_string needs to be fixed as well
 *
 * always keep in mind, that many users have databases/macros written with older
 * versions of this function. They MUST load proper!!!
 *
 * Writes 'strngi' enclosed in double quotes to 'out', using the following encoding:
 * - '\n' and '\t' are written as \n and \t
 * - other characters ASCII 1..24 are written as \A..\X
 * - characters ASCII 25..31 are written as \0..\6
 * - '"' and '\' are prefixed with a backslash
 * - all other characters are written unchanged
 */
void GBS_fwrite_string(const char *strngi, FILE *out) {
    const unsigned char *strng = (const unsigned char *)strngi;
    int                  c;

    putc('"', out);

    while ((c = *strng++)) {
        if (c < 32) {
            putc('\\', out);
            if (c == '\n')
                putc('n', out);
            else if (c == '\t')
                putc('t', out);
            else if (c<25) {
                putc(c+'@', out); // characters ASCII 0..24 encoded as \@..\X (\n and \t are done above)
            }
            else {
                putc(c+('0'-25), out); // characters ASCII 25..31 encoded as \0..\6
            }
        }
        else if (c == '"') {
            putc('\\', out);
            putc('"', out);
        }
        else if (c == '\\') {
            putc('\\', out);
            putc('\\', out);
        }
        else {
            putc(c, out);
        }
    }
    putc('"', out);
}
844
845	/* Read a string from a file written by GBS_fwrite_string,
846	* Searches first '"'
847	*
848	* WARNING : changing this function affects perl-macro execution (read warnings for GBS_fwrite_string)
849	* any changes should be done in GBS_fconvert_string too.
850	*/
851
852	static char GBS_fread_string(FILE in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused!
853	GBS_strstruct *strstr = GBS_stropen(1024);
854	int x;
855
856	while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"'
857
858	if (x != EOF) {
859	while ((x = getc(in)) != '"') {
860	if (x == EOF) break;
861	if (x == '\\') {
862	x = getc(in); if (x==EOF) break;
863	if (x == 'n') {
864	GBS_chrcat(strstr, '\n');
865	continue;
866	}
867	if (x == 't') {
868	GBS_chrcat(strstr, '\t');
869	continue;
870	}
871	if (x>='@' && x <= '@' + 25) {
872	GBS_chrcat(strstr, x-'@');
873	continue;
874	}
875	if (x>='0' && x <= '9') {
876	GBS_chrcat(strstr, x-('0'-25));
877	continue;
878	}
879	// all other backslashes are simply skipped
880	}
881	GBS_chrcat(strstr, x);
882	}
883	}
884	return GBS_strclose(strstr);
885	}
886
887	/* does similar decoding as GBS_fread_string but works directly on an existing buffer
888	* (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!)
889	*
890	* inserts \0 behind decoded string (removes the closing '"')
891	* returns a pointer behind the end (") of the _encoded_ string
892	* returns NULp if a 0-character is found
893	*/
894	char GBS_fconvert_string(char buffer) {
895	char *t = buffer;
896	char *f = buffer;
897	int x;
898
899	gb_assert(f[-1] == '"');
900	// the opening " has already been read
901
902	while ((x = *f++) != '"') {
903	if (!x) break;
904
905	if (x == '\\') {
906	x = *f++;
907	if (!x) break;
908
909	if (x == 'n') {
910	*t++ = '\n';
911	continue;
912	}
913	if (x == 't') {
914	*t++ = '\t';
915	continue;
916	}
917	if (x>='@' && x <= '@' + 25) {
918	*t++ = x-'@';
919	continue;
920	}
921	if (x>='0' && x <= '9') {
922	*t++ = x-('0'-25);
923	continue;
924	}
925	// all other backslashes are simply skipped
926	}
927	*t++ = x;
928	}
929
930	if (!x) return NULp; // error (string should not contain 0-character)
931	gb_assert(x == '"');
932
933	t[0] = 0;
934	return f;
935	}
936
937	char GBS_replace_tabs_by_spaces(const char text) {
938	int tlen = strlen(text);
939	GBS_strstruct mfile = GBS_stropen(tlen 3/2 + 1);
940	int tabpos = 0;
941	int c;
942
943	while ((c=*(text++))) {
944	if (c == '\t') {
945	int ntab = (tabpos + 8) & 0xfffff8;
946	while (tabpos < ntab) {
947	GBS_chrcat(mfile, ' ');
948	tabpos++;
949	}
950	continue;
951	}
952	tabpos ++;
953	if (c == '\n') {
954	tabpos = 0;
955	}
956	GBS_chrcat(mfile, c);
957	}
958	return GBS_strclose(mfile);
959	}
960
961	char GBS_trim(const char str) {
962	// trim whitespace at beginning and end of 'str'
963	const char *whitespace = " \t\n";
964	while (str[0] && strchr(whitespace, str[0])) str++;
965
966	const char *end = strchr(str, 0)-1;
967	while (end >= str && strchr(whitespace, end[0])) end--;
968
969	return ARB_strpartdup(str, end);
970	}
971
972	static char dated_info(const char info) {
973	char *dated_info = NULp;
974	time_t date;
975
976	if (time(&date) != -1) {
977	char *dstr = ctime(&date);
978	char *nl = strchr(dstr, '\n');
979
980	if (nl) nl[0] = 0; // cut off LF
981
982	dated_info = GBS_global_string_copy("%s: %s", dstr, info);
983	}
984	else {
985	dated_info = ARB_strdup(info);
986	}
987	return dated_info;
988	}
989
990	char GBS_log_action_to(const char comment, const char *action, bool stamp) {
991	/*! concatenates 'comment' and 'action'.
992	* '\n' is appended to existing 'comment' and/or 'action' (if missing).
993	* @param comment may be NULp (=> result is 'action')
994	* @param action may NOT be NULp
995	* @param stamp true -> prefix current timestamp in front of 'action'
996	* @return heap copy of concatenation
997	*/
998	size_t clen = comment ? strlen(comment) : 0;
999	size_t alen = strlen(action);
1000
1001	GBS_strstruct new_comment = GBS_stropen(clen+1+(stamp ? 100 : 0)+alen+1+1); // + 2\n + \0 + space for stamp
1002
1003	if (comment) {
1004	GBS_strcat(new_comment, comment);
1005	if (clen == 0 \|\| comment[clen-1] != '\n') GBS_chrcat(new_comment, '\n');
1006	}
1007
1008	if (stamp) {
1009	char *dated_action = dated_info(action);
1010	GBS_strcat(new_comment, dated_action);
1011	free(dated_action);
1012	}
1013	else {
1014	GBS_strcat(new_comment, action);
1015	}
1016	if (alen == 0 \|\| action[alen-1] != '\n') GBS_chrcat(new_comment, '\n');
1017
1018	return GBS_strclose(new_comment);
1019	}
1020
1021	const char GBS_funptr2readable(void funptr, bool stripARBHOME) {
1022	// only returns module and offset for static functions :-(
1023	char **funNames = backtrace_symbols(&funptr, 1);
1024	const char *readable_fun = funNames[0];
1025
1026	if (stripARBHOME) {
1027	const char *ARBHOME = GB_getenvARBHOME();
1028	if (ARB_strBeginsWith(readable_fun, ARBHOME)) {
1029	readable_fun += strlen(ARBHOME)+1; // +1 hides slash behind ARBHOME
1030	}
1031	}
1032	return readable_fun;
1033	}
1034
1035	// --------------------------------------------------------------------------------
1036
1037	#ifdef UNIT_TESTS
1038
1039	#include <test_unit.h>
1040
1041	// #define TEST_TEST_MACROS
1042
// Helper functions used by TEST_signal_tests__crashtest (compiled only if ENABLE_CRASH_TESTS is defined):
// - provokesegv raises SIGSEGV, dont_provokesegv does nothing.
// - failassertion triggers a failing gb_assert (only if ASSERTION_USED is defined).
1043	#ifdef ENABLE_CRASH_TESTS
1044	static void provokesegv() { raise(SIGSEGV); }
1045	static void dont_provokesegv() {}
1046	# if defined(ASSERTION_USED)
1047	static void failassertion() { gb_assert(0); }
1048	# if defined(TEST_TEST_MACROS)
1049	static void dont_failassertion() {} // only referenced from the TEST_TEST_MACROS section of the crash test
1050	# endif
1051	static void provokesegv_does_not_fail_assertion() {
1052	// provokesegv does not raise assertion
1053	// -> the following assertion fails
1054	TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv);
1055	}
1056	# endif
1057	#endif
1058
// Self-test of the test framework's crash detection macros:
// checks that SEGV and assertion failures provoked by the helper functions above
// are detected, can be detected repeatedly and can be told apart.
1059	void TEST_signal_tests__crashtest() {
1060	// check whether we can test that no SEGV or assertion failure happened
1061	TEST_EXPECT_NO_SEGFAULT(dont_provokesegv);
1062
1063	// check whether we can test for SEGV and assertion failures
1064	TEST_EXPECT_SEGFAULT(provokesegv);
1065	TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1066
1067	// tests whether signal suppression works multiple times (by repeating tests)
1068	TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1069	TEST_EXPECT_SEGFAULT(provokesegv);
1070
1071	// test whether SEGV can be distinguished from assertion
1072	TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion);
1073
1074	// The following section is disabled, because it will
1075	// provoke test warnings (to test these warnings).
1076	// (enable it when changing any of these TEST_..-macros used here)
1077	#if defined(TEST_TEST_MACROS)
1078	TEST_EXPECT_NO_SEGFAULT__WANTED(provokesegv);
1079
1080	TEST_EXPECT_SEGFAULT__WANTED(dont_provokesegv);
1081	TEST_EXPECT_SEGFAULT__UNWANTED(provokesegv);
1082	#if defined(ASSERTION_USED)
1083	TEST_EXPECT_SEGFAULT__UNWANTED(failassertion);
1084	#endif
1085
1086	TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(dont_failassertion);
1087	TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(failassertion);
1088	TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(provokesegv_does_not_fail_assertion);
1089	#endif
1090	}
1091
// Expects that the buffer named 'strstr' in the calling scope currently contains 'content'
// (content is read via GBS_mempntr).
1092	#define EXPECT_CONTENT(content) TEST_EXPECT_EQUAL(GBS_mempntr(strstr), content)
1093
// Exercises the GBS_strstruct buffer functions: appending characters, integers, floats,
// strings and formatted text, cutting off the tail, and growing the buffer beyond the
// size requested at GBS_stropen. Each buffer is released with GBS_strforget.
1094	void TEST_GBS_strstruct() {
1095	{
1096	GBS_strstruct *strstr = GBS_stropen(1000); EXPECT_CONTENT("");
1097
1098	GBS_chrncat(strstr, 'b', 3); EXPECT_CONTENT("bbb");
1099	GBS_intcat(strstr, 17); EXPECT_CONTENT("bbb17");
1100	GBS_chrcat(strstr, '_'); EXPECT_CONTENT("bbb17_");
1101	GBS_floatcat(strstr, 3.5); EXPECT_CONTENT("bbb17_3.500000");
1102
1103	TEST_EXPECT_EQUAL(GBS_memoffset(strstr), 14);
1104	GBS_str_cut_tail(strstr, 13); EXPECT_CONTENT("b");
1105	GBS_strcat(strstr, "utter"); EXPECT_CONTENT("butter");
1106	GBS_strncat(strstr, "flying", 3); EXPECT_CONTENT("butterfly");
1107
1108	GBS_strnprintf(strstr, 200, "%c%s", ' ', "flutters");
1109	EXPECT_CONTENT("butterfly flutters");
1110
1111	GBS_strforget(strstr);
1112	}
1113	{
1114	// re-alloc smaller
1115	GBS_strstruct *strstr = GBS_stropen(500); EXPECT_CONTENT("");
1116	GBS_strforget(strstr);
1117	}
1118
1119	// trigger downsize of oversized block
1120	for (int i = 0; i<12; ++i) {
1121	GBS_strstruct *strstr = GBS_stropen(10);
1122	GBS_strforget(strstr);
1123	}
1124
1125	{
1126	GBS_strstruct *strstr = GBS_stropen(10);
1127	size_t oldbufsize = strstr->get_buffer_size();
1128	GBS_chrncat(strstr, 'x', 20); // trigger reallocation of buffer
1129
1130	TEST_EXPECT_DIFFERENT(oldbufsize, strstr->get_buffer_size()); // did we reallocate?
1131	EXPECT_CONTENT("xxxxxxxxxxxxxxxxxxxx");
1132	GBS_strforget(strstr);
1133	}
1134	}
1135
// Duplicates 'Long', applies GBS_shorten_repeated_data to the (modifiable) copy,
// expects the result to equal 'Short' and frees the copy.
1136	#define TEST_SHORTENED_EQUALS(Long,Short) do { \
1137	char *buf = ARB_strdup(Long); \
1138	GBS_shorten_repeated_data(buf); \
1139	TEST_EXPECT_EQUAL(buf, Short); \
1140	free(buf); \
1141	} while(0)
1142
// Expectations for GBS_shorten_repeated_data as pinned below:
// runs of 5 or more identical characters are written as "<char>{<count>}",
// shorter runs, unrepeated text and the empty string stay unchanged.
1143	void TEST_GBS_shorten_repeated_data() {
1144	TEST_SHORTENED_EQUALS("12345", "12345");
1145	TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc");
1146	TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc");
1147	TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc");
1148	TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc");
1149	TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc");
1150	TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc");
1151	TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc");
1152	TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc");
1153	TEST_SHORTENED_EQUALS("aaaabc", "aaaabc");
1154	TEST_SHORTENED_EQUALS("aaabc", "aaabc");
1155	TEST_SHORTENED_EQUALS("aabc", "aabc");
1156	TEST_SHORTENED_EQUALS("", "");
1157	}
1158
// printf-style templates for three-level hierarchical keys;
// the "%s" marks where a key fragment is inserted (first, middle or last level).
1159	static const char *hkey_format[] = {
1160	"/%s/bbb/ccc",
1161	"/aaa/%s/ccc",
1162	"/aaa/bbb/%s",
1163	};
1164
1165	inline const char useInHkey(const char fragment, size_t pos) {
1166	return GBS_global_string(hkey_format[pos], fragment);
1167	}
1168
// Both macros insert 'use' into every template of hkey_format (via useInHkey) and pass
// each resulting hierarchical key to GB_check_hkey. The key under test is set as test
// annotation while it is checked; the annotation is reset to NULp afterwards.

// Expects GB_check_hkey to report no error for any position of 'use'.
1169	#define TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(use) do { \
1170	for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1171	const char *hkey = useInHkey(use, i); \
1172	TEST_ANNOTATE(hkey); \
1173	TEST_EXPECT_NO_ERROR(GB_check_hkey(hkey)); \
1174	} \
1175	TEST_ANNOTATE(NULp); \
1176	} while(0)
1177
// Expects GB_check_hkey to report an error containing 'contains' for every position of 'use'.
1178	#define TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(use,contains) do { \
1179	for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1180	const char *hkey = useInHkey(use, i); \
1181	TEST_ANNOTATE(hkey); \
1182	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(hkey), contains); \
1183	} \
1184	TEST_ANNOTATE(NULp); \
1185	} while(0)
1186
1187
// Tests key validation by GB_check_key (plain keys) and GB_check_hkey (hierarchical keys):
// length limits, empty keys, invalid characters, NULp keys and slash edge cases.
1188	void TEST_DB_key_checks() {
1189	// plain keys
1190	const char *shortest = "ab";
1191	const char *too_long = "ab345678901234567890123456789012345678901234567890123456789012345";
1192	const char *too_short = shortest+1; // 'shortest' without its first character
1193	const char *longest = too_long+1; // 'too_long' without its first character
1194
1195	const char *empty = "";
1196	const char *slash = "sub/key";
1197	const char *dslash = "sub//key";
1198	const char *comma = "no,key";
1199	const char *minus = "no-key";
1200
1201	// obsolete GB_LINK syntax:
1202	const char *link = "link->syntax";
1203	const char *nowhere = "link->";
1204	const char *fromNw = "->syntax";
1205
1206	TEST_EXPECT_NO_ERROR(GB_check_key(shortest));
1207	TEST_EXPECT_NO_ERROR(GB_check_key(longest));
1208
1209	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_short), "too short");
1210	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_long), "too long");
1211	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(empty), "not allowed");
1212
1213	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(slash), "Invalid character '/'");
1214	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(dslash), "Invalid character '/'");
1215	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(comma), "Invalid character ','");
1216	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(minus), "Invalid character '-'");
1217	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(link), "Invalid character '-'");
1218	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(nowhere), "Invalid character '-'");
1219	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(fromNw), "Invalid character '-'");
1220
1221	// hierarchical keys
1222	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(shortest);
1223	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(longest);
1224
1225	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_short, "too short");
1226	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_long, "too long");
1227	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(empty, "not allowed");
1228
1229	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(slash); // a single slash is accepted inside a hierarchical key (unlike in a plain key)
1230	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(dslash, "Empty key is not allowed");
1231	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(comma, "Invalid character ','");
1232	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(minus, "Invalid character '-'");
1233	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(link, "Invalid character '-'");
1234	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(nowhere, "Invalid character '-'");
1235	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(fromNw, "Invalid character '-'");
1236
1237	// test NULp keys:
1238	TEST_EXPECT_ERROR_CONTAINS(GB_check_key (NULp), "Empty key is not allowed");
1239	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(NULp), "Empty key is not allowed");
1240
1241	// some edge cases for hierarchical keys:
1242	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//"), "Empty key is not allowed");
1243	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//key"), "Empty key is not allowed"); // @@@ is double slash compensated by GB_search etc? if yes -> accept here as well!
1244	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key//"), "Empty key is not allowed");
1245	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("/"), "Empty key is not allowed");
1246	TEST_EXPECT_NO_ERROR (GB_check_hkey("/key"));
1247	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key/"), "Empty key is not allowed"); // @@@ use better message? e.g. "invalid trailing '/'"
1248	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(""), "Empty key is not allowed");
1249	}
1250
// Converts 'str' with GBS_string_2_key, expects the result to equal 'expected'
// and to be accepted by GB_check_key; frees the generated key.
1251	#define TEST_STRING2KEY(str,expected) do { \
1252	char *as_key = GBS_string_2_key(str); \
1253	TEST_EXPECT_EQUAL(as_key, expected); \
1254	TEST_EXPECT_NO_ERROR(GB_check_key(as_key)); \
1255	free(as_key); \
1256	} while(0)
1257
// Tests GBS_string_2_key: spaces become '_', characters that are invalid in keys are dropped,
// too short input is padded with '_' and too long input is truncated.
1258	void TEST_DB_key_generation() {
1259	TEST_STRING2KEY("abc", "abc");
1260	TEST_STRING2KEY("a b c", "a_b_c");
1261
1262	// invalid chars
1263	TEST_STRING2KEY("string containing \"double-quotes\", 'quotes' and other:shit!*&^@!%@(",
1264	"string_containing_doublequotes_quotes_and_othershit");
1265
1266	// length tests
1267	TEST_STRING2KEY("a", "a_"); // too short
1268	TEST_STRING2KEY("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // too long
1269	"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1270	}
1271
// Tests the helper class TextRef (definedness, length, comparison, trimming) and
// TaggedContentParser, which iterates over the parts of a tagged string
// ("[tag1,tag2] content ..."): untagged content, tagged content, tags without content,
// empty/whitespace-only input and multiple tags sharing one content.
1272	void TEST_TaggedContentParser() {
1273	// test helper class TextRef:
1274	TEST_REJECT(TextRef().defined()); // default to undefined
1275	{
1276	TextRef bla("blakjahd", 3);
1277	TEST_EXPECT(bla.defined());
1278	TEST_EXPECT_EQUAL(bla.get_length(), 3);
1279
1280	TEST_EXPECT(bla.compare("bl") > 0);
1281	TEST_EXPECT(bla.compare("bla") == 0);
1282	TEST_EXPECT(bla.compare("blase") < 0);
1283
1284	TextRef spaced(" spaced "+1, 10);
1285	TEST_EXPECT(spaced.headTrimmed().compare("spaced ") == 0);
1286	TEST_EXPECT(spaced.tailTrimmed().compare(" spaced") == 0);
1287	TEST_EXPECT(spaced.trimmed ().compare("spaced") == 0);
1288	}
1289
1290	const char *text = " untagged [tag] tagged [empty] ";
1291
1292	TextRef cr_untagged(strstr(text, "untagged"), 8);
1293	TextRef cr_tagged (strstr(text, "tagged"), 6);
1294	TextRef tr_tag (strstr(text, "tag"), 3);
1295	TextRef tr_empty (strstr(text, "empty"), 5);
1296
1297	// test TaggedContentParser:
1298	{
1299	TaggedContentParser parser(text);
1300
1301	TEST_EXPECT(parser.has_part());
1302	TEST_REJECT(parser.has_tag());
1303	TEST_EXPECT(parser.get_content().compare("untagged") == 0);
1304
1305	parser.next();
1306
1307	TEST_EXPECT(parser.has_part());
1308	TEST_EXPECT(parser.get_tag ().compare("tag") == 0);
1309	TEST_EXPECT(parser.get_content().compare("tagged") == 0);
1310
1311	parser.next();
1312
1313	TEST_EXPECT(parser.has_part());
1314	TEST_EXPECT(parser.get_tag().compare("empty") == 0);
1315	TEST_REJECT(parser.has_content());
1316
1317	parser.next();
1318
1319	TEST_REJECT(parser.has_part());
1320	}
1321	{ // parse untagged input
1322	TaggedContentParser parser("hi");
1323	TEST_EXPECT(parser.has_part());
1324	TEST_REJECT(parser.has_tag());
1325	TEST_EXPECT(parser.get_content().compare("hi") == 0);
1326	parser.next();
1327	TEST_REJECT(parser.has_part());
1328	}
1329	{ // parse empty input
1330	TaggedContentParser empty(""); TEST_REJECT(empty.has_part());
1331	TaggedContentParser white(" \t\n "); TEST_REJECT(white.has_part());
1332	}
1333	{ // parse single tag w/o content
1334	TaggedContentParser parser(" [hello] ");
1335	TEST_EXPECT(parser.has_part());
1336	TEST_EXPECT(parser.get_tag().compare("hello") == 0);
1337	TEST_REJECT(parser.has_content());
1338	parser.next();
1339	TEST_REJECT(parser.has_part());
1340	}
1341	{ // parse multi-tags
1342	TaggedContentParser parser(" [ t1 , t2 ] t");
1343	TEST_EXPECT(parser.has_part());
1344	TEST_EXPECT(parser.get_tag().compare("t1") == 0);
1345	TEST_EXPECT(parser.get_content().compare("t") == 0);
1346	parser.next();
1347	TEST_EXPECT(parser.has_part());
1348	TEST_EXPECT(parser.get_tag().compare("t2") == 0);
1349	TEST_EXPECT(parser.get_content().compare("t") == 0);
1350	parser.next();
1351	TEST_REJECT(parser.has_part());
1352	}
1353	}
1354
// Calls GBS_merge_tagged_strings and compares the (heap allocated) result with 'expected'.
// Note the argument order: the macro takes (t1,t2,r1,r2,s1,s2), but passes them
// as (s1, t1, r1, s2, t2, r2) to GBS_merge_tagged_strings.
1355	#define TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected) do { \
1356	char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1357	TEST_EXPECT_EQUAL(result, expected); \
1358	free(result); \
1359	} while(0)
1360
// Same as TEST_MERGE_TAGGED, but uses TEST_EXPECT_EQUAL__BROKEN with the wanted result
// ('expected') and an additional 'got' value.
1361	#define TEST_MERGE_TAGGED__BROKEN(t1,t2,r1,r2,s1,s2,expected,got) do { \
1362	char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1363	TEST_EXPECT_EQUAL__BROKEN(result, expected, got); \
1364	free(result); \
1365	} while(0)
1366
// Tests GBS_merge_tagged_strings via TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected).
// As used below: s1/s2 are the two strings to merge, t1/t2 the tags applied to their
// untagged content, and r1/r2 name tags whose existing content gets replaced
// (see sections "update fields" and "merge two tagged string with deleting").
1367	void TEST_merge_tagged_strings() {
1368	// merge two fields:
1369	const char *_0 = NULp;
1370
1371	TEST_MERGE_TAGGED("S", "D", "", "", "source", "dest", "[D_] dest [S_] source");
1372	TEST_MERGE_TAGGED("SRC", "DST", "", _0, "source", "dest", "[DST] dest [SRC] source");
1373	TEST_MERGE_TAGGED("SRC", "DST", _0, "", "source", "dest", "[DST] dest [SRC] source");
1374	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "sth", "[DST,SRC] sth");
1375
1376	TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth", "sth", "[DST,SRC] sth"); // show default tags do not get deleted
1377	TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth [SRC] del", "sth [DST] del", "[DST,SRC] sth"); // exception: already present default tags
1378
1379	// update fields:
1380	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST] dest [SRC] source", "[DST] dest [SRC] newsource");
1381	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1382	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,src] sth", "[DST] sth [SRC] newsource");
1383	TEST_MERGE_TAGGED("SRC", "DST", _0, "src", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1384	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "sth", " [DST] sth [SRC] source", "[DST,SRC] sth");
1385
1386	// append (opposed to update this keeps old entries with same tag; useless?)
1387	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST] dest [SRC] source", "[DST] dest [SRC] newsource [SRC] source");
1388	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST,SRC] sth", "[DST,SRC] sth [SRC] newsource");
1389	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "[DST] sth [SRC] source", "[DST,SRC] sth [SRC] source");
1390
1391	// merge three fields:
1392	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST] dest [SRC] source", "[DST] dest [OTH] oth [SRC] source");
1393	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST,SRC] sth", "[DST,SRC] sth [OTH] oth");
1394	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "sth", " [DST,SRC] sth", "[DST,OTH,SRC] sth");
1395	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "dest", " [DST] dest [SRC] source", "[DST,OTH] dest [SRC] source");
1396	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "source", " [DST] dest [SRC] source", "[DST] dest [OTH,SRC] source");
1397
1398	// same tests as in section above, but vv:
1399	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "oth", "[DST] dest [OTH] oth [SRC] source");
1400	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "oth", "[DST,SRC] sth [OTH] oth");
1401	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "sth", "[DST,OTH,SRC] sth");
1402	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "dest", "[DST,OTH] dest [SRC] source");
1403	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "source", "[DST] dest [OTH,SRC] source");
1404
1405	// test real-merges (content existing in both strings):
1406	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre1 [C1] c1 [C2] c2", "pre2[C2]c2[C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1] pre1 [P2] pre2");
1407	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [C2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content for [C2]
1408	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [c2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content + different tag-case for [C2] (tests that tags are case-insensitive!)
1409	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [C2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content for [C2] -> inserts that tag multiple times
1410	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // continue processing last result (multiple tags with same name are handled)
1411	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C2] c2b [C3]c3 [C2] c2a", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // merge multiple tags with same name
1412	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [c2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content and different tag-case for [C2]
1413	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1,C4] c1 [C2] c2a ", "pre [c2] c2b [C4,C3]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // multitags
1414	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [ C1, C4] c1 [C2 ] c2a ", "pre [ c2] c2b [C4, C3 ]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // spaced-multitags
1415
1416	// merge two tagged string with deleting
1417	#define DSTSRC1 "[DST] dest1 [SRC] src1"
1418	#define DSTSRC2 "[DST] dest2 [SRC] src2"
1419	#define DSTSRC2LOW "[dst] dest2 [src] src2"
1420
1421	TEST_MERGE_TAGGED("O1", "O2", _0, _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2");
1422	TEST_MERGE_TAGGED("O1", "O2", "SRC", _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src2");
1423	TEST_MERGE_TAGGED("O1", "O2", _0, "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src1 [SRC] src2");
1424	TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1425	TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1426	TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1427	TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1428	TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1429	TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1430	TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1431	TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1432	TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1433	TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1434	TEST_MERGE_TAGGED("O1", "O2", "SRC,DST", "DST,SRC", DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2"); // delete does not handle multiple tags (yet)
1435	}
1436
// Tests GB_read_as_tagged_string on a database opened via GB_open("new.arb", "c"):
// - reading from GB_INT / GB_INTS entries yields no result and exports no error
// - reading the content of single tags from a GB_STRING entry, including multi-tags,
//   padded tags, unclosed tags and tags consisting of spaces/underscores only.
// All database access happens inside one transaction; the database is closed at the end.
1437	__ATTR__REDUCED_OPTIMIZE void TEST_read_tagged() {
1438	GB_shell shell;
1439	GBDATA *gb_main = GB_open("new.arb", "c");
1440	{
1441	GB_transaction ta(gb_main);
1442
1443	{
1444	GBDATA *gb_int_entry = GB_create(gb_main, "int", GB_INT);
1445	TEST_EXPECT_NO_ERROR(GB_write_int(gb_int_entry, 4711));
1446	TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_int_entry, "USELESS")); // reading from GB_INT doesn't make sense, but has to work w/o error
1447
1448	GBDATA *gb_ints_entry = GB_create(gb_main, "int", GB_INTS);
1449	GB_UINT4 ints[] = { 1, 2 };
1450	TEST_EXPECT_NO_ERROR(GB_write_ints(gb_ints_entry, ints, 2));
1451	TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_ints_entry, "USELESS")); // reading from GB_INTS doesn't make sense, but has to work w/o error
1452	}
1453
// The following helper macros read from the local 'gb_entry';
// TEST_EXPECT_FULL_CONTENT expects the complete string currently stored in 'tagged_string'.
1454	#define TEST_EXPECT_TAG_CONTENT(tag,expected) TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag), expected)
1455	#define TEST_REJECT_TAG_CONTENT(tag) TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag))
1456	#define TEST_EXPECT_FULL_CONTENT(tag) TEST_EXPECT_TAG_CONTENT(tag,tagged_string)
1457
1458	GBDATA *gb_entry = GB_create(gb_main, "str", GB_STRING);
1459	const char *tagged_string = "[T1,T2] t12 [T3] t3[T4]t4[][]xxx[AA]aa[WW]w1 [WW]w2 [BB]bb [XX]x1 [XX]x2 [yy] yy [Y] y [EMPTY][FAKE,EMPTY]fake[ SP1ST, SPACED, PADDED ,UNSPACED,_SCORED_,FOLLOWED ,FOLLAST ] spaced [LAST] last ";
1460	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1461
1462	TEST_EXPECT_FULL_CONTENT(NULp);
1463	TEST_EXPECT_FULL_CONTENT("");
1464	TEST_REJECT_TAG_CONTENT(" "); // searches for tag '_' (no such tag)
1465
1466	TEST_EXPECT_TAG_CONTENT("T1", "t12");
1467	TEST_EXPECT_TAG_CONTENT("T2", "t12");
1468	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1469	TEST_EXPECT_TAG_CONTENT("T4", "t4[][]xxx");
1470
1471	TEST_EXPECT_TAG_CONTENT("AA", "aa");
1472	TEST_EXPECT_TAG_CONTENT("BB", "bb");
1473	TEST_EXPECT_TAG_CONTENT("WW", "w1"); // now finds 1st occurrence of [WW]
1474	TEST_EXPECT_TAG_CONTENT("XX", "x1");
1475	TEST_EXPECT_TAG_CONTENT("YY", "yy");
1476	TEST_EXPECT_TAG_CONTENT("yy", "yy");
1477
1478	TEST_REJECT_TAG_CONTENT("Y");
1479	// TEST_EXPECT_TAG_CONTENT("Y", "y"); // @@@ tags with length == 1 are never found -> should be handled when used via GUI
1480
1481	TEST_EXPECT_TAG_CONTENT("EMPTY", "fake"); // now reports 1st non-empty content
1482	TEST_EXPECT_TAG_CONTENT("FAKE", "fake");
1483	TEST_EXPECT_TAG_CONTENT("fake", "fake");
1484
1485	TEST_REJECT_TAG_CONTENT("NOSUCHTAG");
1486	TEST_EXPECT_TAG_CONTENT("SPACED", "spaced");
1487	TEST_EXPECT_TAG_CONTENT("SP1ST", "spaced");
1488	TEST_REJECT_TAG_CONTENT(" SPACED"); // dito (specified space is converted into '_' before searching tag)
1489	TEST_REJECT_TAG_CONTENT("_SPACED"); // not found (tag stored with space, search performed for '_SPACED')
1490	TEST_EXPECT_TAG_CONTENT("PADDED", "spaced");
1491	TEST_EXPECT_TAG_CONTENT("FOLLOWED", "spaced");
1492	TEST_EXPECT_TAG_CONTENT("FOLLAST", "spaced");
1493
1494	TEST_EXPECT_TAG_CONTENT("_SCORED_", "spaced");
1495	TEST_EXPECT_TAG_CONTENT(" SCORED ", "spaced");
1496	TEST_EXPECT_TAG_CONTENT("UNSPACED", "spaced");
1497	TEST_EXPECT_TAG_CONTENT("LAST", "last");
1498
1499	// test incomplete tags
1500	tagged_string = "bla [WHATEVER hello";
1501	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1502	TEST_REJECT_TAG_CONTENT("WHATEVER");
1503
1504	tagged_string = "bla [T1] t1 [T2 t2 [T3] t3";
1505	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1506	TEST_EXPECT_TAG_CONTENT("T1", "t1 [T2 t2");
1507	TEST_REJECT_TAG_CONTENT("T2"); // tag is unclosed
1508	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1509
1510	// test pathological tags
1511	tagged_string = "bla [T1] t1 [ ] sp1 [ ] sp2 [___] us [T3] t3 [_a] a";
1512	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1513	TEST_EXPECT_TAG_CONTENT("T1", "t1 [ ] sp1 [ ] sp2");
1514	TEST_EXPECT_FULL_CONTENT("");
1515	TEST_REJECT_TAG_CONTENT(" ");
1516	TEST_REJECT_TAG_CONTENT(" ");
1517	TEST_REJECT_TAG_CONTENT(",");
1518	TEST_EXPECT_TAG_CONTENT(", a", "a"); // searches for tag '_a'
1519	TEST_EXPECT_TAG_CONTENT(", a,", "a"); // dito
1520	TEST_EXPECT_TAG_CONTENT(", ,a,", "a"); // dito
1521	TEST_EXPECT_TAG_CONTENT(" ", "us");
1522	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1523	}
1524	GB_close(gb_main);
1525	}
1526
// Both macros call GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv);
// a variable named 'callEnv' has to exist in the calling scope.

// Expects a result equal to 'expected' and no exported error.
1527	#define TEST_EXPECT_EVAL_TAGGED(in,dtag,tag,aci,expected) do{ \
1528	TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED( \
1529	GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1530	expected); \
1531	}while(0)
1532
// Expects no result and an exported error containing 'expectedErrorPart'.
1533	#define TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(in,dtag,tag,aci,expectedErrorPart) do{ \
1534	TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS( \
1535	GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1536	expectedErrorPart); \
1537	}while(0)
1538
// Tests GBS_modify_tagged_string_with_ACI (via TEST_EXPECT_EVAL_TAGGED*), using species
// "MhcBurto" and "MetMazei" from the database opened via GB_open("TEST_loadsave.arb", "r").
// Macro arguments are (in, dtag, tag, aci, expected): as the expectations below show,
// 'dtag' is applied to untagged content of 'in' and 'aci' is only applied to content tagged with 'tag'.
// The environment refers to a tree named "tree_missing", which provokes the
// "tree not found" errors expected in the second section.
1539	__ATTR__REDUCED_OPTIMIZE void TEST_tagged_eval() {
1540	GB_shell shell;
1541	GBDATA *gb_main = GB_open("TEST_loadsave.arb", "r");
1542	{
1543	GB_transaction ta(gb_main);
1544	GBL_env env(gb_main, "tree_missing");
1545
1546	{
1547	GBDATA *gb_species = GBT_find_species(gb_main, "MhcBurto");
1548	TEST_REJECT_NULL(gb_species);
1549	GBL_call_env callEnv(gb_species, env);
1550
1551	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", "", "[DEF] bla");
1552	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", NULp, "[DEF] bla");
1553	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", ":bla=blub", "[DEF] bla");
1554	TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", ":bla=blub", "[TAG] blub");
1555	TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", "len", "[TAG] 3");
1556
1557	// empty tags:
1558	TEST_EXPECT_EVAL_TAGGED("[empty] ", "def", "empty", NULp, "");
1559	TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1560	TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1561	TEST_EXPECT_EVAL_TAGGED("[empty][filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1562	TEST_EXPECT_EVAL_TAGGED("[filled] xxx [empty]", "def", "empty", NULp, "[FILLED] xxx");
1563
1564	#define THREE_TAGS "[TAG] tag [tip] tip [top] top"
1565	#define THREE_TAGS_UPCASE "[TAG] tag [TIP] tip [TOP] top"
1566
1567	// dont eval:
1568	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", NULp, THREE_TAGS_UPCASE);
1569	// eval SRT:
1570	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", ":=<>", "[TAG] <tag> [TIP] tip [TOP] top");
1571	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tag", ":=<>", "[TAG] <tag> [TIP] tip [TOP] top");
1572	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":=()", "[TAG] tag [TIP] (tip) [TOP] top");
1573	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TIP", ":=()", "[TAG] tag [TIP] (tip) [TOP] top");
1574	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=", "[TAG] tag [TOP] top"); // tag emptied by SRT was removed from result
1575	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", ":=-*1", "[TAG] tag [TIP] tip [TOP] top-top");
1576	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":i=o", "[TAG] tag [TIP,TOP] top"); // merge tags
1577	// eval ACI:
1578	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", "len", "[TAG] tag [TIP] 3 [TOP] top");
1579	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", "len", "[TAG] tag [TIP] tip [TOP] 3");
1580
1581	// test SRT/ACI errors:
1582	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", ":*", "no '=' found");
1583	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("untagged", "def", "def", ":*", "no '=' found");
1584	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", "illcmd", "Unknown command 'illcmd'");
1585	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("un [tagged", "def", "def", "illcmd", "Unknown command 'illcmd'");
1586
1587	// no error raised, if expression not applied:
1588	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "no", "illcmd", THREE_TAGS_UPCASE);
1589
1590	// incomplete tags
1591	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":=<>", "[DEF] <{no tag>");
1592	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":* =<2,*1>", "[DEF] <tag,{no>");
1593	TEST_EXPECT_EVAL_TAGGED("[no [tag", "def", "def", ":* =<2,*1>", "[DEF] <{tag,{no>");
1594	TEST_EXPECT_EVAL_TAGGED("[no [tag] xx", "def", "def", ":* =<2,*1>", "[DEF] {no [TAG] xx"); // SRT changes nothing here (no match)
1595	TEST_EXPECT_EVAL_TAGGED("[no [tag[]", "def", "def", ":* =<2,*1>", "[DEF] <{tag{},{no>");
1596	TEST_EXPECT_EVAL_TAGGED("[no [tag[] xx","def", "def", ":* =<2,*1>", "[DEF] <{tag{} xx,{no>");
1597	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":* =<2,*1>", "[DEF] <tag,no>");
1598	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":no=yes", "[DEF] {yes tag");
1599	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":no=yes", "[DEF] yes tag");
1600	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "DEF", ":no=yes", "[DEF] yes tag");
1601	TEST_EXPECT_EVAL_TAGGED("no tag", "DEF", "def", ":no=yes", "[DEF] yes tag");
1602	TEST_EXPECT_EVAL_TAGGED("kept [trunk", "def", "def", ":=<>", "[DEF] <kept {trunk>");
1603	TEST_EXPECT_EVAL_TAGGED("kept", "def", "def", ":=<>", "[DEF] <kept>");
1604	}
1605
1606	{
1607	GBDATA *gb_species = GBT_find_species(gb_main, "MetMazei");
1608	TEST_REJECT_NULL(gb_species);
1609	GBL_call_env callEnv(gb_species, env);
1610
1611	// run scripts using context:
1612	TEST_EXPECT_EVAL_TAGGED("[T1,T2] name='$n'", "def", "T1", ":$n=*(name)", "[T1] name='MetMazei' [T2] name='$n'");
1613	TEST_EXPECT_EVAL_TAGGED("[T1,T2] seqlen=$l", "def", "T2", ":$l=*(\|sequence\|len)", "[T1] seqlen=$l [T2] seqlen=165");
1614	TEST_EXPECT_EVAL_TAGGED("[T1,T2] nuc", "def", "T1", "dd;\"=\";command(sequence\|count(ACGTUN))", "[T1] nuc=66 [T2] nuc");
1615
1616	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax='$t'", "def", "def", ":$t=*(\|taxonomy(2))", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1617	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax", "def", "def", "dd;\"=\";taxonomy(2)", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1618
1619	// content before 1st tag:
1620	TEST_EXPECT_EVAL_TAGGED("untagged [tag] tagged", "def", "tag", ":g=G", "[DEF] untagged [TAG] taGGed");
1621	TEST_EXPECT_EVAL_TAGGED(" [tag] tagged", "def", "tag", ":g=G", "[TAG] taGGed");
1622
1623	// test elimination of leading/trailing whitespace:
1624	TEST_EXPECT_EVAL_TAGGED(" untagged ", "def", "def", ":g=G", "[DEF] untaGGed"); // untagged content
1625	TEST_EXPECT_EVAL_TAGGED("[tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed");
1626	TEST_EXPECT_EVAL_TAGGED(" [trail] trail [tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed [TRAIL] trail");
1627
1628	#define MIXED_TAGS "[tag] tag [tip,top] tiptop [xx,yy,zz] zzz"
1629
1630	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "tip", ":tip=top", "[TAG] tag [TIP] toptop [TOP] tiptop [XX,YY,ZZ] zzz");
1631	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "yy", ":zzz=tiptop", "[TAG] tag [TIP,TOP,YY] tiptop [XX,ZZ] zzz");
1632	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "top", ":tiptop=zzz", "[TAG] tag [TIP] tiptop [TOP,XX,YY,ZZ] zzz");
1633	}
1634	}
1635	GB_close(gb_main);
1636	}
1637
// Tests GBS_log_action_to with and without timestamp (stamped = 1 / 0) for
// - a comment without trailing LF
// - a comment with trailing LF
// - an empty comment (result starts with LF)
// - no comment (NULp) combined with an action that already ends with LF.
// With timestamp only parts of the result are checked, because the stamp text varies.
1638	void TEST_log_action() {
1639	for (int stamped = 0; stamped<=1; ++stamped) {
1640	TEST_ANNOTATE(GBS_global_string("stamped=%i", stamped));
1641	{
1642	char *logged = GBS_log_action_to("comment", "action", stamped);
1643	if (stamped) {
1644	TEST_EXPECT_CONTAINS(logged, "comment\n");
1645	TEST_EXPECT_CONTAINS(logged, "action\n");
1646	}
1647	else {
1648	TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1649	}
1650	free(logged);
1651	}
1652	{
1653	char *logged = GBS_log_action_to("comment\n", "action", stamped);
1654	if (stamped) {
1655	TEST_EXPECT_CONTAINS(logged, "comment\n");
1656	TEST_EXPECT_CONTAINS(logged, "action\n");
1657	}
1658	else {
1659	TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1660	}
1661	free(logged);
1662	}
1663	{
1664	char *logged = GBS_log_action_to("", "action", stamped);
1665	if (stamped) {
1666	TEST_EXPECT_EQUAL(logged[0], '\n');
1667	TEST_EXPECT_CONTAINS(logged, "action\n");
1668	}
1669	else {
1670	TEST_EXPECT_EQUAL(logged, "\naction\n");
1671	}
1672	free(logged);
1673	}
1674	{
1675	char *logged = GBS_log_action_to(NULp, "action\n", stamped); // test action with trailing LF
1676	if (stamped) {
1677	TEST_EXPECT_DIFFERENT(logged[0], '\n');
1678	TEST_EXPECT_CONTAINS(logged, "action\n");
1679	}
1680	else {
1681	TEST_EXPECT_EQUAL(logged, "action\n");
1682	}
1683	free(logged);
1684	}
1685	}
1686	}
1687	TEST_PUBLISH(TEST_log_action);
1688
1689	#endif // UNIT_TESTS
1690

Note: See TracBrowser for help on using the repository browser.

Download in other formats: