Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

adstring.cxx

Visit:

Last change on this file was 19346, checked in by westram, 3 years ago
reintegrates 'refactor' into 'trunk' polish `GBS_strstruct` adds: log:branches/refactor@19339:19345
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 64.5 KB

Line
1	// =============================================================== //
2	// //
3	// File : adstring.cxx //
4	// Purpose : various string functions //
5	// //
6	// Institute of Microbiology (Technical University Munich) //
7	// http://www.arb-home.de/ //
8	// //
9	// =============================================================== //
10
11	#include <arb_backtrace.h>
12	#include <arb_strbuf.h>
13	#include <arb_defs.h>
14	#include <arb_str.h>
15
16	#include "gb_key.h"
17	#include "gb_aci.h"
18
19	#include <SigHandler.h>
20
21	#include <execinfo.h>
22
23	#include <cstdarg>
24	#include <cctype>
25	#include <cerrno>
26	#include <ctime>
27	#include <setjmp.h>
28
29	#include <valgrind.h>
30
31	static char GBS_string_2_key_with_exclusions(const char str, const char *additional) {
32	// converts any string to a valid key (all chars in 'additional' are additionally allowed)
33	char buf[GB_KEY_LEN_MAX+1];
34	int i;
35	int c;
36	for (i=0; i<GB_KEY_LEN_MAX;) {
37	c = *(str++);
38	if (!c) break;
39
40	if (c==' ' \|\| c == '_') {
41	buf[i++] = '_';
42	}
43	else if (isalnum(c) \|\| strchr(additional, c)) {
44	buf[i++] = c;
45	}
46	}
47	for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_';
48	buf[i] = 0;
49	return ARB_strdup(buf);
50	}
51
52	char GBS_string_2_key(const char str) { // converts any string to a valid key
53	return GBS_string_2_key_with_exclusions(str, "");
54	}
55
56	char GB_memdup(const char source, size_t len) {
57	char *dest = ARB_alloc<char>(len);
58	memcpy(dest, source, len);
59	return dest;
60	}
61
62	static const char *EMPTY_KEY_NOT_ALLOWED = "Empty key is not allowed";
63
64	inline __ATTR__USERESULT GB_ERROR check_key(const char *key, int len) {
65	// test if 'key' is a valid non-hierarchical database key.
66	// i.e. contains only letters, numbers and '_' and
67	// is inside length constraints GB_KEY_LEN_MIN/GB_KEY_LEN_MAX.
68
69	if (len < GB_KEY_LEN_MIN) {
70	if (!len) return EMPTY_KEY_NOT_ALLOWED;
71	return GBS_global_string("Invalid key '%s': too short", key);
72	}
73	if (len > GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key);
74
75	for (int i = 0; i<len; ++i) {
76	char c = key[i];
77	bool validChar = isalnum(c) \|\| c == '_';
78	if (!validChar) {
79	return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key);
80	}
81	}
82
83	return NULp;
84	}
85	GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT
86	// test if 'key' is a valid non-hierarchical database key
87	// (i.e. a valid name for a container or field).
88
89	return check_key(key, key ? strlen(key) : 0);
90	}
91
92	GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT
93	// test whether 'key' is a hierarchical key,
94	// i.e. consists of subkeys (accepted by GB_check_key), separated by '/'.
95
96	GB_ERROR err = NULp;
97
98	if (key && key[0] == '/') ++key; // accept + remove leading '/'
99	if (!key \|\| !key[0]) err = EMPTY_KEY_NOT_ALLOWED; // reject NULp, empty (or single slash)
100
101	while (!err && key[0]) {
102	int nonSlashPart = strcspn(key, "/");
103
104	err = check_key(key, nonSlashPart);
105	if (!err) {
106	key += nonSlashPart;
107	if (key[0] == '/') {
108	++key;
109	if (key[0] == 0) { // nothing after slash
110	err = EMPTY_KEY_NOT_ALLOWED;
111	}
112	}
113	else {
114	gb_assert(key[0] == 0);
115	}
116	}
117	}
118	return err;
119	}
120
121	// ----------------------------------------------
122	// escape/unescape characters in strings
123
124	char GBS_escape_string(const char str, const char *chars_to_escape, char escape_char) {
125	/*! escape characters in 'str'
126	*
127	* uses a special escape-method, which eliminates all 'chars_to_escape' completely
128	* from str (this makes further processing of the string more easy)
129	*
130	* @param str string to escape
131	*
132	* @param escape_char is the character used for escaping. For performance reasons it
133	* should be a character rarely used in 'str'.
134	*
135	* @param chars_to_escape may not contain 'A'-'Z' (these are used for escaping)
136	* and it may not be longer than 26 bytes
137	*
138	* @return heap copy of escaped string
139	*
140	* Inverse of GBS_unescape_string()
141	*/
142
143	int len = strlen(str);
144	char buffer = ARB_alloc<char>(2len+1);
145	int j = 0;
146	int i;
147
148	gb_assert(strlen(chars_to_escape) <= 26);
149	gb_assert(!strchr(chars_to_escape, escape_char)); // escape_char may not be included in chars_to_escape
150
151	for (i = 0; str[i]; ++i) {
152	if (str[i] == escape_char) {
153	buffer[j++] = escape_char;
154	buffer[j++] = escape_char;
155	}
156	else {
157	const char *found = strchr(chars_to_escape, str[i]);
158	if (found) {
159	buffer[j++] = escape_char;
160	buffer[j++] = (found-chars_to_escape+'A');
161
162	gb_assert(found[0]<'A' \|\| found[0]>'Z'); // illegal character in chars_to_escape
163	}
164	else {
165
166	buffer[j++] = str[i];
167	}
168	}
169	}
170	buffer[j] = 0;
171
172	return buffer;
173	}
174
175	char GBS_unescape_string(const char str, const char *escaped_chars, char escape_char) {
176	//! inverse of GB_escape_string() - for params see there
177
178	int len = strlen(str);
179	char *buffer = ARB_alloc<char>(len+1);
180	int j = 0;
181	int i;
182
183	#if defined(ASSERTION_USED)
184	int escaped_chars_len = strlen(escaped_chars);
185	#endif // ASSERTION_USED
186
187	gb_assert(strlen(escaped_chars) <= 26);
188	gb_assert(!strchr(escaped_chars, escape_char)); // escape_char may not be included in chars_to_escape
189
190	for (i = 0; str[i]; ++i) {
191	if (str[i] == escape_char) {
192	if (str[i+1] == escape_char) {
193	buffer[j++] = escape_char;
194	}
195	else {
196	int idx = str[i+1]-'A';
197
198	gb_assert(idx >= 0 && idx<escaped_chars_len);
199	buffer[j++] = escaped_chars[idx];
200	}
201	++i;
202	}
203	else {
204	buffer[j++] = str[i];
205	}
206	}
207	buffer[j] = 0;
208
209	return buffer;
210	}
211
212	char *GBS_eval_env(GB_CSTR p) {
213	GB_ERROR error = NULp;
214	GB_CSTR ka;
215	GBS_strstruct out(1000);
216
217	while ((ka = GBS_find_string(p, "$(", 0))) {
218	GB_CSTR kz = strchr(ka, ')');
219	if (!kz) {
220	error = GBS_global_string("missing ')' for envvar '%s'", p);
221	break;
222	}
223	else {
224	char *envvar = ARB_strpartdup(ka+2, kz-1);
225	int len = ka-p;
226
227	if (len) out.ncat(p, len);
228
229	GB_CSTR genv = GB_getenv(envvar);
230	if (genv) out.cat(genv);
231
232	p = kz+1;
233	free(envvar);
234	}
235	}
236
237	if (error) {
238	GB_export_error(error);
239	return NULp;
240	}
241
242	out.cat(p); // copy rest
243	return out.release_memfriendly();
244	}
245
long GBS_gcgchecksum(const char *seq) {
    // GCGchecksum:
    // sums up the uppercased characters of 'seq', each multiplied with a weight
    // cycling through 1..57; the result is the sum modulo 10000.
    const long seqlen = strlen(seq);
    long       weight = 0;
    long       sum    = 0;

    for (long pos = 0; pos<seqlen; ++pos) {
        weight  = weight%57 + 1; // 1, 2, .. 57, 1, 2, ..
        sum    += weight * toupper(seq[pos]);
    }

    return sum % 10000;
}
262
263	// Table of CRC-32's of all single byte values (made by makecrc.c of ZIP source)
264	uint32_t crctab[] = {
265	0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
266	0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
267	0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
268	0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
269	0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
270	0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
271	0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
272	0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
273	0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
274	0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
275	0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
276	0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
277	0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
278	0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
279	0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
280	0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
281	0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
282	0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
283	0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
284	0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
285	0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
286	0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
287	0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
288	0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
289	0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
290	0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
291	0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
292	0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
293	0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
294	0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
295	0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
296	0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
297	0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
298	0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
299	0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
300	0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
301	0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
302	0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
303	0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
304	0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
305	0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
306	0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
307	0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
308	0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
309	0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
310	0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
311	0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
312	0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
313	0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
314	0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
315	0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
316	0x2d02ef8dL
317	};
318
319	uint32_t GB_checksum(const char seq, long length, int ignore_case, const char exclude) {
320	/* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source
321	* if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too)
322	*/
323
324	unsigned long c = 0xffffffffL;
325	long n = length;
326	int i;
327	int tab[256]; // @@@ avoid recalc for each call
328
329	for (i=0; i<256; i++) { // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions)
330	tab[i] = ignore_case ? toupper(i) : i;
331	}
332
333	if (exclude) {
334	while (1) {
335	int k = (unsigned char )exclude++;
336	if (!k) break;
337	tab[k] = 0;
338	if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0;
339	}
340	}
341
342	while (n--) {
343	i = tab[(const unsigned char )seq++];
344	if (i) {
345	c = crctab[((int) c ^ i) & 0xff] ^ (c >> 8);
346	}
347	}
348	c = c ^ 0xffffffffL;
349	return c;
350	}
351
352	uint32_t GBS_checksum(const char seq, int ignore_case, const char exclude) {
353	// if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too)
354	return GB_checksum(seq, strlen(seq), ignore_case, exclude);
355	}
356
size_t GBS_shorten_repeated_data(char *data) {
    // Compresses runs of identical characters in 'data' (in place!).
    // Runs of 5 or more characters are written as "<char>{<count>}", shorter runs are kept.
    // e.g. "..............................ACGT....................TGCA"
    // -> ".{30}ACGT.{20}TGCA"
    //
    // Returns the length of the shortened string.

#if defined(DEBUG)
    size_t orgLen = strlen(data);
#endif // DEBUG

    char *const  start   = data;
    const char  *in      = data;
    char        *out     = data;
    char         runChar = *in++;
    size_t       runLen  = 1;

    while (runChar) {
        const char next = *in++;
        if (next == runChar) {
            ++runLen;
            continue;
        }

        // run ended -> write it (never writes behind the part already read)
        if (runLen < 5) {
            memset(out, runChar, runLen); // insert plain
            out += runLen;
        }
        else {
            out += sprintf(out, "%c{%zu}", runChar, runLen); // insert repeat count
        }

        runChar = next;
        runLen  = 1;
    }

    *out = 0;

#if defined(DEBUG)
    gb_assert(strlen(start) <= orgLen);
#endif // DEBUG

    return out-start;
}
397
398
399	// ------------------------------------------
400	// helper classes for tagged fields
401
402	class TextRef {
403	const char *data; // has no terminal zero-byte!
404	int length;
405
406	public:
407	TextRef() : data(NULp), length(-1) {}
408	TextRef(const char *data_, int length_) : data(data_), length(length_) {}
409	explicit TextRef(const char *zeroTerminated) : data(zeroTerminated), length(strlen(data)) {}
410
411	bool defined() const { return data && length>0; }
412	const char *get_data() const { return data; }
413	int get_length() const { return length; }
414
415	const char *get_following() const { return data ? data+length : NULp; }
416
417	int compare(const char *str) const {
418	gb_assert(defined());
419	int cmp = strncmp(get_data(), str, get_length());
420	if (!cmp) {
421	if (str[get_length()]) {
422	cmp = -1; // right side contains more content
423	}
424	}
425	return cmp;
426	}
427	int icompare(const char *str) const {
428	gb_assert(defined());
429	int cmp = strncasecmp(get_data(), str, get_length());
430	if (!cmp) {
431	if (str[get_length()]) {
432	cmp = -1; // right side contains more content
433	}
434	}
435	return cmp;
436	}
437	char *copy() const { return ARB_strndup(get_data(), get_length()); }
438
439	char head() const { return defined() ? data[0] : 0; }
440	char tail() const { return defined() ? data[length-1] : 0; }
441
442	TextRef headTrimmed() const {
443	if (defined()) {
444	for (int s = 0; s<length; ++s) {
445	if (!isspace(data[s])) {
446	return TextRef(data+s, length-s);
447	}
448	}
449	}
450	return TextRef();
451	}
452	TextRef tailTrimmed() const {
453	if (defined()) {
454	for (int s = length-1; s>=0; --s) {
455	if (!isspace(data[s])) {
456	return TextRef(data, s+1);
457	}
458	}
459	}
460	return TextRef();
461	}
462
463	TextRef trimmed() const {
464	return headTrimmed().tailTrimmed();
465	}
466
467	inline TextRef partBefore(const TextRef& subref) const;
468	inline TextRef partBehind(const TextRef& subref) const;
469
470	bool is_part_of(const TextRef& other) const {
471	gb_assert(defined() && other.defined());
472	return get_data()>=other.get_data() && get_following()<=other.get_following();
473	}
474
475	const char find(char c) const { return reinterpret_cast<const char>(memchr(get_data(), c, get_length())); }
476	};
477
478	static TextRef textBetween(const TextRef& t1, const TextRef& t2) {
479	const char *behind_d1 = t1.get_following();
480	const char *d2 = t2.get_data();
481
482	if (behind_d1 && d2 && behind_d1<d2) {
483	return TextRef(behind_d1, d2-behind_d1);
484	}
485	return TextRef();
486	}
487
488	inline TextRef TextRef::partBefore(const TextRef& subref) const {
489	gb_assert(subref.is_part_of(*this));
490	return textBetween(TextRef(get_data(), 0), subref);
491	}
492	inline TextRef TextRef::partBehind(const TextRef& subref) const {
493	gb_assert(subref.is_part_of(*this));
494	return TextRef(subref.get_following(), get_following()-subref.get_following());
495	}
496
497	class TaggedContentParser {
498	TextRef wholeInput;
499	TextRef tag, content; // current position
500	TextRef restTags; // store (rest of) multiple tags (e.g. from "[t1,t2]")
501	TextRef nextBrackets; // next "[..]" part (behind current tag)
502
503	void findBrackets(const char *in) {
504	nextBrackets = TextRef();
505	const char *tag_start = strchr(in, '[');
506	if (tag_start) {
507	const char *tag_end = strchr(tag_start, ']');
508	if (tag_end) {
509	if (tag_end == tag_start+1) { // empty tag -> use as content
510	findBrackets(tag_end+1);
511	}
512	else {
513	const char unwanted_bracket = reinterpret_cast<const char>(memchr(tag_start+1, '[', tag_end-tag_start-1));
514	if (unwanted_bracket) { // tagname contains '[' -> step to next bracket
515	findBrackets(unwanted_bracket);
516	}
517	else {
518	TextRef name = TextRef(tag_start+1, tag_end-tag_start-1).trimmed();
519	if (name.defined()) { // not only whitespace inside brackets
520	nextBrackets = TextRef(tag_start, tag_end-tag_start+1);
521	}
522	else {
523	findBrackets(tag_end+1);
524	}
525	}
526	}
527	}
528	}
529	}
530
531	void parse_next_multi_tag() {
532	gb_assert(restTags.defined());
533	TextRef comma(restTags.find(','), 1);
534	if (comma.defined()) {
535	tag = restTags.partBefore(comma).tailTrimmed();
536	restTags = restTags.partBehind(comma).headTrimmed();
537	}
538	else {
539	tag = restTags;
540	restTags = TextRef();
541	}
542	}
543	void parse_next() {
544	if (restTags.defined()) {
545	parse_next_multi_tag();
546	}
547	else if (nextBrackets.defined()) {
548	TextRef brackets = nextBrackets;
549	findBrackets(brackets.get_following());
550
551	content = (nextBrackets.defined() ? textBetween(brackets, nextBrackets) : wholeInput.partBehind(brackets)).trimmed();
552
553	gb_assert(brackets.head() == '[' && brackets.tail() == ']');
554
555	TextRef tags = TextRef(brackets.get_data()+1, brackets.get_length()-2).trimmed();
556	gb_assert(tags.defined());
557
558	restTags = tags;
559	parse_next_multi_tag();
560	}
561	else {
562	tag = content = TextRef();
563	gb_assert(!has_part());
564	}
565	}
566	void parse_first() {
567	gb_assert(!has_part());
568	findBrackets(wholeInput.get_data());
569	content = (nextBrackets.defined() ? wholeInput.partBefore(nextBrackets) : wholeInput).trimmed();
570	if (!content.defined()) parse_next(); // no untagged prefix seen -> directly goto first tag
571	}
572
573	public:
574	TaggedContentParser(const char *input_) : wholeInput(input_) { parse_first(); }
575
576	bool has_tag() const { return tag.defined(); }
577	bool has_content() const { return content.defined(); }
578
579	void next() { parse_next(); }
580	bool has_part() const { return has_tag() \|\| has_content(); } // false -> parser has finished
581
582	const TextRef& get_tag() const { return tag; }
583	const TextRef& get_content() const { return content; }
584	};
585
586
587	// -------------------------------------------
588	// helper function for tagged fields
589
590	static void g_bs_add_value_tag_to_hash(GB_HASH hash, const char tag, char *value) {
591	if (!value[0]) return; // ignore empty values
592
593	{
594	char *p;
595	p = value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{'
596	p = value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}'
597	}
598
599	GB_HASH sh = (GB_HASH )GBS_read_hash(hash, value);
600	if (!sh) {
601	sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent
602	GBS_write_hash(hash, value, (long)sh);
603	}
604	GBS_write_hash(sh, tag, 1);
605	}
606
607	static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH hash, char s, char default_tag, const char del) {
608	TaggedContentParser parser(s);
609	while (parser.has_part()) {
610	if (parser.has_content()) {
611	char *content = parser.get_content().copy();
612	if (parser.has_tag()) {
613	char *tag = parser.get_tag().copy();
614	if (!del \|\| ARB_stricmp(tag, del) != 0) {
615	g_bs_add_value_tag_to_hash(hash, tag, content);
616	}
617	free(tag);
618	}
619	else {
620	g_bs_add_value_tag_to_hash(hash, default_tag, content); // no tag found, use default tag
621	}
622	free(content);
623	}
624	parser.next();
625	}
626	}
627
628	static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH hash, char s, char default_tag, const char rtag, const char *aci, GBL_call_env& env) {
629	GB_ERROR error = NULp;
630
631	TaggedContentParser parser(s);
632	while (parser.has_part() && !error) {
633	if (parser.has_content()) {
634	char *value = parser.get_content().copy();
635	char *tag = parser.has_tag() ? parser.get_tag().copy() : strdup(default_tag);
636
637	if (rtag && ARB_stricmp(tag, rtag) == 0) {
638	freeset(value, GB_command_interpreter_in_env(value, aci, env));
639	if (!value) error = GB_await_error();
640	}
641
642	if (!error) g_bs_add_value_tag_to_hash(hash, tag, value);
643
644	free(tag);
645	free(value);
646	}
647	parser.next();
648	}
649
650	return error;
651	}
652
653	static void g_bs_merge_tags(const char tag, long /val/, void cd_sub_result) {
654	GBS_strstruct& sub_result = (GBS_strstruct)cd_sub_result;
655
656	sub_result.cat(tag);
657	sub_result.put(',');
658	}
659
660	static void g_bs_read_tagged_hash(const char value, long subhash, void cd_g_bs_collect_tags_hash) {
661	static int counter = 0;
662
663	GBS_strstruct sub_result(100);
664	GBS_hash_do_const_sorted_loop((GB_HASH *)subhash, g_bs_merge_tags, GBS_HCF_sortedByKey, &sub_result);
665	sub_result.putlong(counter++); // create a unique number
666
667	char *str = ARB_strupper(sub_result.release());
668
669	GB_HASH g_bs_collect_tags_hash = (GB_HASH)cd_g_bs_collect_tags_hash;
670	GBS_write_hash(g_bs_collect_tags_hash, str, (long)ARB_strdup(value)); // send output to new hash for sorting
671
672	free(str);
673	}
674
675	static void g_bs_read_final_hash(const char tag, long value, void cd_merge_result) {
676	GBS_strstruct& merge_result = (GBS_strstruct)cd_merge_result;
677
678	char lk = const_cast<char>(strrchr(tag, ','));
679	if (lk) { // remove number at end
680	*lk = 0;
681
682	if (!merge_result.empty()) merge_result.put(' '); // skip trailing space
683	merge_result.cat_wrapped("[]", tag);
684	merge_result.put(' ');
685	}
686	merge_result.cat((char*)value);
687	}
688
689	static char g_bs_get_string_of_tag_hash(GB_HASH tag_hash) {
690	GBS_strstruct merge_result(256);
691	GB_HASH *collect_tags_hash = GBS_create_dynaval_hash(512, GB_IGNORE_CASE, GBS_dynaval_free);
692
693	GBS_hash_do_const_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash
694	GBS_hash_do_const_sorted_loop(collect_tags_hash, g_bs_read_final_hash, GBS_HCF_sortedByKey, &merge_result);
695
696	GBS_free_hash(collect_tags_hash);
697	return merge_result.release_memfriendly();
698	}
699
700	static long g_bs_free_hash_of_hashes_elem(const char /key/, long val, void ) {
701	GB_HASH hash = (GB_HASH)val;
702	if (hash) GBS_free_hash(hash);
703	return 0;
704	}
705	static void g_bs_free_hash_of_hashes(GB_HASH *hash) {
706	GBS_hash_do_loop(hash, g_bs_free_hash_of_hashes_elem, NULp);
707	GBS_free_hash(hash);
708	}
709
710	char GBS_merge_tagged_strings(const char s1, const char tag1, const char replace1, const char s2, const char tag2, const char *replace2) {
711	/* Create a tagged string from two tagged strings:
712	* a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string'
713	*
714	* if 's2' is not empty, then delete tag 'replace1' in 's1'
715	* if 's1' is not empty, then delete tag 'replace2' in 's2'
716	*
717	* (result should never be NULp)
718	*/
719
720	char *str1 = ARB_strdup(s1);
721	char *str2 = ARB_strdup(s2);
722	char *t1 = GBS_string_2_key(tag1);
723	char *t2 = GBS_string_2_key(tag2);
724	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
725
726	if (!s1[0]) replace2 = NULp;
727	if (!s2[0]) replace1 = NULp;
728
729	if (replace1 && !replace1[0]) replace1 = NULp;
730	if (replace2 && !replace2[0]) replace2 = NULp;
731
732	g_bs_convert_string_to_tagged_hash_with_delete(hash, str1, t1, replace1);
733	g_bs_convert_string_to_tagged_hash_with_delete(hash, str2, t2, replace2);
734
735	char *result = g_bs_get_string_of_tag_hash(hash);
736
737	g_bs_free_hash_of_hashes(hash);
738
739	free(t2);
740	free(t1);
741	free(str2);
742	free(str1);
743
744	return result;
745	}
746
747	char GBS_modify_tagged_string_with_ACI(const char s, const char dt, const char tag, const char *aci, GBL_call_env& env) {
748	/* if 's' is untagged, tag it with default tag 'dt'.
749	* if 'tag' is specified -> apply 'aci' to that part of the content of 's', which is tagged with 'tag' (i.e. look for '[tag]')
750	*
751	* if result is NULp, an error has been exported.
752	*/
753
754	char *str = ARB_strdup(s);
755	char *default_tag = GBS_string_2_key(dt);
756	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
757	char *result = NULp;
758
759	GB_ERROR error = g_bs_convert_string_to_tagged_hash_with_rewrite(hash, str, default_tag, tag, aci, env);
760
761	if (!error) {
762	result = g_bs_get_string_of_tag_hash(hash);
763	}
764	else {
765	GB_export_error(error);
766	}
767
768	g_bs_free_hash_of_hashes(hash);
769
770	free(default_tag);
771	free(str);
772
773	return result;
774	}
775
776	char GB_read_as_tagged_string(GBDATA gbd, const char *tagi) {
777	char *buf = GB_read_as_string(gbd);
778	if (buf && tagi && tagi[0]) {
779	TaggedContentParser parser(buf);
780
781	char *wantedTag = GBS_string_2_key(tagi);
782	char *contentFound = NULp;
783
784	while (parser.has_part() && !contentFound) {
785	if (parser.has_tag() && parser.get_tag().icompare(wantedTag) == 0) {
786	contentFound = parser.get_content().copy();
787	}
788	parser.next();
789	}
790	free(wantedTag);
791	free(buf);
792
793	return contentFound;
794	}
795	return buf;
796	}
797
798
/* be CAREFUL : this function is used to save ARB ASCII database (i.e. properties)
 * used as well to save perl macros
 *
 * when changing GBS_fwrite_string -> GBS_fread_string needs to be fixed as well
 *
 * always keep in mind, that many users have databases/macros written with older
 * versions of this function. They MUST load properly!!!
 *
 * Writes 'strngi' enclosed in double quotes to 'out':
 * - '\n' and '\t' are written as \n and \t
 * - other characters below ASCII 32 are written as backslash + one character (see below)
 * - '"' and '\' are prefixed with a backslash
 * - all other characters are written unchanged
 */
void GBS_fwrite_string(const char *strngi, FILE *out) {
    const unsigned char *strng = (const unsigned char *)strngi;
    int                  c;

    putc('"', out);

    while ((c = *strng++)) {
        if (c < 32) {
            putc('\\', out);
            if (c == '\n')
                putc('n', out);
            else if (c == '\t')
                putc('t', out);
            else if (c<25) {
                putc(c+'@', out); // characters ASCII 0..24 encoded as \@..\X (\n and \t are done above)
            }
            else {
                putc(c+('0'-25), out); // characters ASCII 25..31 encoded as \0..\6
            }
        }
        else if (c == '"') {
            putc('\\', out);
            putc('"', out);
        }
        else if (c == '\\') {
            putc('\\', out);
            putc('\\', out);
        }
        else {
            putc(c, out);
        }
    }
    putc('"', out);
}
841
842	/* Read a string from a file written by GBS_fwrite_string,
843	* Searches first '"'
844	*
845	* WARNING : changing this function affects perl-macro execution (read warnings for GBS_fwrite_string)
846	* any changes should be done in GBS_fconvert_string too.
847	*/
848
849	static char GBS_fread_string(FILE in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused!
850	GBS_strstruct buf(1024);
851
852	int x;
853	while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"'
854
855	if (x != EOF) {
856	while ((x = getc(in)) != '"') {
857	if (x == EOF) break;
858	if (x == '\\') {
859	x = getc(in);
860	if (x==EOF) break;
861	if (x == 'n') { buf.put('\n'); continue; }
862	if (x == 't') { buf.put('\t'); continue; }
863	if (x>='@' && x <= '@' + 25) { buf.put(x-'@'); continue; }
864	if (x>='0' && x <= '9') { buf.put(x-('0'-25)); continue; }
865	// all other backslashes are simply skipped
866	}
867	buf.put(x);
868	}
869	}
870	return buf.release_memfriendly();
871	}
872
873	/* does similar decoding as GBS_fread_string but works directly on an existing buffer
874	* (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!)
875	*
876	* inserts \0 behind decoded string (removes the closing '"')
877	* returns a pointer behind the end (") of the _encoded_ string
878	* returns NULp if a 0-character is found
879	*/
880	char GBS_fconvert_string(char buffer) {
881	char *t = buffer;
882	char *f = buffer;
883	int x;
884
885	gb_assert(f[-1] == '"');
886	// the opening " has already been read
887
888	while ((x = *f++) != '"') {
889	if (!x) break;
890
891	if (x == '\\') {
892	x = *f++;
893	if (!x) break;
894
895	if (x == 'n') {
896	*t++ = '\n';
897	continue;
898	}
899	if (x == 't') {
900	*t++ = '\t';
901	continue;
902	}
903	if (x>='@' && x <= '@' + 25) {
904	*t++ = x-'@';
905	continue;
906	}
907	if (x>='0' && x <= '9') {
908	*t++ = x-('0'-25);
909	continue;
910	}
911	// all other backslashes are simply skipped
912	}
913	*t++ = x;
914	}
915
916	if (!x) return NULp; // error (string should not contain 0-character)
917	gb_assert(x == '"');
918
919	t[0] = 0;
920	return f;
921	}
922
923	char GBS_replace_tabs_by_spaces(const char text) {
924	int tlen = strlen(text);
925	GBS_strstruct mfile(tlen * 3/2 + 1);
926	int tabpos = 0;
927	int c;
928
929	while ((c=*(text++))) {
930	if (c == '\t') {
931	int ntab = (tabpos + 8) & 0xfffff8;
932	while (tabpos < ntab) {
933	mfile.put(' ');
934	tabpos++;
935	}
936	continue;
937	}
938	tabpos ++;
939	if (c == '\n') {
940	tabpos = 0;
941	}
942	mfile.put(c);
943	}
944	return mfile.release_memfriendly();
945	}
946
947	char GBS_trim(const char str) {
948	// trim whitespace at beginning and end of 'str'
949	const char *whitespace = " \t\n";
950	while (str[0] && strchr(whitespace, str[0])) str++;
951
952	const char *end = strchr(str, 0)-1;
953	while (end >= str && strchr(whitespace, end[0])) end--;
954
955	return ARB_strpartdup(str, end);
956	}
957
958	static char dated_info(const char info) {
959	char *dated_info = NULp;
960	time_t date;
961
962	if (time(&date) != -1) {
963	char *dstr = ctime(&date);
964	char *nl = strchr(dstr, '\n');
965
966	if (nl) nl[0] = 0; // cut off LF
967
968	dated_info = GBS_global_string_copy("%s: %s", dstr, info);
969	}
970	else {
971	dated_info = ARB_strdup(info);
972	}
973	return dated_info;
974	}
975
976	char GBS_log_action_to(const char comment, const char *action, bool stamp) {
977	/*! concatenates 'comment' and 'action'.
978	* '\n' is appended to existing 'comment' and/or 'action' (if missing).
979	* @param comment may be NULp (=> result is 'action')
980	* @param action may NOT be NULp
981	* @param stamp true -> prefix current timestamp in front of 'action'
982	* @return heap copy of concatenation
983	*/
984	size_t clen = comment ? strlen(comment) : 0;
985	size_t alen = strlen(action);
986
987	GBS_strstruct new_comment(clen+1+(stamp ? 100 : 0)+alen+1+1); // + 2*\n + \0 + space for stamp
988
989	if (comment) {
990	new_comment.cat(comment);
991	if (clen == 0 \|\| comment[clen-1] != '\n') new_comment.put('\n');
992	}
993
994	if (stamp) {
995	char *dated_action = dated_info(action);
996	new_comment.cat(dated_action);
997	free(dated_action);
998	}
999	else {
1000	new_comment.cat(action);
1001	}
1002	if (alen == 0 \|\| action[alen-1] != '\n') new_comment.put('\n');
1003
1004	return new_comment.release_memfriendly();
1005	}
1006
1007	const char GBS_funptr2readable(void funptr, bool stripARBHOME) {
1008	// only returns module and offset for static functions :-(
1009	char **funNames = backtrace_symbols(&funptr, 1);
1010	const char *readable_fun = funNames[0];
1011
1012	if (stripARBHOME) {
1013	const char *ARBHOME = GB_getenvARBHOME();
1014	if (ARB_strBeginsWith(readable_fun, ARBHOME)) {
1015	readable_fun += strlen(ARBHOME)+1; // +1 hides slash behind ARBHOME
1016	}
1017	}
1018	return readable_fun;
1019	}
1020
1021	// --------------------------------------------------------------------------------
1022
1023	#ifdef UNIT_TESTS
1024
1025	#include <test_unit.h>
1026
1027	// #define TEST_TEST_MACROS
1028
#ifdef ENABLE_CRASH_TESTS
// helper functions passed to the crash-test macros used in TEST_signal_tests__crashtest
static void provokesegv() { raise(SIGSEGV); }
static void dont_provokesegv() {}
# if defined(ASSERTION_USED)
static void failassertion() { gb_assert(0); }
#  if defined(TEST_TEST_MACROS)
static void dont_failassertion() {}
#  endif
static void provokesegv_does_not_fail_assertion() {
    // provokesegv raises SIGSEGV (not an assertion)
    // -> the expectation below is meant to fail
    TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv);
}
# endif
#endif
1044
// Self-test of the crash-testing macros: checks that a SEGV and a failing
// assertion can be detected, repeated and told apart.
// NOTE(review): the callbacks used here are only defined under ENABLE_CRASH_TESTS/ASSERTION_USED;
// presumably the TEST_EXPECT_..-macros do not reference them otherwise -- confirm in test_unit.h.
1045	void TEST_signal_tests__crashtest() {
1046	// check whether we can test that no SEGV or assertion failure happened
1047	TEST_EXPECT_NO_SEGFAULT(dont_provokesegv);
1048
1049	// check whether we can test for SEGV and assertion failures
1050	TEST_EXPECT_SEGFAULT(provokesegv);
1051	TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1052
1053	// tests whether signal suppression works multiple times (by repeating tests)
1054	TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1055	TEST_EXPECT_SEGFAULT(provokesegv);
1056
1057	// test whether SEGV can be distinguished from assertion
1058	TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion);
1059
1060	// The following section is disabled, because it will
1061	// provoke test warnings (to test these warnings).
1062	// (enable it when changing any of these TEST_..-macros used here)
1063	#if defined(TEST_TEST_MACROS)
1064	TEST_EXPECT_NO_SEGFAULT__WANTED(provokesegv);
1065
1066	TEST_EXPECT_SEGFAULT__WANTED(dont_provokesegv);
1067	TEST_EXPECT_SEGFAULT__UNWANTED(provokesegv);
1068	#if defined(ASSERTION_USED)
1069	TEST_EXPECT_SEGFAULT__UNWANTED(failassertion);
1070	#endif
1071
1072	TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(dont_failassertion);
1073	TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(failassertion);
1074	TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(provokesegv_does_not_fail_assertion);
1075	#endif
1076	}
1077
// Test helper: duplicates 'Long', lets GBS_shorten_repeated_data() modify the copy
// in place, expects the copy to equal 'Short' and frees the copy afterwards.
1078	#define TEST_SHORTENED_EQUALS(Long,Short) do { \
1079	char *buf = ARB_strdup(Long); \
1080	GBS_shorten_repeated_data(buf); \
1081	TEST_EXPECT_EQUAL(buf, Short); \
1082	free(buf); \
1083	} while(0)
1084
// Checks GBS_shorten_repeated_data(): per the expectations below, runs of 5 or more
// identical characters are rewritten as "<char>{<count>}", whereas shorter runs,
// strings without repeats and the empty string remain unchanged.
1085	void TEST_GBS_shorten_repeated_data() {
1086	TEST_SHORTENED_EQUALS("12345", "12345");
1087	TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc");
1088	TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc");
1089	TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc");
1090	TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc");
1091	TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc");
1092	TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc");
1093	TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc");
1094	TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc");
1095	TEST_SHORTENED_EQUALS("aaaabc", "aaaabc");
1096	TEST_SHORTENED_EQUALS("aaabc", "aaabc");
1097	TEST_SHORTENED_EQUALS("aabc", "aabc");
1098	TEST_SHORTENED_EQUALS("", "");
1099	}
1100
// printf-style templates for hierarchical keys with three components;
// the "%s" marks the component (first, middle or last) that gets replaced by the key under test.
1101	static const char *hkey_format[] = {
1102	"/%s/bbb/ccc",
1103	"/aaa/%s/ccc",
1104	"/aaa/bbb/%s",
1105	};
1106
1107	inline const char useInHkey(const char fragment, size_t pos) {
1108	return GBS_global_string(hkey_format[pos], fragment);
1109	}
1110
// Inserts 'use' at every position defined by hkey_format and expects GB_check_hkey()
// to accept each resulting hierarchical key. The tested key is annotated while
// it is checked; the annotation is reset (NULp) afterwards.
1111	#define TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(use) do { \
1112	for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1113	const char *hkey = useInHkey(use, i); \
1114	TEST_ANNOTATE(hkey); \
1115	TEST_EXPECT_NO_ERROR(GB_check_hkey(hkey)); \
1116	} \
1117	TEST_ANNOTATE(NULp); \
1118	} while(0)
1119
// Inserts 'use' at every position defined by hkey_format and expects GB_check_hkey()
// to report an error containing the text 'contains' for each resulting hierarchical key.
// The tested key is annotated while it is checked; the annotation is reset (NULp) afterwards.
1120	#define TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(use,contains) do { \
1121	for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1122	const char *hkey = useInHkey(use, i); \
1123	TEST_ANNOTATE(hkey); \
1124	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(hkey), contains); \
1125	} \
1126	TEST_ANNOTATE(NULp); \
1127	} while(0)
1128
1129
// Checks GB_check_key() (plain keys) and GB_check_hkey() (hierarchical keys)
// against valid keys, keys of illegal length, keys containing illegal characters,
// NULp and several slash edge cases.
1130	void TEST_DB_key_checks() {
1131	// plain keys
1132	const char *shortest = "ab";
1133	const char *too_long = "ab345678901234567890123456789012345678901234567890123456789012345";
// derived by skipping the 1st character: one char shorter than 'shortest' resp. 'too_long'
1134	const char *too_short = shortest+1;
1135	const char *longest = too_long+1;
1136
1137	const char *empty = "";
1138	const char *slash = "sub/key";
1139	const char *dslash = "sub//key";
1140	const char *comma = "no,key";
1141	const char *minus = "no-key";
1142
1143	// obsolete GB_LINK syntax:
1144	const char *link = "link->syntax";
1145	const char *nowhere = "link->";
1146	const char *fromNw = "->syntax";
1147
1148	TEST_EXPECT_NO_ERROR(GB_check_key(shortest));
1149	TEST_EXPECT_NO_ERROR(GB_check_key(longest));
1150
1151	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_short), "too short");
1152	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_long), "too long");
1153	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(empty), "not allowed");
1154
// a slash is illegal inside a plain key (it is only accepted as separator by GB_check_hkey, see below)
1155	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(slash), "Invalid character '/'");
1156	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(dslash), "Invalid character '/'");
1157	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(comma), "Invalid character ','");
1158	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(minus), "Invalid character '-'");
1159	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(link), "Invalid character '-'");
1160	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(nowhere), "Invalid character '-'");
1161	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(fromNw), "Invalid character '-'");
1162
1163	// hierarchical keys
1164	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(shortest);
1165	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(longest);
1166
1167	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_short, "too short");
1168	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_long, "too long");
1169	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(empty, "not allowed");
1170
1171	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(slash);
1172	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(dslash, "Empty key is not allowed");
1173	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(comma, "Invalid character ','");
1174	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(minus, "Invalid character '-'");
1175	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(link, "Invalid character '-'");
1176	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(nowhere, "Invalid character '-'");
1177	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(fromNw, "Invalid character '-'");
1178
1179	// test NULp keys:
1180	TEST_EXPECT_ERROR_CONTAINS(GB_check_key (NULp), "Empty key is not allowed");
1181	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(NULp), "Empty key is not allowed");
1182
1183	// some edge cases for hierarchical keys:
1184	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//"), "Empty key is not allowed");
1185	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//key"), "Empty key is not allowed"); // @@@ is double slash compensated by GB_search etc? if yes -> accept here as well!
1186	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key//"), "Empty key is not allowed");
1187	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("/"), "Empty key is not allowed");
1188	TEST_EXPECT_NO_ERROR (GB_check_hkey("/key"));
1189	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key/"), "Empty key is not allowed"); // @@@ use better message? e.g. "invalid trailing '/'"
1190	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(""), "Empty key is not allowed");
1191	}
1192
// Converts 'str' using GBS_string_2_key(), expects the result to equal 'expected'
// and to be accepted by GB_check_key(); frees the generated key afterwards.
1193	#define TEST_STRING2KEY(str,expected) do { \
1194	char *as_key = GBS_string_2_key(str); \
1195	TEST_EXPECT_EQUAL(as_key, expected); \
1196	TEST_EXPECT_NO_ERROR(GB_check_key(as_key)); \
1197	free(as_key); \
1198	} while(0)
1199
// Checks key generation by GBS_string_2_key(): spaces become '_', characters that
// are not allowed get dropped, too short input is padded with '_' and too long input is truncated.
1200	void TEST_DB_key_generation() {
1201	TEST_STRING2KEY("abc", "abc");
1202	TEST_STRING2KEY("a b c", "a_b_c");
1203
1204	// invalid chars
1205	TEST_STRING2KEY("string containing \"double-quotes\", 'quotes' and other:shit!*&^@!%@(",
1206	"string_containing_doublequotes_quotes_and_othershit");
1207
1208	// length tests
1209	TEST_STRING2KEY("a", "a_"); // too short
1210	TEST_STRING2KEY("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // too long
1211	"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1212	}
1213
// Tests the helper class TextRef (length, comparison, trimming) and the
// TaggedContentParser (iteration over untagged, tagged, empty and multi-tagged parts).
// NOTE(review): several literals below contain whitespace that matters for the
// expectations (e.g. the TextRef length 10); verify them against the repository
// before editing, whitespace may have been collapsed in copies of this file.
1214	void TEST_TaggedContentParser() {
1215	// test helper class TextRef:
1216	TEST_REJECT(TextRef().defined()); // default to undefined
1217	{
1218	TextRef bla("blakjahd", 3);
1219	TEST_EXPECT(bla.defined());
1220	TEST_EXPECT_EQUAL(bla.get_length(), 3);
1221
// only the first 3 characters ("bla") take part in the comparison
1222	TEST_EXPECT(bla.compare("bl") > 0);
1223	TEST_EXPECT(bla.compare("bla") == 0);
1224	TEST_EXPECT(bla.compare("blase") < 0);
1225
1226	TextRef spaced(" spaced "+1, 10);
1227	TEST_EXPECT(spaced.headTrimmed().compare("spaced ") == 0);
1228	TEST_EXPECT(spaced.tailTrimmed().compare(" spaced") == 0);
1229	TEST_EXPECT(spaced.trimmed ().compare("spaced") == 0);
1230	}
1231
1232	const char *text = " untagged [tag] tagged [empty] ";
1233
// references into 'text' for the contents and tags it contains
1234	TextRef cr_untagged(strstr(text, "untagged"), 8);
1235	TextRef cr_tagged (strstr(text, "tagged"), 6);
1236	TextRef tr_tag (strstr(text, "tag"), 3);
1237	TextRef tr_empty (strstr(text, "empty"), 5);
1238
1239	// test TaggedContentParser:
1240	{
1241	TaggedContentParser parser(text);
1242
// 1st part: content in front of the first tag
1243	TEST_EXPECT(parser.has_part());
1244	TEST_REJECT(parser.has_tag());
1245	TEST_EXPECT(parser.get_content().compare("untagged") == 0);
1246
1247	parser.next();
1248
// 2nd part: tag with content
1249	TEST_EXPECT(parser.has_part());
1250	TEST_EXPECT(parser.get_tag ().compare("tag") == 0);
1251	TEST_EXPECT(parser.get_content().compare("tagged") == 0);
1252
1253	parser.next();
1254
// 3rd part: tag without content
1255	TEST_EXPECT(parser.has_part());
1256	TEST_EXPECT(parser.get_tag().compare("empty") == 0);
1257	TEST_REJECT(parser.has_content());
1258
1259	parser.next();
1260
1261	TEST_REJECT(parser.has_part());
1262	}
1263	{ // parse untagged input
1264	TaggedContentParser parser("hi");
1265	TEST_EXPECT(parser.has_part());
1266	TEST_REJECT(parser.has_tag());
1267	TEST_EXPECT(parser.get_content().compare("hi") == 0);
1268	parser.next();
1269	TEST_REJECT(parser.has_part());
1270	}
1271	{ // parse empty input
1272	TaggedContentParser empty(""); TEST_REJECT(empty.has_part());
1273	TaggedContentParser white(" \t\n "); TEST_REJECT(white.has_part());
1274	}
1275	{ // parse single tag w/o content
1276	TaggedContentParser parser(" [hello] ");
1277	TEST_EXPECT(parser.has_part());
1278	TEST_EXPECT(parser.get_tag().compare("hello") == 0);
1279	TEST_REJECT(parser.has_content());
1280	parser.next();
1281	TEST_REJECT(parser.has_part());
1282	}
1283	{ // parse multi-tags
// each tag of a multi-tag is reported as separate part (with identical content)
1284	TaggedContentParser parser(" [ t1 , t2 ] t");
1285	TEST_EXPECT(parser.has_part());
1286	TEST_EXPECT(parser.get_tag().compare("t1") == 0);
1287	TEST_EXPECT(parser.get_content().compare("t") == 0);
1288	parser.next();
1289	TEST_EXPECT(parser.has_part());
1290	TEST_EXPECT(parser.get_tag().compare("t2") == 0);
1291	TEST_EXPECT(parser.get_content().compare("t") == 0);
1292	parser.next();
1293	TEST_REJECT(parser.has_part());
1294	}
1295	}
1296
// Calls GBS_merge_tagged_strings() and expects its (heap-allocated) result to equal 'expected'.
// Caution: the macro groups its arguments by kind (t1,t2, r1,r2, s1,s2),
// whereas the function receives them grouped by string: (s1,t1,r1, s2,t2,r2).
1297	#define TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected) do { \
1298	char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1299	TEST_EXPECT_EQUAL(result, expected); \
1300	free(result); \
1301	} while(0)
1302
// Variant of TEST_MERGE_TAGGED for known-broken behavior: passes the wanted result
// ('expected') and the currently produced result ('got') to TEST_EXPECT_EQUAL__BROKEN.
// Uses the same argument reordering as TEST_MERGE_TAGGED.
1303	#define TEST_MERGE_TAGGED__BROKEN(t1,t2,r1,r2,s1,s2,expected,got) do { \
1304	char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1305	TEST_EXPECT_EQUAL__BROKEN(result, expected, got); \
1306	free(result); \
1307	} while(0)
1308
// Tests GBS_merge_tagged_strings() via TEST_MERGE_TAGGED(t1,t2, r1,r2, s1,s2, expected):
// s1/s2 are the strings to merge and t1/t2 the tags applied to their untagged content.
// r1/r2 presumably name a tag whose content gets deleted from the respective string
// before merging (see section "merge two tagged string with deleting") -- confirm
// against GBS_merge_tagged_strings.
// According to the expectations, tags in the result are upper-cased and sorted.
1309	void TEST_merge_tagged_strings() {
1310	// merge two fields:
1311	const char *_0 = NULp;
1312
1313	TEST_MERGE_TAGGED("S", "D", "", "", "source", "dest", "[D_] dest [S_] source");
1314	TEST_MERGE_TAGGED("SRC", "DST", "", _0, "source", "dest", "[DST] dest [SRC] source");
1315	TEST_MERGE_TAGGED("SRC", "DST", _0, "", "source", "dest", "[DST] dest [SRC] source");
1316	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "sth", "[DST,SRC] sth");
1317
1318	TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth", "sth", "[DST,SRC] sth"); // show default tags do not get deleted
1319	TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth [SRC] del", "sth [DST] del", "[DST,SRC] sth"); // exception: already present default tags
1320
1321	// update fields:
1322	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST] dest [SRC] source", "[DST] dest [SRC] newsource");
1323	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1324	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,src] sth", "[DST] sth [SRC] newsource");
1325	TEST_MERGE_TAGGED("SRC", "DST", _0, "src", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1326	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "sth", " [DST] sth [SRC] source", "[DST,SRC] sth");
1327
1328	// append (opposed to update this keeps old entries with same tag; useless?)
1329	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST] dest [SRC] source", "[DST] dest [SRC] newsource [SRC] source");
1330	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST,SRC] sth", "[DST,SRC] sth [SRC] newsource");
1331	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "[DST] sth [SRC] source", "[DST,SRC] sth [SRC] source");
1332
1333	// merge three fields:
1334	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST] dest [SRC] source", "[DST] dest [OTH] oth [SRC] source");
1335	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST,SRC] sth", "[DST,SRC] sth [OTH] oth");
1336	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "sth", " [DST,SRC] sth", "[DST,OTH,SRC] sth");
1337	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "dest", " [DST] dest [SRC] source", "[DST,OTH] dest [SRC] source");
1338	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "source", " [DST] dest [SRC] source", "[DST] dest [OTH,SRC] source");
1339
1340	// same tests as in section above, but vv:
1341	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "oth", "[DST] dest [OTH] oth [SRC] source");
1342	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "oth", "[DST,SRC] sth [OTH] oth");
1343	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "sth", "[DST,OTH,SRC] sth");
1344	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "dest", "[DST,OTH] dest [SRC] source");
1345	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "source", "[DST] dest [OTH,SRC] source");
1346
1347	// test real-merges (content existing in both strings):
1348	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre1 [C1] c1 [C2] c2", "pre2[C2]c2[C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1] pre1 [P2] pre2");
1349	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [C2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content for [C2]
1350	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [c2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content + different tag-case for [C2] (tests that tags are case-insensitive!)
1351	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [C2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content for [C2] -> inserts that tag multiple times
1352	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // continue processing last result (multiple tags with same name are handled)
1353	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C2] c2b [C3]c3 [C2] c2a", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // merge multiple tags with same name
1354	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [c2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content and different tag-case for [C2]
1355	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1,C4] c1 [C2] c2a ", "pre [c2] c2b [C4,C3]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // multitags
1356	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [ C1, C4] c1 [C2 ] c2a ", "pre [ c2] c2b [C4, C3 ]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // spaced-multitags
1357
1358	// merge two tagged string with deleting
// (the macros below stay defined until the end of the translation unit)
1359	#define DSTSRC1 "[DST] dest1 [SRC] src1"
1360	#define DSTSRC2 "[DST] dest2 [SRC] src2"
1361	#define DSTSRC2LOW "[dst] dest2 [src] src2"
1362
1363	TEST_MERGE_TAGGED("O1", "O2", _0, _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2");
1364	TEST_MERGE_TAGGED("O1", "O2", "SRC", _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src2");
1365	TEST_MERGE_TAGGED("O1", "O2", _0, "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src1 [SRC] src2");
1366	TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1367	TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1368	TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1369	TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1370	TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1371	TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1372	TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1373	TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1374	TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1375	TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1376	TEST_MERGE_TAGGED("O1", "O2", "SRC,DST", "DST,SRC", DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2"); // delete does not handle multiple tags (yet)
1377	}
1378
// Tests GB_read_as_tagged_string(): creates a new database ("new.arb"), writes
// tagged strings into a GB_STRING entry and checks which content is reported for
// a given tag (incl. multi-tags, spaced, incomplete and pathological tags).
// NOTE(review): some tag arguments below look identical (e.g. the consecutive
// TEST_REJECT_TAG_CONTENT(" ") lines); they may originally differ in the number of
// spaces -- verify against the repository before editing.
1379	__ATTR__REDUCED_OPTIMIZE void TEST_read_tagged() {
1380	GB_shell shell;
1381	GBDATA *gb_main = GB_open("new.arb", "c");
1382	{
1383	GB_transaction ta(gb_main);
1384
1385	{
1386	GBDATA *gb_int_entry = GB_create(gb_main, "int", GB_INT);
1387	TEST_EXPECT_NO_ERROR(GB_write_int(gb_int_entry, 4711));
1388	TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_int_entry, "USELESS")); // reading from GB_INT doesn't make sense, but has to work w/o error
1389
1390	GBDATA *gb_ints_entry = GB_create(gb_main, "int", GB_INTS);
1391	GB_UINT4 ints[] = { 1, 2 };
1392	TEST_EXPECT_NO_ERROR(GB_write_ints(gb_ints_entry, ints, 2));
1393	TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_ints_entry, "USELESS")); // reading from GB_INTS doesn't make sense, but has to work w/o error
1394	}
1395
// local shortcuts: all of them read from 'gb_entry' (and TEST_EXPECT_FULL_CONTENT
// compares against the current value of 'tagged_string'), both defined below
1396	#define TEST_EXPECT_TAG_CONTENT(tag,expected) TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag), expected)
1397	#define TEST_REJECT_TAG_CONTENT(tag) TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag))
1398	#define TEST_EXPECT_FULL_CONTENT(tag) TEST_EXPECT_TAG_CONTENT(tag,tagged_string)
1399
1400	GBDATA *gb_entry = GB_create(gb_main, "str", GB_STRING);
1401	const char *tagged_string = "[T1,T2] t12 [T3] t3[T4]t4[][]xxx[AA]aa[WW]w1 [WW]w2 [BB]bb [XX]x1 [XX]x2 [yy] yy [Y] y [EMPTY][FAKE,EMPTY]fake[ SP1ST, SPACED, PADDED ,UNSPACED,_SCORED_,FOLLOWED ,FOLLAST ] spaced [LAST] last ";
1402	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1403
// no tag (NULp or empty) -> whole string is reported
1404	TEST_EXPECT_FULL_CONTENT(NULp);
1405	TEST_EXPECT_FULL_CONTENT("");
1406	TEST_REJECT_TAG_CONTENT(" "); // searches for tag '_' (no such tag)
1407
1408	TEST_EXPECT_TAG_CONTENT("T1", "t12");
1409	TEST_EXPECT_TAG_CONTENT("T2", "t12");
1410	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1411	TEST_EXPECT_TAG_CONTENT("T4", "t4[][]xxx");
1412
1413	TEST_EXPECT_TAG_CONTENT("AA", "aa");
1414	TEST_EXPECT_TAG_CONTENT("BB", "bb");
1415	TEST_EXPECT_TAG_CONTENT("WW", "w1"); // now finds 1st occurrence of [WW]
1416	TEST_EXPECT_TAG_CONTENT("XX", "x1");
1417	TEST_EXPECT_TAG_CONTENT("YY", "yy");
1418	TEST_EXPECT_TAG_CONTENT("yy", "yy");
1419
1420	TEST_REJECT_TAG_CONTENT("Y");
1421	// TEST_EXPECT_TAG_CONTENT("Y", "y"); // @@@ tags with length == 1 are never found -> should be handled when used via GUI
1422
1423	TEST_EXPECT_TAG_CONTENT("EMPTY", "fake"); // now reports 1st non-empty content
1424	TEST_EXPECT_TAG_CONTENT("FAKE", "fake");
1425	TEST_EXPECT_TAG_CONTENT("fake", "fake");
1426
1427	TEST_REJECT_TAG_CONTENT("NOSUCHTAG");
1428	TEST_EXPECT_TAG_CONTENT("SPACED", "spaced");
1429	TEST_EXPECT_TAG_CONTENT("SP1ST", "spaced");
1430	TEST_REJECT_TAG_CONTENT(" SPACED"); // dito (specified space is converted into '_' before searching tag)
1431	TEST_REJECT_TAG_CONTENT("_SPACED"); // not found (tag stored with space, search performed for '_SPACED')
1432	TEST_EXPECT_TAG_CONTENT("PADDED", "spaced");
1433	TEST_EXPECT_TAG_CONTENT("FOLLOWED", "spaced");
1434	TEST_EXPECT_TAG_CONTENT("FOLLAST", "spaced");
1435
1436	TEST_EXPECT_TAG_CONTENT("_SCORED_", "spaced");
1437	TEST_EXPECT_TAG_CONTENT(" SCORED ", "spaced");
1438	TEST_EXPECT_TAG_CONTENT("UNSPACED", "spaced");
1439	TEST_EXPECT_TAG_CONTENT("LAST", "last");
1440
1441	// test incomplete tags
1442	tagged_string = "bla [WHATEVER hello";
1443	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1444	TEST_REJECT_TAG_CONTENT("WHATEVER");
1445
1446	tagged_string = "bla [T1] t1 [T2 t2 [T3] t3";
1447	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1448	TEST_EXPECT_TAG_CONTENT("T1", "t1 [T2 t2");
1449	TEST_REJECT_TAG_CONTENT("T2"); // tag is unclosed
1450	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1451
1452	// test pathological tags
1453	tagged_string = "bla [T1] t1 [ ] sp1 [ ] sp2 [___] us [T3] t3 [_a] a";
1454	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1455	TEST_EXPECT_TAG_CONTENT("T1", "t1 [ ] sp1 [ ] sp2");
1456	TEST_EXPECT_FULL_CONTENT("");
1457	TEST_REJECT_TAG_CONTENT(" ");
1458	TEST_REJECT_TAG_CONTENT(" ");
1459	TEST_REJECT_TAG_CONTENT(",");
1460	TEST_EXPECT_TAG_CONTENT(", a", "a"); // searches for tag '_a'
1461	TEST_EXPECT_TAG_CONTENT(", a,", "a"); // dito
1462	TEST_EXPECT_TAG_CONTENT(", ,a,", "a"); // dito
1463	TEST_EXPECT_TAG_CONTENT(" ", "us");
1464	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1465	}
1466	GB_close(gb_main);
1467	}
1468
// Runs GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv) and expects
// the result to equal 'expected' without an error being exported.
// Requires a variable named 'callEnv' in the scope where the macro is used.
1469	#define TEST_EXPECT_EVAL_TAGGED(in,dtag,tag,aci,expected) do{ \
1470	TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED( \
1471	GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1472	expected); \
1473	}while(0)
1474
// Runs GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv) and expects
// no result, but an exported error containing 'expectedErrorPart'.
// Requires a variable named 'callEnv' in the scope where the macro is used.
1475	#define TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(in,dtag,tag,aci,expectedErrorPart) do{ \
1476	TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS( \
1477	GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1478	expectedErrorPart); \
1479	}while(0)
1480
// Tests GBS_modify_tagged_string_with_ACI() using species from "TEST_loadsave.arb":
// applies SRT/ACI expressions to the content of one tag of a tagged string
// ('def' is passed as default tag) and checks result, error reporting and the
// handling of empty, incomplete and multi-tags.
// The environment refers to a tree named "tree_missing"; the taxonomy tests below
// expect the resulting "tree not found" error.
// NOTE(review): some input literals below look identical (see "empty tags");
// they may originally differ in whitespace -- verify against the repository before editing.
1481	__ATTR__REDUCED_OPTIMIZE void TEST_tagged_eval() {
1482	GB_shell shell;
1483	GBDATA *gb_main = GB_open("TEST_loadsave.arb", "r");
1484	{
1485	GB_transaction ta(gb_main);
1486	GBL_env env(gb_main, "tree_missing");
1487
1488	{
1489	GBDATA *gb_species = GBT_find_species(gb_main, "MhcBurto");
1490	TEST_REJECT_NULL(gb_species);
1491	GBL_call_env callEnv(gb_species, env);
1492
// expression is only applied if 'tag' matches the tag of the (default-tagged) content
1493	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", "", "[DEF] bla");
1494	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", NULp, "[DEF] bla");
1495	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", ":bla=blub", "[DEF] bla");
1496	TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", ":bla=blub", "[TAG] blub");
1497	TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", "len", "[TAG] 3");
1498
1499	// empty tags:
1500	TEST_EXPECT_EVAL_TAGGED("[empty] ", "def", "empty", NULp, "");
1501	TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1502	TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1503	TEST_EXPECT_EVAL_TAGGED("[empty][filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1504	TEST_EXPECT_EVAL_TAGGED("[filled] xxx [empty]", "def", "empty", NULp, "[FILLED] xxx");
1505
1506	#define THREE_TAGS "[TAG] tag [tip] tip [top] top"
1507	#define THREE_TAGS_UPCASE "[TAG] tag [TIP] tip [TOP] top"
1508
1509	// dont eval:
1510	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", NULp, THREE_TAGS_UPCASE);
1511	// eval SRT:
1512	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", ":=<>", "[TAG] <tag> [TIP] tip [TOP] top");
1513	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tag", ":=<>", "[TAG] <tag> [TIP] tip [TOP] top");
1514	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":=()", "[TAG] tag [TIP] (tip) [TOP] top");
1515	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TIP", ":=()", "[TAG] tag [TIP] (tip) [TOP] top");
1516	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=", "[TAG] tag [TOP] top"); // tag emptied by SRT was removed from result
1517	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", ":=-*1", "[TAG] tag [TIP] tip [TOP] top-top");
1518	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":i=o", "[TAG] tag [TIP,TOP] top"); // merge tags
1519	// eval ACI:
1520	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", "len", "[TAG] tag [TIP] 3 [TOP] top");
1521	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", "len", "[TAG] tag [TIP] tip [TOP] 3");
1522
1523	// test SRT/ACI errors:
1524	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", ":*", "no '=' found");
1525	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("untagged", "def", "def", ":*", "no '=' found");
1526	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", "illcmd", "Unknown command 'illcmd'");
1527	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("un [tagged", "def", "def", "illcmd", "Unknown command 'illcmd'");
1528
1529	// no error raised, if expression not applied:
1530	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "no", "illcmd", THREE_TAGS_UPCASE);
1531
1532	// incomplete tags
// (per the expectations below, unmatched '[' and stray ']' show up as '{' and '}' in the result)
1533	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":=<>", "[DEF] <{no tag>");
1534	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":* =<2,*1>", "[DEF] <tag,{no>");
1535	TEST_EXPECT_EVAL_TAGGED("[no [tag", "def", "def", ":* =<2,*1>", "[DEF] <{tag,{no>");
1536	TEST_EXPECT_EVAL_TAGGED("[no [tag] xx", "def", "def", ":* =<2,*1>", "[DEF] {no [TAG] xx"); // SRT changes nothing here (no match)
1537	TEST_EXPECT_EVAL_TAGGED("[no [tag[]", "def", "def", ":* =<2,*1>", "[DEF] <{tag{},{no>");
1538	TEST_EXPECT_EVAL_TAGGED("[no [tag[] xx","def", "def", ":* =<2,*1>", "[DEF] <{tag{} xx,{no>");
1539	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":* =<2,*1>", "[DEF] <tag,no>");
1540	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":no=yes", "[DEF] {yes tag");
1541	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":no=yes", "[DEF] yes tag");
1542	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "DEF", ":no=yes", "[DEF] yes tag");
1543	TEST_EXPECT_EVAL_TAGGED("no tag", "DEF", "def", ":no=yes", "[DEF] yes tag");
1544	TEST_EXPECT_EVAL_TAGGED("kept [trunk", "def", "def", ":=<>", "[DEF] <kept {trunk>");
1545	TEST_EXPECT_EVAL_TAGGED("kept", "def", "def", ":=<>", "[DEF] <kept>");
1546	}
1547
1548	{
1549	GBDATA *gb_species = GBT_find_species(gb_main, "MetMazei");
1550	TEST_REJECT_NULL(gb_species);
1551	GBL_call_env callEnv(gb_species, env);
1552
1553	// run scripts using context:
// (the expression is applied to the selected tag only -> a multi-tag gets split)
1554	TEST_EXPECT_EVAL_TAGGED("[T1,T2] name='$n'", "def", "T1", ":$n=*(name)", "[T1] name='MetMazei' [T2] name='$n'");
1555	TEST_EXPECT_EVAL_TAGGED("[T1,T2] seqlen=$l", "def", "T2", ":$l=*(\|sequence\|len)", "[T1] seqlen=$l [T2] seqlen=165");
1556	TEST_EXPECT_EVAL_TAGGED("[T1,T2] nuc", "def", "T1", "dd;\"=\";command(sequence\|count(ACGTUN))", "[T1] nuc=66 [T2] nuc");
1557
1558	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax='$t'", "def", "def", ":$t=*(\|taxonomy(2))", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1559	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax", "def", "def", "dd;\"=\";taxonomy(2)", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1560
1561	// content before 1st tag:
1562	TEST_EXPECT_EVAL_TAGGED("untagged [tag] tagged", "def", "tag", ":g=G", "[DEF] untagged [TAG] taGGed");
1563	TEST_EXPECT_EVAL_TAGGED(" [tag] tagged", "def", "tag", ":g=G", "[TAG] taGGed");
1564
1565	// test elimination of leading/trailing whitespace:
1566	TEST_EXPECT_EVAL_TAGGED(" untagged ", "def", "def", ":g=G", "[DEF] untaGGed"); // untagged content
1567	TEST_EXPECT_EVAL_TAGGED("[tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed");
1568	TEST_EXPECT_EVAL_TAGGED(" [trail] trail [tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed [TRAIL] trail");
1569
1570	#define MIXED_TAGS "[tag] tag [tip,top] tiptop [xx,yy,zz] zzz"
1571
1572	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "tip", ":tip=top", "[TAG] tag [TIP] toptop [TOP] tiptop [XX,YY,ZZ] zzz");
1573	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "yy", ":zzz=tiptop", "[TAG] tag [TIP,TOP,YY] tiptop [XX,ZZ] zzz");
1574	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "top", ":tiptop=zzz", "[TAG] tag [TIP] tiptop [TOP,XX,YY,ZZ] zzz");
1575	}
1576	}
1577	GB_close(gb_main);
1578	}
1579
// Tests GBS_log_action_to() without (stamped==0) and with (stamped==1) timestamp.
// With a stamp the exact result is not predictable, so only fragments
// (and the first character) of the result are checked in that case.
1580	void TEST_log_action() {
1581	for (int stamped = 0; stamped<=1; ++stamped) {
1582	TEST_ANNOTATE(GBS_global_string("stamped=%i", stamped));
// comment without trailing LF -> LF gets inserted
1583	{
1584	char *logged = GBS_log_action_to("comment", "action", stamped);
1585	if (stamped) {
1586	TEST_EXPECT_CONTAINS(logged, "comment\n");
1587	TEST_EXPECT_CONTAINS(logged, "action\n");
1588	}
1589	else {
1590	TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1591	}
1592	free(logged);
1593	}
// comment with trailing LF -> no additional LF
1594	{
1595	char *logged = GBS_log_action_to("comment\n", "action", stamped);
1596	if (stamped) {
1597	TEST_EXPECT_CONTAINS(logged, "comment\n");
1598	TEST_EXPECT_CONTAINS(logged, "action\n");
1599	}
1600	else {
1601	TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1602	}
1603	free(logged);
1604	}
// empty (but existing) comment -> result starts with LF
1605	{
1606	char *logged = GBS_log_action_to("", "action", stamped);
1607	if (stamped) {
1608	TEST_EXPECT_EQUAL(logged[0], '\n');
1609	TEST_EXPECT_CONTAINS(logged, "action\n");
1610	}
1611	else {
1612	TEST_EXPECT_EQUAL(logged, "\naction\n");
1613	}
1614	free(logged);
1615	}
// no comment (NULp) -> result consists of the action only (no leading LF)
1616	{
1617	char *logged = GBS_log_action_to(NULp, "action\n", stamped); // test action with trailing LF
1618	if (stamped) {
1619	TEST_EXPECT_DIFFERENT(logged[0], '\n');
1620	TEST_EXPECT_CONTAINS(logged, "action\n");
1621	}
1622	else {
1623	TEST_EXPECT_EQUAL(logged, "action\n");
1624	}
1625	free(logged);
1626	}
1627	}
1628	}
1629	TEST_PUBLISH(TEST_log_action);
1630
1631	#endif // UNIT_TESTS
1632

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/ARBDB/adstring.cxx

Download in other formats: