Context Navigation

source: tags/ms_r18q1/ARBDB/adstring.cxx

Visit:

Last change on this file was 16766, checked in by westram, 6 years ago
Reintegrates 'gcc' into 'trunk' (mostly cosmetic changes). Adds: log:branches/gcc@16655,16741:16743,16752:16765
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 64.5 KB

Line
1	// =============================================================== //
2	// //
3	// File : adstring.cxx //
4	// Purpose : various string functions //
5	// //
6	// Institute of Microbiology (Technical University Munich) //
7	// http://www.arb-home.de/ //
8	// //
9	// =============================================================== //
10
11	#include <arb_backtrace.h>
12	#include <arb_strbuf.h>
13	#include <arb_defs.h>
14	#include <arb_str.h>
15
16	#include "gb_key.h"
17	#include "gb_aci.h"
18
19	#include <SigHandler.h>
20
21	#include <execinfo.h>
22
23	#include <cstdarg>
24	#include <cctype>
25	#include <cerrno>
26	#include <ctime>
27	#include <setjmp.h>
28
29	#include <valgrind.h>
30
31	static char GBS_string_2_key_with_exclusions(const char str, const char *additional) {
32	// converts any string to a valid key (all chars in 'additional' are additionally allowed)
33	char buf[GB_KEY_LEN_MAX+1];
34	int i;
35	int c;
36	for (i=0; i<GB_KEY_LEN_MAX;) {
37	c = *(str++);
38	if (!c) break;
39
40	if (c==' ' \|\| c == '_') {
41	buf[i++] = '_';
42	}
43	else if (isalnum(c) \|\| strchr(additional, c)) {
44	buf[i++] = c;
45	}
46	}
47	for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_';
48	buf[i] = 0;
49	return ARB_strdup(buf);
50	}
51
52	char GBS_string_2_key(const char str) { // converts any string to a valid key
53	return GBS_string_2_key_with_exclusions(str, "");
54	}
55
56	char GB_memdup(const char source, size_t len) {
57	char *dest = ARB_alloc<char>(len);
58	memcpy(dest, source, len);
59	return dest;
60	}
61
62	GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT
63	// test whether all characters are letters, numbers or _
64	int i;
65	long len;
66
67	if (!key \|\| key[0] == 0) return "Empty key is not allowed";
68	len = strlen(key);
69	if (len>GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key);
70	if (len < GB_KEY_LEN_MIN) return GBS_global_string("Invalid key '%s': too short", key);
71
72	for (i = 0; key[i]; ++i) {
73	char c = key[i];
74	if ((c>='a') && (c<='z')) continue;
75	if ((c>='A') && (c<='Z')) continue;
76	if ((c>='0') && (c<='9')) continue;
77	if (c=='_') continue;
78	return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key);
79	}
80
81	return NULp;
82	}
83
84	GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT
85	// test whether all characters are letters, numbers or _
86	// additionally allow '/' and '->' for hierarchical keys
87	GB_ERROR err = NULp;
88
89	if (!key \|\| key[0] == 0) {
90	err = "Empty key is not allowed";
91	}
92	else if (!strpbrk(key, "/-")) {
93	err = GB_check_key(key);
94	}
95	else {
96	char *key_copy = ARB_strdup(key);
97	char *start = key_copy;
98
99	if (start[0] == '/') ++start;
100
101	while (start && !err) {
102	char *key_end = strpbrk(start, "/-");
103
104	if (key_end) {
105	char c = *key_end;
106	*key_end = 0;
107	err = GB_check_key(start);
108	*key_end = c;
109
110	if (c == '-') {
111	if (key_end[1] != '>') {
112	err = GBS_global_string("'>' expected after '-' in '%s'", key);
113	}
114	start = key_end+2;
115	}
116	else {
117	gb_assert(c == '/');
118	start = key_end+1;
119	}
120	}
121	else {
122	err = GB_check_key(start);
123	start = NULp;
124	}
125	}
126
127	free(key_copy);
128	}
129
130	return err;
131	}
132
133	// ----------------------------------------------
134	// escape/unescape characters in strings
135
136	char GBS_escape_string(const char str, const char *chars_to_escape, char escape_char) {
137	/*! escape characters in 'str'
138	*
139	* uses a special escape-method, which eliminates all 'chars_to_escape' completely
140	* from str (this makes further processing of the string more easy)
141	*
142	* @param str string to escape
143	*
144	* @param escape_char is the character used for escaping. For performance reasons it
145	* should be a character rarely used in 'str'.
146	*
147	* @param chars_to_escape may not contain 'A'-'Z' (these are used for escaping)
148	* and it may not be longer than 26 bytes
149	*
150	* @return heap copy of escaped string
151	*
152	* Inverse of GBS_unescape_string()
153	*/
154
155	int len = strlen(str);
156	char buffer = ARB_alloc<char>(2len+1);
157	int j = 0;
158	int i;
159
160	gb_assert(strlen(chars_to_escape) <= 26);
161	gb_assert(!strchr(chars_to_escape, escape_char)); // escape_char may not be included in chars_to_escape
162
163	for (i = 0; str[i]; ++i) {
164	if (str[i] == escape_char) {
165	buffer[j++] = escape_char;
166	buffer[j++] = escape_char;
167	}
168	else {
169	const char *found = strchr(chars_to_escape, str[i]);
170	if (found) {
171	buffer[j++] = escape_char;
172	buffer[j++] = (found-chars_to_escape+'A');
173
174	gb_assert(found[0]<'A' \|\| found[0]>'Z'); // illegal character in chars_to_escape
175	}
176	else {
177
178	buffer[j++] = str[i];
179	}
180	}
181	}
182	buffer[j] = 0;
183
184	return buffer;
185	}
186
187	char GBS_unescape_string(const char str, const char *escaped_chars, char escape_char) {
188	//! inverse of GB_escape_string() - for params see there
189
190	int len = strlen(str);
191	char *buffer = ARB_alloc<char>(len+1);
192	int j = 0;
193	int i;
194
195	#if defined(ASSERTION_USED)
196	int escaped_chars_len = strlen(escaped_chars);
197	#endif // ASSERTION_USED
198
199	gb_assert(strlen(escaped_chars) <= 26);
200	gb_assert(!strchr(escaped_chars, escape_char)); // escape_char may not be included in chars_to_escape
201
202	for (i = 0; str[i]; ++i) {
203	if (str[i] == escape_char) {
204	if (str[i+1] == escape_char) {
205	buffer[j++] = escape_char;
206	}
207	else {
208	int idx = str[i+1]-'A';
209
210	gb_assert(idx >= 0 && idx<escaped_chars_len);
211	buffer[j++] = escaped_chars[idx];
212	}
213	++i;
214	}
215	else {
216	buffer[j++] = str[i];
217	}
218	}
219	buffer[j] = 0;
220
221	return buffer;
222	}
223
224	char *GBS_eval_env(GB_CSTR p) {
225	GB_ERROR error = NULp;
226	GB_CSTR ka;
227	GBS_strstruct *out = GBS_stropen(1000);
228
229	while ((ka = GBS_find_string(p, "$(", 0))) {
230	GB_CSTR kz = strchr(ka, ')');
231	if (!kz) {
232	error = GBS_global_string("missing ')' for envvar '%s'", p);
233	break;
234	}
235	else {
236	char *envvar = ARB_strpartdup(ka+2, kz-1);
237	int len = ka-p;
238
239	if (len) GBS_strncat(out, p, len);
240
241	GB_CSTR genv = GB_getenv(envvar);
242	if (genv) GBS_strcat(out, genv);
243
244	p = kz+1;
245	free(envvar);
246	}
247	}
248
249	if (error) {
250	GB_export_error(error);
251	GBS_strforget(out);
252	return NULp;
253	}
254
255	GBS_strcat(out, p); // copy rest
256	return GBS_strclose(out);
257	}
258
long GBS_gcgchecksum(const char *seq) {
    // GCGchecksum:
    // sums up toupper(character) * weight, where the weight cycles through 1..57;
    // the result is the sum modulo 10000.
    long sum    = 0;
    long weight = 0;

    for (const char *p = seq; *p; ++p) {
        weight = (weight == 57) ? 1 : weight+1;
        sum   += weight * toupper(*p);
    }

    return sum % 10000;
}
275
// Table of CRC-32's of all single byte values (made by makecrc.c of ZIP source)
uint32_t crctab[] = {
    0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
    0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
    0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
    0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
    0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
    0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
    0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
    0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
    0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
    0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
    0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
    0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
    0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
    0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
    0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
    0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
    0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
    0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
    0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
    0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
    0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
    0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
    0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
    0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
    0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
    0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
    0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
    0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
    0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
    0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
    0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
    0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
    0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
    0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
    0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
    0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
    0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
    0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
    0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
    0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
    0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
    0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
    0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
    0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
    0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
    0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
    0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
    0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
    0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
    0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
    0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
    0x2d02ef8dL
};

uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude) {
    /* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source
     *
     * Calculates the checksum over the first 'length' bytes of 'seq'.
     *
     * if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too)
     *
     * Characters listed in 'exclude' (may be null) do not contribute to the checksum.
     * Zero-bytes never contribute (they are mapped to 0, like excluded characters).
     * A 'length' <= 0 results in the checksum of empty input.
     */

    unsigned long c = 0xffffffffL;
    int           tab[256]; // maps each byte to the value fed into the CRC (0 = skip) @@@ avoid recalc for each call

    for (int i = 0; i<256; i++) {
        tab[i] = ignore_case ? toupper(i) : i;
    }

    if (exclude) {
        for (const unsigned char *e = (const unsigned char *)exclude; *e; ++e) {
            const int k = *e;
            tab[k] = 0;
            if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0;
        }
    }

    const unsigned char *s = (const unsigned char *)seq;
    for (long n = length; n > 0; --n) {
        const int i = tab[*s++];
        if (i) {
            c = crctab[(c ^ (unsigned long)i) & 0xff] ^ (c >> 8);
        }
    }

    return (uint32_t)(c ^ 0xffffffffL);
}

uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude) {
    // checksum of the zero-terminated string 'seq' (see GB_checksum)
    // if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too)
    return GB_checksum(seq, strlen(seq), ignore_case, exclude);
}
369
size_t GBS_shorten_repeated_data(char *data) {
    // Compresses runs of identical characters inside 'data' (in place!):
    // runs of 5 or more characters are written as "<char>{<count>}",
    // shorter runs are kept unchanged.
    //
    // e.g. "..............................ACGT....................TGCA"
    // -> ".{30}ACGT.{20}TGCA"
    //
    // Returns the length of the shortened string.

#if defined(DEBUG)
    size_t orgLen = strlen(data);
#endif // DEBUG

    char *const  begin    = data;
    char        *out      = data; // write position (never overtakes the read position)
    const char  *in       = data; // read position
    char         run_char = *in++;
    size_t       run_len  = 1;

    while (run_char) {
        const char next = *in++;

        if (next == run_char) {
            ++run_len;
            continue;
        }

        // the run has ended -> write it
        if (run_len < 5) {
            for (size_t written = 0; written<run_len; ++written) *out++ = run_char; // insert plain
        }
        else {
            out += sprintf(out, "%c{%zu}", run_char, run_len); // insert repeat count
        }

        run_char = next;
        run_len  = 1;
    }

    *out = 0;

#if defined(DEBUG)
    gb_assert(strlen(begin) <= orgLen);
#endif // DEBUG

    return out-begin;
}
410
411
412	// ------------------------------------------
413	// helper classes for tagged fields
414
415	class TextRef {
416	const char *data; // has no terminal zero-byte!
417	int length;
418
419	public:
420	TextRef() : data(NULp), length(-1) {}
421	TextRef(const char *data_, int length_) : data(data_), length(length_) {}
422	explicit TextRef(const char *zeroTerminated) : data(zeroTerminated), length(strlen(data)) {}
423
424	bool defined() const { return data && length>0; }
425	const char *get_data() const { return data; }
426	int get_length() const { return length; }
427
428	const char *get_following() const { return data ? data+length : NULp; }
429
430	int compare(const char *str) const {
431	gb_assert(defined());
432	int cmp = strncmp(get_data(), str, get_length());
433	if (!cmp) {
434	if (str[get_length()]) {
435	cmp = -1; // right side contains more content
436	}
437	}
438	return cmp;
439	}
440	int icompare(const char *str) const {
441	gb_assert(defined());
442	int cmp = strncasecmp(get_data(), str, get_length());
443	if (!cmp) {
444	if (str[get_length()]) {
445	cmp = -1; // right side contains more content
446	}
447	}
448	return cmp;
449	}
450	char *copy() const { return ARB_strndup(get_data(), get_length()); }
451
452	char head() const { return defined() ? data[0] : 0; }
453	char tail() const { return defined() ? data[length-1] : 0; }
454
455	TextRef headTrimmed() const {
456	if (defined()) {
457	for (int s = 0; s<length; ++s) {
458	if (!isspace(data[s])) {
459	return TextRef(data+s, length-s);
460	}
461	}
462	}
463	return TextRef();
464	}
465	TextRef tailTrimmed() const {
466	if (defined()) {
467	for (int s = length-1; s>=0; --s) {
468	if (!isspace(data[s])) {
469	return TextRef(data, s+1);
470	}
471	}
472	}
473	return TextRef();
474	}
475
476	TextRef trimmed() const {
477	return headTrimmed().tailTrimmed();
478	}
479
480	inline TextRef partBefore(const TextRef& subref) const;
481	inline TextRef partBehind(const TextRef& subref) const;
482
483	bool is_part_of(const TextRef& other) const {
484	gb_assert(defined() && other.defined());
485	return get_data()>=other.get_data() && get_following()<=other.get_following();
486	}
487
488	const char find(char c) const { return reinterpret_cast<const char>(memchr(get_data(), c, get_length())); }
489	};
490
491	TextRef textBetween(const TextRef& t1, const TextRef& t2) {
492	const char *behind_d1 = t1.get_following();
493	const char *d2 = t2.get_data();
494
495	if (behind_d1 && d2 && behind_d1<d2) {
496	return TextRef(behind_d1, d2-behind_d1);
497	}
498	return TextRef();
499	}
500
501	inline TextRef TextRef::partBefore(const TextRef& subref) const {
502	gb_assert(subref.is_part_of(*this));
503	return textBetween(TextRef(get_data(), 0), subref);
504	}
505	inline TextRef TextRef::partBehind(const TextRef& subref) const {
506	gb_assert(subref.is_part_of(*this));
507	return TextRef(subref.get_following(), get_following()-subref.get_following());
508	}
509
510	class TaggedContentParser {
511	TextRef wholeInput;
512	TextRef tag, content; // current position
513	TextRef restTags; // store (rest of) multiple tags (e.g. from "[t1,t2]")
514	TextRef nextBrackets; // next "[..]" part (behind current tag)
515
516	void findBrackets(const char *in) {
517	nextBrackets = TextRef();
518	const char *tag_start = strchr(in, '[');
519	if (tag_start) {
520	const char *tag_end = strchr(tag_start, ']');
521	if (tag_end) {
522	if (tag_end == tag_start+1) { // empty tag -> use as content
523	findBrackets(tag_end+1);
524	}
525	else {
526	const char unwanted_bracket = reinterpret_cast<const char>(memchr(tag_start+1, '[', tag_end-tag_start-1));
527	if (unwanted_bracket) { // tagname contains '[' -> step to next bracket
528	findBrackets(unwanted_bracket);
529	}
530	else {
531	TextRef name = TextRef(tag_start+1, tag_end-tag_start-1).trimmed();
532	if (name.defined()) { // not only whitespace inside brackets
533	nextBrackets = TextRef(tag_start, tag_end-tag_start+1);
534	}
535	else {
536	findBrackets(tag_end+1);
537	}
538	}
539	}
540	}
541	}
542	}
543
544	void parse_next_multi_tag() {
545	gb_assert(restTags.defined());
546	TextRef comma(restTags.find(','), 1);
547	if (comma.defined()) {
548	tag = restTags.partBefore(comma).tailTrimmed();
549	restTags = restTags.partBehind(comma).headTrimmed();
550	}
551	else {
552	tag = restTags;
553	restTags = TextRef();
554	}
555	}
556	void parse_next() {
557	if (restTags.defined()) {
558	parse_next_multi_tag();
559	}
560	else if (nextBrackets.defined()) {
561	TextRef brackets = nextBrackets;
562	findBrackets(brackets.get_following());
563
564	content = (nextBrackets.defined() ? textBetween(brackets, nextBrackets) : wholeInput.partBehind(brackets)).trimmed();
565
566	gb_assert(brackets.head() == '[' && brackets.tail() == ']');
567
568	TextRef tags = TextRef(brackets.get_data()+1, brackets.get_length()-2).trimmed();
569	gb_assert(tags.defined());
570
571	restTags = tags;
572	parse_next_multi_tag();
573	}
574	else {
575	tag = content = TextRef();
576	gb_assert(!has_part());
577	}
578	}
579	void parse_first() {
580	gb_assert(!has_part());
581	findBrackets(wholeInput.get_data());
582	content = (nextBrackets.defined() ? wholeInput.partBefore(nextBrackets) : wholeInput).trimmed();
583	if (!content.defined()) parse_next(); // no untagged prefix seen -> directly goto first tag
584	}
585
586	public:
587	TaggedContentParser(const char *input_) : wholeInput(input_) { parse_first(); }
588
589	bool has_tag() const { return tag.defined(); }
590	bool has_content() const { return content.defined(); }
591
592	void next() { parse_next(); }
593	bool has_part() const { return has_tag() \|\| has_content(); } // false -> parser has finished
594
595	const TextRef& get_tag() const { return tag; }
596	const TextRef& get_content() const { return content; }
597	};
598
599
600	// -------------------------------------------
601	// helper function for tagged fields
602
603	static void g_bs_add_value_tag_to_hash(GB_HASH hash, const char tag, char *value) {
604	if (!value[0]) return; // ignore empty values
605
606	{
607	char *p;
608	p = value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{'
609	p = value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}'
610	}
611
612	GB_HASH sh = (GB_HASH )GBS_read_hash(hash, value);
613	if (!sh) {
614	sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent
615	GBS_write_hash(hash, value, (long)sh);
616	}
617	GBS_write_hash(sh, tag, 1);
618	}
619
620	static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH hash, char s, char default_tag, const char del) {
621	TaggedContentParser parser(s);
622	while (parser.has_part()) {
623	if (parser.has_content()) {
624	char *content = parser.get_content().copy();
625	if (parser.has_tag()) {
626	char *tag = parser.get_tag().copy();
627	if (!del \|\| ARB_stricmp(tag, del) != 0) {
628	g_bs_add_value_tag_to_hash(hash, tag, content);
629	}
630	free(tag);
631	}
632	else {
633	g_bs_add_value_tag_to_hash(hash, default_tag, content); // no tag found, use default tag
634	}
635	free(content);
636	}
637	parser.next();
638	}
639	}
640
641	static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH hash, char s, char default_tag, const char rtag, const char *aci, GBL_call_env& env) {
642	GB_ERROR error = NULp;
643
644	TaggedContentParser parser(s);
645	while (parser.has_part() && !error) {
646	if (parser.has_content()) {
647	char *value = parser.get_content().copy();
648	char *tag = parser.has_tag() ? parser.get_tag().copy() : strdup(default_tag);
649
650	if (rtag && ARB_stricmp(tag, rtag) == 0) {
651	freeset(value, GB_command_interpreter_in_env(value, aci, env));
652	if (!value) error = GB_await_error();
653	}
654
655	if (!error) g_bs_add_value_tag_to_hash(hash, tag, value);
656
657	free(tag);
658	free(value);
659	}
660	parser.next();
661	}
662
663	return error;
664	}
665
666	static long g_bs_merge_tags(const char tag, long val, void cd_sub_result) {
667	GBS_strstruct sub_result = (GBS_strstruct)cd_sub_result;
668
669	GBS_strcat(sub_result, tag);
670	GBS_strcat(sub_result, ",");
671
672	return val;
673	}
674
675	static long g_bs_read_tagged_hash(const char value, long subhash, void cd_g_bs_collect_tags_hash) {
676	static int counter = 0;
677
678	GBS_strstruct *sub_result = GBS_stropen(100);
679	GBS_hash_do_sorted_loop((GB_HASH *)subhash, g_bs_merge_tags, GBS_HCF_sortedByKey, sub_result);
680	GBS_intcat(sub_result, counter++); // create a unique number
681
682	char *str = ARB_strupper(GBS_strclose(sub_result));
683
684	GB_HASH g_bs_collect_tags_hash = (GB_HASH)cd_g_bs_collect_tags_hash;
685	GBS_write_hash(g_bs_collect_tags_hash, str, (long)ARB_strdup(value)); // send output to new hash for sorting
686
687	free(str);
688	return subhash;
689	}
690
691	static long g_bs_read_final_hash(const char tag, long value, void cd_merge_result) {
692	GBS_strstruct merge_result = (GBS_strstruct)cd_merge_result;
693
694	char lk = const_cast<char>(strrchr(tag, ','));
695	if (lk) { // remove number at end
696	*lk = 0;
697
698	if (!merge_result->empty()) merge_result->put(' '); // skip trailing space
699	merge_result->put('[');
700	merge_result->cat(tag);
701	merge_result->put(']');
702	merge_result->put(' ');
703	}
704	merge_result->cat((char*)value);
705	return value;
706	}
707
708	static char g_bs_get_string_of_tag_hash(GB_HASH tag_hash) {
709	GBS_strstruct *merge_result = GBS_stropen(256);
710	GB_HASH *collect_tags_hash = GBS_create_dynaval_hash(512, GB_IGNORE_CASE, GBS_dynaval_free);
711
712	GBS_hash_do_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash
713	GBS_hash_do_sorted_loop(collect_tags_hash, g_bs_read_final_hash, GBS_HCF_sortedByKey, merge_result);
714
715	GBS_free_hash(collect_tags_hash);
716	return GBS_strclose(merge_result);
717	}
718
719	static long g_bs_free_hash_of_hashes_elem(const char /key/, long val, void ) {
720	GB_HASH hash = (GB_HASH)val;
721	if (hash) GBS_free_hash(hash);
722	return 0;
723	}
724	static void g_bs_free_hash_of_hashes(GB_HASH *hash) {
725	GBS_hash_do_loop(hash, g_bs_free_hash_of_hashes_elem, NULp);
726	GBS_free_hash(hash);
727	}
728
729	char GBS_merge_tagged_strings(const char s1, const char tag1, const char replace1, const char s2, const char tag2, const char *replace2) {
730	/* Create a tagged string from two tagged strings:
731	* a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string'
732	*
733	* if 's2' is not empty, then delete tag 'replace1' in 's1'
734	* if 's1' is not empty, then delete tag 'replace2' in 's2'
735	*
736	* (result should never be NULp)
737	*/
738
739	char *str1 = ARB_strdup(s1);
740	char *str2 = ARB_strdup(s2);
741	char *t1 = GBS_string_2_key(tag1);
742	char *t2 = GBS_string_2_key(tag2);
743	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
744
745	if (!s1[0]) replace2 = NULp;
746	if (!s2[0]) replace1 = NULp;
747
748	if (replace1 && !replace1[0]) replace1 = NULp;
749	if (replace2 && !replace2[0]) replace2 = NULp;
750
751	g_bs_convert_string_to_tagged_hash_with_delete(hash, str1, t1, replace1);
752	g_bs_convert_string_to_tagged_hash_with_delete(hash, str2, t2, replace2);
753
754	char *result = g_bs_get_string_of_tag_hash(hash);
755
756	g_bs_free_hash_of_hashes(hash);
757
758	free(t2);
759	free(t1);
760	free(str2);
761	free(str1);
762
763	return result;
764	}
765
766	char GBS_modify_tagged_string_with_ACI(const char s, const char dt, const char tag, const char *aci, GBL_call_env& env) {
767	/* if 's' is untagged, tag it with default tag 'dt'.
768	* if 'tag' is specified -> apply 'aci' to that part of the content of 's', which is tagged with 'tag' (i.e. look for '[tag]')
769	*
770	* if result is NULp, an error has been exported.
771	*/
772
773	char *str = ARB_strdup(s);
774	char *default_tag = GBS_string_2_key(dt);
775	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
776	char *result = NULp;
777
778	GB_ERROR error = g_bs_convert_string_to_tagged_hash_with_rewrite(hash, str, default_tag, tag, aci, env);
779
780	if (!error) {
781	result = g_bs_get_string_of_tag_hash(hash);
782	}
783	else {
784	GB_export_error(error);
785	}
786
787	g_bs_free_hash_of_hashes(hash);
788
789	free(default_tag);
790	free(str);
791
792	return result;
793	}
794
795	char GB_read_as_tagged_string(GBDATA gbd, const char *tagi) {
796	char *buf = GB_read_as_string(gbd);
797	if (buf && tagi && tagi[0]) {
798	TaggedContentParser parser(buf);
799
800	char *wantedTag = GBS_string_2_key(tagi);
801	char *contentFound = NULp;
802
803	while (parser.has_part() && !contentFound) {
804	if (parser.has_tag() && parser.get_tag().icompare(wantedTag) == 0) {
805	contentFound = parser.get_content().copy();
806	}
807	parser.next();
808	}
809	free(wantedTag);
810	free(buf);
811
812	return contentFound;
813	}
814	return buf;
815	}
816
817
818	/* be CAREFUL : this function is used to save ARB ASCII database (i.e. properties)
819	* used as well to save perl macros
820	*
821	* when changing GBS_fwrite_string -> GBS_fread_string needs to be fixed as well
822	*
823	* always keep in mind, that many users have databases/macros written with older
824	* versions of this function. They MUST load proper!!!
825	*/
void GBS_fwrite_string(const char *strngi, FILE *out) {
    // Writes 'strngi' to 'out' as double-quoted string. Encoding:
    //   newline          -> \n
    //   tab              -> \t
    //   ASCII 1..24      -> \A..\X  (except newline and tab)
    //   ASCII 25..31     -> \0..\6
    //   '"' and '\'      -> prefixed with '\'
    // All other characters are written unchanged.
    //
    // Do not change the encoding (see the warning in front of this function).

    const unsigned char *strng = (const unsigned char *)strngi;
    int                  c;

    putc('"', out);

    while ((c = *strng++)) {
        if (c < 32) {
            putc('\\', out);
            if      (c == '\n') putc('n', out);
            else if (c == '\t') putc('t', out);
            else if (c<25)      putc(c+'@', out);        // characters ASCII 0..24 encoded as \@..\X (\n and \t are done above)
            else                putc(c+('0'-25), out);   // characters ASCII 25..31 encoded as \0..\6
        }
        else if (c == '"' || c == '\\') {
            putc('\\', out);
            putc(c, out);
        }
        else {
            putc(c, out);
        }
    }
    putc('"', out);
}
860
861	/* Read a string from a file written by GBS_fwrite_string,
862	* Searches first '"'
863	*
864	* WARNING : changing this function affects perl-macro execution (read warnings for GBS_fwrite_string)
865	* any changes should be done in GBS_fconvert_string too.
866	*/
867
868	static char GBS_fread_string(FILE in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused!
869	GBS_strstruct *strstr = GBS_stropen(1024);
870	int x;
871
872	while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"'
873
874	if (x != EOF) {
875	while ((x = getc(in)) != '"') {
876	if (x == EOF) break;
877	if (x == '\\') {
878	x = getc(in); if (x==EOF) break;
879	if (x == 'n') {
880	GBS_chrcat(strstr, '\n');
881	continue;
882	}
883	if (x == 't') {
884	GBS_chrcat(strstr, '\t');
885	continue;
886	}
887	if (x>='@' && x <= '@' + 25) {
888	GBS_chrcat(strstr, x-'@');
889	continue;
890	}
891	if (x>='0' && x <= '9') {
892	GBS_chrcat(strstr, x-('0'-25));
893	continue;
894	}
895	// all other backslashes are simply skipped
896	}
897	GBS_chrcat(strstr, x);
898	}
899	}
900	return GBS_strclose(strstr);
901	}
902
903	/* does similar decoding as GBS_fread_string but works directly on an existing buffer
904	* (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!)
905	*
906	* inserts \0 behind decoded string (removes the closing '"')
907	* returns a pointer behind the end (") of the _encoded_ string
908	* returns NULp if a 0-character is found
909	*/
910	char GBS_fconvert_string(char buffer) {
911	char *t = buffer;
912	char *f = buffer;
913	int x;
914
915	gb_assert(f[-1] == '"');
916	// the opening " has already been read
917
918	while ((x = *f++) != '"') {
919	if (!x) break;
920
921	if (x == '\\') {
922	x = *f++;
923	if (!x) break;
924
925	if (x == 'n') {
926	*t++ = '\n';
927	continue;
928	}
929	if (x == 't') {
930	*t++ = '\t';
931	continue;
932	}
933	if (x>='@' && x <= '@' + 25) {
934	*t++ = x-'@';
935	continue;
936	}
937	if (x>='0' && x <= '9') {
938	*t++ = x-('0'-25);
939	continue;
940	}
941	// all other backslashes are simply skipped
942	}
943	*t++ = x;
944	}
945
946	if (!x) return NULp; // error (string should not contain 0-character)
947	gb_assert(x == '"');
948
949	t[0] = 0;
950	return f;
951	}
952
953	char GBS_replace_tabs_by_spaces(const char text) {
954	int tlen = strlen(text);
955	GBS_strstruct mfile = GBS_stropen(tlen 3/2 + 1);
956	int tabpos = 0;
957	int c;
958
959	while ((c=*(text++))) {
960	if (c == '\t') {
961	int ntab = (tabpos + 8) & 0xfffff8;
962	while (tabpos < ntab) {
963	GBS_chrcat(mfile, ' ');
964	tabpos++;
965	}
966	continue;
967	}
968	tabpos ++;
969	if (c == '\n') {
970	tabpos = 0;
971	}
972	GBS_chrcat(mfile, c);
973	}
974	return GBS_strclose(mfile);
975	}
976
977	char GBS_trim(const char str) {
978	// trim whitespace at beginning and end of 'str'
979	const char *whitespace = " \t\n";
980	while (str[0] && strchr(whitespace, str[0])) str++;
981
982	const char *end = strchr(str, 0)-1;
983	while (end >= str && strchr(whitespace, end[0])) end--;
984
985	return ARB_strpartdup(str, end);
986	}
987
988	static char dated_info(const char info) {
989	char *dated_info = NULp;
990	time_t date;
991
992	if (time(&date) != -1) {
993	char *dstr = ctime(&date);
994	char *nl = strchr(dstr, '\n');
995
996	if (nl) nl[0] = 0; // cut off LF
997
998	dated_info = GBS_global_string_copy("%s: %s", dstr, info);
999	}
1000	else {
1001	dated_info = ARB_strdup(info);
1002	}
1003	return dated_info;
1004	}
1005
1006	char GBS_log_action_to(const char comment, const char *action, bool stamp) {
1007	/*! concatenates 'comment' and 'action'.
1008	* '\n' is appended to existing 'comment' and/or 'action' (if missing).
1009	* @param comment may be NULp (=> result is 'action')
1010	* @param action may NOT be NULp
1011	* @param stamp true -> prefix current timestamp in front of 'action'
1012	* @return heap copy of concatenation
1013	*/
1014	size_t clen = comment ? strlen(comment) : 0;
1015	size_t alen = strlen(action);
1016
1017	GBS_strstruct new_comment = GBS_stropen(clen+1+(stamp ? 100 : 0)+alen+1+1); // + 2\n + \0 + space for stamp
1018
1019	if (comment) {
1020	GBS_strcat(new_comment, comment);
1021	if (clen == 0 \|\| comment[clen-1] != '\n') GBS_chrcat(new_comment, '\n');
1022	}
1023
1024	if (stamp) {
1025	char *dated_action = dated_info(action);
1026	GBS_strcat(new_comment, dated_action);
1027	free(dated_action);
1028	}
1029	else {
1030	GBS_strcat(new_comment, action);
1031	}
1032	if (alen == 0 \|\| action[alen-1] != '\n') GBS_chrcat(new_comment, '\n');
1033
1034	return GBS_strclose(new_comment);
1035	}
1036
1037	const char GBS_funptr2readable(void funptr, bool stripARBHOME) {
1038	// only returns module and offset for static functions :-(
1039	char **funNames = backtrace_symbols(&funptr, 1);
1040	const char *readable_fun = funNames[0];
1041
1042	if (stripARBHOME) {
1043	const char *ARBHOME = GB_getenvARBHOME();
1044	if (ARB_strBeginsWith(readable_fun, ARBHOME)) {
1045	readable_fun += strlen(ARBHOME)+1; // +1 hides slash behind ARBHOME
1046	}
1047	}
1048	return readable_fun;
1049	}
1050
1051	// --------------------------------------------------------------------------------
1052
1053	#ifdef UNIT_TESTS
1054
1055	#include <test_unit.h>
1056
1057	// #define TEST_TEST_MACROS
1058
1059	#ifdef ENABLE_CRASH_TESTS
	// Callbacks handed to the TEST_EXPECT_*SEGFAULT / TEST_EXPECT_CODE_ASSERTION_FAILS macros in TEST_signal_tests.
	// provokesegv raises SIGSEGV on purpose; dont_provokesegv is its do-nothing counterpart.
1060	static void provokesegv() { raise(SIGSEGV); }
1061	static void dont_provokesegv() {}
1062	# if defined(ASSERTION_USED)
	// failassertion triggers a failing gb_assert; it only exists when ASSERTION_USED is defined.
1063	static void failassertion() { gb_assert(0); }
1064	# if defined(TEST_TEST_MACROS)
	// do-nothing counterpart of failassertion; only referenced from the TEST_TEST_MACROS section of TEST_signal_tests.
1065	static void dont_failassertion() {}
1066	# endif
1067	static void provokesegv_does_not_fail_assertion() {
1068	// provokesegv does not raise assertion
1069	// -> the following assertion fails
1070	TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv);
1071	}
1072	# endif
1073	#endif
1074
1075	void TEST_signal_tests() {
	// Self-test of the crash-detecting test macros: feeds them callbacks that do / do not
	// raise SIGSEGV or fail an assertion.
	// NOTE(review): the callbacks used here are only defined if ENABLE_CRASH_TESTS (and for
	// the assertion variants ASSERTION_USED) is set, while the calls below are unconditional;
	// presumably the macros discard their argument otherwise - confirm in test_unit.h.
1076	// check whether we can test that no SEGV or assertion failure happened
1077	TEST_EXPECT_NO_SEGFAULT(dont_provokesegv);
1078
1079	// check whether we can test for SEGV and assertion failures
1080	TEST_EXPECT_SEGFAULT(provokesegv);
1081	TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1082
1083	// tests whether signal suppression works multiple times (by repeating tests)
1084	TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
1085	TEST_EXPECT_SEGFAULT(provokesegv);
1086
1087	// test whether SEGV can be distinguished from assertion
1088	TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion);
1089
1090	// The following section is disabled, because it will
1091	// provoke test warnings (to test these warnings).
1092	// (enable it when changing any of these TEST_..-macros used here)
1093	#if defined(TEST_TEST_MACROS)
1094	TEST_EXPECT_NO_SEGFAULT__WANTED(provokesegv);
1095
1096	TEST_EXPECT_SEGFAULT__WANTED(dont_provokesegv);
1097	TEST_EXPECT_SEGFAULT__UNWANTED(provokesegv);
1098	#if defined(ASSERTION_USED)
1099	TEST_EXPECT_SEGFAULT__UNWANTED(failassertion);
1100	#endif
1101
1102	TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(dont_failassertion);
1103	TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(failassertion);
1104	TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(provokesegv_does_not_fail_assertion);
1105	#endif
1106	}
1107
1108	#define EXPECT_CONTENT(content) TEST_EXPECT_EQUAL(GBS_mempntr(strstr), content)
1109
1110	void TEST_GBS_strstruct() {
	// Exercises the GBS_strstruct append/cut functions and checks the buffer content after each step.
	// EXPECT_CONTENT (defined above) compares GBS_mempntr(strstr) with the expected text, i.e. it
	// relies on a local variable named 'strstr' being in scope (the local hides the libc function
	// of the same name inside each block).
1111	{
1112	GBS_strstruct *strstr = GBS_stropen(1000); EXPECT_CONTENT("");
1113
1114	GBS_chrncat(strstr, 'b', 3); EXPECT_CONTENT("bbb");
1115	GBS_intcat(strstr, 17); EXPECT_CONTENT("bbb17");
1116	GBS_chrcat(strstr, '_'); EXPECT_CONTENT("bbb17_");
1117	GBS_floatcat(strstr, 3.5); EXPECT_CONTENT("bbb17_3.500000");
1118
1119	TEST_EXPECT_EQUAL(GBS_memoffset(strstr), 14);
	// cutting 13 of the 14 stored characters leaves only the leading 'b'
1120	GBS_str_cut_tail(strstr, 13); EXPECT_CONTENT("b");
1121	GBS_strcat(strstr, "utter"); EXPECT_CONTENT("butter");
1122	GBS_strncat(strstr, "flying", 3); EXPECT_CONTENT("butterfly");
1123
1124	GBS_strnprintf(strstr, 200, "%c%s", ' ', "flutters");
1125	EXPECT_CONTENT("butterfly flutters");
1126
1127	GBS_strforget(strstr);
1128	}
1129	{
1130	// re-alloc smaller
1131	GBS_strstruct *strstr = GBS_stropen(500); EXPECT_CONTENT("");
1132	GBS_strforget(strstr);
1133	}
1134
1135	// trigger downsize of oversized block
	// NOTE(review): presumably GBS_strforget keeps the buffer for reuse and shrinks it after
	// repeated small requests - confirm in the strstruct implementation.
1136	for (int i = 0; i<12; ++i) {
1137	GBS_strstruct *strstr = GBS_stropen(10);
1138	GBS_strforget(strstr);
1139	}
1140
1141	{
1142	GBS_strstruct *strstr = GBS_stropen(10);
1143	size_t oldbufsize = strstr->get_buffer_size();
1144	GBS_chrncat(strstr, 'x', 20); // trigger reallocation of buffer
1145
1146	TEST_EXPECT_DIFFERENT(oldbufsize, strstr->get_buffer_size()); // did we reallocate?
1147	EXPECT_CONTENT("xxxxxxxxxxxxxxxxxxxx");
1148	GBS_strforget(strstr);
1149	}
1150	}
1151
1152	#define TEST_SHORTENED_EQUALS(Long,Short) do { \
1153	char *buf = ARB_strdup(Long); \
1154	GBS_shorten_repeated_data(buf); \
1155	TEST_EXPECT_EQUAL(buf, Short); \
1156	free(buf); \
1157	} while(0)
	// ^ TEST_SHORTENED_EQUALS works on a heap copy of 'Long', because GBS_shorten_repeated_data
	//   modifies the passed buffer in place; the copy is freed after the comparison.
1158
1159	void TEST_GBS_shorten_repeated_data() {
	// Expectations below: a run of 5 or more identical characters is replaced by "<char>{<count>}",
	// shorter runs (and strings without repeats, incl. the empty string) stay unchanged.
1160	TEST_SHORTENED_EQUALS("12345", "12345");
1161	TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc");
1162	TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc");
1163	TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc");
1164	TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc");
1165	TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc");
1166	TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc");
1167	TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc");
1168	TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc");
1169	TEST_SHORTENED_EQUALS("aaaabc", "aaaabc");
1170	TEST_SHORTENED_EQUALS("aaabc", "aaabc");
1171	TEST_SHORTENED_EQUALS("aabc", "aabc");
1172	TEST_SHORTENED_EQUALS("", "");
1173	}
1174
1175	static const char *hkey_format[] = {
1176	"/%s/bbb/ccc",
1177	"/aaa/%s/ccc",
1178	"/aaa/bbb/%s",
1179	};
1180
1181	inline const char useInHkey(const char fragment, size_t pos) {
1182	return GBS_global_string(hkey_format[pos], fragment);
1183	}
1184
1185	#define TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(use) do { \
1186	for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1187	TEST_EXPECT_NO_ERROR(GB_check_hkey(useInHkey(use, i))); \
1188	} \
1189	} while(0)
	// ^ inserts 'use' at each position offered by hkey_format and expects GB_check_hkey to accept every resulting key
1190
1191	#define TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(use,contains) do { \
1192	for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
1193	TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(useInHkey(use, i)), contains); \
1194	} \
1195	} while(0)
	// ^ same loop, but expects GB_check_hkey to report an error containing 'contains' for every position
1196
1197
1198	void TEST_DB_key_checks() {
	// Checks GB_check_key (plain keys) and GB_check_hkey (hierarchical keys) against keys at and
	// just beyond the length limits and against keys containing special characters.
1199	// plain keys
1200	const char *shortest = "ab";
1201	const char *too_long = "ab345678901234567890123456789012345678901234567890123456789012345";
	// the boundary cases are derived by skipping the first character of the neighbouring case
1202	const char *too_short = shortest+1;
1203	const char *longest = too_long+1;
1204
1205	const char *empty = "";
1206	const char *slash = "sub/key";
1207	const char *comma = "no,key";
1208	const char *minus = "no-key";
1209
1210	TEST_EXPECT_NO_ERROR(GB_check_key(shortest));
1211	TEST_EXPECT_NO_ERROR(GB_check_key(longest));
1212
1213	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_short), "too short");
1214	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_long), "too long");
1215	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(empty), "not allowed");
1216
1217	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(slash), "Invalid character");
1218	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(comma), "Invalid character");
1219	TEST_EXPECT_ERROR_CONTAINS(GB_check_key(minus), "Invalid character");
1220
1221	// hierarchical keys
1222	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(shortest);
1223	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(longest);
1224
1225	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_short, "too short");
1226	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_long, "too long");
1227	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(empty, "not allowed");
1228
	// differences to plain keys: a slash is accepted inside a hierarchical key,
	// and '-' produces a different message ("'>' expected after '-'")
1229	TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(slash);
1230	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(comma, "Invalid character ','");
1231	TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(minus, "'>' expected after '-'");
1232	}
1233
1234	#define TEST_STRING2KEY(str,expected) do { \
1235	char *as_key = GBS_string_2_key(str); \
1236	TEST_EXPECT_EQUAL(as_key, expected); \
1237	TEST_EXPECT_NO_ERROR(GB_check_key(as_key)); \
1238	free(as_key); \
1239	} while(0)
	// ^ converts 'str' via GBS_string_2_key, compares with 'expected', additionally requires that
	//   the generated key passes GB_check_key, and frees the heap copy
1240
1241	void TEST_DB_key_generation() {
	// Checks GBS_string_2_key: blanks become '_', other invalid characters are dropped,
	// too short results are padded with '_' and too long input is truncated.
1242	TEST_STRING2KEY("abc", "abc");
1243	TEST_STRING2KEY("a b c", "a_b_c");
1244
1245	// invalid chars
1246	TEST_STRING2KEY("string containing \"double-quotes\", 'quotes' and other:shit!*&^@!%@(",
1247	"string_containing_doublequotes_quotes_and_othershit");
1248
1249	// length tests
1250	TEST_STRING2KEY("a", "a_"); // too short
1251	TEST_STRING2KEY("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // too long
1252	"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1253	}
1254
1255	void TEST_TaggedContentParser() {
	// Tests the helper class TextRef (length-limited view on text) first, then walks
	// TaggedContentParser over several inputs of the form "[tag[,tag..]] content".
1256	// test helper class TextRef:
1257	TEST_REJECT(TextRef().defined()); // default to undefined
1258	{
1259	TextRef bla("blakjahd", 3);
1260	TEST_EXPECT(bla.defined());
1261	TEST_EXPECT_EQUAL(bla.get_length(), 3);
1262
	// only the first 3 characters ("bla") take part in the comparison
1263	TEST_EXPECT(bla.compare("bl") > 0);
1264	TEST_EXPECT(bla.compare("bla") == 0);
1265	TEST_EXPECT(bla.compare("blase") < 0);
1266
	// NOTE(review): the length 10 exceeds the literal as shown here; blanks inside the
	// literals of this block were presumably collapsed in this copy - compare with the repository.
1267	TextRef spaced(" spaced "+1, 10);
1268	TEST_EXPECT(spaced.headTrimmed().compare("spaced ") == 0);
1269	TEST_EXPECT(spaced.tailTrimmed().compare(" spaced") == 0);
1270	TEST_EXPECT(spaced.trimmed ().compare("spaced") == 0);
1271	}
1272
1273	const char *text = " untagged [tag] tagged [empty] ";
1274
	// NOTE(review): the four TextRefs below are not referenced again inside this function
1275	TextRef cr_untagged(strstr(text, "untagged"), 8);
1276	TextRef cr_tagged (strstr(text, "tagged"), 6);
1277	TextRef tr_tag (strstr(text, "tag"), 3);
1278	TextRef tr_empty (strstr(text, "empty"), 5);
1279
1280	// test TaggedContentParser:
1281	{
1282	TaggedContentParser parser(text);
1283
	// 1st part: content in front of the first tag is reported without tag
1284	TEST_EXPECT(parser.has_part());
1285	TEST_REJECT(parser.has_tag());
1286	TEST_EXPECT(parser.get_content().compare("untagged") == 0);
1287
1288	parser.next();
1289
1290	TEST_EXPECT(parser.has_part());
1291	TEST_EXPECT(parser.get_tag ().compare("tag") == 0);
1292	TEST_EXPECT(parser.get_content().compare("tagged") == 0);
1293
1294	parser.next();
1295
	// last part: a tag without content
1296	TEST_EXPECT(parser.has_part());
1297	TEST_EXPECT(parser.get_tag().compare("empty") == 0);
1298	TEST_REJECT(parser.has_content());
1299
1300	parser.next();
1301
1302	TEST_REJECT(parser.has_part());
1303	}
1304	{ // parse untagged input
1305	TaggedContentParser parser("hi");
1306	TEST_EXPECT(parser.has_part());
1307	TEST_REJECT(parser.has_tag());
1308	TEST_EXPECT(parser.get_content().compare("hi") == 0);
1309	parser.next();
1310	TEST_REJECT(parser.has_part());
1311	}
1312	{ // parse empty input
1313	TaggedContentParser empty(""); TEST_REJECT(empty.has_part());
1314	TaggedContentParser white(" \t\n "); TEST_REJECT(white.has_part());
1315	}
1316	{ // parse single tag w/o content
1317	TaggedContentParser parser(" [hello] ");
1318	TEST_EXPECT(parser.has_part());
1319	TEST_EXPECT(parser.get_tag().compare("hello") == 0);
1320	TEST_REJECT(parser.has_content());
1321	parser.next();
1322	TEST_REJECT(parser.has_part());
1323	}
1324	{ // parse multi-tags
	// each tag of a comma-separated tag list is reported as separate part with the same content
1325	TaggedContentParser parser(" [ t1 , t2 ] t");
1326	TEST_EXPECT(parser.has_part());
1327	TEST_EXPECT(parser.get_tag().compare("t1") == 0);
1328	TEST_EXPECT(parser.get_content().compare("t") == 0);
1329	parser.next();
1330	TEST_EXPECT(parser.has_part());
1331	TEST_EXPECT(parser.get_tag().compare("t2") == 0);
1332	TEST_EXPECT(parser.get_content().compare("t") == 0);
1333	parser.next();
1334	TEST_REJECT(parser.has_part());
1335	}
1336	}
1337
1338	#define TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected) do { \
1339	char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1340	TEST_EXPECT_EQUAL(result, expected); \
1341	free(result); \
1342	} while(0)
	// ^ note the argument order: the macro takes (t1,t2,r1,r2,s1,s2), but calls
	//   GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2). Judging from the tests below,
	//   s = string to merge, t = default tag used for its untagged content, r = tag to remove
	//   (NOTE(review): confirm against the declaration of GBS_merge_tagged_strings).
1343
1344	#define TEST_MERGE_TAGGED__BROKEN(t1,t2,r1,r2,s1,s2,expected,got) do { \
1345	char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
1346	TEST_EXPECT_EQUAL__BROKEN(result, expected, got); \
1347	free(result); \
1348	} while(0)
	// ^ variant using TEST_EXPECT_EQUAL__BROKEN; not used by the test function below
1349
1350	void TEST_merge_tagged_strings() {
	// Table of merge scenarios for GBS_merge_tagged_strings; each line states both inputs
	// and the exact merged result.
1351	// merge two fields:
1352	const char *_0 = NULp;
1353
1354	TEST_MERGE_TAGGED("S", "D", "", "", "source", "dest", "[D_] dest [S_] source");
1355	TEST_MERGE_TAGGED("SRC", "DST", "", _0, "source", "dest", "[DST] dest [SRC] source");
1356	TEST_MERGE_TAGGED("SRC", "DST", _0, "", "source", "dest", "[DST] dest [SRC] source");
1357	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "sth", "[DST,SRC] sth");
1358
1359	TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth", "sth", "[DST,SRC] sth"); // show default tags do not get deleted
1360	TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth [SRC] del", "sth [DST] del", "[DST,SRC] sth"); // exception: already present default tags
1361
1362	// update fields:
1363	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST] dest [SRC] source", "[DST] dest [SRC] newsource");
1364	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1365	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,src] sth", "[DST] sth [SRC] newsource");
1366	TEST_MERGE_TAGGED("SRC", "DST", _0, "src", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource");
1367	TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "sth", " [DST] sth [SRC] source", "[DST,SRC] sth");
1368
1369	// append (opposed to update this keeps old entries with same tag; useless?)
1370	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST] dest [SRC] source", "[DST] dest [SRC] newsource [SRC] source");
1371	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST,SRC] sth", "[DST,SRC] sth [SRC] newsource");
1372	TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "[DST] sth [SRC] source", "[DST,SRC] sth [SRC] source");
1373
1374	// merge three fields:
1375	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST] dest [SRC] source", "[DST] dest [OTH] oth [SRC] source");
1376	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST,SRC] sth", "[DST,SRC] sth [OTH] oth");
1377	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "sth", " [DST,SRC] sth", "[DST,OTH,SRC] sth");
1378	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "dest", " [DST] dest [SRC] source", "[DST,OTH] dest [SRC] source");
1379	TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "source", " [DST] dest [SRC] source", "[DST] dest [OTH,SRC] source");
1380
1381	// same tests as in section above, but vv:
1382	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "oth", "[DST] dest [OTH] oth [SRC] source");
1383	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "oth", "[DST,SRC] sth [OTH] oth");
1384	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "sth", "[DST,OTH,SRC] sth");
1385	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "dest", "[DST,OTH] dest [SRC] source");
1386	TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "source", "[DST] dest [OTH,SRC] source");
1387
1388	// test real-merges (content existing in both strings):
1389	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre1 [C1] c1 [C2] c2", "pre2[C2]c2[C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1] pre1 [P2] pre2");
1390	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [C2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content for [C2]
1391	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [c2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content + different tag-case for [C2] (tests that tags are case-insensitive!)
1392	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [C2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content for [C2] -> inserts that tag multiple times
1393	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // continue processing last result (multiple tags with same name are handled)
1394	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C2] c2b [C3]c3 [C2] c2a", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // merge multiple tags with same name
1395	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [c2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content and different tag-case for [C2]
1396	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1,C4] c1 [C2] c2a ", "pre [c2] c2b [C4,C3]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // multitags
1397	TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [ C1, C4] c1 [C2 ] c2a ", "pre [ c2] c2b [C4, C3 ]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // spaced-multitags
1398
1399	// merge two tagged string with deleting
	// (the DSTSRC* helper macros are defined inside the function and are not #undef'ed afterwards)
1400	#define DSTSRC1 "[DST] dest1 [SRC] src1"
1401	#define DSTSRC2 "[DST] dest2 [SRC] src2"
1402	#define DSTSRC2LOW "[dst] dest2 [src] src2"
1403
1404	TEST_MERGE_TAGGED("O1", "O2", _0, _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2");
1405	TEST_MERGE_TAGGED("O1", "O2", "SRC", _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src2");
1406	TEST_MERGE_TAGGED("O1", "O2", _0, "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src1 [SRC] src2");
1407	TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1408	TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1409	TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1410	TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1411	TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2");
1412	TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2");
1413	TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1414	TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1415	TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1");
1416	TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1");
1417	TEST_MERGE_TAGGED("O1", "O2", "SRC,DST", "DST,SRC", DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2"); // delete does not handle multiple tags (yet)
1418	}
1419
1420	__ATTR__REDUCED_OPTIMIZE void TEST_read_tagged() {
	// Tests GB_read_as_tagged_string: creates a database ("new.arb", mode "c"), writes tagged
	// strings into an entry and checks which content is reported for various tags.
1421	GB_shell shell;
1422	GBDATA *gb_main = GB_open("new.arb", "c");
1423	{
1424	GB_transaction ta(gb_main);
1425
1426	{
	// non-string entries: nothing is returned and no error gets exported
	// (both entries are created using the same key "int")
1427	GBDATA *gb_int_entry = GB_create(gb_main, "int", GB_INT);
1428	TEST_EXPECT_NO_ERROR(GB_write_int(gb_int_entry, 4711));
1429	TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_int_entry, "USELESS")); // reading from GB_INT doesn't make sense, but has to work w/o error
1430
1431	GBDATA *gb_ints_entry = GB_create(gb_main, "int", GB_INTS);
1432	GB_UINT4 ints[] = { 1, 2 };
1433	TEST_EXPECT_NO_ERROR(GB_write_ints(gb_ints_entry, ints, 2));
1434	TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_ints_entry, "USELESS")); // reading from GB_INTS doesn't make sense, but has to work w/o error
1435	}
1436
	// the following helper macros refer to the locals 'gb_entry' and 'tagged_string' declared below
1437	#define TEST_EXPECT_TAG_CONTENT(tag,expected) TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag), expected)
1438	#define TEST_REJECT_TAG_CONTENT(tag) TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag))
1439	#define TEST_EXPECT_FULL_CONTENT(tag) TEST_EXPECT_TAG_CONTENT(tag,tagged_string)
1440
1441	GBDATA *gb_entry = GB_create(gb_main, "str", GB_STRING);
1442	const char *tagged_string = "[T1,T2] t12 [T3] t3[T4]t4[][]xxx[AA]aa[WW]w1 [WW]w2 [BB]bb [XX]x1 [XX]x2 [yy] yy [Y] y [EMPTY][FAKE,EMPTY]fake[ SP1ST, SPACED, PADDED ,UNSPACED,_SCORED_,FOLLOWED ,FOLLAST ] spaced [LAST] last ";
1443	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1444
	// a NULp or empty tag delivers the complete (unparsed) string
1445	TEST_EXPECT_FULL_CONTENT(NULp);
1446	TEST_EXPECT_FULL_CONTENT("");
1447	TEST_REJECT_TAG_CONTENT(" "); // searches for tag '_' (no such tag)
1448
1449	TEST_EXPECT_TAG_CONTENT("T1", "t12");
1450	TEST_EXPECT_TAG_CONTENT("T2", "t12");
1451	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1452	TEST_EXPECT_TAG_CONTENT("T4", "t4[][]xxx");
1453
1454	TEST_EXPECT_TAG_CONTENT("AA", "aa");
1455	TEST_EXPECT_TAG_CONTENT("BB", "bb");
1456	TEST_EXPECT_TAG_CONTENT("WW", "w1"); // now finds 1st occurrence of [WW]
1457	TEST_EXPECT_TAG_CONTENT("XX", "x1");
1458	TEST_EXPECT_TAG_CONTENT("YY", "yy");
1459	TEST_EXPECT_TAG_CONTENT("yy", "yy");
1460
1461	TEST_REJECT_TAG_CONTENT("Y");
1462	// TEST_EXPECT_TAG_CONTENT("Y", "y"); // @@@ tags with length == 1 are never found -> should be handled when used via GUI
1463
1464	TEST_EXPECT_TAG_CONTENT("EMPTY", "fake"); // now reports 1st non-empty content
1465	TEST_EXPECT_TAG_CONTENT("FAKE", "fake");
1466	TEST_EXPECT_TAG_CONTENT("fake", "fake");
1467
1468	TEST_REJECT_TAG_CONTENT("NOSUCHTAG");
1469	TEST_EXPECT_TAG_CONTENT("SPACED", "spaced");
1470	TEST_EXPECT_TAG_CONTENT("SP1ST", "spaced");
1471	TEST_REJECT_TAG_CONTENT(" SPACED"); // dito (specified space is converted into '_' before searching tag)
1472	TEST_REJECT_TAG_CONTENT("_SPACED"); // not found (tag stored with space, search performed for '_SPACED')
1473	TEST_EXPECT_TAG_CONTENT("PADDED", "spaced");
1474	TEST_EXPECT_TAG_CONTENT("FOLLOWED", "spaced");
1475	TEST_EXPECT_TAG_CONTENT("FOLLAST", "spaced");
1476
1477	TEST_EXPECT_TAG_CONTENT("_SCORED_", "spaced");
1478	TEST_EXPECT_TAG_CONTENT(" SCORED ", "spaced");
1479	TEST_EXPECT_TAG_CONTENT("UNSPACED", "spaced");
1480	TEST_EXPECT_TAG_CONTENT("LAST", "last");
1481
1482	// test incomplete tags
1483	tagged_string = "bla [WHATEVER hello";
1484	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1485	TEST_REJECT_TAG_CONTENT("WHATEVER");
1486
1487	tagged_string = "bla [T1] t1 [T2 t2 [T3] t3";
1488	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1489	TEST_EXPECT_TAG_CONTENT("T1", "t1 [T2 t2");
1490	TEST_REJECT_TAG_CONTENT("T2"); // tag is unclosed
1491	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1492
1493	// test pathological tags
	// NOTE(review): several blank-only literals below look identical in this copy although
	// contradicting results are expected for them (rejected twice, later yielding "us");
	// runs of blanks were presumably collapsed - compare with the repository before relying on them.
1494	tagged_string = "bla [T1] t1 [ ] sp1 [ ] sp2 [___] us [T3] t3 [_a] a";
1495	TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string));
1496	TEST_EXPECT_TAG_CONTENT("T1", "t1 [ ] sp1 [ ] sp2");
1497	TEST_EXPECT_FULL_CONTENT("");
1498	TEST_REJECT_TAG_CONTENT(" ");
1499	TEST_REJECT_TAG_CONTENT(" ");
1500	TEST_REJECT_TAG_CONTENT(",");
1501	TEST_EXPECT_TAG_CONTENT(", a", "a"); // searches for tag '_a'
1502	TEST_EXPECT_TAG_CONTENT(", a,", "a"); // dito
1503	TEST_EXPECT_TAG_CONTENT(", ,a,", "a"); // dito
1504	TEST_EXPECT_TAG_CONTENT(" ", "us");
1505	TEST_EXPECT_TAG_CONTENT("T3", "t3");
1506	}
1507	GB_close(gb_main);
1508	}
1509
1510	#define TEST_EXPECT_EVAL_TAGGED(in,dtag,tag,aci,expected) do{ \
1511	TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED( \
1512	GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1513	expected); \
1514	}while(0)
	// ^ runs GBS_modify_tagged_string_with_ACI on 'in' and expects result 'expected' with no error exported.
	//   Relies on a local 'callEnv' being in scope. Judging from the tests below: 'dtag' = default tag
	//   for untagged content, 'tag' = tag whose content is modified, 'aci' = SRT/ACI expression
	//   (NOTE(review): confirm against the declaration).
1515
1516	#define TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(in,dtag,tag,aci,expectedErrorPart) do{ \
1517	TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS( \
1518	GBS_modify_tagged_string_with_ACI(in, dtag, tag, aci, callEnv), \
1519	expectedErrorPart); \
1520	}while(0)
	// ^ same call, but expects no result and an exported error containing 'expectedErrorPart'
1521
1522	__ATTR__REDUCED_OPTIMIZE void TEST_tagged_eval() {
	// Applies SRT/ACI expressions to single tags of tagged strings, using species from
	// "TEST_loadsave.arb" (opened with mode "r") as evaluation context.
1523	GB_shell shell;
1524	GBDATA *gb_main = GB_open("TEST_loadsave.arb", "r");
1525	{
1526	GB_transaction ta(gb_main);
	// the environment names the tree 'tree_missing'; the taxonomy tests below expect
	// the resulting "Failed to read tree 'tree_missing'" error
1527	GBL_env env(gb_main, "tree_missing");
1528
1529	{
1530	GBDATA *gb_species = GBT_find_species(gb_main, "MhcBurto");
1531	TEST_REJECT_NULL(gb_species);
1532	GBL_call_env callEnv(gb_species, env);
1533
1534	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", "", "[DEF] bla");
1535	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", NULp, "[DEF] bla");
1536	TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", ":bla=blub", "[DEF] bla");
1537	TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", ":bla=blub", "[TAG] blub");
1538	TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", "len", "[TAG] 3");
1539
1540	// empty tags:
1541	TEST_EXPECT_EVAL_TAGGED("[empty] ", "def", "empty", NULp, "");
1542	TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1543	TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1544	TEST_EXPECT_EVAL_TAGGED("[empty][filled] xxx", "def", "empty", NULp, "[FILLED] xxx");
1545	TEST_EXPECT_EVAL_TAGGED("[filled] xxx [empty]", "def", "empty", NULp, "[FILLED] xxx");
1546
1547	#define THREE_TAGS "[TAG] tag [tip] tip [top] top"
1548	#define THREE_TAGS_UPCASE "[TAG] tag [TIP] tip [TOP] top"
1549
1550	// dont eval:
1551	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", NULp, THREE_TAGS_UPCASE);
1552	// eval SRT:
1553	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", ":=<>", "[TAG] <tag> [TIP] tip [TOP] top");
1554	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tag", ":=<>", "[TAG] <tag> [TIP] tip [TOP] top");
1555	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":=()", "[TAG] tag [TIP] (tip) [TOP] top");
1556	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TIP", ":=()", "[TAG] tag [TIP] (tip) [TOP] top");
1557	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=", "[TAG] tag [TOP] top"); // tag emptied by SRT was removed from result
1558	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", ":=-*1", "[TAG] tag [TIP] tip [TOP] top-top");
1559	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":i=o", "[TAG] tag [TIP,TOP] top"); // merge tags
1560	// eval ACI:
1561	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", "len", "[TAG] tag [TIP] 3 [TOP] top");
1562	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", "len", "[TAG] tag [TIP] tip [TOP] 3");
1563
1564	// test SRT/ACI errors:
1565	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", ":*", "no '=' found");
1566	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("untagged", "def", "def", ":*", "no '=' found");
1567	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", "illcmd", "Unknown command 'illcmd'");
1568	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("un [tagged", "def", "def", "illcmd", "Unknown command 'illcmd'");
1569
1570	// no error raised, if expression not applied:
1571	TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "no", "illcmd", THREE_TAGS_UPCASE);
1572
1573	// incomplete tags
	// (as the expected results show, unmatched brackets are reported as '{' / '}')
1574	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":=<>", "[DEF] <{no tag>");
1575	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":* =<2,*1>", "[DEF] <tag,{no>");
1576	TEST_EXPECT_EVAL_TAGGED("[no [tag", "def", "def", ":* =<2,*1>", "[DEF] <{tag,{no>");
1577	TEST_EXPECT_EVAL_TAGGED("[no [tag] xx", "def", "def", ":* =<2,*1>", "[DEF] {no [TAG] xx"); // SRT changes nothing here (no match)
1578	TEST_EXPECT_EVAL_TAGGED("[no [tag[]", "def", "def", ":* =<2,*1>", "[DEF] <{tag{},{no>");
1579	TEST_EXPECT_EVAL_TAGGED("[no [tag[] xx","def", "def", ":* =<2,*1>", "[DEF] <{tag{} xx,{no>");
1580	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":* =<2,*1>", "[DEF] <tag,no>");
1581	TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":no=yes", "[DEF] {yes tag");
1582	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":no=yes", "[DEF] yes tag");
1583	TEST_EXPECT_EVAL_TAGGED("no tag", "def", "DEF", ":no=yes", "[DEF] yes tag");
1584	TEST_EXPECT_EVAL_TAGGED("no tag", "DEF", "def", ":no=yes", "[DEF] yes tag");
1585	TEST_EXPECT_EVAL_TAGGED("kept [trunk", "def", "def", ":=<>", "[DEF] <kept {trunk>");
1586	TEST_EXPECT_EVAL_TAGGED("kept", "def", "def", ":=<>", "[DEF] <kept>");
1587	}
1588
1589	{
1590	GBDATA *gb_species = GBT_find_species(gb_main, "MetMazei");
1591	TEST_REJECT_NULL(gb_species);
1592	GBL_call_env callEnv(gb_species, env);
1593
1594	// run scripts using context:
1595	TEST_EXPECT_EVAL_TAGGED("[T1,T2] name='$n'", "def", "T1", ":$n=*(name)", "[T1] name='MetMazei' [T2] name='$n'");
1596	TEST_EXPECT_EVAL_TAGGED("[T1,T2] seqlen=$l", "def", "T2", ":$l=*(\|sequence\|len)", "[T1] seqlen=$l [T2] seqlen=165");
1597	TEST_EXPECT_EVAL_TAGGED("[T1,T2] nuc", "def", "T1", "dd;\"=\";command(sequence\|count(ACGTUN))", "[T1] nuc=66 [T2] nuc");
1598
1599	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax='$t'", "def", "def", ":$t=*(\|taxonomy(2))", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1600	TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax", "def", "def", "dd;\"=\";taxonomy(2)", "Failed to read tree 'tree_missing' (Reason: tree not found)");
1601
1602	// content before 1st tag:
1603	TEST_EXPECT_EVAL_TAGGED("untagged [tag] tagged", "def", "tag", ":g=G", "[DEF] untagged [TAG] taGGed");
1604	TEST_EXPECT_EVAL_TAGGED(" [tag] tagged", "def", "tag", ":g=G", "[TAG] taGGed");
1605
1606	// test elimination of leading/trailing whitespace:
1607	TEST_EXPECT_EVAL_TAGGED(" untagged ", "def", "def", ":g=G", "[DEF] untaGGed"); // untagged content
1608	TEST_EXPECT_EVAL_TAGGED("[tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed");
1609	TEST_EXPECT_EVAL_TAGGED(" [trail] trail [tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed [TRAIL] trail");
1610
1611	#define MIXED_TAGS "[tag] tag [tip,top] tiptop [xx,yy,zz] zzz"
1612
	// modifying one tag of a multi-tag entry splits it off / regroups it by resulting content
1613	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "tip", ":tip=top", "[TAG] tag [TIP] toptop [TOP] tiptop [XX,YY,ZZ] zzz");
1614	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "yy", ":zzz=tiptop", "[TAG] tag [TIP,TOP,YY] tiptop [XX,ZZ] zzz");
1615	TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "top", ":tiptop=zzz", "[TAG] tag [TIP] tiptop [TOP,XX,YY,ZZ] zzz");
1616	}
1617	}
1618	GB_close(gb_main);
1619	}
1620
1621	void TEST_log_action() {
	// Tests GBS_log_action_to without (stamped==0) and with (stamped==1) timestamp.
	// Without stamp the exact result is compared; with stamp only the fixed parts are
	// searched, because the text inserted in front of 'action' depends on the current time.
1622	for (int stamped = 0; stamped<=1; ++stamped) {
1623	TEST_ANNOTATE(GBS_global_string("stamped=%i", stamped));
1624	{
	// comment w/o trailing LF -> LF gets inserted between comment and action
1625	char *logged = GBS_log_action_to("comment", "action", stamped);
1626	if (stamped) {
1627	TEST_EXPECT_CONTAINS(logged, "comment\n");
1628	TEST_EXPECT_CONTAINS(logged, "action\n");
1629	}
1630	else {
1631	TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1632	}
1633	free(logged);
1634	}
1635	{
	// comment already ending with LF -> no additional LF
1636	char *logged = GBS_log_action_to("comment\n", "action", stamped);
1637	if (stamped) {
1638	TEST_EXPECT_CONTAINS(logged, "comment\n");
1639	TEST_EXPECT_CONTAINS(logged, "action\n");
1640	}
1641	else {
1642	TEST_EXPECT_EQUAL(logged, "comment\naction\n");
1643	}
1644	free(logged);
1645	}
1646	{
	// empty (but existing) comment -> result starts with LF
1647	char *logged = GBS_log_action_to("", "action", stamped);
1648	if (stamped) {
1649	TEST_EXPECT_EQUAL(logged[0], '\n');
1650	TEST_EXPECT_CONTAINS(logged, "action\n");
1651	}
1652	else {
1653	TEST_EXPECT_EQUAL(logged, "\naction\n");
1654	}
1655	free(logged);
1656	}
1657	{
	// no comment at all (NULp) -> result does not start with LF
1658	char *logged = GBS_log_action_to(NULp, "action\n", stamped); // test action with trailing LF
1659	if (stamped) {
1660	TEST_EXPECT_DIFFERENT(logged[0], '\n');
1661	TEST_EXPECT_CONTAINS(logged, "action\n");
1662	}
1663	else {
1664	TEST_EXPECT_EQUAL(logged, "action\n");
1665	}
1666	free(logged);
1667	}
1668	}
1669	}
1670	TEST_PUBLISH(TEST_log_action);
1671
1672	#endif // UNIT_TESTS
1673

Note: See TracBrowser for help on using the repository browser.

Download in other formats: