Context Navigation

source: tags/svn.1.5.4/ARBDB/adstring.cxx

Visit:

Last change on this file was 8319, checked in by westram, 14 years ago
Ignored the PERL2ARB interface as a referrer (to detect functions that are only used from perl). Moved several functions to static scope or removed them (partly reverted by [13155]). For some functions it's OK to be used only from perl (e.g. macro support functions); added comments there! There is still some dead code in there, e.g. read-security is implemented, but unused (and unwanted).
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 34.3 KB

Line
1	// =============================================================== //
2	// //
3	// File : adstring.cxx //
4	// Purpose : various string functions //
5	// //
6	// Institute of Microbiology (Technical University Munich) //
7	// http://www.arb-home.de/ //
8	// //
9	// =============================================================== //
10
11	#include <arb_backtrace.h>
12	#include <arb_strbuf.h>
13	#include <arb_sort.h>
14
15	#include "gb_key.h"
16
17	#include <SigHandler.h>
18
19	#include <execinfo.h>
20
21	#include <cstdarg>
22	#include <cctype>
23	#include <cerrno>
24	#include <ctime>
25	#include <setjmp.h>
26
27	#include <valgrind.h>
28
29	static char GBS_string_2_key_with_exclusions(const char str, const char *additional) {
30	// converts any string to a valid key (all chars in 'additional' are additionally allowed)
31	char buf[GB_KEY_LEN_MAX+1];
32	int i;
33	int c;
34	for (i=0; i<GB_KEY_LEN_MAX;) {
35	c = *(str++);
36	if (!c) break;
37
38	if (c==' ' \|\| c == '_') {
39	buf[i++] = '_';
40	}
41	else if (isalnum(c) \|\| strchr(additional, c) != 0) {
42	buf[i++] = c;
43	}
44	}
45	for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_';
46	buf[i] = 0;
47	return strdup(buf);
48	}
49
50	char GBS_string_2_key(const char str) // converts any string to a valid key
51	{
52	return GBS_string_2_key_with_exclusions(str, "");
53	}
54
char *GB_memdup(const char *source, size_t len) {
    /* duplicates 'len' bytes starting at 'source'
     *
     * @return heap copy (allocated with malloc), or NULL if the allocation failed
     */
    char *dest = (char *)malloc(len);
    if (dest) memcpy(dest, source, len); // never copy into a failed allocation
    return dest;
}
60
61	GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT
62	// test whether all characters are letters, numbers or _
63	int i;
64	long len;
65
66	if (!key \|\| key[0] == 0) return "Empty key is not allowed";
67	len = strlen(key);
68	if (len>GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key);
69	if (len < GB_KEY_LEN_MIN) return GBS_global_string("Invalid key '%s': too short", key);
70
71	for (i = 0; key[i]; ++i) {
72	char c = key[i];
73	if ((c>='a') && (c<='z')) continue;
74	if ((c>='A') && (c<='Z')) continue;
75	if ((c>='0') && (c<='9')) continue;
76	if ((c=='_')) continue;
77	return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key);
78	}
79
80	return 0;
81	}
82	GB_ERROR GB_check_link_name(const char *key) { // goes to header: __ATTR__USERESULT
83	// test whether all characters are letters, numbers or _
84	int i;
85	long len;
86
87	if (!key \|\| key[0] == 0) return GB_export_error("Empty key is not allowed");
88	len = strlen(key);
89	if (len>GB_KEY_LEN_MAX) return GB_export_errorf("Invalid key '%s': too long", key);
90	if (len < 1) return GB_export_errorf("Invalid key '%s': too short", key); // here it differs from GB_check_key
91
92	for (i = 0; key[i]; ++i) {
93	char c = key[i];
94	if ((c>='a') && (c<='z')) continue;
95	if ((c>='A') && (c<='Z')) continue;
96	if ((c>='0') && (c<='9')) continue;
97	if ((c=='_')) continue;
98	return GB_export_errorf("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key);
99	}
100
101	return 0;
102	}
103	GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT
104	// test whether all characters are letters, numbers or _
105	// additionally allow '/' and '->' for hierarchical keys
106	GB_ERROR err = 0;
107
108	if (!key \|\| key[0] == 0) {
109	err = GB_export_error("Empty key is not allowed");
110	}
111	else if (!strpbrk(key, "/-")) {
112	err = GB_check_key(key);
113	}
114	else {
115	char *key_copy = strdup(key);
116	char *start = key_copy;
117
118	if (start[0] == '/') ++start;
119
120	while (start && !err) {
121	char *key_end = strpbrk(start, "/-");
122
123	if (key_end) {
124	char c = *key_end;
125	*key_end = 0;
126	err = GB_check_key(start);
127	*key_end = c;
128
129	if (c == '-') {
130	if (key_end[1] != '>') {
131	err = GB_export_errorf("'>' expected after '-' in '%s'", key);
132	}
133	start = key_end+2;
134	}
135	else {
136	gb_assert(c == '/');
137	start = key_end+1;
138	}
139	}
140	else {
141	err = GB_check_key(start);
142	start = 0;
143	}
144	}
145
146	free(key_copy);
147	}
148
149	return err;
150	}
151
152	// ---------------------------
153	// escape characters
154
char *GBS_remove_escape(char *com) // \ is the escape character
{
    /* resolves backslash-escapes in 'com':
     *   \n -> LF, \t -> TAB, \0 -> NUL byte, \<other> -> <other>
     * A single backslash at the end of the string is dropped.
     *
     * Note: "\0" writes a real NUL byte, i.e. the result ends there when
     * it is used as C string.
     *
     * @return heap copy (made with strdup); 'com' itself is not modified
     */
    char *result = strdup(com);
    char *s      = result; // read position
    char *d      = result; // write position (never ahead of 's')
    int   ch;

    while ((ch = *(s++))) {
        if (ch != '\\') {
            *(d++) = ch;
            continue;
        }

        ch = *(s++);
        if (!ch) break; // lone backslash at end of string

        switch (ch) {
            case 'n': *(d++) = '\n'; break;
            case 't': *(d++) = '\t'; break;
            case '0': *(d++) = '\0'; break;
            default:  *(d++) = ch;   break;
        }
    }
    *d = 0;
    return result;
}
180
181	// ----------------------------------------------
182	// escape/unescape characters in strings
183
184	char GBS_escape_string(const char str, const char *chars_to_escape, char escape_char) {
185	/*! escape characters in 'str'
186	*
187	* uses a special escape-method, which eliminates all 'chars_to_escape' completely
188	* from str (this makes further processing of the string more easy)
189	*
190	* @param escape_char is the character used for escaping. For performance reasons it
191	* should be a character rarely used in 'str'.
192	*
193	* @param chars_to_escape may not contain 'A'-'Z' (these are used for escaping)
194	* and it may not be longer than 26 bytes
195	*
196	* @return heap copy of escaped string
197	*
198	* Inverse of GBS_unescape_string()
199	*/
200
201	int len = strlen(str);
202	char buffer = (char)malloc(2*len+1);
203	int j = 0;
204	int i;
205
206	gb_assert(strlen(chars_to_escape) <= 26);
207	gb_assert(strchr(chars_to_escape, escape_char) == 0); // escape_char may not be included in chars_to_escape
208
209	for (i = 0; str[i]; ++i) {
210	if (str[i] == escape_char) {
211	buffer[j++] = escape_char;
212	buffer[j++] = escape_char;
213	}
214	else {
215	const char *found = strchr(chars_to_escape, str[i]);
216	if (found) {
217	buffer[j++] = escape_char;
218	buffer[j++] = (found-chars_to_escape+'A');
219
220	gb_assert(found[0]<'A' \|\| found[0]>'Z'); // illegal character in chars_to_escape
221	}
222	else {
223
224	buffer[j++] = str[i];
225	}
226	}
227	}
228	buffer[j] = 0;
229
230	return buffer;
231	}
232
233	char GBS_unescape_string(const char str, const char *escaped_chars, char escape_char) {
234	//! inverse of GB_escape_string() - for params see there
235
236	int len = strlen(str);
237	char buffer = (char)malloc(len+1);
238	int j = 0;
239	int i;
240
241	#if defined(ASSERTION_USED)
242	int escaped_chars_len = strlen(escaped_chars);
243	#endif // ASSERTION_USED
244
245	gb_assert(strlen(escaped_chars) <= 26);
246	gb_assert(strchr(escaped_chars, escape_char) == 0); // escape_char may not be included in chars_to_escape
247
248	for (i = 0; str[i]; ++i) {
249	if (str[i] == escape_char) {
250	if (str[i+1] == escape_char) {
251	buffer[j++] = escape_char;
252	}
253	else {
254	int idx = str[i+1]-'A';
255
256	gb_assert(idx >= 0 && idx<escaped_chars_len);
257	buffer[j++] = escaped_chars[idx];
258	}
259	++i;
260	}
261	else {
262	buffer[j++] = str[i];
263	}
264	}
265	buffer[j] = 0;
266
267	return buffer;
268	}
269
270	char *GBS_eval_env(GB_CSTR p) {
271	GB_ERROR error = 0;
272	GB_CSTR ka;
273	GBS_strstruct *out = GBS_stropen(1000);
274
275	while ((ka = GBS_find_string(p, "$(", 0))) {
276	GB_CSTR kz = strchr(ka, ')');
277	if (!kz) {
278	error = GBS_global_string("missing ')' for envvar '%s'", p);
279	break;
280	}
281	else {
282	char *envvar = GB_strpartdup(ka+2, kz-1);
283	int len = ka-p;
284
285	if (len) GBS_strncat(out, p, len);
286
287	GB_CSTR genv = GB_getenv(envvar);
288	if (genv) GBS_strcat(out, genv);
289
290	p = kz+1;
291	free(envvar);
292	}
293	}
294
295	if (error) {
296	GB_export_error(error);
297	GBS_strforget(out);
298	return 0;
299	}
300
301	GBS_strcat(out, p); // copy rest
302	return GBS_strclose(out);
303	}
304
long GBS_gcgchecksum(const char *seq)
// GCGchecksum
{
    /* sums up toupper(character)*weight over 'seq', where the weight
     * cycles through 1..57, and returns that sum modulo 10000
     */
    const long seqlen = strlen(seq);
    long       check  = 0;

    for (long pos = 0; pos<seqlen; ++pos) {
        long weight  = pos%57 + 1; // 1, 2, .., 57, 1, 2, ..
        check       += weight * toupper(seq[pos]);
    }

    return check%10000;
}
322
323	// Table of CRC-32's of all single byte values (made by makecrc.c of ZIP source)
// The table has 256 entries (51 rows of 5 values plus one final value).
// GB_checksum indexes it with a value masked by 0xff.
324	uint32_t crctab[] = {
325	0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
326	0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
327	0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
328	0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
329	0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
330	0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
331	0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
332	0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
333	0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
334	0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
335	0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
336	0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
337	0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
338	0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
339	0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
340	0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
341	0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
342	0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
343	0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
344	0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
345	0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
346	0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
347	0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
348	0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
349	0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
350	0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
351	0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
352	0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
353	0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
354	0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
355	0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
356	0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
357	0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
358	0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
359	0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
360	0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
361	0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
362	0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
363	0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
364	0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
365	0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
366	0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
367	0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
368	0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
369	0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
370	0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
371	0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
372	0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
373	0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
374	0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
375	0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
376	0x2d02ef8dL
377	};
378
379	uint32_t GB_checksum(const char seq, long length, int ignore_case, const char exclude) // RALF: 02-12-96
380	{
381	/* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source
382	* if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too)
383	*/
384
385	unsigned long c = 0xffffffffL;
386	long n = length;
387	int i;
388	int tab[256];
389
390	for (i=0; i<256; i++) {
391	tab[i] = ignore_case ? toupper(i) : i;
392	}
393
394	if (exclude) {
395	while (1) {
396	int k = (unsigned char )exclude++;
397	if (!k) break;
398	tab[k] = 0;
399	if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0;
400	}
401	}
402
403	while (n--) {
404	i = tab[(const unsigned char )seq++];
405	if (i) {
406	c = crctab[((int) c ^ i) & 0xff] ^ (c >> 8);
407	}
408	}
409	c = c ^ 0xffffffffL;
410	return c;
411	}
412
413	uint32_t GBS_checksum(const char seq, int ignore_case, const char exclude)
414	// if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too)
415	{
416	return GB_checksum(seq, strlen(seq), ignore_case, exclude);
417	}
418
419	/* extract all words in a text that:
420	1. minlen < 1.0 contain more than minlen*len_of_text characters that also exists in chars
421	2. minlen > 1.0 contain more than minlen characters that also exists in chars
422	*/
423
424	char GBS_extract_words(const char source, const char *chars, float minlen, bool sort_output) {
425	char *s = strdup(source);
426	char ps = (char )GB_calloc(sizeof(char *), (strlen(source)>>1) + 1);
427	GBS_strstruct *strstruct = GBS_stropen(1000);
428	char *f = s;
429	int count = 0;
430	char *p;
431	char *h;
432	int cnt;
433	int len;
434	int iminlen = (int) (minlen+.5);
435
436	while ((p = strtok(f, " \t,;:\|"))) {
437	f = 0;
438	cnt = 0;
439	len = strlen(p);
440	for (h=p; *h; h++) {
441	if (strchr(chars, *h)) cnt++;
442	}
443
444	if (minlen == 1.0) {
445	if (cnt != len) continue;
446	}
447	else if (minlen > 1.0) {
448	if (cnt < iminlen) continue;
449	}
450	else {
451	if (len < 3 \|\| cnt < minlen*len) continue;
452	}
453	ps[count] = p;
454	count ++;
455	}
456	if (sort_output) {
457	GB_sort((void **)ps, 0, count, GB_string_comparator, 0);
458	}
459	for (cnt = 0; cnt<count; cnt++) {
460	if (cnt) {
461	GBS_chrcat(strstruct, ' ');
462	}
463	GBS_strcat(strstruct, ps[cnt]);
464	}
465
466	free(ps);
467	free(s);
468	return GBS_strclose(strstruct);
469	}
470
471
size_t GBS_shorten_repeated_data(char *data) {
    // Compresses runs of identical characters inside 'data' (in place!).
    // Runs of 5 or more characters are written as "<char>{<count>}",
    // shorter runs are kept unchanged, e.g.
    //     "..............................ACGT....................TGCA"
    //  -> ".{30}ACGT.{20}TGCA"
    //
    // Returns the length of the shortened string.

#if defined(DEBUG)
    size_t orgLen = strlen(data);
#endif // DEBUG
    char *const  begin = data;
    char        *out   = data; // write position (never overtakes 'in')
    const char  *in    = data; // read position

    char run_char = *in++;
    while (run_char) {
        size_t run_len = 1;
        while (*in == run_char) { // collect complete run
            ++in;
            ++run_len;
        }
        const char next_char = *in++; // first character behind the run (may be the terminator)

        if (run_len >= 5) {
            out += sprintf(out, "%c{%zu}", run_char, run_len); // insert repeat count
        }
        else {
            for (size_t r = 0; r<run_len; ++r) *out++ = run_char; // insert plain
        }
        run_char = next_char;
    }

    *out = 0;

#if defined(DEBUG)
    gb_assert(strlen(begin) <= orgLen);
#endif // DEBUG
    return out-begin;
}
512
513
514	// -------------------------------------------
515	// helper function for tagged fields
516
517	static GB_ERROR g_bs_add_value_tag_to_hash(GBDATA gb_main, GB_HASH hash, char tag, char value, const char rtag, const char srt, const char aci, GBDATA gbd) {
518	char *p;
519	GB_HASH *sh;
520	char *to_free = 0;
521	if (rtag && strcmp(tag, rtag) == 0) {
522	if (srt) {
523	value = to_free = GBS_string_eval(value, srt, gbd);
524	}
525	else if (aci) {
526	value = to_free = GB_command_interpreter(gb_main, value, aci, gbd, 0);
527	}
528	if (!value) return GB_await_error();
529	}
530
531	p=value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{'
532	p=value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}'
533
534	sh = (GB_HASH *)GBS_read_hash(hash, value);
535	if (!sh) {
536	sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent
537	GBS_write_hash(hash, value, (long)sh);
538	}
539
540	GBS_write_hash(sh, tag, 1);
541	if (to_free) free(to_free);
542	return 0;
543	}
544
545
546	static GB_ERROR g_bs_convert_string_to_tagged_hash(GB_HASH hash, char s, char default_tag, const char del,
547	GBDATA gb_main, const char rtag, const char srt, const char aci, GBDATA *gbd) {
548	char *se; // string end
549	char *sa; // string start and tag end
550	char *ts; // tag start
551	char *t;
552	GB_ERROR error = 0;
553	while (s && s[0]) {
554	ts = strchr(s, '[');
555	if (!ts) {
556	error = g_bs_add_value_tag_to_hash(gb_main, hash, default_tag, s, rtag, srt, aci, gbd); // no tag found, use default tag
557	if (error) break;
558	break;
559	}
560	else {
561	*(ts++) = 0;
562	}
563	sa = strchr(ts, ']');
564	if (sa) {
565	*sa++ = 0;
566	while (*sa == ' ') sa++;
567	}
568	else {
569	error = g_bs_add_value_tag_to_hash(gb_main, hash, default_tag, s, rtag, srt, aci, gbd); // no tag found, use default tag
570	if (error) break;
571	break;
572	}
573	se = strchr(sa, '[');
574	if (se) {
575	while (se>sa && se[-1] == ' ') se--;
576	*(se++) = 0;
577	}
578	for (t = strtok(ts, ","); t; t = strtok(0, ",")) {
579	if (del && strcmp(t, del) == 0) continue; // test, whether to delete
580	if (sa[0] == 0) continue;
581	error = g_bs_add_value_tag_to_hash(gb_main, hash, t, sa, rtag, srt, aci, gbd); // tag found, use tag
582	if (error) break;
583	}
584	s = se;
585	}
586	return error;
587	}
588
589	static long g_bs_merge_tags(const char tag, long val, void cd_sub_result) {
590	GBS_strstruct sub_result = (GBS_strstruct)cd_sub_result;
591
592	GBS_strcat(sub_result, tag);
593	GBS_strcat(sub_result, ",");
594
595	return val;
596	}
597
598	static long g_bs_read_tagged_hash(const char value, long subhash, void cd_g_bs_collect_tags_hash) {
599	char *str;
600	static int counter = 0;
601	GBS_strstruct *sub_result = GBS_stropen(100);
602
603	GBS_hash_do_sorted_loop((GB_HASH *)subhash, g_bs_merge_tags, GBS_HCF_sortedByKey, sub_result);
604	GBS_intcat(sub_result, counter++); // create a unique number
605
606	str = GBS_strclose(sub_result);
607
608	GB_HASH g_bs_collect_tags_hash = (GB_HASH)cd_g_bs_collect_tags_hash;
609	GBS_write_hash(g_bs_collect_tags_hash, str, (long)strdup(value)); // send output to new hash for sorting
610
611	free(str);
612	return 0;
613	}
614
615	static long g_bs_read_final_hash(const char tag, long value, void cd_merge_result) {
616	GBS_strstruct merge_result = (GBS_strstruct)cd_merge_result;
617
618	char lk = const_cast<char>(strrchr(tag, ','));
619	if (lk) { // remove number at end
620	*lk = 0;
621	GBS_strcat(merge_result, " [");
622	GBS_strcat(merge_result, tag);
623	GBS_strcat(merge_result, "] ");
624	}
625	GBS_strcat(merge_result, (char *)value);
626	return value;
627	}
628
629	static char g_bs_get_string_of_tag_hash(GB_HASH tag_hash) {
630	GBS_strstruct *merge_result = GBS_stropen(256);
631	GB_HASH *collect_tags_hash = GBS_create_dynaval_hash(512, GB_IGNORE_CASE, GBS_dynaval_free);
632
633	GBS_hash_do_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash
634	GBS_hash_do_sorted_loop(collect_tags_hash, g_bs_read_final_hash, GBS_HCF_sortedByKey, merge_result);
635
636	GBS_free_hash(collect_tags_hash);
637	return GBS_strclose(merge_result);
638	}
639
640	static long g_bs_free_hash_of_hashes_elem(const char /key/, long val, void ) {
641	GB_HASH hash = (GB_HASH)val;
642	if (hash) GBS_free_hash(hash);
643	return 0;
644	}
645	static void g_bs_free_hash_of_hashes(GB_HASH *hash) {
646	GBS_hash_do_loop(hash, g_bs_free_hash_of_hashes_elem, NULL);
647	GBS_free_hash(hash);
648	}
649
650	char GBS_merge_tagged_strings(const char s1, const char tag1, const char replace1, const char s2, const char tag2, const char *replace2) {
651	/* Create a tagged string from two tagged strings:
652	* a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string'
653	*
654	* if 's2' is not empty, then delete tag 'replace1' in 's1'
655	* if 's1' is not empty, then delete tag 'replace2' in 's2'
656	*
657	* if result is NULL, an error has been exported.
658	*/
659
660	char *str1 = strdup(s1);
661	char *str2 = strdup(s2);
662	char *t1 = GBS_string_2_key(tag1);
663	char *t2 = GBS_string_2_key(tag2);
664	char *result = 0;
665	GB_ERROR error = 0;
666	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
667
668	if (!strlen(s1)) replace2 = 0;
669	if (!strlen(s2)) replace1 = 0;
670
671	if (replace1 && replace1[0] == 0) replace1 = 0;
672	if (replace2 && replace2[0] == 0) replace2 = 0;
673
674	error = g_bs_convert_string_to_tagged_hash(hash, str1, t1, replace1, 0, 0, 0, 0, 0);
675	if (!error) error = g_bs_convert_string_to_tagged_hash(hash, str2, t2, replace2, 0, 0, 0, 0, 0);
676
677	if (!error) {
678	result = g_bs_get_string_of_tag_hash(hash);
679	}
680	else {
681	GB_export_error(error);
682	}
683
684	g_bs_free_hash_of_hashes(hash);
685
686	free(t2);
687	free(t1);
688	free(str2);
689	free(str1);
690
691	return result;
692	}
693
694	char GBS_string_eval_tagged_string(GBDATA gb_main, const char s, const char dt, const char tag, const char srt, const char aci, GBDATA gbd) {
695	/* if 's' is untagged, tag it with default tag 'dt'.
696	* if 'tag' is != NULL -> apply 'srt' or 'aci' to that part of the content of 's', which is tagged with 'tag'
697	*
698	* if result is NULL, an error has been exported.
699	*/
700
701	char *str = strdup(s);
702	char *default_tag = GBS_string_2_key(dt);
703	GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE);
704	char *result = 0;
705	GB_ERROR error = g_bs_convert_string_to_tagged_hash(hash, str, default_tag, 0, gb_main, tag, srt, aci, gbd);
706
707	if (!error) {
708	result = g_bs_get_string_of_tag_hash(hash);
709	}
710	else {
711	GB_export_error(error);
712	}
713
714	g_bs_free_hash_of_hashes(hash);
715	free(default_tag);
716	free(str);
717
718	return result;
719	}
720
721
722	char GB_read_as_tagged_string(GBDATA gbd, const char *tagi) {
723	char *s;
724	char *tag;
725	char *buf;
726	char *se; // string end
727	char *sa; // string anfang and tag end
728	char *ts; // tag start
729	char *t;
730
731	buf = s = GB_read_as_string(gbd);
732	if (!s) return s;
733	if (!tagi) return s;
734	if (!strlen(tagi)) return s;
735
736	tag = GBS_string_2_key(tagi);
737
738	while (s) {
739	ts = strchr(s, '[');
740	if (!ts) goto notfound; // no tag
741
742	*(ts++) = 0;
743
744	sa = strchr(ts, ']');
745	if (!sa) goto notfound;
746
747	*sa++ = 0;
748	while (*sa == ' ') sa++;
749
750	se = strchr(sa, '[');
751	if (se) {
752	while (se>sa && se[-1] == ' ') se--;
753	*(se++) = 0;
754	}
755	for (t = strtok(ts, ","); t; t = strtok(0, ",")) {
756	if (strcmp(t, tag) == 0) {
757	s = strdup(sa);
758	free(buf);
759	goto found;
760	}
761	}
762	s = se;
763	}
764	notfound :
765	// Nothing found
766	free(buf);
767	s = 0;
768	found :
769	free(tag);
770	return s;
771	}
772
773
774	/* be CAREFUL : this function is used to save ARB ASCII database (i.e. properties)
775	* used as well to save perl macros
776	*
777	* when changing GBS_fwrite_string -> GBS_fread_string needs to be fixed as well
778	*
779	* always keep in mind, that many users have databases/macros written with older
780	* versions of this function. They MUST load proper!!!
781	*/
void GBS_fwrite_string(const char *strngi, FILE *out) {
    /* writes 'strngi' to 'out', enclosed in double quotes, using this encoding:
     *   LF               -> \n
     *   TAB              -> \t
     *   other ASCII 1-24 -> \A .. \X   (code + '@')
     *   ASCII 25-31      -> \0 .. \6
     *   "                -> \"
     *   \                -> \\
     * all other characters are written unchanged.
     *
     * The encoding is kept exactly as it was (old files have to stay readable).
     */
    const unsigned char *strng = (const unsigned char *)strngi;
    int                  c;

    putc('"', out);

    while ((c = *strng++)) {
        if (c < 32) {
            putc('\\', out);
            if (c == '\n')
                putc('n', out);
            else if (c == '\t')
                putc('t', out);
            else if (c<25) {
                putc(c+'@', out); // characters ASCII 0..24 encoded as \@..\X (\n and \t are done above)
            }
            else {
                putc(c+('0'-25), out); // characters ASCII 25..31 encoded as \0..\6
            }
        }
        else if (c == '"') {
            putc('\\', out);
            putc('"', out);
        }
        else if (c == '\\') {
            putc('\\', out);
            putc('\\', out);
        }
        else {
            putc(c, out);
        }
    }
    putc('"', out);
}
816
817	/* Read a string from a file written by GBS_fwrite_string,
818	* Searches first '"'
819	*
820	* WARNING : changing this function affects perl-macro execution (read warnings for GBS_fwrite_string)
821	* any changes should be done in GBS_fconvert_string too.
822	*/
823
824	static char GBS_fread_string(FILE in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused!
825	GBS_strstruct *strstr = GBS_stropen(1024);
826	int x;
827
828	while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"'
829
830	if (x != EOF) {
831	while ((x = getc(in)) != '"') {
832	if (x == EOF) break;
833	if (x == '\\') {
834	x = getc(in); if (x==EOF) break;
835	if (x == 'n') {
836	GBS_chrcat(strstr, '\n');
837	continue;
838	}
839	if (x == 't') {
840	GBS_chrcat(strstr, '\t');
841	continue;
842	}
843	if (x>='@' && x <= '@' + 25) {
844	GBS_chrcat(strstr, x-'@');
845	continue;
846	}
847	if (x>='0' && x <= '9') {
848	GBS_chrcat(strstr, x-('0'-25));
849	continue;
850	}
851	// all other backslashes are simply skipped
852	}
853	GBS_chrcat(strstr, x);
854	}
855	}
856	return GBS_strclose(strstr);
857	}
858
859	/* does similar decoding as GBS_fread_string but works directly on an existing buffer
860	* (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!)
861	*
862	* inserts \0 behind decoded string (removes the closing '"')
863	* returns a pointer behind the end (") of the _encoded_ string
864	* returns NULL if a 0-character is found
865	*/
866	char GBS_fconvert_string(char buffer) {
867	char *t = buffer;
868	char *f = buffer;
869	int x;
870
871	gb_assert(f[-1] == '"');
872	// the opening " has already been read
873
874	while ((x = *f++) != '"') {
875	if (!x) break;
876
877	if (x == '\\') {
878	x = *f++;
879	if (!x) break;
880
881	if (x == 'n') {
882	*t++ = '\n';
883	continue;
884	}
885	if (x == 't') {
886	*t++ = '\t';
887	continue;
888	}
889	if (x>='@' && x <= '@' + 25) {
890	*t++ = x-'@';
891	continue;
892	}
893	if (x>='0' && x <= '9') {
894	*t++ = x-('0'-25);
895	continue;
896	}
897	// all other backslashes are simply skipped
898	}
899	*t++ = x;
900	}
901
902	if (!x) return 0; // error (string should not contain 0-character)
903	gb_assert(x == '"');
904
905	t[0] = 0;
906	return f;
907	}
908
909	char GBS_replace_tabs_by_spaces(const char text) {
910	int tlen = strlen(text);
911	GBS_strstruct mfile = GBS_stropen(tlen 3/2);
912	int tabpos = 0;
913	int c;
914
915	while ((c=*(text++))) {
916	if (c == '\t') {
917	int ntab = (tabpos + 8) & 0xfffff8;
918	while (tabpos < ntab) {
919	GBS_chrcat(mfile, ' ');
920	tabpos++;
921	}
922	continue;
923	}
924	tabpos ++;
925	if (c == '\n') {
926	tabpos = 0;
927	}
928	GBS_chrcat(mfile, c);
929	}
930	return GBS_strclose(mfile);
931	}
932
933	const char GBS_readable_size(unsigned long long size, const char unit_suffix) {
934	// return human readable size information
935	// returned string is maximal 6+strlen(unit) characters long
936	// (using "b" as 'unit_suffix' produces '### b', '### Mb' etc)
937
938	if (size<1000) return GBS_global_string("%llu %s", size, unit_suffix);
939
940	const char *units = "kMGTPEZY"; // kilo, Mega, Giga, Tera, ... should be enough forever
941	int i;
942
943	for (i = 0; units[i]; ++i) {
944	char unit = units[i];
945	if (size<1000*1024) {
946	double amount = size/(double)1024;
947	if (amount<10.0) return GBS_global_string("%4.2f %c%s", amount+0.005, unit, unit_suffix);
948	if (amount<100.0) return GBS_global_string("%4.1f %c%s", amount+0.05, unit, unit_suffix);
949	return GBS_global_string("%i %c%s", (int)(amount+0.5), unit, unit_suffix);
950	}
951	size /= 1024; // next unit
952	}
953	return GBS_global_string("MUCH %s", unit_suffix);
954	}
955
956	char GBS_trim(const char str) {
957	// trim whitespace at beginning and end of 'str'
958	const char *whitespace = " \t\n";
959	while (str[0] && strchr(whitespace, str[0])) str++;
960
961	const char *end = strchr(str, 0)-1;
962	while (end >= str && strchr(whitespace, end[0])) end--;
963
964	return GB_strpartdup(str, end);
965	}
966
967	static char dated_info(const char info) {
968	char *dated_info = 0;
969	time_t date;
970	if (time(&date) != -1) {
971	char *dstr = ctime(&date);
972	char *nl = strchr(dstr, '\n');
973
974	if (nl) nl[0] = 0; // cut off LF
975
976	dated_info = GBS_global_string_copy("%s: %s", dstr, info);
977	}
978	else {
979	dated_info = strdup(info);
980	}
981	return dated_info;
982	}
983
984	char GBS_log_dated_action_to(const char comment, const char *action) {
985	/*! appends 'action' prefixed by current timestamp to 'comment'
986	*/
987	size_t clen = comment ? strlen(comment) : 0;
988	size_t alen = strlen(action);
989
990	GBS_strstruct *new_comment = GBS_stropen(clen+alen+100);
991
992	if (comment) {
993	GBS_strcat(new_comment, comment);
994	if (comment[clen-1] != '\n') GBS_chrcat(new_comment, '\n');
995	}
996
997	char *dated_action = dated_info(action);
998	GBS_strcat(new_comment, dated_action);
999	GBS_chrcat(new_comment, '\n');
1000
1001	free(dated_action);
1002
1003	return GBS_strclose(new_comment);
1004	}
1005
1006	// --------------------------------------------------------------------------------
1007
1008	#ifdef UNIT_TESTS
1009
1010	#include <test_unit.h>
1011
#ifdef ENABLE_CRASH_TESTS
// helpers which crash deliberately (used as arguments of the TEST_ASSERT_... macros below)
static void provokesegv() { *(int *)0 = 0; } // writes through a null pointer
#if defined(ASSERTION_USED)
static void failassertion() { gb_assert(0); }
static void provokesegv_does_not_fail_assertion() {
    // provokesegv does not raise assertion
    // -> the following assertion fails
    TEST_ASSERT_CODE_ASSERTION_FAILS(provokesegv);
}
#endif
#endif
1023
1024	void TEST_signal_tests() {
// NOTE(review): provokesegv, failassertion and provokesegv_does_not_fail_assertion
// are only defined if ENABLE_CRASH_TESTS (and ASSERTION_USED) are defined.
// Presumably the TEST_ASSERT_... macros do not reference their argument otherwise - confirm.
1025	// check whether we can test for SEGV and assertion failures
1026	TEST_ASSERT_SEGFAULT(provokesegv);
1027	TEST_ASSERT_CODE_ASSERTION_FAILS(failassertion);
1028
1029	// tests whether signal suppression works multiple times (by repeating tests)
1030	TEST_ASSERT_CODE_ASSERTION_FAILS(failassertion);
1031	TEST_ASSERT_SEGFAULT(provokesegv);
1032
1033	// test whether SEGV can be distinguished from assertion
1034	TEST_ASSERT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion);
1035	}
1036
// EXPECT_CONTENT compares the current content of the local variable 'strstr'
// (as returned by GBS_mempntr) with 'content'.
// It can only be used where a variable named 'strstr' is in scope.
1037	#define EXPECT_CONTENT(content) TEST_ASSERT_EQUAL(GBS_mempntr(strstr), content)
1038
1039	void TEST_GBS_strstruct() {
// block 1: step-by-step check of the append and cut functions
1040	{
1041	GBS_strstruct *strstr = GBS_stropen(1000); EXPECT_CONTENT("");
1042
1043	GBS_chrncat(strstr, 'b', 3); EXPECT_CONTENT("bbb");
1044	GBS_intcat(strstr, 17); EXPECT_CONTENT("bbb17");
1045	GBS_chrcat(strstr, '_'); EXPECT_CONTENT("bbb17_");
1046	GBS_floatcat(strstr, 3.5); EXPECT_CONTENT("bbb17_3.500000");
1047
1048	TEST_ASSERT_EQUAL(GBS_memoffset(strstr), 14);
1049	GBS_str_cut_tail(strstr, 13); EXPECT_CONTENT("b");
1050	GBS_strcat(strstr, "utter"); EXPECT_CONTENT("butter");
1051	GBS_strncat(strstr, "flying", 3); EXPECT_CONTENT("butterfly");
1052
1053	GBS_strnprintf(strstr, 200, "%c%s", ' ', "flutters");
1054	EXPECT_CONTENT("butterfly flutters");
1055
1056	free(GBS_strclose(strstr));
1057	}
1058	{
1059	// re-alloc smaller
1060	GBS_strstruct *strstr = GBS_stropen(500); EXPECT_CONTENT("");
1061	GBS_strforget(strstr);
1062	}
1063
1064	// trigger downsize of oversized block
1065	for (int i = 0; i<12; ++i) {
1066	GBS_strstruct *strstr = GBS_stropen(10);
1067	GBS_strforget(strstr);
1068	}
1069
// last block: appending more than the initial size has to change the buffer size
1070	{
1071	GBS_strstruct *strstr = GBS_stropen(10);
1072	size_t oldbufsize = strstr->get_buffer_size();
1073	GBS_chrncat(strstr, 'x', 20); // trigger reallocation of buffer
1074
1075	TEST_ASSERT(oldbufsize != strstr->get_buffer_size()); // did we reallocate?
1076	EXPECT_CONTENT("xxxxxxxxxxxxxxxxxxxx");
1077	GBS_strforget(strstr);
1078	}
1079	}
1080
// TEST_SHORTENED_EQUALS runs GBS_shorten_repeated_data on a heap copy of 'Long'
// (the function modifies its argument) and expects the result to equal 'Short'.
1081	#define TEST_SHORTENED_EQUALS(Long,Short) do { \
1082	char *buf = strdup(Long); \
1083	GBS_shorten_repeated_data(buf); \
1084	TEST_ASSERT_EQUAL(buf, Short); \
1085	free(buf); \
1086	} while(0)
1087
1088	void TEST_GBS_shorten_repeated_data() {
// runs of 5 or more identical characters get shortened, shorter runs stay unchanged
1089	TEST_SHORTENED_EQUALS("12345", "12345");
1090	TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc");
1091	TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc");
1092	TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc");
1093	TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc");
1094	TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc");
1095	TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc");
1096	TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc");
1097	TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc");
1098	TEST_SHORTENED_EQUALS("aaaabc", "aaaabc");
1099	TEST_SHORTENED_EQUALS("aaabc", "aaabc");
1100	TEST_SHORTENED_EQUALS("aabc", "aabc");
1101	TEST_SHORTENED_EQUALS("", "");
1102
1103	}
1104	#endif
1105

Note: See TracBrowser for help on using the repository browser.

Download in other formats: