1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : adstring.cxx // |
---|
4 | // Purpose : various string functions // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #include <arb_backtrace.h> |
---|
12 | #include <arb_strbuf.h> |
---|
13 | #include <arb_sort.h> |
---|
14 | #include <arb_defs.h> |
---|
15 | #include <arb_str.h> |
---|
16 | |
---|
17 | #include "gb_key.h" |
---|
18 | |
---|
19 | #include <SigHandler.h> |
---|
20 | |
---|
21 | #include <execinfo.h> |
---|
22 | |
---|
23 | #include <cstdarg> |
---|
24 | #include <cctype> |
---|
25 | #include <cerrno> |
---|
26 | #include <ctime> |
---|
27 | #include <setjmp.h> |
---|
28 | |
---|
29 | #include <valgrind.h> |
---|
30 | |
---|
31 | static char *GBS_string_2_key_with_exclusions(const char *str, const char *additional) { |
---|
32 | // converts any string to a valid key (all chars in 'additional' are additionally allowed) |
---|
33 | char buf[GB_KEY_LEN_MAX+1]; |
---|
34 | int i; |
---|
35 | int c; |
---|
36 | for (i=0; i<GB_KEY_LEN_MAX;) { |
---|
37 | c = *(str++); |
---|
38 | if (!c) break; |
---|
39 | |
---|
40 | if (c==' ' || c == '_') { |
---|
41 | buf[i++] = '_'; |
---|
42 | } |
---|
43 | else if (isalnum(c) || strchr(additional, c) != 0) { |
---|
44 | buf[i++] = c; |
---|
45 | } |
---|
46 | } |
---|
47 | for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_'; |
---|
48 | buf[i] = 0; |
---|
49 | return ARB_strdup(buf); |
---|
50 | } |
---|
51 | |
---|
52 | char *GBS_string_2_key(const char *str) // converts any string to a valid key |
---|
53 | { |
---|
54 | return GBS_string_2_key_with_exclusions(str, ""); |
---|
55 | } |
---|
56 | |
---|
57 | char *GB_memdup(const char *source, size_t len) { |
---|
58 | char *dest = ARB_alloc<char>(len); |
---|
59 | memcpy(dest, source, len); |
---|
60 | return dest; |
---|
61 | } |
---|
62 | |
---|
63 | GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT |
---|
64 | // test whether all characters are letters, numbers or _ |
---|
65 | int i; |
---|
66 | long len; |
---|
67 | |
---|
68 | if (!key || key[0] == 0) return "Empty key is not allowed"; |
---|
69 | len = strlen(key); |
---|
70 | if (len>GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key); |
---|
71 | if (len < GB_KEY_LEN_MIN) return GBS_global_string("Invalid key '%s': too short", key); |
---|
72 | |
---|
73 | for (i = 0; key[i]; ++i) { |
---|
74 | char c = key[i]; |
---|
75 | if ((c>='a') && (c<='z')) continue; |
---|
76 | if ((c>='A') && (c<='Z')) continue; |
---|
77 | if ((c>='0') && (c<='9')) continue; |
---|
78 | if (c=='_') continue; |
---|
79 | return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key); |
---|
80 | } |
---|
81 | |
---|
82 | return 0; |
---|
83 | } |
---|
84 | GB_ERROR GB_check_link_name(const char *key) { // goes to header: __ATTR__USERESULT |
---|
85 | // test whether all characters are letters, numbers or _ |
---|
86 | int i; |
---|
87 | long len; |
---|
88 | |
---|
89 | if (!key || key[0] == 0) return GB_export_error("Empty key is not allowed"); |
---|
90 | len = strlen(key); |
---|
91 | if (len>GB_KEY_LEN_MAX) return GB_export_errorf("Invalid key '%s': too long", key); |
---|
92 | if (len < 1) return GB_export_errorf("Invalid key '%s': too short", key); // here it differs from GB_check_key |
---|
93 | |
---|
94 | for (i = 0; key[i]; ++i) { |
---|
95 | char c = key[i]; |
---|
96 | if ((c>='a') && (c<='z')) continue; |
---|
97 | if ((c>='A') && (c<='Z')) continue; |
---|
98 | if ((c>='0') && (c<='9')) continue; |
---|
99 | if (c=='_') continue; |
---|
100 | return GB_export_errorf("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key); |
---|
101 | } |
---|
102 | |
---|
103 | return 0; |
---|
104 | } |
---|
105 | GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT |
---|
106 | // test whether all characters are letters, numbers or _ |
---|
107 | // additionally allow '/' and '->' for hierarchical keys |
---|
108 | GB_ERROR err = 0; |
---|
109 | |
---|
110 | if (!key || key[0] == 0) { |
---|
111 | err = "Empty key is not allowed"; |
---|
112 | } |
---|
113 | else if (!strpbrk(key, "/-")) { |
---|
114 | err = GB_check_key(key); |
---|
115 | } |
---|
116 | else { |
---|
117 | char *key_copy = ARB_strdup(key); |
---|
118 | char *start = key_copy; |
---|
119 | |
---|
120 | if (start[0] == '/') ++start; |
---|
121 | |
---|
122 | while (start && !err) { |
---|
123 | char *key_end = strpbrk(start, "/-"); |
---|
124 | |
---|
125 | if (key_end) { |
---|
126 | char c = *key_end; |
---|
127 | *key_end = 0; |
---|
128 | err = GB_check_key(start); |
---|
129 | *key_end = c; |
---|
130 | |
---|
131 | if (c == '-') { |
---|
132 | if (key_end[1] != '>') { |
---|
133 | err = GBS_global_string("'>' expected after '-' in '%s'", key); |
---|
134 | } |
---|
135 | start = key_end+2; |
---|
136 | } |
---|
137 | else { |
---|
138 | gb_assert(c == '/'); |
---|
139 | start = key_end+1; |
---|
140 | } |
---|
141 | } |
---|
142 | else { |
---|
143 | err = GB_check_key(start); |
---|
144 | start = 0; |
---|
145 | } |
---|
146 | } |
---|
147 | |
---|
148 | free(key_copy); |
---|
149 | } |
---|
150 | |
---|
151 | return err; |
---|
152 | } |
---|
153 | |
---|
154 | // --------------------------- |
---|
155 | // escape characters |
---|
156 | |
---|
157 | char *GBS_remove_escape(char *com) // \ is the escape character |
---|
158 | |
---|
159 | { |
---|
160 | char *result, *s, *d; |
---|
161 | int ch; |
---|
162 | |
---|
163 | s = d = result = ARB_strdup(com); |
---|
164 | while ((ch = *(s++))) { |
---|
165 | switch (ch) { |
---|
166 | case '\\': |
---|
167 | ch = *(s++); if (!ch) { s--; break; }; |
---|
168 | switch (ch) { |
---|
169 | case 'n': *(d++) = '\n'; break; |
---|
170 | case 't': *(d++) = '\t'; break; |
---|
171 | case '0': *(d++) = '\0'; break; |
---|
172 | default: *(d++) = ch; break; |
---|
173 | } |
---|
174 | break; |
---|
175 | default: |
---|
176 | *(d++) = ch; |
---|
177 | } |
---|
178 | } |
---|
179 | *d = 0; |
---|
180 | return result; |
---|
181 | } |
---|
182 | |
---|
183 | // ---------------------------------------------- |
---|
184 | // escape/unescape characters in strings |
---|
185 | |
---|
186 | char *GBS_escape_string(const char *str, const char *chars_to_escape, char escape_char) { |
---|
187 | /*! escape characters in 'str' |
---|
188 | * |
---|
189 | * uses a special escape-method, which eliminates all 'chars_to_escape' completely |
---|
190 | * from str (this makes further processing of the string more easy) |
---|
191 | * |
---|
192 | * @param str string to escape |
---|
193 | * |
---|
194 | * @param escape_char is the character used for escaping. For performance reasons it |
---|
195 | * should be a character rarely used in 'str'. |
---|
196 | * |
---|
197 | * @param chars_to_escape may not contain 'A'-'Z' (these are used for escaping) |
---|
198 | * and it may not be longer than 26 bytes |
---|
199 | * |
---|
200 | * @return heap copy of escaped string |
---|
201 | * |
---|
202 | * Inverse of GBS_unescape_string() |
---|
203 | */ |
---|
204 | |
---|
205 | int len = strlen(str); |
---|
206 | char *buffer = ARB_alloc<char>(2*len+1); |
---|
207 | int j = 0; |
---|
208 | int i; |
---|
209 | |
---|
210 | gb_assert(strlen(chars_to_escape) <= 26); |
---|
211 | gb_assert(strchr(chars_to_escape, escape_char) == 0); // escape_char may not be included in chars_to_escape |
---|
212 | |
---|
213 | for (i = 0; str[i]; ++i) { |
---|
214 | if (str[i] == escape_char) { |
---|
215 | buffer[j++] = escape_char; |
---|
216 | buffer[j++] = escape_char; |
---|
217 | } |
---|
218 | else { |
---|
219 | const char *found = strchr(chars_to_escape, str[i]); |
---|
220 | if (found) { |
---|
221 | buffer[j++] = escape_char; |
---|
222 | buffer[j++] = (found-chars_to_escape+'A'); |
---|
223 | |
---|
224 | gb_assert(found[0]<'A' || found[0]>'Z'); // illegal character in chars_to_escape |
---|
225 | } |
---|
226 | else { |
---|
227 | |
---|
228 | buffer[j++] = str[i]; |
---|
229 | } |
---|
230 | } |
---|
231 | } |
---|
232 | buffer[j] = 0; |
---|
233 | |
---|
234 | return buffer; |
---|
235 | } |
---|
236 | |
---|
237 | char *GBS_unescape_string(const char *str, const char *escaped_chars, char escape_char) { |
---|
238 | //! inverse of GB_escape_string() - for params see there |
---|
239 | |
---|
240 | int len = strlen(str); |
---|
241 | char *buffer = ARB_alloc<char>(len+1); |
---|
242 | int j = 0; |
---|
243 | int i; |
---|
244 | |
---|
245 | #if defined(ASSERTION_USED) |
---|
246 | int escaped_chars_len = strlen(escaped_chars); |
---|
247 | #endif // ASSERTION_USED |
---|
248 | |
---|
249 | gb_assert(strlen(escaped_chars) <= 26); |
---|
250 | gb_assert(strchr(escaped_chars, escape_char) == 0); // escape_char may not be included in chars_to_escape |
---|
251 | |
---|
252 | for (i = 0; str[i]; ++i) { |
---|
253 | if (str[i] == escape_char) { |
---|
254 | if (str[i+1] == escape_char) { |
---|
255 | buffer[j++] = escape_char; |
---|
256 | } |
---|
257 | else { |
---|
258 | int idx = str[i+1]-'A'; |
---|
259 | |
---|
260 | gb_assert(idx >= 0 && idx<escaped_chars_len); |
---|
261 | buffer[j++] = escaped_chars[idx]; |
---|
262 | } |
---|
263 | ++i; |
---|
264 | } |
---|
265 | else { |
---|
266 | buffer[j++] = str[i]; |
---|
267 | } |
---|
268 | } |
---|
269 | buffer[j] = 0; |
---|
270 | |
---|
271 | return buffer; |
---|
272 | } |
---|
273 | |
---|
274 | char *GBS_eval_env(GB_CSTR p) { |
---|
275 | GB_ERROR error = 0; |
---|
276 | GB_CSTR ka; |
---|
277 | GBS_strstruct *out = GBS_stropen(1000); |
---|
278 | |
---|
279 | while ((ka = GBS_find_string(p, "$(", 0))) { |
---|
280 | GB_CSTR kz = strchr(ka, ')'); |
---|
281 | if (!kz) { |
---|
282 | error = GBS_global_string("missing ')' for envvar '%s'", p); |
---|
283 | break; |
---|
284 | } |
---|
285 | else { |
---|
286 | char *envvar = ARB_strpartdup(ka+2, kz-1); |
---|
287 | int len = ka-p; |
---|
288 | |
---|
289 | if (len) GBS_strncat(out, p, len); |
---|
290 | |
---|
291 | GB_CSTR genv = GB_getenv(envvar); |
---|
292 | if (genv) GBS_strcat(out, genv); |
---|
293 | |
---|
294 | p = kz+1; |
---|
295 | free(envvar); |
---|
296 | } |
---|
297 | } |
---|
298 | |
---|
299 | if (error) { |
---|
300 | GB_export_error(error); |
---|
301 | GBS_strforget(out); |
---|
302 | return 0; |
---|
303 | } |
---|
304 | |
---|
305 | GBS_strcat(out, p); // copy rest |
---|
306 | return GBS_strclose(out); |
---|
307 | } |
---|
308 | |
---|
long GBS_gcgchecksum(const char *seq)
// GCGchecksum
{
    // Sums up toupper(character)*weight for all characters of 'seq'.
    // The weight is 1 for the first character, is incremented for each
    // following character and restarts at 1 after it reached 57.
    // The result is the sum modulo 10000.

    const long WEIGHT_CYCLE = 57;

    long sum = 0;
    long pos = 0;

    for (const char *s = seq; *s; ++s, ++pos) {
        long weight  = pos%WEIGHT_CYCLE + 1;
        sum         += weight * toupper(*s);
    }

    return sum%10000;
}
---|
326 | |
---|
// CRC-32 lookup table: one entry for each possible byte value
// (generated by makecrc.c from the ZIP sources)
uint32_t crctab[] = {
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
    0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
    0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
    0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
    0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
    0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
    0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
    0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
    0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
    0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
    0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
    0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
    0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
    0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
    0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
    0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
    0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
    0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
    0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
    0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
    0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
    0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
    0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
    0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
    0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
    0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
    0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
    0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
    0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
    0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
    0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
    0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
    0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
    0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
    0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
    0x2d02ef8d
};
---|
382 | |
---|
383 | uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude) // RALF: 02-12-96 |
---|
384 | { |
---|
385 | /* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source |
---|
386 | * if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too) |
---|
387 | */ |
---|
388 | |
---|
389 | unsigned long c = 0xffffffffL; |
---|
390 | long n = length; |
---|
391 | int i; |
---|
392 | int tab[256]; |
---|
393 | |
---|
394 | for (i=0; i<256; i++) { |
---|
395 | tab[i] = ignore_case ? toupper(i) : i; |
---|
396 | } |
---|
397 | |
---|
398 | if (exclude) { |
---|
399 | while (1) { |
---|
400 | int k = *(unsigned char *)exclude++; |
---|
401 | if (!k) break; |
---|
402 | tab[k] = 0; |
---|
403 | if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0; |
---|
404 | } |
---|
405 | } |
---|
406 | |
---|
407 | while (n--) { |
---|
408 | i = tab[*(const unsigned char *)seq++]; |
---|
409 | if (i) { |
---|
410 | c = crctab[((int) c ^ i) & 0xff] ^ (c >> 8); |
---|
411 | } |
---|
412 | } |
---|
413 | c = c ^ 0xffffffffL; |
---|
414 | return c; |
---|
415 | } |
---|
416 | |
---|
417 | uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude) |
---|
418 | // if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too) |
---|
419 | { |
---|
420 | return GB_checksum(seq, strlen(seq), ignore_case, exclude); |
---|
421 | } |
---|
422 | |
---|
/* Extract all words of a text which contain enough characters that also occur in 'chars':
   1. minlen <  1.0: the word has at least 3 characters and at least minlen*length_of_word of them occur in 'chars'
   2. minlen == 1.0: all characters of the word occur in 'chars'
   3. minlen >  1.0: at least minlen (rounded) characters of the word occur in 'chars'
*/
---|
427 | |
---|
428 | char *GBS_extract_words(const char *source, const char *chars, float minlen, bool sort_output) { |
---|
429 | char *s = ARB_strdup(source); |
---|
430 | char **ps = ARB_calloc<char*>((strlen(source)>>1) + 1); |
---|
431 | GBS_strstruct *strstruct = GBS_stropen(1000); |
---|
432 | char *f = s; |
---|
433 | int count = 0; |
---|
434 | char *p; |
---|
435 | char *h; |
---|
436 | int cnt; |
---|
437 | int len; |
---|
438 | int iminlen = (int) (minlen+.5); |
---|
439 | |
---|
440 | while ((p = strtok(f, " \t,;:|"))) { |
---|
441 | f = 0; |
---|
442 | cnt = 0; |
---|
443 | len = strlen(p); |
---|
444 | for (h=p; *h; h++) { |
---|
445 | if (strchr(chars, *h)) cnt++; |
---|
446 | } |
---|
447 | |
---|
448 | if (minlen == 1.0) { |
---|
449 | if (cnt != len) continue; |
---|
450 | } |
---|
451 | else if (minlen > 1.0) { |
---|
452 | if (cnt < iminlen) continue; |
---|
453 | } |
---|
454 | else { |
---|
455 | if (len < 3 || cnt < minlen*len) continue; |
---|
456 | } |
---|
457 | ps[count] = p; |
---|
458 | count ++; |
---|
459 | } |
---|
460 | if (sort_output) { |
---|
461 | GB_sort((void **)ps, 0, count, GB_string_comparator, 0); |
---|
462 | } |
---|
463 | for (cnt = 0; cnt<count; cnt++) { |
---|
464 | if (cnt) { |
---|
465 | GBS_chrcat(strstruct, ' '); |
---|
466 | } |
---|
467 | GBS_strcat(strstruct, ps[cnt]); |
---|
468 | } |
---|
469 | |
---|
470 | free(ps); |
---|
471 | free(s); |
---|
472 | return GBS_strclose(strstruct); |
---|
473 | } |
---|
474 | |
---|
475 | |
---|
size_t GBS_shorten_repeated_data(char *data) {
    // Shortens repeats in 'data' (in place, i.e. this function modifies 'data'!!)
    //
    // Each run of 5 or more identical characters is replaced by "<char>{<count>}",
    // shorter runs are kept unchanged,
    // e.g. "..............................ACGT....................TGCA"
    //   -> ".{30}ACGT.{20}TGCA"
    //
    // Returns the length of the shortened string.

#if defined(DEBUG)
    size_t orgLen = strlen(data);
#endif // DEBUG

    const char *in  = data; // start of next run to read
    char       *out = data; // write position (never overtakes 'in', since replacements are shorter than runs)

    while (*in) {
        char   ch  = *in;
        size_t run = 1;
        while (in[run] == ch) ++run;
        in += run;

        if (run >= 5) {
            out += sprintf(out, "%c{%zu}", ch, run); // insert repeat count
        }
        else {
            for (size_t r = 0; r<run; ++r) *out++ = ch; // insert plain
        }
    }

    *out = 0;

#if defined(DEBUG)
    gb_assert(strlen(data) <= orgLen);
#endif // DEBUG

    return out-data;
}
---|
516 | |
---|
517 | |
---|
518 | // ------------------------------------------- |
---|
519 | // helper function for tagged fields |
---|
520 | |
---|
521 | static GB_ERROR g_bs_add_value_tag_to_hash(GBDATA *gb_main, GB_HASH *hash, char *tag, char *value, const char *rtag, const char *srt, const char *aci, GBDATA *gbd) { |
---|
522 | char *to_free = NULL; |
---|
523 | |
---|
524 | if (rtag && strcmp(tag, rtag) == 0) { |
---|
525 | if (srt) { |
---|
526 | value = to_free = GBS_string_eval(value, srt, gbd); |
---|
527 | } |
---|
528 | else if (aci) { |
---|
529 | value = to_free = GB_command_interpreter(gb_main, value, aci, gbd, 0); |
---|
530 | } |
---|
531 | if (!value) return GB_await_error(); |
---|
532 | } |
---|
533 | |
---|
534 | { |
---|
535 | char *p; |
---|
536 | p = value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{' |
---|
537 | p = value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}' |
---|
538 | } |
---|
539 | |
---|
540 | GB_HASH *sh = (GB_HASH *)GBS_read_hash(hash, value); |
---|
541 | if (!sh) { |
---|
542 | sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent |
---|
543 | GBS_write_hash(hash, value, (long)sh); |
---|
544 | } |
---|
545 | |
---|
546 | GBS_write_hash(sh, tag, 1); |
---|
547 | free(to_free); |
---|
548 | return NULL; |
---|
549 | } |
---|
550 | |
---|
551 | |
---|
552 | static GB_ERROR g_bs_convert_string_to_tagged_hash(GB_HASH *hash, char *s, char *default_tag, const char *del, |
---|
553 | GBDATA *gb_main, const char *rtag, const char *srt, const char *aci, GBDATA *gbd) { |
---|
554 | char *se; // string end |
---|
555 | char *sa; // string start and tag end |
---|
556 | char *ts; // tag start |
---|
557 | char *t; |
---|
558 | GB_ERROR error = 0; |
---|
559 | while (s && s[0]) { |
---|
560 | ts = strchr(s, '['); |
---|
561 | if (!ts) { |
---|
562 | error = g_bs_add_value_tag_to_hash(gb_main, hash, default_tag, s, rtag, srt, aci, gbd); // no tag found, use default tag |
---|
563 | if (error) break; |
---|
564 | break; |
---|
565 | } |
---|
566 | else { |
---|
567 | *(ts++) = 0; |
---|
568 | } |
---|
569 | sa = strchr(ts, ']'); |
---|
570 | if (sa) { |
---|
571 | *sa++ = 0; |
---|
572 | while (*sa == ' ') sa++; |
---|
573 | } |
---|
574 | else { |
---|
575 | error = g_bs_add_value_tag_to_hash(gb_main, hash, default_tag, s, rtag, srt, aci, gbd); // no tag found, use default tag |
---|
576 | if (error) break; |
---|
577 | break; |
---|
578 | } |
---|
579 | se = strchr(sa, '['); |
---|
580 | if (se) { |
---|
581 | while (se>sa && se[-1] == ' ') se--; |
---|
582 | *(se++) = 0; |
---|
583 | } |
---|
584 | for (t = strtok(ts, ","); t; t = strtok(0, ",")) { |
---|
585 | if (del && strcmp(t, del) == 0) continue; // test, whether to delete |
---|
586 | if (sa[0] == 0) continue; |
---|
587 | error = g_bs_add_value_tag_to_hash(gb_main, hash, t, sa, rtag, srt, aci, gbd); // tag found, use tag |
---|
588 | if (error) break; |
---|
589 | } |
---|
590 | s = se; |
---|
591 | } |
---|
592 | return error; |
---|
593 | } |
---|
594 | |
---|
595 | static long g_bs_merge_tags(const char *tag, long val, void *cd_sub_result) { |
---|
596 | GBS_strstruct *sub_result = (GBS_strstruct*)cd_sub_result; |
---|
597 | |
---|
598 | GBS_strcat(sub_result, tag); |
---|
599 | GBS_strcat(sub_result, ","); |
---|
600 | |
---|
601 | return val; |
---|
602 | } |
---|
603 | |
---|
604 | static long g_bs_read_tagged_hash(const char *value, long subhash, void *cd_g_bs_collect_tags_hash) { |
---|
605 | static int counter = 0; |
---|
606 | |
---|
607 | GBS_strstruct *sub_result = GBS_stropen(100); |
---|
608 | GBS_hash_do_sorted_loop((GB_HASH *)subhash, g_bs_merge_tags, GBS_HCF_sortedByKey, sub_result); |
---|
609 | GBS_intcat(sub_result, counter++); // create a unique number |
---|
610 | |
---|
611 | char *str = GBS_strclose(sub_result); |
---|
612 | |
---|
613 | GB_HASH *g_bs_collect_tags_hash = (GB_HASH*)cd_g_bs_collect_tags_hash; |
---|
614 | GBS_write_hash(g_bs_collect_tags_hash, str, (long)ARB_strdup(value)); // send output to new hash for sorting |
---|
615 | |
---|
616 | free(str); |
---|
617 | return subhash; |
---|
618 | } |
---|
619 | |
---|
620 | static long g_bs_read_final_hash(const char *tag, long value, void *cd_merge_result) { |
---|
621 | GBS_strstruct *merge_result = (GBS_strstruct*)cd_merge_result; |
---|
622 | |
---|
623 | char *lk = const_cast<char*>(strrchr(tag, ',')); |
---|
624 | if (lk) { // remove number at end |
---|
625 | *lk = 0; |
---|
626 | GBS_strcat(merge_result, " ["); |
---|
627 | GBS_strcat(merge_result, tag); |
---|
628 | GBS_strcat(merge_result, "] "); |
---|
629 | } |
---|
630 | GBS_strcat(merge_result, (char *)value); |
---|
631 | return value; |
---|
632 | } |
---|
633 | |
---|
634 | static char *g_bs_get_string_of_tag_hash(GB_HASH *tag_hash) { |
---|
635 | GBS_strstruct *merge_result = GBS_stropen(256); |
---|
636 | GB_HASH *collect_tags_hash = GBS_create_dynaval_hash(512, GB_IGNORE_CASE, GBS_dynaval_free); |
---|
637 | |
---|
638 | GBS_hash_do_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash |
---|
639 | GBS_hash_do_sorted_loop(collect_tags_hash, g_bs_read_final_hash, GBS_HCF_sortedByKey, merge_result); |
---|
640 | |
---|
641 | GBS_free_hash(collect_tags_hash); |
---|
642 | return GBS_strclose(merge_result); |
---|
643 | } |
---|
644 | |
---|
645 | static long g_bs_free_hash_of_hashes_elem(const char */*key*/, long val, void *) { |
---|
646 | GB_HASH *hash = (GB_HASH*)val; |
---|
647 | if (hash) GBS_free_hash(hash); |
---|
648 | return 0; |
---|
649 | } |
---|
650 | static void g_bs_free_hash_of_hashes(GB_HASH *hash) { |
---|
651 | GBS_hash_do_loop(hash, g_bs_free_hash_of_hashes_elem, NULL); |
---|
652 | GBS_free_hash(hash); |
---|
653 | } |
---|
654 | |
---|
655 | char *GBS_merge_tagged_strings(const char *s1, const char *tag1, const char *replace1, const char *s2, const char *tag2, const char *replace2) { |
---|
656 | /* Create a tagged string from two tagged strings: |
---|
657 | * a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string' |
---|
658 | * |
---|
659 | * if 's2' is not empty, then delete tag 'replace1' in 's1' |
---|
660 | * if 's1' is not empty, then delete tag 'replace2' in 's2' |
---|
661 | * |
---|
662 | * if result is NULL, an error has been exported. |
---|
663 | */ |
---|
664 | |
---|
665 | char *str1 = ARB_strdup(s1); |
---|
666 | char *str2 = ARB_strdup(s2); |
---|
667 | char *t1 = GBS_string_2_key(tag1); |
---|
668 | char *t2 = GBS_string_2_key(tag2); |
---|
669 | GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE); |
---|
670 | |
---|
671 | if (!s1[0]) replace2 = NULL; |
---|
672 | if (!s2[0]) replace1 = NULL; |
---|
673 | |
---|
674 | if (replace1 && !replace1[0]) replace1 = NULL; |
---|
675 | if (replace2 && !replace2[0]) replace2 = NULL; |
---|
676 | |
---|
677 | GB_ERROR error = g_bs_convert_string_to_tagged_hash(hash, str1, t1, replace1, 0, 0, 0, 0, 0); |
---|
678 | if (!error) error = g_bs_convert_string_to_tagged_hash(hash, str2, t2, replace2, 0, 0, 0, 0, 0); |
---|
679 | |
---|
680 | char *result = NULL; |
---|
681 | if (!error) { |
---|
682 | result = g_bs_get_string_of_tag_hash(hash); |
---|
683 | } |
---|
684 | else { |
---|
685 | GB_export_error(error); |
---|
686 | } |
---|
687 | |
---|
688 | g_bs_free_hash_of_hashes(hash); |
---|
689 | |
---|
690 | free(t2); |
---|
691 | free(t1); |
---|
692 | free(str2); |
---|
693 | free(str1); |
---|
694 | |
---|
695 | return result; |
---|
696 | } |
---|
697 | |
---|
698 | char *GBS_string_eval_tagged_string(GBDATA *gb_main, const char *s, const char *dt, const char *tag, const char *srt, const char *aci, GBDATA *gbd) { |
---|
699 | /* if 's' is untagged, tag it with default tag 'dt'. |
---|
700 | * if 'tag' is != NULL -> apply 'srt' or 'aci' to that part of the content of 's', which is tagged with 'tag' |
---|
701 | * |
---|
702 | * if result is NULL, an error has been exported. |
---|
703 | */ |
---|
704 | |
---|
705 | char *str = ARB_strdup(s); |
---|
706 | char *default_tag = GBS_string_2_key(dt); |
---|
707 | GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE); |
---|
708 | char *result = 0; |
---|
709 | GB_ERROR error = g_bs_convert_string_to_tagged_hash(hash, str, default_tag, 0, gb_main, tag, srt, aci, gbd); |
---|
710 | |
---|
711 | if (!error) { |
---|
712 | result = g_bs_get_string_of_tag_hash(hash); |
---|
713 | } |
---|
714 | else { |
---|
715 | GB_export_error(error); |
---|
716 | } |
---|
717 | |
---|
718 | g_bs_free_hash_of_hashes(hash); |
---|
719 | free(default_tag); |
---|
720 | free(str); |
---|
721 | |
---|
722 | return result; |
---|
723 | } |
---|
724 | |
---|
725 | |
---|
726 | char *GB_read_as_tagged_string(GBDATA *gbd, const char *tagi) { |
---|
727 | char *s; |
---|
728 | char *tag; |
---|
729 | char *buf; |
---|
730 | char *se; // string end |
---|
731 | char *sa; // string anfang and tag end |
---|
732 | char *ts; // tag start |
---|
733 | char *t; |
---|
734 | |
---|
735 | buf = s = GB_read_as_string(gbd); |
---|
736 | if (!s) return s; |
---|
737 | if (!tagi) return s; |
---|
738 | if (!strlen(tagi)) return s; |
---|
739 | |
---|
740 | tag = GBS_string_2_key(tagi); |
---|
741 | |
---|
742 | while (s) { |
---|
743 | ts = strchr(s, '['); |
---|
744 | if (!ts) goto notfound; // no tag |
---|
745 | |
---|
746 | *(ts++) = 0; |
---|
747 | |
---|
748 | sa = strchr(ts, ']'); |
---|
749 | if (!sa) goto notfound; |
---|
750 | |
---|
751 | *sa++ = 0; |
---|
752 | while (*sa == ' ') sa++; |
---|
753 | |
---|
754 | se = strchr(sa, '['); |
---|
755 | if (se) { |
---|
756 | while (se>sa && se[-1] == ' ') se--; |
---|
757 | *(se++) = 0; |
---|
758 | } |
---|
759 | for (t = strtok(ts, ","); t; t = strtok(0, ",")) { |
---|
760 | if (strcmp(t, tag) == 0) { |
---|
761 | s = ARB_strdup(sa); |
---|
762 | free(buf); |
---|
763 | goto found; |
---|
764 | } |
---|
765 | } |
---|
766 | s = se; |
---|
767 | } |
---|
768 | notfound : |
---|
769 | // Nothing found |
---|
770 | free(buf); |
---|
771 | s = 0; |
---|
772 | found : |
---|
773 | free(tag); |
---|
774 | return s; |
---|
775 | } |
---|
776 | |
---|
777 | |
---|
778 | /* be CAREFUL : this function is used to save ARB ASCII database (i.e. properties) |
---|
779 | * used as well to save perl macros |
---|
780 | * |
---|
781 | * when changing GBS_fwrite_string -> GBS_fread_string needs to be fixed as well |
---|
782 | * |
---|
783 | * always keep in mind, that many users have databases/macros written with older |
---|
784 | * versions of this function. They MUST load proper!!! |
---|
785 | */ |
---|
void GBS_fwrite_string(const char *strngi, FILE *out) {
    /* Writes 'strngi' to 'out' as double-quoted, escaped string:
     *
     *   newline              -> \n
     *   tab                  -> \t
     *   other ASCII 1..24    -> \A..\X   (character + '@')
     *   ASCII 25..31         -> \0..\6
     *   "                    -> \"
     *   \                    -> \\
     *
     * All other characters are written unchanged.
     */

    const unsigned char *in = (const unsigned char *)strngi;

    putc('"', out);

    for (int c = *in++; c; c = *in++) {
        if (c >= 32) {
            if (c == '"' || c == '\\') putc('\\', out); // quote and backslash get prefixed by a backslash
            putc(c, out);
            continue;
        }

        // control character
        putc('\\', out);
        if      (c == '\n') putc('n', out);
        else if (c == '\t') putc('t', out);
        else if (c<25)      putc(c+'@', out);           // characters ASCII 0..24 encoded as \@..\X (\n and \t are done above)
        else                putc(c+('0'-25), out);      // characters ASCII 25..31 encoded as \0..\6
    }

    putc('"', out);
}
---|
820 | |
---|
821 | /* Read a string from a file written by GBS_fwrite_string, |
---|
822 | * Searches first '"' |
---|
823 | * |
---|
824 | * WARNING : changing this function affects perl-macro execution (read warnings for GBS_fwrite_string) |
---|
825 | * any changes should be done in GBS_fconvert_string too. |
---|
826 | */ |
---|
827 | |
---|
828 | static char *GBS_fread_string(FILE *in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused! |
---|
829 | GBS_strstruct *strstr = GBS_stropen(1024); |
---|
830 | int x; |
---|
831 | |
---|
832 | while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"' |
---|
833 | |
---|
834 | if (x != EOF) { |
---|
835 | while ((x = getc(in)) != '"') { |
---|
836 | if (x == EOF) break; |
---|
837 | if (x == '\\') { |
---|
838 | x = getc(in); if (x==EOF) break; |
---|
839 | if (x == 'n') { |
---|
840 | GBS_chrcat(strstr, '\n'); |
---|
841 | continue; |
---|
842 | } |
---|
843 | if (x == 't') { |
---|
844 | GBS_chrcat(strstr, '\t'); |
---|
845 | continue; |
---|
846 | } |
---|
847 | if (x>='@' && x <= '@' + 25) { |
---|
848 | GBS_chrcat(strstr, x-'@'); |
---|
849 | continue; |
---|
850 | } |
---|
851 | if (x>='0' && x <= '9') { |
---|
852 | GBS_chrcat(strstr, x-('0'-25)); |
---|
853 | continue; |
---|
854 | } |
---|
855 | // all other backslashes are simply skipped |
---|
856 | } |
---|
857 | GBS_chrcat(strstr, x); |
---|
858 | } |
---|
859 | } |
---|
860 | return GBS_strclose(strstr); |
---|
861 | } |
---|
862 | |
---|
863 | /* does similar decoding as GBS_fread_string but works directly on an existing buffer |
---|
864 | * (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!) |
---|
865 | * |
---|
866 | * inserts \0 behind decoded string (removes the closing '"') |
---|
867 | * returns a pointer behind the end (") of the _encoded_ string |
---|
868 | * returns NULL if a 0-character is found |
---|
869 | */ |
---|
870 | char *GBS_fconvert_string(char *buffer) { |
---|
871 | char *t = buffer; |
---|
872 | char *f = buffer; |
---|
873 | int x; |
---|
874 | |
---|
875 | gb_assert(f[-1] == '"'); |
---|
876 | // the opening " has already been read |
---|
877 | |
---|
878 | while ((x = *f++) != '"') { |
---|
879 | if (!x) break; |
---|
880 | |
---|
881 | if (x == '\\') { |
---|
882 | x = *f++; |
---|
883 | if (!x) break; |
---|
884 | |
---|
885 | if (x == 'n') { |
---|
886 | *t++ = '\n'; |
---|
887 | continue; |
---|
888 | } |
---|
889 | if (x == 't') { |
---|
890 | *t++ = '\t'; |
---|
891 | continue; |
---|
892 | } |
---|
893 | if (x>='@' && x <= '@' + 25) { |
---|
894 | *t++ = x-'@'; |
---|
895 | continue; |
---|
896 | } |
---|
897 | if (x>='0' && x <= '9') { |
---|
898 | *t++ = x-('0'-25); |
---|
899 | continue; |
---|
900 | } |
---|
901 | // all other backslashes are simply skipped |
---|
902 | } |
---|
903 | *t++ = x; |
---|
904 | } |
---|
905 | |
---|
906 | if (!x) return 0; // error (string should not contain 0-character) |
---|
907 | gb_assert(x == '"'); |
---|
908 | |
---|
909 | t[0] = 0; |
---|
910 | return f; |
---|
911 | } |
---|
912 | |
---|
913 | char *GBS_replace_tabs_by_spaces(const char *text) { |
---|
914 | int tlen = strlen(text); |
---|
915 | GBS_strstruct *mfile = GBS_stropen(tlen * 3/2 + 1); |
---|
916 | int tabpos = 0; |
---|
917 | int c; |
---|
918 | |
---|
919 | while ((c=*(text++))) { |
---|
920 | if (c == '\t') { |
---|
921 | int ntab = (tabpos + 8) & 0xfffff8; |
---|
922 | while (tabpos < ntab) { |
---|
923 | GBS_chrcat(mfile, ' '); |
---|
924 | tabpos++; |
---|
925 | } |
---|
926 | continue; |
---|
927 | } |
---|
928 | tabpos ++; |
---|
929 | if (c == '\n') { |
---|
930 | tabpos = 0; |
---|
931 | } |
---|
932 | GBS_chrcat(mfile, c); |
---|
933 | } |
---|
934 | return GBS_strclose(mfile); |
---|
935 | } |
---|
936 | |
---|
937 | char *GBS_trim(const char *str) { |
---|
938 | // trim whitespace at beginning and end of 'str' |
---|
939 | const char *whitespace = " \t\n"; |
---|
940 | while (str[0] && strchr(whitespace, str[0])) str++; |
---|
941 | |
---|
942 | const char *end = strchr(str, 0)-1; |
---|
943 | while (end >= str && strchr(whitespace, end[0])) end--; |
---|
944 | |
---|
945 | return ARB_strpartdup(str, end); |
---|
946 | } |
---|
947 | |
---|
948 | static char *dated_info(const char *info) { |
---|
949 | char *dated_info = 0; |
---|
950 | time_t date; |
---|
951 | if (time(&date) != -1) { |
---|
952 | char *dstr = ctime(&date); |
---|
953 | char *nl = strchr(dstr, '\n'); |
---|
954 | |
---|
955 | if (nl) nl[0] = 0; // cut off LF |
---|
956 | |
---|
957 | dated_info = GBS_global_string_copy("%s: %s", dstr, info); |
---|
958 | } |
---|
959 | else { |
---|
960 | dated_info = ARB_strdup(info); |
---|
961 | } |
---|
962 | return dated_info; |
---|
963 | } |
---|
964 | |
---|
965 | char *GBS_log_action_to(const char *comment, const char *action, bool stamp) { |
---|
966 | /*! concatenates 'comment' and 'action'. |
---|
967 | * '\n' is appended to existing 'comment' and/or 'action' (if missing). |
---|
968 | * @param comment may be NULL (=> result is 'action') |
---|
969 | * @param action may NOT be NULL |
---|
970 | * @param stamp true -> prefix current timestamp in front of 'action' |
---|
971 | * @return heap copy of concatenation |
---|
972 | */ |
---|
973 | size_t clen = comment ? strlen(comment) : 0; |
---|
974 | size_t alen = strlen(action); |
---|
975 | |
---|
976 | GBS_strstruct *new_comment = GBS_stropen(clen+1+(stamp ? 100 : 0)+alen+1+1); // + 2*\n + \0 + space for stamp |
---|
977 | |
---|
978 | if (comment) { |
---|
979 | GBS_strcat(new_comment, comment); |
---|
980 | if (clen == 0 || comment[clen-1] != '\n') GBS_chrcat(new_comment, '\n'); |
---|
981 | } |
---|
982 | |
---|
983 | if (stamp) { |
---|
984 | char *dated_action = dated_info(action); |
---|
985 | GBS_strcat(new_comment, dated_action); |
---|
986 | free(dated_action); |
---|
987 | } |
---|
988 | else { |
---|
989 | GBS_strcat(new_comment, action); |
---|
990 | } |
---|
991 | if (alen == 0 || action[alen-1] != '\n') GBS_chrcat(new_comment, '\n'); |
---|
992 | |
---|
993 | return GBS_strclose(new_comment); |
---|
994 | } |
---|
995 | |
---|
996 | const char *GBS_funptr2readable(void *funptr, bool stripARBHOME) { |
---|
997 | // only returns module and offset for static functions :-( |
---|
998 | char **funNames = backtrace_symbols(&funptr, 1); |
---|
999 | const char *readable_fun = funNames[0]; |
---|
1000 | |
---|
1001 | if (stripARBHOME) { |
---|
1002 | const char *ARBHOME = GB_getenvARBHOME(); |
---|
1003 | if (ARB_strBeginsWith(readable_fun, ARBHOME)) { |
---|
1004 | readable_fun += strlen(ARBHOME)+1; // +1 hides slash behind ARBHOME |
---|
1005 | } |
---|
1006 | } |
---|
1007 | return readable_fun; |
---|
1008 | } |
---|
1009 | |
---|
1010 | // -------------------------------------------------------------------------------- |
---|
1011 | |
---|
1012 | #ifdef UNIT_TESTS |
---|
1013 | |
---|
1014 | #include <test_unit.h> |
---|
1015 | |
---|
1016 | // #define TEST_TEST_MACROS |
---|
1017 | |
---|
1018 | #ifdef ENABLE_CRASH_TESTS |
---|
1019 | static void provokesegv() { raise(SIGSEGV); } |
---|
1020 | static void dont_provokesegv() {} |
---|
1021 | # if defined(ASSERTION_USED) |
---|
1022 | static void failassertion() { gb_assert(0); } |
---|
1023 | # if defined(TEST_TEST_MACROS) |
---|
1024 | static void dont_failassertion() {} |
---|
1025 | # endif |
---|
1026 | static void provokesegv_does_not_fail_assertion() { |
---|
1027 | // provokesegv does not raise assertion |
---|
1028 | // -> the following assertion fails |
---|
1029 | TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv); |
---|
1030 | } |
---|
1031 | # endif |
---|
1032 | #endif |
---|
1033 | |
---|
1034 | void TEST_signal_tests() { |
---|
1035 | // check whether we can test that no SEGV or assertion failure happened |
---|
1036 | TEST_EXPECT_NO_SEGFAULT(dont_provokesegv); |
---|
1037 | |
---|
1038 | // check whether we can test for SEGV and assertion failures |
---|
1039 | TEST_EXPECT_SEGFAULT(provokesegv); |
---|
1040 | TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion); |
---|
1041 | |
---|
1042 | // tests whether signal suppression works multiple times (by repeating tests) |
---|
1043 | TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion); |
---|
1044 | TEST_EXPECT_SEGFAULT(provokesegv); |
---|
1045 | |
---|
1046 | // test whether SEGV can be distinguished from assertion |
---|
1047 | TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion); |
---|
1048 | |
---|
1049 | // The following section is disabled, because it will |
---|
1050 | // provoke test warnings (to test these warnings). |
---|
1051 | // (enable it when changing any of these TEST_..-macros used here) |
---|
1052 | #if defined(TEST_TEST_MACROS) |
---|
1053 | TEST_EXPECT_NO_SEGFAULT__WANTED(provokesegv); |
---|
1054 | |
---|
1055 | TEST_EXPECT_SEGFAULT__WANTED(dont_provokesegv); |
---|
1056 | TEST_EXPECT_SEGFAULT__UNWANTED(provokesegv); |
---|
1057 | #if defined(ASSERTION_USED) |
---|
1058 | TEST_EXPECT_SEGFAULT__UNWANTED(failassertion); |
---|
1059 | #endif |
---|
1060 | |
---|
1061 | TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(dont_failassertion); |
---|
1062 | TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(failassertion); |
---|
1063 | TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(provokesegv_does_not_fail_assertion); |
---|
1064 | #endif |
---|
1065 | } |
---|
1066 | |
---|
1067 | #define EXPECT_CONTENT(content) TEST_EXPECT_EQUAL(GBS_mempntr(strstr), content) |
---|
1068 | |
---|
1069 | void TEST_GBS_strstruct() { |
---|
1070 | { |
---|
1071 | GBS_strstruct *strstr = GBS_stropen(1000); EXPECT_CONTENT(""); |
---|
1072 | |
---|
1073 | GBS_chrncat(strstr, 'b', 3); EXPECT_CONTENT("bbb"); |
---|
1074 | GBS_intcat(strstr, 17); EXPECT_CONTENT("bbb17"); |
---|
1075 | GBS_chrcat(strstr, '_'); EXPECT_CONTENT("bbb17_"); |
---|
1076 | GBS_floatcat(strstr, 3.5); EXPECT_CONTENT("bbb17_3.500000"); |
---|
1077 | |
---|
1078 | TEST_EXPECT_EQUAL(GBS_memoffset(strstr), 14); |
---|
1079 | GBS_str_cut_tail(strstr, 13); EXPECT_CONTENT("b"); |
---|
1080 | GBS_strcat(strstr, "utter"); EXPECT_CONTENT("butter"); |
---|
1081 | GBS_strncat(strstr, "flying", 3); EXPECT_CONTENT("butterfly"); |
---|
1082 | |
---|
1083 | GBS_strnprintf(strstr, 200, "%c%s", ' ', "flutters"); |
---|
1084 | EXPECT_CONTENT("butterfly flutters"); |
---|
1085 | |
---|
1086 | GBS_strforget(strstr); |
---|
1087 | } |
---|
1088 | { |
---|
1089 | // re-alloc smaller |
---|
1090 | GBS_strstruct *strstr = GBS_stropen(500); EXPECT_CONTENT(""); |
---|
1091 | GBS_strforget(strstr); |
---|
1092 | } |
---|
1093 | |
---|
1094 | // trigger downsize of oversized block |
---|
1095 | for (int i = 0; i<12; ++i) { |
---|
1096 | GBS_strstruct *strstr = GBS_stropen(10); |
---|
1097 | GBS_strforget(strstr); |
---|
1098 | } |
---|
1099 | |
---|
1100 | { |
---|
1101 | GBS_strstruct *strstr = GBS_stropen(10); |
---|
1102 | size_t oldbufsize = strstr->get_buffer_size(); |
---|
1103 | GBS_chrncat(strstr, 'x', 20); // trigger reallocation of buffer |
---|
1104 | |
---|
1105 | TEST_EXPECT_DIFFERENT(oldbufsize, strstr->get_buffer_size()); // did we reallocate? |
---|
1106 | EXPECT_CONTENT("xxxxxxxxxxxxxxxxxxxx"); |
---|
1107 | GBS_strforget(strstr); |
---|
1108 | } |
---|
1109 | } |
---|
1110 | |
---|
1111 | #define TEST_SHORTENED_EQUALS(Long,Short) do { \ |
---|
1112 | char *buf = ARB_strdup(Long); \ |
---|
1113 | GBS_shorten_repeated_data(buf); \ |
---|
1114 | TEST_EXPECT_EQUAL(buf, Short); \ |
---|
1115 | free(buf); \ |
---|
1116 | } while(0) |
---|
1117 | |
---|
1118 | void TEST_GBS_shorten_repeated_data() { |
---|
1119 | TEST_SHORTENED_EQUALS("12345", "12345"); |
---|
1120 | TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc"); |
---|
1121 | TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc"); |
---|
1122 | TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc"); |
---|
1123 | TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc"); |
---|
1124 | TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc"); |
---|
1125 | TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc"); |
---|
1126 | TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc"); |
---|
1127 | TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc"); |
---|
1128 | TEST_SHORTENED_EQUALS("aaaabc", "aaaabc"); |
---|
1129 | TEST_SHORTENED_EQUALS("aaabc", "aaabc"); |
---|
1130 | TEST_SHORTENED_EQUALS("aabc", "aabc"); |
---|
1131 | TEST_SHORTENED_EQUALS("", ""); |
---|
1132 | } |
---|
1133 | |
---|
1134 | static const char *hkey_format[] = { |
---|
1135 | "/%s/bbb/ccc", |
---|
1136 | "/aaa/%s/ccc", |
---|
1137 | "/aaa/bbb/%s", |
---|
1138 | }; |
---|
1139 | |
---|
1140 | inline const char *useInHkey(const char *fragment, size_t pos) { |
---|
1141 | return GBS_global_string(hkey_format[pos], fragment); |
---|
1142 | } |
---|
1143 | |
---|
1144 | #define TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(use) do { \ |
---|
1145 | for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \ |
---|
1146 | TEST_EXPECT_NO_ERROR(GB_check_hkey(useInHkey(use, i))); \ |
---|
1147 | } \ |
---|
1148 | } while(0) |
---|
1149 | |
---|
1150 | #define TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(use,contains) do { \ |
---|
1151 | for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \ |
---|
1152 | TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(useInHkey(use, i)), contains); \ |
---|
1153 | } \ |
---|
1154 | } while(0) |
---|
1155 | |
---|
1156 | |
---|
1157 | void TEST_DB_key_checks() { |
---|
1158 | // plain keys |
---|
1159 | const char *shortest = "ab"; |
---|
1160 | const char *too_long = "ab345678901234567890123456789012345678901234567890123456789012345"; |
---|
1161 | const char *too_short = shortest+1; |
---|
1162 | const char *longest = too_long+1; |
---|
1163 | |
---|
1164 | const char *empty = ""; |
---|
1165 | const char *slash = "sub/key"; |
---|
1166 | const char *comma = "no,key"; |
---|
1167 | const char *minus = "no-key"; |
---|
1168 | |
---|
1169 | TEST_EXPECT_NO_ERROR(GB_check_key(shortest)); |
---|
1170 | TEST_EXPECT_NO_ERROR(GB_check_key(longest)); |
---|
1171 | |
---|
1172 | TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_short), "too short"); |
---|
1173 | TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_long), "too long"); |
---|
1174 | TEST_EXPECT_ERROR_CONTAINS(GB_check_key(empty), "not allowed"); |
---|
1175 | |
---|
1176 | TEST_EXPECT_ERROR_CONTAINS(GB_check_key(slash), "Invalid character"); |
---|
1177 | TEST_EXPECT_ERROR_CONTAINS(GB_check_key(comma), "Invalid character"); |
---|
1178 | TEST_EXPECT_ERROR_CONTAINS(GB_check_key(minus), "Invalid character"); |
---|
1179 | |
---|
1180 | // hierarchical keys |
---|
1181 | TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(shortest); |
---|
1182 | TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(longest); |
---|
1183 | |
---|
1184 | TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_short, "too short"); |
---|
1185 | TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_long, "too long"); |
---|
1186 | TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(empty, "not allowed"); |
---|
1187 | |
---|
1188 | TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(slash); |
---|
1189 | TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(comma, "Invalid character ','"); |
---|
1190 | TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(minus, "'>' expected after '-'"); |
---|
1191 | } |
---|
1192 | |
---|
1193 | #define TEST_STRING2KEY(str,expected) do { \ |
---|
1194 | char *as_key = GBS_string_2_key(str); \ |
---|
1195 | TEST_EXPECT_EQUAL(as_key, expected); \ |
---|
1196 | TEST_EXPECT_NO_ERROR(GB_check_key(as_key)); \ |
---|
1197 | free(as_key); \ |
---|
1198 | } while(0) |
---|
1199 | |
---|
1200 | void TEST_DB_key_generation() { |
---|
1201 | TEST_STRING2KEY("abc", "abc"); |
---|
1202 | TEST_STRING2KEY("a b c", "a_b_c"); |
---|
1203 | |
---|
1204 | // invalid chars |
---|
1205 | TEST_STRING2KEY("string containing \"double-quotes\", 'quotes' and other:shit!*&^@!%@(", |
---|
1206 | "string_containing_doublequotes_quotes_and_othershit"); |
---|
1207 | |
---|
1208 | // length tests |
---|
1209 | TEST_STRING2KEY("a", "a_"); // too short |
---|
1210 | TEST_STRING2KEY("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // too long |
---|
1211 | "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); |
---|
1212 | } |
---|
1213 | |
---|
1214 | #define TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected) do { \ |
---|
1215 | char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \ |
---|
1216 | TEST_EXPECT_EQUAL(result, expected); \ |
---|
1217 | free(result); \ |
---|
1218 | } while(0) |
---|
1219 | |
---|
1220 | #define TEST_MERGE_TAGGED__BROKEN(t1,t2,r1,r2,s1,s2,expected,got) do { \ |
---|
1221 | char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \ |
---|
1222 | TEST_EXPECT_EQUAL__BROKEN(result, expected, got); \ |
---|
1223 | free(result); \ |
---|
1224 | } while(0) |
---|
1225 | |
---|
1226 | void TEST_merge_tagged_strings() { |
---|
1227 | // merge two fields: |
---|
1228 | TEST_MERGE_TAGGED("S", "D", "", "", "source", "dest", " [D_] dest [S_] source"); // @@@ elim leading space? |
---|
1229 | TEST_MERGE_TAGGED("SRC", "DST", "", 0, "source", "dest", " [DST] dest [SRC] source"); |
---|
1230 | TEST_MERGE_TAGGED("SRC", "DST", 0, "", "source", "dest", " [DST] dest [SRC] source"); |
---|
1231 | TEST_MERGE_TAGGED("SRC", "DST", 0, 0, "sth", "sth", " [DST,SRC] sth"); |
---|
1232 | |
---|
1233 | // update fields: |
---|
1234 | TEST_MERGE_TAGGED("SRC", "DST", 0, "SRC", "newsource", " [DST] dest [SRC] source", " [DST] dest [SRC] newsource"); |
---|
1235 | TEST_MERGE_TAGGED("SRC", "DST", 0, "SRC", "newsource", " [DST,SRC] sth", " [DST] sth [SRC] newsource"); |
---|
1236 | TEST_MERGE_TAGGED("SRC", "DST", 0, "SRC", "sth", " [DST] sth [SRC] source", " [DST,SRC] sth"); |
---|
1237 | |
---|
1238 | // append (opposed to update this keeps old entries with same tag; useless?) |
---|
1239 | TEST_MERGE_TAGGED("SRC", "DST", 0, 0, "newsource", "[DST] dest [SRC] source", " [DST] dest [SRC] newsource [SRC] source"); |
---|
1240 | TEST_MERGE_TAGGED("SRC", "DST", 0, 0, "newsource", "[DST,SRC] sth", " [DST,SRC] sth [SRC] newsource"); |
---|
1241 | TEST_MERGE_TAGGED("SRC", "DST", 0, 0, "sth", "[DST] sth [SRC] source", " [DST,SRC] sth [SRC] source"); |
---|
1242 | |
---|
1243 | // merge three fields: |
---|
1244 | TEST_MERGE_TAGGED("OTH", "DST", 0, 0, "oth", " [DST] dest [SRC] source", " [DST] dest [OTH] oth [SRC] source"); |
---|
1245 | TEST_MERGE_TAGGED("OTH", "DST", 0, 0, "oth", " [DST,SRC] sth", " [DST,SRC] sth [OTH] oth"); |
---|
1246 | TEST_MERGE_TAGGED("OTH", "DST", 0, 0, "sth", " [DST,SRC] sth", " [DST,OTH,SRC] sth"); |
---|
1247 | TEST_MERGE_TAGGED("OTH", "DST", 0, 0, "dest", " [DST] dest [SRC] source", " [DST,OTH] dest [SRC] source"); |
---|
1248 | TEST_MERGE_TAGGED("OTH", "DST", 0, 0, "source", " [DST] dest [SRC] source", " [DST] dest [OTH,SRC] source"); |
---|
1249 | |
---|
1250 | // same tests as in section above, but vv |
---|
1251 | TEST_MERGE_TAGGED("DST", "OTH", 0, 0, " [DST] dest [SRC] source", "oth", " [DST] dest [OTH] oth [SRC] source"); |
---|
1252 | TEST_MERGE_TAGGED("DST", "OTH", 0, 0, " [DST,SRC] sth", "oth", " [DST,SRC] sth [OTH] oth"); |
---|
1253 | TEST_MERGE_TAGGED("DST", "OTH", 0, 0, " [DST,SRC] sth", "sth", " [DST,OTH,SRC] sth"); |
---|
1254 | TEST_MERGE_TAGGED("DST", "OTH", 0, 0, " [DST] dest [SRC] source", "dest", " [DST,OTH] dest [SRC] source"); |
---|
1255 | TEST_MERGE_TAGGED("DST", "OTH", 0, 0, " [DST] dest [SRC] source", "source", " [DST] dest [OTH,SRC] source"); |
---|
1256 | } |
---|
1257 | |
---|
1258 | void TEST_log_action() { |
---|
1259 | for (int stamped = 0; stamped<=1; ++stamped) { |
---|
1260 | TEST_ANNOTATE(GBS_global_string("stamped=%i", stamped)); |
---|
1261 | { |
---|
1262 | char *logged = GBS_log_action_to("comment", "action", stamped); |
---|
1263 | if (stamped) { |
---|
1264 | TEST_EXPECT_CONTAINS(logged, "comment\n"); |
---|
1265 | TEST_EXPECT_CONTAINS(logged, "action\n"); |
---|
1266 | } |
---|
1267 | else { |
---|
1268 | TEST_EXPECT_EQUAL(logged, "comment\naction\n"); |
---|
1269 | } |
---|
1270 | free(logged); |
---|
1271 | } |
---|
1272 | { |
---|
1273 | char *logged = GBS_log_action_to("comment\n", "action", stamped); |
---|
1274 | if (stamped) { |
---|
1275 | TEST_EXPECT_CONTAINS(logged, "comment\n"); |
---|
1276 | TEST_EXPECT_CONTAINS(logged, "action\n"); |
---|
1277 | } |
---|
1278 | else { |
---|
1279 | TEST_EXPECT_EQUAL(logged, "comment\naction\n"); |
---|
1280 | } |
---|
1281 | free(logged); |
---|
1282 | } |
---|
1283 | { |
---|
1284 | char *logged = GBS_log_action_to("", "action", stamped); |
---|
1285 | if (stamped) { |
---|
1286 | TEST_EXPECT_EQUAL(logged[0], '\n'); |
---|
1287 | TEST_EXPECT_CONTAINS(logged, "action\n"); |
---|
1288 | } |
---|
1289 | else { |
---|
1290 | TEST_EXPECT_EQUAL(logged, "\naction\n"); |
---|
1291 | } |
---|
1292 | free(logged); |
---|
1293 | } |
---|
1294 | { |
---|
1295 | char *logged = GBS_log_action_to(NULL, "action\n", stamped); // test action with trailing LF |
---|
1296 | if (stamped) { |
---|
1297 | TEST_EXPECT_DIFFERENT(logged[0], '\n'); |
---|
1298 | TEST_EXPECT_CONTAINS(logged, "action\n"); |
---|
1299 | } |
---|
1300 | else { |
---|
1301 | TEST_EXPECT_EQUAL(logged, "action\n"); |
---|
1302 | } |
---|
1303 | free(logged); |
---|
1304 | } |
---|
1305 | } |
---|
1306 | } |
---|
1307 | TEST_PUBLISH(TEST_log_action); |
---|
1308 | |
---|
1309 | #endif // UNIT_TESTS |
---|
1310 | |
---|