1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : adstring.cxx // |
---|
4 | // Purpose : various string functions // |
---|
5 | // // |
---|
6 | // Institute of Microbiology (Technical University Munich) // |
---|
7 | // http://www.arb-home.de/ // |
---|
8 | // // |
---|
9 | // =============================================================== // |
---|
10 | |
---|
11 | #include <arb_backtrace.h> |
---|
12 | #include <arb_strbuf.h> |
---|
13 | #include <arb_sort.h> |
---|
14 | |
---|
15 | #include "gb_key.h" |
---|
16 | |
---|
17 | #include <SigHandler.h> |
---|
18 | |
---|
19 | #include <execinfo.h> |
---|
20 | |
---|
21 | #include <cstdarg> |
---|
22 | #include <cctype> |
---|
23 | #include <cerrno> |
---|
24 | #include <ctime> |
---|
25 | #include <setjmp.h> |
---|
26 | |
---|
27 | #include <valgrind.h> |
---|
28 | |
---|
29 | static char *GBS_string_2_key_with_exclusions(const char *str, const char *additional) { |
---|
30 | // converts any string to a valid key (all chars in 'additional' are additionally allowed) |
---|
31 | char buf[GB_KEY_LEN_MAX+1]; |
---|
32 | int i; |
---|
33 | int c; |
---|
34 | for (i=0; i<GB_KEY_LEN_MAX;) { |
---|
35 | c = *(str++); |
---|
36 | if (!c) break; |
---|
37 | |
---|
38 | if (c==' ' || c == '_') { |
---|
39 | buf[i++] = '_'; |
---|
40 | } |
---|
41 | else if (isalnum(c) || strchr(additional, c) != 0) { |
---|
42 | buf[i++] = c; |
---|
43 | } |
---|
44 | } |
---|
45 | for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_'; |
---|
46 | buf[i] = 0; |
---|
47 | return strdup(buf); |
---|
48 | } |
---|
49 | |
---|
50 | char *GBS_string_2_key(const char *str) // converts any string to a valid key |
---|
51 | { |
---|
52 | return GBS_string_2_key_with_exclusions(str, ""); |
---|
53 | } |
---|
54 | |
---|
char *GB_memdup(const char *source, size_t len) {
    // Returns a heap copy of the first 'len' bytes of 'source' (to be freed by caller).
    // Returns NULL if the allocation failed (malloc(0) may return NULL as well).
    char *dest = (char *)malloc(len);
    if (dest && len) memcpy(dest, source, len); // never call memcpy with a NULL destination
    return dest;
}
---|
60 | |
---|
61 | GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT |
---|
62 | // test whether all characters are letters, numbers or _ |
---|
63 | int i; |
---|
64 | long len; |
---|
65 | |
---|
66 | if (!key || key[0] == 0) return "Empty key is not allowed"; |
---|
67 | len = strlen(key); |
---|
68 | if (len>GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key); |
---|
69 | if (len < GB_KEY_LEN_MIN) return GBS_global_string("Invalid key '%s': too short", key); |
---|
70 | |
---|
71 | for (i = 0; key[i]; ++i) { |
---|
72 | char c = key[i]; |
---|
73 | if ((c>='a') && (c<='z')) continue; |
---|
74 | if ((c>='A') && (c<='Z')) continue; |
---|
75 | if ((c>='0') && (c<='9')) continue; |
---|
76 | if ((c=='_')) continue; |
---|
77 | return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key); |
---|
78 | } |
---|
79 | |
---|
80 | return 0; |
---|
81 | } |
---|
82 | GB_ERROR GB_check_link_name(const char *key) { // goes to header: __ATTR__USERESULT |
---|
83 | // test whether all characters are letters, numbers or _ |
---|
84 | int i; |
---|
85 | long len; |
---|
86 | |
---|
87 | if (!key || key[0] == 0) return GB_export_error("Empty key is not allowed"); |
---|
88 | len = strlen(key); |
---|
89 | if (len>GB_KEY_LEN_MAX) return GB_export_errorf("Invalid key '%s': too long", key); |
---|
90 | if (len < 1) return GB_export_errorf("Invalid key '%s': too short", key); // here it differs from GB_check_key |
---|
91 | |
---|
92 | for (i = 0; key[i]; ++i) { |
---|
93 | char c = key[i]; |
---|
94 | if ((c>='a') && (c<='z')) continue; |
---|
95 | if ((c>='A') && (c<='Z')) continue; |
---|
96 | if ((c>='0') && (c<='9')) continue; |
---|
97 | if ((c=='_')) continue; |
---|
98 | return GB_export_errorf("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key); |
---|
99 | } |
---|
100 | |
---|
101 | return 0; |
---|
102 | } |
---|
103 | GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT |
---|
104 | // test whether all characters are letters, numbers or _ |
---|
105 | // additionally allow '/' and '->' for hierarchical keys |
---|
106 | GB_ERROR err = 0; |
---|
107 | |
---|
108 | if (!key || key[0] == 0) { |
---|
109 | err = GB_export_error("Empty key is not allowed"); |
---|
110 | } |
---|
111 | else if (!strpbrk(key, "/-")) { |
---|
112 | err = GB_check_key(key); |
---|
113 | } |
---|
114 | else { |
---|
115 | char *key_copy = strdup(key); |
---|
116 | char *start = key_copy; |
---|
117 | |
---|
118 | if (start[0] == '/') ++start; |
---|
119 | |
---|
120 | while (start && !err) { |
---|
121 | char *key_end = strpbrk(start, "/-"); |
---|
122 | |
---|
123 | if (key_end) { |
---|
124 | char c = *key_end; |
---|
125 | *key_end = 0; |
---|
126 | err = GB_check_key(start); |
---|
127 | *key_end = c; |
---|
128 | |
---|
129 | if (c == '-') { |
---|
130 | if (key_end[1] != '>') { |
---|
131 | err = GB_export_errorf("'>' expected after '-' in '%s'", key); |
---|
132 | } |
---|
133 | start = key_end+2; |
---|
134 | } |
---|
135 | else { |
---|
136 | gb_assert(c == '/'); |
---|
137 | start = key_end+1; |
---|
138 | } |
---|
139 | } |
---|
140 | else { |
---|
141 | err = GB_check_key(start); |
---|
142 | start = 0; |
---|
143 | } |
---|
144 | } |
---|
145 | |
---|
146 | free(key_copy); |
---|
147 | } |
---|
148 | |
---|
149 | return err; |
---|
150 | } |
---|
151 | |
---|
152 | // --------------------------- |
---|
153 | // escape characters |
---|
154 | |
---|
char *GBS_remove_escape(char *com) {
    // Returns a heap copy of 'com' with backslash-escapes resolved ('\' is the escape character):
    //   \n -> newline, \t -> tab, \0 -> NUL byte, \<other> -> <other>
    // A single backslash at the end of 'com' is dropped.
    char       *result = strdup(com);
    const char *src    = result;
    char       *dst    = result; // decoding happens in place (dst never overtakes src)

    for (;;) {
        int ch = *src++;
        if (!ch) break;

        if (ch == '\\') {
            ch = *src++;
            if (!ch) break; // trailing backslash

            if      (ch == 'n') ch = '\n';
            else if (ch == 't') ch = '\t';
            else if (ch == '0') ch = '\0';
        }
        *dst++ = ch;
    }
    *dst = 0;
    return result;
}
---|
180 | |
---|
181 | // ---------------------------------------------- |
---|
182 | // escape/unescape characters in strings |
---|
183 | |
---|
184 | char *GBS_escape_string(const char *str, const char *chars_to_escape, char escape_char) { |
---|
185 | /*! escape characters in 'str' |
---|
186 | * |
---|
187 | * uses a special escape-method, which eliminates all 'chars_to_escape' completely |
---|
188 | * from str (this makes further processing of the string more easy) |
---|
189 | * |
---|
190 | * @param escape_char is the character used for escaping. For performance reasons it |
---|
191 | * should be a character rarely used in 'str'. |
---|
192 | * |
---|
193 | * @param chars_to_escape may not contain 'A'-'Z' (these are used for escaping) |
---|
194 | * and it may not be longer than 26 bytes |
---|
195 | * |
---|
196 | * @return heap copy of escaped string |
---|
197 | * |
---|
198 | * Inverse of GBS_unescape_string() |
---|
199 | */ |
---|
200 | |
---|
201 | int len = strlen(str); |
---|
202 | char *buffer = (char*)malloc(2*len+1); |
---|
203 | int j = 0; |
---|
204 | int i; |
---|
205 | |
---|
206 | gb_assert(strlen(chars_to_escape) <= 26); |
---|
207 | gb_assert(strchr(chars_to_escape, escape_char) == 0); // escape_char may not be included in chars_to_escape |
---|
208 | |
---|
209 | for (i = 0; str[i]; ++i) { |
---|
210 | if (str[i] == escape_char) { |
---|
211 | buffer[j++] = escape_char; |
---|
212 | buffer[j++] = escape_char; |
---|
213 | } |
---|
214 | else { |
---|
215 | const char *found = strchr(chars_to_escape, str[i]); |
---|
216 | if (found) { |
---|
217 | buffer[j++] = escape_char; |
---|
218 | buffer[j++] = (found-chars_to_escape+'A'); |
---|
219 | |
---|
220 | gb_assert(found[0]<'A' || found[0]>'Z'); // illegal character in chars_to_escape |
---|
221 | } |
---|
222 | else { |
---|
223 | |
---|
224 | buffer[j++] = str[i]; |
---|
225 | } |
---|
226 | } |
---|
227 | } |
---|
228 | buffer[j] = 0; |
---|
229 | |
---|
230 | return buffer; |
---|
231 | } |
---|
232 | |
---|
233 | char *GBS_unescape_string(const char *str, const char *escaped_chars, char escape_char) { |
---|
234 | //! inverse of GB_escape_string() - for params see there |
---|
235 | |
---|
236 | int len = strlen(str); |
---|
237 | char *buffer = (char*)malloc(len+1); |
---|
238 | int j = 0; |
---|
239 | int i; |
---|
240 | |
---|
241 | #if defined(ASSERTION_USED) |
---|
242 | int escaped_chars_len = strlen(escaped_chars); |
---|
243 | #endif // ASSERTION_USED |
---|
244 | |
---|
245 | gb_assert(strlen(escaped_chars) <= 26); |
---|
246 | gb_assert(strchr(escaped_chars, escape_char) == 0); // escape_char may not be included in chars_to_escape |
---|
247 | |
---|
248 | for (i = 0; str[i]; ++i) { |
---|
249 | if (str[i] == escape_char) { |
---|
250 | if (str[i+1] == escape_char) { |
---|
251 | buffer[j++] = escape_char; |
---|
252 | } |
---|
253 | else { |
---|
254 | int idx = str[i+1]-'A'; |
---|
255 | |
---|
256 | gb_assert(idx >= 0 && idx<escaped_chars_len); |
---|
257 | buffer[j++] = escaped_chars[idx]; |
---|
258 | } |
---|
259 | ++i; |
---|
260 | } |
---|
261 | else { |
---|
262 | buffer[j++] = str[i]; |
---|
263 | } |
---|
264 | } |
---|
265 | buffer[j] = 0; |
---|
266 | |
---|
267 | return buffer; |
---|
268 | } |
---|
269 | |
---|
270 | char *GBS_eval_env(GB_CSTR p) { |
---|
271 | GB_ERROR error = 0; |
---|
272 | GB_CSTR ka; |
---|
273 | GBS_strstruct *out = GBS_stropen(1000); |
---|
274 | |
---|
275 | while ((ka = GBS_find_string(p, "$(", 0))) { |
---|
276 | GB_CSTR kz = strchr(ka, ')'); |
---|
277 | if (!kz) { |
---|
278 | error = GBS_global_string("missing ')' for envvar '%s'", p); |
---|
279 | break; |
---|
280 | } |
---|
281 | else { |
---|
282 | char *envvar = GB_strpartdup(ka+2, kz-1); |
---|
283 | int len = ka-p; |
---|
284 | |
---|
285 | if (len) GBS_strncat(out, p, len); |
---|
286 | |
---|
287 | GB_CSTR genv = GB_getenv(envvar); |
---|
288 | if (genv) GBS_strcat(out, genv); |
---|
289 | |
---|
290 | p = kz+1; |
---|
291 | free(envvar); |
---|
292 | } |
---|
293 | } |
---|
294 | |
---|
295 | if (error) { |
---|
296 | GB_export_error(error); |
---|
297 | GBS_strforget(out); |
---|
298 | return 0; |
---|
299 | } |
---|
300 | |
---|
301 | GBS_strcat(out, p); // copy rest |
---|
302 | return GBS_strclose(out); |
---|
303 | } |
---|
304 | |
---|
long GBS_gcgchecksum(const char *seq) {
    // GCGchecksum:
    // sums up the uppercased characters of 'seq', each multiplied with a weight
    // cycling through 1..57; the result is the sum modulo 10000.
    long sum    = 0;
    long weight = 0;

    for (const char *p = seq; *p; ++p) {
        weight  = (weight == 57) ? 1 : weight+1;
        sum    += weight * toupper(*p);
    }
    return sum % 10000;
}
---|
322 | |
---|
323 | // Table of CRC-32's of all single byte values (made by makecrc.c of ZIP source) |
---|
// Lookup table with 256 entries, indexed by a byte value
// (GB_checksum indexes it with '(crc ^ byte) & 0xff').
uint32_t crctab[] = {
    0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
    0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
    0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
    0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
    0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
    0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
    0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
    0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
    0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
    0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
    0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
    0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
    0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
    0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
    0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
    0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
    0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
    0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
    0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
    0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
    0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
    0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
    0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
    0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
    0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
    0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
    0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
    0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
    0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
    0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
    0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
    0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
    0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
    0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
    0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
    0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
    0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
    0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
    0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
    0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
    0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
    0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
    0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
    0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
    0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
    0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
    0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
    0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
    0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
    0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
    0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
    0x2d02ef8dL
};
---|
378 | |
---|
379 | uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude) // RALF: 02-12-96 |
---|
380 | { |
---|
381 | /* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source |
---|
382 | * if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too) |
---|
383 | */ |
---|
384 | |
---|
385 | unsigned long c = 0xffffffffL; |
---|
386 | long n = length; |
---|
387 | int i; |
---|
388 | int tab[256]; |
---|
389 | |
---|
390 | for (i=0; i<256; i++) { |
---|
391 | tab[i] = ignore_case ? toupper(i) : i; |
---|
392 | } |
---|
393 | |
---|
394 | if (exclude) { |
---|
395 | while (1) { |
---|
396 | int k = *(unsigned char *)exclude++; |
---|
397 | if (!k) break; |
---|
398 | tab[k] = 0; |
---|
399 | if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0; |
---|
400 | } |
---|
401 | } |
---|
402 | |
---|
403 | while (n--) { |
---|
404 | i = tab[*(const unsigned char *)seq++]; |
---|
405 | if (i) { |
---|
406 | c = crctab[((int) c ^ i) & 0xff] ^ (c >> 8); |
---|
407 | } |
---|
408 | } |
---|
409 | c = c ^ 0xffffffffL; |
---|
410 | return c; |
---|
411 | } |
---|
412 | |
---|
413 | uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude) |
---|
414 | // if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too) |
---|
415 | { |
---|
416 | return GB_checksum(seq, strlen(seq), ignore_case, exclude); |
---|
417 | } |
---|
418 | |
---|
419 | /* extract all words in a text that: |
---|
420 | 1. minlen < 1.0 contain more than minlen*len_of_text characters that also exists in chars |
---|
421 | 2. minlen > 1.0 contain more than minlen characters that also exists in chars |
---|
422 | */ |
---|
423 | |
---|
424 | char *GBS_extract_words(const char *source, const char *chars, float minlen, bool sort_output) { |
---|
425 | char *s = strdup(source); |
---|
426 | char **ps = (char **)GB_calloc(sizeof(char *), (strlen(source)>>1) + 1); |
---|
427 | GBS_strstruct *strstruct = GBS_stropen(1000); |
---|
428 | char *f = s; |
---|
429 | int count = 0; |
---|
430 | char *p; |
---|
431 | char *h; |
---|
432 | int cnt; |
---|
433 | int len; |
---|
434 | int iminlen = (int) (minlen+.5); |
---|
435 | |
---|
436 | while ((p = strtok(f, " \t,;:|"))) { |
---|
437 | f = 0; |
---|
438 | cnt = 0; |
---|
439 | len = strlen(p); |
---|
440 | for (h=p; *h; h++) { |
---|
441 | if (strchr(chars, *h)) cnt++; |
---|
442 | } |
---|
443 | |
---|
444 | if (minlen == 1.0) { |
---|
445 | if (cnt != len) continue; |
---|
446 | } |
---|
447 | else if (minlen > 1.0) { |
---|
448 | if (cnt < iminlen) continue; |
---|
449 | } |
---|
450 | else { |
---|
451 | if (len < 3 || cnt < minlen*len) continue; |
---|
452 | } |
---|
453 | ps[count] = p; |
---|
454 | count ++; |
---|
455 | } |
---|
456 | if (sort_output) { |
---|
457 | GB_sort((void **)ps, 0, count, GB_string_comparator, 0); |
---|
458 | } |
---|
459 | for (cnt = 0; cnt<count; cnt++) { |
---|
460 | if (cnt) { |
---|
461 | GBS_chrcat(strstruct, ' '); |
---|
462 | } |
---|
463 | GBS_strcat(strstruct, ps[cnt]); |
---|
464 | } |
---|
465 | |
---|
466 | free(ps); |
---|
467 | free(s); |
---|
468 | return GBS_strclose(strstruct); |
---|
469 | } |
---|
470 | |
---|
471 | |
---|
size_t GBS_shorten_repeated_data(char *data) {
    // Shortens repeats in 'data' (in place, i.e. this function modifies 'data'!!)
    //
    // Runs of 5 or more identical characters are replaced by "<char>{<count>}",
    // e.g. "..............................ACGT....................TGCA"
    // ->   ".{30}ACGT.{20}TGCA"
    //
    // Returns the length of the shortened string.

#if defined(DEBUG)
    size_t orgLen = strlen(data);
#endif // DEBUG
    char *const  begin    = data;
    char        *out      = data; // write position (never overtakes the read position)
    const char  *in       = data;
    char         run_char = *in++;
    size_t       run_len  = 1;

    while (run_char) {
        const char next = *in++;

        if (next == run_char) {
            ++run_len;
            continue;
        }

        // run has ended -> write it
        if (run_len < 5) {
            for (size_t r = 0; r<run_len; ++r) *out++ = run_char; // insert plain
        }
        else {
            out += sprintf(out, "%c{%zu}", run_char, run_len); // insert repeat count
        }

        run_char = next;
        run_len  = 1;
    }

    *out = 0;

#if defined(DEBUG)

    gb_assert(strlen(begin) <= orgLen);
#endif // DEBUG
    return out-begin;
}
---|
512 | |
---|
513 | |
---|
514 | // ------------------------------------------- |
---|
515 | // helper function for tagged fields |
---|
516 | |
---|
517 | static GB_ERROR g_bs_add_value_tag_to_hash(GBDATA *gb_main, GB_HASH *hash, char *tag, char *value, const char *rtag, const char *srt, const char *aci, GBDATA *gbd) { |
---|
518 | char *p; |
---|
519 | GB_HASH *sh; |
---|
520 | char *to_free = 0; |
---|
521 | if (rtag && strcmp(tag, rtag) == 0) { |
---|
522 | if (srt) { |
---|
523 | value = to_free = GBS_string_eval(value, srt, gbd); |
---|
524 | } |
---|
525 | else if (aci) { |
---|
526 | value = to_free = GB_command_interpreter(gb_main, value, aci, gbd, 0); |
---|
527 | } |
---|
528 | if (!value) return GB_await_error(); |
---|
529 | } |
---|
530 | |
---|
531 | p=value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{' |
---|
532 | p=value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}' |
---|
533 | |
---|
534 | sh = (GB_HASH *)GBS_read_hash(hash, value); |
---|
535 | if (!sh) { |
---|
536 | sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent |
---|
537 | GBS_write_hash(hash, value, (long)sh); |
---|
538 | } |
---|
539 | |
---|
540 | GBS_write_hash(sh, tag, 1); |
---|
541 | if (to_free) free(to_free); |
---|
542 | return 0; |
---|
543 | } |
---|
544 | |
---|
545 | |
---|
546 | static GB_ERROR g_bs_convert_string_to_tagged_hash(GB_HASH *hash, char *s, char *default_tag, const char *del, |
---|
547 | GBDATA *gb_main, const char *rtag, const char *srt, const char *aci, GBDATA *gbd) { |
---|
548 | char *se; // string end |
---|
549 | char *sa; // string start and tag end |
---|
550 | char *ts; // tag start |
---|
551 | char *t; |
---|
552 | GB_ERROR error = 0; |
---|
553 | while (s && s[0]) { |
---|
554 | ts = strchr(s, '['); |
---|
555 | if (!ts) { |
---|
556 | error = g_bs_add_value_tag_to_hash(gb_main, hash, default_tag, s, rtag, srt, aci, gbd); // no tag found, use default tag |
---|
557 | if (error) break; |
---|
558 | break; |
---|
559 | } |
---|
560 | else { |
---|
561 | *(ts++) = 0; |
---|
562 | } |
---|
563 | sa = strchr(ts, ']'); |
---|
564 | if (sa) { |
---|
565 | *sa++ = 0; |
---|
566 | while (*sa == ' ') sa++; |
---|
567 | } |
---|
568 | else { |
---|
569 | error = g_bs_add_value_tag_to_hash(gb_main, hash, default_tag, s, rtag, srt, aci, gbd); // no tag found, use default tag |
---|
570 | if (error) break; |
---|
571 | break; |
---|
572 | } |
---|
573 | se = strchr(sa, '['); |
---|
574 | if (se) { |
---|
575 | while (se>sa && se[-1] == ' ') se--; |
---|
576 | *(se++) = 0; |
---|
577 | } |
---|
578 | for (t = strtok(ts, ","); t; t = strtok(0, ",")) { |
---|
579 | if (del && strcmp(t, del) == 0) continue; // test, whether to delete |
---|
580 | if (sa[0] == 0) continue; |
---|
581 | error = g_bs_add_value_tag_to_hash(gb_main, hash, t, sa, rtag, srt, aci, gbd); // tag found, use tag |
---|
582 | if (error) break; |
---|
583 | } |
---|
584 | s = se; |
---|
585 | } |
---|
586 | return error; |
---|
587 | } |
---|
588 | |
---|
589 | static long g_bs_merge_tags(const char *tag, long val, void *cd_sub_result) { |
---|
590 | GBS_strstruct *sub_result = (GBS_strstruct*)cd_sub_result; |
---|
591 | |
---|
592 | GBS_strcat(sub_result, tag); |
---|
593 | GBS_strcat(sub_result, ","); |
---|
594 | |
---|
595 | return val; |
---|
596 | } |
---|
597 | |
---|
598 | static long g_bs_read_tagged_hash(const char *value, long subhash, void *cd_g_bs_collect_tags_hash) { |
---|
599 | char *str; |
---|
600 | static int counter = 0; |
---|
601 | GBS_strstruct *sub_result = GBS_stropen(100); |
---|
602 | |
---|
603 | GBS_hash_do_sorted_loop((GB_HASH *)subhash, g_bs_merge_tags, GBS_HCF_sortedByKey, sub_result); |
---|
604 | GBS_intcat(sub_result, counter++); // create a unique number |
---|
605 | |
---|
606 | str = GBS_strclose(sub_result); |
---|
607 | |
---|
608 | GB_HASH *g_bs_collect_tags_hash = (GB_HASH*)cd_g_bs_collect_tags_hash; |
---|
609 | GBS_write_hash(g_bs_collect_tags_hash, str, (long)strdup(value)); // send output to new hash for sorting |
---|
610 | |
---|
611 | free(str); |
---|
612 | return 0; |
---|
613 | } |
---|
614 | |
---|
615 | static long g_bs_read_final_hash(const char *tag, long value, void *cd_merge_result) { |
---|
616 | GBS_strstruct *merge_result = (GBS_strstruct*)cd_merge_result; |
---|
617 | |
---|
618 | char *lk = const_cast<char*>(strrchr(tag, ',')); |
---|
619 | if (lk) { // remove number at end |
---|
620 | *lk = 0; |
---|
621 | GBS_strcat(merge_result, " ["); |
---|
622 | GBS_strcat(merge_result, tag); |
---|
623 | GBS_strcat(merge_result, "] "); |
---|
624 | } |
---|
625 | GBS_strcat(merge_result, (char *)value); |
---|
626 | return value; |
---|
627 | } |
---|
628 | |
---|
629 | static char *g_bs_get_string_of_tag_hash(GB_HASH *tag_hash) { |
---|
630 | GBS_strstruct *merge_result = GBS_stropen(256); |
---|
631 | GB_HASH *collect_tags_hash = GBS_create_dynaval_hash(512, GB_IGNORE_CASE, GBS_dynaval_free); |
---|
632 | |
---|
633 | GBS_hash_do_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash |
---|
634 | GBS_hash_do_sorted_loop(collect_tags_hash, g_bs_read_final_hash, GBS_HCF_sortedByKey, merge_result); |
---|
635 | |
---|
636 | GBS_free_hash(collect_tags_hash); |
---|
637 | return GBS_strclose(merge_result); |
---|
638 | } |
---|
639 | |
---|
640 | static long g_bs_free_hash_of_hashes_elem(const char */*key*/, long val, void *) { |
---|
641 | GB_HASH *hash = (GB_HASH*)val; |
---|
642 | if (hash) GBS_free_hash(hash); |
---|
643 | return 0; |
---|
644 | } |
---|
645 | static void g_bs_free_hash_of_hashes(GB_HASH *hash) { |
---|
646 | GBS_hash_do_loop(hash, g_bs_free_hash_of_hashes_elem, NULL); |
---|
647 | GBS_free_hash(hash); |
---|
648 | } |
---|
649 | |
---|
650 | char *GBS_merge_tagged_strings(const char *s1, const char *tag1, const char *replace1, const char *s2, const char *tag2, const char *replace2) { |
---|
651 | /* Create a tagged string from two tagged strings: |
---|
652 | * a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string' |
---|
653 | * |
---|
654 | * if 's2' is not empty, then delete tag 'replace1' in 's1' |
---|
655 | * if 's1' is not empty, then delete tag 'replace2' in 's2' |
---|
656 | * |
---|
657 | * if result is NULL, an error has been exported. |
---|
658 | */ |
---|
659 | |
---|
660 | char *str1 = strdup(s1); |
---|
661 | char *str2 = strdup(s2); |
---|
662 | char *t1 = GBS_string_2_key(tag1); |
---|
663 | char *t2 = GBS_string_2_key(tag2); |
---|
664 | char *result = 0; |
---|
665 | GB_ERROR error = 0; |
---|
666 | GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE); |
---|
667 | |
---|
668 | if (!strlen(s1)) replace2 = 0; |
---|
669 | if (!strlen(s2)) replace1 = 0; |
---|
670 | |
---|
671 | if (replace1 && replace1[0] == 0) replace1 = 0; |
---|
672 | if (replace2 && replace2[0] == 0) replace2 = 0; |
---|
673 | |
---|
674 | error = g_bs_convert_string_to_tagged_hash(hash, str1, t1, replace1, 0, 0, 0, 0, 0); |
---|
675 | if (!error) error = g_bs_convert_string_to_tagged_hash(hash, str2, t2, replace2, 0, 0, 0, 0, 0); |
---|
676 | |
---|
677 | if (!error) { |
---|
678 | result = g_bs_get_string_of_tag_hash(hash); |
---|
679 | } |
---|
680 | else { |
---|
681 | GB_export_error(error); |
---|
682 | } |
---|
683 | |
---|
684 | g_bs_free_hash_of_hashes(hash); |
---|
685 | |
---|
686 | free(t2); |
---|
687 | free(t1); |
---|
688 | free(str2); |
---|
689 | free(str1); |
---|
690 | |
---|
691 | return result; |
---|
692 | } |
---|
693 | |
---|
694 | char *GBS_string_eval_tagged_string(GBDATA *gb_main, const char *s, const char *dt, const char *tag, const char *srt, const char *aci, GBDATA *gbd) { |
---|
695 | /* if 's' is untagged, tag it with default tag 'dt'. |
---|
696 | * if 'tag' is != NULL -> apply 'srt' or 'aci' to that part of the content of 's', which is tagged with 'tag' |
---|
697 | * |
---|
698 | * if result is NULL, an error has been exported. |
---|
699 | */ |
---|
700 | |
---|
701 | char *str = strdup(s); |
---|
702 | char *default_tag = GBS_string_2_key(dt); |
---|
703 | GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE); |
---|
704 | char *result = 0; |
---|
705 | GB_ERROR error = g_bs_convert_string_to_tagged_hash(hash, str, default_tag, 0, gb_main, tag, srt, aci, gbd); |
---|
706 | |
---|
707 | if (!error) { |
---|
708 | result = g_bs_get_string_of_tag_hash(hash); |
---|
709 | } |
---|
710 | else { |
---|
711 | GB_export_error(error); |
---|
712 | } |
---|
713 | |
---|
714 | g_bs_free_hash_of_hashes(hash); |
---|
715 | free(default_tag); |
---|
716 | free(str); |
---|
717 | |
---|
718 | return result; |
---|
719 | } |
---|
720 | |
---|
721 | |
---|
722 | char *GB_read_as_tagged_string(GBDATA *gbd, const char *tagi) { |
---|
723 | char *s; |
---|
724 | char *tag; |
---|
725 | char *buf; |
---|
726 | char *se; // string end |
---|
727 | char *sa; // string anfang and tag end |
---|
728 | char *ts; // tag start |
---|
729 | char *t; |
---|
730 | |
---|
731 | buf = s = GB_read_as_string(gbd); |
---|
732 | if (!s) return s; |
---|
733 | if (!tagi) return s; |
---|
734 | if (!strlen(tagi)) return s; |
---|
735 | |
---|
736 | tag = GBS_string_2_key(tagi); |
---|
737 | |
---|
738 | while (s) { |
---|
739 | ts = strchr(s, '['); |
---|
740 | if (!ts) goto notfound; // no tag |
---|
741 | |
---|
742 | *(ts++) = 0; |
---|
743 | |
---|
744 | sa = strchr(ts, ']'); |
---|
745 | if (!sa) goto notfound; |
---|
746 | |
---|
747 | *sa++ = 0; |
---|
748 | while (*sa == ' ') sa++; |
---|
749 | |
---|
750 | se = strchr(sa, '['); |
---|
751 | if (se) { |
---|
752 | while (se>sa && se[-1] == ' ') se--; |
---|
753 | *(se++) = 0; |
---|
754 | } |
---|
755 | for (t = strtok(ts, ","); t; t = strtok(0, ",")) { |
---|
756 | if (strcmp(t, tag) == 0) { |
---|
757 | s = strdup(sa); |
---|
758 | free(buf); |
---|
759 | goto found; |
---|
760 | } |
---|
761 | } |
---|
762 | s = se; |
---|
763 | } |
---|
764 | notfound : |
---|
765 | // Nothing found |
---|
766 | free(buf); |
---|
767 | s = 0; |
---|
768 | found : |
---|
769 | free(tag); |
---|
770 | return s; |
---|
771 | } |
---|
772 | |
---|
773 | |
---|
774 | /* be CAREFUL : this function is used to save ARB ASCII database (i.e. properties) |
---|
775 | * used as well to save perl macros |
---|
776 | * |
---|
777 | * when changing GBS_fwrite_string -> GBS_fread_string needs to be fixed as well |
---|
778 | * |
---|
779 | * always keep in mind, that many users have databases/macros written with older |
---|
780 | * versions of this function. They MUST load proper!!! |
---|
781 | */ |
---|
void GBS_fwrite_string(const char *strngi, FILE *out) {
    // Writes 'strngi' to 'out', enclosed in double quotes.
    // Encoding used:
    //   newline          -> \n
    //   tab              -> \t
    //   other ASCII < 25 -> \A .. \X  (character + '@')
    //   ASCII 25..31     -> \0 .. \6
    //   "                -> \"
    //   backslash        -> doubled backslash
    // All other characters are written unchanged.
    const unsigned char *cur = (const unsigned char *)strngi;

    putc('"', out);

    for (int c = *cur++; c; c = *cur++) {
        if (c >= 32) {
            if (c == '"' || c == '\\') putc('\\', out); // these two need escaping
            putc(c, out);
            continue;
        }

        // control character
        putc('\\', out);
        switch (c) {
            case '\n': putc('n', out); break;
            case '\t': putc('t', out); break;
            default: {
                int code = c<25 ? c+'@' : c+('0'-25);
                putc(code, out);
                break;
            }
        }
    }
    putc('"', out);
}
---|
816 | |
---|
817 | /* Read a string from a file written by GBS_fwrite_string, |
---|
818 | * Searches first '"' |
---|
819 | * |
---|
820 | * WARNING : changing this function affects perl-macro execution (read warnings for GBS_fwrite_string) |
---|
821 | * any changes should be done in GBS_fconvert_string too. |
---|
822 | */ |
---|
823 | |
---|
824 | static char *GBS_fread_string(FILE *in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused! |
---|
825 | GBS_strstruct *strstr = GBS_stropen(1024); |
---|
826 | int x; |
---|
827 | |
---|
828 | while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"' |
---|
829 | |
---|
830 | if (x != EOF) { |
---|
831 | while ((x = getc(in)) != '"') { |
---|
832 | if (x == EOF) break; |
---|
833 | if (x == '\\') { |
---|
834 | x = getc(in); if (x==EOF) break; |
---|
835 | if (x == 'n') { |
---|
836 | GBS_chrcat(strstr, '\n'); |
---|
837 | continue; |
---|
838 | } |
---|
839 | if (x == 't') { |
---|
840 | GBS_chrcat(strstr, '\t'); |
---|
841 | continue; |
---|
842 | } |
---|
843 | if (x>='@' && x <= '@' + 25) { |
---|
844 | GBS_chrcat(strstr, x-'@'); |
---|
845 | continue; |
---|
846 | } |
---|
847 | if (x>='0' && x <= '9') { |
---|
848 | GBS_chrcat(strstr, x-('0'-25)); |
---|
849 | continue; |
---|
850 | } |
---|
851 | // all other backslashes are simply skipped |
---|
852 | } |
---|
853 | GBS_chrcat(strstr, x); |
---|
854 | } |
---|
855 | } |
---|
856 | return GBS_strclose(strstr); |
---|
857 | } |
---|
858 | |
---|
859 | /* does similar decoding as GBS_fread_string but works directly on an existing buffer |
---|
860 | * (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!) |
---|
861 | * |
---|
862 | * inserts \0 behind decoded string (removes the closing '"') |
---|
863 | * returns a pointer behind the end (") of the _encoded_ string |
---|
864 | * returns NULL if a 0-character is found |
---|
865 | */ |
---|
866 | char *GBS_fconvert_string(char *buffer) { |
---|
867 | char *t = buffer; |
---|
868 | char *f = buffer; |
---|
869 | int x; |
---|
870 | |
---|
871 | gb_assert(f[-1] == '"'); |
---|
872 | // the opening " has already been read |
---|
873 | |
---|
874 | while ((x = *f++) != '"') { |
---|
875 | if (!x) break; |
---|
876 | |
---|
877 | if (x == '\\') { |
---|
878 | x = *f++; |
---|
879 | if (!x) break; |
---|
880 | |
---|
881 | if (x == 'n') { |
---|
882 | *t++ = '\n'; |
---|
883 | continue; |
---|
884 | } |
---|
885 | if (x == 't') { |
---|
886 | *t++ = '\t'; |
---|
887 | continue; |
---|
888 | } |
---|
889 | if (x>='@' && x <= '@' + 25) { |
---|
890 | *t++ = x-'@'; |
---|
891 | continue; |
---|
892 | } |
---|
893 | if (x>='0' && x <= '9') { |
---|
894 | *t++ = x-('0'-25); |
---|
895 | continue; |
---|
896 | } |
---|
897 | // all other backslashes are simply skipped |
---|
898 | } |
---|
899 | *t++ = x; |
---|
900 | } |
---|
901 | |
---|
902 | if (!x) return 0; // error (string should not contain 0-character) |
---|
903 | gb_assert(x == '"'); |
---|
904 | |
---|
905 | t[0] = 0; |
---|
906 | return f; |
---|
907 | } |
---|
908 | |
---|
909 | char *GBS_replace_tabs_by_spaces(const char *text) { |
---|
910 | int tlen = strlen(text); |
---|
911 | GBS_strstruct *mfile = GBS_stropen(tlen * 3/2); |
---|
912 | int tabpos = 0; |
---|
913 | int c; |
---|
914 | |
---|
915 | while ((c=*(text++))) { |
---|
916 | if (c == '\t') { |
---|
917 | int ntab = (tabpos + 8) & 0xfffff8; |
---|
918 | while (tabpos < ntab) { |
---|
919 | GBS_chrcat(mfile, ' '); |
---|
920 | tabpos++; |
---|
921 | } |
---|
922 | continue; |
---|
923 | } |
---|
924 | tabpos ++; |
---|
925 | if (c == '\n') { |
---|
926 | tabpos = 0; |
---|
927 | } |
---|
928 | GBS_chrcat(mfile, c); |
---|
929 | } |
---|
930 | return GBS_strclose(mfile); |
---|
931 | } |
---|
932 | |
---|
933 | const char *GBS_readable_size(unsigned long long size, const char *unit_suffix) { |
---|
934 | // return human readable size information |
---|
935 | // returned string is maximal 6+strlen(unit) characters long |
---|
936 | // (using "b" as 'unit_suffix' produces '### b', '### Mb' etc) |
---|
937 | |
---|
938 | if (size<1000) return GBS_global_string("%llu %s", size, unit_suffix); |
---|
939 | |
---|
940 | const char *units = "kMGTPEZY"; // kilo, Mega, Giga, Tera, ... should be enough forever |
---|
941 | int i; |
---|
942 | |
---|
943 | for (i = 0; units[i]; ++i) { |
---|
944 | char unit = units[i]; |
---|
945 | if (size<1000*1024) { |
---|
946 | double amount = size/(double)1024; |
---|
947 | if (amount<10.0) return GBS_global_string("%4.2f %c%s", amount+0.005, unit, unit_suffix); |
---|
948 | if (amount<100.0) return GBS_global_string("%4.1f %c%s", amount+0.05, unit, unit_suffix); |
---|
949 | return GBS_global_string("%i %c%s", (int)(amount+0.5), unit, unit_suffix); |
---|
950 | } |
---|
951 | size /= 1024; // next unit |
---|
952 | } |
---|
953 | return GBS_global_string("MUCH %s", unit_suffix); |
---|
954 | } |
---|
955 | |
---|
956 | char *GBS_trim(const char *str) { |
---|
957 | // trim whitespace at beginning and end of 'str' |
---|
958 | const char *whitespace = " \t\n"; |
---|
959 | while (str[0] && strchr(whitespace, str[0])) str++; |
---|
960 | |
---|
961 | const char *end = strchr(str, 0)-1; |
---|
962 | while (end >= str && strchr(whitespace, end[0])) end--; |
---|
963 | |
---|
964 | return GB_strpartdup(str, end); |
---|
965 | } |
---|
966 | |
---|
967 | static char *dated_info(const char *info) { |
---|
968 | char *dated_info = 0; |
---|
969 | time_t date; |
---|
970 | if (time(&date) != -1) { |
---|
971 | char *dstr = ctime(&date); |
---|
972 | char *nl = strchr(dstr, '\n'); |
---|
973 | |
---|
974 | if (nl) nl[0] = 0; // cut off LF |
---|
975 | |
---|
976 | dated_info = GBS_global_string_copy("%s: %s", dstr, info); |
---|
977 | } |
---|
978 | else { |
---|
979 | dated_info = strdup(info); |
---|
980 | } |
---|
981 | return dated_info; |
---|
982 | } |
---|
983 | |
---|
984 | char *GBS_log_dated_action_to(const char *comment, const char *action) { |
---|
985 | /*! appends 'action' prefixed by current timestamp to 'comment' |
---|
986 | */ |
---|
987 | size_t clen = comment ? strlen(comment) : 0; |
---|
988 | size_t alen = strlen(action); |
---|
989 | |
---|
990 | GBS_strstruct *new_comment = GBS_stropen(clen+alen+100); |
---|
991 | |
---|
992 | if (comment) { |
---|
993 | GBS_strcat(new_comment, comment); |
---|
994 | if (comment[clen-1] != '\n') GBS_chrcat(new_comment, '\n'); |
---|
995 | } |
---|
996 | |
---|
997 | char *dated_action = dated_info(action); |
---|
998 | GBS_strcat(new_comment, dated_action); |
---|
999 | GBS_chrcat(new_comment, '\n'); |
---|
1000 | |
---|
1001 | free(dated_action); |
---|
1002 | |
---|
1003 | return GBS_strclose(new_comment); |
---|
1004 | } |
---|
1005 | |
---|
1006 | // -------------------------------------------------------------------------------- |
---|
1007 | |
---|
1008 | #ifdef UNIT_TESTS |
---|
1009 | |
---|
1010 | #include <test_unit.h> |
---|
1011 | |
---|
#ifdef ENABLE_CRASH_TESTS
// helper for TEST_signal_tests: writes through a null pointer (intended to crash with SEGV)
static void provokesegv() { *(int *)0 = 0; }
#if defined(ASSERTION_USED)
// helper for TEST_signal_tests: deliberately fails an assertion
static void failassertion() { gb_assert(0); }
// helper for TEST_signal_tests: expects an assertion failure from code that segfaults instead
static void provokesegv_does_not_fail_assertion() {
    // provokesegv does not raise assertion
    // -> the following assertion fails
    TEST_ASSERT_CODE_ASSERTION_FAILS(provokesegv);
}
#endif
#endif
---|
1023 | |
---|
void TEST_signal_tests() {
    // Self-test of the test framework's crash detection.
    //
    // NOTE(review): the helpers used below are only defined if ENABLE_CRASH_TESTS
    // (and ASSERTION_USED) are defined; presumably the TEST_ASSERT_* macros expand
    // to nothing otherwise -- confirm in test_unit.h

    // check whether we can test for SEGV and assertion failures
    TEST_ASSERT_SEGFAULT(provokesegv);
    TEST_ASSERT_CODE_ASSERTION_FAILS(failassertion);

    // tests whether signal suppression works multiple times (by repeating tests)
    TEST_ASSERT_CODE_ASSERTION_FAILS(failassertion);
    TEST_ASSERT_SEGFAULT(provokesegv);

    // test whether SEGV can be distinguished from assertion
    TEST_ASSERT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion);
}
---|
1036 | |
---|
// compares GBS_mempntr(strstr) with 'content'
// (relies on a variable named 'strstr' being visible where the macro is used)
#define EXPECT_CONTENT(content) TEST_ASSERT_EQUAL(GBS_mempntr(strstr), content)
---|
1038 | |
---|
void TEST_GBS_strstruct() {
    // Exercises the GBS_strstruct functions (append, cut, close/forget, buffer growth).
    // Each section opens its own GBS_strstruct named 'strstr' (required by EXPECT_CONTENT).
    {
        GBS_strstruct *strstr = GBS_stropen(1000); EXPECT_CONTENT("");

        // append characters, integers and floats
        GBS_chrncat(strstr, 'b', 3);        EXPECT_CONTENT("bbb");
        GBS_intcat(strstr, 17);             EXPECT_CONTENT("bbb17");
        GBS_chrcat(strstr, '_');            EXPECT_CONTENT("bbb17_");
        GBS_floatcat(strstr, 3.5);          EXPECT_CONTENT("bbb17_3.500000");

        // cut off all but the first character, then append (partial) strings
        TEST_ASSERT_EQUAL(GBS_memoffset(strstr), 14);
        GBS_str_cut_tail(strstr, 13);       EXPECT_CONTENT("b");
        GBS_strcat(strstr, "utter");        EXPECT_CONTENT("butter");
        GBS_strncat(strstr, "flying", 3);   EXPECT_CONTENT("butterfly");

        // formatted append
        GBS_strnprintf(strstr, 200, "%c%s", ' ', "flutters");
        EXPECT_CONTENT("butterfly flutters");

        free(GBS_strclose(strstr));
    }
    {
        // re-alloc smaller
        GBS_strstruct *strstr = GBS_stropen(500); EXPECT_CONTENT("");
        GBS_strforget(strstr);
    }

    // trigger downsize of oversized block
    for (int i = 0; i<12; ++i) {
        GBS_strstruct *strstr = GBS_stropen(10);
        GBS_strforget(strstr);
    }

    {
        GBS_strstruct *strstr     = GBS_stropen(10);
        size_t         oldbufsize = strstr->get_buffer_size();
        GBS_chrncat(strstr, 'x', 20);       // trigger reallocation of buffer

        TEST_ASSERT(oldbufsize != strstr->get_buffer_size()); // did we reallocate?
        EXPECT_CONTENT("xxxxxxxxxxxxxxxxxxxx");
        GBS_strforget(strstr);
    }
}
---|
1080 | |
---|
// runs GBS_shorten_repeated_data on a heap copy of 'Long' (the function works in-place)
// and expects the result to equal 'Short'
#define TEST_SHORTENED_EQUALS(Long,Short) do {  \
        char *buf = strdup(Long);               \
        GBS_shorten_repeated_data(buf);         \
        TEST_ASSERT_EQUAL(buf, Short);          \
        free(buf);                              \
    } while(0)
---|
1087 | |
---|
void TEST_GBS_shorten_repeated_data() {
    // Pins the behavior of GBS_shorten_repeated_data:
    // runs of 5 or more identical characters become "<char>{<count>}",
    // shorter runs (and strings without repeats) stay unchanged.

    TEST_SHORTENED_EQUALS("12345", "12345");
    TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc");
    TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc");
    TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc");
    TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc");
    TEST_SHORTENED_EQUALS("aaaabc", "aaaabc"); // runs below 5 are not shortened
    TEST_SHORTENED_EQUALS("aaabc", "aaabc");
    TEST_SHORTENED_EQUALS("aabc", "aabc");
    TEST_SHORTENED_EQUALS("", "");

}
---|
1104 | #endif |
---|
1105 | |
---|