| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : adstring.cxx // |
|---|
| 4 | // Purpose : various string functions // |
|---|
| 5 | // // |
|---|
| 6 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 7 | // http://www.arb-home.de/ // |
|---|
| 8 | // // |
|---|
| 9 | // =============================================================== // |
|---|
| 10 | |
|---|
| 11 | #include <arb_backtrace.h> |
|---|
| 12 | #include <arb_strbuf.h> |
|---|
| 13 | #include <arb_defs.h> |
|---|
| 14 | #include <arb_str.h> |
|---|
| 15 | |
|---|
| 16 | #include "gb_key.h" |
|---|
| 17 | #include "gb_aci.h" |
|---|
| 18 | |
|---|
| 19 | #include <SigHandler.h> |
|---|
| 20 | |
|---|
| 21 | #include <execinfo.h> |
|---|
| 22 | |
|---|
| 23 | #include <cstdarg> |
|---|
| 24 | #include <cctype> |
|---|
| 25 | #include <cerrno> |
|---|
| 26 | #include <ctime> |
|---|
| 27 | #include <setjmp.h> |
|---|
| 28 | |
|---|
| 29 | #include <valgrind.h> |
|---|
| 30 | |
|---|
| 31 | static char *GBS_string_2_key_with_exclusions(const char *str, const char *additional) { |
|---|
| 32 | // converts any string to a valid key (all chars in 'additional' are additionally allowed) |
|---|
| 33 | char buf[GB_KEY_LEN_MAX+1]; |
|---|
| 34 | int i; |
|---|
| 35 | int c; |
|---|
| 36 | for (i=0; i<GB_KEY_LEN_MAX;) { |
|---|
| 37 | c = *(str++); |
|---|
| 38 | if (!c) break; |
|---|
| 39 | |
|---|
| 40 | if (c==' ' || c == '_') { |
|---|
| 41 | buf[i++] = '_'; |
|---|
| 42 | } |
|---|
| 43 | else if (isalnum(c) || strchr(additional, c)) { |
|---|
| 44 | buf[i++] = c; |
|---|
| 45 | } |
|---|
| 46 | } |
|---|
| 47 | for (; i<GB_KEY_LEN_MIN; i++) buf[i] = '_'; |
|---|
| 48 | buf[i] = 0; |
|---|
| 49 | return ARB_strdup(buf); |
|---|
| 50 | } |
|---|
| 51 | |
|---|
| 52 | char *GBS_string_2_key(const char *str) { // converts any string to a valid key |
|---|
| 53 | return GBS_string_2_key_with_exclusions(str, ""); |
|---|
| 54 | } |
|---|
| 55 | |
|---|
| 56 | char *GB_memdup(const char *source, size_t len) { |
|---|
| 57 | char *dest = ARB_alloc<char>(len); |
|---|
| 58 | memcpy(dest, source, len); |
|---|
| 59 | return dest; |
|---|
| 60 | } |
|---|
| 61 | |
|---|
// error message shared by the key checks below (pointer itself is constant as well)
static const char *const EMPTY_KEY_NOT_ALLOWED = "Empty key is not allowed";
|---|
| 63 | |
|---|
| 64 | inline __ATTR__USERESULT GB_ERROR check_key(const char *key, int len) { |
|---|
| 65 | // test if 'key' is a valid non-hierarchical database key. |
|---|
| 66 | // i.e. contains only letters, numbers and '_' and |
|---|
| 67 | // is inside length constraints GB_KEY_LEN_MIN/GB_KEY_LEN_MAX. |
|---|
| 68 | |
|---|
| 69 | if (len < GB_KEY_LEN_MIN) { |
|---|
| 70 | if (!len) return EMPTY_KEY_NOT_ALLOWED; |
|---|
| 71 | return GBS_global_string("Invalid key '%s': too short", key); |
|---|
| 72 | } |
|---|
| 73 | if (len > GB_KEY_LEN_MAX) return GBS_global_string("Invalid key '%s': too long", key); |
|---|
| 74 | |
|---|
| 75 | for (int i = 0; i<len; ++i) { |
|---|
| 76 | char c = key[i]; |
|---|
| 77 | bool validChar = isalnum(c) || c == '_'; |
|---|
| 78 | if (!validChar) { |
|---|
| 79 | return GBS_global_string("Invalid character '%c' in '%s'; allowed: a-z A-Z 0-9 '_' ", c, key); |
|---|
| 80 | } |
|---|
| 81 | } |
|---|
| 82 | |
|---|
| 83 | return NULp; |
|---|
| 84 | } |
|---|
| 85 | GB_ERROR GB_check_key(const char *key) { // goes to header: __ATTR__USERESULT |
|---|
| 86 | // test if 'key' is a valid non-hierarchical database key |
|---|
| 87 | // (i.e. a valid name for a container or field). |
|---|
| 88 | |
|---|
| 89 | return check_key(key, key ? strlen(key) : 0); |
|---|
| 90 | } |
|---|
| 91 | |
|---|
| 92 | GB_ERROR GB_check_hkey(const char *key) { // goes to header: __ATTR__USERESULT |
|---|
| 93 | // test whether 'key' is a hierarchical key, |
|---|
| 94 | // i.e. consists of subkeys (accepted by GB_check_key), separated by '/'. |
|---|
| 95 | |
|---|
| 96 | GB_ERROR err = NULp; |
|---|
| 97 | |
|---|
| 98 | if (key && key[0] == '/') ++key; // accept + remove leading '/' |
|---|
| 99 | if (!key || !key[0]) err = EMPTY_KEY_NOT_ALLOWED; // reject NULp, empty (or single slash) |
|---|
| 100 | |
|---|
| 101 | while (!err && key[0]) { |
|---|
| 102 | int nonSlashPart = strcspn(key, "/"); |
|---|
| 103 | |
|---|
| 104 | err = check_key(key, nonSlashPart); |
|---|
| 105 | if (!err) { |
|---|
| 106 | key += nonSlashPart; |
|---|
| 107 | if (key[0] == '/') { |
|---|
| 108 | ++key; |
|---|
| 109 | if (key[0] == 0) { // nothing after slash |
|---|
| 110 | err = EMPTY_KEY_NOT_ALLOWED; |
|---|
| 111 | } |
|---|
| 112 | } |
|---|
| 113 | else { |
|---|
| 114 | gb_assert(key[0] == 0); |
|---|
| 115 | } |
|---|
| 116 | } |
|---|
| 117 | } |
|---|
| 118 | return err; |
|---|
| 119 | } |
|---|
| 120 | |
|---|
| 121 | // ---------------------------------------------- |
|---|
| 122 | // escape/unescape characters in strings |
|---|
| 123 | |
|---|
| 124 | char *GBS_escape_string(const char *str, const char *chars_to_escape, char escape_char) { |
|---|
| 125 | /*! escape characters in 'str' |
|---|
| 126 | * |
|---|
| 127 | * uses a special escape-method, which eliminates all 'chars_to_escape' completely |
|---|
| 128 | * from str (this makes further processing of the string more easy) |
|---|
| 129 | * |
|---|
| 130 | * @param str string to escape |
|---|
| 131 | * |
|---|
| 132 | * @param escape_char is the character used for escaping. For performance reasons it |
|---|
| 133 | * should be a character rarely used in 'str'. |
|---|
| 134 | * |
|---|
| 135 | * @param chars_to_escape may not contain 'A'-'Z' (these are used for escaping) |
|---|
| 136 | * and it may not be longer than 26 bytes |
|---|
| 137 | * |
|---|
| 138 | * @return heap copy of escaped string |
|---|
| 139 | * |
|---|
| 140 | * Inverse of GBS_unescape_string() |
|---|
| 141 | */ |
|---|
| 142 | |
|---|
| 143 | int len = strlen(str); |
|---|
| 144 | char *buffer = ARB_alloc<char>(2*len+1); |
|---|
| 145 | int j = 0; |
|---|
| 146 | int i; |
|---|
| 147 | |
|---|
| 148 | gb_assert(strlen(chars_to_escape) <= 26); |
|---|
| 149 | gb_assert(!strchr(chars_to_escape, escape_char)); // escape_char may not be included in chars_to_escape |
|---|
| 150 | |
|---|
| 151 | for (i = 0; str[i]; ++i) { |
|---|
| 152 | if (str[i] == escape_char) { |
|---|
| 153 | buffer[j++] = escape_char; |
|---|
| 154 | buffer[j++] = escape_char; |
|---|
| 155 | } |
|---|
| 156 | else { |
|---|
| 157 | const char *found = strchr(chars_to_escape, str[i]); |
|---|
| 158 | if (found) { |
|---|
| 159 | buffer[j++] = escape_char; |
|---|
| 160 | buffer[j++] = (found-chars_to_escape+'A'); |
|---|
| 161 | |
|---|
| 162 | gb_assert(found[0]<'A' || found[0]>'Z'); // illegal character in chars_to_escape |
|---|
| 163 | } |
|---|
| 164 | else { |
|---|
| 165 | |
|---|
| 166 | buffer[j++] = str[i]; |
|---|
| 167 | } |
|---|
| 168 | } |
|---|
| 169 | } |
|---|
| 170 | buffer[j] = 0; |
|---|
| 171 | |
|---|
| 172 | return buffer; |
|---|
| 173 | } |
|---|
| 174 | |
|---|
| 175 | char *GBS_unescape_string(const char *str, const char *escaped_chars, char escape_char) { |
|---|
| 176 | //! inverse of GB_escape_string() - for params see there |
|---|
| 177 | |
|---|
| 178 | int len = strlen(str); |
|---|
| 179 | char *buffer = ARB_alloc<char>(len+1); |
|---|
| 180 | int j = 0; |
|---|
| 181 | int i; |
|---|
| 182 | |
|---|
| 183 | #if defined(ASSERTION_USED) |
|---|
| 184 | int escaped_chars_len = strlen(escaped_chars); |
|---|
| 185 | #endif // ASSERTION_USED |
|---|
| 186 | |
|---|
| 187 | gb_assert(strlen(escaped_chars) <= 26); |
|---|
| 188 | gb_assert(!strchr(escaped_chars, escape_char)); // escape_char may not be included in chars_to_escape |
|---|
| 189 | |
|---|
| 190 | for (i = 0; str[i]; ++i) { |
|---|
| 191 | if (str[i] == escape_char) { |
|---|
| 192 | if (str[i+1] == escape_char) { |
|---|
| 193 | buffer[j++] = escape_char; |
|---|
| 194 | } |
|---|
| 195 | else { |
|---|
| 196 | int idx = str[i+1]-'A'; |
|---|
| 197 | |
|---|
| 198 | gb_assert(idx >= 0 && idx<escaped_chars_len); |
|---|
| 199 | buffer[j++] = escaped_chars[idx]; |
|---|
| 200 | } |
|---|
| 201 | ++i; |
|---|
| 202 | } |
|---|
| 203 | else { |
|---|
| 204 | buffer[j++] = str[i]; |
|---|
| 205 | } |
|---|
| 206 | } |
|---|
| 207 | buffer[j] = 0; |
|---|
| 208 | |
|---|
| 209 | return buffer; |
|---|
| 210 | } |
|---|
| 211 | |
|---|
| 212 | char *GBS_eval_env(GB_CSTR p) { |
|---|
| 213 | GB_ERROR error = NULp; |
|---|
| 214 | GB_CSTR ka; |
|---|
| 215 | GBS_strstruct *out = GBS_stropen(1000); |
|---|
| 216 | |
|---|
| 217 | while ((ka = GBS_find_string(p, "$(", 0))) { |
|---|
| 218 | GB_CSTR kz = strchr(ka, ')'); |
|---|
| 219 | if (!kz) { |
|---|
| 220 | error = GBS_global_string("missing ')' for envvar '%s'", p); |
|---|
| 221 | break; |
|---|
| 222 | } |
|---|
| 223 | else { |
|---|
| 224 | char *envvar = ARB_strpartdup(ka+2, kz-1); |
|---|
| 225 | int len = ka-p; |
|---|
| 226 | |
|---|
| 227 | if (len) GBS_strncat(out, p, len); |
|---|
| 228 | |
|---|
| 229 | GB_CSTR genv = GB_getenv(envvar); |
|---|
| 230 | if (genv) GBS_strcat(out, genv); |
|---|
| 231 | |
|---|
| 232 | p = kz+1; |
|---|
| 233 | free(envvar); |
|---|
| 234 | } |
|---|
| 235 | } |
|---|
| 236 | |
|---|
| 237 | if (error) { |
|---|
| 238 | GB_export_error(error); |
|---|
| 239 | GBS_strforget(out); |
|---|
| 240 | return NULp; |
|---|
| 241 | } |
|---|
| 242 | |
|---|
| 243 | GBS_strcat(out, p); // copy rest |
|---|
| 244 | return GBS_strclose(out); |
|---|
| 245 | } |
|---|
| 246 | |
|---|
long GBS_gcgchecksum(const char *seq) {
    // GCGchecksum
    //
    // Sums up the uppercased characters of 'seq', each multiplied by a weight
    // cycling through 1..57, and returns that sum modulo 10000.

    long check  = 0;
    long weight = 0;

    for (const char *p = seq; *p; ++p) {
        ++weight;
        // cast needed: passing a negative char value to toupper() is undefined behavior
        check += weight * toupper((unsigned char)*p);
        if (weight == 57) weight = 0;
    }

    return check % 10000;
}
|---|
| 263 | |
|---|
| 264 | // Table of CRC-32's of all single byte values (made by makecrc.c of ZIP source) |
|---|
| 265 | uint32_t crctab[] = { |
|---|
| 266 | 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, |
|---|
| 267 | 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, |
|---|
| 268 | 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, |
|---|
| 269 | 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, |
|---|
| 270 | 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, |
|---|
| 271 | 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, |
|---|
| 272 | 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, |
|---|
| 273 | 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, |
|---|
| 274 | 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, |
|---|
| 275 | 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, |
|---|
| 276 | 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, |
|---|
| 277 | 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, |
|---|
| 278 | 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, |
|---|
| 279 | 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, |
|---|
| 280 | 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, |
|---|
| 281 | 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, |
|---|
| 282 | 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, |
|---|
| 283 | 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, |
|---|
| 284 | 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, |
|---|
| 285 | 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, |
|---|
| 286 | 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, |
|---|
| 287 | 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, |
|---|
| 288 | 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, |
|---|
| 289 | 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, |
|---|
| 290 | 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, |
|---|
| 291 | 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, |
|---|
| 292 | 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, |
|---|
| 293 | 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, |
|---|
| 294 | 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, |
|---|
| 295 | 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, |
|---|
| 296 | 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, |
|---|
| 297 | 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, |
|---|
| 298 | 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, |
|---|
| 299 | 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, |
|---|
| 300 | 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, |
|---|
| 301 | 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, |
|---|
| 302 | 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, |
|---|
| 303 | 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, |
|---|
| 304 | 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, |
|---|
| 305 | 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, |
|---|
| 306 | 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, |
|---|
| 307 | 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, |
|---|
| 308 | 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, |
|---|
| 309 | 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, |
|---|
| 310 | 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, |
|---|
| 311 | 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, |
|---|
| 312 | 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, |
|---|
| 313 | 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, |
|---|
| 314 | 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, |
|---|
| 315 | 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, |
|---|
| 316 | 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, |
|---|
| 317 | 0x2d02ef8dL |
|---|
| 318 | }; |
|---|
| 319 | |
|---|
| 320 | uint32_t GB_checksum(const char *seq, long length, int ignore_case, const char *exclude) { |
|---|
| 321 | /* CRC32checksum: modified from CRC-32 algorithm found in ZIP compression source |
|---|
| 322 | * if ignore_case == true -> treat all characters as uppercase-chars (applies to exclude too) |
|---|
| 323 | */ |
|---|
| 324 | |
|---|
| 325 | unsigned long c = 0xffffffffL; |
|---|
| 326 | long n = length; |
|---|
| 327 | int i; |
|---|
| 328 | int tab[256]; // @@@ avoid recalc for each call |
|---|
| 329 | |
|---|
| 330 | for (i=0; i<256; i++) { // LOOP_VECTORIZED // tested down to gcc 5.5.0 (may fail on older gcc versions) |
|---|
| 331 | tab[i] = ignore_case ? toupper(i) : i; |
|---|
| 332 | } |
|---|
| 333 | |
|---|
| 334 | if (exclude) { |
|---|
| 335 | while (1) { |
|---|
| 336 | int k = *(unsigned char *)exclude++; |
|---|
| 337 | if (!k) break; |
|---|
| 338 | tab[k] = 0; |
|---|
| 339 | if (ignore_case) tab[toupper(k)] = tab[tolower(k)] = 0; |
|---|
| 340 | } |
|---|
| 341 | } |
|---|
| 342 | |
|---|
| 343 | while (n--) { |
|---|
| 344 | i = tab[*(const unsigned char *)seq++]; |
|---|
| 345 | if (i) { |
|---|
| 346 | c = crctab[((int) c ^ i) & 0xff] ^ (c >> 8); |
|---|
| 347 | } |
|---|
| 348 | } |
|---|
| 349 | c = c ^ 0xffffffffL; |
|---|
| 350 | return c; |
|---|
| 351 | } |
|---|
| 352 | |
|---|
| 353 | uint32_t GBS_checksum(const char *seq, int ignore_case, const char *exclude) { |
|---|
| 354 | // if 'ignore_case' == true -> treat all characters as uppercase-chars (applies to 'exclude' too) |
|---|
| 355 | return GB_checksum(seq, strlen(seq), ignore_case, exclude); |
|---|
| 356 | } |
|---|
| 357 | |
|---|
size_t GBS_shorten_repeated_data(char *data) {
    // Shortens repeats in 'data' (in place, i.e. 'data' gets modified!).
    // Runs of 5 or more identical characters are replaced by "<char>{<count>}",
    // e.g. "..............................ACGT....................TGCA"
    // -> ".{30}ACGT.{20}TGCA"
    //
    // Returns the length of the shortened string.

#if defined(DEBUG)
    size_t orgLen = strlen(data);
#endif // DEBUG

    char *const  start   = data;
    char        *out     = data; // write position (never passes the read position)
    const char  *in      = data; // read position
    char         runChar = *in++;
    size_t       runLen  = 1;

    while (runChar) {
        const char next = *in++;
        if (next == runChar) {
            ++runLen;
            continue;
        }

        // run has ended -> write it
        if (runLen < 5) {
            size_t written;
            for (written = 0; written<runLen; ++written) *out++ = runChar; // insert plain
        }
        else {
            out += sprintf(out, "%c{%zu}", runChar, runLen); // insert repeat count
        }

        runChar = next;
        runLen  = 1;
    }

    *out = 0;

#if defined(DEBUG)

    gb_assert(strlen(start) <= orgLen);
#endif // DEBUG
    return out-start;
}
|---|
| 398 | |
|---|
| 399 | |
|---|
| 400 | // ------------------------------------------ |
|---|
| 401 | // helper classes for tagged fields |
|---|
| 402 | |
|---|
| 403 | class TextRef { |
|---|
| 404 | const char *data; // has no terminal zero-byte! |
|---|
| 405 | int length; |
|---|
| 406 | |
|---|
| 407 | public: |
|---|
| 408 | TextRef() : data(NULp), length(-1) {} |
|---|
| 409 | TextRef(const char *data_, int length_) : data(data_), length(length_) {} |
|---|
| 410 | explicit TextRef(const char *zeroTerminated) : data(zeroTerminated), length(strlen(data)) {} |
|---|
| 411 | |
|---|
| 412 | bool defined() const { return data && length>0; } |
|---|
| 413 | const char *get_data() const { return data; } |
|---|
| 414 | int get_length() const { return length; } |
|---|
| 415 | |
|---|
| 416 | const char *get_following() const { return data ? data+length : NULp; } |
|---|
| 417 | |
|---|
| 418 | int compare(const char *str) const { |
|---|
| 419 | gb_assert(defined()); |
|---|
| 420 | int cmp = strncmp(get_data(), str, get_length()); |
|---|
| 421 | if (!cmp) { |
|---|
| 422 | if (str[get_length()]) { |
|---|
| 423 | cmp = -1; // right side contains more content |
|---|
| 424 | } |
|---|
| 425 | } |
|---|
| 426 | return cmp; |
|---|
| 427 | } |
|---|
| 428 | int icompare(const char *str) const { |
|---|
| 429 | gb_assert(defined()); |
|---|
| 430 | int cmp = strncasecmp(get_data(), str, get_length()); |
|---|
| 431 | if (!cmp) { |
|---|
| 432 | if (str[get_length()]) { |
|---|
| 433 | cmp = -1; // right side contains more content |
|---|
| 434 | } |
|---|
| 435 | } |
|---|
| 436 | return cmp; |
|---|
| 437 | } |
|---|
| 438 | char *copy() const { return ARB_strndup(get_data(), get_length()); } |
|---|
| 439 | |
|---|
| 440 | char head() const { return defined() ? data[0] : 0; } |
|---|
| 441 | char tail() const { return defined() ? data[length-1] : 0; } |
|---|
| 442 | |
|---|
| 443 | TextRef headTrimmed() const { |
|---|
| 444 | if (defined()) { |
|---|
| 445 | for (int s = 0; s<length; ++s) { |
|---|
| 446 | if (!isspace(data[s])) { |
|---|
| 447 | return TextRef(data+s, length-s); |
|---|
| 448 | } |
|---|
| 449 | } |
|---|
| 450 | } |
|---|
| 451 | return TextRef(); |
|---|
| 452 | } |
|---|
| 453 | TextRef tailTrimmed() const { |
|---|
| 454 | if (defined()) { |
|---|
| 455 | for (int s = length-1; s>=0; --s) { |
|---|
| 456 | if (!isspace(data[s])) { |
|---|
| 457 | return TextRef(data, s+1); |
|---|
| 458 | } |
|---|
| 459 | } |
|---|
| 460 | } |
|---|
| 461 | return TextRef(); |
|---|
| 462 | } |
|---|
| 463 | |
|---|
| 464 | TextRef trimmed() const { |
|---|
| 465 | return headTrimmed().tailTrimmed(); |
|---|
| 466 | } |
|---|
| 467 | |
|---|
| 468 | inline TextRef partBefore(const TextRef& subref) const; |
|---|
| 469 | inline TextRef partBehind(const TextRef& subref) const; |
|---|
| 470 | |
|---|
| 471 | bool is_part_of(const TextRef& other) const { |
|---|
| 472 | gb_assert(defined() && other.defined()); |
|---|
| 473 | return get_data()>=other.get_data() && get_following()<=other.get_following(); |
|---|
| 474 | } |
|---|
| 475 | |
|---|
| 476 | const char *find(char c) const { return reinterpret_cast<const char*>(memchr(get_data(), c, get_length())); } |
|---|
| 477 | }; |
|---|
| 478 | |
|---|
| 479 | static TextRef textBetween(const TextRef& t1, const TextRef& t2) { |
|---|
| 480 | const char *behind_d1 = t1.get_following(); |
|---|
| 481 | const char *d2 = t2.get_data(); |
|---|
| 482 | |
|---|
| 483 | if (behind_d1 && d2 && behind_d1<d2) { |
|---|
| 484 | return TextRef(behind_d1, d2-behind_d1); |
|---|
| 485 | } |
|---|
| 486 | return TextRef(); |
|---|
| 487 | } |
|---|
| 488 | |
|---|
| 489 | inline TextRef TextRef::partBefore(const TextRef& subref) const { |
|---|
| 490 | gb_assert(subref.is_part_of(*this)); |
|---|
| 491 | return textBetween(TextRef(get_data(), 0), subref); |
|---|
| 492 | } |
|---|
| 493 | inline TextRef TextRef::partBehind(const TextRef& subref) const { |
|---|
| 494 | gb_assert(subref.is_part_of(*this)); |
|---|
| 495 | return TextRef(subref.get_following(), get_following()-subref.get_following()); |
|---|
| 496 | } |
|---|
| 497 | |
|---|
| 498 | class TaggedContentParser { |
|---|
| 499 | TextRef wholeInput; |
|---|
| 500 | TextRef tag, content; // current position |
|---|
| 501 | TextRef restTags; // store (rest of) multiple tags (e.g. from "[t1,t2]") |
|---|
| 502 | TextRef nextBrackets; // next "[..]" part (behind current tag) |
|---|
| 503 | |
|---|
| 504 | void findBrackets(const char *in) { |
|---|
| 505 | nextBrackets = TextRef(); |
|---|
| 506 | const char *tag_start = strchr(in, '['); |
|---|
| 507 | if (tag_start) { |
|---|
| 508 | const char *tag_end = strchr(tag_start, ']'); |
|---|
| 509 | if (tag_end) { |
|---|
| 510 | if (tag_end == tag_start+1) { // empty tag -> use as content |
|---|
| 511 | findBrackets(tag_end+1); |
|---|
| 512 | } |
|---|
| 513 | else { |
|---|
| 514 | const char *unwanted_bracket = reinterpret_cast<const char*>(memchr(tag_start+1, '[', tag_end-tag_start-1)); |
|---|
| 515 | if (unwanted_bracket) { // tagname contains '[' -> step to next bracket |
|---|
| 516 | findBrackets(unwanted_bracket); |
|---|
| 517 | } |
|---|
| 518 | else { |
|---|
| 519 | TextRef name = TextRef(tag_start+1, tag_end-tag_start-1).trimmed(); |
|---|
| 520 | if (name.defined()) { // not only whitespace inside brackets |
|---|
| 521 | nextBrackets = TextRef(tag_start, tag_end-tag_start+1); |
|---|
| 522 | } |
|---|
| 523 | else { |
|---|
| 524 | findBrackets(tag_end+1); |
|---|
| 525 | } |
|---|
| 526 | } |
|---|
| 527 | } |
|---|
| 528 | } |
|---|
| 529 | } |
|---|
| 530 | } |
|---|
| 531 | |
|---|
| 532 | void parse_next_multi_tag() { |
|---|
| 533 | gb_assert(restTags.defined()); |
|---|
| 534 | TextRef comma(restTags.find(','), 1); |
|---|
| 535 | if (comma.defined()) { |
|---|
| 536 | tag = restTags.partBefore(comma).tailTrimmed(); |
|---|
| 537 | restTags = restTags.partBehind(comma).headTrimmed(); |
|---|
| 538 | } |
|---|
| 539 | else { |
|---|
| 540 | tag = restTags; |
|---|
| 541 | restTags = TextRef(); |
|---|
| 542 | } |
|---|
| 543 | } |
|---|
| 544 | void parse_next() { |
|---|
| 545 | if (restTags.defined()) { |
|---|
| 546 | parse_next_multi_tag(); |
|---|
| 547 | } |
|---|
| 548 | else if (nextBrackets.defined()) { |
|---|
| 549 | TextRef brackets = nextBrackets; |
|---|
| 550 | findBrackets(brackets.get_following()); |
|---|
| 551 | |
|---|
| 552 | content = (nextBrackets.defined() ? textBetween(brackets, nextBrackets) : wholeInput.partBehind(brackets)).trimmed(); |
|---|
| 553 | |
|---|
| 554 | gb_assert(brackets.head() == '[' && brackets.tail() == ']'); |
|---|
| 555 | |
|---|
| 556 | TextRef tags = TextRef(brackets.get_data()+1, brackets.get_length()-2).trimmed(); |
|---|
| 557 | gb_assert(tags.defined()); |
|---|
| 558 | |
|---|
| 559 | restTags = tags; |
|---|
| 560 | parse_next_multi_tag(); |
|---|
| 561 | } |
|---|
| 562 | else { |
|---|
| 563 | tag = content = TextRef(); |
|---|
| 564 | gb_assert(!has_part()); |
|---|
| 565 | } |
|---|
| 566 | } |
|---|
| 567 | void parse_first() { |
|---|
| 568 | gb_assert(!has_part()); |
|---|
| 569 | findBrackets(wholeInput.get_data()); |
|---|
| 570 | content = (nextBrackets.defined() ? wholeInput.partBefore(nextBrackets) : wholeInput).trimmed(); |
|---|
| 571 | if (!content.defined()) parse_next(); // no untagged prefix seen -> directly goto first tag |
|---|
| 572 | } |
|---|
| 573 | |
|---|
| 574 | public: |
|---|
| 575 | TaggedContentParser(const char *input_) : wholeInput(input_) { parse_first(); } |
|---|
| 576 | |
|---|
| 577 | bool has_tag() const { return tag.defined(); } |
|---|
| 578 | bool has_content() const { return content.defined(); } |
|---|
| 579 | |
|---|
| 580 | void next() { parse_next(); } |
|---|
| 581 | bool has_part() const { return has_tag() || has_content(); } // false -> parser has finished |
|---|
| 582 | |
|---|
| 583 | const TextRef& get_tag() const { return tag; } |
|---|
| 584 | const TextRef& get_content() const { return content; } |
|---|
| 585 | }; |
|---|
| 586 | |
|---|
| 587 | |
|---|
| 588 | // ------------------------------------------- |
|---|
| 589 | // helper function for tagged fields |
|---|
| 590 | |
|---|
| 591 | static void g_bs_add_value_tag_to_hash(GB_HASH *hash, const char *tag, char *value) { |
|---|
| 592 | if (!value[0]) return; // ignore empty values |
|---|
| 593 | |
|---|
| 594 | { |
|---|
| 595 | char *p; |
|---|
| 596 | p = value; while ((p = strchr(p, '['))) *p = '{'; // replace all '[' by '{' |
|---|
| 597 | p = value; while ((p = strchr(p, ']'))) *p = '}'; // replace all ']' by '}' |
|---|
| 598 | } |
|---|
| 599 | |
|---|
| 600 | GB_HASH *sh = (GB_HASH *)GBS_read_hash(hash, value); |
|---|
| 601 | if (!sh) { |
|---|
| 602 | sh = GBS_create_hash(10, GB_IGNORE_CASE); // Tags are case independent |
|---|
| 603 | GBS_write_hash(hash, value, (long)sh); |
|---|
| 604 | } |
|---|
| 605 | GBS_write_hash(sh, tag, 1); |
|---|
| 606 | } |
|---|
| 607 | |
|---|
| 608 | static void g_bs_convert_string_to_tagged_hash_with_delete(GB_HASH *hash, char *s, char *default_tag, const char *del) { |
|---|
| 609 | TaggedContentParser parser(s); |
|---|
| 610 | while (parser.has_part()) { |
|---|
| 611 | if (parser.has_content()) { |
|---|
| 612 | char *content = parser.get_content().copy(); |
|---|
| 613 | if (parser.has_tag()) { |
|---|
| 614 | char *tag = parser.get_tag().copy(); |
|---|
| 615 | if (!del || ARB_stricmp(tag, del) != 0) { |
|---|
| 616 | g_bs_add_value_tag_to_hash(hash, tag, content); |
|---|
| 617 | } |
|---|
| 618 | free(tag); |
|---|
| 619 | } |
|---|
| 620 | else { |
|---|
| 621 | g_bs_add_value_tag_to_hash(hash, default_tag, content); // no tag found, use default tag |
|---|
| 622 | } |
|---|
| 623 | free(content); |
|---|
| 624 | } |
|---|
| 625 | parser.next(); |
|---|
| 626 | } |
|---|
| 627 | } |
|---|
| 628 | |
|---|
| 629 | static GB_ERROR g_bs_convert_string_to_tagged_hash_with_rewrite(GB_HASH *hash, char *s, char *default_tag, const char *rtag, const char *aci, GBL_call_env& env) { |
|---|
| 630 | GB_ERROR error = NULp; |
|---|
| 631 | |
|---|
| 632 | TaggedContentParser parser(s); |
|---|
| 633 | while (parser.has_part() && !error) { |
|---|
| 634 | if (parser.has_content()) { |
|---|
| 635 | char *value = parser.get_content().copy(); |
|---|
| 636 | char *tag = parser.has_tag() ? parser.get_tag().copy() : strdup(default_tag); |
|---|
| 637 | |
|---|
| 638 | if (rtag && ARB_stricmp(tag, rtag) == 0) { |
|---|
| 639 | freeset(value, GB_command_interpreter_in_env(value, aci, env)); |
|---|
| 640 | if (!value) error = GB_await_error(); |
|---|
| 641 | } |
|---|
| 642 | |
|---|
| 643 | if (!error) g_bs_add_value_tag_to_hash(hash, tag, value); |
|---|
| 644 | |
|---|
| 645 | free(tag); |
|---|
| 646 | free(value); |
|---|
| 647 | } |
|---|
| 648 | parser.next(); |
|---|
| 649 | } |
|---|
| 650 | |
|---|
| 651 | return error; |
|---|
| 652 | } |
|---|
| 653 | |
|---|
| 654 | static void g_bs_merge_tags(const char *tag, long /*val*/, void *cd_sub_result) { |
|---|
| 655 | GBS_strstruct *sub_result = (GBS_strstruct*)cd_sub_result; |
|---|
| 656 | |
|---|
| 657 | GBS_strcat(sub_result, tag); |
|---|
| 658 | GBS_strcat(sub_result, ","); |
|---|
| 659 | } |
|---|
| 660 | |
|---|
| 661 | static void g_bs_read_tagged_hash(const char *value, long subhash, void *cd_g_bs_collect_tags_hash) { |
|---|
| 662 | static int counter = 0; |
|---|
| 663 | |
|---|
| 664 | GBS_strstruct *sub_result = GBS_stropen(100); |
|---|
| 665 | GBS_hash_do_const_sorted_loop((GB_HASH *)subhash, g_bs_merge_tags, GBS_HCF_sortedByKey, sub_result); |
|---|
| 666 | GBS_intcat(sub_result, counter++); // create a unique number |
|---|
| 667 | |
|---|
| 668 | char *str = ARB_strupper(GBS_strclose(sub_result)); |
|---|
| 669 | |
|---|
| 670 | GB_HASH *g_bs_collect_tags_hash = (GB_HASH*)cd_g_bs_collect_tags_hash; |
|---|
| 671 | GBS_write_hash(g_bs_collect_tags_hash, str, (long)ARB_strdup(value)); // send output to new hash for sorting |
|---|
| 672 | |
|---|
| 673 | free(str); |
|---|
| 674 | } |
|---|
| 675 | |
|---|
| 676 | static void g_bs_read_final_hash(const char *tag, long value, void *cd_merge_result) { |
|---|
| 677 | GBS_strstruct *merge_result = (GBS_strstruct*)cd_merge_result; |
|---|
| 678 | |
|---|
| 679 | char *lk = const_cast<char*>(strrchr(tag, ',')); |
|---|
| 680 | if (lk) { // remove number at end |
|---|
| 681 | *lk = 0; |
|---|
| 682 | |
|---|
| 683 | if (!merge_result->empty()) merge_result->put(' '); // skip trailing space |
|---|
| 684 | merge_result->put('['); |
|---|
| 685 | merge_result->cat(tag); |
|---|
| 686 | merge_result->put(']'); |
|---|
| 687 | merge_result->put(' '); |
|---|
| 688 | } |
|---|
| 689 | merge_result->cat((char*)value); |
|---|
| 690 | } |
|---|
| 691 | |
|---|
| 692 | static char *g_bs_get_string_of_tag_hash(GB_HASH *tag_hash) { |
|---|
| 693 | GBS_strstruct *merge_result = GBS_stropen(256); |
|---|
| 694 | GB_HASH *collect_tags_hash = GBS_create_dynaval_hash(512, GB_IGNORE_CASE, GBS_dynaval_free); |
|---|
| 695 | |
|---|
| 696 | GBS_hash_do_const_sorted_loop(tag_hash, g_bs_read_tagged_hash, GBS_HCF_sortedByKey, collect_tags_hash); // move everything into collect_tags_hash |
|---|
| 697 | GBS_hash_do_const_sorted_loop(collect_tags_hash, g_bs_read_final_hash, GBS_HCF_sortedByKey, merge_result); |
|---|
| 698 | |
|---|
| 699 | GBS_free_hash(collect_tags_hash); |
|---|
| 700 | return GBS_strclose(merge_result); |
|---|
| 701 | } |
|---|
| 702 | |
|---|
| 703 | static long g_bs_free_hash_of_hashes_elem(const char */*key*/, long val, void *) { |
|---|
| 704 | GB_HASH *hash = (GB_HASH*)val; |
|---|
| 705 | if (hash) GBS_free_hash(hash); |
|---|
| 706 | return 0; |
|---|
| 707 | } |
|---|
| 708 | static void g_bs_free_hash_of_hashes(GB_HASH *hash) { |
|---|
| 709 | GBS_hash_do_loop(hash, g_bs_free_hash_of_hashes_elem, NULp); |
|---|
| 710 | GBS_free_hash(hash); |
|---|
| 711 | } |
|---|
| 712 | |
|---|
| 713 | char *GBS_merge_tagged_strings(const char *s1, const char *tag1, const char *replace1, const char *s2, const char *tag2, const char *replace2) { |
|---|
| 714 | /* Create a tagged string from two tagged strings: |
|---|
| 715 | * a tagged string is something like '[tag,tag,tag] string [tag] string [tag,tag] string' |
|---|
| 716 | * |
|---|
| 717 | * if 's2' is not empty, then delete tag 'replace1' in 's1' |
|---|
| 718 | * if 's1' is not empty, then delete tag 'replace2' in 's2' |
|---|
| 719 | * |
|---|
| 720 | * (result should never be NULp) |
|---|
| 721 | */ |
|---|
| 722 | |
|---|
| 723 | char *str1 = ARB_strdup(s1); |
|---|
| 724 | char *str2 = ARB_strdup(s2); |
|---|
| 725 | char *t1 = GBS_string_2_key(tag1); |
|---|
| 726 | char *t2 = GBS_string_2_key(tag2); |
|---|
| 727 | GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE); |
|---|
| 728 | |
|---|
| 729 | if (!s1[0]) replace2 = NULp; |
|---|
| 730 | if (!s2[0]) replace1 = NULp; |
|---|
| 731 | |
|---|
| 732 | if (replace1 && !replace1[0]) replace1 = NULp; |
|---|
| 733 | if (replace2 && !replace2[0]) replace2 = NULp; |
|---|
| 734 | |
|---|
| 735 | g_bs_convert_string_to_tagged_hash_with_delete(hash, str1, t1, replace1); |
|---|
| 736 | g_bs_convert_string_to_tagged_hash_with_delete(hash, str2, t2, replace2); |
|---|
| 737 | |
|---|
| 738 | char *result = g_bs_get_string_of_tag_hash(hash); |
|---|
| 739 | |
|---|
| 740 | g_bs_free_hash_of_hashes(hash); |
|---|
| 741 | |
|---|
| 742 | free(t2); |
|---|
| 743 | free(t1); |
|---|
| 744 | free(str2); |
|---|
| 745 | free(str1); |
|---|
| 746 | |
|---|
| 747 | return result; |
|---|
| 748 | } |
|---|
| 749 | |
|---|
| 750 | char *GBS_modify_tagged_string_with_ACI(const char *s, const char *dt, const char *tag, const char *aci, GBL_call_env& env) { |
|---|
| 751 | /* if 's' is untagged, tag it with default tag 'dt'. |
|---|
| 752 | * if 'tag' is specified -> apply 'aci' to that part of the content of 's', which is tagged with 'tag' (i.e. look for '[tag]') |
|---|
| 753 | * |
|---|
| 754 | * if result is NULp, an error has been exported. |
|---|
| 755 | */ |
|---|
| 756 | |
|---|
| 757 | char *str = ARB_strdup(s); |
|---|
| 758 | char *default_tag = GBS_string_2_key(dt); |
|---|
| 759 | GB_HASH *hash = GBS_create_hash(16, GB_MIND_CASE); |
|---|
| 760 | char *result = NULp; |
|---|
| 761 | |
|---|
| 762 | GB_ERROR error = g_bs_convert_string_to_tagged_hash_with_rewrite(hash, str, default_tag, tag, aci, env); |
|---|
| 763 | |
|---|
| 764 | if (!error) { |
|---|
| 765 | result = g_bs_get_string_of_tag_hash(hash); |
|---|
| 766 | } |
|---|
| 767 | else { |
|---|
| 768 | GB_export_error(error); |
|---|
| 769 | } |
|---|
| 770 | |
|---|
| 771 | g_bs_free_hash_of_hashes(hash); |
|---|
| 772 | |
|---|
| 773 | free(default_tag); |
|---|
| 774 | free(str); |
|---|
| 775 | |
|---|
| 776 | return result; |
|---|
| 777 | } |
|---|
| 778 | |
|---|
| 779 | char *GB_read_as_tagged_string(GBDATA *gbd, const char *tagi) { |
|---|
| 780 | char *buf = GB_read_as_string(gbd); |
|---|
| 781 | if (buf && tagi && tagi[0]) { |
|---|
| 782 | TaggedContentParser parser(buf); |
|---|
| 783 | |
|---|
| 784 | char *wantedTag = GBS_string_2_key(tagi); |
|---|
| 785 | char *contentFound = NULp; |
|---|
| 786 | |
|---|
| 787 | while (parser.has_part() && !contentFound) { |
|---|
| 788 | if (parser.has_tag() && parser.get_tag().icompare(wantedTag) == 0) { |
|---|
| 789 | contentFound = parser.get_content().copy(); |
|---|
| 790 | } |
|---|
| 791 | parser.next(); |
|---|
| 792 | } |
|---|
| 793 | free(wantedTag); |
|---|
| 794 | free(buf); |
|---|
| 795 | |
|---|
| 796 | return contentFound; |
|---|
| 797 | } |
|---|
| 798 | return buf; |
|---|
| 799 | } |
|---|
| 800 | |
|---|
| 801 | |
|---|
/* Writes 'strngi' to 'out' as a double-quoted, escaped string.
 *
 * Encoding:
 * - LF                   -> \n
 * - TAB                  -> \t
 * - other ASCII 1..24    -> \A..\X  (character + '@')
 * - ASCII 25..31         -> \0..\6
 * - '"' and '\'          -> prefixed with a backslash
 * - everything else      -> written unchanged
 *
 * Be CAREFUL: this function is used to save ARB ASCII databases (i.e. properties)
 * and is used as well to save perl macros.
 *
 * When changing GBS_fwrite_string, GBS_fread_string needs to be adapted as well.
 *
 * Always keep in mind that many users have databases/macros written with older
 * versions of this function. They MUST still load properly!
 */
void GBS_fwrite_string(const char *strngi, FILE *out) {
    const unsigned char *src = (const unsigned char *)strngi;

    putc('"', out);

    for (int c = *src; c; c = *++src) {
        int esc = 0; // character to write behind a backslash (0 = write 'c' unescaped)

        if (c >= 32) {
            if (c == '"' || c == '\\') esc = c;
        }
        else if (c == '\n') esc = 'n';
        else if (c == '\t') esc = 't';
        else if (c < 25)    esc = c+'@';      // \A..\X (\n and \t were handled above)
        else                esc = c+('0'-25); // \0..\6

        if (esc) {
            putc('\\', out);
            putc(esc, out);
        }
        else {
            putc(c, out);
        }
    }

    putc('"', out);
}
|---|
| 844 | |
|---|
| 845 | /* Read a string from a file written by GBS_fwrite_string, |
|---|
| 846 | * Searches first '"' |
|---|
| 847 | * |
|---|
| 848 | * WARNING : changing this function affects perl-macro execution (read warnings for GBS_fwrite_string) |
|---|
| 849 | * any changes should be done in GBS_fconvert_string too. |
|---|
| 850 | */ |
|---|
| 851 | |
|---|
| 852 | static char *GBS_fread_string(FILE *in) { // @@@ should be used when reading things written by GBS_fwrite_string, but it's unused! |
|---|
| 853 | GBS_strstruct *strstr = GBS_stropen(1024); |
|---|
| 854 | int x; |
|---|
| 855 | |
|---|
| 856 | while ((x = getc(in)) != '"') if (x == EOF) break; // Search first '"' |
|---|
| 857 | |
|---|
| 858 | if (x != EOF) { |
|---|
| 859 | while ((x = getc(in)) != '"') { |
|---|
| 860 | if (x == EOF) break; |
|---|
| 861 | if (x == '\\') { |
|---|
| 862 | x = getc(in); if (x==EOF) break; |
|---|
| 863 | if (x == 'n') { |
|---|
| 864 | GBS_chrcat(strstr, '\n'); |
|---|
| 865 | continue; |
|---|
| 866 | } |
|---|
| 867 | if (x == 't') { |
|---|
| 868 | GBS_chrcat(strstr, '\t'); |
|---|
| 869 | continue; |
|---|
| 870 | } |
|---|
| 871 | if (x>='@' && x <= '@' + 25) { |
|---|
| 872 | GBS_chrcat(strstr, x-'@'); |
|---|
| 873 | continue; |
|---|
| 874 | } |
|---|
| 875 | if (x>='0' && x <= '9') { |
|---|
| 876 | GBS_chrcat(strstr, x-('0'-25)); |
|---|
| 877 | continue; |
|---|
| 878 | } |
|---|
| 879 | // all other backslashes are simply skipped |
|---|
| 880 | } |
|---|
| 881 | GBS_chrcat(strstr, x); |
|---|
| 882 | } |
|---|
| 883 | } |
|---|
| 884 | return GBS_strclose(strstr); |
|---|
| 885 | } |
|---|
| 886 | |
|---|
| 887 | /* does similar decoding as GBS_fread_string but works directly on an existing buffer |
|---|
| 888 | * (WARNING : GBS_fconvert_string is used by gb_read_file which reads ARB ASCII databases!!) |
|---|
| 889 | * |
|---|
| 890 | * inserts \0 behind decoded string (removes the closing '"') |
|---|
| 891 | * returns a pointer behind the end (") of the _encoded_ string |
|---|
| 892 | * returns NULp if a 0-character is found |
|---|
| 893 | */ |
|---|
| 894 | char *GBS_fconvert_string(char *buffer) { |
|---|
| 895 | char *t = buffer; |
|---|
| 896 | char *f = buffer; |
|---|
| 897 | int x; |
|---|
| 898 | |
|---|
| 899 | gb_assert(f[-1] == '"'); |
|---|
| 900 | // the opening " has already been read |
|---|
| 901 | |
|---|
| 902 | while ((x = *f++) != '"') { |
|---|
| 903 | if (!x) break; |
|---|
| 904 | |
|---|
| 905 | if (x == '\\') { |
|---|
| 906 | x = *f++; |
|---|
| 907 | if (!x) break; |
|---|
| 908 | |
|---|
| 909 | if (x == 'n') { |
|---|
| 910 | *t++ = '\n'; |
|---|
| 911 | continue; |
|---|
| 912 | } |
|---|
| 913 | if (x == 't') { |
|---|
| 914 | *t++ = '\t'; |
|---|
| 915 | continue; |
|---|
| 916 | } |
|---|
| 917 | if (x>='@' && x <= '@' + 25) { |
|---|
| 918 | *t++ = x-'@'; |
|---|
| 919 | continue; |
|---|
| 920 | } |
|---|
| 921 | if (x>='0' && x <= '9') { |
|---|
| 922 | *t++ = x-('0'-25); |
|---|
| 923 | continue; |
|---|
| 924 | } |
|---|
| 925 | // all other backslashes are simply skipped |
|---|
| 926 | } |
|---|
| 927 | *t++ = x; |
|---|
| 928 | } |
|---|
| 929 | |
|---|
| 930 | if (!x) return NULp; // error (string should not contain 0-character) |
|---|
| 931 | gb_assert(x == '"'); |
|---|
| 932 | |
|---|
| 933 | t[0] = 0; |
|---|
| 934 | return f; |
|---|
| 935 | } |
|---|
| 936 | |
|---|
| 937 | char *GBS_replace_tabs_by_spaces(const char *text) { |
|---|
| 938 | int tlen = strlen(text); |
|---|
| 939 | GBS_strstruct *mfile = GBS_stropen(tlen * 3/2 + 1); |
|---|
| 940 | int tabpos = 0; |
|---|
| 941 | int c; |
|---|
| 942 | |
|---|
| 943 | while ((c=*(text++))) { |
|---|
| 944 | if (c == '\t') { |
|---|
| 945 | int ntab = (tabpos + 8) & 0xfffff8; |
|---|
| 946 | while (tabpos < ntab) { |
|---|
| 947 | GBS_chrcat(mfile, ' '); |
|---|
| 948 | tabpos++; |
|---|
| 949 | } |
|---|
| 950 | continue; |
|---|
| 951 | } |
|---|
| 952 | tabpos ++; |
|---|
| 953 | if (c == '\n') { |
|---|
| 954 | tabpos = 0; |
|---|
| 955 | } |
|---|
| 956 | GBS_chrcat(mfile, c); |
|---|
| 957 | } |
|---|
| 958 | return GBS_strclose(mfile); |
|---|
| 959 | } |
|---|
| 960 | |
|---|
| 961 | char *GBS_trim(const char *str) { |
|---|
| 962 | // trim whitespace at beginning and end of 'str' |
|---|
| 963 | const char *whitespace = " \t\n"; |
|---|
| 964 | while (str[0] && strchr(whitespace, str[0])) str++; |
|---|
| 965 | |
|---|
| 966 | const char *end = strchr(str, 0)-1; |
|---|
| 967 | while (end >= str && strchr(whitespace, end[0])) end--; |
|---|
| 968 | |
|---|
| 969 | return ARB_strpartdup(str, end); |
|---|
| 970 | } |
|---|
| 971 | |
|---|
| 972 | static char *dated_info(const char *info) { |
|---|
| 973 | char *dated_info = NULp; |
|---|
| 974 | time_t date; |
|---|
| 975 | |
|---|
| 976 | if (time(&date) != -1) { |
|---|
| 977 | char *dstr = ctime(&date); |
|---|
| 978 | char *nl = strchr(dstr, '\n'); |
|---|
| 979 | |
|---|
| 980 | if (nl) nl[0] = 0; // cut off LF |
|---|
| 981 | |
|---|
| 982 | dated_info = GBS_global_string_copy("%s: %s", dstr, info); |
|---|
| 983 | } |
|---|
| 984 | else { |
|---|
| 985 | dated_info = ARB_strdup(info); |
|---|
| 986 | } |
|---|
| 987 | return dated_info; |
|---|
| 988 | } |
|---|
| 989 | |
|---|
| 990 | char *GBS_log_action_to(const char *comment, const char *action, bool stamp) { |
|---|
| 991 | /*! concatenates 'comment' and 'action'. |
|---|
| 992 | * '\n' is appended to existing 'comment' and/or 'action' (if missing). |
|---|
| 993 | * @param comment may be NULp (=> result is 'action') |
|---|
| 994 | * @param action may NOT be NULp |
|---|
| 995 | * @param stamp true -> prefix current timestamp in front of 'action' |
|---|
| 996 | * @return heap copy of concatenation |
|---|
| 997 | */ |
|---|
| 998 | size_t clen = comment ? strlen(comment) : 0; |
|---|
| 999 | size_t alen = strlen(action); |
|---|
| 1000 | |
|---|
| 1001 | GBS_strstruct *new_comment = GBS_stropen(clen+1+(stamp ? 100 : 0)+alen+1+1); // + 2*\n + \0 + space for stamp |
|---|
| 1002 | |
|---|
| 1003 | if (comment) { |
|---|
| 1004 | GBS_strcat(new_comment, comment); |
|---|
| 1005 | if (clen == 0 || comment[clen-1] != '\n') GBS_chrcat(new_comment, '\n'); |
|---|
| 1006 | } |
|---|
| 1007 | |
|---|
| 1008 | if (stamp) { |
|---|
| 1009 | char *dated_action = dated_info(action); |
|---|
| 1010 | GBS_strcat(new_comment, dated_action); |
|---|
| 1011 | free(dated_action); |
|---|
| 1012 | } |
|---|
| 1013 | else { |
|---|
| 1014 | GBS_strcat(new_comment, action); |
|---|
| 1015 | } |
|---|
| 1016 | if (alen == 0 || action[alen-1] != '\n') GBS_chrcat(new_comment, '\n'); |
|---|
| 1017 | |
|---|
| 1018 | return GBS_strclose(new_comment); |
|---|
| 1019 | } |
|---|
| 1020 | |
|---|
| 1021 | const char *GBS_funptr2readable(void *funptr, bool stripARBHOME) { |
|---|
| 1022 | // only returns module and offset for static functions :-( |
|---|
| 1023 | char **funNames = backtrace_symbols(&funptr, 1); |
|---|
| 1024 | const char *readable_fun = funNames[0]; |
|---|
| 1025 | |
|---|
| 1026 | if (stripARBHOME) { |
|---|
| 1027 | const char *ARBHOME = GB_getenvARBHOME(); |
|---|
| 1028 | if (ARB_strBeginsWith(readable_fun, ARBHOME)) { |
|---|
| 1029 | readable_fun += strlen(ARBHOME)+1; // +1 hides slash behind ARBHOME |
|---|
| 1030 | } |
|---|
| 1031 | } |
|---|
| 1032 | return readable_fun; |
|---|
| 1033 | } |
|---|
| 1034 | |
|---|
| 1035 | // -------------------------------------------------------------------------------- |
|---|
| 1036 | |
|---|
| 1037 | #ifdef UNIT_TESTS |
|---|
| 1038 | |
|---|
| 1039 | #include <test_unit.h> |
|---|
| 1040 | |
|---|
| 1041 | // #define TEST_TEST_MACROS |
|---|
| 1042 | |
|---|
// Helper functions for TEST_signal_tests__crashtest.
// They are only compiled if ENABLE_CRASH_TESTS is defined;
// the assertion-related helpers additionally require ASSERTION_USED.
#ifdef ENABLE_CRASH_TESTS
// raises SIGSEGV
static void provokesegv() { raise(SIGSEGV); }
// does nothing (counterpart of provokesegv)
static void dont_provokesegv() {}
# if defined(ASSERTION_USED)
// fails an assertion
static void failassertion() { gb_assert(0); }
# if defined(TEST_TEST_MACROS)
// does nothing (counterpart of failassertion; only needed when testing the test-macros)
static void dont_failassertion() {}
# endif
static void provokesegv_does_not_fail_assertion() {
    // provokesegv does not raise assertion
    // -> the following assertion fails
    TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv);
}
# endif
#endif
|---|
| 1058 | |
|---|
// Unit test for the crash-detecting test macros themselves:
// checks that segfaults and failing assertions are detected, can be detected
// repeatedly and can be distinguished from each other.
//
// NOTE(review): the helper functions passed to the macros are only defined
// if ENABLE_CRASH_TESTS (and partly ASSERTION_USED) is defined; presumably the
// macros do not reference their argument otherwise - confirm in test_unit.h
void TEST_signal_tests__crashtest() {
    // check whether we can test that no SEGV or assertion failure happened
    TEST_EXPECT_NO_SEGFAULT(dont_provokesegv);

    // check whether we can test for SEGV and assertion failures
    TEST_EXPECT_SEGFAULT(provokesegv);
    TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);

    // tests whether signal suppression works multiple times (by repeating tests)
    TEST_EXPECT_CODE_ASSERTION_FAILS(failassertion);
    TEST_EXPECT_SEGFAULT(provokesegv);

    // test whether SEGV can be distinguished from assertion
    TEST_EXPECT_CODE_ASSERTION_FAILS(provokesegv_does_not_fail_assertion);

    // The following section is disabled, because it will
    // provoke test warnings (to test these warnings).
    // (enable it when changing any of these TEST_..-macros used here)
#if defined(TEST_TEST_MACROS)
    TEST_EXPECT_NO_SEGFAULT__WANTED(provokesegv);

    TEST_EXPECT_SEGFAULT__WANTED(dont_provokesegv);
    TEST_EXPECT_SEGFAULT__UNWANTED(provokesegv);
#if defined(ASSERTION_USED)
    TEST_EXPECT_SEGFAULT__UNWANTED(failassertion);
#endif

    TEST_EXPECT_CODE_ASSERTION_FAILS__WANTED(dont_failassertion);
    TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(failassertion);
    TEST_EXPECT_CODE_ASSERTION_FAILS__UNWANTED(provokesegv_does_not_fail_assertion);
#endif
}
|---|
| 1091 | |
|---|
// expects that the current content of the local variable 'strstr' (a GBS_strstruct*) equals 'content'
#define EXPECT_CONTENT(content) TEST_EXPECT_EQUAL(GBS_mempntr(strstr), content)

// Unit test for GBS_strstruct:
// exercises the append functions, cutting off the tail and several open/forget cycles
// using different initial sizes.
void TEST_GBS_strstruct() {
    {
        GBS_strstruct *strstr = GBS_stropen(1000); EXPECT_CONTENT("");

        // append characters, numbers and strings
        GBS_chrncat(strstr, 'b', 3); EXPECT_CONTENT("bbb");
        GBS_intcat(strstr, 17); EXPECT_CONTENT("bbb17");
        GBS_chrcat(strstr, '_'); EXPECT_CONTENT("bbb17_");
        GBS_floatcat(strstr, 3.5); EXPECT_CONTENT("bbb17_3.500000");

        // remove 13 of the 14 characters stored, then continue appending
        TEST_EXPECT_EQUAL(GBS_memoffset(strstr), 14);
        GBS_str_cut_tail(strstr, 13); EXPECT_CONTENT("b");
        GBS_strcat(strstr, "utter"); EXPECT_CONTENT("butter");
        GBS_strncat(strstr, "flying", 3); EXPECT_CONTENT("butterfly");

        GBS_strnprintf(strstr, 200, "%c%s", ' ', "flutters");
        EXPECT_CONTENT("butterfly flutters");

        GBS_strforget(strstr);
    }
    {
        // re-alloc smaller
        GBS_strstruct *strstr = GBS_stropen(500); EXPECT_CONTENT("");
        GBS_strforget(strstr);
    }

    // trigger downsize of oversized block
    for (int i = 0; i<12; ++i) {
        GBS_strstruct *strstr = GBS_stropen(10);
        GBS_strforget(strstr);
    }

    {
        GBS_strstruct *strstr = GBS_stropen(10);
        size_t oldbufsize = strstr->get_buffer_size();
        GBS_chrncat(strstr, 'x', 20); // trigger reallocation of buffer

        TEST_EXPECT_DIFFERENT(oldbufsize, strstr->get_buffer_size()); // did we reallocate?
        EXPECT_CONTENT("xxxxxxxxxxxxxxxxxxxx");
        GBS_strforget(strstr);
    }
}
|---|
| 1135 | |
|---|
// applies GBS_shorten_repeated_data to a heap copy of 'Long'
// and expects the (in place) modified copy to equal 'Short'
#define TEST_SHORTENED_EQUALS(Long,Short) do { \
        char *buf = ARB_strdup(Long); \
        GBS_shorten_repeated_data(buf); \
        TEST_EXPECT_EQUAL(buf, Short); \
        free(buf); \
    } while(0)

// Unit test for GBS_shorten_repeated_data:
// the cases below show that runs of 5 or more identical characters are
// replaced by 'c{count}', whereas shorter runs remain unchanged.
void TEST_GBS_shorten_repeated_data() {
    TEST_SHORTENED_EQUALS("12345", "12345");
    TEST_SHORTENED_EQUALS("aaaaaaaaaaaabc", "a{12}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaaaaabc", "a{11}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaaaabc", "a{10}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaaabc", "a{9}bc");
    TEST_SHORTENED_EQUALS("aaaaaaaabc", "a{8}bc");
    TEST_SHORTENED_EQUALS("aaaaaaabc", "a{7}bc");
    TEST_SHORTENED_EQUALS("aaaaaabc", "a{6}bc");
    TEST_SHORTENED_EQUALS("aaaaabc", "a{5}bc");
    TEST_SHORTENED_EQUALS("aaaabc", "aaaabc");
    TEST_SHORTENED_EQUALS("aaabc", "aaabc");
    TEST_SHORTENED_EQUALS("aabc", "aabc");
    TEST_SHORTENED_EQUALS("", "");
}
|---|
| 1158 | |
|---|
// printf-style formats which place a key fragment at the first, the middle
// or the last position of a hierarchical key consisting of three parts
static const char *hkey_format[] = {
    "/%s/bbb/ccc",
    "/aaa/%s/ccc",
    "/aaa/bbb/%s",
};

// Inserts 'fragment' into hkey_format[pos] (pos has to be a valid index into hkey_format).
// Returns the result of GBS_global_string
// (NOTE(review): presumably a shared buffer which gets overwritten by later calls - confirm before storing the pointer)
inline const char *useInHkey(const char *fragment, size_t pos) {
    return GBS_global_string(hkey_format[pos], fragment);
}
|---|
| 1168 | |
|---|
// Places 'use' at each position defined by hkey_format and
// expects GB_check_hkey to accept each resulting hierarchical key.
// The tested key is annotated while it is checked; the annotation is reset afterwards.
#define TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(use) do { \
        for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
            const char *hkey = useInHkey(use, i); \
            TEST_ANNOTATE(hkey); \
            TEST_EXPECT_NO_ERROR(GB_check_hkey(hkey)); \
        } \
        TEST_ANNOTATE(NULp); \
    } while(0)

// Places 'use' at each position defined by hkey_format and
// expects GB_check_hkey to reject each resulting hierarchical key
// with an error message containing 'contains'.
#define TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(use,contains) do { \
        for (size_t i = 0; i<ARRAY_ELEMS(hkey_format); ++i) { \
            const char *hkey = useInHkey(use, i); \
            TEST_ANNOTATE(hkey); \
            TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(hkey), contains); \
        } \
        TEST_ANNOTATE(NULp); \
    } while(0)
|---|
| 1186 | |
|---|
| 1187 | |
|---|
// Unit test for GB_check_key (plain keys) and GB_check_hkey (hierarchical keys):
// tests length limits, invalid characters, empty keys and NULp keys.
void TEST_DB_key_checks() {
    // plain keys
    const char *shortest = "ab";
    const char *too_long = "ab345678901234567890123456789012345678901234567890123456789012345"; // 65 characters (digits show the position modulo 10)
    const char *too_short = shortest+1; // skips 1st character -> 1 character left
    const char *longest = too_long+1;   // skips 1st character -> 64 characters left

    const char *empty = "";
    const char *slash = "sub/key";
    const char *dslash = "sub//key";
    const char *comma = "no,key";
    const char *minus = "no-key";

    // obsolete GB_LINK syntax:
    const char *link = "link->syntax";
    const char *nowhere = "link->";
    const char *fromNw = "->syntax";

    TEST_EXPECT_NO_ERROR(GB_check_key(shortest));
    TEST_EXPECT_NO_ERROR(GB_check_key(longest));

    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_short), "too short");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(too_long), "too long");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(empty), "not allowed");

    // a slash is an invalid character inside a plain key
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(slash), "Invalid character '/'");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(dslash), "Invalid character '/'");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(comma), "Invalid character ','");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(minus), "Invalid character '-'");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(link), "Invalid character '-'");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(nowhere), "Invalid character '-'");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key(fromNw), "Invalid character '-'");

    // hierarchical keys
    TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(shortest);
    TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(longest);

    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_short, "too short");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(too_long, "too long");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(empty, "not allowed");

    // inside a hierarchical key a single slash is accepted (a double slash is reported as empty key)
    TEST_IN_HKEYS_USING_EXPECT_NO_ERROR(slash);
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(dslash, "Empty key is not allowed");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(comma, "Invalid character ','");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(minus, "Invalid character '-'");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(link, "Invalid character '-'");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(nowhere, "Invalid character '-'");
    TEST_IN_HKEYS_USING_EXPECT_ERROR_CONTAINS(fromNw, "Invalid character '-'");

    // test NULp keys:
    TEST_EXPECT_ERROR_CONTAINS(GB_check_key (NULp), "Empty key is not allowed");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(NULp), "Empty key is not allowed");

    // some edge cases for hierarchical keys:
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//"), "Empty key is not allowed");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("//key"), "Empty key is not allowed"); // @@@ is double slash compensated by GB_search etc? if yes -> accept here as well!
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key//"), "Empty key is not allowed");
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("/"), "Empty key is not allowed");
    TEST_EXPECT_NO_ERROR (GB_check_hkey("/key"));
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey("key/"), "Empty key is not allowed"); // @@@ use better message? e.g. "invalid trailing '/'"
    TEST_EXPECT_ERROR_CONTAINS(GB_check_hkey(""), "Empty key is not allowed");
}
|---|
| 1250 | |
|---|
// converts 'str' using GBS_string_2_key, expects the result to equal 'expected'
// and expects GB_check_key to accept the generated key
#define TEST_STRING2KEY(str,expected) do { \
        char *as_key = GBS_string_2_key(str); \
        TEST_EXPECT_EQUAL(as_key, expected); \
        TEST_EXPECT_NO_ERROR(GB_check_key(as_key)); \
        free(as_key); \
    } while(0)

// Unit test for GBS_string_2_key:
// spaces are converted into '_', invalid characters are dropped,
// too short results are filled up with '_' and too long results are truncated.
void TEST_DB_key_generation() {
    TEST_STRING2KEY("abc", "abc");
    TEST_STRING2KEY("a b c", "a_b_c");

    // invalid chars
    TEST_STRING2KEY("string containing \"double-quotes\", 'quotes' and other:shit!*&^@!%@(",
                    "string_containing_doublequotes_quotes_and_othershit");

    // length tests
    TEST_STRING2KEY("a", "a_"); // too short
    TEST_STRING2KEY("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", // too long
                    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
}
|---|
| 1271 | |
|---|
// Unit test for the helper class TextRef and for TaggedContentParser:
// parses mixed tagged/untagged input, untagged input, empty input,
// a tag without content and a multi-tag.
void TEST_TaggedContentParser() {
    // test helper class TextRef:
    TEST_REJECT(TextRef().defined()); // default to undefined
    {
        TextRef bla("blakjahd", 3); // refers to the first 3 characters only
        TEST_EXPECT(bla.defined());
        TEST_EXPECT_EQUAL(bla.get_length(), 3);

        TEST_EXPECT(bla.compare("bl") > 0);
        TEST_EXPECT(bla.compare("bla") == 0);
        TEST_EXPECT(bla.compare("blase") < 0);

        // NOTE(review): the length (10) exceeds the literal as written here;
        // presumably the literals of this block contained more blanks originally - verify
        TextRef spaced(" spaced "+1, 10);
        TEST_EXPECT(spaced.headTrimmed().compare("spaced ") == 0);
        TEST_EXPECT(spaced.tailTrimmed().compare(" spaced") == 0);
        TEST_EXPECT(spaced.trimmed ().compare("spaced") == 0);
    }

    const char *text = " untagged [tag] tagged [empty] ";

    // NOTE(review): the following four TextRefs are not used below - leftover or intended? confirm
    TextRef cr_untagged(strstr(text, "untagged"), 8);
    TextRef cr_tagged (strstr(text, "tagged"), 6);
    TextRef tr_tag (strstr(text, "tag"), 3);
    TextRef tr_empty (strstr(text, "empty"), 5);

    // test TaggedContentParser:
    {
        TaggedContentParser parser(text);

        // 1st part: untagged content
        TEST_EXPECT(parser.has_part());
        TEST_REJECT(parser.has_tag());
        TEST_EXPECT(parser.get_content().compare("untagged") == 0);

        parser.next();

        // 2nd part: tag with content
        TEST_EXPECT(parser.has_part());
        TEST_EXPECT(parser.get_tag ().compare("tag") == 0);
        TEST_EXPECT(parser.get_content().compare("tagged") == 0);

        parser.next();

        // 3rd part: tag without content
        TEST_EXPECT(parser.has_part());
        TEST_EXPECT(parser.get_tag().compare("empty") == 0);
        TEST_REJECT(parser.has_content());

        parser.next();

        TEST_REJECT(parser.has_part());
    }
    { // parse untagged input
        TaggedContentParser parser("hi");
        TEST_EXPECT(parser.has_part());
        TEST_REJECT(parser.has_tag());
        TEST_EXPECT(parser.get_content().compare("hi") == 0);
        parser.next();
        TEST_REJECT(parser.has_part());
    }
    { // parse empty input
        TaggedContentParser empty(""); TEST_REJECT(empty.has_part());
        TaggedContentParser white(" \t\n "); TEST_REJECT(white.has_part());
    }
    { // parse single tag w/o content
        TaggedContentParser parser(" [hello] ");
        TEST_EXPECT(parser.has_part());
        TEST_EXPECT(parser.get_tag().compare("hello") == 0);
        TEST_REJECT(parser.has_content());
        parser.next();
        TEST_REJECT(parser.has_part());
    }
    { // parse multi-tags
        // each tag of a multi-tag is reported as separate part (with identical content)
        TaggedContentParser parser(" [ t1 , t2 ] t");
        TEST_EXPECT(parser.has_part());
        TEST_EXPECT(parser.get_tag().compare("t1") == 0);
        TEST_EXPECT(parser.get_content().compare("t") == 0);
        parser.next();
        TEST_EXPECT(parser.has_part());
        TEST_EXPECT(parser.get_tag().compare("t2") == 0);
        TEST_EXPECT(parser.get_content().compare("t") == 0);
        parser.next();
        TEST_REJECT(parser.has_part());
    }
}
|---|
| 1354 | |
|---|
// Merges 's1' and 's2' using GBS_merge_tagged_strings and expects the result to equal 'expected'.
// Note the parameter order: the macro takes tags (t1,t2), replace-tags (r1,r2) and strings (s1,s2),
// whereas the function is called with (s1, t1, r1, s2, t2, r2).
#define TEST_MERGE_TAGGED(t1,t2,r1,r2,s1,s2,expected) do { \
        char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
        TEST_EXPECT_EQUAL(result, expected); \
        free(result); \
    } while(0)

// Same as TEST_MERGE_TAGGED, but checks the result via TEST_EXPECT_EQUAL__BROKEN
// (passing the 'expected' result and the result currently 'got').
#define TEST_MERGE_TAGGED__BROKEN(t1,t2,r1,r2,s1,s2,expected,got) do { \
        char *result = GBS_merge_tagged_strings(s1, t1, r1, s2, t2, r2); \
        TEST_EXPECT_EQUAL__BROKEN(result, expected, got); \
        free(result); \
    } while(0)
|---|
| 1366 | |
|---|
| 1367 | void TEST_merge_tagged_strings() { |
|---|
| 1368 | // merge two fields: |
|---|
| 1369 | const char *_0 = NULp; |
|---|
| 1370 | |
|---|
| 1371 | TEST_MERGE_TAGGED("S", "D", "", "", "source", "dest", "[D_] dest [S_] source"); |
|---|
| 1372 | TEST_MERGE_TAGGED("SRC", "DST", "", _0, "source", "dest", "[DST] dest [SRC] source"); |
|---|
| 1373 | TEST_MERGE_TAGGED("SRC", "DST", _0, "", "source", "dest", "[DST] dest [SRC] source"); |
|---|
| 1374 | TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "sth", "[DST,SRC] sth"); |
|---|
| 1375 | |
|---|
| 1376 | TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth", "sth", "[DST,SRC] sth"); // show default tags do not get deleted |
|---|
| 1377 | TEST_MERGE_TAGGED("SRC", "DST", "SRC", "DST", "sth [SRC] del", "sth [DST] del", "[DST,SRC] sth"); // exception: already present default tags |
|---|
| 1378 | |
|---|
| 1379 | // update fields: |
|---|
| 1380 | TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST] dest [SRC] source", "[DST] dest [SRC] newsource"); |
|---|
| 1381 | TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource"); |
|---|
| 1382 | TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "newsource", " [DST,src] sth", "[DST] sth [SRC] newsource"); |
|---|
| 1383 | TEST_MERGE_TAGGED("SRC", "DST", _0, "src", "newsource", " [DST,SRC] sth", "[DST] sth [SRC] newsource"); |
|---|
| 1384 | TEST_MERGE_TAGGED("SRC", "DST", _0, "SRC", "sth", " [DST] sth [SRC] source", "[DST,SRC] sth"); |
|---|
| 1385 | |
|---|
| 1386 | // append (opposed to update this keeps old entries with same tag; useless?) |
|---|
| 1387 | TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST] dest [SRC] source", "[DST] dest [SRC] newsource [SRC] source"); |
|---|
| 1388 | TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "newsource", "[DST,SRC] sth", "[DST,SRC] sth [SRC] newsource"); |
|---|
| 1389 | TEST_MERGE_TAGGED("SRC", "DST", _0, _0, "sth", "[DST] sth [SRC] source", "[DST,SRC] sth [SRC] source"); |
|---|
| 1390 | |
|---|
| 1391 | // merge three fields: |
|---|
| 1392 | TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST] dest [SRC] source", "[DST] dest [OTH] oth [SRC] source"); |
|---|
| 1393 | TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "oth", " [DST,SRC] sth", "[DST,SRC] sth [OTH] oth"); |
|---|
| 1394 | TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "sth", " [DST,SRC] sth", "[DST,OTH,SRC] sth"); |
|---|
| 1395 | TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "dest", " [DST] dest [SRC] source", "[DST,OTH] dest [SRC] source"); |
|---|
| 1396 | TEST_MERGE_TAGGED("OTH", "DST", _0, _0, "source", " [DST] dest [SRC] source", "[DST] dest [OTH,SRC] source"); |
|---|
| 1397 | |
|---|
| 1398 | // same tests as in section above, but vv: |
|---|
| 1399 | TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "oth", "[DST] dest [OTH] oth [SRC] source"); |
|---|
| 1400 | TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "oth", "[DST,SRC] sth [OTH] oth"); |
|---|
| 1401 | TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST,SRC] sth", "sth", "[DST,OTH,SRC] sth"); |
|---|
| 1402 | TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "dest", "[DST,OTH] dest [SRC] source"); |
|---|
| 1403 | TEST_MERGE_TAGGED("DST", "OTH", _0, _0, " [DST] dest [SRC] source", "source", "[DST] dest [OTH,SRC] source"); |
|---|
| 1404 | |
|---|
| 1405 | // test real-merges (content existing in both strings): |
|---|
| 1406 | TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre1 [C1] c1 [C2] c2", "pre2[C2]c2[C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1] pre1 [P2] pre2"); |
|---|
| 1407 | TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [C2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content for [C2] |
|---|
| 1408 | TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2", "pre [c2]c2 [C3]c3", "[C1] c1 [C2] c2 [C3] c3 [P1,P2] pre"); // identical content + different tag-case for [C2] (tests that tags are case-insensitive!) |
|---|
| 1409 | TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [C2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content for [C2] -> inserts that tag multiple times |
|---|
| 1410 | TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // continue processing last result (multiple tags with same name are handled) |
|---|
| 1411 | TEST_MERGE_TAGGED("P1", "P2", _0, _0, " [C1] c1 [C2] c2a [C2] c2b [C3] c3", "[C2] c2b [C3]c3 [C2] c2a", "[C1] c1 [C2] c2a [C2] c2b [C3] c3"); // merge multiple tags with same name |
|---|
| 1412 | TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1] c1 [C2] c2a", "pre [c2]c2b [C3]c3", "[C1] c1 [C2] c2a [C2] c2b [C3] c3 [P1,P2] pre"); // different content and different tag-case for [C2] |
|---|
| 1413 | TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [C1,C4] c1 [C2] c2a ", "pre [c2] c2b [C4,C3]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // multitags |
|---|
| 1414 | TEST_MERGE_TAGGED("P1", "P2", _0, _0, " pre [ C1, C4] c1 [C2 ] c2a ", "pre [ c2] c2b [C4, C3 ]c3", "[C1,C4] c1 [C2] c2a [C2] c2b [C3,C4] c3 [P1,P2] pre"); // spaced-multitags |
|---|
| 1415 | |
|---|
| 1416 | // merge two tagged string with deleting |
|---|
| 1417 | #define DSTSRC1 "[DST] dest1 [SRC] src1" |
|---|
| 1418 | #define DSTSRC2 "[DST] dest2 [SRC] src2" |
|---|
| 1419 | #define DSTSRC2LOW "[dst] dest2 [src] src2" |
|---|
| 1420 | |
|---|
| 1421 | TEST_MERGE_TAGGED("O1", "O2", _0, _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2"); |
|---|
| 1422 | TEST_MERGE_TAGGED("O1", "O2", "SRC", _0, DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src2"); |
|---|
| 1423 | TEST_MERGE_TAGGED("O1", "O2", _0, "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src1 [SRC] src2"); |
|---|
| 1424 | TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2"); |
|---|
| 1425 | TEST_MERGE_TAGGED("O1", "O2", "SRC", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2"); |
|---|
| 1426 | TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2"); |
|---|
| 1427 | TEST_MERGE_TAGGED("O1", "O2", "src", "DST", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2"); |
|---|
| 1428 | TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2, "[DST] dest1 [SRC] src2"); |
|---|
| 1429 | TEST_MERGE_TAGGED("O1", "O2", "SRC", "dst", DSTSRC1, DSTSRC2LOW, "[DST] dest1 [SRC] src2"); |
|---|
| 1430 | TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1"); |
|---|
| 1431 | TEST_MERGE_TAGGED("O1", "O2", "DST", "SRC", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1"); |
|---|
| 1432 | TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2, "[DST] dest2 [SRC] src1"); |
|---|
| 1433 | TEST_MERGE_TAGGED("O1", "O2", "dst", "src", DSTSRC1, DSTSRC2LOW, "[DST] dest2 [SRC] src1"); |
|---|
| 1434 | TEST_MERGE_TAGGED("O1", "O2", "SRC,DST", "DST,SRC", DSTSRC1, DSTSRC2, "[DST] dest1 [DST] dest2 [SRC] src1 [SRC] src2"); // delete does not handle multiple tags (yet) |
|---|
| 1435 | } |
|---|
| 1436 | |
|---|
| 1437 | __ATTR__REDUCED_OPTIMIZE void TEST_read_tagged() { |
|---|
| 1438 | GB_shell shell; |
|---|
| 1439 | GBDATA *gb_main = GB_open("new.arb", "c"); |
|---|
| 1440 | { |
|---|
| 1441 | GB_transaction ta(gb_main); |
|---|
| 1442 | |
|---|
| 1443 | { |
|---|
| 1444 | GBDATA *gb_int_entry = GB_create(gb_main, "int", GB_INT); |
|---|
| 1445 | TEST_EXPECT_NO_ERROR(GB_write_int(gb_int_entry, 4711)); |
|---|
| 1446 | TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_int_entry, "USELESS")); // reading from GB_INT doesn't make sense, but has to work w/o error |
|---|
| 1447 | |
|---|
| 1448 | GBDATA *gb_ints_entry = GB_create(gb_main, "int", GB_INTS); |
|---|
| 1449 | GB_UINT4 ints[] = { 1, 2 }; |
|---|
| 1450 | TEST_EXPECT_NO_ERROR(GB_write_ints(gb_ints_entry, ints, 2)); |
|---|
| 1451 | TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_ints_entry, "USELESS")); // reading from GB_INTS doesn't make sense, but has to work w/o error |
|---|
| 1452 | } |
|---|
| 1453 | |
|---|
| 1454 | #define TEST_EXPECT_TAG_CONTENT(tag,expected) TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag), expected) |
|---|
| 1455 | #define TEST_REJECT_TAG_CONTENT(tag) TEST_EXPECT_NORESULT__NOERROREXPORTED(GB_read_as_tagged_string(gb_entry, tag)) |
|---|
| 1456 | #define TEST_EXPECT_FULL_CONTENT(tag) TEST_EXPECT_TAG_CONTENT(tag,tagged_string) |
|---|
| 1457 | |
|---|
| 1458 | GBDATA *gb_entry = GB_create(gb_main, "str", GB_STRING); |
|---|
| 1459 | const char *tagged_string = "[T1,T2] t12 [T3] t3[T4]t4[][]xxx[AA]aa[WW]w1 [WW]w2 [BB]bb [XX]x1 [XX]x2 [yy] yy [Y] y [EMPTY][FAKE,EMPTY]fake[ SP1ST, SPACED, PADDED ,UNSPACED,_SCORED_,FOLLOWED ,FOLLAST ] spaced [LAST] last "; |
|---|
| 1460 | TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string)); |
|---|
| 1461 | |
|---|
| 1462 | TEST_EXPECT_FULL_CONTENT(NULp); |
|---|
| 1463 | TEST_EXPECT_FULL_CONTENT(""); |
|---|
| 1464 | TEST_REJECT_TAG_CONTENT(" "); // searches for tag '_' (no such tag) |
|---|
| 1465 | |
|---|
| 1466 | TEST_EXPECT_TAG_CONTENT("T1", "t12"); |
|---|
| 1467 | TEST_EXPECT_TAG_CONTENT("T2", "t12"); |
|---|
| 1468 | TEST_EXPECT_TAG_CONTENT("T3", "t3"); |
|---|
| 1469 | TEST_EXPECT_TAG_CONTENT("T4", "t4[][]xxx"); |
|---|
| 1470 | |
|---|
| 1471 | TEST_EXPECT_TAG_CONTENT("AA", "aa"); |
|---|
| 1472 | TEST_EXPECT_TAG_CONTENT("BB", "bb"); |
|---|
| 1473 | TEST_EXPECT_TAG_CONTENT("WW", "w1"); // now finds 1st occurrence of [WW] |
|---|
| 1474 | TEST_EXPECT_TAG_CONTENT("XX", "x1"); |
|---|
| 1475 | TEST_EXPECT_TAG_CONTENT("YY", "yy"); |
|---|
| 1476 | TEST_EXPECT_TAG_CONTENT("yy", "yy"); |
|---|
| 1477 | |
|---|
| 1478 | TEST_REJECT_TAG_CONTENT("Y"); |
|---|
| 1479 | // TEST_EXPECT_TAG_CONTENT("Y", "y"); // @@@ tags with length == 1 are never found -> should be handled when used via GUI |
|---|
| 1480 | |
|---|
| 1481 | TEST_EXPECT_TAG_CONTENT("EMPTY", "fake"); // now reports 1st non-empty content |
|---|
| 1482 | TEST_EXPECT_TAG_CONTENT("FAKE", "fake"); |
|---|
| 1483 | TEST_EXPECT_TAG_CONTENT("fake", "fake"); |
|---|
| 1484 | |
|---|
| 1485 | TEST_REJECT_TAG_CONTENT("NOSUCHTAG"); |
|---|
| 1486 | TEST_EXPECT_TAG_CONTENT("SPACED", "spaced"); |
|---|
| 1487 | TEST_EXPECT_TAG_CONTENT("SP1ST", "spaced"); |
|---|
| 1488 | TEST_REJECT_TAG_CONTENT(" SPACED"); // dito (specified space is converted into '_' before searching tag) |
|---|
| 1489 | TEST_REJECT_TAG_CONTENT("_SPACED"); // not found (tag stored with space, search performed for '_SPACED') |
|---|
| 1490 | TEST_EXPECT_TAG_CONTENT("PADDED", "spaced"); |
|---|
| 1491 | TEST_EXPECT_TAG_CONTENT("FOLLOWED", "spaced"); |
|---|
| 1492 | TEST_EXPECT_TAG_CONTENT("FOLLAST", "spaced"); |
|---|
| 1493 | |
|---|
| 1494 | TEST_EXPECT_TAG_CONTENT("_SCORED_", "spaced"); |
|---|
| 1495 | TEST_EXPECT_TAG_CONTENT(" SCORED ", "spaced"); |
|---|
| 1496 | TEST_EXPECT_TAG_CONTENT("UNSPACED", "spaced"); |
|---|
| 1497 | TEST_EXPECT_TAG_CONTENT("LAST", "last"); |
|---|
| 1498 | |
|---|
| 1499 | // test incomplete tags |
|---|
| 1500 | tagged_string = "bla [WHATEVER hello"; |
|---|
| 1501 | TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string)); |
|---|
| 1502 | TEST_REJECT_TAG_CONTENT("WHATEVER"); |
|---|
| 1503 | |
|---|
| 1504 | tagged_string = "bla [T1] t1 [T2 t2 [T3] t3"; |
|---|
| 1505 | TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string)); |
|---|
| 1506 | TEST_EXPECT_TAG_CONTENT("T1", "t1 [T2 t2"); |
|---|
| 1507 | TEST_REJECT_TAG_CONTENT("T2"); // tag is unclosed |
|---|
| 1508 | TEST_EXPECT_TAG_CONTENT("T3", "t3"); |
|---|
| 1509 | |
|---|
| 1510 | // test pathological tags |
|---|
| 1511 | tagged_string = "bla [T1] t1 [ ] sp1 [ ] sp2 [___] us [T3] t3 [_a] a"; |
|---|
| 1512 | TEST_EXPECT_NO_ERROR(GB_write_string(gb_entry, tagged_string)); |
|---|
| 1513 | TEST_EXPECT_TAG_CONTENT("T1", "t1 [ ] sp1 [ ] sp2"); |
|---|
| 1514 | TEST_EXPECT_FULL_CONTENT(""); |
|---|
| 1515 | TEST_REJECT_TAG_CONTENT(" "); |
|---|
| 1516 | TEST_REJECT_TAG_CONTENT(" "); |
|---|
| 1517 | TEST_REJECT_TAG_CONTENT(","); |
|---|
| 1518 | TEST_EXPECT_TAG_CONTENT(", a", "a"); // searches for tag '_a' |
|---|
| 1519 | TEST_EXPECT_TAG_CONTENT(", a,", "a"); // dito |
|---|
| 1520 | TEST_EXPECT_TAG_CONTENT(", ,a,", "a"); // dito |
|---|
| 1521 | TEST_EXPECT_TAG_CONTENT(" ", "us"); |
|---|
| 1522 | TEST_EXPECT_TAG_CONTENT("T3", "t3"); |
|---|
| 1523 | } |
|---|
| 1524 | GB_close(gb_main); |
|---|
| 1525 | } |
|---|
| 1526 | |
|---|
// Passes its first four arguments plus 'callEnv' to GBS_modify_tagged_string_with_ACI()
// and hands the result together with 'wanted' to
// TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED.
// A variable named 'callEnv' has to be in scope wherever this macro is used.
#define TEST_EXPECT_EVAL_TAGGED(input,defaultTag,selectedTag,command,wanted)                            \
    do {                                                                                                \
        TEST_EXPECT_EQUAL_STRINGCOPY__NOERROREXPORTED(                                                  \
            GBS_modify_tagged_string_with_ACI(input, defaultTag, selectedTag, command, callEnv),        \
            wanted);                                                                                    \
    }                                                                                                   \
    while (0)
|---|
| 1532 | |
|---|
// Counterpart of TEST_EXPECT_EVAL_TAGGED for failing evaluations:
// passes its first four arguments plus 'callEnv' to GBS_modify_tagged_string_with_ACI()
// and hands the result together with 'errorPart' to
// TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS.
// A variable named 'callEnv' has to be in scope wherever this macro is used.
#define TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(input,defaultTag,selectedTag,command,errorPart)          \
    do {                                                                                                \
        TEST_EXPECT_NORESULT__ERROREXPORTED_CONTAINS(                                                   \
            GBS_modify_tagged_string_with_ACI(input, defaultTag, selectedTag, command, callEnv),        \
            errorPart);                                                                                 \
    }                                                                                                   \
    while (0)
|---|
| 1538 | |
|---|
| 1539 | __ATTR__REDUCED_OPTIMIZE void TEST_tagged_eval() { |
|---|
| 1540 | GB_shell shell; |
|---|
| 1541 | GBDATA *gb_main = GB_open("TEST_loadsave.arb", "r"); |
|---|
| 1542 | { |
|---|
| 1543 | GB_transaction ta(gb_main); |
|---|
| 1544 | GBL_env env(gb_main, "tree_missing"); |
|---|
| 1545 | |
|---|
| 1546 | { |
|---|
| 1547 | GBDATA *gb_species = GBT_find_species(gb_main, "MhcBurto"); |
|---|
| 1548 | TEST_REJECT_NULL(gb_species); |
|---|
| 1549 | GBL_call_env callEnv(gb_species, env); |
|---|
| 1550 | |
|---|
| 1551 | TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", "", "[DEF] bla"); |
|---|
| 1552 | TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", NULp, "[DEF] bla"); |
|---|
| 1553 | TEST_EXPECT_EVAL_TAGGED("bla", "def", "tag", ":bla=blub", "[DEF] bla"); |
|---|
| 1554 | TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", ":bla=blub", "[TAG] blub"); |
|---|
| 1555 | TEST_EXPECT_EVAL_TAGGED("bla", "tag", "tag", "len", "[TAG] 3"); |
|---|
| 1556 | |
|---|
| 1557 | // empty tags: |
|---|
| 1558 | TEST_EXPECT_EVAL_TAGGED("[empty] ", "def", "empty", NULp, ""); |
|---|
| 1559 | TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx"); |
|---|
| 1560 | TEST_EXPECT_EVAL_TAGGED("[empty] [filled] xxx", "def", "empty", NULp, "[FILLED] xxx"); |
|---|
| 1561 | TEST_EXPECT_EVAL_TAGGED("[empty][filled] xxx", "def", "empty", NULp, "[FILLED] xxx"); |
|---|
| 1562 | TEST_EXPECT_EVAL_TAGGED("[filled] xxx [empty]", "def", "empty", NULp, "[FILLED] xxx"); |
|---|
| 1563 | |
|---|
| 1564 | #define THREE_TAGS "[TAG] tag [tip] tip [top] top" |
|---|
| 1565 | #define THREE_TAGS_UPCASE "[TAG] tag [TIP] tip [TOP] top" |
|---|
| 1566 | |
|---|
| 1567 | // dont eval: |
|---|
| 1568 | TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", NULp, THREE_TAGS_UPCASE); |
|---|
| 1569 | // eval SRT: |
|---|
| 1570 | TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TAG", ":*=<*>", "[TAG] <tag> [TIP] tip [TOP] top"); |
|---|
| 1571 | TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tag", ":*=<*>", "[TAG] <tag> [TIP] tip [TOP] top"); |
|---|
| 1572 | TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=(*)", "[TAG] tag [TIP] (tip) [TOP] top"); |
|---|
| 1573 | TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "TIP", ":*=(*)", "[TAG] tag [TIP] (tip) [TOP] top"); |
|---|
| 1574 | TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":*=", "[TAG] tag [TOP] top"); // tag emptied by SRT was removed from result |
|---|
| 1575 | TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", ":*=*-*1", "[TAG] tag [TIP] tip [TOP] top-top"); |
|---|
| 1576 | TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", ":i=o", "[TAG] tag [TIP,TOP] top"); // merge tags |
|---|
| 1577 | // eval ACI: |
|---|
| 1578 | TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "tip", "len", "[TAG] tag [TIP] 3 [TOP] top"); |
|---|
| 1579 | TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "top", "len", "[TAG] tag [TIP] tip [TOP] 3"); |
|---|
| 1580 | |
|---|
| 1581 | // test SRT/ACI errors: |
|---|
| 1582 | TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", ":*", "no '=' found"); |
|---|
| 1583 | TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("untagged", "def", "def", ":*", "no '=' found"); |
|---|
| 1584 | TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED(THREE_TAGS, "def", "top", "illcmd", "Unknown command 'illcmd'"); |
|---|
| 1585 | TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("un [tagged", "def", "def", "illcmd", "Unknown command 'illcmd'"); |
|---|
| 1586 | |
|---|
| 1587 | // no error raised, if expression not applied: |
|---|
| 1588 | TEST_EXPECT_EVAL_TAGGED(THREE_TAGS, "def", "no", "illcmd", THREE_TAGS_UPCASE); |
|---|
| 1589 | |
|---|
| 1590 | // incomplete tags |
|---|
| 1591 | TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":*=<*>", "[DEF] <{no tag>"); |
|---|
| 1592 | TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":* *=<*2,*1>", "[DEF] <tag,{no>"); |
|---|
| 1593 | TEST_EXPECT_EVAL_TAGGED("[no [tag", "def", "def", ":* *=<*2,*1>", "[DEF] <{tag,{no>"); |
|---|
| 1594 | TEST_EXPECT_EVAL_TAGGED("[no [tag] xx", "def", "def", ":* *=<*2,*1>", "[DEF] {no [TAG] xx"); // SRT changes nothing here (no match) |
|---|
| 1595 | TEST_EXPECT_EVAL_TAGGED("[no [tag[]", "def", "def", ":* *=<*2,*1>", "[DEF] <{tag{},{no>"); |
|---|
| 1596 | TEST_EXPECT_EVAL_TAGGED("[no [tag[] xx","def", "def", ":* *=<*2,*1>", "[DEF] <{tag{} xx,{no>"); |
|---|
| 1597 | TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":* *=<*2,*1>", "[DEF] <tag,no>"); |
|---|
| 1598 | TEST_EXPECT_EVAL_TAGGED("[no tag", "def", "def", ":no=yes", "[DEF] {yes tag"); |
|---|
| 1599 | TEST_EXPECT_EVAL_TAGGED("no tag", "def", "def", ":no=yes", "[DEF] yes tag"); |
|---|
| 1600 | TEST_EXPECT_EVAL_TAGGED("no tag", "def", "DEF", ":no=yes", "[DEF] yes tag"); |
|---|
| 1601 | TEST_EXPECT_EVAL_TAGGED("no tag", "DEF", "def", ":no=yes", "[DEF] yes tag"); |
|---|
| 1602 | TEST_EXPECT_EVAL_TAGGED("kept [trunk", "def", "def", ":*=<*>", "[DEF] <kept {trunk>"); |
|---|
| 1603 | TEST_EXPECT_EVAL_TAGGED("kept", "def", "def", ":*=<*>", "[DEF] <kept>"); |
|---|
| 1604 | } |
|---|
| 1605 | |
|---|
| 1606 | { |
|---|
| 1607 | GBDATA *gb_species = GBT_find_species(gb_main, "MetMazei"); |
|---|
| 1608 | TEST_REJECT_NULL(gb_species); |
|---|
| 1609 | GBL_call_env callEnv(gb_species, env); |
|---|
| 1610 | |
|---|
| 1611 | // run scripts using context: |
|---|
| 1612 | TEST_EXPECT_EVAL_TAGGED("[T1,T2] name='$n'", "def", "T1", ":$n=*(name)", "[T1] name='MetMazei' [T2] name='$n'"); |
|---|
| 1613 | TEST_EXPECT_EVAL_TAGGED("[T1,T2] seqlen=$l", "def", "T2", ":$l=*(|sequence|len)", "[T1] seqlen=$l [T2] seqlen=165"); |
|---|
| 1614 | TEST_EXPECT_EVAL_TAGGED("[T1,T2] nuc", "def", "T1", "dd;\"=\";command(sequence|count(ACGTUN))", "[T1] nuc=66 [T2] nuc"); |
|---|
| 1615 | |
|---|
| 1616 | TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax='$t'", "def", "def", ":$t=*(|taxonomy(2))", "Failed to read tree 'tree_missing' (Reason: tree not found)"); |
|---|
| 1617 | TEST_EXPECT_EVAL_TAGGED_ERROR_EXPORTED("tax", "def", "def", "dd;\"=\";taxonomy(2)", "Failed to read tree 'tree_missing' (Reason: tree not found)"); |
|---|
| 1618 | |
|---|
| 1619 | // content before 1st tag: |
|---|
| 1620 | TEST_EXPECT_EVAL_TAGGED("untagged [tag] tagged", "def", "tag", ":g=G", "[DEF] untagged [TAG] taGGed"); |
|---|
| 1621 | TEST_EXPECT_EVAL_TAGGED(" [tag] tagged", "def", "tag", ":g=G", "[TAG] taGGed"); |
|---|
| 1622 | |
|---|
| 1623 | // test elimination of leading/trailing whitespace: |
|---|
| 1624 | TEST_EXPECT_EVAL_TAGGED(" untagged ", "def", "def", ":g=G", "[DEF] untaGGed"); // untagged content |
|---|
| 1625 | TEST_EXPECT_EVAL_TAGGED("[tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed"); |
|---|
| 1626 | TEST_EXPECT_EVAL_TAGGED(" [trail] trail [tag] tagged ", "def", "tag", ":g=G", "[TAG] taGGed [TRAIL] trail"); |
|---|
| 1627 | |
|---|
| 1628 | #define MIXED_TAGS "[tag] tag [tip,top] tiptop [xx,yy,zz] zzz" |
|---|
| 1629 | |
|---|
| 1630 | TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "tip", ":tip=top", "[TAG] tag [TIP] toptop [TOP] tiptop [XX,YY,ZZ] zzz"); |
|---|
| 1631 | TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "yy", ":zzz=tiptop", "[TAG] tag [TIP,TOP,YY] tiptop [XX,ZZ] zzz"); |
|---|
| 1632 | TEST_EXPECT_EVAL_TAGGED(MIXED_TAGS, "def", "top", ":tiptop=zzz", "[TAG] tag [TIP] tiptop [TOP,XX,YY,ZZ] zzz"); |
|---|
| 1633 | } |
|---|
| 1634 | } |
|---|
| 1635 | GB_close(gb_main); |
|---|
| 1636 | } |
|---|
| 1637 | |
|---|
| 1638 | void TEST_log_action() { |
|---|
| 1639 | for (int stamped = 0; stamped<=1; ++stamped) { |
|---|
| 1640 | TEST_ANNOTATE(GBS_global_string("stamped=%i", stamped)); |
|---|
| 1641 | { |
|---|
| 1642 | char *logged = GBS_log_action_to("comment", "action", stamped); |
|---|
| 1643 | if (stamped) { |
|---|
| 1644 | TEST_EXPECT_CONTAINS(logged, "comment\n"); |
|---|
| 1645 | TEST_EXPECT_CONTAINS(logged, "action\n"); |
|---|
| 1646 | } |
|---|
| 1647 | else { |
|---|
| 1648 | TEST_EXPECT_EQUAL(logged, "comment\naction\n"); |
|---|
| 1649 | } |
|---|
| 1650 | free(logged); |
|---|
| 1651 | } |
|---|
| 1652 | { |
|---|
| 1653 | char *logged = GBS_log_action_to("comment\n", "action", stamped); |
|---|
| 1654 | if (stamped) { |
|---|
| 1655 | TEST_EXPECT_CONTAINS(logged, "comment\n"); |
|---|
| 1656 | TEST_EXPECT_CONTAINS(logged, "action\n"); |
|---|
| 1657 | } |
|---|
| 1658 | else { |
|---|
| 1659 | TEST_EXPECT_EQUAL(logged, "comment\naction\n"); |
|---|
| 1660 | } |
|---|
| 1661 | free(logged); |
|---|
| 1662 | } |
|---|
| 1663 | { |
|---|
| 1664 | char *logged = GBS_log_action_to("", "action", stamped); |
|---|
| 1665 | if (stamped) { |
|---|
| 1666 | TEST_EXPECT_EQUAL(logged[0], '\n'); |
|---|
| 1667 | TEST_EXPECT_CONTAINS(logged, "action\n"); |
|---|
| 1668 | } |
|---|
| 1669 | else { |
|---|
| 1670 | TEST_EXPECT_EQUAL(logged, "\naction\n"); |
|---|
| 1671 | } |
|---|
| 1672 | free(logged); |
|---|
| 1673 | } |
|---|
| 1674 | { |
|---|
| 1675 | char *logged = GBS_log_action_to(NULp, "action\n", stamped); // test action with trailing LF |
|---|
| 1676 | if (stamped) { |
|---|
| 1677 | TEST_EXPECT_DIFFERENT(logged[0], '\n'); |
|---|
| 1678 | TEST_EXPECT_CONTAINS(logged, "action\n"); |
|---|
| 1679 | } |
|---|
| 1680 | else { |
|---|
| 1681 | TEST_EXPECT_EQUAL(logged, "action\n"); |
|---|
| 1682 | } |
|---|
| 1683 | free(logged); |
|---|
| 1684 | } |
|---|
| 1685 | } |
|---|
| 1686 | } |
|---|
| 1687 | TEST_PUBLISH(TEST_log_action); |
|---|
| 1688 | |
|---|
| 1689 | #endif // UNIT_TESTS |
|---|
| 1690 | |
|---|