| 1 | // =============================================================== // |
|---|
| 2 | // // |
|---|
| 3 | // File : admatch.cxx // |
|---|
| 4 | // Purpose : functions related to string match/replace // |
|---|
| 5 | // // |
|---|
| 6 | // ReCoded for POSIX ERE // |
|---|
| 7 | // by Ralf Westram (coder@reallysoft.de) in April 2009 // |
|---|
| 8 | // Institute of Microbiology (Technical University Munich) // |
|---|
| 9 | // http://www.arb-home.de/ // |
|---|
| 10 | // // |
|---|
| 11 | // =============================================================== // |
|---|
| 12 | |
|---|
| 13 | #include "gb_local.h" |
|---|
| 14 | |
|---|
| 15 | #include "gb_aci_impl.h" |
|---|
| 16 | |
|---|
| 17 | #include <arb_strbuf.h> |
|---|
| 18 | #include <arb_match.h> |
|---|
| 19 | |
|---|
| 20 | #include <cctype> |
|---|
| 21 | |
|---|
| 22 | using namespace GBL_IMPL; |
|---|
| 23 | |
|---|
| 24 | // ------------------------ |
|---|
| 25 | // string matcher |
|---|
| 26 | |
|---|
// Kind of match performed by a GBS_string_matcher.
enum string_matcher_type {
    SM_INVALID    = -1, // no usable expression (initial state while compiling)
    SM_ANY        = 0,  // matches any string
    SM_WILDCARDED = 1,  // match with wildcards (GBS_string_matches)
    SM_REGEXPR    = 2,  // match using regexpr
};
|---|
| 33 | |
|---|
| 34 | struct GBS_string_matcher { |
|---|
| 35 | string_matcher_type type; |
|---|
| 36 | GB_CASE case_flag; |
|---|
| 37 | char *wildexpr; |
|---|
| 38 | GBS_regex *regexpr; |
|---|
| 39 | }; |
|---|
| 40 | |
|---|
| 41 | GBS_string_matcher *GBS_compile_matcher(const char *search_expr, GB_CASE case_flag) { |
|---|
| 42 | /* returns a valid string matcher (to be used with GBS_string_matches_regexp) |
|---|
| 43 | * or NULp (in which case an error was exported) |
|---|
| 44 | */ |
|---|
| 45 | |
|---|
| 46 | GBS_string_matcher *matcher = ARB_alloc<GBS_string_matcher>(1); |
|---|
| 47 | GB_ERROR error = NULp; |
|---|
| 48 | |
|---|
| 49 | matcher->type = SM_INVALID; |
|---|
| 50 | matcher->case_flag = case_flag; |
|---|
| 51 | matcher->wildexpr = NULp; |
|---|
| 52 | matcher->regexpr = NULp; |
|---|
| 53 | |
|---|
| 54 | if (search_expr[0] == '/') { |
|---|
| 55 | const char *end = strchr(search_expr, 0)-1; |
|---|
| 56 | if (end>search_expr && end[0] == '/') { |
|---|
| 57 | GB_CASE expr_attached_case; |
|---|
| 58 | const char *unwrapped_expr = GBS_unwrap_regexpr(search_expr, &expr_attached_case, &error); |
|---|
| 59 | |
|---|
| 60 | if (unwrapped_expr) { |
|---|
| 61 | if (expr_attached_case != GB_MIND_CASE) error = "format '/../i' not allowed here"; |
|---|
| 62 | else { |
|---|
| 63 | matcher->regexpr = GBS_compile_regexpr(unwrapped_expr, case_flag, &error); |
|---|
| 64 | if (matcher->regexpr) { |
|---|
| 65 | matcher->type = SM_REGEXPR; |
|---|
| 66 | } |
|---|
| 67 | } |
|---|
| 68 | } |
|---|
| 69 | } |
|---|
| 70 | } |
|---|
| 71 | |
|---|
| 72 | if (!matcher->regexpr && !error) { |
|---|
| 73 | if (strcmp(search_expr, "*") == 0) { |
|---|
| 74 | matcher->type = SM_ANY; |
|---|
| 75 | } |
|---|
| 76 | else { |
|---|
| 77 | matcher->type = SM_WILDCARDED; |
|---|
| 78 | matcher->wildexpr = ARB_strdup(search_expr); |
|---|
| 79 | } |
|---|
| 80 | } |
|---|
| 81 | |
|---|
| 82 | if (matcher->type == SM_INVALID) { |
|---|
| 83 | error = GBS_global_string("Failed to create GBS_string_matcher from '%s'", search_expr); |
|---|
| 84 | } |
|---|
| 85 | |
|---|
| 86 | if (error) { |
|---|
| 87 | GBS_free_matcher(matcher); |
|---|
| 88 | matcher = NULp; |
|---|
| 89 | GB_export_error(error); |
|---|
| 90 | } |
|---|
| 91 | return matcher; |
|---|
| 92 | } |
|---|
| 93 | |
|---|
| 94 | void GBS_free_matcher(GBS_string_matcher *matcher) { |
|---|
| 95 | free(matcher->wildexpr); |
|---|
| 96 | if (matcher->regexpr) GBS_free_regexpr(matcher->regexpr); |
|---|
| 97 | free(matcher); |
|---|
| 98 | } |
|---|
| 99 | |
|---|
| 100 | // ------------------------- |
|---|
| 101 | // wildcard search |
|---|
| 102 | |
|---|
| 103 | GB_CSTR GBS_find_string(GB_CSTR cont, GB_CSTR substr, int match_mode) { |
|---|
| 104 | /* search a substring in another string |
|---|
| 105 | * match_mode == 0 -> exact match |
|---|
| 106 | * match_mode == 1 -> a==A |
|---|
| 107 | * match_mode == 2 -> a==a && a==? |
|---|
| 108 | * match_mode == else -> a==A && a==? |
|---|
| 109 | */ |
|---|
| 110 | const char *p1, *p2; |
|---|
| 111 | char b; |
|---|
| 112 | |
|---|
| 113 | switch (match_mode) { |
|---|
| 114 | |
|---|
| 115 | case 0: // exact match |
|---|
| 116 | for (p1 = cont, p2 = substr; *p1;) { |
|---|
| 117 | if (!(b = *p2)) { |
|---|
| 118 | return (char *)cont; |
|---|
| 119 | } |
|---|
| 120 | else { |
|---|
| 121 | if (b == *p1) { |
|---|
| 122 | p1++; |
|---|
| 123 | p2++; |
|---|
| 124 | } |
|---|
| 125 | else { |
|---|
| 126 | p2 = substr; |
|---|
| 127 | p1 = (++cont); |
|---|
| 128 | } |
|---|
| 129 | } |
|---|
| 130 | } |
|---|
| 131 | if (!*p2) return (char *)cont; |
|---|
| 132 | break; |
|---|
| 133 | |
|---|
| 134 | case 1: // a==A |
|---|
| 135 | for (p1 = cont, p2 = substr; *p1;) { |
|---|
| 136 | if (!(b = *p2)) { |
|---|
| 137 | return (char *)cont; |
|---|
| 138 | } |
|---|
| 139 | else { |
|---|
| 140 | if (toupper(*p1) == toupper(b)) { |
|---|
| 141 | p1++; |
|---|
| 142 | p2++; |
|---|
| 143 | } |
|---|
| 144 | else { |
|---|
| 145 | p2 = substr; |
|---|
| 146 | p1 = (++cont); |
|---|
| 147 | } |
|---|
| 148 | } |
|---|
| 149 | } |
|---|
| 150 | if (!*p2) return (char *)cont; |
|---|
| 151 | break; |
|---|
| 152 | case 2: // a==a && a==? |
|---|
| 153 | for (p1 = cont, p2 = substr; *p1;) { |
|---|
| 154 | if (!(b = *p2)) { |
|---|
| 155 | return (char *)cont; |
|---|
| 156 | } |
|---|
| 157 | else { |
|---|
| 158 | if (b == *p1 || (b=='?')) { |
|---|
| 159 | p1++; |
|---|
| 160 | p2++; |
|---|
| 161 | } |
|---|
| 162 | else { |
|---|
| 163 | p2 = substr; |
|---|
| 164 | p1 = (++cont); |
|---|
| 165 | } |
|---|
| 166 | } |
|---|
| 167 | } |
|---|
| 168 | if (!*p2) return (char *)cont; |
|---|
| 169 | break; |
|---|
| 170 | |
|---|
| 171 | default: // a==A && a==? |
|---|
| 172 | for (p1 = cont, p2 = substr; *p1;) { |
|---|
| 173 | if (!(b = *p2)) { |
|---|
| 174 | return (char *)cont; |
|---|
| 175 | } |
|---|
| 176 | else { |
|---|
| 177 | if (toupper(*p1) == toupper(b) || (b=='?')) { |
|---|
| 178 | p1++; |
|---|
| 179 | p2++; |
|---|
| 180 | } |
|---|
| 181 | else { |
|---|
| 182 | p2 = substr; |
|---|
| 183 | p1 = (++cont); |
|---|
| 184 | } |
|---|
| 185 | } |
|---|
| 186 | } |
|---|
| 187 | if (!*p2) return (char *)cont; |
|---|
| 188 | break; |
|---|
| 189 | } |
|---|
| 190 | return NULp; |
|---|
| 191 | } |
|---|
| 192 | |
|---|
| 193 | bool GBS_string_matches(const char *str, const char *expr, GB_CASE case_sens) { |
|---|
| 194 | /* Wildcards in 'expr' string: |
|---|
| 195 | * ? one character |
|---|
| 196 | * * several characters |
|---|
| 197 | * |
|---|
| 198 | * if 'case_sens' == GB_IGNORE_CASE -> change all letters to uppercase |
|---|
| 199 | * |
|---|
| 200 | * returns true if strings are equal, false otherwise |
|---|
| 201 | */ |
|---|
| 202 | |
|---|
| 203 | const char *ps = str; |
|---|
| 204 | const char *pe = expr; |
|---|
| 205 | |
|---|
| 206 | while (1) { |
|---|
| 207 | char s = *ps; |
|---|
| 208 | char e = *pe; |
|---|
| 209 | |
|---|
| 210 | if (e == '*') { |
|---|
| 211 | if (!pe[1]) { // '*' at end of expression |
|---|
| 212 | break; // always match (even "nothing") |
|---|
| 213 | } |
|---|
| 214 | |
|---|
| 215 | const char *nextStar = ARB_strchrnul(pe+1, '*'); |
|---|
| 216 | int len = nextStar-pe-1; // part after '*' (and before EOS or next '*') |
|---|
| 217 | if (!nextStar[0]) { // no 2nd '*' found |
|---|
| 218 | // -> tail of string (if there is any) has to match |
|---|
| 219 | int psl = strlen(ps); // length of tail |
|---|
| 220 | if (psl<len) { // str-tail shorter than expr-tail |
|---|
| 221 | return false; // -> match impossible |
|---|
| 222 | } |
|---|
| 223 | |
|---|
| 224 | ps += psl-len; // skip over characters expected to match the '*' (=goto str-tail) |
|---|
| 225 | ++pe; // goto expr-tail |
|---|
| 226 | } |
|---|
| 227 | else { // found 2nd '*' -> search for string part between stars |
|---|
| 228 | { |
|---|
| 229 | char *part = ARB_strpartdup(pe+1, nextStar-1); |
|---|
| 230 | ps = GBS_find_string(ps, part, 2+(case_sens == GB_IGNORE_CASE)); // match with '?' wildcard |
|---|
| 231 | free(part); |
|---|
| 232 | } |
|---|
| 233 | |
|---|
| 234 | if (!ps) { |
|---|
| 235 | return false; |
|---|
| 236 | } |
|---|
| 237 | ps += len; |
|---|
| 238 | pe = nextStar; |
|---|
| 239 | } |
|---|
| 240 | continue; |
|---|
| 241 | } |
|---|
| 242 | |
|---|
| 243 | if (!s) { |
|---|
| 244 | return !e; |
|---|
| 245 | } |
|---|
| 246 | if (s != e) { |
|---|
| 247 | if (e != '?') { |
|---|
| 248 | if (!e) { |
|---|
| 249 | return !s; |
|---|
| 250 | } |
|---|
| 251 | if (case_sens == GB_IGNORE_CASE) { |
|---|
| 252 | s = toupper(s); |
|---|
| 253 | e = toupper(e); |
|---|
| 254 | if (s != e) { |
|---|
| 255 | return false; |
|---|
| 256 | } |
|---|
| 257 | } |
|---|
| 258 | else { |
|---|
| 259 | return false; |
|---|
| 260 | } |
|---|
| 261 | } |
|---|
| 262 | } |
|---|
| 263 | ps++; |
|---|
| 264 | pe++; |
|---|
| 265 | } |
|---|
| 266 | return true; |
|---|
| 267 | } |
|---|
| 268 | |
|---|
| 269 | bool GBS_string_matches_regexp(const char *str, const GBS_string_matcher *expr) { |
|---|
| 270 | /* Wildcard or regular expression match |
|---|
| 271 | * Returns true if match |
|---|
| 272 | * |
|---|
| 273 | * Use GBS_compile_matcher() and GBS_free_matcher() to maintain 'expr' |
|---|
| 274 | */ |
|---|
| 275 | bool matches = false; |
|---|
| 276 | |
|---|
| 277 | switch (expr->type) { |
|---|
| 278 | case SM_ANY: { |
|---|
| 279 | matches = true; |
|---|
| 280 | break; |
|---|
| 281 | } |
|---|
| 282 | case SM_WILDCARDED: { |
|---|
| 283 | matches = GBS_string_matches(str, expr->wildexpr, expr->case_flag); |
|---|
| 284 | break; |
|---|
| 285 | } |
|---|
| 286 | case SM_REGEXPR: { |
|---|
| 287 | matches = GBS_regmatch_compiled(str, expr->regexpr, NULp); |
|---|
| 288 | break; |
|---|
| 289 | } |
|---|
| 290 | case SM_INVALID: { |
|---|
| 291 | gb_assert(0); |
|---|
| 292 | break; |
|---|
| 293 | } |
|---|
| 294 | } |
|---|
| 295 | |
|---|
| 296 | return matches; |
|---|
| 297 | } |
|---|
| 298 | |
|---|
| 299 | // ----------------------------------- |
|---|
| 300 | // Search replace tool (SRT) |
|---|
| 301 | |
|---|
// Control characters which replace the (unescaped) SRT meta characters
// in a command prepared by gbs_compress_command():
#define GBS_SET   ((char)1) // replaces '='
#define GBS_SEP   ((char)2) // replaces ':'
#define GBS_MWILD ((char)3) // replaces '*'
#define GBS_WILD  ((char)4) // replaces '?'
|---|
| 306 | |
|---|
| 307 | __ATTR__USERESULT static GB_ERROR gbs_build_replace_string(GBS_strstruct& out, |
|---|
| 308 | char *replaceBy, // will be modified! |
|---|
| 309 | const char *sWildcards, long sWildMax, |
|---|
| 310 | const char*const *mWildcards, long mWildMax, |
|---|
| 311 | const GBL_call_env& callEnv) |
|---|
| 312 | { |
|---|
| 313 | int sWildAuto = 0; // count plain occurrences of '?' in replace string (ie. w/o number behind) |
|---|
| 314 | int mWildAuto = 0; // same for '*' |
|---|
| 315 | |
|---|
| 316 | GBDATA *gb_container = callEnv.get_item_ref(); |
|---|
| 317 | |
|---|
| 318 | char *p = replaceBy; |
|---|
| 319 | char c; |
|---|
| 320 | while ((c=*(p++))) { |
|---|
| 321 | switch (c) { |
|---|
| 322 | case GBS_MWILD: |
|---|
| 323 | case GBS_WILD: { |
|---|
| 324 | char d = *(p++); |
|---|
| 325 | if (d=='(') { // "*(..)" expressions |
|---|
| 326 | char *closingParen = search_matching_parenthesis(p); |
|---|
| 327 | |
|---|
| 328 | if (!closingParen) { |
|---|
| 329 | return GBS_global_string("Unbalanced parenthesis in '%s'", p-1); |
|---|
| 330 | } |
|---|
| 331 | |
|---|
| 332 | // found reference: "*(gbd)" |
|---|
| 333 | int separator = 0; |
|---|
| 334 | *closingParen = 0; |
|---|
| 335 | char *psym = strpbrk(p, "#|:"); |
|---|
| 336 | if (psym) { |
|---|
| 337 | separator = *psym; |
|---|
| 338 | *psym = 0; |
|---|
| 339 | } |
|---|
| 340 | |
|---|
| 341 | GBDATA *gb_entry = NULp; |
|---|
| 342 | if (*p) { // key was specified |
|---|
| 343 | if (!gb_container) { |
|---|
| 344 | return GBS_global_string("can't read key '%s' (called w/o database item)", p); |
|---|
| 345 | } |
|---|
| 346 | if (!GB_is_container(gb_container)) { |
|---|
| 347 | if (ARB_strBeginsWith(p, "../")) { // redirect search via parent |
|---|
| 348 | p += 3; |
|---|
| 349 | gb_container = GB_get_father(gb_container); |
|---|
| 350 | } |
|---|
| 351 | else { |
|---|
| 352 | return GBS_global_string("can't read key '%s' (DB item is no container)", p); |
|---|
| 353 | } |
|---|
| 354 | } |
|---|
| 355 | |
|---|
| 356 | gb_entry = GB_search(gb_container, p, GB_FIND); |
|---|
| 357 | callEnv.track_field_access(p); |
|---|
| 358 | |
|---|
| 359 | if (!gb_entry && GB_have_error()) { |
|---|
| 360 | return GB_await_error(); |
|---|
| 361 | } |
|---|
| 362 | } |
|---|
| 363 | else { |
|---|
| 364 | gb_entry = gb_container; |
|---|
| 365 | } |
|---|
| 366 | |
|---|
| 367 | if (psym) *psym = separator; |
|---|
| 368 | |
|---|
| 369 | char *entry = (gb_entry && gb_entry != gb_container) |
|---|
| 370 | ? GB_read_as_string(gb_entry) |
|---|
| 371 | : ARB_strdup(""); |
|---|
| 372 | |
|---|
| 373 | if (entry) { |
|---|
| 374 | char *h; |
|---|
| 375 | switch (separator) { |
|---|
| 376 | case ':': |
|---|
| 377 | h = GBS_string_eval_in_env(entry, psym+1, callEnv); |
|---|
| 378 | if (!h) { |
|---|
| 379 | free(entry); |
|---|
| 380 | return GB_await_error(); |
|---|
| 381 | } |
|---|
| 382 | |
|---|
| 383 | out.cat(h); |
|---|
| 384 | free(h); |
|---|
| 385 | break; |
|---|
| 386 | |
|---|
| 387 | case '|': |
|---|
| 388 | h = GB_command_interpreter_in_env(entry, psym+1, callEnv); |
|---|
| 389 | if (!h) { |
|---|
| 390 | free(entry); |
|---|
| 391 | return GB_await_error(); |
|---|
| 392 | } |
|---|
| 393 | |
|---|
| 394 | out.cat(h); |
|---|
| 395 | free(h); |
|---|
| 396 | break; |
|---|
| 397 | |
|---|
| 398 | case '#': |
|---|
| 399 | if (!entry[0]) { // missing field or empty content |
|---|
| 400 | out.cat(psym+1); |
|---|
| 401 | break; |
|---|
| 402 | } |
|---|
| 403 | // fall-through |
|---|
| 404 | default: |
|---|
| 405 | out.cat(entry); |
|---|
| 406 | break; |
|---|
| 407 | } |
|---|
| 408 | free(entry); |
|---|
| 409 | } |
|---|
| 410 | *closingParen = ')'; |
|---|
| 411 | p = closingParen+1; |
|---|
| 412 | } |
|---|
| 413 | else { |
|---|
| 414 | int wildcard_num = d - '1'; |
|---|
| 415 | bool followed_by_number = wildcard_num>=0 && wildcard_num<=9; // @@@ in fact this will also accept ':' |
|---|
| 416 | |
|---|
| 417 | if (c == GBS_WILD) { |
|---|
| 418 | if (!followed_by_number) { // char behind wildcard is not in [1-9] |
|---|
| 419 | --p; // "put back" that character |
|---|
| 420 | wildcard_num = sWildAuto++; |
|---|
| 421 | } |
|---|
| 422 | if (wildcard_num>=sWildMax) { |
|---|
| 423 | out.put('?'); |
|---|
| 424 | } |
|---|
| 425 | else { |
|---|
| 426 | out.put(sWildcards[wildcard_num]); |
|---|
| 427 | } |
|---|
| 428 | } |
|---|
| 429 | else { |
|---|
| 430 | if (!followed_by_number) { // char behind wildcard is not in [1-9] |
|---|
| 431 | --p; // "put back" that character |
|---|
| 432 | wildcard_num = mWildAuto++; |
|---|
| 433 | } |
|---|
| 434 | if (wildcard_num>=mWildMax) { |
|---|
| 435 | out.put('*'); |
|---|
| 436 | } |
|---|
| 437 | else { |
|---|
| 438 | out.cat(mWildcards[wildcard_num]); |
|---|
| 439 | } |
|---|
| 440 | } |
|---|
| 441 | } |
|---|
| 442 | break; |
|---|
| 443 | } |
|---|
| 444 | default: |
|---|
| 445 | out.put(c); |
|---|
| 446 | break; |
|---|
| 447 | } |
|---|
| 448 | } |
|---|
| 449 | return NULp; |
|---|
| 450 | } |
|---|
| 451 | |
|---|
| 452 | static char *gbs_compress_command(const char *com) { |
|---|
| 453 | /* Prepare SRT. |
|---|
| 454 | * |
|---|
| 455 | * Replaces all |
|---|
| 456 | * '=' by GBS_SET |
|---|
| 457 | * ':' by GBS_SEP |
|---|
| 458 | * '?' by GBS_WILD if followed by a number or '?' |
|---|
| 459 | * '*' by GBS_MWILD or '(' |
|---|
| 460 | * \ is the escape character |
|---|
| 461 | */ |
|---|
| 462 | |
|---|
| 463 | char *result = ARB_strdup(com); |
|---|
| 464 | char *d = result; |
|---|
| 465 | const char *s = result; |
|---|
| 466 | char ch; |
|---|
| 467 | |
|---|
| 468 | while ((ch = *(s++))) { |
|---|
| 469 | switch (ch) { |
|---|
| 470 | case '=': *(d++) = GBS_SET; break; |
|---|
| 471 | case ':': *(d++) = GBS_SEP; break; |
|---|
| 472 | case '?': *(d++) = GBS_WILD; break; |
|---|
| 473 | case '*': *(d++) = GBS_MWILD; break; |
|---|
| 474 | case '\\': |
|---|
| 475 | ch = *(s++); if (!ch) { s--; break; }; |
|---|
| 476 | switch (ch) { |
|---|
| 477 | case 'n': *(d++) = '\n'; break; |
|---|
| 478 | case 't': *(d++) = '\t'; break; |
|---|
| 479 | case '0': *(d++) = '\0'; break; |
|---|
| 480 | default: *(d++) = ch; break; |
|---|
| 481 | } |
|---|
| 482 | break; |
|---|
| 483 | |
|---|
| 484 | default: *(d++) = ch; break; |
|---|
| 485 | } |
|---|
| 486 | } |
|---|
| 487 | *d = 0; |
|---|
| 488 | return result; |
|---|
| 489 | } |
|---|
| 490 | |
|---|
| 491 | // AISC_MKPT_PROMOTE: class GBL_call_env; |
|---|
| 492 | |
|---|
| 493 | char *GBS_string_eval_in_env(const char *insource, const char *icommand, const GBL_call_env& callEnv) { |
|---|
| 494 | /* GBS_string_eval_in_env replaces substrings in source (implements SRT) |
|---|
| 495 | * Syntax: command = "oliver=olli:peter=peti" |
|---|
| 496 | * |
|---|
| 497 | * Returns a heapcopy of result of replacement. |
|---|
| 498 | * |
|---|
| 499 | * * is a wildcard for any number of characters |
|---|
| 500 | * ? is a wildcard for exactly one character |
|---|
| 501 | * |
|---|
| 502 | * To reference the parts matched by wildcards on the left side of the '=' use '?' and '*', |
|---|
| 503 | * to reference in a particular order use |
|---|
| 504 | * *1 to reference to the first occurrence of * |
|---|
| 505 | * *2 ----------"-------- second ------"------- |
|---|
| 506 | * ... |
|---|
| 507 | * *9 ----------"-------- ninth -------"------- |
|---|
| 508 | * |
|---|
| 509 | * If the first and last characters of the search string are no '*' wildcards, |
|---|
| 510 | * then the replace is repeated as many times as possible. |
|---|
| 511 | * |
|---|
| 512 | * '\' is the escape character: e.g. \n is newline; '\\' is '\'; '\=' is '='; .... |
|---|
| 513 | * |
|---|
| 514 | * If the passed GBL_call_env refers to a database entry (which has to be of type GB_DB, i.e. has to be a container), |
|---|
| 515 | * fields of that container may be inserted using |
|---|
| 516 | * |
|---|
| 517 | * *(arb_field) is the value of the containers child entry 'arb_field' |
|---|
| 518 | * *(arb_field#string) value of the child entry 'arb_field' or 'string' (if that entry does not exist) |
|---|
| 519 | * *(arb_field\:SRT) runs SRT recursively on the value of the child entry 'arb_field' |
|---|
| 520 | * *([arb_field]|ACI) runs the ACI command interpreter on the value of the child entry 'arb_field' (or on an empty string) |
|---|
| 521 | * |
|---|
| 522 | * If an error occurs it returns NULp - in this case the error-message gets exported! |
|---|
| 523 | * |
|---|
| 524 | * Notes: |
|---|
| 525 | * - global interpreter (SRT+ACI+REG) is provided by GB_command_interpreter_in_env() |
|---|
| 526 | * - REG is provided by GBS_regreplace(), GBS_regmatch() and GBS_regmatch_compiled() |
|---|
| 527 | * - ACI is only provided via GB_command_interpreter_in_env() |
|---|
| 528 | */ |
|---|
| 529 | if (!icommand || !icommand[0]) { |
|---|
| 530 | return ARB_strdup(insource); |
|---|
| 531 | } |
|---|
| 532 | |
|---|
| 533 | if (traceACI) { |
|---|
| 534 | print_trace(GBS_global_string("SR: in='%s' cmd='%s':\n", insource, icommand)); |
|---|
| 535 | } |
|---|
| 536 | LocallyModify<int> inc(traceIndent, traceIndent+1); |
|---|
| 537 | |
|---|
| 538 | char *command = gbs_compress_command(icommand); |
|---|
| 539 | |
|---|
| 540 | // copy insource (to allow to modify it) |
|---|
| 541 | size_t inlen = strlen(insource); |
|---|
| 542 | GBS_strstruct in(inlen+1); |
|---|
| 543 | in.ncat(insource, inlen); |
|---|
| 544 | |
|---|
| 545 | GBS_strstruct out(inlen+500); |
|---|
| 546 | |
|---|
| 547 | GB_ERROR error = NULp; |
|---|
| 548 | char *next_subcmd; |
|---|
| 549 | for (char *subcmd = command; subcmd; subcmd = next_subcmd) { // loop over sub-commands |
|---|
| 550 | // search next subcommand (=pos behind next colon): |
|---|
| 551 | next_subcmd = strchr(subcmd, GBS_SEP); |
|---|
| 552 | if (next_subcmd) *(next_subcmd++) = 0; |
|---|
| 553 | |
|---|
| 554 | if (!subcmd[0]) continue; // empty subcommand -> do nothing |
|---|
| 555 | |
|---|
| 556 | // search for replace string: |
|---|
| 557 | char *replaceBy = strchr(subcmd+1, GBS_SET); |
|---|
| 558 | if (!replaceBy) { |
|---|
| 559 | error = GBS_global_string("SRT ERROR: no '=' found in command '%s' (position > %zi)", icommand, subcmd-command+1); |
|---|
| 560 | break; |
|---|
| 561 | } |
|---|
| 562 | *(replaceBy++) = 0; |
|---|
| 563 | |
|---|
| 564 | GB_CSTR not_yet_copied = in.get_data(); // point into 'in' string (to not-yet-copied part) |
|---|
| 565 | out.erase(); |
|---|
| 566 | |
|---|
| 567 | if (in.empty() && subcmd[0] == GBS_MWILD && subcmd[1] == 0) { |
|---|
| 568 | // plain '*' shall also match an empty input string -> handle manually here |
|---|
| 569 | const char *empty = ""; |
|---|
| 570 | error = gbs_build_replace_string(out, replaceBy, NULp, 0, &empty, 1, callEnv); |
|---|
| 571 | } |
|---|
| 572 | else { |
|---|
| 573 | char sWildcard[40]; // character which matched vs one '?' |
|---|
| 574 | char *mWildcard[10]; // substrings which matched vs one '*' |
|---|
| 575 | long sWildSeen = 0; // number of '?' seen (on left side on subcommand) |
|---|
| 576 | long mWildSeen = 0; // number of '*' seen (on left side on subcommand) |
|---|
| 577 | |
|---|
| 578 | bool match_failed = false; |
|---|
| 579 | for (GB_CSTR source = not_yet_copied; *source; ) { // loop over string |
|---|
| 580 | gb_assert(!match_failed); |
|---|
| 581 | |
|---|
| 582 | char *search = subcmd; |
|---|
| 583 | GB_CSTR start_match = NULp; // start of string that matches a wildcard (none yet) |
|---|
| 584 | |
|---|
| 585 | char c; |
|---|
| 586 | while (!match_failed && (c = *(search++))) { // match expression vs. string |
|---|
| 587 | switch (c) { |
|---|
| 588 | case GBS_MWILD: { |
|---|
| 589 | if (!start_match) start_match = source; |
|---|
| 590 | |
|---|
| 591 | char *start_of_wildcard = search; |
|---|
| 592 | if (!(c = *(search++))) { // last character is a '*' wildcard -> expression matched |
|---|
| 593 | mWildcard[mWildSeen++] = ARB_strdup(source); |
|---|
| 594 | source = strchr(source, 0); // jump to EOS |
|---|
| 595 | --search; |
|---|
| 596 | break; // (effectively does exit while-loop) |
|---|
| 597 | } |
|---|
| 598 | // @@@ 'c' read in above if-condition is ignored if non-zero (got tests) |
|---|
| 599 | |
|---|
| 600 | while ((c=*(search++)) && c!=GBS_MWILD && c!=GBS_WILD) ; // search the next wildcardstring |
|---|
| 601 | |
|---|
| 602 | search--; // back one character |
|---|
| 603 | *search = 0; |
|---|
| 604 | |
|---|
| 605 | char what_wild_card = c; |
|---|
| 606 | GB_CSTR p = GBS_find_string(source, start_of_wildcard, 0); |
|---|
| 607 | |
|---|
| 608 | if (!p) match_failed = true; // string behind wildcard does not appear in input -> no match |
|---|
| 609 | else { |
|---|
| 610 | mWildcard[mWildSeen++] = ARB_strpartdup(source, p-1); |
|---|
| 611 | source = p + strlen(start_of_wildcard); |
|---|
| 612 | *search = what_wild_card; |
|---|
| 613 | } |
|---|
| 614 | break; |
|---|
| 615 | } |
|---|
| 616 | case GBS_WILD: |
|---|
| 617 | if (!source[0]) match_failed = true; // '?' does not match "nothing" -> no match |
|---|
| 618 | else { |
|---|
| 619 | if (!start_match) start_match = source; |
|---|
| 620 | sWildcard[sWildSeen++] = *(source++); |
|---|
| 621 | } |
|---|
| 622 | break; |
|---|
| 623 | |
|---|
| 624 | default: |
|---|
| 625 | if (start_match) { |
|---|
| 626 | if (c != *(source++)) match_failed = true; // mismatch after '?' or after last '*' |
|---|
| 627 | } |
|---|
| 628 | else { |
|---|
| 629 | char *buf1 = search-1; |
|---|
| 630 | |
|---|
| 631 | while ((c=*(search++)) && c != GBS_MWILD && c!=GBS_WILD) ; // search the next wildcardstring |
|---|
| 632 | |
|---|
| 633 | search--; // back one character |
|---|
| 634 | *search = 0; |
|---|
| 635 | |
|---|
| 636 | char what_wild_card = c; |
|---|
| 637 | GB_CSTR p = GBS_find_string(source, buf1, 0); |
|---|
| 638 | if (!p) { |
|---|
| 639 | // string infrontof wildcard (or EOS) not found -> no match |
|---|
| 640 | match_failed = true; |
|---|
| 641 | } |
|---|
| 642 | else { |
|---|
| 643 | start_match = p; |
|---|
| 644 | source = p + strlen(buf1); |
|---|
| 645 | *search = what_wild_card; |
|---|
| 646 | } |
|---|
| 647 | |
|---|
| 648 | } |
|---|
| 649 | break; |
|---|
| 650 | } |
|---|
| 651 | } |
|---|
| 652 | |
|---|
| 653 | if (!match_failed) { |
|---|
| 654 | /* now we got |
|---|
| 655 | * |
|---|
| 656 | * in: GBS_strstruct containing entire input string |
|---|
| 657 | * source: pointer to end of match (inside 'in') |
|---|
| 658 | * start_match: pointer to start of match (inside 'in') |
|---|
| 659 | * not_yet_copied: pointer to the not-copied part of the input string |
|---|
| 660 | * replaceBy: the replace string |
|---|
| 661 | */ |
|---|
| 662 | |
|---|
| 663 | // now look for the replace string |
|---|
| 664 | out.ncat(not_yet_copied, start_match-not_yet_copied); // concat part before the match |
|---|
| 665 | error = gbs_build_replace_string(out, replaceBy, sWildcard, sWildSeen, mWildcard, mWildSeen, callEnv); // execute SRT command |
|---|
| 666 | not_yet_copied = source; |
|---|
| 667 | } |
|---|
| 668 | |
|---|
| 669 | for (long i = 0; i < mWildSeen; i++) { |
|---|
| 670 | freenull(mWildcard[i]); |
|---|
| 671 | } |
|---|
| 672 | sWildSeen = 0; |
|---|
| 673 | mWildSeen = 0; |
|---|
| 674 | |
|---|
| 675 | if (error || match_failed) break; |
|---|
| 676 | } |
|---|
| 677 | } |
|---|
| 678 | |
|---|
| 679 | // Note: reached when left side expression didn't match input string |
|---|
| 680 | // (also reached when done with current sub-expression) |
|---|
| 681 | if (error) break; |
|---|
| 682 | |
|---|
| 683 | out.cat(not_yet_copied); // cat the rest of the input |
|---|
| 684 | |
|---|
| 685 | if (traceACI) { |
|---|
| 686 | print_trace(GBS_global_string("'%s' -> '%s'\n", in.get_data(), out.get_data())); |
|---|
| 687 | } |
|---|
| 688 | |
|---|
| 689 | in.swap_content(out); |
|---|
| 690 | } |
|---|
| 691 | free(command); |
|---|
| 692 | if (error) { |
|---|
| 693 | GB_export_error(error); |
|---|
| 694 | return NULp; |
|---|
| 695 | } |
|---|
| 696 | return in.release(); |
|---|
| 697 | } |
|---|
| 698 | |
|---|
| 699 | char *GBS_string_eval(const char *insource, const char *icommand) { |
|---|
| 700 | GBL_env env(NULp, NULp); |
|---|
| 701 | GBL_call_env callEnv(NULp, env); |
|---|
| 702 | |
|---|
| 703 | return GBS_string_eval_in_env(insource, icommand, callEnv); |
|---|
| 704 | } |
|---|
| 705 | |
|---|
| 706 | // -------------------------------------------------------------------------------- |
|---|
| 707 | |
|---|
| 708 | #ifdef UNIT_TESTS |
|---|
| 709 | #ifndef TEST_UNIT_H |
|---|
| 710 | #include <test_unit.h> |
|---|
| 711 | #endif |
|---|
| 712 | |
|---|
| 713 | #include <arb_strarray.h> |
|---|
| 714 | |
|---|
| 715 | static char *tokenMatchResults(const char *expr, GB_CASE caseDef, const char *tokenStr) { |
|---|
| 716 | ConstStrArray token; |
|---|
| 717 | GBT_split_string(token,tokenStr, ';'); |
|---|
| 718 | |
|---|
| 719 | GBS_string_matcher *matcher = GBS_compile_matcher(expr, caseDef); |
|---|
| 720 | for (int t = 0; token[t]; ++t) { |
|---|
| 721 | bool matched = GBS_string_matches_regexp(token[t], matcher); |
|---|
| 722 | token.replace(t, matched ? "1" : "0"); |
|---|
| 723 | } |
|---|
| 724 | GBS_free_matcher(matcher); |
|---|
| 725 | return GBT_join_strings(token, 0); |
|---|
| 726 | } |
|---|
| 727 | |
|---|
| 728 | |
|---|
// Matches all ';'-separated tokens of 'tokenStr' against 'expr' (see tokenMatchResults)
// and expects the resulting '0'/'1' string to equal 'expected'.
#define TEST_MATCH_TOKENS(expr,caseDef,tokenStr,expected) do{           \
        char *results = tokenMatchResults(expr, caseDef, tokenStr);     \
        TEST_EXPECT_EQUAL(results, expected);                           \
        free(results);                                                  \
    }while(0)

// Same as TEST_MATCH_TOKENS, but for known-broken behavior:
// 'expected' is the wanted result, 'got' is the result currently produced.
#define TEST_MATCH_TOKENS__BROKEN(expr,caseDef,tokenStr,expected,got) do{       \
        char *results = tokenMatchResults(expr, caseDef, tokenStr);             \
        TEST_EXPECT_EQUAL__BROKEN(results, expected, got);                      \
        free(results);                                                          \
    }while(0)
|---|
| 740 | |
|---|
| 741 | void TEST_matcher() { |
|---|
| 742 | TEST_MATCH_TOKENS("???", GB_MIND_CASE, "ab;abc;abcd", "010"); // only matches 2nd string |
|---|
| 743 | TEST_MATCH_TOKENS("???*", GB_MIND_CASE, "ab;abc;abcd", "011"); // match at least 3 characters |
|---|
| 744 | TEST_MATCH_TOKENS("?*", GB_MIND_CASE, ";a;ab;abc", "0111"); // match at least 1 character |
|---|
| 745 | |
|---|
| 746 | TEST_MATCH_TOKENS("a*c", GB_MIND_CASE, "ca;ab;abc;abC;ABC;abcd", "001000"); |
|---|
| 747 | TEST_MATCH_TOKENS("a*c", GB_IGNORE_CASE, "ca;ab;abc;abC;ABC;abcd", "001110"); |
|---|
| 748 | |
|---|
| 749 | TEST_MATCH_TOKENS("a*c*a", GB_MIND_CASE, "aca;aaacccaaa;a--c--a;acac;acaca;aba", "111010"); |
|---|
| 750 | TEST_MATCH_TOKENS("a*c?d*c", GB_MIND_CASE, "acxdc;a--cxd--c;acxdcxdcxdc;acdcdcdc", "1110"); |
|---|
| 751 | |
|---|
| 752 | TEST_MATCH_TOKENS("???*.c", GB_MIND_CASE, "ab.c;abc.c;abcd.c", "011"); |
|---|
| 753 | TEST_MATCH_TOKENS("?b.*.c", GB_MIND_CASE, "ab.c;ab..c;ab.x.c", "011"); |
|---|
| 754 | TEST_MATCH_TOKENS("rere*erer", GB_MIND_CASE, "rerer;rererer;rerererer;rererererer", "0011"); |
|---|
| 755 | } |
|---|
| 756 | |
|---|
| 757 | TEST_PUBLISH(TEST_matcher); |
|---|
| 758 | |
|---|
| 759 | #endif // UNIT_TESTS |
|---|
| 760 | |
|---|
| 761 | // -------------------------------------------------------------------------------- |
|---|
| 762 | |
|---|