1 | // =============================================================== // |
---|
2 | // // |
---|
3 | // File : admatch.cxx // |
---|
4 | // Purpose : functions related to string match/replace // |
---|
5 | // // |
---|
6 | // ReCoded for POSIX ERE // |
---|
7 | // by Ralf Westram (coder@reallysoft.de) in April 2009 // |
---|
8 | // Institute of Microbiology (Technical University Munich) // |
---|
9 | // http://www.arb-home.de/ // |
---|
10 | // // |
---|
11 | // =============================================================== // |
---|
12 | |
---|
13 | #include "gb_local.h" |
---|
14 | |
---|
15 | #include "gb_aci_impl.h" |
---|
16 | |
---|
17 | #include <arb_strbuf.h> |
---|
18 | #include <arb_match.h> |
---|
19 | |
---|
20 | #include <cctype> |
---|
21 | |
---|
22 | using namespace GBL_IMPL; |
---|
23 | |
---|
24 | // ------------------------ |
---|
25 | // string matcher |
---|
26 | |
---|
// Kind of expression held by a GBS_string_matcher.
enum string_matcher_type {
    SM_INVALID    = -1, // initial state (nothing compiled successfully)
    SM_ANY        = 0,  // matches any string
    SM_WILDCARDED = 1,  // match with wildcards (GBS_string_matches)
    SM_REGEXPR    = 2,  // match using regexpr
};
---|
33 | |
---|
34 | struct GBS_string_matcher { |
---|
35 | string_matcher_type type; |
---|
36 | GB_CASE case_flag; |
---|
37 | char *wildexpr; |
---|
38 | GBS_regex *regexpr; |
---|
39 | }; |
---|
40 | |
---|
41 | GBS_string_matcher *GBS_compile_matcher(const char *search_expr, GB_CASE case_flag) { |
---|
42 | /* returns a valid string matcher (to be used with GBS_string_matches_regexp) |
---|
43 | * or NULp (in which case an error was exported) |
---|
44 | */ |
---|
45 | |
---|
46 | GBS_string_matcher *matcher = ARB_alloc<GBS_string_matcher>(1); |
---|
47 | GB_ERROR error = NULp; |
---|
48 | |
---|
49 | matcher->type = SM_INVALID; |
---|
50 | matcher->case_flag = case_flag; |
---|
51 | matcher->wildexpr = NULp; |
---|
52 | matcher->regexpr = NULp; |
---|
53 | |
---|
54 | if (search_expr[0] == '/') { |
---|
55 | const char *end = strchr(search_expr, 0)-1; |
---|
56 | if (end>search_expr && end[0] == '/') { |
---|
57 | GB_CASE expr_attached_case; |
---|
58 | const char *unwrapped_expr = GBS_unwrap_regexpr(search_expr, &expr_attached_case, &error); |
---|
59 | |
---|
60 | if (unwrapped_expr) { |
---|
61 | if (expr_attached_case != GB_MIND_CASE) error = "format '/../i' not allowed here"; |
---|
62 | else { |
---|
63 | matcher->regexpr = GBS_compile_regexpr(unwrapped_expr, case_flag, &error); |
---|
64 | if (matcher->regexpr) { |
---|
65 | matcher->type = SM_REGEXPR; |
---|
66 | } |
---|
67 | } |
---|
68 | } |
---|
69 | } |
---|
70 | } |
---|
71 | |
---|
72 | if (!matcher->regexpr && !error) { |
---|
73 | if (strcmp(search_expr, "*") == 0) { |
---|
74 | matcher->type = SM_ANY; |
---|
75 | } |
---|
76 | else { |
---|
77 | matcher->type = SM_WILDCARDED; |
---|
78 | matcher->wildexpr = ARB_strdup(search_expr); |
---|
79 | } |
---|
80 | } |
---|
81 | |
---|
82 | if (matcher->type == SM_INVALID) { |
---|
83 | error = GBS_global_string("Failed to create GBS_string_matcher from '%s'", search_expr); |
---|
84 | } |
---|
85 | |
---|
86 | if (error) { |
---|
87 | GBS_free_matcher(matcher); |
---|
88 | matcher = NULp; |
---|
89 | GB_export_error(error); |
---|
90 | } |
---|
91 | return matcher; |
---|
92 | } |
---|
93 | |
---|
94 | void GBS_free_matcher(GBS_string_matcher *matcher) { |
---|
95 | free(matcher->wildexpr); |
---|
96 | if (matcher->regexpr) GBS_free_regexpr(matcher->regexpr); |
---|
97 | free(matcher); |
---|
98 | } |
---|
99 | |
---|
100 | // ------------------------- |
---|
101 | // wildcard search |
---|
102 | |
---|
103 | GB_CSTR GBS_find_string(GB_CSTR cont, GB_CSTR substr, int match_mode) { |
---|
104 | /* search a substring in another string |
---|
105 | * match_mode == 0 -> exact match |
---|
106 | * match_mode == 1 -> a==A |
---|
107 | * match_mode == 2 -> a==a && a==? |
---|
108 | * match_mode == else -> a==A && a==? |
---|
109 | */ |
---|
110 | const char *p1, *p2; |
---|
111 | char b; |
---|
112 | |
---|
113 | switch (match_mode) { |
---|
114 | |
---|
115 | case 0: // exact match |
---|
116 | for (p1 = cont, p2 = substr; *p1;) { |
---|
117 | if (!(b = *p2)) { |
---|
118 | return (char *)cont; |
---|
119 | } |
---|
120 | else { |
---|
121 | if (b == *p1) { |
---|
122 | p1++; |
---|
123 | p2++; |
---|
124 | } |
---|
125 | else { |
---|
126 | p2 = substr; |
---|
127 | p1 = (++cont); |
---|
128 | } |
---|
129 | } |
---|
130 | } |
---|
131 | if (!*p2) return (char *)cont; |
---|
132 | break; |
---|
133 | |
---|
134 | case 1: // a==A |
---|
135 | for (p1 = cont, p2 = substr; *p1;) { |
---|
136 | if (!(b = *p2)) { |
---|
137 | return (char *)cont; |
---|
138 | } |
---|
139 | else { |
---|
140 | if (toupper(*p1) == toupper(b)) { |
---|
141 | p1++; |
---|
142 | p2++; |
---|
143 | } |
---|
144 | else { |
---|
145 | p2 = substr; |
---|
146 | p1 = (++cont); |
---|
147 | } |
---|
148 | } |
---|
149 | } |
---|
150 | if (!*p2) return (char *)cont; |
---|
151 | break; |
---|
152 | case 2: // a==a && a==? |
---|
153 | for (p1 = cont, p2 = substr; *p1;) { |
---|
154 | if (!(b = *p2)) { |
---|
155 | return (char *)cont; |
---|
156 | } |
---|
157 | else { |
---|
158 | if (b == *p1 || (b=='?')) { |
---|
159 | p1++; |
---|
160 | p2++; |
---|
161 | } |
---|
162 | else { |
---|
163 | p2 = substr; |
---|
164 | p1 = (++cont); |
---|
165 | } |
---|
166 | } |
---|
167 | } |
---|
168 | if (!*p2) return (char *)cont; |
---|
169 | break; |
---|
170 | |
---|
171 | default: // a==A && a==? |
---|
172 | for (p1 = cont, p2 = substr; *p1;) { |
---|
173 | if (!(b = *p2)) { |
---|
174 | return (char *)cont; |
---|
175 | } |
---|
176 | else { |
---|
177 | if (toupper(*p1) == toupper(b) || (b=='?')) { |
---|
178 | p1++; |
---|
179 | p2++; |
---|
180 | } |
---|
181 | else { |
---|
182 | p2 = substr; |
---|
183 | p1 = (++cont); |
---|
184 | } |
---|
185 | } |
---|
186 | } |
---|
187 | if (!*p2) return (char *)cont; |
---|
188 | break; |
---|
189 | } |
---|
190 | return NULp; |
---|
191 | } |
---|
192 | |
---|
193 | bool GBS_string_matches(const char *str, const char *expr, GB_CASE case_sens) { |
---|
194 | /* Wildcards in 'expr' string: |
---|
195 | * ? one character |
---|
196 | * * several characters |
---|
197 | * |
---|
198 | * if 'case_sens' == GB_IGNORE_CASE -> change all letters to uppercase |
---|
199 | * |
---|
200 | * returns true if strings are equal, false otherwise |
---|
201 | */ |
---|
202 | |
---|
203 | const char *ps = str; |
---|
204 | const char *pe = expr; |
---|
205 | |
---|
206 | while (1) { |
---|
207 | char s = *ps; |
---|
208 | char e = *pe; |
---|
209 | |
---|
210 | if (e == '*') { |
---|
211 | if (!pe[1]) { // '*' at end of expression |
---|
212 | break; // always match (even "nothing") |
---|
213 | } |
---|
214 | |
---|
215 | const char *nextStar = ARB_strchrnul(pe+1, '*'); |
---|
216 | int len = nextStar-pe-1; // part after '*' (and before EOS or next '*') |
---|
217 | if (!nextStar[0]) { // no 2nd '*' found |
---|
218 | // -> tail of string (if there is any) has to match |
---|
219 | int psl = strlen(ps); // length of tail |
---|
220 | if (psl<len) { // str-tail shorter than expr-tail |
---|
221 | return false; // -> match impossible |
---|
222 | } |
---|
223 | |
---|
224 | ps += psl-len; // skip over characters expected to match the '*' (=goto str-tail) |
---|
225 | ++pe; // goto expr-tail |
---|
226 | } |
---|
227 | else { // found 2nd '*' -> search for string part between stars |
---|
228 | { |
---|
229 | char *part = ARB_strpartdup(pe+1, nextStar-1); |
---|
230 | ps = GBS_find_string(ps, part, 2+(case_sens == GB_IGNORE_CASE)); // match with '?' wildcard |
---|
231 | free(part); |
---|
232 | } |
---|
233 | |
---|
234 | if (!ps) { |
---|
235 | return false; |
---|
236 | } |
---|
237 | ps += len; |
---|
238 | pe = nextStar; |
---|
239 | } |
---|
240 | continue; |
---|
241 | } |
---|
242 | |
---|
243 | if (!s) { |
---|
244 | return !e; |
---|
245 | } |
---|
246 | if (s != e) { |
---|
247 | if (e != '?') { |
---|
248 | if (!e) { |
---|
249 | return !s; |
---|
250 | } |
---|
251 | if (case_sens == GB_IGNORE_CASE) { |
---|
252 | s = toupper(s); |
---|
253 | e = toupper(e); |
---|
254 | if (s != e) { |
---|
255 | return false; |
---|
256 | } |
---|
257 | } |
---|
258 | else { |
---|
259 | return false; |
---|
260 | } |
---|
261 | } |
---|
262 | } |
---|
263 | ps++; |
---|
264 | pe++; |
---|
265 | } |
---|
266 | return true; |
---|
267 | } |
---|
268 | |
---|
269 | bool GBS_string_matches_regexp(const char *str, const GBS_string_matcher *expr) { |
---|
270 | /* Wildcard or regular expression match |
---|
271 | * Returns true if match |
---|
272 | * |
---|
273 | * Use GBS_compile_matcher() and GBS_free_matcher() to maintain 'expr' |
---|
274 | */ |
---|
275 | bool matches = false; |
---|
276 | |
---|
277 | switch (expr->type) { |
---|
278 | case SM_ANY: { |
---|
279 | matches = true; |
---|
280 | break; |
---|
281 | } |
---|
282 | case SM_WILDCARDED: { |
---|
283 | matches = GBS_string_matches(str, expr->wildexpr, expr->case_flag); |
---|
284 | break; |
---|
285 | } |
---|
286 | case SM_REGEXPR: { |
---|
287 | matches = GBS_regmatch_compiled(str, expr->regexpr, NULp); |
---|
288 | break; |
---|
289 | } |
---|
290 | case SM_INVALID: { |
---|
291 | gb_assert(0); |
---|
292 | break; |
---|
293 | } |
---|
294 | } |
---|
295 | |
---|
296 | return matches; |
---|
297 | } |
---|
298 | |
---|
299 | // ----------------------------------- |
---|
300 | // Search replace tool (SRT) |
---|
301 | |
---|
// Control characters used inside a compressed SRT command (see gbs_compress_command):
#define GBS_SET   (static_cast<char>(1)) // replaces '='
#define GBS_SEP   (static_cast<char>(2)) // replaces ':'
#define GBS_MWILD (static_cast<char>(3)) // replaces '*'
#define GBS_WILD  (static_cast<char>(4)) // replaces '?'
---|
306 | |
---|
307 | __ATTR__USERESULT static GB_ERROR gbs_build_replace_string(GBS_strstruct& out, |
---|
308 | char *replaceBy, // will be modified! |
---|
309 | const char *sWildcards, long sWildMax, |
---|
310 | const char*const *mWildcards, long mWildMax, |
---|
311 | const GBL_call_env& callEnv) |
---|
312 | { |
---|
313 | int sWildAuto = 0; // count plain occurrences of '?' in replace string (ie. w/o number behind) |
---|
314 | int mWildAuto = 0; // same for '*' |
---|
315 | |
---|
316 | GBDATA *gb_container = callEnv.get_item_ref(); |
---|
317 | |
---|
318 | char *p = replaceBy; |
---|
319 | char c; |
---|
320 | while ((c=*(p++))) { |
---|
321 | switch (c) { |
---|
322 | case GBS_MWILD: |
---|
323 | case GBS_WILD: { |
---|
324 | char d = *(p++); |
---|
325 | if (d=='(') { // "*(..)" expressions |
---|
326 | char *closingParen = search_matching_parenthesis(p); |
---|
327 | |
---|
328 | if (!closingParen) { |
---|
329 | return GBS_global_string("Unbalanced parenthesis in '%s'", p-1); |
---|
330 | } |
---|
331 | |
---|
332 | // found reference: "*(gbd)" |
---|
333 | int separator = 0; |
---|
334 | *closingParen = 0; |
---|
335 | char *psym = strpbrk(p, "#|:"); |
---|
336 | if (psym) { |
---|
337 | separator = *psym; |
---|
338 | *psym = 0; |
---|
339 | } |
---|
340 | |
---|
341 | GBDATA *gb_entry = NULp; |
---|
342 | if (*p) { // key was specified |
---|
343 | if (!gb_container) { |
---|
344 | return GBS_global_string("can't read key '%s' (called w/o database item)", p); |
---|
345 | } |
---|
346 | if (!GB_is_container(gb_container)) { |
---|
347 | if (ARB_strBeginsWith(p, "../")) { // redirect search via parent |
---|
348 | p += 3; |
---|
349 | gb_container = GB_get_father(gb_container); |
---|
350 | } |
---|
351 | else { |
---|
352 | return GBS_global_string("can't read key '%s' (DB item is no container)", p); |
---|
353 | } |
---|
354 | } |
---|
355 | |
---|
356 | gb_entry = GB_search(gb_container, p, GB_FIND); |
---|
357 | callEnv.track_field_access(p); |
---|
358 | |
---|
359 | if (!gb_entry && GB_have_error()) { |
---|
360 | return GB_await_error(); |
---|
361 | } |
---|
362 | } |
---|
363 | else { |
---|
364 | gb_entry = gb_container; |
---|
365 | } |
---|
366 | |
---|
367 | if (psym) *psym = separator; |
---|
368 | |
---|
369 | char *entry = (gb_entry && gb_entry != gb_container) |
---|
370 | ? GB_read_as_string(gb_entry) |
---|
371 | : ARB_strdup(""); |
---|
372 | |
---|
373 | if (entry) { |
---|
374 | char *h; |
---|
375 | switch (separator) { |
---|
376 | case ':': |
---|
377 | h = GBS_string_eval_in_env(entry, psym+1, callEnv); |
---|
378 | if (!h) { |
---|
379 | free(entry); |
---|
380 | return GB_await_error(); |
---|
381 | } |
---|
382 | |
---|
383 | out.cat(h); |
---|
384 | free(h); |
---|
385 | break; |
---|
386 | |
---|
387 | case '|': |
---|
388 | h = GB_command_interpreter_in_env(entry, psym+1, callEnv); |
---|
389 | if (!h) { |
---|
390 | free(entry); |
---|
391 | return GB_await_error(); |
---|
392 | } |
---|
393 | |
---|
394 | out.cat(h); |
---|
395 | free(h); |
---|
396 | break; |
---|
397 | |
---|
398 | case '#': |
---|
399 | if (!entry[0]) { // missing field or empty content |
---|
400 | out.cat(psym+1); |
---|
401 | break; |
---|
402 | } |
---|
403 | // fall-through |
---|
404 | default: |
---|
405 | out.cat(entry); |
---|
406 | break; |
---|
407 | } |
---|
408 | free(entry); |
---|
409 | } |
---|
410 | *closingParen = ')'; |
---|
411 | p = closingParen+1; |
---|
412 | } |
---|
413 | else { |
---|
414 | int wildcard_num = d - '1'; |
---|
415 | bool followed_by_number = wildcard_num>=0 && wildcard_num<=9; // @@@ in fact this will also accept ':' |
---|
416 | |
---|
417 | if (c == GBS_WILD) { |
---|
418 | if (!followed_by_number) { // char behind wildcard is not in [1-9] |
---|
419 | --p; // "put back" that character |
---|
420 | wildcard_num = sWildAuto++; |
---|
421 | } |
---|
422 | if (wildcard_num>=sWildMax) { |
---|
423 | out.put('?'); |
---|
424 | } |
---|
425 | else { |
---|
426 | out.put(sWildcards[wildcard_num]); |
---|
427 | } |
---|
428 | } |
---|
429 | else { |
---|
430 | if (!followed_by_number) { // char behind wildcard is not in [1-9] |
---|
431 | --p; // "put back" that character |
---|
432 | wildcard_num = mWildAuto++; |
---|
433 | } |
---|
434 | if (wildcard_num>=mWildMax) { |
---|
435 | out.put('*'); |
---|
436 | } |
---|
437 | else { |
---|
438 | out.cat(mWildcards[wildcard_num]); |
---|
439 | } |
---|
440 | } |
---|
441 | } |
---|
442 | break; |
---|
443 | } |
---|
444 | default: |
---|
445 | out.put(c); |
---|
446 | break; |
---|
447 | } |
---|
448 | } |
---|
449 | return NULp; |
---|
450 | } |
---|
451 | |
---|
452 | static char *gbs_compress_command(const char *com) { |
---|
453 | /* Prepare SRT. |
---|
454 | * |
---|
455 | * Replaces all |
---|
456 | * '=' by GBS_SET |
---|
457 | * ':' by GBS_SEP |
---|
458 | * '?' by GBS_WILD if followed by a number or '?' |
---|
459 | * '*' by GBS_MWILD or '(' |
---|
460 | * \ is the escape character |
---|
461 | */ |
---|
462 | |
---|
463 | char *result = ARB_strdup(com); |
---|
464 | char *d = result; |
---|
465 | const char *s = result; |
---|
466 | char ch; |
---|
467 | |
---|
468 | while ((ch = *(s++))) { |
---|
469 | switch (ch) { |
---|
470 | case '=': *(d++) = GBS_SET; break; |
---|
471 | case ':': *(d++) = GBS_SEP; break; |
---|
472 | case '?': *(d++) = GBS_WILD; break; |
---|
473 | case '*': *(d++) = GBS_MWILD; break; |
---|
474 | case '\\': |
---|
475 | ch = *(s++); if (!ch) { s--; break; }; |
---|
476 | switch (ch) { |
---|
477 | case 'n': *(d++) = '\n'; break; |
---|
478 | case 't': *(d++) = '\t'; break; |
---|
479 | case '0': *(d++) = '\0'; break; |
---|
480 | default: *(d++) = ch; break; |
---|
481 | } |
---|
482 | break; |
---|
483 | |
---|
484 | default: *(d++) = ch; break; |
---|
485 | } |
---|
486 | } |
---|
487 | *d = 0; |
---|
488 | return result; |
---|
489 | } |
---|
490 | |
---|
491 | // AISC_MKPT_PROMOTE: class GBL_call_env; |
---|
492 | |
---|
493 | char *GBS_string_eval_in_env(const char *insource, const char *icommand, const GBL_call_env& callEnv) { |
---|
494 | /* GBS_string_eval_in_env replaces substrings in source (implements SRT) |
---|
495 | * Syntax: command = "oliver=olli:peter=peti" |
---|
496 | * |
---|
497 | * Returns a heapcopy of result of replacement. |
---|
498 | * |
---|
499 | * * is a wildcard for any number of characters |
---|
500 | * ? is a wildcard for exactly one character |
---|
501 | * |
---|
502 | * To reference the parts matched by wildcards on the left side of the '=' use '?' and '*', |
---|
503 | * to reference in a particular order use |
---|
504 | * *1 to reference to the first occurrence of * |
---|
505 | * *2 ----------"-------- second ------"------- |
---|
506 | * ... |
---|
507 | * *9 ----------"-------- ninth -------"------- |
---|
508 | * |
---|
509 | * If the first and last characters of the search string are no '*' wildcards, |
---|
510 | * then the replace is repeated as many times as possible. |
---|
511 | * |
---|
512 | * '\' is the escape character: e.g. \n is newline; '\\' is '\'; '\=' is '='; .... |
---|
513 | * |
---|
514 | * If the passed GBL_call_env refers to a database entry (which has to be of type GB_DB, i.e. has to be a container), |
---|
515 | * fields of that container may be inserted using |
---|
516 | * |
---|
517 | * *(arb_field) is the value of the containers child entry 'arb_field' |
---|
518 | * *(arb_field#string) value of the child entry 'arb_field' or 'string' (if that entry does not exist) |
---|
519 | * *(arb_field\:SRT) runs SRT recursively on the value of the child entry 'arb_field' |
---|
520 | * *([arb_field]|ACI) runs the ACI command interpreter on the value of the child entry 'arb_field' (or on an empty string) |
---|
521 | * |
---|
522 | * If an error occurs it returns NULp - in this case the error-message gets exported! |
---|
523 | * |
---|
524 | * Notes: |
---|
525 | * - global interpreter (SRT+ACI+REG) is provided by GB_command_interpreter_in_env() |
---|
526 | * - REG is provided by GBS_regreplace(), GBS_regmatch() and GBS_regmatch_compiled() |
---|
527 | * - ACI is only provided via GB_command_interpreter_in_env() |
---|
528 | */ |
---|
529 | if (!icommand || !icommand[0]) { |
---|
530 | return ARB_strdup(insource); |
---|
531 | } |
---|
532 | |
---|
533 | if (traceACI) { |
---|
534 | print_trace(GBS_global_string("SR: in='%s' cmd='%s':\n", insource, icommand)); |
---|
535 | } |
---|
536 | LocallyModify<int> inc(traceIndent, traceIndent+1); |
---|
537 | |
---|
538 | char *command = gbs_compress_command(icommand); |
---|
539 | |
---|
540 | // copy insource (to allow to modify it) |
---|
541 | size_t inlen = strlen(insource); |
---|
542 | GBS_strstruct in(inlen+1); |
---|
543 | in.ncat(insource, inlen); |
---|
544 | |
---|
545 | GBS_strstruct out(inlen+500); |
---|
546 | |
---|
547 | GB_ERROR error = NULp; |
---|
548 | char *next_subcmd; |
---|
549 | for (char *subcmd = command; subcmd; subcmd = next_subcmd) { // loop over sub-commands |
---|
550 | // search next subcommand (=pos behind next colon): |
---|
551 | next_subcmd = strchr(subcmd, GBS_SEP); |
---|
552 | if (next_subcmd) *(next_subcmd++) = 0; |
---|
553 | |
---|
554 | if (!subcmd[0]) continue; // empty subcommand -> do nothing |
---|
555 | |
---|
556 | // search for replace string: |
---|
557 | char *replaceBy = strchr(subcmd+1, GBS_SET); |
---|
558 | if (!replaceBy) { |
---|
559 | error = GBS_global_string("SRT ERROR: no '=' found in command '%s' (position > %zi)", icommand, subcmd-command+1); |
---|
560 | break; |
---|
561 | } |
---|
562 | *(replaceBy++) = 0; |
---|
563 | |
---|
564 | GB_CSTR not_yet_copied = in.get_data(); // point into 'in' string (to not-yet-copied part) |
---|
565 | out.erase(); |
---|
566 | |
---|
567 | if (in.empty() && subcmd[0] == GBS_MWILD && subcmd[1] == 0) { |
---|
568 | // plain '*' shall also match an empty input string -> handle manually here |
---|
569 | const char *empty = ""; |
---|
570 | error = gbs_build_replace_string(out, replaceBy, NULp, 0, &empty, 1, callEnv); |
---|
571 | } |
---|
572 | else { |
---|
573 | char sWildcard[40]; // character which matched vs one '?' |
---|
574 | char *mWildcard[10]; // substrings which matched vs one '*' |
---|
575 | long sWildSeen = 0; // number of '?' seen (on left side on subcommand) |
---|
576 | long mWildSeen = 0; // number of '*' seen (on left side on subcommand) |
---|
577 | |
---|
578 | bool match_failed = false; |
---|
579 | for (GB_CSTR source = not_yet_copied; *source; ) { // loop over string |
---|
580 | gb_assert(!match_failed); |
---|
581 | |
---|
582 | char *search = subcmd; |
---|
583 | GB_CSTR start_match = NULp; // start of string that matches a wildcard (none yet) |
---|
584 | |
---|
585 | char c; |
---|
586 | while (!match_failed && (c = *(search++))) { // match expression vs. string |
---|
587 | switch (c) { |
---|
588 | case GBS_MWILD: { |
---|
589 | if (!start_match) start_match = source; |
---|
590 | |
---|
591 | char *start_of_wildcard = search; |
---|
592 | if (!(c = *(search++))) { // last character is a '*' wildcard -> expression matched |
---|
593 | mWildcard[mWildSeen++] = ARB_strdup(source); |
---|
594 | source = strchr(source, 0); // jump to EOS |
---|
595 | --search; |
---|
596 | break; // (effectively does exit while-loop) |
---|
597 | } |
---|
598 | // @@@ 'c' read in above if-condition is ignored if non-zero (got tests) |
---|
599 | |
---|
600 | while ((c=*(search++)) && c!=GBS_MWILD && c!=GBS_WILD) ; // search the next wildcardstring |
---|
601 | |
---|
602 | search--; // back one character |
---|
603 | *search = 0; |
---|
604 | |
---|
605 | char what_wild_card = c; |
---|
606 | GB_CSTR p = GBS_find_string(source, start_of_wildcard, 0); |
---|
607 | |
---|
608 | if (!p) match_failed = true; // string behind wildcard does not appear in input -> no match |
---|
609 | else { |
---|
610 | mWildcard[mWildSeen++] = ARB_strpartdup(source, p-1); |
---|
611 | source = p + strlen(start_of_wildcard); |
---|
612 | *search = what_wild_card; |
---|
613 | } |
---|
614 | break; |
---|
615 | } |
---|
616 | case GBS_WILD: |
---|
617 | if (!source[0]) match_failed = true; // '?' does not match "nothing" -> no match |
---|
618 | else { |
---|
619 | if (!start_match) start_match = source; |
---|
620 | sWildcard[sWildSeen++] = *(source++); |
---|
621 | } |
---|
622 | break; |
---|
623 | |
---|
624 | default: |
---|
625 | if (start_match) { |
---|
626 | if (c != *(source++)) match_failed = true; // mismatch after '?' or after last '*' |
---|
627 | } |
---|
628 | else { |
---|
629 | char *buf1 = search-1; |
---|
630 | |
---|
631 | while ((c=*(search++)) && c != GBS_MWILD && c!=GBS_WILD) ; // search the next wildcardstring |
---|
632 | |
---|
633 | search--; // back one character |
---|
634 | *search = 0; |
---|
635 | |
---|
636 | char what_wild_card = c; |
---|
637 | GB_CSTR p = GBS_find_string(source, buf1, 0); |
---|
638 | if (!p) { |
---|
639 | // string infrontof wildcard (or EOS) not found -> no match |
---|
640 | match_failed = true; |
---|
641 | } |
---|
642 | else { |
---|
643 | start_match = p; |
---|
644 | source = p + strlen(buf1); |
---|
645 | *search = what_wild_card; |
---|
646 | } |
---|
647 | |
---|
648 | } |
---|
649 | break; |
---|
650 | } |
---|
651 | } |
---|
652 | |
---|
653 | if (!match_failed) { |
---|
654 | /* now we got |
---|
655 | * |
---|
656 | * in: GBS_strstruct containing entire input string |
---|
657 | * source: pointer to end of match (inside 'in') |
---|
658 | * start_match: pointer to start of match (inside 'in') |
---|
659 | * not_yet_copied: pointer to the not-copied part of the input string |
---|
660 | * replaceBy: the replace string |
---|
661 | */ |
---|
662 | |
---|
663 | // now look for the replace string |
---|
664 | out.ncat(not_yet_copied, start_match-not_yet_copied); // concat part before the match |
---|
665 | error = gbs_build_replace_string(out, replaceBy, sWildcard, sWildSeen, mWildcard, mWildSeen, callEnv); // execute SRT command |
---|
666 | not_yet_copied = source; |
---|
667 | } |
---|
668 | |
---|
669 | for (long i = 0; i < mWildSeen; i++) { |
---|
670 | freenull(mWildcard[i]); |
---|
671 | } |
---|
672 | sWildSeen = 0; |
---|
673 | mWildSeen = 0; |
---|
674 | |
---|
675 | if (error || match_failed) break; |
---|
676 | } |
---|
677 | } |
---|
678 | |
---|
679 | // Note: reached when left side expression didn't match input string |
---|
680 | // (also reached when done with current sub-expression) |
---|
681 | if (error) break; |
---|
682 | |
---|
683 | out.cat(not_yet_copied); // cat the rest of the input |
---|
684 | |
---|
685 | if (traceACI) { |
---|
686 | print_trace(GBS_global_string("'%s' -> '%s'\n", in.get_data(), out.get_data())); |
---|
687 | } |
---|
688 | |
---|
689 | in.swap_content(out); |
---|
690 | } |
---|
691 | free(command); |
---|
692 | if (error) { |
---|
693 | GB_export_error(error); |
---|
694 | return NULp; |
---|
695 | } |
---|
696 | return in.release(); |
---|
697 | } |
---|
698 | |
---|
699 | char *GBS_string_eval(const char *insource, const char *icommand) { |
---|
700 | GBL_env env(NULp, NULp); |
---|
701 | GBL_call_env callEnv(NULp, env); |
---|
702 | |
---|
703 | return GBS_string_eval_in_env(insource, icommand, callEnv); |
---|
704 | } |
---|
705 | |
---|
706 | // -------------------------------------------------------------------------------- |
---|
707 | |
---|
708 | #ifdef UNIT_TESTS |
---|
709 | #ifndef TEST_UNIT_H |
---|
710 | #include <test_unit.h> |
---|
711 | #endif |
---|
712 | |
---|
713 | #include <arb_strarray.h> |
---|
714 | |
---|
715 | static char *tokenMatchResults(const char *expr, GB_CASE caseDef, const char *tokenStr) { |
---|
716 | ConstStrArray token; |
---|
717 | GBT_split_string(token,tokenStr, ';'); |
---|
718 | |
---|
719 | GBS_string_matcher *matcher = GBS_compile_matcher(expr, caseDef); |
---|
720 | for (int t = 0; token[t]; ++t) { |
---|
721 | bool matched = GBS_string_matches_regexp(token[t], matcher); |
---|
722 | token.replace(t, matched ? "1" : "0"); |
---|
723 | } |
---|
724 | GBS_free_matcher(matcher); |
---|
725 | return GBT_join_strings(token, 0); |
---|
726 | } |
---|
727 | |
---|
728 | |
---|
// Expects that matching the ';'-separated tokens in 'tokenStr' against 'expr'
// produces the match flags 'expected' (one '0' or '1' per token).
#define TEST_MATCH_TOKENS(expr,caseDef,tokenStr,expected)               \
    do {                                                                \
        char *results = tokenMatchResults(expr, caseDef, tokenStr);     \
        TEST_EXPECT_EQUAL(results, expected);                           \
        free(results);                                                  \
    } while(0)

// Same as TEST_MATCH_TOKENS, but for known-broken behavior
// ('expected' is the wanted result, 'got' the result currently produced).
#define TEST_MATCH_TOKENS__BROKEN(expr,caseDef,tokenStr,expected,got)   \
    do {                                                                \
        char *results = tokenMatchResults(expr, caseDef, tokenStr);     \
        TEST_EXPECT_EQUAL__BROKEN(results, expected, got);              \
        free(results);                                                  \
    } while(0)
---|
740 | |
---|
741 | void TEST_matcher() { |
---|
742 | TEST_MATCH_TOKENS("???", GB_MIND_CASE, "ab;abc;abcd", "010"); // only matches 2nd string |
---|
743 | TEST_MATCH_TOKENS("???*", GB_MIND_CASE, "ab;abc;abcd", "011"); // match at least 3 characters |
---|
744 | TEST_MATCH_TOKENS("?*", GB_MIND_CASE, ";a;ab;abc", "0111"); // match at least 1 character |
---|
745 | |
---|
746 | TEST_MATCH_TOKENS("a*c", GB_MIND_CASE, "ca;ab;abc;abC;ABC;abcd", "001000"); |
---|
747 | TEST_MATCH_TOKENS("a*c", GB_IGNORE_CASE, "ca;ab;abc;abC;ABC;abcd", "001110"); |
---|
748 | |
---|
749 | TEST_MATCH_TOKENS("a*c*a", GB_MIND_CASE, "aca;aaacccaaa;a--c--a;acac;acaca;aba", "111010"); |
---|
750 | TEST_MATCH_TOKENS("a*c?d*c", GB_MIND_CASE, "acxdc;a--cxd--c;acxdcxdcxdc;acdcdcdc", "1110"); |
---|
751 | |
---|
752 | TEST_MATCH_TOKENS("???*.c", GB_MIND_CASE, "ab.c;abc.c;abcd.c", "011"); |
---|
753 | TEST_MATCH_TOKENS("?b.*.c", GB_MIND_CASE, "ab.c;ab..c;ab.x.c", "011"); |
---|
754 | TEST_MATCH_TOKENS("rere*erer", GB_MIND_CASE, "rerer;rererer;rerererer;rererererer", "0011"); |
---|
755 | } |
---|
756 | |
---|
757 | TEST_PUBLISH(TEST_matcher); |
---|
758 | |
---|
759 | #endif // UNIT_TESTS |
---|
760 | |
---|
761 | // -------------------------------------------------------------------------------- |
---|
762 | |
---|