1 | /* Copyright (c) 1988 AT&T */ |
---|
2 | /* All Rights Reserved */ |
---|
3 | |
---|
4 | /* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T */ |
---|
5 | /* The copyright notice above does not evidence any */ |
---|
6 | /* actual or intended publication of such source code. */ |
---|
7 | |
---|
8 | #ifndef _REGEXP_H |
---|
9 | #define _REGEXP_H |
---|
10 | |
---|
11 | #include <string.h> |
---|
12 | #ifndef __STDC__ |
---|
13 | #define __STDC__ |
---|
14 | #endif |
---|
15 | #ifdef __cplusplus |
---|
16 | extern "C" { |
---|
17 | #endif |
---|
18 | |
---|
/*
 * Opcodes written by compile() into the compiled expression and
 * interpreted by advance().  Most are followed by operand bytes.
 */
#define	CBRA	2	/* \( : followed by the group index */
#define	CCHR	4	/* literal character: followed by that character */
#define	CDOT	8	/* '.' : any character except NUL */
#define	CCL	12	/* [...] : followed by a 16-byte bitmap */
#define	CXCL	16	/* [...] with 8-bit characters: 32-byte bitmap */
#define	CDOL	20	/* '$' : matches only at the terminating NUL */
#define	CCEOF	22	/* end of the compiled expression */
#define	CKET	24	/* \) : followed by the group index */
#define	CBACK	36	/* \N back-reference: followed by the group index */
#define	NCCL	40	/* [^...] : followed by a 16-byte bitmap */

/* Modifier bits OR'ed into the opcode of the preceding element. */
#define	STAR	01	/* '*' closure */
#define	RNGE	03	/* \{m,n\} interval; two count bytes follow the operand */

#define	NBRA	9	/* maximum number of \( \) groups */

/*
 * Bitmap helpers for bracket expressions.  Both operate on the bitmap
 * that the caller's local `ep' points at.  Arguments are parenthesized
 * so that an expression such as PLACE(c | 1) indexes the intended bit.
 */
#define	PLACE(c)	(ep[(c) >> 3] |= bittab[(c) & 07])
#define	ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])
/* True when the first n characters of s1 and s2 are equal. */
#define	ecmp(s1, s2, n)	(strncmp((s1), (s2), (n)) == 0)

static char *braslist[NBRA];	/* subject position recorded at each CBRA */
static char *braelist[NBRA];	/* subject position recorded at each CKET */
/*
 * sed:  when nonzero, compile() skips the ERROR(41) check on an empty
 *       pattern and treats a newline in the pattern as ERROR(36).
 * nbra: number of \( groups opened by the last compile().
 */
int sed, nbra;
/*
 * loc1: start of the match, set by step().
 * loc2: end of the match, set by advance() on reaching CCEOF.
 * locs: position at which closure backtracking in advance() stops;
 *       never assigned in this file.
 */
char *loc1, *loc2, *locs;
static int nodelim;	/* set to 1 when a newline ended the pattern */

int circf;		/* nonzero when the pattern began with '^' */
static int low;		/* interval minimum, set by getrnge() */
static int size;	/* extra repetitions beyond low, set by getrnge() */

/* bittab[i] is the mask selecting bit i of a bitmap byte. */
static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };

#ifdef __STDC__
int advance(const char *lp, const char *ep);
static void getrnge(const char *str);
#else
int advance();
static void getrnge();
#endif
58 | |
---|
59 | char * |
---|
60 | #ifdef __STDC__ |
---|
61 | compile(char *instring, char *ep, const char *endbuf, int seof) |
---|
62 | #else |
---|
63 | compile(instring, ep, endbuf, seof) |
---|
64 | register char *ep; |
---|
65 | char *instring, *endbuf; |
---|
66 | int seof; |
---|
67 | #endif |
---|
68 | { |
---|
69 | INIT /* Dependent declarations and initializations */ |
---|
70 | register int c; |
---|
71 | register int eof = seof; |
---|
72 | char *lastep = instring; |
---|
73 | int cclcnt; |
---|
74 | char bracket[NBRA], *bracketp; |
---|
75 | int closed; |
---|
76 | int neg; |
---|
77 | int lc; |
---|
78 | int i, cflg; |
---|
79 | int iflag; /* used for non-ascii characters in brackets */ |
---|
80 | |
---|
81 | lastep = 0; |
---|
82 | if ((c = GETC()) == eof || c == '\n') { |
---|
83 | if (c == '\n') { |
---|
84 | UNGETC(c); |
---|
85 | nodelim = 1; |
---|
86 | } |
---|
87 | if (*ep == 0 && !sed) |
---|
88 | ERROR(41); |
---|
89 | RETURN(ep); |
---|
90 | } |
---|
91 | bracketp = bracket; |
---|
92 | circf = closed = nbra = 0; |
---|
93 | if (c == '^') |
---|
94 | circf++; |
---|
95 | else |
---|
96 | UNGETC(c); |
---|
97 | while (1) { |
---|
98 | if (ep >= endbuf) |
---|
99 | ERROR(50); |
---|
100 | c = GETC(); |
---|
101 | if (c != '*' && ((c != '\\') || (PEEKC() != '{'))) |
---|
102 | lastep = ep; |
---|
103 | if (c == eof) { |
---|
104 | *ep++ = CCEOF; |
---|
105 | if (bracketp != bracket) |
---|
106 | ERROR(42); |
---|
107 | RETURN(ep); |
---|
108 | } |
---|
109 | switch (c) { |
---|
110 | |
---|
111 | case '.': |
---|
112 | *ep++ = CDOT; |
---|
113 | continue; |
---|
114 | |
---|
115 | case '\n': |
---|
116 | if (!sed) { |
---|
117 | UNGETC(c); |
---|
118 | *ep++ = CCEOF; |
---|
119 | nodelim = 1; |
---|
120 | if (bracketp != bracket) |
---|
121 | ERROR(42); |
---|
122 | RETURN(ep); |
---|
123 | } else ERROR(36); |
---|
124 | case '*': |
---|
125 | if (lastep == 0 || *lastep == CBRA || *lastep == CKET) |
---|
126 | goto defchar; |
---|
127 | *lastep |= STAR; |
---|
128 | continue; |
---|
129 | |
---|
130 | case '$': |
---|
131 | if (PEEKC() != eof && PEEKC() != '\n') |
---|
132 | goto defchar; |
---|
133 | *ep++ = CDOL; |
---|
134 | continue; |
---|
135 | |
---|
136 | case '[': |
---|
137 | if (&ep[17] >= endbuf) |
---|
138 | ERROR(50); |
---|
139 | |
---|
140 | *ep++ = CCL; |
---|
141 | lc = 0; |
---|
142 | for (i = 0; i < 16; i++) |
---|
143 | ep[i] = 0; |
---|
144 | |
---|
145 | neg = 0; |
---|
146 | if ((c = GETC()) == '^') { |
---|
147 | neg = 1; |
---|
148 | c = GETC(); |
---|
149 | } |
---|
150 | iflag = 1; |
---|
151 | do { |
---|
152 | c &= 0377; |
---|
153 | if (c == '\0' || c == '\n') |
---|
154 | ERROR(49); |
---|
155 | if ((c & 0200) && iflag) { |
---|
156 | iflag = 0; |
---|
157 | if (&ep[32] >= endbuf) |
---|
158 | ERROR(50); |
---|
159 | ep[-1] = CXCL; |
---|
160 | for (i = 16; i < 32; i++) |
---|
161 | ep[i] = 0; |
---|
162 | } |
---|
163 | if (c == '-' && lc != 0) { |
---|
164 | if ((c = GETC()) == ']') { |
---|
165 | PLACE('-'); |
---|
166 | break; |
---|
167 | } |
---|
168 | if ((c & 0200) && iflag) { |
---|
169 | iflag = 0; |
---|
170 | if (&ep[32] >= endbuf) |
---|
171 | ERROR(50); |
---|
172 | ep[-1] = CXCL; |
---|
173 | for (i = 16; i < 32; i++) |
---|
174 | ep[i] = 0; |
---|
175 | } |
---|
176 | while (lc < c) { |
---|
177 | PLACE(lc); |
---|
178 | lc++; |
---|
179 | } |
---|
180 | } |
---|
181 | lc = c; |
---|
182 | PLACE(c); |
---|
183 | } while ((c = GETC()) != ']'); |
---|
184 | |
---|
185 | if (iflag) |
---|
186 | iflag = 16; |
---|
187 | else |
---|
188 | iflag = 32; |
---|
189 | |
---|
190 | if (neg) { |
---|
191 | if (iflag == 32) { |
---|
192 | for (cclcnt = 0; cclcnt < iflag; |
---|
193 | cclcnt++) |
---|
194 | ep[cclcnt] ^= 0377; |
---|
195 | ep[0] &= 0376; |
---|
196 | } else { |
---|
197 | ep[-1] = NCCL; |
---|
198 | /* make nulls match so test fails */ |
---|
199 | ep[0] |= 01; |
---|
200 | } |
---|
201 | } |
---|
202 | |
---|
203 | ep += iflag; |
---|
204 | |
---|
205 | continue; |
---|
206 | |
---|
207 | case '\\': |
---|
208 | switch (c = GETC()) { |
---|
209 | |
---|
210 | case '(': |
---|
211 | if (nbra >= NBRA) |
---|
212 | ERROR(43); |
---|
213 | *bracketp++ = nbra; |
---|
214 | *ep++ = CBRA; |
---|
215 | *ep++ = nbra++; |
---|
216 | continue; |
---|
217 | |
---|
218 | case ')': |
---|
219 | if (bracketp <= bracket) |
---|
220 | ERROR(42); |
---|
221 | *ep++ = CKET; |
---|
222 | *ep++ = *--bracketp; |
---|
223 | closed++; |
---|
224 | continue; |
---|
225 | |
---|
226 | case '{': |
---|
227 | if (lastep == (char *) 0) |
---|
228 | goto defchar; |
---|
229 | *lastep |= RNGE; |
---|
230 | cflg = 0; |
---|
231 | nlim: |
---|
232 | c = GETC(); |
---|
233 | i = 0; |
---|
234 | do { |
---|
235 | if ('0' <= c && c <= '9') |
---|
236 | i = 10 * i + c - '0'; |
---|
237 | else |
---|
238 | ERROR(16); |
---|
239 | } while (((c = GETC()) != '\\') && (c != ',')); |
---|
240 | if (i >= 255) |
---|
241 | ERROR(11); |
---|
242 | *ep++ = i; |
---|
243 | if (c == ',') { |
---|
244 | if (cflg++) |
---|
245 | ERROR(44); |
---|
246 | if ((c = GETC()) == '\\') |
---|
247 | *ep++ = 255; |
---|
248 | else { |
---|
249 | UNGETC(c); |
---|
250 | goto nlim; |
---|
251 | /* get 2'nd number */ |
---|
252 | } |
---|
253 | } |
---|
254 | if (GETC() != '}') |
---|
255 | ERROR(45); |
---|
256 | if (!cflg) /* one number */ |
---|
257 | *ep++ = i; |
---|
258 | else if ((ep[-1] & 0377) < (ep[-2] & 0377)) |
---|
259 | ERROR(46); |
---|
260 | continue; |
---|
261 | |
---|
262 | case '\n': |
---|
263 | ERROR(36); |
---|
264 | |
---|
265 | case 'n': |
---|
266 | c = '\n'; |
---|
267 | goto defchar; |
---|
268 | |
---|
269 | default: |
---|
270 | if (c >= '1' && c <= '9') { |
---|
271 | if ((c -= '1') >= closed) |
---|
272 | ERROR(25); |
---|
273 | *ep++ = CBACK; |
---|
274 | *ep++ = c; |
---|
275 | continue; |
---|
276 | } |
---|
277 | } |
---|
278 | /* Drop through to default to use \ to turn off special chars */ |
---|
279 | |
---|
280 | defchar: |
---|
281 | default: |
---|
282 | lastep = ep; |
---|
283 | *ep++ = CCHR; |
---|
284 | *ep++ = c; |
---|
285 | } |
---|
286 | } |
---|
287 | } |
---|
288 | |
---|
289 | #ifdef __STDC__ |
---|
290 | int |
---|
291 | step(const char *p1, const char *p2) |
---|
292 | #else |
---|
293 | int |
---|
294 | step(p1, p2) |
---|
295 | register char *p1, *p2; |
---|
296 | #endif |
---|
297 | { |
---|
298 | char c; |
---|
299 | |
---|
300 | |
---|
301 | if (circf) { |
---|
302 | loc1 = (char *) p1; |
---|
303 | return (advance(p1, p2)); |
---|
304 | } |
---|
305 | /* fast check for first character */ |
---|
306 | if (*p2 == CCHR) { |
---|
307 | c = p2[1]; |
---|
308 | do { |
---|
309 | if (*p1 != c) |
---|
310 | continue; |
---|
311 | if (advance(p1, p2)) { |
---|
312 | loc1 = (char *) p1; |
---|
313 | return (1); |
---|
314 | } |
---|
315 | } while (*p1++); |
---|
316 | return (0); |
---|
317 | } |
---|
318 | /* regular algorithm */ |
---|
319 | do { |
---|
320 | if (advance(p1, p2)) { |
---|
321 | loc1 = (char *) p1; |
---|
322 | return (1); |
---|
323 | } |
---|
324 | } while (*p1++); |
---|
325 | return (0); |
---|
326 | } |
---|
327 | |
---|
328 | int |
---|
329 | #ifdef __STDC__ |
---|
330 | advance(const char *lp, const char *ep) |
---|
331 | #else |
---|
332 | advance(lp, ep) |
---|
333 | register char *lp, *ep; |
---|
334 | #endif |
---|
335 | { |
---|
336 | #ifdef __STDC__ |
---|
337 | const char *curlp; |
---|
338 | #else |
---|
339 | register char *curlp; |
---|
340 | #endif |
---|
341 | int c; |
---|
342 | char *bbeg; |
---|
343 | register char neg; |
---|
344 | int ct; |
---|
345 | |
---|
346 | while (1) { |
---|
347 | neg = 0; |
---|
348 | switch (*ep++) { |
---|
349 | |
---|
350 | case CCHR: |
---|
351 | if (*ep++ == *lp++) |
---|
352 | continue; |
---|
353 | return (0); |
---|
354 | |
---|
355 | case CDOT: |
---|
356 | if (*lp++) |
---|
357 | continue; |
---|
358 | return (0); |
---|
359 | |
---|
360 | case CDOL: |
---|
361 | if (*lp == 0) |
---|
362 | continue; |
---|
363 | return (0); |
---|
364 | |
---|
365 | case CCEOF: |
---|
366 | loc2 = (char *) lp; |
---|
367 | return (1); |
---|
368 | |
---|
369 | case CXCL: |
---|
370 | c = (unsigned char)*lp++; |
---|
371 | if (ISTHERE(c)) { |
---|
372 | ep += 32; |
---|
373 | continue; |
---|
374 | } |
---|
375 | return (0); |
---|
376 | |
---|
377 | case NCCL: |
---|
378 | neg = 1; |
---|
379 | |
---|
380 | case CCL: |
---|
381 | c = *lp++; |
---|
382 | if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) { |
---|
383 | ep += 16; |
---|
384 | continue; |
---|
385 | } |
---|
386 | return (0); |
---|
387 | |
---|
388 | case CBRA: |
---|
389 | braslist[(int)(*ep++)] = (char *) lp; |
---|
390 | continue; |
---|
391 | |
---|
392 | case CKET: |
---|
393 | braelist[(int)(*ep++)] = (char *) lp; |
---|
394 | continue; |
---|
395 | |
---|
396 | case CCHR | RNGE: |
---|
397 | c = *ep++; |
---|
398 | getrnge(ep); |
---|
399 | while (low--) |
---|
400 | if (*lp++ != c) |
---|
401 | return (0); |
---|
402 | curlp = lp; |
---|
403 | while (size--) |
---|
404 | if (*lp++ != c) |
---|
405 | break; |
---|
406 | if (size < 0) |
---|
407 | lp++; |
---|
408 | ep += 2; |
---|
409 | goto star; |
---|
410 | |
---|
411 | case CDOT | RNGE: |
---|
412 | getrnge(ep); |
---|
413 | while (low--) |
---|
414 | if (*lp++ == '\0') |
---|
415 | return (0); |
---|
416 | curlp = lp; |
---|
417 | while (size--) |
---|
418 | if (*lp++ == '\0') |
---|
419 | break; |
---|
420 | if (size < 0) |
---|
421 | lp++; |
---|
422 | ep += 2; |
---|
423 | goto star; |
---|
424 | |
---|
425 | case CXCL | RNGE: |
---|
426 | getrnge(ep + 32); |
---|
427 | while (low--) { |
---|
428 | c = (unsigned char)*lp++; |
---|
429 | if (!ISTHERE(c)) |
---|
430 | return (0); |
---|
431 | } |
---|
432 | curlp = lp; |
---|
433 | while (size--) { |
---|
434 | c = (unsigned char)*lp++; |
---|
435 | if (!ISTHERE(c)) |
---|
436 | break; |
---|
437 | } |
---|
438 | if (size < 0) |
---|
439 | lp++; |
---|
440 | ep += 34; /* 32 + 2 */ |
---|
441 | goto star; |
---|
442 | |
---|
443 | case NCCL | RNGE: |
---|
444 | neg = 1; |
---|
445 | |
---|
446 | case CCL | RNGE: |
---|
447 | getrnge(ep + 16); |
---|
448 | while (low--) { |
---|
449 | c = *lp++; |
---|
450 | if (((c & 0200) || !ISTHERE(c)) ^ neg) |
---|
451 | return (0); |
---|
452 | } |
---|
453 | curlp = lp; |
---|
454 | while (size--) { |
---|
455 | c = *lp++; |
---|
456 | if (((c & 0200) || !ISTHERE(c)) ^ neg) |
---|
457 | break; |
---|
458 | } |
---|
459 | if (size < 0) |
---|
460 | lp++; |
---|
461 | ep += 18; /* 16 + 2 */ |
---|
462 | goto star; |
---|
463 | |
---|
464 | case CBACK: |
---|
465 | bbeg = braslist[(int)(*ep)]; |
---|
466 | ct = braelist[(int)(*ep++)] - bbeg; |
---|
467 | |
---|
468 | if (ecmp(bbeg, lp, ct)) { |
---|
469 | lp += ct; |
---|
470 | continue; |
---|
471 | } |
---|
472 | return (0); |
---|
473 | |
---|
474 | case CBACK | STAR: |
---|
475 | bbeg = braslist[(int)(*ep)]; |
---|
476 | ct = braelist[(int)(*ep++)] - bbeg; |
---|
477 | curlp = lp; |
---|
478 | while (ecmp(bbeg, lp, ct)) |
---|
479 | lp += ct; |
---|
480 | |
---|
481 | while (lp >= curlp) { |
---|
482 | if (advance(lp, ep)) |
---|
483 | return (1); |
---|
484 | lp -= ct; |
---|
485 | } |
---|
486 | return (0); |
---|
487 | |
---|
488 | |
---|
489 | case CDOT | STAR: |
---|
490 | curlp = lp; |
---|
491 | while (*lp++); |
---|
492 | goto star; |
---|
493 | |
---|
494 | case CCHR | STAR: |
---|
495 | curlp = lp; |
---|
496 | while (*lp++ == *ep); |
---|
497 | ep++; |
---|
498 | goto star; |
---|
499 | |
---|
500 | case CXCL | STAR: |
---|
501 | curlp = lp; |
---|
502 | do { |
---|
503 | c = (unsigned char)*lp++; |
---|
504 | } while (ISTHERE(c)); |
---|
505 | ep += 32; |
---|
506 | goto star; |
---|
507 | |
---|
508 | case NCCL | STAR: |
---|
509 | neg = 1; |
---|
510 | |
---|
511 | case CCL | STAR: |
---|
512 | curlp = lp; |
---|
513 | do { |
---|
514 | c = *lp++; |
---|
515 | } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg); |
---|
516 | ep += 16; |
---|
517 | goto star; |
---|
518 | |
---|
519 | star: |
---|
520 | do { |
---|
521 | if (--lp == locs) |
---|
522 | break; |
---|
523 | if (advance(lp, ep)) |
---|
524 | return (1); |
---|
525 | } while (lp > curlp); |
---|
526 | return (0); |
---|
527 | |
---|
528 | } |
---|
529 | } |
---|
530 | } |
---|
531 | |
---|
532 | static void |
---|
533 | #ifdef __STDC__ |
---|
534 | getrnge(const char *str) |
---|
535 | #else |
---|
536 | getrnge(str) |
---|
537 | register char *str; |
---|
538 | #endif |
---|
539 | { |
---|
540 | low = *str++ & 0377; |
---|
541 | size = ((*str & 0377) == 255)? 20000: (*str &0377) - low; |
---|
542 | } |
---|
543 | |
---|
544 | #ifdef __cplusplus |
---|
545 | } |
---|
546 | #endif |
---|
547 | |
---|
548 | #endif /* _REGEXP_H */ |
---|