comparison regex.c @ 679:b088267ea1c8

Clean up whitespace.
author Richard Stallman <rms@gnu.org>
date Sat, 31 Aug 1996 23:41:26 +0000
parents bd677f8ba924
children d794d4ea2333
comparison
equal deleted inserted replaced
678:bd677f8ba924 679:b088267ea1c8
1 /* Extended regular expression matching and search library, 1 /* Extended regular expression matching and search library, version
2 version 0.12. 2 0.12. (Implements POSIX draft P10003.2/D11.2, except for
3 (Implements POSIX draft P10003.2/D11.2, except for
4 internationalization features.) 3 internationalization features.)
5 4
6 Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc. 5 Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
7 6
8 This program is free software; you can redistribute it and/or modify 7 This program is free software; you can redistribute it and/or modify
10 the Free Software Foundation; either version 2, or (at your option) 9 the Free Software Foundation; either version 2, or (at your option)
11 any later version. 10 any later version.
12 11
13 This program is distributed in the hope that it will be useful, 12 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details. 15 GNU General Public License for more details.
17 16
18 You should have received a copy of the GNU General Public License 17 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software 18 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
21 USA. */ 20 USA. */
22 21
23 /* AIX requires this to be the first thing in the file. */ 22 /* AIX requires this to be the first thing in the file. */
24 #if defined (_AIX) && !defined (REGEX_MALLOC) 23 #if defined (_AIX) && !defined (REGEX_MALLOC)
25 #pragma alloca 24 #pragma alloca
26 #endif 25 #endif
30 29
31 #ifdef HAVE_CONFIG_H 30 #ifdef HAVE_CONFIG_H
32 #include <config.h> 31 #include <config.h>
33 #endif 32 #endif
34 33
35 /* We need this for `regex.h', and perhaps for the Emacs include files. */ 34 /* We need this for `regex.h', and perhaps for the Emacs include files. */
36 #include <sys/types.h> 35 #include <sys/types.h>
37 36
38 /* This is for other GNU distributions with internationalized messages. */ 37 /* This is for other GNU distributions with internationalized messages. */
39 #if HAVE_LIBINTL_H || defined (_LIBC) 38 #if HAVE_LIBINTL_H || defined (_LIBC)
40 # include <libintl.h> 39 # include <libintl.h>
41 #else 40 #else
42 # define gettext(msgid) (msgid) 41 # define gettext(msgid) (msgid)
43 #endif 42 #endif
69 char *malloc (); 68 char *malloc ();
70 char *realloc (); 69 char *realloc ();
71 #endif 70 #endif
72 71
73 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 72 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
74 If nothing else has been done, use the method below. */ 73 If nothing else has been done, use the method below. */
75 #ifdef INHIBIT_STRING_HEADER 74 #ifdef INHIBIT_STRING_HEADER
76 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY)) 75 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
77 #if !defined (bzero) && !defined (bcopy) 76 #if !defined (bzero) && !defined (bcopy)
78 #undef INHIBIT_STRING_HEADER 77 #undef INHIBIT_STRING_HEADER
79 #endif 78 #endif
165 /* Jim Meyering writes: 164 /* Jim Meyering writes:
166 165
167 "... Some ctype macros are valid only for character codes that 166 "... Some ctype macros are valid only for character codes that
168 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when 167 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
169 using /bin/cc or gcc but without giving an ansi option). So, all 168 using /bin/cc or gcc but without giving an ansi option). So, all
170 ctype uses should be through macros like ISPRINT... If 169 ctype uses should be through macros like ISPRINT... If
171 STDC_HEADERS is defined, then autoconf has verified that the ctype 170 STDC_HEADERS is defined, then autoconf has verified that the ctype
172 macros don't need to be guarded with references to isascii. ... 171 macros don't need to be guarded with references to isascii. ...
173 Defining isascii to 1 should let any compiler worth its salt 172 Defining isascii to 1 should let any compiler worth its salt
174 eliminate the && through constant folding." */ 173 eliminate the && through constant folding." */
175 174
176 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) 175 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
177 #define ISASCII(c) 1 176 #define ISASCII(c) 1
178 #else 177 #else
179 #define ISASCII(c) isascii(c) 178 #define ISASCII(c) isascii(c)
206 #endif 205 #endif
207 206
208 /* We remove any previous definition of `SIGN_EXTEND_CHAR', 207 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
209 since ours (we hope) works properly with all combinations of 208 since ours (we hope) works properly with all combinations of
210 machines, compilers, `char' and `unsigned char' argument types. 209 machines, compilers, `char' and `unsigned char' argument types.
211 (Per Bothner suggested the basic approach.) */ 210 (Per Bothner suggested the basic approach.) */
212 #undef SIGN_EXTEND_CHAR 211 #undef SIGN_EXTEND_CHAR
213 #if __STDC__ 212 #if __STDC__
214 #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 213 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
215 #else /* not __STDC__ */ 214 #else /* not __STDC__ */
216 /* As in Harbison and Steele. */ 215 /* As in Harbison and Steele. */
244 #else /* not __GNUC__ */ 243 #else /* not __GNUC__ */
245 #if HAVE_ALLOCA_H 244 #if HAVE_ALLOCA_H
246 #include <alloca.h> 245 #include <alloca.h>
247 #else /* not __GNUC__ or HAVE_ALLOCA_H */ 246 #else /* not __GNUC__ or HAVE_ALLOCA_H */
248 #if 0 /* It is a bad idea to declare alloca. We always cast the result. */ 247 #if 0 /* It is a bad idea to declare alloca. We always cast the result. */
249 #ifndef _AIX /* Already did AIX, up at the top. */ 248 #ifndef _AIX /* Already did AIX, up at the top. */
250 char *alloca (); 249 char *alloca ();
251 #endif /* not _AIX */ 250 #endif /* not _AIX */
252 #endif 251 #endif
253 #endif /* not HAVE_ALLOCA_H */ 252 #endif /* not HAVE_ALLOCA_H */
254 #endif /* not __GNUC__ */ 253 #endif /* not __GNUC__ */
291 290
292 #define REGEX_ALLOCATE_STACK alloca 291 #define REGEX_ALLOCATE_STACK alloca
293 292
294 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 293 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \
295 REGEX_REALLOCATE (source, osize, nsize) 294 REGEX_REALLOCATE (source, osize, nsize)
296 /* No need to explicitly free anything. */ 295 /* No need to explicitly free anything. */
297 #define REGEX_FREE_STACK(arg) 296 #define REGEX_FREE_STACK(arg)
298 297
299 #endif /* not REGEX_MALLOC */ 298 #endif /* not REGEX_MALLOC */
300 #endif /* not using relocating allocator */ 299 #endif /* not using relocating allocator */
301 300
302 301
303 /* True if `size1' is non-NULL and PTR is pointing anywhere inside 302 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
304 `string1' or just past its end. This works if PTR is NULL, which is 303 `string1' or just past its end. This works if PTR is NULL, which is
305 a good thing. */ 304 a good thing. */
306 #define FIRST_STRING_P(ptr) \ 305 #define FIRST_STRING_P(ptr) \
307 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) 306 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
308 307
309 /* (Re)Allocate N items of type T using malloc, or fail. */ 308 /* (Re)Allocate N items of type T using malloc, or fail. */
310 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) 309 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
311 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 310 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
312 #define RETALLOC_IF(addr, n, t) \ 311 #define RETALLOC_IF(addr, n, t) \
313 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) 312 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
314 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 313 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
315 314
316 #define BYTEWIDTH 8 /* In bits. */ 315 #define BYTEWIDTH 8 /* In bits. */
317 316
318 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) 317 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
319 318
320 #undef MAX 319 #undef MAX
321 #undef MIN 320 #undef MIN
327 #define true 1 326 #define true 1
328 327
329 static int re_match_2_internal (); 328 static int re_match_2_internal ();
330 329
331 /* These are the command codes that appear in compiled regular 330 /* These are the command codes that appear in compiled regular
332 expressions. Some opcodes are followed by argument bytes. A 331 expressions. Some opcodes are followed by argument bytes. A
333 command code can specify any interpretation whatsoever for its 332 command code can specify any interpretation whatsoever for its
334 arguments. Zero bytes may appear in the compiled regular expression. */ 333 arguments. Zero bytes may appear in the compiled regular expression. */
335 334
336 typedef enum 335 typedef enum
337 { 336 {
338 no_op = 0, 337 no_op = 0,
339 338
340 /* Succeed right away--no more backtracking. */ 339 /* Succeed right away--no more backtracking. */
341 succeed, 340 succeed,
342 341
343 /* Followed by one byte giving n, then by n literal bytes. */ 342 /* Followed by one byte giving n, then by n literal bytes. */
344 exactn, 343 exactn,
345 344
346 /* Matches any (more or less) character. */ 345 /* Matches any (more or less) character. */
347 anychar, 346 anychar,
348 347
349 /* Matches any one char belonging to specified set. First 348 /* Matches any one char belonging to specified set. First
350 following byte is number of bitmap bytes. Then come bytes 349 following byte is number of bitmap bytes. Then come bytes
351 for a bitmap saying which chars are in. Bits in each byte 350 for a bitmap saying which chars are in. Bits in each byte
352 are ordered low-bit-first. A character is in the set if its 351 are ordered low-bit-first. A character is in the set if its
353 bit is 1. A character too large to have a bit in the map is 352 bit is 1. A character too large to have a bit in the map is
354 automatically not in the set. */ 353 automatically not in the set. */
355 charset, 354 charset,
356 355
357 /* Same parameters as charset, but match any character that is 356 /* Same parameters as charset, but match any character that is
358 not one of those specified. */ 357 not one of those specified. */
359 charset_not, 358 charset_not,
360 359
361 /* Start remembering the text that is matched, for storing in a 360 /* Start remembering the text that is matched, for storing in a
362 register. Followed by one byte with the register number, in 361 register. Followed by one byte with the register number, in
363 the range 0 to one less than the pattern buffer's re_nsub 362 the range 0 to one less than the pattern buffer's re_nsub
364 field. Then followed by one byte with the number of groups 363 field. Then followed by one byte with the number of groups
365 inner to this one. (This last has to be part of the 364 inner to this one. (This last has to be part of the
366 start_memory only because we need it in the on_failure_jump 365 start_memory only because we need it in the on_failure_jump
367 of re_match_2.) */ 366 of re_match_2.) */
368 start_memory, 367 start_memory,
369 368
370 /* Stop remembering the text that is matched and store it in a 369 /* Stop remembering the text that is matched and store it in a
371 memory register. Followed by one byte with the register 370 memory register. Followed by one byte with the register
372 number, in the range 0 to one less than `re_nsub' in the 371 number, in the range 0 to one less than `re_nsub' in the
373 pattern buffer, and one byte with the number of inner groups, 372 pattern buffer, and one byte with the number of inner groups,
374 just like `start_memory'. (We need the number of inner 373 just like `start_memory'. (We need the number of inner
375 groups here because we don't have any easy way of finding the 374 groups here because we don't have any easy way of finding the
376 corresponding start_memory when we're at a stop_memory.) */ 375 corresponding start_memory when we're at a stop_memory.) */
377 stop_memory, 376 stop_memory,
378 377
379 /* Match a duplicate of something remembered. Followed by one 378 /* Match a duplicate of something remembered. Followed by one
380 byte containing the register number. */ 379 byte containing the register number. */
381 duplicate, 380 duplicate,
382 381
383 /* Fail unless at beginning of line. */ 382 /* Fail unless at beginning of line. */
384 begline, 383 begline,
385 384
386 /* Fail unless at end of line. */ 385 /* Fail unless at end of line. */
387 endline, 386 endline,
388 387
389 /* Succeeds if at beginning of buffer (if emacs) or at beginning 388 /* Succeeds if at beginning of buffer (if emacs) or at beginning
390 of string to be matched (if not). */ 389 of string to be matched (if not). */
391 begbuf, 390 begbuf,
392 391
393 /* Analogously, for end of buffer/string. */ 392 /* Analogously, for end of buffer/string. */
394 endbuf, 393 endbuf,
395 394
396 /* Followed by two byte relative address to which to jump. */ 395 /* Followed by two byte relative address to which to jump. */
397 jump, 396 jump,
398 397
399 /* Same as jump, but marks the end of an alternative. */ 398 /* Same as jump, but marks the end of an alternative. */
400 jump_past_alt, 399 jump_past_alt,
401 400
402 /* Followed by two-byte relative address of place to resume at 401 /* Followed by two-byte relative address of place to resume at
403 in case of failure. */ 402 in case of failure. */
404 on_failure_jump, 403 on_failure_jump,
405 404
406 /* Like on_failure_jump, but pushes a placeholder instead of the 405 /* Like on_failure_jump, but pushes a placeholder instead of the
407 current string position when executed. */ 406 current string position when executed. */
408 on_failure_keep_string_jump, 407 on_failure_keep_string_jump,
409 408
410 /* Throw away latest failure point and then jump to following 409 /* Throw away latest failure point and then jump to following
411 two-byte relative address. */ 410 two-byte relative address. */
412 pop_failure_jump, 411 pop_failure_jump,
413 412
414 /* Change to pop_failure_jump if know won't have to backtrack to 413 /* Change to pop_failure_jump if know won't have to backtrack to
415 match; otherwise change to jump. This is used to jump 414 match; otherwise change to jump. This is used to jump
416 back to the beginning of a repeat. If what follows this jump 415 back to the beginning of a repeat. If what follows this jump
417 clearly won't match what the repeat does, such that we can be 416 clearly won't match what the repeat does, such that we can be
418 sure that there is no use backtracking out of repetitions 417 sure that there is no use backtracking out of repetitions
419 already matched, then we change it to a pop_failure_jump. 418 already matched, then we change it to a pop_failure_jump.
420 Followed by two-byte address. */ 419 Followed by two-byte address. */
421 maybe_pop_jump, 420 maybe_pop_jump,
422 421
423 /* Jump to following two-byte address, and push a dummy failure 422 /* Jump to following two-byte address, and push a dummy failure
424 point. This failure point will be thrown away if an attempt 423 point. This failure point will be thrown away if an attempt
425 is made to use it for a failure. A `+' construct makes this 424 is made to use it for a failure. A `+' construct makes this
426 before the first repeat. Also used as an intermediary kind 425 before the first repeat. Also used as an intermediary kind
427 of jump when compiling an alternative. */ 426 of jump when compiling an alternative. */
428 dummy_failure_jump, 427 dummy_failure_jump,
429 428
430 /* Push a dummy failure point and continue. Used at the end of 429 /* Push a dummy failure point and continue. Used at the end of
431 alternatives. */ 430 alternatives. */
432 push_dummy_failure, 431 push_dummy_failure,
433 432
434 /* Followed by two-byte relative address and two-byte number n. 433 /* Followed by two-byte relative address and two-byte number n.
435 After matching N times, jump to the address upon failure. */ 434 After matching N times, jump to the address upon failure. */
436 succeed_n, 435 succeed_n,
437 436
438 /* Followed by two-byte relative address, and two-byte number n. 437 /* Followed by two-byte relative address, and two-byte number n.
439 Jump to the address N times, then fail. */ 438 Jump to the address N times, then fail. */
440 jump_n, 439 jump_n,
441 440
442 /* Set the following two-byte relative address to the 441 /* Set the following two-byte relative address to the
443 subsequent two-byte number. The address *includes* the two 442 subsequent two-byte number. The address *includes* the two
444 bytes of number. */ 443 bytes of number. */
445 set_number_at, 444 set_number_at,
446 445
447 wordchar, /* Matches any word-constituent character. */ 446 wordchar, /* Matches any word-constituent character. */
448 notwordchar, /* Matches any char that is not a word-constituent. */ 447 notwordchar, /* Matches any char that is not a word-constituent. */
449 448
450 wordbeg, /* Succeeds if at word beginning. */ 449 wordbeg, /* Succeeds if at word beginning. */
451 wordend, /* Succeeds if at word end. */ 450 wordend, /* Succeeds if at word end. */
452 451
453 wordbound, /* Succeeds if at a word boundary. */ 452 wordbound, /* Succeeds if at a word boundary. */
454 notwordbound /* Succeeds if not at a word boundary. */ 453 notwordbound /* Succeeds if not at a word boundary. */
455 454
456 #ifdef emacs 455 #ifdef emacs
457 ,before_dot, /* Succeeds if before point. */ 456 ,before_dot, /* Succeeds if before point. */
458 at_dot, /* Succeeds if at point. */ 457 at_dot, /* Succeeds if at point. */
459 after_dot, /* Succeeds if after point. */ 458 after_dot, /* Succeeds if after point. */
460 459
461 /* Matches any character whose syntax is specified. Followed by 460 /* Matches any character whose syntax is specified. Followed by
462 a byte which contains a syntax code, e.g., Sword. */ 461 a byte which contains a syntax code, e.g., Sword. */
463 syntaxspec, 462 syntaxspec,
464 463
465 /* Matches any character whose syntax is not that specified. */ 464 /* Matches any character whose syntax is not that specified. */
466 notsyntaxspec 465 notsyntaxspec
467 #endif /* emacs */ 466 #endif /* emacs */
505 int temp = SIGN_EXTEND_CHAR (*(source + 1)); 504 int temp = SIGN_EXTEND_CHAR (*(source + 1));
506 *dest = *source & 0377; 505 *dest = *source & 0377;
507 *dest += temp << 8; 506 *dest += temp << 8;
508 } 507 }
509 508
510 #ifndef EXTRACT_MACROS /* To debug the macros. */ 509 #ifndef EXTRACT_MACROS /* To debug the macros. */
511 #undef EXTRACT_NUMBER 510 #undef EXTRACT_NUMBER
512 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) 511 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
513 #endif /* not EXTRACT_MACROS */ 512 #endif /* not EXTRACT_MACROS */
514 513
515 #endif /* DEBUG */ 514 #endif /* DEBUG */
518 SOURCE must be an lvalue. */ 517 SOURCE must be an lvalue. */
519 518
520 #define EXTRACT_NUMBER_AND_INCR(destination, source) \ 519 #define EXTRACT_NUMBER_AND_INCR(destination, source) \
521 do { \ 520 do { \
522 EXTRACT_NUMBER (destination, source); \ 521 EXTRACT_NUMBER (destination, source); \
523 (source) += 2; \ 522 (source) += 2; \
524 } while (0) 523 } while (0)
525 524
526 #ifdef DEBUG 525 #ifdef DEBUG
527 static void 526 static void
528 extract_number_and_incr (destination, source) 527 extract_number_and_incr (destination, source)
543 542
544 /* If DEBUG is defined, Regex prints many voluminous messages about what 543 /* If DEBUG is defined, Regex prints many voluminous messages about what
545 it is doing (if the variable `debug' is nonzero). If linked with the 544 it is doing (if the variable `debug' is nonzero). If linked with the
546 main program in `iregex.c', you can enter patterns and strings 545 main program in `iregex.c', you can enter patterns and strings
547 interactively. And if linked with the main program in `main.c' and 546 interactively. And if linked with the main program in `main.c' and
548 the other test files, you can run the already-written tests. */ 547 the other test files, you can run the already-written tests. */
549 548
550 #ifdef DEBUG 549 #ifdef DEBUG
551 550
552 /* We use standard I/O for debugging. */ 551 /* We use standard I/O for debugging. */
553 #include <stdio.h> 552 #include <stdio.h>
560 #define DEBUG_STATEMENT(e) e 559 #define DEBUG_STATEMENT(e) e
561 #define DEBUG_PRINT1(x) if (debug) printf (x) 560 #define DEBUG_PRINT1(x) if (debug) printf (x)
562 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) 561 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
563 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) 562 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
564 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) 563 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
565 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 564 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
566 if (debug) print_partial_compiled_pattern (s, e) 565 if (debug) print_partial_compiled_pattern (s, e)
567 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 566 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
568 if (debug) print_double_string (w, s1, sz1, s2, sz2) 567 if (debug) print_double_string (w, s1, sz1, s2, sz2)
569 568
570 569
580 while (i < (1 << BYTEWIDTH)) 579 while (i < (1 << BYTEWIDTH))
581 { 580 {
582 if (fastmap[i++]) 581 if (fastmap[i++])
583 { 582 {
584 was_a_range = 0; 583 was_a_range = 0;
585 putchar (i - 1); 584 putchar (i - 1);
586 while (i < (1 << BYTEWIDTH) && fastmap[i]) 585 while (i < (1 << BYTEWIDTH) && fastmap[i])
587 { 586 {
588 was_a_range = 1; 587 was_a_range = 1;
589 i++; 588 i++;
590 } 589 }
591 if (was_a_range) 590 if (was_a_range)
592 { 591 {
593 printf ("-"); 592 printf ("-");
594 putchar (i - 1); 593 putchar (i - 1);
595 } 594 }
596 } 595 }
597 } 596 }
598 putchar ('\n'); 597 putchar ('\n');
599 } 598 }
600 599
601 600
622 { 621 {
623 printf ("%d:\t", p - start); 622 printf ("%d:\t", p - start);
624 623
625 switch ((re_opcode_t) *p++) 624 switch ((re_opcode_t) *p++)
626 { 625 {
627 case no_op: 626 case no_op:
628 printf ("/no_op"); 627 printf ("/no_op");
629 break; 628 break;
630 629
631 case exactn: 630 case exactn:
632 mcnt = *p++; 631 mcnt = *p++;
633 printf ("/exactn/%d", mcnt); 632 printf ("/exactn/%d", mcnt);
634 do 633 do
635 { 634 {
636 putchar ('/'); 635 putchar ('/');
637 putchar (*p++); 636 putchar (*p++);
638 } 637 }
639 while (--mcnt); 638 while (--mcnt);
640 break; 639 break;
641 640
642 case start_memory: 641 case start_memory:
643 mcnt = *p++; 642 mcnt = *p++;
644 printf ("/start_memory/%d/%d", mcnt, *p++); 643 printf ("/start_memory/%d/%d", mcnt, *p++);
645 break; 644 break;
646 645
647 case stop_memory: 646 case stop_memory:
648 mcnt = *p++; 647 mcnt = *p++;
649 printf ("/stop_memory/%d/%d", mcnt, *p++); 648 printf ("/stop_memory/%d/%d", mcnt, *p++);
650 break; 649 break;
651 650
652 case duplicate: 651 case duplicate:
653 printf ("/duplicate/%d", *p++); 652 printf ("/duplicate/%d", *p++);
654 break; 653 break;
655 654
656 case anychar: 655 case anychar:
657 printf ("/anychar"); 656 printf ("/anychar");
658 break; 657 break;
659 658
660 case charset: 659 case charset:
661 case charset_not: 660 case charset_not:
662 { 661 {
663 register int c, last = -100; 662 register int c, last = -100;
664 register int in_range = 0; 663 register int in_range = 0;
665 664
666 printf ("/charset [%s", 665 printf ("/charset [%s",
667 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); 666 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
668 667
669 assert (p + *p < pend); 668 assert (p + *p < pend);
670 669
671 for (c = 0; c < 256; c++) 670 for (c = 0; c < 256; c++)
672 if (c / 8 < *p 671 if (c / 8 < *p
673 && (p[1 + (c/8)] & (1 << (c % 8)))) 672 && (p[1 + (c/8)] & (1 << (c % 8))))
674 { 673 {
675 /* Are we starting a range? */ 674 /* Are we starting a range? */
676 if (last + 1 == c && ! in_range) 675 if (last + 1 == c && ! in_range)
678 putchar ('-'); 677 putchar ('-');
679 in_range = 1; 678 in_range = 1;
680 } 679 }
681 /* Have we broken a range? */ 680 /* Have we broken a range? */
682 else if (last + 1 != c && in_range) 681 else if (last + 1 != c && in_range)
683 { 682 {
684 putchar (last); 683 putchar (last);
685 in_range = 0; 684 in_range = 0;
686 } 685 }
687 686
688 if (! in_range) 687 if (! in_range)
689 putchar (c); 688 putchar (c);
690 689
691 last = c; 690 last = c;
692 } 691 }
693 692
694 if (in_range) 693 if (in_range)
695 putchar (last); 694 putchar (last);
696 695
697 putchar (']'); 696 putchar (']');
700 } 699 }
701 break; 700 break;
702 701
703 case begline: 702 case begline:
704 printf ("/begline"); 703 printf ("/begline");
705 break; 704 break;
706 705
707 case endline: 706 case endline:
708 printf ("/endline"); 707 printf ("/endline");
709 break; 708 break;
710 709
711 case on_failure_jump: 710 case on_failure_jump:
712 extract_number_and_incr (&mcnt, &p); 711 extract_number_and_incr (&mcnt, &p);
713 printf ("/on_failure_jump to %d", p + mcnt - start); 712 printf ("/on_failure_jump to %d", p + mcnt - start);
714 break; 713 break;
715 714
716 case on_failure_keep_string_jump: 715 case on_failure_keep_string_jump:
717 extract_number_and_incr (&mcnt, &p); 716 extract_number_and_incr (&mcnt, &p);
718 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); 717 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
719 break; 718 break;
720 719
721 case dummy_failure_jump: 720 case dummy_failure_jump:
722 extract_number_and_incr (&mcnt, &p); 721 extract_number_and_incr (&mcnt, &p);
723 printf ("/dummy_failure_jump to %d", p + mcnt - start); 722 printf ("/dummy_failure_jump to %d", p + mcnt - start);
724 break; 723 break;
725 724
726 case push_dummy_failure: 725 case push_dummy_failure:
727 printf ("/push_dummy_failure"); 726 printf ("/push_dummy_failure");
728 break; 727 break;
729 728
730 case maybe_pop_jump: 729 case maybe_pop_jump:
731 extract_number_and_incr (&mcnt, &p);
732 printf ("/maybe_pop_jump to %d", p + mcnt - start);
733 break;
734
735 case pop_failure_jump:
736 extract_number_and_incr (&mcnt, &p); 730 extract_number_and_incr (&mcnt, &p);
737 printf ("/pop_failure_jump to %d", p + mcnt - start); 731 printf ("/maybe_pop_jump to %d", p + mcnt - start);
738 break; 732 break;
739 733
740 case jump_past_alt: 734 case pop_failure_jump:
741 extract_number_and_incr (&mcnt, &p); 735 extract_number_and_incr (&mcnt, &p);
742 printf ("/jump_past_alt to %d", p + mcnt - start); 736 printf ("/pop_failure_jump to %d", p + mcnt - start);
743 break; 737 break;
744 738
745 case jump: 739 case jump_past_alt:
746 extract_number_and_incr (&mcnt, &p); 740 extract_number_and_incr (&mcnt, &p);
747 printf ("/jump to %d", p + mcnt - start); 741 printf ("/jump_past_alt to %d", p + mcnt - start);
748 break; 742 break;
749 743
750 case succeed_n: 744 case jump:
751 extract_number_and_incr (&mcnt, &p); 745 extract_number_and_incr (&mcnt, &p);
752 extract_number_and_incr (&mcnt2, &p); 746 printf ("/jump to %d", p + mcnt - start);
747 break;
748
749 case succeed_n:
750 extract_number_and_incr (&mcnt, &p);
751 extract_number_and_incr (&mcnt2, &p);
753 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); 752 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
754 break; 753 break;
755 754
756 case jump_n: 755 case jump_n:
757 extract_number_and_incr (&mcnt, &p); 756 extract_number_and_incr (&mcnt, &p);
758 extract_number_and_incr (&mcnt2, &p); 757 extract_number_and_incr (&mcnt2, &p);
759 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); 758 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
760 break; 759 break;
761 760
762 case set_number_at: 761 case set_number_at:
763 extract_number_and_incr (&mcnt, &p); 762 extract_number_and_incr (&mcnt, &p);
764 extract_number_and_incr (&mcnt2, &p); 763 extract_number_and_incr (&mcnt2, &p);
765 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); 764 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
766 break; 765 break;
767 766
768 case wordbound: 767 case wordbound:
769 printf ("/wordbound"); 768 printf ("/wordbound");
770 break; 769 break;
771 770
772 case notwordbound: 771 case notwordbound:
773 printf ("/notwordbound"); 772 printf ("/notwordbound");
774 break; 773 break;
775 774
776 case wordbeg: 775 case wordbeg:
777 printf ("/wordbeg"); 776 printf ("/wordbeg");
778 break; 777 break;
779 778
781 printf ("/wordend"); 780 printf ("/wordend");
782 781
783 #ifdef emacs 782 #ifdef emacs
784 case before_dot: 783 case before_dot:
785 printf ("/before_dot"); 784 printf ("/before_dot");
786 break; 785 break;
787 786
788 case at_dot: 787 case at_dot:
789 printf ("/at_dot"); 788 printf ("/at_dot");
790 break; 789 break;
791 790
792 case after_dot: 791 case after_dot:
793 printf ("/after_dot"); 792 printf ("/after_dot");
794 break; 793 break;
795 794
796 case syntaxspec: 795 case syntaxspec:
797 printf ("/syntaxspec"); 796 printf ("/syntaxspec");
798 mcnt = *p++; 797 mcnt = *p++;
799 printf ("/%d", mcnt); 798 printf ("/%d", mcnt);
800 break; 799 break;
801 800
802 case notsyntaxspec: 801 case notsyntaxspec:
803 printf ("/notsyntaxspec"); 802 printf ("/notsyntaxspec");
804 mcnt = *p++; 803 mcnt = *p++;
805 printf ("/%d", mcnt); 804 printf ("/%d", mcnt);
806 break; 805 break;
807 #endif /* emacs */ 806 #endif /* emacs */
808 807
809 case wordchar: 808 case wordchar:
810 printf ("/wordchar"); 809 printf ("/wordchar");
811 break; 810 break;
812 811
813 case notwordchar: 812 case notwordchar:
814 printf ("/notwordchar"); 813 printf ("/notwordchar");
815 break; 814 break;
816 815
817 case begbuf: 816 case begbuf:
818 printf ("/begbuf"); 817 printf ("/begbuf");
819 break; 818 break;
820 819
821 case endbuf: 820 case endbuf:
822 printf ("/endbuf"); 821 printf ("/endbuf");
823 break; 822 break;
824 823
825 default: 824 default:
826 printf ("?%d", *(p-1)); 825 printf ("?%d", *(p-1));
827 } 826 }
828 827
829 putchar ('\n'); 828 putchar ('\n');
830 } 829 }
831 830
873 if (where == NULL) 872 if (where == NULL)
874 printf ("(null)"); 873 printf ("(null)");
875 else 874 else
876 { 875 {
877 if (FIRST_STRING_P (where)) 876 if (FIRST_STRING_P (where))
878 { 877 {
879 for (this_char = where - string1; this_char < size1; this_char++) 878 for (this_char = where - string1; this_char < size1; this_char++)
880 putchar (string1[this_char]); 879 putchar (string1[this_char]);
881 880
882 where = string2; 881 where = string2;
883 } 882 }
884 883
885 for (this_char = where - string2; this_char < size2; this_char++) 884 for (this_char = where - string2; this_char < size2; this_char++)
886 putchar (string2[this_char]); 885 putchar (string2[this_char]);
887 } 886 }
888 } 887 }
889 888
890 #else /* not DEBUG */ 889 #else /* not DEBUG */
891 890
913 /* Specify the precise syntax of regexps for compilation. This provides 912 /* Specify the precise syntax of regexps for compilation. This provides
914 for compatibility for various utilities which historically have 913 for compatibility for various utilities which historically have
915 different, incompatible syntaxes. 914 different, incompatible syntaxes.
916 915
917 The argument SYNTAX is a bit mask comprised of the various bits 916 The argument SYNTAX is a bit mask comprised of the various bits
918 defined in regex.h. We return the old syntax. */ 917 defined in regex.h. We return the old syntax. */
919 918
920 reg_syntax_t 919 reg_syntax_t
921 re_set_syntax (syntax) 920 re_set_syntax (syntax)
922 reg_syntax_t syntax; 921 reg_syntax_t syntax;
923 { 922 {
926 re_syntax_options = syntax; 925 re_syntax_options = syntax;
927 return ret; 926 return ret;
928 } 927 }
929 928
930 /* This table gives an error message for each of the error codes listed 929 /* This table gives an error message for each of the error codes listed
931 in regex.h. Obviously the order here has to be same as there. 930 in regex.h. Obviously the order here has to be same as there.
932 POSIX doesn't require that we do anything for REG_NOERROR, 931 POSIX doesn't require that we do anything for REG_NOERROR,
933 but why not be nice? */ 932 but why not be nice? */
934 933
935 static const char *re_error_msgid[] = 934 static const char *re_error_msgid[] =
936 { 935 {
937 gettext_noop ("Success"), /* REG_NOERROR */ 936 gettext_noop ("Success"), /* REG_NOERROR */
938 gettext_noop ("No match"), /* REG_NOMATCH */ 937 gettext_noop ("No match"), /* REG_NOMATCH */
951 gettext_noop ("Premature end of regular expression"), /* REG_EEND */ 950 gettext_noop ("Premature end of regular expression"), /* REG_EEND */
952 gettext_noop ("Regular expression too big"), /* REG_ESIZE */ 951 gettext_noop ("Regular expression too big"), /* REG_ESIZE */
953 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ 952 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
954 }; 953 };
955 954
956 /* Avoiding alloca during matching, to placate r_alloc. */ 955 /* Avoiding alloca during matching, to placate r_alloc. */
957 956
958 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 957 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
959 searching and matching functions should not call alloca. On some 958 searching and matching functions should not call alloca. On some
960 systems, alloca is implemented in terms of malloc, and if we're 959 systems, alloca is implemented in terms of malloc, and if we're
961 using the relocating allocator routines, then malloc could cause a 960 using the relocating allocator routines, then malloc could cause a
983 982
984 /* The match routines may not allocate if (1) they would do it with malloc 983 /* The match routines may not allocate if (1) they would do it with malloc
985 and (2) it's not safe for them to use malloc. 984 and (2) it's not safe for them to use malloc.
986 Note that if REL_ALLOC is defined, matching would not use malloc for the 985 Note that if REL_ALLOC is defined, matching would not use malloc for the
987 failure stack, but we would still use it for the register vectors; 986 failure stack, but we would still use it for the register vectors;
988 so REL_ALLOC should not affect this. */ 987 so REL_ALLOC should not affect this. */
989 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs) 988 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
990 #undef MATCH_MAY_ALLOCATE 989 #undef MATCH_MAY_ALLOCATE
991 #endif 990 #endif
992 991
993 992
1004 #endif 1003 #endif
1005 1004
1006 /* Roughly the maximum number of failure points on the stack. Would be 1005 /* Roughly the maximum number of failure points on the stack. Would be
1007 exactly that if always used MAX_FAILURE_ITEMS items each time we failed. 1006 exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1008 This is a variable only so users of regex can assign to it; we never 1007 This is a variable only so users of regex can assign to it; we never
1009 change it ourselves. */ 1008 change it ourselves. */
1010 #if defined (MATCH_MAY_ALLOCATE) 1009 #if defined (MATCH_MAY_ALLOCATE)
1011 /* 4400 was enough to cause a crash on Alpha OSF/1, 1010 /* 4400 was enough to cause a crash on Alpha OSF/1,
1012 whose default stack limit is 2mb. */ 1011 whose default stack limit is 2mb. */
1013 int re_max_failures = 20000; 1012 int re_max_failures = 20000;
1014 #else 1013 #else
1065 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 1064 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1066 1065
1067 Return 1 if succeeds, and 0 if either ran out of memory 1066 Return 1 if succeeds, and 0 if either ran out of memory
1068 allocating space for it or it was already too large. 1067 allocating space for it or it was already too large.
1069 1068
1070 REGEX_REALLOCATE_STACK requires `destination' be declared. */ 1069 REGEX_REALLOCATE_STACK requires `destination' be declared. */
1071 1070
1072 #define DOUBLE_FAIL_STACK(fail_stack) \ 1071 #define DOUBLE_FAIL_STACK(fail_stack) \
1073 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ 1072 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
1074 ? 0 \ 1073 ? 0 \
1075 : ((fail_stack).stack = (fail_stack_elt_t *) \ 1074 : ((fail_stack).stack = (fail_stack_elt_t *) \
1076 REGEX_REALLOCATE_STACK ((fail_stack).stack, \ 1075 REGEX_REALLOCATE_STACK ((fail_stack).stack, \
1077 (fail_stack).size * sizeof (fail_stack_elt_t), \ 1076 (fail_stack).size * sizeof (fail_stack_elt_t), \
1078 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ 1077 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
1079 \ 1078 \
1080 (fail_stack).stack == NULL \ 1079 (fail_stack).stack == NULL \
1081 ? 0 \ 1080 ? 0 \
1082 : ((fail_stack).size <<= 1, \ 1081 : ((fail_stack).size <<= 1, \
1083 1))) 1082 1)))
1084 1083
1085 1084
1086 /* Push pointer POINTER on FAIL_STACK. 1085 /* Push pointer POINTER on FAIL_STACK.
1087 Return 1 if was able to do so and 0 if ran out of memory allocating 1086 Return 1 if was able to do so and 0 if ran out of memory allocating
1088 space to do so. */ 1087 space to do so. */
1093 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 1092 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1094 1)) 1093 1))
1095 1094
1096 /* Push a pointer value onto the failure stack. 1095 /* Push a pointer value onto the failure stack.
1097 Assumes the variable `fail_stack'. Probably should only 1096 Assumes the variable `fail_stack'. Probably should only
1098 be called from within `PUSH_FAILURE_POINT'. */ 1097 be called from within `PUSH_FAILURE_POINT'. */
1099 #define PUSH_FAILURE_POINTER(item) \ 1098 #define PUSH_FAILURE_POINTER(item) \
1100 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) 1099 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1101 1100
1102 /* This pushes an integer-valued item onto the failure stack. 1101 /* This pushes an integer-valued item onto the failure stack.
1103 Assumes the variable `fail_stack'. Probably should only 1102 Assumes the variable `fail_stack'. Probably should only
1104 be called from within `PUSH_FAILURE_POINT'. */ 1103 be called from within `PUSH_FAILURE_POINT'. */
1105 #define PUSH_FAILURE_INT(item) \ 1104 #define PUSH_FAILURE_INT(item) \
1106 fail_stack.stack[fail_stack.avail++].integer = (item) 1105 fail_stack.stack[fail_stack.avail++].integer = (item)
1107 1106
1108 /* Push a fail_stack_elt_t value onto the failure stack. 1107 /* Push a fail_stack_elt_t value onto the failure stack.
1109 Assumes the variable `fail_stack'. Probably should only 1108 Assumes the variable `fail_stack'. Probably should only
1110 be called from within `PUSH_FAILURE_POINT'. */ 1109 be called from within `PUSH_FAILURE_POINT'. */
1111 #define PUSH_FAILURE_ELT(item) \ 1110 #define PUSH_FAILURE_ELT(item) \
1112 fail_stack.stack[fail_stack.avail++] = (item) 1111 fail_stack.stack[fail_stack.avail++] = (item)
1113 1112
1114 /* These three POP... operations complement the three PUSH... operations. 1113 /* These three POP... operations complement the three PUSH... operations.
1115 All assume that `fail_stack' is nonempty. */ 1114 All assume that `fail_stack' is nonempty. */
1140 do { \ 1139 do { \
1141 char *destination; \ 1140 char *destination; \
1142 /* Must be int, so when we don't save any registers, the arithmetic \ 1141 /* Must be int, so when we don't save any registers, the arithmetic \
1143 of 0 + -1 isn't done as unsigned. */ \ 1142 of 0 + -1 isn't done as unsigned. */ \
1144 int this_reg; \ 1143 int this_reg; \
1145 \ 1144 \
1146 DEBUG_STATEMENT (failure_id++); \ 1145 DEBUG_STATEMENT (failure_id++); \
1147 DEBUG_STATEMENT (nfailure_points_pushed++); \ 1146 DEBUG_STATEMENT (nfailure_points_pushed++); \
1148 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ 1147 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1149 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ 1148 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
1150 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ 1149 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1151 \ 1150 \
1152 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ 1151 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
1153 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ 1152 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
1154 \ 1153 \
1155 /* Ensure we have enough space allocated for what we will push. */ \ 1154 /* Ensure we have enough space allocated for what we will push. */ \
1156 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ 1155 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
1157 { \ 1156 { \
1158 if (!DOUBLE_FAIL_STACK (fail_stack)) \ 1157 if (!DOUBLE_FAIL_STACK (fail_stack)) \
1159 return failure_code; \ 1158 return failure_code; \
1160 \ 1159 \
1161 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ 1160 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
1162 (fail_stack).size); \ 1161 (fail_stack).size); \
1163 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ 1162 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1164 } \ 1163 } \
1165 \ 1164 \
1166 /* Push the info, starting with the registers. */ \ 1165 /* Push the info, starting with the registers. */ \
1167 DEBUG_PRINT1 ("\n"); \ 1166 DEBUG_PRINT1 ("\n"); \
1168 \ 1167 \
1200 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ 1199 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
1201 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ 1200 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
1202 PUSH_FAILURE_POINTER (pattern_place); \ 1201 PUSH_FAILURE_POINTER (pattern_place); \
1203 \ 1202 \
1204 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ 1203 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
1205 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ 1204 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
1206 size2); \ 1205 size2); \
1207 DEBUG_PRINT1 ("'\n"); \ 1206 DEBUG_PRINT1 ("'\n"); \
1208 PUSH_FAILURE_POINTER (string_place); \ 1207 PUSH_FAILURE_POINTER (string_place); \
1209 \ 1208 \
1210 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ 1209 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
1247 LOW_REG, HIGH_REG -- the highest and lowest active registers. 1246 LOW_REG, HIGH_REG -- the highest and lowest active registers.
1248 REGSTART, REGEND -- arrays of string positions. 1247 REGSTART, REGEND -- arrays of string positions.
1249 REG_INFO -- array of information about each subexpression. 1248 REG_INFO -- array of information about each subexpression.
1250 1249
1251 Also assumes the variables `fail_stack' and (if debugging), `bufp', 1250 Also assumes the variables `fail_stack' and (if debugging), `bufp',
1252 `pend', `string1', `size1', `string2', and `size2'. */ 1251 `pend', `string1', `size1', `string2', and `size2'. */
1253 1252
1254 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ 1253 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1255 { \ 1254 { \
1256 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ 1255 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
1257 int this_reg; \ 1256 int this_reg; \
1260 assert (!FAIL_STACK_EMPTY ()); \ 1259 assert (!FAIL_STACK_EMPTY ()); \
1261 \ 1260 \
1262 /* Remove failure points and point to how many regs pushed. */ \ 1261 /* Remove failure points and point to how many regs pushed. */ \
1263 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ 1262 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
1264 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ 1263 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
1265 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ 1264 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
1266 \ 1265 \
1267 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ 1266 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
1268 \ 1267 \
1269 DEBUG_POP (&failure_id); \ 1268 DEBUG_POP (&failure_id); \
1270 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ 1269 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
1292 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ 1291 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
1293 \ 1292 \
1294 if (1) \ 1293 if (1) \
1295 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ 1294 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
1296 { \ 1295 { \
1297 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ 1296 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
1298 \ 1297 \
1299 reg_info[this_reg].word = POP_FAILURE_ELT (); \ 1298 reg_info[this_reg].word = POP_FAILURE_ELT (); \
1300 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ 1299 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
1301 \ 1300 \
1302 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 1301 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
1303 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ 1302 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1304 \ 1303 \
1305 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 1304 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
1306 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ 1305 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1307 } \ 1306 } \
1308 else \ 1307 else \
1309 { \ 1308 { \
1310 for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ 1309 for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1311 { \ 1310 { \
1337 { 1336 {
1338 fail_stack_elt_t word; 1337 fail_stack_elt_t word;
1339 struct 1338 struct
1340 { 1339 {
1341 /* This field is one if this group can match the empty string, 1340 /* This field is one if this group can match the empty string,
1342 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ 1341 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
1343 #define MATCH_NULL_UNSET_VALUE 3 1342 #define MATCH_NULL_UNSET_VALUE 3
1344 unsigned match_null_string_p : 2; 1343 unsigned match_null_string_p : 2;
1345 unsigned is_active : 1; 1344 unsigned is_active : 1;
1346 unsigned matched_something : 1; 1345 unsigned matched_something : 1;
1347 unsigned ever_matched_something : 1; 1346 unsigned ever_matched_something : 1;
1398 if (translate) c = (unsigned char) translate[c]; \ 1397 if (translate) c = (unsigned char) translate[c]; \
1399 } while (0) 1398 } while (0)
1400 #endif 1399 #endif
1401 1400
1402 /* Fetch the next character in the uncompiled pattern, with no 1401 /* Fetch the next character in the uncompiled pattern, with no
1403 translation. */ 1402 translation. */
1404 #define PATFETCH_RAW(c) \ 1403 #define PATFETCH_RAW(c) \
1405 do {if (p == pend) return REG_EEND; \ 1404 do {if (p == pend) return REG_EEND; \
1406 c = (unsigned char) *p++; \ 1405 c = (unsigned char) *p++; \
1407 } while (0) 1406 } while (0)
1408 1407
1409 /* Go backwards one character in the pattern. */ 1408 /* Go backwards one character in the pattern. */
1410 #define PATUNFETCH p-- 1409 #define PATUNFETCH p--
1411 1410
1423 /* Macros for outputting the compiled pattern into `buffer'. */ 1422 /* Macros for outputting the compiled pattern into `buffer'. */
1424 1423
1425 /* If the buffer isn't allocated when it comes in, use this. */ 1424 /* If the buffer isn't allocated when it comes in, use this. */
1426 #define INIT_BUF_SIZE 32 1425 #define INIT_BUF_SIZE 32
1427 1426
1428 /* Make sure we have at least N more bytes of space in buffer. */ 1427 /* Make sure we have at least N more bytes of space in buffer. */
1429 #define GET_BUFFER_SPACE(n) \ 1428 #define GET_BUFFER_SPACE(n) \
1430 while (b - bufp->buffer + (n) > bufp->allocated) \ 1429 while (b - bufp->buffer + (n) > bufp->allocated) \
1431 EXTEND_BUFFER () 1430 EXTEND_BUFFER ()
1432 1431
1433 /* Make sure we have one more byte of buffer space and then add C to it. */ 1432 /* Make sure we have one more byte of buffer space and then add C to it. */
1445 *b++ = (unsigned char) (c1); \ 1444 *b++ = (unsigned char) (c1); \
1446 *b++ = (unsigned char) (c2); \ 1445 *b++ = (unsigned char) (c2); \
1447 } while (0) 1446 } while (0)
1448 1447
1449 1448
1450 /* As with BUF_PUSH_2, except for three bytes. */ 1449 /* As with BUF_PUSH_2, except for three bytes. */
1451 #define BUF_PUSH_3(c1, c2, c3) \ 1450 #define BUF_PUSH_3(c1, c2, c3) \
1452 do { \ 1451 do { \
1453 GET_BUFFER_SPACE (3); \ 1452 GET_BUFFER_SPACE (3); \
1454 *b++ = (unsigned char) (c1); \ 1453 *b++ = (unsigned char) (c1); \
1455 *b++ = (unsigned char) (c2); \ 1454 *b++ = (unsigned char) (c2); \
1456 *b++ = (unsigned char) (c3); \ 1455 *b++ = (unsigned char) (c3); \
1457 } while (0) 1456 } while (0)
1458 1457
1459 1458
1460 /* Store a jump with opcode OP at LOC to location TO. We store a 1459 /* Store a jump with opcode OP at LOC to location TO. We store a
1461 relative address offset by the three bytes the jump itself occupies. */ 1460 relative address offset by the three bytes the jump itself occupies. */
1462 #define STORE_JUMP(op, loc, to) \ 1461 #define STORE_JUMP(op, loc, to) \
1463 store_op1 (op, loc, (to) - (loc) - 3) 1462 store_op1 (op, loc, (to) - (loc) - 3)
1464 1463
1465 /* Likewise, for a two-argument jump. */ 1464 /* Likewise, for a two-argument jump. */
1466 #define STORE_JUMP2(op, loc, to, arg) \ 1465 #define STORE_JUMP2(op, loc, to, arg) \
1467 store_op2 (op, loc, (to) - (loc) - 3, arg) 1466 store_op2 (op, loc, (to) - (loc) - 3, arg)
1468 1467
1469 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ 1468 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
1470 #define INSERT_JUMP(op, loc, to) \ 1469 #define INSERT_JUMP(op, loc, to) \
1471 insert_op1 (op, loc, (to) - (loc) - 3, b) 1470 insert_op1 (op, loc, (to) - (loc) - 3, b)
1472 1471
1473 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ 1472 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
1474 #define INSERT_JUMP2(op, loc, to, arg) \ 1473 #define INSERT_JUMP2(op, loc, to, arg) \
1475 insert_op2 (op, loc, (to) - (loc) - 3, arg, b) 1474 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1476 1475
1477 1476
1478 /* This is not an arbitrary limit: the arguments which represent offsets 1477 /* This is not an arbitrary limit: the arguments which represent offsets
1479 into the pattern are two bytes long. So if 2^16 bytes turns out to 1478 into the pattern are two bytes long. So if 2^16 bytes turns out to
1480 be too small, many things would have to change. */ 1479 be too small, many things would have to change. */
1481 #define MAX_BUF_SIZE (1L << 16) 1480 #define MAX_BUF_SIZE (1L << 16)
1482 1481
1483 1482
1484 /* Extend the buffer by twice its current size via realloc and 1483 /* Extend the buffer by twice its current size via realloc and
1485 reset the pointers that pointed into the old block to point to the 1484 reset the pointers that pointed into the old block to point to the
1486 correct places in the new one. If extending the buffer results in it 1485 correct places in the new one. If extending the buffer results in it
1487 being larger than MAX_BUF_SIZE, then flag memory exhausted. */ 1486 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
1488 #define EXTEND_BUFFER() \ 1487 #define EXTEND_BUFFER() \
1489 do { \ 1488 do { \
1490 unsigned char *old_buffer = bufp->buffer; \ 1489 unsigned char *old_buffer = bufp->buffer; \
1491 if (bufp->allocated == MAX_BUF_SIZE) \ 1490 if (bufp->allocated == MAX_BUF_SIZE) \
1492 return REG_ESIZE; \ 1491 return REG_ESIZE; \
1493 bufp->allocated <<= 1; \ 1492 bufp->allocated <<= 1; \
1494 if (bufp->allocated > MAX_BUF_SIZE) \ 1493 if (bufp->allocated > MAX_BUF_SIZE) \
1495 bufp->allocated = MAX_BUF_SIZE; \ 1494 bufp->allocated = MAX_BUF_SIZE; \
1496 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ 1495 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
1497 if (bufp->buffer == NULL) \ 1496 if (bufp->buffer == NULL) \
1498 return REG_ESPACE; \ 1497 return REG_ESPACE; \
1499 /* If the buffer moved, move all the pointers into it. */ \ 1498 /* If the buffer moved, move all the pointers into it. */ \
1500 if (old_buffer != bufp->buffer) \ 1499 if (old_buffer != bufp->buffer) \
1501 { \ 1500 { \
1502 b = (b - old_buffer) + bufp->buffer; \ 1501 b = (b - old_buffer) + bufp->buffer; \
1503 begalt = (begalt - old_buffer) + bufp->buffer; \ 1502 begalt = (begalt - old_buffer) + bufp->buffer; \
1504 if (fixup_alt_jump) \ 1503 if (fixup_alt_jump) \
1505 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ 1504 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1506 if (laststart) \ 1505 if (laststart) \
1507 laststart = (laststart - old_buffer) + bufp->buffer; \ 1506 laststart = (laststart - old_buffer) + bufp->buffer; \
1508 if (pending_exact) \ 1507 if (pending_exact) \
1509 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ 1508 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
1510 } \ 1509 } \
1511 } while (0) 1510 } while (0)
1512 1511
1513 1512
1514 /* Since we have one byte reserved for the register number argument to 1513 /* Since we have one byte reserved for the register number argument to
1522 1521
1523 1522
1524 /* Macros for the compile stack. */ 1523 /* Macros for the compile stack. */
1525 1524
1526 /* Since offsets can go either forwards or backwards, this type needs to 1525 /* Since offsets can go either forwards or backwards, this type needs to
1527 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ 1526 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
1528 typedef int pattern_offset_t; 1527 typedef int pattern_offset_t;
1529 1528
1530 typedef struct 1529 typedef struct
1531 { 1530 {
1532 pattern_offset_t begalt_offset; 1531 pattern_offset_t begalt_offset;
1548 #define INIT_COMPILE_STACK_SIZE 32 1547 #define INIT_COMPILE_STACK_SIZE 32
1549 1548
1550 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) 1549 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1551 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) 1550 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1552 1551
1553 /* The next available element. */ 1552 /* The next available element. */
1554 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 1553 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1555 1554
1556 1555
1557 /* Set the bit for character C in a list. */ 1556 /* Set the bit for character C in a list. */
1558 #define SET_LIST_BIT(c) \ 1557 #define SET_LIST_BIT(c) \
1559 (b[((unsigned char) (c)) / BYTEWIDTH] \ 1558 (b[((unsigned char) (c)) / BYTEWIDTH] \
1560 |= 1 << (((unsigned char) c) % BYTEWIDTH)) 1559 |= 1 << (((unsigned char) c) % BYTEWIDTH))
1561 1560
1562 1561
1563 /* Get the next unsigned number in the uncompiled pattern. */ 1562 /* Get the next unsigned number in the uncompiled pattern. */
1564 #define GET_UNSIGNED_NUMBER(num) \ 1563 #define GET_UNSIGNED_NUMBER(num) \
1565 { if (p != pend) \ 1564 { if (p != pend) \
1566 { \ 1565 { \
1567 PATFETCH (c); \ 1566 PATFETCH (c); \
1568 while (ISDIGIT (c)) \ 1567 while (ISDIGIT (c)) \
1569 { \ 1568 { \
1570 if (num < 0) \ 1569 if (num < 0) \
1571 num = 0; \ 1570 num = 0; \
1572 num = num * 10 + c - '0'; \ 1571 num = num * 10 + c - '0'; \
1573 if (p == pend) \ 1572 if (p == pend) \
1574 break; \ 1573 break; \
1575 PATFETCH (c); \ 1574 PATFETCH (c); \
1576 } \ 1575 } \
1577 } \ 1576 } \
1578 } 1577 }
1579 1578
1580 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ 1579 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
1581 1580
1582 #define IS_CHAR_CLASS(string) \ 1581 #define IS_CHAR_CLASS(string) \
1598 1597
1599 static fail_stack_type fail_stack; 1598 static fail_stack_type fail_stack;
1600 1599
1601 /* Size with which the following vectors are currently allocated. 1600 /* Size with which the following vectors are currently allocated.
1602 That is so we can make them bigger as needed, 1601 That is so we can make them bigger as needed,
1603 but never make them smaller. */ 1602 but never make them smaller. */
1604 static int regs_allocated_size; 1603 static int regs_allocated_size;
1605 1604
1606 static const char ** regstart, ** regend; 1605 static const char ** regstart, ** regend;
1607 static const char ** old_regstart, ** old_regend; 1606 static const char ** old_regstart, ** old_regend;
1608 static const char **best_regstart, **best_regend; 1607 static const char **best_regstart, **best_regend;
1609 static register_info_type *reg_info; 1608 static register_info_type *reg_info;
1610 static const char **reg_dummy; 1609 static const char **reg_dummy;
1611 static register_info_type *reg_info_dummy; 1610 static register_info_type *reg_info_dummy;
1612 1611
1613 /* Make the register vectors big enough for NUM_REGS registers, 1612 /* Make the register vectors big enough for NUM_REGS registers,
1614 but don't make them smaller. */ 1613 but don't make them smaller. */
1615 1614
1616 static 1615 static
1617 regex_grow_registers (num_regs) 1616 regex_grow_registers (num_regs)
1618 int num_regs; 1617 int num_regs;
1619 { 1618 {
1702 /* Place in the uncompiled pattern (i.e., the {) to 1701 /* Place in the uncompiled pattern (i.e., the {) to
1703 which to go back if the interval is invalid. */ 1702 which to go back if the interval is invalid. */
1704 const char *beg_interval; 1703 const char *beg_interval;
1705 1704
1706 /* Address of the place where a forward jump should go to the end of 1705 /* Address of the place where a forward jump should go to the end of
1707 the containing expression. Each alternative of an `or' -- except the 1706 the containing expression. Each alternative of an `or' -- except the
1708 last -- ends with a forward jump of this sort. */ 1707 last -- ends with a forward jump of this sort. */
1709 unsigned char *fixup_alt_jump = 0; 1708 unsigned char *fixup_alt_jump = 0;
1710 1709
1711 /* Counts open-groups as they are encountered. Remembered for the 1710 /* Counts open-groups as they are encountered. Remembered for the
1712 matching close-group on the compile stack, so the same register 1711 matching close-group on the compile stack, so the same register
1718 if (debug) 1717 if (debug)
1719 { 1718 {
1720 unsigned debug_count; 1719 unsigned debug_count;
1721 1720
1722 for (debug_count = 0; debug_count < size; debug_count++) 1721 for (debug_count = 0; debug_count < size; debug_count++)
1723 putchar (pattern[debug_count]); 1722 putchar (pattern[debug_count]);
1724 putchar ('\n'); 1723 putchar ('\n');
1725 } 1724 }
1726 #endif /* DEBUG */ 1725 #endif /* DEBUG */
1727 1726
1728 /* Initialize the compile stack. */ 1727 /* Initialize the compile stack. */
1753 1752
1754 if (bufp->allocated == 0) 1753 if (bufp->allocated == 0)
1755 { 1754 {
1756 if (bufp->buffer) 1755 if (bufp->buffer)
1757 { /* If zero allocated, but buffer is non-null, try to realloc 1756 { /* If zero allocated, but buffer is non-null, try to realloc
1758 enough space. This loses if buffer's address is bogus, but 1757 enough space. This loses if buffer's address is bogus, but
1759 that is the user's responsibility. */ 1758 that is the user's responsibility. */
1760 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); 1759 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1761 } 1760 }
1762 else 1761 else
1763 { /* Caller did not allocate a buffer. Do it for them. */ 1762 { /* Caller did not allocate a buffer. Do it for them. */
1764 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); 1763 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1765 } 1764 }
1766 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); 1765 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1767 1766
1768 bufp->allocated = INIT_BUF_SIZE; 1767 bufp->allocated = INIT_BUF_SIZE;
1769 } 1768 }
1770 1769
1774 while (p != pend) 1773 while (p != pend)
1775 { 1774 {
1776 PATFETCH (c); 1775 PATFETCH (c);
1777 1776
1778 switch (c) 1777 switch (c)
1779 { 1778 {
1780 case '^': 1779 case '^':
1781 { 1780 {
1782 if ( /* If at start of pattern, it's an operator. */ 1781 if ( /* If at start of pattern, it's an operator. */
1783 p == pattern + 1 1782 p == pattern + 1
1784 /* If context independent, it's an operator. */ 1783 /* If context independent, it's an operator. */
1785 || syntax & RE_CONTEXT_INDEP_ANCHORS 1784 || syntax & RE_CONTEXT_INDEP_ANCHORS
1786 /* Otherwise, depends on what's come before. */ 1785 /* Otherwise, depends on what's come before. */
1787 || at_begline_loc_p (pattern, p, syntax)) 1786 || at_begline_loc_p (pattern, p, syntax))
1788 BUF_PUSH (begline); 1787 BUF_PUSH (begline);
1789 else 1788 else
1790 goto normal_char; 1789 goto normal_char;
1791 } 1790 }
1792 break; 1791 break;
1793 1792
1794 1793
1795 case '$': 1794 case '$':
1796 { 1795 {
1797 if ( /* If at end of pattern, it's an operator. */ 1796 if ( /* If at end of pattern, it's an operator. */
1798 p == pend 1797 p == pend
1799 /* If context independent, it's an operator. */ 1798 /* If context independent, it's an operator. */
1800 || syntax & RE_CONTEXT_INDEP_ANCHORS 1799 || syntax & RE_CONTEXT_INDEP_ANCHORS
1801 /* Otherwise, depends on what's next. */ 1800 /* Otherwise, depends on what's next. */
1802 || at_endline_loc_p (p, pend, syntax)) 1801 || at_endline_loc_p (p, pend, syntax))
1803 BUF_PUSH (endline); 1802 BUF_PUSH (endline);
1804 else 1803 else
1805 goto normal_char; 1804 goto normal_char;
1806 } 1805 }
1807 break; 1806 break;
1808 1807
1809 1808
1810 case '+': 1809 case '+':
1811 case '?': 1810 case '?':
1812 if ((syntax & RE_BK_PLUS_QM) 1811 if ((syntax & RE_BK_PLUS_QM)
1813 || (syntax & RE_LIMITED_OPS)) 1812 || (syntax & RE_LIMITED_OPS))
1814 goto normal_char; 1813 goto normal_char;
1815 handle_plus: 1814 handle_plus:
1816 case '*': 1815 case '*':
1817 /* If there is no previous pattern... */ 1816 /* If there is no previous pattern... */
1818 if (!laststart) 1817 if (!laststart)
1819 { 1818 {
1820 if (syntax & RE_CONTEXT_INVALID_OPS) 1819 if (syntax & RE_CONTEXT_INVALID_OPS)
1821 FREE_STACK_RETURN (REG_BADRPT); 1820 FREE_STACK_RETURN (REG_BADRPT);
1822 else if (!(syntax & RE_CONTEXT_INDEP_OPS)) 1821 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1823 goto normal_char; 1822 goto normal_char;
1824 } 1823 }
1825 1824
1826 { 1825 {
1827 /* Are we optimizing this jump? */ 1826 /* Are we optimizing this jump? */
1828 boolean keep_string_p = false; 1827 boolean keep_string_p = false;
1829 1828
1830 /* 1 means zero (many) matches is allowed. */ 1829 /* 1 means zero (many) matches is allowed. */
1831 char zero_times_ok = 0, many_times_ok = 0; 1830 char zero_times_ok = 0, many_times_ok = 0;
1832 1831
1833 /* If there is a sequence of repetition chars, collapse it 1832 /* If there is a sequence of repetition chars, collapse it
1834 down to just one (the right one). We can't combine 1833 down to just one (the right one). We can't combine
1835 interval operators with these because of, e.g., `a{2}*', 1834 interval operators with these because of, e.g., `a{2}*',
1836 which should only match an even number of `a's. */ 1835 which should only match an even number of `a's. */
1837 1836
1838 for (;;) 1837 for (;;)
1839 { 1838 {
1840 zero_times_ok |= c != '+'; 1839 zero_times_ok |= c != '+';
1841 many_times_ok |= c != '?'; 1840 many_times_ok |= c != '?';
1842 1841
1843 if (p == pend) 1842 if (p == pend)
1844 break; 1843 break;
1845 1844
1846 PATFETCH (c); 1845 PATFETCH (c);
1847 1846
1848 if (c == '*' 1847 if (c == '*'
1849 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) 1848 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1850 ; 1849 ;
1851 1850
1852 else if (syntax & RE_BK_PLUS_QM && c == '\\') 1851 else if (syntax & RE_BK_PLUS_QM && c == '\\')
1853 { 1852 {
1854 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 1853 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1855 1854
1856 PATFETCH (c1); 1855 PATFETCH (c1);
1857 if (!(c1 == '+' || c1 == '?')) 1856 if (!(c1 == '+' || c1 == '?'))
1858 { 1857 {
1859 PATUNFETCH; 1858 PATUNFETCH;
1860 PATUNFETCH; 1859 PATUNFETCH;
1861 break; 1860 break;
1862 } 1861 }
1863 1862
1864 c = c1; 1863 c = c1;
1865 } 1864 }
1866 else 1865 else
1867 { 1866 {
1868 PATUNFETCH; 1867 PATUNFETCH;
1869 break; 1868 break;
1870 } 1869 }
1871 1870
1872 /* If we get here, we found another repeat character. */ 1871 /* If we get here, we found another repeat character. */
1873 } 1872 }
1874 1873
1875 /* Star, etc. applied to an empty pattern is equivalent 1874 /* Star, etc. applied to an empty pattern is equivalent
1876 to an empty pattern. */ 1875 to an empty pattern. */
1877 if (!laststart) 1876 if (!laststart)
1878 break; 1877 break;
1879 1878
1880 /* Now we know whether or not zero matches is allowed 1879 /* Now we know whether or not zero matches is allowed
1881 and also whether or not two or more matches is allowed. */ 1880 and also whether or not two or more matches is allowed. */
1882 if (many_times_ok) 1881 if (many_times_ok)
1883 { /* More than one repetition is allowed, so put in at the 1882 { /* More than one repetition is allowed, so put in at the
1884 end a backward relative jump from `b' to before the next 1883 end a backward relative jump from `b' to before the next
1885 jump we're going to put in below (which jumps from 1884 jump we're going to put in below (which jumps from
1886 laststart to after this jump). 1885 laststart to after this jump).
1887 1886
1888 But if we are at the `*' in the exact sequence `.*\n', 1887 But if we are at the `*' in the exact sequence `.*\n',
1889 insert an unconditional jump backwards to the ., 1888 insert an unconditional jump backwards to the .,
1890 instead of the beginning of the loop. This way we only 1889 instead of the beginning of the loop. This way we only
1891 push a failure point once, instead of every time 1890 push a failure point once, instead of every time
1892 through the loop. */ 1891 through the loop. */
1893 assert (p - 1 > pattern); 1892 assert (p - 1 > pattern);
1894 1893
1895 /* Allocate the space for the jump. */ 1894 /* Allocate the space for the jump. */
1896 GET_BUFFER_SPACE (3); 1895 GET_BUFFER_SPACE (3);
1897 1896
1898 /* We know we are not at the first character of the pattern, 1897 /* We know we are not at the first character of the pattern,
1899 because laststart was nonzero. And we've already 1898 because laststart was nonzero. And we've already
1900 incremented `p', by the way, to be the character after 1899 incremented `p', by the way, to be the character after
1901 the `*'. Do we have to do something analogous here 1900 the `*'. Do we have to do something analogous here
1902 for null bytes, because of RE_DOT_NOT_NULL? */ 1901 for null bytes, because of RE_DOT_NOT_NULL? */
1903 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') 1902 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1904 && zero_times_ok 1903 && zero_times_ok
1905 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') 1904 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1906 && !(syntax & RE_DOT_NEWLINE)) 1905 && !(syntax & RE_DOT_NEWLINE))
1907 { /* We have .*\n. */ 1906 { /* We have .*\n. */
1908 STORE_JUMP (jump, b, laststart); 1907 STORE_JUMP (jump, b, laststart);
1909 keep_string_p = true; 1908 keep_string_p = true;
1910 } 1909 }
1911 else 1910 else
1912 /* Anything else. */ 1911 /* Anything else. */
1913 STORE_JUMP (maybe_pop_jump, b, laststart - 3); 1912 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1914 1913
1915 /* We've added more stuff to the buffer. */ 1914 /* We've added more stuff to the buffer. */
1916 b += 3; 1915 b += 3;
1917 } 1916 }
1918 1917
1919 /* On failure, jump from laststart to b + 3, which will be the 1918 /* On failure, jump from laststart to b + 3, which will be the
1920 end of the buffer after this jump is inserted. */ 1919 end of the buffer after this jump is inserted. */
1921 GET_BUFFER_SPACE (3); 1920 GET_BUFFER_SPACE (3);
1922 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump 1921 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1923 : on_failure_jump, 1922 : on_failure_jump,
1924 laststart, b + 3); 1923 laststart, b + 3);
1925 pending_exact = 0; 1924 pending_exact = 0;
1926 b += 3; 1925 b += 3;
1927 1926
1928 if (!zero_times_ok) 1927 if (!zero_times_ok)
1929 { 1928 {
1930 /* At least one repetition is required, so insert a 1929 /* At least one repetition is required, so insert a
1931 `dummy_failure_jump' before the initial 1930 `dummy_failure_jump' before the initial
1932 `on_failure_jump' instruction of the loop. This 1931 `on_failure_jump' instruction of the loop. This
1933 effects a skip over that instruction the first time 1932 effects a skip over that instruction the first time
1934 we hit that loop. */ 1933 we hit that loop. */
1935 GET_BUFFER_SPACE (3); 1934 GET_BUFFER_SPACE (3);
1936 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); 1935 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1937 b += 3; 1936 b += 3;
1938 } 1937 }
1939 } 1938 }
1940 break; 1939 break;
1941 1940
1942 1941
1943 case '.': 1942 case '.':
1944 laststart = b; 1943 laststart = b;
1945 BUF_PUSH (anychar); 1944 BUF_PUSH (anychar);
1946 break; 1945 break;
1947 1946
1948 1947
1949 case '[': 1948 case '[':
1950 { 1949 {
1951 boolean had_char_class = false; 1950 boolean had_char_class = false;
1952 1951
1953 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 1952 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1954 1953
1955 /* Ensure that we have enough space to push a charset: the 1954 /* Ensure that we have enough space to push a charset: the
1956 opcode, the length count, and the bitset; 34 bytes in all. */ 1955 opcode, the length count, and the bitset; 34 bytes in all. */
1957 GET_BUFFER_SPACE (34); 1956 GET_BUFFER_SPACE (34);
1958 1957
1959 laststart = b; 1958 laststart = b;
1960 1959
1961 /* We test `*p == '^' twice, instead of using an if 1960 /* We test `*p == '^' twice, instead of using an if
1962 statement, so we only need one BUF_PUSH. */ 1961 statement, so we only need one BUF_PUSH. */
1963 BUF_PUSH (*p == '^' ? charset_not : charset); 1962 BUF_PUSH (*p == '^' ? charset_not : charset);
1964 if (*p == '^') 1963 if (*p == '^')
1965 p++; 1964 p++;
1966 1965
1967 /* Remember the first position in the bracket expression. */ 1966 /* Remember the first position in the bracket expression. */
1968 p1 = p; 1967 p1 = p;
1969 1968
1970 /* Push the number of bytes in the bitmap. */ 1969 /* Push the number of bytes in the bitmap. */
1971 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 1970 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1972 1971
1973 /* Clear the whole map. */ 1972 /* Clear the whole map. */
1974 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); 1973 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1975 1974
1976 /* charset_not matches newline according to a syntax bit. */ 1975 /* charset_not matches newline according to a syntax bit. */
1977 if ((re_opcode_t) b[-2] == charset_not 1976 if ((re_opcode_t) b[-2] == charset_not
1978 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 1977 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1979 SET_LIST_BIT ('\n'); 1978 SET_LIST_BIT ('\n');
1980 1979
1981 /* Read in characters and ranges, setting map bits. */ 1980 /* Read in characters and ranges, setting map bits. */
1982 for (;;) 1981 for (;;)
1983 { 1982 {
1984 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 1983 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1985 1984
1986 PATFETCH (c); 1985 PATFETCH (c);
1987 1986
1988 /* \ might escape characters inside [...] and [^...]. */ 1987 /* \ might escape characters inside [...] and [^...]. */
1989 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 1988 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1990 { 1989 {
1991 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 1990 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1992 1991
1993 PATFETCH (c1); 1992 PATFETCH (c1);
1994 SET_LIST_BIT (c1); 1993 SET_LIST_BIT (c1);
1995 continue; 1994 continue;
1996 } 1995 }
1997 1996
1998 /* Could be the end of the bracket expression. If it's 1997 /* Could be the end of the bracket expression. If it's
1999 not (i.e., when the bracket expression is `[]' so 1998 not (i.e., when the bracket expression is `[]' so
2000 far), the ']' character bit gets set way below. */ 1999 far), the ']' character bit gets set way below. */
2001 if (c == ']' && p != p1 + 1) 2000 if (c == ']' && p != p1 + 1)
2002 break; 2001 break;
2003 2002
2004 /* Look ahead to see if it's a range when the last thing 2003 /* Look ahead to see if it's a range when the last thing
2005 was a character class. */ 2004 was a character class. */
2006 if (had_char_class && c == '-' && *p != ']') 2005 if (had_char_class && c == '-' && *p != ']')
2007 FREE_STACK_RETURN (REG_ERANGE); 2006 FREE_STACK_RETURN (REG_ERANGE);
2008 2007
2009 /* Look ahead to see if it's a range when the last thing 2008 /* Look ahead to see if it's a range when the last thing
2010 was a character: if this is a hyphen not at the 2009 was a character: if this is a hyphen not at the
2011 beginning or the end of a list, then it's the range 2010 beginning or the end of a list, then it's the range
2012 operator. */ 2011 operator. */
2013 if (c == '-' 2012 if (c == '-'
2014 && !(p - 2 >= pattern && p[-2] == '[') 2013 && !(p - 2 >= pattern && p[-2] == '[')
2015 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 2014 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2016 && *p != ']') 2015 && *p != ']')
2017 { 2016 {
2018 reg_errcode_t ret 2017 reg_errcode_t ret
2019 = compile_range (&p, pend, translate, syntax, b); 2018 = compile_range (&p, pend, translate, syntax, b);
2020 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 2019 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2021 } 2020 }
2022 2021
2023 else if (p[0] == '-' && p[1] != ']') 2022 else if (p[0] == '-' && p[1] != ']')
2024 { /* This handles ranges made up of characters only. */ 2023 { /* This handles ranges made up of characters only. */
2025 reg_errcode_t ret; 2024 reg_errcode_t ret;
2026 2025
2027 /* Move past the `-'. */ 2026 /* Move past the `-'. */
2028 PATFETCH (c1); 2027 PATFETCH (c1);
2029 2028
2030 ret = compile_range (&p, pend, translate, syntax, b); 2029 ret = compile_range (&p, pend, translate, syntax, b);
2031 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 2030 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2032 } 2031 }
2033 2032
2034 /* See if we're at the beginning of a possible character 2033 /* See if we're at the beginning of a possible character
2035 class. */ 2034 class. */
2036 2035
2037 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 2036 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2038 { /* Leave room for the null. */ 2037 { /* Leave room for the null. */
2039 char str[CHAR_CLASS_MAX_LENGTH + 1]; 2038 char str[CHAR_CLASS_MAX_LENGTH + 1];
2040 2039
2041 PATFETCH (c); 2040 PATFETCH (c);
2042 c1 = 0; 2041 c1 = 0;
2043 2042
2044 /* If pattern is `[[:'. */ 2043 /* If pattern is `[[:'. */
2045 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2044 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2046 2045
2047 for (;;) 2046 for (;;)
2048 { 2047 {
2049 PATFETCH (c); 2048 PATFETCH (c);
2050 if (c == ':' || c == ']' || p == pend 2049 if (c == ':' || c == ']' || p == pend
2051 || c1 == CHAR_CLASS_MAX_LENGTH) 2050 || c1 == CHAR_CLASS_MAX_LENGTH)
2052 break; 2051 break;
2053 str[c1++] = c; 2052 str[c1++] = c;
2054 } 2053 }
2055 str[c1] = '\0'; 2054 str[c1] = '\0';
2056 2055
2057 /* If it isn't a word bracketed by `[:' and `:]': 2056 /* If it isn't a word bracketed by `[:' and `:]':
2058 undo the ending character, the letters, and leave 2057 undo the ending character, the letters, and leave
2059 the leading `:' and `[' (but set bits for them). */ 2058 the leading `:' and `[' (but set bits for them). */
2060 if (c == ':' && *p == ']') 2059 if (c == ':' && *p == ']')
2061 { 2060 {
2062 int ch; 2061 int ch;
2063 boolean is_alnum = STREQ (str, "alnum"); 2062 boolean is_alnum = STREQ (str, "alnum");
2064 boolean is_alpha = STREQ (str, "alpha"); 2063 boolean is_alpha = STREQ (str, "alpha");
2065 boolean is_blank = STREQ (str, "blank"); 2064 boolean is_blank = STREQ (str, "blank");
2066 boolean is_cntrl = STREQ (str, "cntrl"); 2065 boolean is_cntrl = STREQ (str, "cntrl");
2067 boolean is_digit = STREQ (str, "digit"); 2066 boolean is_digit = STREQ (str, "digit");
2068 boolean is_graph = STREQ (str, "graph"); 2067 boolean is_graph = STREQ (str, "graph");
2069 boolean is_lower = STREQ (str, "lower"); 2068 boolean is_lower = STREQ (str, "lower");
2070 boolean is_print = STREQ (str, "print"); 2069 boolean is_print = STREQ (str, "print");
2071 boolean is_punct = STREQ (str, "punct"); 2070 boolean is_punct = STREQ (str, "punct");
2072 boolean is_space = STREQ (str, "space"); 2071 boolean is_space = STREQ (str, "space");
2073 boolean is_upper = STREQ (str, "upper"); 2072 boolean is_upper = STREQ (str, "upper");
2074 boolean is_xdigit = STREQ (str, "xdigit"); 2073 boolean is_xdigit = STREQ (str, "xdigit");
2075 2074
2076 if (!IS_CHAR_CLASS (str)) 2075 if (!IS_CHAR_CLASS (str))
2077 FREE_STACK_RETURN (REG_ECTYPE); 2076 FREE_STACK_RETURN (REG_ECTYPE);
2078 2077
2079 /* Throw away the ] at the end of the character 2078 /* Throw away the ] at the end of the character
2080 class. */ 2079 class. */
2081 PATFETCH (c); 2080 PATFETCH (c);
2082 2081
2083 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2082 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2084 2083
2085 for (ch = 0; ch < 1 << BYTEWIDTH; ch++) 2084 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2086 { 2085 {
2087 /* This was split into 3 if's to 2086 /* This was split into 3 if's to
2088 avoid an arbitrary limit in some compiler. */ 2087 avoid an arbitrary limit in some compiler. */
2089 if ( (is_alnum && ISALNUM (ch)) 2088 if ( (is_alnum && ISALNUM (ch))
2090 || (is_alpha && ISALPHA (ch)) 2089 || (is_alpha && ISALPHA (ch))
2091 || (is_blank && ISBLANK (ch)) 2090 || (is_blank && ISBLANK (ch))
2092 || (is_cntrl && ISCNTRL (ch))) 2091 || (is_cntrl && ISCNTRL (ch)))
2093 SET_LIST_BIT (ch); 2092 SET_LIST_BIT (ch);
2094 if ( (is_digit && ISDIGIT (ch)) 2093 if ( (is_digit && ISDIGIT (ch))
2095 || (is_graph && ISGRAPH (ch)) 2094 || (is_graph && ISGRAPH (ch))
2096 || (is_lower && ISLOWER (ch)) 2095 || (is_lower && ISLOWER (ch))
2097 || (is_print && ISPRINT (ch))) 2096 || (is_print && ISPRINT (ch)))
2098 SET_LIST_BIT (ch); 2097 SET_LIST_BIT (ch);
2099 if ( (is_punct && ISPUNCT (ch)) 2098 if ( (is_punct && ISPUNCT (ch))
2100 || (is_space && ISSPACE (ch)) 2099 || (is_space && ISSPACE (ch))
2101 || (is_upper && ISUPPER (ch)) 2100 || (is_upper && ISUPPER (ch))
2102 || (is_xdigit && ISXDIGIT (ch))) 2101 || (is_xdigit && ISXDIGIT (ch)))
2103 SET_LIST_BIT (ch); 2102 SET_LIST_BIT (ch);
2104 } 2103 }
2105 had_char_class = true; 2104 had_char_class = true;
2106 } 2105 }
2107 else 2106 else
2108 { 2107 {
2109 c1++; 2108 c1++;
2110 while (c1--) 2109 while (c1--)
2111 PATUNFETCH; 2110 PATUNFETCH;
2112 SET_LIST_BIT ('['); 2111 SET_LIST_BIT ('[');
2113 SET_LIST_BIT (':'); 2112 SET_LIST_BIT (':');
2114 had_char_class = false; 2113 had_char_class = false;
2115 } 2114 }
2116 } 2115 }
2117 else 2116 else
2118 { 2117 {
2119 had_char_class = false; 2118 had_char_class = false;
2120 SET_LIST_BIT (c); 2119 SET_LIST_BIT (c);
2121 } 2120 }
2122 } 2121 }
2123 2122
2124 /* Discard any (non)matching list bytes that are all 0 at the 2123 /* Discard any (non)matching list bytes that are all 0 at the
2125 end of the map. Decrease the map-length byte too. */ 2124 end of the map. Decrease the map-length byte too. */
2126 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 2125 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2127 b[-1]--; 2126 b[-1]--;
2128 b += b[-1]; 2127 b += b[-1];
2129 } 2128 }
2130 break; 2129 break;
2131 2130
2132 2131
2133 case '(': 2132 case '(':
2134 if (syntax & RE_NO_BK_PARENS) 2133 if (syntax & RE_NO_BK_PARENS)
2135 goto handle_open; 2134 goto handle_open;
2136 else 2135 else
2137 goto normal_char; 2136 goto normal_char;
2138 2137
2139 2138
2140 case ')': 2139 case ')':
2141 if (syntax & RE_NO_BK_PARENS) 2140 if (syntax & RE_NO_BK_PARENS)
2142 goto handle_close; 2141 goto handle_close;
2143 else 2142 else
2144 goto normal_char; 2143 goto normal_char;
2145 2144
2146 2145
2147 case '\n': 2146 case '\n':
2148 if (syntax & RE_NEWLINE_ALT) 2147 if (syntax & RE_NEWLINE_ALT)
2149 goto handle_alt; 2148 goto handle_alt;
2150 else 2149 else
2151 goto normal_char; 2150 goto normal_char;
2152 2151
2153 2152
2154 case '|': 2153 case '|':
2155 if (syntax & RE_NO_BK_VBAR) 2154 if (syntax & RE_NO_BK_VBAR)
2156 goto handle_alt; 2155 goto handle_alt;
2157 else 2156 else
2158 goto normal_char; 2157 goto normal_char;
2159 2158
2160 2159
2161 case '{': 2160 case '{':
2162 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) 2161 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2163 goto handle_interval; 2162 goto handle_interval;
2164 else 2163 else
2165 goto normal_char; 2164 goto normal_char;
2166 2165
2167 2166
2168 case '\\': 2167 case '\\':
2169 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2168 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2170 2169
2171 /* Do not translate the character after the \, so that we can 2170 /* Do not translate the character after the \, so that we can
2172 distinguish, e.g., \B from \b, even if we normally would 2171 distinguish, e.g., \B from \b, even if we normally would
2173 translate, e.g., B to b. */ 2172 translate, e.g., B to b. */
2174 PATFETCH_RAW (c); 2173 PATFETCH_RAW (c);
2175 2174
2176 switch (c) 2175 switch (c)
2177 { 2176 {
2178 case '(': 2177 case '(':
2179 if (syntax & RE_NO_BK_PARENS) 2178 if (syntax & RE_NO_BK_PARENS)
2180 goto normal_backslash; 2179 goto normal_backslash;
2181 2180
2182 handle_open: 2181 handle_open:
2183 bufp->re_nsub++; 2182 bufp->re_nsub++;
2184 regnum++; 2183 regnum++;
2185 2184
2186 if (COMPILE_STACK_FULL) 2185 if (COMPILE_STACK_FULL)
2187 { 2186 {
2188 RETALLOC (compile_stack.stack, compile_stack.size << 1, 2187 RETALLOC (compile_stack.stack, compile_stack.size << 1,
2189 compile_stack_elt_t); 2188 compile_stack_elt_t);
2190 if (compile_stack.stack == NULL) return REG_ESPACE; 2189 if (compile_stack.stack == NULL) return REG_ESPACE;
2191 2190
2192 compile_stack.size <<= 1; 2191 compile_stack.size <<= 1;
2193 } 2192 }
2194 2193
2195 /* These are the values to restore when we hit end of this 2194 /* These are the values to restore when we hit end of this
2196 group. They are all relative offsets, so that if the 2195 group. They are all relative offsets, so that if the
2197 whole pattern moves because of realloc, they will still 2196 whole pattern moves because of realloc, they will still
2198 be valid. */ 2197 be valid. */
2199 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; 2198 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2200 COMPILE_STACK_TOP.fixup_alt_jump 2199 COMPILE_STACK_TOP.fixup_alt_jump
2201 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; 2200 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2202 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; 2201 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2203 COMPILE_STACK_TOP.regnum = regnum; 2202 COMPILE_STACK_TOP.regnum = regnum;
2204 2203
2205 /* We will eventually replace the 0 with the number of 2204 /* We will eventually replace the 0 with the number of
2206 groups inner to this one. But do not push a 2205 groups inner to this one. But do not push a
2207 start_memory for groups beyond the last one we can 2206 start_memory for groups beyond the last one we can
2208 represent in the compiled pattern. */ 2207 represent in the compiled pattern. */
2209 if (regnum <= MAX_REGNUM) 2208 if (regnum <= MAX_REGNUM)
2210 { 2209 {
2211 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; 2210 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2212 BUF_PUSH_3 (start_memory, regnum, 0); 2211 BUF_PUSH_3 (start_memory, regnum, 0);
2213 } 2212 }
2214 2213
2215 compile_stack.avail++; 2214 compile_stack.avail++;
2216 2215
2217 fixup_alt_jump = 0; 2216 fixup_alt_jump = 0;
2218 laststart = 0; 2217 laststart = 0;
2219 begalt = b; 2218 begalt = b;
2220 /* If we've reached MAX_REGNUM groups, then this open 2219 /* If we've reached MAX_REGNUM groups, then this open
2221 won't actually generate any code, so we'll have to 2220 won't actually generate any code, so we'll have to
2222 clear pending_exact explicitly. */ 2221 clear pending_exact explicitly. */
2223 pending_exact = 0; 2222 pending_exact = 0;
2224 break; 2223 break;
2225 2224
2226 2225
2227 case ')': 2226 case ')':
2228 if (syntax & RE_NO_BK_PARENS) goto normal_backslash; 2227 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2229 2228
2230 if (COMPILE_STACK_EMPTY) 2229 if (COMPILE_STACK_EMPTY)
2231 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 2230 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2232 goto normal_backslash; 2231 goto normal_backslash;
2233 else 2232 else
2234 FREE_STACK_RETURN (REG_ERPAREN); 2233 FREE_STACK_RETURN (REG_ERPAREN);
2235 2234
2236 handle_close: 2235 handle_close:
2237 if (fixup_alt_jump) 2236 if (fixup_alt_jump)
2238 { /* Push a dummy failure point at the end of the 2237 { /* Push a dummy failure point at the end of the
2239 alternative for a possible future 2238 alternative for a possible future
2240 `pop_failure_jump' to pop. See comments at 2239 `pop_failure_jump' to pop. See comments at
2241 `push_dummy_failure' in `re_match_2'. */ 2240 `push_dummy_failure' in `re_match_2'. */
2242 BUF_PUSH (push_dummy_failure); 2241 BUF_PUSH (push_dummy_failure);
2243 2242
2244 /* We allocated space for this jump when we assigned 2243 /* We allocated space for this jump when we assigned
2245 to `fixup_alt_jump', in the `handle_alt' case below. */ 2244 to `fixup_alt_jump', in the `handle_alt' case below. */
2246 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); 2245 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2247 } 2246 }
2248 2247
2249 /* See similar code for backslashed left paren above. */ 2248 /* See similar code for backslashed left paren above. */
2250 if (COMPILE_STACK_EMPTY) 2249 if (COMPILE_STACK_EMPTY)
2251 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 2250 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2252 goto normal_char; 2251 goto normal_char;
2253 else 2252 else
2254 FREE_STACK_RETURN (REG_ERPAREN); 2253 FREE_STACK_RETURN (REG_ERPAREN);
2255 2254
2256 /* Since we just checked for an empty stack above, this 2255 /* Since we just checked for an empty stack above, this
2257 ``can't happen''. */ 2256 ``can't happen''. */
2258 assert (compile_stack.avail != 0); 2257 assert (compile_stack.avail != 0);
2259 { 2258 {
2260 /* We don't just want to restore into `regnum', because 2259 /* We don't just want to restore into `regnum', because
2261 later groups should continue to be numbered higher, 2260 later groups should continue to be numbered higher,
2262 as in `(ab)c(de)' -- the second group is #2. */ 2261 as in `(ab)c(de)' -- the second group is #2. */
2263 regnum_t this_group_regnum; 2262 regnum_t this_group_regnum;
2264 2263
2265 compile_stack.avail--; 2264 compile_stack.avail--;
2266 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; 2265 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2267 fixup_alt_jump 2266 fixup_alt_jump
2268 = COMPILE_STACK_TOP.fixup_alt_jump 2267 = COMPILE_STACK_TOP.fixup_alt_jump
2269 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 2268 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2270 : 0; 2269 : 0;
2271 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; 2270 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2272 this_group_regnum = COMPILE_STACK_TOP.regnum; 2271 this_group_regnum = COMPILE_STACK_TOP.regnum;
2273 /* If we've reached MAX_REGNUM groups, then this open 2272 /* If we've reached MAX_REGNUM groups, then this open
2274 won't actually generate any code, so we'll have to 2273 won't actually generate any code, so we'll have to
2275 clear pending_exact explicitly. */ 2274 clear pending_exact explicitly. */
2276 pending_exact = 0; 2275 pending_exact = 0;
2277 2276
2278 /* We're at the end of the group, so now we know how many 2277 /* We're at the end of the group, so now we know how many
2279 groups were inside this one. */ 2278 groups were inside this one. */
2280 if (this_group_regnum <= MAX_REGNUM) 2279 if (this_group_regnum <= MAX_REGNUM)
2281 { 2280 {
2282 unsigned char *inner_group_loc 2281 unsigned char *inner_group_loc
2283 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; 2282 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2284 2283
2285 *inner_group_loc = regnum - this_group_regnum; 2284 *inner_group_loc = regnum - this_group_regnum;
2286 BUF_PUSH_3 (stop_memory, this_group_regnum, 2285 BUF_PUSH_3 (stop_memory, this_group_regnum,
2287 regnum - this_group_regnum); 2286 regnum - this_group_regnum);
2288 } 2287 }
2289 } 2288 }
2290 break; 2289 break;
2291 2290
2292 2291
2293 case '|': /* `\|'. */ 2292 case '|': /* `\|'. */
2294 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) 2293 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2295 goto normal_backslash; 2294 goto normal_backslash;
2296 handle_alt: 2295 handle_alt:
2297 if (syntax & RE_LIMITED_OPS) 2296 if (syntax & RE_LIMITED_OPS)
2298 goto normal_char; 2297 goto normal_char;
2299 2298
2300 /* Insert before the previous alternative a jump which 2299 /* Insert before the previous alternative a jump which
2301 jumps to this alternative if the former fails. */ 2300 jumps to this alternative if the former fails. */
2302 GET_BUFFER_SPACE (3); 2301 GET_BUFFER_SPACE (3);
2303 INSERT_JUMP (on_failure_jump, begalt, b + 6); 2302 INSERT_JUMP (on_failure_jump, begalt, b + 6);
2304 pending_exact = 0; 2303 pending_exact = 0;
2305 b += 3; 2304 b += 3;
2306 2305
2307 /* The alternative before this one has a jump after it 2306 /* The alternative before this one has a jump after it
2308 which gets executed if it gets matched. Adjust that 2307 which gets executed if it gets matched. Adjust that
2309 jump so it will jump to this alternative's analogous 2308 jump so it will jump to this alternative's analogous
2310 jump (put in below, which in turn will jump to the next 2309 jump (put in below, which in turn will jump to the next
2311 (if any) alternative's such jump, etc.). The last such 2310 (if any) alternative's such jump, etc.). The last such
2312 jump jumps to the correct final destination. A picture: 2311 jump jumps to the correct final destination. A picture:
2313 _____ _____ 2312 _____ _____
2314 | | | | 2313 | | | |
2315 | v | v 2314 | v | v
2316 a | b | c 2315 a | b | c
2317 2316
2318 If we are at `b', then fixup_alt_jump right now points to a 2317 If we are at `b', then fixup_alt_jump right now points to a
2319 three-byte space after `a'. We'll put in the jump, set 2318 three-byte space after `a'. We'll put in the jump, set
2320 fixup_alt_jump to right after `b', and leave behind three 2319 fixup_alt_jump to right after `b', and leave behind three
2321 bytes which we'll fill in when we get to after `c'. */ 2320 bytes which we'll fill in when we get to after `c'. */
2322 2321
2323 if (fixup_alt_jump) 2322 if (fixup_alt_jump)
2324 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 2323 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2325 2324
2326 /* Mark and leave space for a jump after this alternative, 2325 /* Mark and leave space for a jump after this alternative,
2327 to be filled in later either by next alternative or 2326 to be filled in later either by next alternative or
2328 when we know we're at the end of a series of alternatives. */ 2327 when we know we're at the end of a series of alternatives. */
2329 fixup_alt_jump = b; 2328 fixup_alt_jump = b;
2330 GET_BUFFER_SPACE (3); 2329 GET_BUFFER_SPACE (3);
2331 b += 3; 2330 b += 3;
2332 2331
2333 laststart = 0; 2332 laststart = 0;
2334 begalt = b; 2333 begalt = b;
2335 break; 2334 break;
2336 2335
2337 2336
2338 case '{': 2337 case '{':
2339 /* If \{ is a literal. */ 2338 /* If \{ is a literal. */
2340 if (!(syntax & RE_INTERVALS) 2339 if (!(syntax & RE_INTERVALS)
2341 /* If we're at `\{' and it's not the open-interval 2340 /* If we're at `\{' and it's not the open-interval
2342 operator. */ 2341 operator. */
2343 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) 2342 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2344 || (p - 2 == pattern && p == pend)) 2343 || (p - 2 == pattern && p == pend))
2345 goto normal_backslash; 2344 goto normal_backslash;
2346 2345
2347 handle_interval: 2346 handle_interval:
2348 { 2347 {
2349 /* If got here, then the syntax allows intervals. */ 2348 /* If got here, then the syntax allows intervals. */
2350 2349
2351 /* At least (most) this many matches must be made. */ 2350 /* At least (most) this many matches must be made. */
2352 int lower_bound = -1, upper_bound = -1; 2351 int lower_bound = -1, upper_bound = -1;
2353 2352
2354 beg_interval = p - 1; 2353 beg_interval = p - 1;
2355 2354
2356 if (p == pend) 2355 if (p == pend)
2357 { 2356 {
2358 if (syntax & RE_NO_BK_BRACES) 2357 if (syntax & RE_NO_BK_BRACES)
2359 goto unfetch_interval; 2358 goto unfetch_interval;
2360 else 2359 else
2361 FREE_STACK_RETURN (REG_EBRACE); 2360 FREE_STACK_RETURN (REG_EBRACE);
2362 } 2361 }
2363 2362
2364 GET_UNSIGNED_NUMBER (lower_bound); 2363 GET_UNSIGNED_NUMBER (lower_bound);
2365 2364
2366 if (c == ',') 2365 if (c == ',')
2367 { 2366 {
2368 GET_UNSIGNED_NUMBER (upper_bound); 2367 GET_UNSIGNED_NUMBER (upper_bound);
2369 if (upper_bound < 0) upper_bound = RE_DUP_MAX; 2368 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2370 } 2369 }
2371 else 2370 else
2372 /* Interval such as `{1}' => match exactly once. */ 2371 /* Interval such as `{1}' => match exactly once. */
2373 upper_bound = lower_bound; 2372 upper_bound = lower_bound;
2374 2373
2375 if (lower_bound < 0 || upper_bound > RE_DUP_MAX 2374 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2376 || lower_bound > upper_bound) 2375 || lower_bound > upper_bound)
2377 { 2376 {
2378 if (syntax & RE_NO_BK_BRACES) 2377 if (syntax & RE_NO_BK_BRACES)
2379 goto unfetch_interval; 2378 goto unfetch_interval;
2380 else 2379 else
2381 FREE_STACK_RETURN (REG_BADBR); 2380 FREE_STACK_RETURN (REG_BADBR);
2382 } 2381 }
2383 2382
2384 if (!(syntax & RE_NO_BK_BRACES)) 2383 if (!(syntax & RE_NO_BK_BRACES))
2385 { 2384 {
2386 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); 2385 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2387 2386
2388 PATFETCH (c); 2387 PATFETCH (c);
2389 } 2388 }
2390 2389
2391 if (c != '}') 2390 if (c != '}')
2392 { 2391 {
2393 if (syntax & RE_NO_BK_BRACES) 2392 if (syntax & RE_NO_BK_BRACES)
2394 goto unfetch_interval; 2393 goto unfetch_interval;
2395 else 2394 else
2396 FREE_STACK_RETURN (REG_BADBR); 2395 FREE_STACK_RETURN (REG_BADBR);
2397 } 2396 }
2398 2397
2399 /* We just parsed a valid interval. */ 2398 /* We just parsed a valid interval. */
2400 2399
2401 /* If it's invalid to have no preceding re. */ 2400 /* If it's invalid to have no preceding re. */
2402 if (!laststart) 2401 if (!laststart)
2403 { 2402 {
2404 if (syntax & RE_CONTEXT_INVALID_OPS) 2403 if (syntax & RE_CONTEXT_INVALID_OPS)
2405 FREE_STACK_RETURN (REG_BADRPT); 2404 FREE_STACK_RETURN (REG_BADRPT);
2406 else if (syntax & RE_CONTEXT_INDEP_OPS) 2405 else if (syntax & RE_CONTEXT_INDEP_OPS)
2407 laststart = b; 2406 laststart = b;
2408 else 2407 else
2409 goto unfetch_interval; 2408 goto unfetch_interval;
2410 } 2409 }
2411 2410
2412 /* If the upper bound is zero, don't want to succeed at 2411 /* If the upper bound is zero, don't want to succeed at
2413 all; jump from `laststart' to `b + 3', which will be 2412 all; jump from `laststart' to `b + 3', which will be
2414 the end of the buffer after we insert the jump. */ 2413 the end of the buffer after we insert the jump. */
2415 if (upper_bound == 0) 2414 if (upper_bound == 0)
2416 { 2415 {
2417 GET_BUFFER_SPACE (3); 2416 GET_BUFFER_SPACE (3);
2418 INSERT_JUMP (jump, laststart, b + 3); 2417 INSERT_JUMP (jump, laststart, b + 3);
2419 b += 3; 2418 b += 3;
2420 } 2419 }
2421 2420
2422 /* Otherwise, we have a nontrivial interval. When 2421 /* Otherwise, we have a nontrivial interval. When
2423 we're all done, the pattern will look like: 2422 we're all done, the pattern will look like:
2424 set_number_at <jump count> <upper bound> 2423 set_number_at <jump count> <upper bound>
2425 set_number_at <succeed_n count> <lower bound> 2424 set_number_at <succeed_n count> <lower bound>
2426 succeed_n <after jump addr> <succeed_n count> 2425 succeed_n <after jump addr> <succeed_n count>
2427 <body of loop> 2426 <body of loop>
2428 jump_n <succeed_n addr> <jump count> 2427 jump_n <succeed_n addr> <jump count>
2429 (The upper bound and `jump_n' are omitted if 2428 (The upper bound and `jump_n' are omitted if
2430 `upper_bound' is 1, though.) */ 2429 `upper_bound' is 1, though.) */
2431 else 2430 else
2432 { /* If the upper bound is > 1, we need to insert 2431 { /* If the upper bound is > 1, we need to insert
2433 more at the end of the loop. */ 2432 more at the end of the loop. */
2434 unsigned nbytes = 10 + (upper_bound > 1) * 10; 2433 unsigned nbytes = 10 + (upper_bound > 1) * 10;
2435 2434
2436 GET_BUFFER_SPACE (nbytes); 2435 GET_BUFFER_SPACE (nbytes);
2437 2436
2438 /* Initialize lower bound of the `succeed_n', even 2437 /* Initialize lower bound of the `succeed_n', even
2439 though it will be set during matching by its 2438 though it will be set during matching by its
2440 attendant `set_number_at' (inserted next), 2439 attendant `set_number_at' (inserted next),
2441 because `re_compile_fastmap' needs to know. 2440 because `re_compile_fastmap' needs to know.
2442 Jump to the `jump_n' we might insert below. */ 2441 Jump to the `jump_n' we might insert below. */
2443 INSERT_JUMP2 (succeed_n, laststart, 2442 INSERT_JUMP2 (succeed_n, laststart,
2444 b + 5 + (upper_bound > 1) * 5, 2443 b + 5 + (upper_bound > 1) * 5,
2445 lower_bound); 2444 lower_bound);
2446 b += 5; 2445 b += 5;
2447 2446
2448 /* Code to initialize the lower bound. Insert 2447 /* Code to initialize the lower bound. Insert
2449 before the `succeed_n'. The `5' is the last two 2448 before the `succeed_n'. The `5' is the last two
2450 bytes of this `set_number_at', plus 3 bytes of 2449 bytes of this `set_number_at', plus 3 bytes of
2451 the following `succeed_n'. */ 2450 the following `succeed_n'. */
2452 insert_op2 (set_number_at, laststart, 5, lower_bound, b); 2451 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2453 b += 5; 2452 b += 5;
2454 2453
2455 if (upper_bound > 1) 2454 if (upper_bound > 1)
2456 { /* More than one repetition is allowed, so 2455 { /* More than one repetition is allowed, so
2457 append a backward jump to the `succeed_n' 2456 append a backward jump to the `succeed_n'
2458 that starts this interval. 2457 that starts this interval.
2459 2458
2460 When we've reached this during matching, 2459 When we've reached this during matching,
2461 we'll have matched the interval once, so 2460 we'll have matched the interval once, so
2462 jump back only `upper_bound - 1' times. */ 2461 jump back only `upper_bound - 1' times. */
2463 STORE_JUMP2 (jump_n, b, laststart + 5, 2462 STORE_JUMP2 (jump_n, b, laststart + 5,
2464 upper_bound - 1); 2463 upper_bound - 1);
2465 b += 5; 2464 b += 5;
2466 2465
2467 /* The location we want to set is the second 2466 /* The location we want to set is the second
2468 parameter of the `jump_n'; that is `b-2' as 2467 parameter of the `jump_n'; that is `b-2' as
2469 an absolute address. `laststart' will be 2468 an absolute address. `laststart' will be
2470 the `set_number_at' we're about to insert; 2469 the `set_number_at' we're about to insert;
2471 `laststart+3' the number to set, the source 2470 `laststart+3' the number to set, the source
2472 for the relative address. But we are 2471 for the relative address. But we are
2473 inserting into the middle of the pattern -- 2472 inserting into the middle of the pattern --
2474 so everything is getting moved up by 5. 2473 so everything is getting moved up by 5.
2475 Conclusion: (b - 2) - (laststart + 3) + 5, 2474 Conclusion: (b - 2) - (laststart + 3) + 5,
2476 i.e., b - laststart. 2475 i.e., b - laststart.
2477 2476
2478 We insert this at the beginning of the loop 2477 We insert this at the beginning of the loop
2479 so that if we fail during matching, we'll 2478 so that if we fail during matching, we'll
2480 reinitialize the bounds. */ 2479 reinitialize the bounds. */
2481 insert_op2 (set_number_at, laststart, b - laststart, 2480 insert_op2 (set_number_at, laststart, b - laststart,
2482 upper_bound - 1, b); 2481 upper_bound - 1, b);
2483 b += 5; 2482 b += 5;
2484 } 2483 }
2485 } 2484 }
2486 pending_exact = 0; 2485 pending_exact = 0;
2487 beg_interval = NULL; 2486 beg_interval = NULL;
2488 } 2487 }
2489 break; 2488 break;
2490 2489
2491 unfetch_interval: 2490 unfetch_interval:
2492 /* If an invalid interval, match the characters as literals. */ 2491 /* If an invalid interval, match the characters as literals. */
2493 assert (beg_interval); 2492 assert (beg_interval);
2494 p = beg_interval; 2493 p = beg_interval;
2495 beg_interval = NULL; 2494 beg_interval = NULL;
2496 2495
2497 /* normal_char and normal_backslash need `c'. */ 2496 /* normal_char and normal_backslash need `c'. */
2498 PATFETCH (c); 2497 PATFETCH (c);
2499 2498
2500 if (!(syntax & RE_NO_BK_BRACES)) 2499 if (!(syntax & RE_NO_BK_BRACES))
2501 { 2500 {
2502 if (p > pattern && p[-1] == '\\') 2501 if (p > pattern && p[-1] == '\\')
2503 goto normal_backslash; 2502 goto normal_backslash;
2504 } 2503 }
2505 goto normal_char; 2504 goto normal_char;
2506 2505
2507 #ifdef emacs 2506 #ifdef emacs
2508 /* There is no way to specify the before_dot and after_dot 2507 /* There is no way to specify the before_dot and after_dot
2509 operators. rms says this is ok. --karl */ 2508 operators. rms says this is ok. --karl */
2510 case '=': 2509 case '=':
2511 BUF_PUSH (at_dot); 2510 BUF_PUSH (at_dot);
2512 break; 2511 break;
2513 2512
2514 case 's': 2513 case 's':
2515 laststart = b; 2514 laststart = b;
2516 PATFETCH (c); 2515 PATFETCH (c);
2517 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); 2516 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2518 break; 2517 break;
2519 2518
2520 case 'S': 2519 case 'S':
2521 laststart = b; 2520 laststart = b;
2522 PATFETCH (c); 2521 PATFETCH (c);
2523 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 2522 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2524 break; 2523 break;
2525 #endif /* emacs */ 2524 #endif /* emacs */
2526 2525
2527 2526
2528 case 'w': 2527 case 'w':
2529 laststart = b; 2528 laststart = b;
2530 BUF_PUSH (wordchar); 2529 BUF_PUSH (wordchar);
2531 break; 2530 break;
2532 2531
2533 2532
2534 case 'W': 2533 case 'W':
2535 laststart = b; 2534 laststart = b;
2536 BUF_PUSH (notwordchar); 2535 BUF_PUSH (notwordchar);
2537 break; 2536 break;
2538 2537
2539 2538
2540 case '<': 2539 case '<':
2541 BUF_PUSH (wordbeg); 2540 BUF_PUSH (wordbeg);
2542 break; 2541 break;
2543 2542
2544 case '>': 2543 case '>':
2545 BUF_PUSH (wordend); 2544 BUF_PUSH (wordend);
2546 break; 2545 break;
2547 2546
2548 case 'b': 2547 case 'b':
2549 BUF_PUSH (wordbound); 2548 BUF_PUSH (wordbound);
2550 break; 2549 break;
2551 2550
2552 case 'B': 2551 case 'B':
2553 BUF_PUSH (notwordbound); 2552 BUF_PUSH (notwordbound);
2554 break; 2553 break;
2555 2554
2556 case '`': 2555 case '`':
2557 BUF_PUSH (begbuf); 2556 BUF_PUSH (begbuf);
2558 break; 2557 break;
2559 2558
2560 case '\'': 2559 case '\'':
2561 BUF_PUSH (endbuf); 2560 BUF_PUSH (endbuf);
2562 break; 2561 break;
2563 2562
2564 case '1': case '2': case '3': case '4': case '5': 2563 case '1': case '2': case '3': case '4': case '5':
2565 case '6': case '7': case '8': case '9': 2564 case '6': case '7': case '8': case '9':
2566 if (syntax & RE_NO_BK_REFS) 2565 if (syntax & RE_NO_BK_REFS)
2567 goto normal_char; 2566 goto normal_char;
2568 2567
2569 c1 = c - '0'; 2568 c1 = c - '0';
2570 2569
2571 if (c1 > regnum) 2570 if (c1 > regnum)
2572 FREE_STACK_RETURN (REG_ESUBREG); 2571 FREE_STACK_RETURN (REG_ESUBREG);
2573 2572
2574 /* Can't back reference to a subexpression if inside of it. */ 2573 /* Can't back reference to a subexpression if inside of it. */
2575 if (group_in_compile_stack (compile_stack, c1)) 2574 if (group_in_compile_stack (compile_stack, c1))
2576 goto normal_char; 2575 goto normal_char;
2577 2576
2578 laststart = b; 2577 laststart = b;
2579 BUF_PUSH_2 (duplicate, c1); 2578 BUF_PUSH_2 (duplicate, c1);
2580 break; 2579 break;
2581 2580
2582 2581
2583 case '+': 2582 case '+':
2584 case '?': 2583 case '?':
2585 if (syntax & RE_BK_PLUS_QM) 2584 if (syntax & RE_BK_PLUS_QM)
2586 goto handle_plus; 2585 goto handle_plus;
2587 else 2586 else
2588 goto normal_backslash; 2587 goto normal_backslash;
2589 2588
2590 default: 2589 default:
2591 normal_backslash: 2590 normal_backslash:
2592 /* You might think it would be useful for \ to mean 2591 /* You might think it would be useful for \ to mean
2593 not to translate; but if we don't translate it 2592 not to translate; but if we don't translate it
2594 it will never match anything. */ 2593 it will never match anything. */
2595 c = TRANSLATE (c); 2594 c = TRANSLATE (c);
2596 goto normal_char; 2595 goto normal_char;
2597 } 2596 }
2598 break; 2597 break;
2599 2598
2600 2599
2601 default: 2600 default:
2602 /* Expects the character in `c'. */ 2601 /* Expects the character in `c'. */
2603 normal_char: 2602 normal_char:
2604 /* If no exactn currently being built. */ 2603 /* If no exactn currently being built. */
2605 if (!pending_exact 2604 if (!pending_exact
2606 2605
2607 /* If last exactn not at current position. */ 2606 /* If last exactn not at current position. */
2608 || pending_exact + *pending_exact + 1 != b 2607 || pending_exact + *pending_exact + 1 != b
2609 2608
2610 /* We have only one byte following the exactn for the count. */ 2609 /* We have only one byte following the exactn for the count. */
2611 || *pending_exact == (1 << BYTEWIDTH) - 1 2610 || *pending_exact == (1 << BYTEWIDTH) - 1
2612 2611
2613 /* If followed by a repetition operator. */ 2612 /* If followed by a repetition operator. */
2614 || *p == '*' || *p == '^' 2613 || *p == '*' || *p == '^'
2615 || ((syntax & RE_BK_PLUS_QM) 2614 || ((syntax & RE_BK_PLUS_QM)
2616 ? *p == '\\' && (p[1] == '+' || p[1] == '?') 2615 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2617 : (*p == '+' || *p == '?')) 2616 : (*p == '+' || *p == '?'))
2618 || ((syntax & RE_INTERVALS) 2617 || ((syntax & RE_INTERVALS)
2619 && ((syntax & RE_NO_BK_BRACES) 2618 && ((syntax & RE_NO_BK_BRACES)
2620 ? *p == '{' 2619 ? *p == '{'
2621 : (p[0] == '\\' && p[1] == '{')))) 2620 : (p[0] == '\\' && p[1] == '{'))))
2622 { 2621 {
2623 /* Start building a new exactn. */ 2622 /* Start building a new exactn. */
2624 2623
2625 laststart = b; 2624 laststart = b;
2626 2625
2627 BUF_PUSH_2 (exactn, 0); 2626 BUF_PUSH_2 (exactn, 0);
2628 pending_exact = b - 1; 2627 pending_exact = b - 1;
2629 } 2628 }
2630 2629
2631 BUF_PUSH (c); 2630 BUF_PUSH (c);
2632 (*pending_exact)++; 2631 (*pending_exact)++;
2633 break; 2632 break;
2634 } /* switch (c) */ 2633 } /* switch (c) */
2635 } /* while p != pend */ 2634 } /* while p != pend */
2636 2635
2637 2636
2638 /* Through the pattern now. */ 2637 /* Through the pattern now. */
2639 2638
2705 return REG_NOERROR; 2704 return REG_NOERROR;
2706 } /* regex_compile */ 2705 } /* regex_compile */
2707 2706
2708 /* Subroutines for `regex_compile'. */ 2707 /* Subroutines for `regex_compile'. */
2709 2708
2710 /* Store OP at LOC followed by two-byte integer parameter ARG. */ 2709 /* Store OP at LOC followed by two-byte integer parameter ARG. */
2711 2710
2712 static void 2711 static void
2713 store_op1 (op, loc, arg) 2712 store_op1 (op, loc, arg)
2714 re_opcode_t op; 2713 re_opcode_t op;
2715 unsigned char *loc; 2714 unsigned char *loc;
2786 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 2785 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2787 2786
2788 return 2787 return
2789 /* After a subexpression? */ 2788 /* After a subexpression? */
2790 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 2789 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2791 /* After an alternative? */ 2790 /* After an alternative? */
2792 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); 2791 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2793 } 2792 }
2794 2793
2795 2794
2796 /* The dual of at_begline_loc_p. This one is for $. We assume there is 2795 /* The dual of at_begline_loc_p. This one is for $. We assume there is
2806 const char *next_next = p + 1 < pend ? p + 1 : 0; 2805 const char *next_next = p + 1 < pend ? p + 1 : 0;
2807 2806
2808 return 2807 return
2809 /* Before a subexpression? */ 2808 /* Before a subexpression? */
2810 (syntax & RE_NO_BK_PARENS ? *next == ')' 2809 (syntax & RE_NO_BK_PARENS ? *next == ')'
2811 : next_backslash && next_next && *next_next == ')') 2810 : next_backslash && next_next && *next_next == ')')
2812 /* Before an alternative? */ 2811 /* Before an alternative? */
2813 || (syntax & RE_NO_BK_VBAR ? *next == '|' 2812 || (syntax & RE_NO_BK_VBAR ? *next == '|'
2814 : next_backslash && next_next && *next_next == '|'); 2813 : next_backslash && next_next && *next_next == '|');
2815 } 2814 }
2816 2815
2817 2816
2818 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and 2817 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2819 false if it's not. */ 2818 false if it's not. */
2874 2873
2875 /* Have to increment the pointer into the pattern string, so the 2874 /* Have to increment the pointer into the pattern string, so the
2876 caller isn't still at the ending character. */ 2875 caller isn't still at the ending character. */
2877 (*p_ptr)++; 2876 (*p_ptr)++;
2878 2877
2879 /* If the start is after the end, the range is empty. */ 2878 /* If the start is after the end, the range is empty. */
2880 if (range_start > range_end) 2879 if (range_start > range_end)
2881 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; 2880 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2882 2881
2883 /* Here we see why `this_char' has to be larger than an `unsigned 2882 /* Here we see why `this_char' has to be larger than an `unsigned
2884 char' -- the range is inclusive, so if `range_end' == 0xff 2883 char' -- the range is inclusive, so if `range_end' == 0xff
2928 /* This holds the pointer to the failure stack, when 2927 /* This holds the pointer to the failure stack, when
2929 it is allocated relocatably. */ 2928 it is allocated relocatably. */
2930 fail_stack_elt_t *failure_stack_ptr; 2929 fail_stack_elt_t *failure_stack_ptr;
2931 2930
2932 /* Assume that each path through the pattern can be null until 2931 /* Assume that each path through the pattern can be null until
2933 proven otherwise. We set this false at the bottom of switch 2932 proven otherwise. We set this false at the bottom of switch
2934 statement, to which we get only if a particular path doesn't 2933 statement, to which we get only if a particular path doesn't
2935 match the empty string. */ 2934 match the empty string. */
2936 boolean path_can_be_null = true; 2935 boolean path_can_be_null = true;
2937 2936
2938 /* We aren't doing a `succeed_n' to begin with. */ 2937 /* We aren't doing a `succeed_n' to begin with. */
2939 boolean succeed_n_p = false; 2938 boolean succeed_n_p = false;
2940 2939
2941 assert (fastmap != NULL && p != NULL); 2940 assert (fastmap != NULL && p != NULL);
2942 2941
2943 INIT_FAIL_STACK (); 2942 INIT_FAIL_STACK ();
2944 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 2943 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
2945 bufp->fastmap_accurate = 1; /* It will be when we're done. */ 2944 bufp->fastmap_accurate = 1; /* It will be when we're done. */
2946 bufp->can_be_null = 0; 2945 bufp->can_be_null = 0;
2947 2946
2948 while (1) 2947 while (1)
2949 { 2948 {
2963 } 2962 }
2964 else 2963 else
2965 break; 2964 break;
2966 } 2965 }
2967 2966
2968 /* We should never be about to go beyond the end of the pattern. */ 2967 /* We should never be about to go beyond the end of the pattern. */
2969 assert (p < pend); 2968 assert (p < pend);
2970 2969
2971 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 2970 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
2972 { 2971 {
2973 2972
2974 /* I guess the idea here is to simply not bother with a fastmap 2973 /* I guess the idea here is to simply not bother with a fastmap
2975 if a backreference is used, since it's too hard to figure out 2974 if a backreference is used, since it's too hard to figure out
2976 the fastmap for the corresponding group. Setting 2975 the fastmap for the corresponding group. Setting
2977 `can_be_null' stops `re_search_2' from using the fastmap, so 2976 `can_be_null' stops `re_search_2' from using the fastmap, so
2978 that is all we do. */ 2977 that is all we do. */
2979 case duplicate: 2978 case duplicate:
2980 bufp->can_be_null = 1; 2979 bufp->can_be_null = 1;
2981 goto done; 2980 goto done;
2982 2981
2983 2982
2984 /* Following are the cases which match a character. These end 2983 /* Following are the cases which match a character. These end
2985 with `break'. */ 2984 with `break'. */
2986 2985
2987 case exactn: 2986 case exactn:
2988 fastmap[p[1]] = 1; 2987 fastmap[p[1]] = 1;
2989 break; 2988 break;
2990 2989
2991 2990
2992 case charset: 2991 case charset:
2993 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 2992 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2994 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) 2993 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2995 fastmap[j] = 1; 2994 fastmap[j] = 1;
2996 break; 2995 break;
2997 2996
2998 2997
2999 case charset_not: 2998 case charset_not:
3000 /* Chars beyond end of map must be allowed. */ 2999 /* Chars beyond end of map must be allowed. */
3001 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) 3000 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3002 fastmap[j] = 1; 3001 fastmap[j] = 1;
3003 3002
3004 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 3003 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3005 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) 3004 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3006 fastmap[j] = 1; 3005 fastmap[j] = 1;
3007 break; 3006 break;
3008 3007
3009 3008
3010 case wordchar: 3009 case wordchar:
3011 for (j = 0; j < (1 << BYTEWIDTH); j++) 3010 for (j = 0; j < (1 << BYTEWIDTH); j++)
3012 if (SYNTAX (j) == Sword) 3011 if (SYNTAX (j) == Sword)
3019 if (SYNTAX (j) != Sword) 3018 if (SYNTAX (j) != Sword)
3020 fastmap[j] = 1; 3019 fastmap[j] = 1;
3021 break; 3020 break;
3022 3021
3023 3022
3024 case anychar: 3023 case anychar:
3025 { 3024 {
3026 int fastmap_newline = fastmap['\n']; 3025 int fastmap_newline = fastmap['\n'];
3027 3026
3028 /* `.' matches anything ... */ 3027 /* `.' matches anything ... */
3029 for (j = 0; j < (1 << BYTEWIDTH); j++) 3028 for (j = 0; j < (1 << BYTEWIDTH); j++)
3030 fastmap[j] = 1; 3029 fastmap[j] = 1;
3031 3030
3032 /* ... except perhaps newline. */ 3031 /* ... except perhaps newline. */
3033 if (!(bufp->syntax & RE_DOT_NEWLINE)) 3032 if (!(bufp->syntax & RE_DOT_NEWLINE))
3034 fastmap['\n'] = fastmap_newline; 3033 fastmap['\n'] = fastmap_newline;
3035 3034
3036 /* Return if we have already set `can_be_null'; if we have, 3035 /* Return if we have already set `can_be_null'; if we have,
3037 then the fastmap is irrelevant. Something's wrong here. */ 3036 then the fastmap is irrelevant. Something's wrong here. */
3038 else if (bufp->can_be_null) 3037 else if (bufp->can_be_null)
3039 goto done; 3038 goto done;
3040 3039
3041 /* Otherwise, have to check alternative paths. */ 3040 /* Otherwise, have to check alternative paths. */
3042 break; 3041 break;
3043 } 3042 }
3044 3043
3045 #ifdef emacs 3044 #ifdef emacs
3046 case syntaxspec: 3045 case syntaxspec:
3047 k = *p++; 3046 k = *p++;
3048 for (j = 0; j < (1 << BYTEWIDTH); j++) 3047 for (j = 0; j < (1 << BYTEWIDTH); j++)
3049 if (SYNTAX (j) == (enum syntaxcode) k) 3048 if (SYNTAX (j) == (enum syntaxcode) k)
3050 fastmap[j] = 1; 3049 fastmap[j] = 1;
3051 break; 3050 break;
3058 fastmap[j] = 1; 3057 fastmap[j] = 1;
3059 break; 3058 break;
3060 3059
3061 3060
3062 /* All cases after this match the empty string. These end with 3061 /* All cases after this match the empty string. These end with
3063 `continue'. */ 3062 `continue'. */
3064 3063
3065 3064
3066 case before_dot: 3065 case before_dot:
3067 case at_dot: 3066 case at_dot:
3068 case after_dot: 3067 case after_dot:
3069 continue; 3068 continue;
3070 #endif /* emacs */ 3069 #endif /* emacs */
3071 3070
3072 3071
3073 case no_op: 3072 case no_op:
3074 case begline: 3073 case begline:
3075 case endline: 3074 case endline:
3076 case begbuf: 3075 case begbuf:
3077 case endbuf: 3076 case endbuf:
3078 case wordbound: 3077 case wordbound:
3079 case notwordbound: 3078 case notwordbound:
3080 case wordbeg: 3079 case wordbeg:
3081 case wordend: 3080 case wordend:
3082 case push_dummy_failure: 3081 case push_dummy_failure:
3083 continue; 3082 continue;
3084 3083
3085 3084
3086 case jump_n: 3085 case jump_n:
3087 case pop_failure_jump: 3086 case pop_failure_jump:
3088 case maybe_pop_jump: 3087 case maybe_pop_jump:
3089 case jump: 3088 case jump:
3090 case jump_past_alt: 3089 case jump_past_alt:
3091 case dummy_failure_jump: 3090 case dummy_failure_jump:
3092 EXTRACT_NUMBER_AND_INCR (j, p); 3091 EXTRACT_NUMBER_AND_INCR (j, p);
3093 p += j; 3092 p += j;
3094 if (j > 0) 3093 if (j > 0)
3095 continue; 3094 continue;
3096 3095
3097 /* Jump backward implies we just went through the body of a 3096 /* Jump backward implies we just went through the body of a
3098 loop and matched nothing. Opcode jumped to should be 3097 loop and matched nothing. Opcode jumped to should be
3099 `on_failure_jump' or `succeed_n'. Just treat it like an 3098 `on_failure_jump' or `succeed_n'. Just treat it like an
3100 ordinary jump. For a * loop, it has pushed its failure 3099 ordinary jump. For a * loop, it has pushed its failure
3101 point already; if so, discard that as redundant. */ 3100 point already; if so, discard that as redundant. */
3102 if ((re_opcode_t) *p != on_failure_jump 3101 if ((re_opcode_t) *p != on_failure_jump
3103 && (re_opcode_t) *p != succeed_n) 3102 && (re_opcode_t) *p != succeed_n)
3104 continue; 3103 continue;
3105 3104
3106 p++; 3105 p++;
3107 EXTRACT_NUMBER_AND_INCR (j, p); 3106 EXTRACT_NUMBER_AND_INCR (j, p);
3108 p += j; 3107 p += j;
3109 3108
3110 /* If what's on the stack is where we are now, pop it. */ 3109 /* If what's on the stack is where we are now, pop it. */
3111 if (!FAIL_STACK_EMPTY () 3110 if (!FAIL_STACK_EMPTY ()
3112 && fail_stack.stack[fail_stack.avail - 1].pointer == p) 3111 && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3113 fail_stack.avail--; 3112 fail_stack.avail--;
3114 3113
3115 continue; 3114 continue;
3116 3115
3117 3116
3118 case on_failure_jump: 3117 case on_failure_jump:
3119 case on_failure_keep_string_jump: 3118 case on_failure_keep_string_jump:
3120 handle_on_failure_jump: 3119 handle_on_failure_jump:
3121 EXTRACT_NUMBER_AND_INCR (j, p); 3120 EXTRACT_NUMBER_AND_INCR (j, p);
3122 3121
3123 /* For some patterns, e.g., `(a?)?', `p+j' here points to the 3122 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3124 end of the pattern. We don't want to push such a point, 3123 end of the pattern. We don't want to push such a point,
3125 since when we restore it above, entering the switch will 3124 since when we restore it above, entering the switch will
3126 increment `p' past the end of the pattern. We don't need 3125 increment `p' past the end of the pattern. We don't need
3127 to push such a point since we obviously won't find any more 3126 to push such a point since we obviously won't find any more
3128 fastmap entries beyond `pend'. Such a pattern can match 3127 fastmap entries beyond `pend'. Such a pattern can match
3129 the null string, though. */ 3128 the null string, though. */
3130 if (p + j < pend) 3129 if (p + j < pend)
3131 { 3130 {
3132 if (!PUSH_PATTERN_OP (p + j, fail_stack)) 3131 if (!PUSH_PATTERN_OP (p + j, fail_stack))
3133 { 3132 {
3134 RESET_FAIL_STACK (); 3133 RESET_FAIL_STACK ();
3135 return -2; 3134 return -2;
3136 } 3135 }
3137 }
3138 else
3139 bufp->can_be_null = 1;
3140
3141 if (succeed_n_p)
3142 {
3143 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
3144 succeed_n_p = false;
3145 } 3136 }
3146 3137 else
3147 continue; 3138 bufp->can_be_null = 1;
3139
3140 if (succeed_n_p)
3141 {
3142 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
3143 succeed_n_p = false;
3144 }
3145
3146 continue;
3148 3147
3149 3148
3150 case succeed_n: 3149 case succeed_n:
3151 /* Get to the number of times to succeed. */ 3150 /* Get to the number of times to succeed. */
3152 p += 2; 3151 p += 2;
3153 3152
3154 /* Increment p past the n for when k != 0. */ 3153 /* Increment p past the n for when k != 0. */
3155 EXTRACT_NUMBER_AND_INCR (k, p); 3154 EXTRACT_NUMBER_AND_INCR (k, p);
3156 if (k == 0) 3155 if (k == 0)
3157 { 3156 {
3158 p -= 4; 3157 p -= 4;
3159 succeed_n_p = true; /* Spaghetti code alert. */ 3158 succeed_n_p = true; /* Spaghetti code alert. */
3160 goto handle_on_failure_jump; 3159 goto handle_on_failure_jump;
3161 } 3160 }
3162 continue; 3161 continue;
3163 3162
3164 3163
3165 case set_number_at: 3164 case set_number_at:
3166 p += 4; 3165 p += 4;
3167 continue; 3166 continue;
3168 3167
3169 3168
3170 case start_memory: 3169 case start_memory:
3171 case stop_memory: 3170 case stop_memory:
3172 p += 2; 3171 p += 2;
3173 continue; 3172 continue;
3174 3173
3175 3174
3176 default: 3175 default:
3177 abort (); /* We have listed all the cases. */ 3176 abort (); /* We have listed all the cases. */
3178 } /* switch *p++ */ 3177 } /* switch *p++ */
3179 3178
3180 /* Getting here means we have found the possible starting 3179 /* Getting here means we have found the possible starting
3181 characters for one path of the pattern -- and that the empty 3180 characters for one path of the pattern -- and that the empty
3182 string does not match. We need not follow this path further. 3181 string does not match. We need not follow this path further.
3183 Instead, look at the next alternative (remembered on the 3182 Instead, look at the next alternative (remembered on the
3184 stack), or quit if no more. The test at the top of the loop 3183 stack), or quit if no more. The test at the top of the loop
3185 does these things. */ 3184 does these things. */
3186 path_can_be_null = false; 3185 path_can_be_null = false;
3187 p = pend; 3186 p = pend;
3188 } /* while p */ 3187 } /* while p */
3189 3188
3190 /* Set `can_be_null' for the last path (also the first path, if the 3189 /* Set `can_be_null' for the last path (also the first path, if the
3191 pattern is empty). */ 3190 pattern is empty). */
3192 bufp->can_be_null |= path_can_be_null; 3191 bufp->can_be_null |= path_can_be_null;
3193 3192
3194 done: 3193 done:
3195 RESET_FAIL_STACK (); 3194 RESET_FAIL_STACK ();
3196 return 0; 3195 return 0;
3229 regs->num_regs = 0; 3228 regs->num_regs = 0;
3230 regs->start = regs->end = (regoff_t *) 0; 3229 regs->start = regs->end = (regoff_t *) 0;
3231 } 3230 }
3232 } 3231 }
3233 3232
3234 /* Searching routines. */ 3233 /* Searching routines. */
3235 3234
3236 /* Like re_search_2, below, but only one string is specified, and 3235 /* Like re_search_2, below, but only one string is specified, and
3237 doesn't let you say where to stop matching. */ 3236 doesn't let you say where to stop matching. */
3238 3237
3239 int 3238 int
3343 == '\n'))) 3342 == '\n')))
3344 goto advance; 3343 goto advance;
3345 } 3344 }
3346 3345
3347 /* If a fastmap is supplied, skip quickly over characters that 3346 /* If a fastmap is supplied, skip quickly over characters that
3348 cannot be the start of a match. If the pattern can match the 3347 cannot be the start of a match. If the pattern can match the
3349 null string, however, we don't need to skip characters; we want 3348 null string, however, we don't need to skip characters; we want
3350 the first null string. */ 3349 the first null string. */
3351 if (fastmap && startpos < total_size && !bufp->can_be_null) 3350 if (fastmap && startpos < total_size && !bufp->can_be_null)
3352 { 3351 {
3353 if (range > 0) /* Searching forwards. */ 3352 if (range > 0) /* Searching forwards. */
3354 { 3353 {
3355 register const char *d; 3354 register const char *d;
3356 register int lim = 0; 3355 register int lim = 0;
3357 int irange = range; 3356 int irange = range;
3358 3357
3359 if (startpos < size1 && startpos + range >= size1) 3358 if (startpos < size1 && startpos + range >= size1)
3360 lim = range - (size1 - startpos); 3359 lim = range - (size1 - startpos);
3361 3360
3362 d = (startpos >= size1 ? string2 - size1 : string1) + startpos; 3361 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3363 3362
3364 /* Written out as an if-else to avoid testing `translate' 3363 /* Written out as an if-else to avoid testing `translate'
3365 inside the loop. */ 3364 inside the loop. */
3366 if (translate) 3365 if (translate)
3367 while (range > lim 3366 while (range > lim
3368 && !fastmap[(unsigned char) 3367 && !fastmap[(unsigned char)
3369 translate[(unsigned char) *d++]]) 3368 translate[(unsigned char) *d++]])
3370 range--; 3369 range--;
3371 else 3370 else
3372 while (range > lim && !fastmap[(unsigned char) *d++]) 3371 while (range > lim && !fastmap[(unsigned char) *d++])
3373 range--; 3372 range--;
3374 3373
3375 startpos += irange - range; 3374 startpos += irange - range;
3376 } 3375 }
3377 else /* Searching backwards. */ 3376 else /* Searching backwards. */
3378 { 3377 {
3379 register char c = (size1 == 0 || startpos >= size1 3378 register char c = (size1 == 0 || startpos >= size1
3380 ? string2[startpos - size1] 3379 ? string2[startpos - size1]
3381 : string1[startpos]); 3380 : string1[startpos]);
3382 3381
3383 if (!fastmap[(unsigned char) TRANSLATE (c)]) 3382 if (!fastmap[(unsigned char) TRANSLATE (c)])
3384 goto advance; 3383 goto advance;
3385 } 3384 }
3386 } 3385 }
3387 3386
3388 /* If can't match the null string, and that's all we have left, fail. */ 3387 /* If can't match the null string, and that's all we have left, fail. */
3389 if (range >= 0 && startpos == total_size && fastmap 3388 if (range >= 0 && startpos == total_size && fastmap
3390 && !bufp->can_be_null) 3389 && !bufp->can_be_null)
3391 return -1; 3390 return -1;
3392 3391
3393 val = re_match_2_internal (bufp, string1, size1, string2, size2, 3392 val = re_match_2_internal (bufp, string1, size1, string2, size2,
3394 startpos, regs, stop); 3393 startpos, regs, stop);
3395 #ifndef REGEX_MALLOC 3394 #ifndef REGEX_MALLOC
3404 if (val == -2) 3403 if (val == -2)
3405 return -2; 3404 return -2;
3406 3405
3407 advance: 3406 advance:
3408 if (!range) 3407 if (!range)
3409 break; 3408 break;
3410 else if (range > 0) 3409 else if (range > 0)
3411 { 3410 {
3412 range--; 3411 range--;
3413 startpos++; 3412 startpos++;
3414 } 3413 }
3415 else 3414 else
3416 { 3415 {
3417 range++; 3416 range++;
3418 startpos--; 3417 startpos--;
3419 } 3418 }
3420 } 3419 }
3421 return -1; 3420 return -1;
3422 } /* re_search_2 */ 3421 } /* re_search_2 */
3423 3422
3424 /* Declarations and macros for re_match_2. */ 3423 /* Declarations and macros for re_match_2. */
3425 3424
3426 static int bcmp_translate (); 3425 static int bcmp_translate ();
3427 static boolean alt_match_null_string_p (), 3426 static boolean alt_match_null_string_p (),
3428 common_op_match_null_string_p (), 3427 common_op_match_null_string_p (),
3429 group_match_null_string_p (); 3428 group_match_null_string_p ();
3430 3429
3431 /* This converts PTR, a pointer into one of the search strings `string1' 3430 /* This converts PTR, a pointer into one of the search strings `string1'
3432 and `string2' into an offset from the beginning of that string. */ 3431 and `string2' into an offset from the beginning of that string. */
3433 #define POINTER_TO_OFFSET(ptr) \ 3432 #define POINTER_TO_OFFSET(ptr) \
3434 (FIRST_STRING_P (ptr) \ 3433 (FIRST_STRING_P (ptr) \
3440 #define MATCHING_IN_FIRST_STRING (dend == end_match_1) 3439 #define MATCHING_IN_FIRST_STRING (dend == end_match_1)
3441 3440
3442 /* Call before fetching a character with *d. This switches over to 3441 /* Call before fetching a character with *d. This switches over to
3443 string2 if necessary. */ 3442 string2 if necessary. */
3444 #define PREFETCH() \ 3443 #define PREFETCH() \
3445 while (d == dend) \ 3444 while (d == dend) \
3446 { \ 3445 { \
3447 /* End of string2 => fail. */ \ 3446 /* End of string2 => fail. */ \
3448 if (dend == end_match_2) \ 3447 if (dend == end_match_2) \
3449 goto fail; \ 3448 goto fail; \
3450 /* End of string1 => advance to string2. */ \ 3449 /* End of string1 => advance to string2. */ \
3451 d = string2; \ 3450 d = string2; \
3452 dend = end_match_2; \ 3451 dend = end_match_2; \
3453 } 3452 }
3454 3453
3455 3454
3456 /* Test if at very beginning or at very end of the virtual concatenation 3455 /* Test if at very beginning or at very end of the virtual concatenation
3457 of `string1' and `string2'. If only one string, it's `string2'. */ 3456 of `string1' and `string2'. If only one string, it's `string2'. */
3458 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) 3457 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3459 #define AT_STRINGS_END(d) ((d) == end2) 3458 #define AT_STRINGS_END(d) ((d) == end2)
3460 3459
3461 3460
3462 /* Test if D points to a character which is word-constituent. We have 3461 /* Test if D points to a character which is word-constituent. We have
3463 two special cases to check for: if past the end of string1, look at 3462 two special cases to check for: if past the end of string1, look at
3464 the first character in string2; and if before the beginning of 3463 the first character in string2; and if before the beginning of
3465 string2, look at the last character in string1. */ 3464 string2, look at the last character in string1. */
3466 #define WORDCHAR_P(d) \ 3465 #define WORDCHAR_P(d) \
3467 (SYNTAX ((d) == end1 ? *string2 \ 3466 (SYNTAX ((d) == end1 ? *string2 \
3468 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ 3467 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
3469 == Sword) 3468 == Sword)
3470 3469
3471 /* Disabled due to a compiler bug -- see comment at case wordbound */ 3470 /* Disabled due to a compiler bug -- see comment at case wordbound */
3472 #if 0 3471 #if 0
3473 /* Test if the character before D and the one at D differ with respect 3472 /* Test if the character before D and the one at D differ with respect
3495 } while (0) 3494 } while (0)
3496 #else 3495 #else
3497 #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ 3496 #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
3498 #endif /* not MATCH_MAY_ALLOCATE */ 3497 #endif /* not MATCH_MAY_ALLOCATE */
3499 3498
3500 /* These values must meet several constraints. They must not be valid 3499 /* These values must meet several constraints. They must not be valid
3501 register values; since we have a limit of 255 registers (because 3500 register values; since we have a limit of 255 registers (because
3502 we use only one byte in the pattern for the register number), we can 3501 we use only one byte in the pattern for the register number), we can
3503 use numbers larger than 255. They must differ by 1, because of 3502 use numbers larger than 255. They must differ by 1, because of
3504 NUM_FAILURE_ITEMS above. And the value for the lowest register must 3503 NUM_FAILURE_ITEMS above. And the value for the lowest register must
3505 be larger than the value for the highest register, so we do not try 3504 be larger than the value for the highest register, so we do not try
3506 to actually save any registers when none are active. */ 3505 to actually save any registers when none are active. */
3507 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) 3506 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3508 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) 3507 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3509 3508
3510 /* Matching routines. */ 3509 /* Matching routines. */
3511 3510
3512 #ifndef emacs /* Emacs never uses this. */ 3511 #ifndef emacs /* Emacs never uses this. */
3513 /* re_match is like re_match_2 except it takes only a single string. */ 3512 /* re_match is like re_match_2 except it takes only a single string. */
3514 3513
3515 int 3514 int
3516 re_match (bufp, string, size, pos, regs) 3515 re_match (bufp, string, size, pos, regs)
3517 struct re_pattern_buffer *bufp; 3516 struct re_pattern_buffer *bufp;
3531 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 3530 the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3532 and SIZE2, respectively). We start matching at POS, and stop 3531 and SIZE2, respectively). We start matching at POS, and stop
3533 matching at STOP. 3532 matching at STOP.
3534 3533
3535 If REGS is non-null and the `no_sub' field of BUFP is zero, we 3534 If REGS is non-null and the `no_sub' field of BUFP is zero, we
3536 store offsets for the substring each group matched in REGS. See the 3535 store offsets for the substring each group matched in REGS. See the
3537 documentation for exactly how many groups we fill. 3536 documentation for exactly how many groups we fill.
3538 3537
3539 We return -1 if no match, -2 if an internal error (such as the 3538 We return -1 if no match, -2 if an internal error (such as the
3540 failure stack overflowing). Otherwise, we return the length of the 3539 failure stack overflowing). Otherwise, we return the length of the
3541 matched substring. */ 3540 matched substring. */
3542 3541
3543 int 3542 int
3544 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 3543 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3545 struct re_pattern_buffer *bufp; 3544 struct re_pattern_buffer *bufp;
3554 alloca (0); 3553 alloca (0);
3555 return result; 3554 return result;
3556 } 3555 }
3557 3556
3558 /* This is a separate function so that we can force an alloca cleanup 3557 /* This is a separate function so that we can force an alloca cleanup
3559 afterwards. */ 3558 afterwards. */
3560 static int 3559 static int
3561 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) 3560 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3562 struct re_pattern_buffer *bufp; 3561 struct re_pattern_buffer *bufp;
3563 const char *string1, *string2; 3562 const char *string1, *string2;
3564 int size1, size2; 3563 int size1, size2;
3572 3571
3573 /* Just past the end of the corresponding string. */ 3572 /* Just past the end of the corresponding string. */
3574 const char *end1, *end2; 3573 const char *end1, *end2;
3575 3574
3576 /* Pointers into string1 and string2, just past the last characters in 3575 /* Pointers into string1 and string2, just past the last characters in
3577 each to consider matching. */ 3576 each to consider matching. */
3578 const char *end_match_1, *end_match_2; 3577 const char *end_match_1, *end_match_2;
3579 3578
3580 /* Where we are in the data, and the end of the current string. */ 3579 /* Where we are in the data, and the end of the current string. */
3581 const char *d, *dend; 3580 const char *d, *dend;
3582 3581
3586 3585
3587 /* Mark the opcode just after a start_memory, so we can test for an 3586 /* Mark the opcode just after a start_memory, so we can test for an
3588 empty subpattern when we get to the stop_memory. */ 3587 empty subpattern when we get to the stop_memory. */
3589 unsigned char *just_past_start_mem = 0; 3588 unsigned char *just_past_start_mem = 0;
3590 3589
3591 /* We use this to map every character in the string. */ 3590 /* We use this to map every character in the string. */
3592 RE_TRANSLATE_TYPE translate = bufp->translate; 3591 RE_TRANSLATE_TYPE translate = bufp->translate;
3593 3592
3594 /* Failure point stack. Each place that can handle a failure further 3593 /* Failure point stack. Each place that can handle a failure further
3595 down the line pushes a failure point on this stack. It consists of 3594 down the line pushes a failure point on this stack. It consists of
3596 restart, regend, and reg_info for all registers corresponding to 3595 restart, regend, and reg_info for all registers corresponding to
3597 the subexpressions we're currently inside, plus the number of such 3596 the subexpressions we're currently inside, plus the number of such
3598 registers, and, finally, two char *'s. The first char * is where 3597 registers, and, finally, two char *'s. The first char * is where
3599 to resume scanning the pattern; the second one is where to resume 3598 to resume scanning the pattern; the second one is where to resume
3600 scanning the strings. If the latter is zero, the failure point is 3599 scanning the strings. If the latter is zero, the failure point is
3601 a ``dummy''; if a failure happens and the failure point is a dummy, 3600 a ``dummy''; if a failure happens and the failure point is a dummy,
3602 it gets discarded and the next one is tried. */ 3601 it gets discarded and the next one is tried. */
3603 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 3602 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
3604 fail_stack_type fail_stack; 3603 fail_stack_type fail_stack;
3605 #endif 3604 #endif
3606 #ifdef DEBUG 3605 #ifdef DEBUG
3607 static unsigned failure_id = 0; 3606 static unsigned failure_id = 0;
3608 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 3607 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3611 /* This holds the pointer to the failure stack, when 3610 /* This holds the pointer to the failure stack, when
3612 it is allocated relocatably. */ 3611 it is allocated relocatably. */
3613 fail_stack_elt_t *failure_stack_ptr; 3612 fail_stack_elt_t *failure_stack_ptr;
3614 3613
3615 /* We fill all the registers internally, independent of what we 3614 /* We fill all the registers internally, independent of what we
3616 return, for use in backreferences. The number here includes 3615 return, for use in backreferences. The number here includes
3617 an element for register zero. */ 3616 an element for register zero. */
3618 unsigned num_regs = bufp->re_nsub + 1; 3617 unsigned num_regs = bufp->re_nsub + 1;
3619 3618
3620 /* The currently active registers. */ 3619 /* The currently active registers. */
3621 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; 3620 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3644 /* The is_active field of reg_info helps us keep track of which (possibly 3643 /* The is_active field of reg_info helps us keep track of which (possibly
3645 nested) subexpressions we are currently in. The matched_something 3644 nested) subexpressions we are currently in. The matched_something
3646 field of reg_info[reg_num] helps us tell whether or not we have 3645 field of reg_info[reg_num] helps us tell whether or not we have
3647 matched any of the pattern so far this time through the reg_num-th 3646 matched any of the pattern so far this time through the reg_num-th
3648 subexpression. These two fields get reset each time through any 3647 subexpression. These two fields get reset each time through any
3649 loop their register is in. */ 3648 loop their register is in. */
3650 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 3649 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
3651 register_info_type *reg_info; 3650 register_info_type *reg_info;
3652 #endif 3651 #endif
3653 3652
3654 /* The following record the register info as found in the above 3653 /* The following record the register info as found in the above
3655 variables when we find a match better than any we've seen before. 3654 variables when we find a match better than any we've seen before.
3660 const char **best_regstart, **best_regend; 3659 const char **best_regstart, **best_regend;
3661 #endif 3660 #endif
3662 3661
3663 /* Logically, this is `best_regend[0]'. But we don't want to have to 3662 /* Logically, this is `best_regend[0]'. But we don't want to have to
3664 allocate space for that if we're not allocating space for anything 3663 allocate space for that if we're not allocating space for anything
3665 else (see below). Also, we never need info about register 0 for 3664 else (see below). Also, we never need info about register 0 for
3666 any of the other register vectors, and it seems rather a kludge to 3665 any of the other register vectors, and it seems rather a kludge to
3667 treat `best_regend' differently than the rest. So we keep track of 3666 treat `best_regend' differently than the rest. So we keep track of
3668 the end of the best match so far in a separate variable. We 3667 the end of the best match so far in a separate variable. We
3669 initialize this to NULL so that when we backtrack the first time 3668 initialize this to NULL so that when we backtrack the first time
3670 and need to test it, it's not garbage. */ 3669 and need to test it, it's not garbage. */
3705 reg_info = REGEX_TALLOC (num_regs, register_info_type); 3704 reg_info = REGEX_TALLOC (num_regs, register_info_type);
3706 reg_dummy = REGEX_TALLOC (num_regs, const char *); 3705 reg_dummy = REGEX_TALLOC (num_regs, const char *);
3707 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); 3706 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3708 3707
3709 if (!(regstart && regend && old_regstart && old_regend && reg_info 3708 if (!(regstart && regend && old_regstart && old_regend && reg_info
3710 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 3709 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3711 { 3710 {
3712 FREE_VARIABLES (); 3711 FREE_VARIABLES ();
3713 return -2; 3712 return -2;
3714 } 3713 }
3715 } 3714 }
3716 else 3715 else
3717 { 3716 {
3718 /* We must initialize all our variables to NULL, so that 3717 /* We must initialize all our variables to NULL, so that
3719 `FREE_VARIABLES' doesn't try to free them. */ 3718 `FREE_VARIABLES' doesn't try to free them. */
3720 regstart = regend = old_regstart = old_regend = best_regstart 3719 regstart = regend = old_regstart = old_regend = best_regstart
3721 = best_regend = reg_dummy = NULL; 3720 = best_regend = reg_dummy = NULL;
3722 reg_info = reg_info_dummy = (register_info_type *) NULL; 3721 reg_info = reg_info_dummy = (register_info_type *) NULL;
3723 } 3722 }
3724 #endif /* MATCH_MAY_ALLOCATE */ 3723 #endif /* MATCH_MAY_ALLOCATE */
3725 3724
3726 /* The starting position is bogus. */ 3725 /* The starting position is bogus. */
3734 start_memory/stop_memory has been seen for. Also initialize the 3733 start_memory/stop_memory has been seen for. Also initialize the
3735 register information struct. */ 3734 register information struct. */
3736 for (mcnt = 1; mcnt < num_regs; mcnt++) 3735 for (mcnt = 1; mcnt < num_regs; mcnt++)
3737 { 3736 {
3738 regstart[mcnt] = regend[mcnt] 3737 regstart[mcnt] = regend[mcnt]
3739 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; 3738 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3740 3739
3741 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; 3740 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3742 IS_ACTIVE (reg_info[mcnt]) = 0; 3741 IS_ACTIVE (reg_info[mcnt]) = 0;
3743 MATCHED_SOMETHING (reg_info[mcnt]) = 0; 3742 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3744 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; 3743 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3745 } 3744 }
3746 3745
3747 /* We move `string1' into `string2' if the latter's empty -- but not if 3746 /* We move `string1' into `string2' if the latter's empty -- but not if
3748 `string1' is null. */ 3747 `string1' is null. */
3749 if (size2 == 0 && string1 != NULL) 3748 if (size2 == 0 && string1 != NULL)
3750 { 3749 {
3751 string2 = string1; 3750 string2 = string1;
3752 size2 = size1; 3751 size2 = size1;
3753 string1 = 0; 3752 string1 = 0;
3789 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 3788 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3790 DEBUG_PRINT1 ("The string to match is: `"); 3789 DEBUG_PRINT1 ("The string to match is: `");
3791 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 3790 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3792 DEBUG_PRINT1 ("'\n"); 3791 DEBUG_PRINT1 ("'\n");
3793 3792
3794 /* This loops over pattern commands. It exits by returning from the 3793 /* This loops over pattern commands. It exits by returning from the
3795 function if the match is complete, or it drops through if the match 3794 function if the match is complete, or it drops through if the match
3796 fails at this starting point in the input data. */ 3795 fails at this starting point in the input data. */
3797 for (;;) 3796 for (;;)
3798 { 3797 {
3799 DEBUG_PRINT2 ("\n0x%x: ", p); 3798 DEBUG_PRINT2 ("\n0x%x: ", p);
3800 3799
3801 if (p == pend) 3800 if (p == pend)
3802 { /* End of pattern means we might have succeeded. */ 3801 { /* End of pattern means we might have succeeded. */
3803 DEBUG_PRINT1 ("end of pattern ... "); 3802 DEBUG_PRINT1 ("end of pattern ... ");
3804 3803
3805 /* If we haven't matched the entire string, and we want the 3804 /* If we haven't matched the entire string, and we want the
3806 longest match, try backtracking. */ 3805 longest match, try backtracking. */
3807 if (d != end_match_2) 3806 if (d != end_match_2)
3808 { 3807 {
3809 /* 1 if this match ends in the same string (string1 or string2) 3808 /* 1 if this match ends in the same string (string1 or string2)
3810 as the best previous match. */ 3809 as the best previous match. */
3811 boolean same_str_p = (FIRST_STRING_P (match_end) 3810 boolean same_str_p = (FIRST_STRING_P (match_end)
3812 == MATCHING_IN_FIRST_STRING); 3811 == MATCHING_IN_FIRST_STRING);
3813 /* 1 if this match is the best seen so far. */ 3812 /* 1 if this match is the best seen so far. */
3814 boolean best_match_p; 3813 boolean best_match_p;
3815 3814
3816 /* AIX compiler got confused when this was combined 3815 /* AIX compiler got confused when this was combined
3817 with the previous declaration. */ 3816 with the previous declaration. */
3818 if (same_str_p) 3817 if (same_str_p)
3819 best_match_p = d > match_end; 3818 best_match_p = d > match_end;
3820 else 3819 else
3821 best_match_p = !MATCHING_IN_FIRST_STRING; 3820 best_match_p = !MATCHING_IN_FIRST_STRING;
3822 3821
3823 DEBUG_PRINT1 ("backtracking.\n"); 3822 DEBUG_PRINT1 ("backtracking.\n");
3824 3823
3825 if (!FAIL_STACK_EMPTY ()) 3824 if (!FAIL_STACK_EMPTY ())
3826 { /* More failure points to try. */ 3825 { /* More failure points to try. */
3827 3826
3828 /* If exceeds best match so far, save it. */ 3827 /* If exceeds best match so far, save it. */
3829 if (!best_regs_set || best_match_p) 3828 if (!best_regs_set || best_match_p)
3830 { 3829 {
3831 best_regs_set = true; 3830 best_regs_set = true;
3832 match_end = d; 3831 match_end = d;
3833 3832
3834 DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); 3833 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3835 3834
3836 for (mcnt = 1; mcnt < num_regs; mcnt++) 3835 for (mcnt = 1; mcnt < num_regs; mcnt++)
3837 { 3836 {
3838 best_regstart[mcnt] = regstart[mcnt]; 3837 best_regstart[mcnt] = regstart[mcnt];
3839 best_regend[mcnt] = regend[mcnt]; 3838 best_regend[mcnt] = regend[mcnt];
3840 } 3839 }
3841 } 3840 }
3842 goto fail; 3841 goto fail;
3843 } 3842 }
3844 3843
3845 /* If no failure points, don't restore garbage. And if 3844 /* If no failure points, don't restore garbage. And if
3846 last match is real best match, don't restore second 3845 last match is real best match, don't restore second
3847 best one. */ 3846 best one. */
3848 else if (best_regs_set && !best_match_p) 3847 else if (best_regs_set && !best_match_p)
3849 { 3848 {
3850 restore_best_regs: 3849 restore_best_regs:
3851 /* Restore best match. It may happen that `dend == 3850 /* Restore best match. It may happen that `dend ==
3852 end_match_1' while the restored d is in string2. 3851 end_match_1' while the restored d is in string2.
3853 For example, the pattern `x.*y.*z' against the 3852 For example, the pattern `x.*y.*z' against the
3854 strings `x-' and `y-z-', if the two strings are 3853 strings `x-' and `y-z-', if the two strings are
3855 not consecutive in memory. */ 3854 not consecutive in memory. */
3856 DEBUG_PRINT1 ("Restoring best registers.\n"); 3855 DEBUG_PRINT1 ("Restoring best registers.\n");
3857 3856
3858 d = match_end; 3857 d = match_end;
3859 dend = ((d >= string1 && d <= end1) 3858 dend = ((d >= string1 && d <= end1)
3860 ? end_match_1 : end_match_2); 3859 ? end_match_1 : end_match_2);
3861 3860
3862 for (mcnt = 1; mcnt < num_regs; mcnt++) 3861 for (mcnt = 1; mcnt < num_regs; mcnt++)
3863 { 3862 {
3864 regstart[mcnt] = best_regstart[mcnt]; 3863 regstart[mcnt] = best_regstart[mcnt];
3865 regend[mcnt] = best_regend[mcnt]; 3864 regend[mcnt] = best_regend[mcnt];
3866 } 3865 }
3867 } 3866 }
3868 } /* d != end_match_2 */ 3867 } /* d != end_match_2 */
3869 3868
3870 succeed_label: 3869 succeed_label:
3871 DEBUG_PRINT1 ("Accepting match.\n"); 3870 DEBUG_PRINT1 ("Accepting match.\n");
3872 3871
3873 /* If caller wants register contents data back, do it. */ 3872 /* If caller wants register contents data back, do it. */
3874 if (regs && !bufp->no_sub) 3873 if (regs && !bufp->no_sub)
3875 { 3874 {
3876 /* Have the register data arrays been allocated? */ 3875 /* Have the register data arrays been allocated? */
3877 if (bufp->regs_allocated == REGS_UNALLOCATED) 3876 if (bufp->regs_allocated == REGS_UNALLOCATED)
3878 { /* No. So allocate them with malloc. We need one 3877 { /* No. So allocate them with malloc. We need one
3879 extra element beyond `num_regs' for the `-1' marker 3878 extra element beyond `num_regs' for the `-1' marker
3880 GNU code uses. */ 3879 GNU code uses. */
3881 regs->num_regs = MAX (RE_NREGS, num_regs + 1); 3880 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3882 regs->start = TALLOC (regs->num_regs, regoff_t); 3881 regs->start = TALLOC (regs->num_regs, regoff_t);
3883 regs->end = TALLOC (regs->num_regs, regoff_t); 3882 regs->end = TALLOC (regs->num_regs, regoff_t);
3884 if (regs->start == NULL || regs->end == NULL) 3883 if (regs->start == NULL || regs->end == NULL)
3885 { 3884 {
3886 FREE_VARIABLES (); 3885 FREE_VARIABLES ();
3887 return -2; 3886 return -2;
3888 } 3887 }
3889 bufp->regs_allocated = REGS_REALLOCATE; 3888 bufp->regs_allocated = REGS_REALLOCATE;
3890 } 3889 }
3891 else if (bufp->regs_allocated == REGS_REALLOCATE) 3890 else if (bufp->regs_allocated == REGS_REALLOCATE)
3892 { /* Yes. If we need more elements than were already 3891 { /* Yes. If we need more elements than were already
3893 allocated, reallocate them. If we need fewer, just 3892 allocated, reallocate them. If we need fewer, just
3894 leave it alone. */ 3893 leave it alone. */
3895 if (regs->num_regs < num_regs + 1) 3894 if (regs->num_regs < num_regs + 1)
3896 { 3895 {
3897 regs->num_regs = num_regs + 1; 3896 regs->num_regs = num_regs + 1;
3898 RETALLOC (regs->start, regs->num_regs, regoff_t); 3897 RETALLOC (regs->start, regs->num_regs, regoff_t);
3899 RETALLOC (regs->end, regs->num_regs, regoff_t); 3898 RETALLOC (regs->end, regs->num_regs, regoff_t);
3900 if (regs->start == NULL || regs->end == NULL) 3899 if (regs->start == NULL || regs->end == NULL)
3901 { 3900 {
3902 FREE_VARIABLES (); 3901 FREE_VARIABLES ();
3903 return -2; 3902 return -2;
3904 } 3903 }
3905 } 3904 }
3906 } 3905 }
3907 else 3906 else
3908 { 3907 {
3909 /* These braces fend off an "empty body in an else-statement" 3908 /* These braces fend off an "empty body in an else-statement"
3910 warning under GCC when assert expands to nothing. */ 3909 warning under GCC when assert expands to nothing. */
3911 assert (bufp->regs_allocated == REGS_FIXED); 3910 assert (bufp->regs_allocated == REGS_FIXED);
3912 } 3911 }
3913 3912
3914 /* Convert the pointer data in `regstart' and `regend' to 3913 /* Convert the pointer data in `regstart' and `regend' to
3915 indices. Register zero has to be set differently, 3914 indices. Register zero has to be set differently,
3916 since we haven't kept track of any info for it. */ 3915 since we haven't kept track of any info for it. */
3917 if (regs->num_regs > 0) 3916 if (regs->num_regs > 0)
3918 { 3917 {
3919 regs->start[0] = pos; 3918 regs->start[0] = pos;
3920 regs->end[0] = (MATCHING_IN_FIRST_STRING 3919 regs->end[0] = (MATCHING_IN_FIRST_STRING
3921 ? ((regoff_t) (d - string1)) 3920 ? ((regoff_t) (d - string1))
3922 : ((regoff_t) (d - string2 + size1))); 3921 : ((regoff_t) (d - string2 + size1)));
3923 } 3922 }
3924 3923
3925 /* Go through the first `min (num_regs, regs->num_regs)' 3924 /* Go through the first `min (num_regs, regs->num_regs)'
3926 registers, since that is all we initialized. */ 3925 registers, since that is all we initialized. */
3927 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) 3926 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3928 { 3927 {
3929 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) 3928 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3930 regs->start[mcnt] = regs->end[mcnt] = -1; 3929 regs->start[mcnt] = regs->end[mcnt] = -1;
3931 else 3930 else
3932 { 3931 {
3933 regs->start[mcnt] 3932 regs->start[mcnt]
3934 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); 3933 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
3935 regs->end[mcnt] 3934 regs->end[mcnt]
3936 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); 3935 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
3937 } 3936 }
3938 } 3937 }
3939 3938
3940 /* If the regs structure we return has more elements than 3939 /* If the regs structure we return has more elements than
3941 were in the pattern, set the extra elements to -1. If 3940 were in the pattern, set the extra elements to -1. If
3942 we (re)allocated the registers, this is the case, 3941 we (re)allocated the registers, this is the case,
3943 because we always allocate enough to have at least one 3942 because we always allocate enough to have at least one
3944 -1 at the end. */ 3943 -1 at the end. */
3945 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) 3944 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3946 regs->start[mcnt] = regs->end[mcnt] = -1; 3945 regs->start[mcnt] = regs->end[mcnt] = -1;
3947 } /* regs && !bufp->no_sub */ 3946 } /* regs && !bufp->no_sub */
3948 3947
3949 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 3948 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3950 nfailure_points_pushed, nfailure_points_popped, 3949 nfailure_points_pushed, nfailure_points_popped,
3951 nfailure_points_pushed - nfailure_points_popped); 3950 nfailure_points_pushed - nfailure_points_popped);
3952 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); 3951 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3953 3952
3954 mcnt = d - pos - (MATCHING_IN_FIRST_STRING 3953 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3955 ? string1 3954 ? string1
3956 : string2 - size1); 3955 : string2 - size1);
3957 3956
3958 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); 3957 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3959 3958
3960 FREE_VARIABLES (); 3959 FREE_VARIABLES ();
3961 return mcnt; 3960 return mcnt;
3962 } 3961 }
3963 3962
3964 /* Otherwise match next pattern command. */ 3963 /* Otherwise match next pattern command. */
3965 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 3964 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3966 { 3965 {
3967 /* Ignore these. Used to ignore the n of succeed_n's which 3966 /* Ignore these. Used to ignore the n of succeed_n's which
3968 currently have n == 0. */ 3967 currently have n == 0. */
3969 case no_op: 3968 case no_op:
3970 DEBUG_PRINT1 ("EXECUTING no_op.\n"); 3969 DEBUG_PRINT1 ("EXECUTING no_op.\n");
3971 break; 3970 break;
3972 3971
3973 case succeed: 3972 case succeed:
3974 DEBUG_PRINT1 ("EXECUTING succeed.\n"); 3973 DEBUG_PRINT1 ("EXECUTING succeed.\n");
3975 goto succeed_label; 3974 goto succeed_label;
3976 3975
3977 /* Match the next n pattern characters exactly. The following 3976 /* Match the next n pattern characters exactly. The following
3978 byte in the pattern defines n, and the n bytes after that 3977 byte in the pattern defines n, and the n bytes after that
3979 are the characters to match. */ 3978 are the characters to match. */
3980 case exactn: 3979 case exactn:
3981 mcnt = *p++; 3980 mcnt = *p++;
3982 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); 3981 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3983 3982
3984 /* This is written out as an if-else so we don't waste time 3983 /* This is written out as an if-else so we don't waste time
3985 testing `translate' inside the loop. */ 3984 testing `translate' inside the loop. */
3986 if (translate) 3985 if (translate)
3987 { 3986 {
3988 do 3987 do
3989 { 3988 {
3990 PREFETCH (); 3989 PREFETCH ();
3991 if ((unsigned char) translate[(unsigned char) *d++] 3990 if ((unsigned char) translate[(unsigned char) *d++]
3992 != (unsigned char) *p++) 3991 != (unsigned char) *p++)
3993 goto fail; 3992 goto fail;
3994 } 3993 }
3995 while (--mcnt); 3994 while (--mcnt);
3996 } 3995 }
3997 else 3996 else
3998 { 3997 {
4002 if (*d++ != (char) *p++) goto fail; 4001 if (*d++ != (char) *p++) goto fail;
4003 } 4002 }
4004 while (--mcnt); 4003 while (--mcnt);
4005 } 4004 }
4006 SET_REGS_MATCHED (); 4005 SET_REGS_MATCHED ();
4007 break; 4006 break;
4008 4007
4009 4008
4010 /* Match any character except possibly a newline or a null. */ 4009 /* Match any character except possibly a newline or a null. */
4011 case anychar: 4010 case anychar:
4012 DEBUG_PRINT1 ("EXECUTING anychar.\n"); 4011 DEBUG_PRINT1 ("EXECUTING anychar.\n");
4013 4012
4014 PREFETCH (); 4013 PREFETCH ();
4015 4014
4016 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') 4015 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4017 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) 4016 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4018 goto fail; 4017 goto fail;
4019 4018
4020 SET_REGS_MATCHED (); 4019 SET_REGS_MATCHED ();
4021 DEBUG_PRINT2 (" Matched `%d'.\n", *d); 4020 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
4022 d++; 4021 d++;
4023 break; 4022 break;
4024 4023
4025 4024
4026 case charset: 4025 case charset:
4027 case charset_not: 4026 case charset_not:
4028 { 4027 {
4029 register unsigned char c; 4028 register unsigned char c;
4030 boolean not = (re_opcode_t) *(p - 1) == charset_not; 4029 boolean not = (re_opcode_t) *(p - 1) == charset_not;
4031 4030
4032 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); 4031 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4033 4032
4034 PREFETCH (); 4033 PREFETCH ();
4035 c = TRANSLATE (*d); /* The character to match. */ 4034 c = TRANSLATE (*d); /* The character to match. */
4036 4035
4037 /* Cast to `unsigned' instead of `unsigned char' in case the 4036 /* Cast to `unsigned' instead of `unsigned char' in case the
4038 bit list is a full 32 bytes long. */ 4037 bit list is a full 32 bytes long. */
4039 if (c < (unsigned) (*p * BYTEWIDTH) 4038 if (c < (unsigned) (*p * BYTEWIDTH)
4040 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 4039 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4041 not = !not; 4040 not = !not;
4042 4041
4043 p += 1 + *p; 4042 p += 1 + *p;
4044 4043
4045 if (!not) goto fail; 4044 if (!not) goto fail;
4046 4045
4047 SET_REGS_MATCHED (); 4046 SET_REGS_MATCHED ();
4048 d++; 4047 d++;
4049 break; 4048 break;
4050 } 4049 }
4051 4050
4052 4051
4053 /* The beginning of a group is represented by start_memory. 4052 /* The beginning of a group is represented by start_memory.
4054 The arguments are the register number in the next byte, and the 4053 The arguments are the register number in the next byte, and the
4055 number of groups inner to this one in the next. The text 4054 number of groups inner to this one in the next. The text
4056 matched within the group is recorded (in the internal 4055 matched within the group is recorded (in the internal
4057 registers data structure) under the register number. */ 4056 registers data structure) under the register number. */
4058 case start_memory: 4057 case start_memory:
4059 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); 4058 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4060 4059
4061 /* Find out if this group can match the empty string. */ 4060 /* Find out if this group can match the empty string. */
4062 p1 = p; /* To send to group_match_null_string_p. */ 4061 p1 = p; /* To send to group_match_null_string_p. */
4063 4062
4064 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) 4063 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4065 REG_MATCH_NULL_STRING_P (reg_info[*p]) 4064 REG_MATCH_NULL_STRING_P (reg_info[*p])
4066 = group_match_null_string_p (&p1, pend, reg_info); 4065 = group_match_null_string_p (&p1, pend, reg_info);
4067 4066
4068 /* Save the position in the string where we were the last time 4067 /* Save the position in the string where we were the last time
4069 we were at this open-group operator in case the group is 4068 we were at this open-group operator in case the group is
4070 operated upon by a repetition operator, e.g., with `(a*)*b' 4069 operated upon by a repetition operator, e.g., with `(a*)*b'
4071 against `ab'; then we want to ignore where we are now in 4070 against `ab'; then we want to ignore where we are now in
4072 the string in case this attempt to match fails. */ 4071 the string in case this attempt to match fails. */
4073 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 4072 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4074 ? REG_UNSET (regstart[*p]) ? d : regstart[*p] 4073 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4075 : regstart[*p]; 4074 : regstart[*p];
4076 DEBUG_PRINT2 (" old_regstart: %d\n", 4075 DEBUG_PRINT2 (" old_regstart: %d\n",
4077 POINTER_TO_OFFSET (old_regstart[*p])); 4076 POINTER_TO_OFFSET (old_regstart[*p]));
4078 4077
4079 regstart[*p] = d; 4078 regstart[*p] = d;
4080 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 4079 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4081 4080
4082 IS_ACTIVE (reg_info[*p]) = 1; 4081 IS_ACTIVE (reg_info[*p]) = 1;
4083 MATCHED_SOMETHING (reg_info[*p]) = 0; 4082 MATCHED_SOMETHING (reg_info[*p]) = 0;
4084 4083
4085 /* Clear this whenever we change the register activity status. */ 4084 /* Clear this whenever we change the register activity status. */
4086 set_regs_matched_done = 0; 4085 set_regs_matched_done = 0;
4087 4086
4088 /* This is the new highest active register. */ 4087 /* This is the new highest active register. */
4089 highest_active_reg = *p; 4088 highest_active_reg = *p;
4090 4089
4091 /* If nothing was active before, this is the new lowest active 4090 /* If nothing was active before, this is the new lowest active
4092 register. */ 4091 register. */
4093 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 4092 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4094 lowest_active_reg = *p; 4093 lowest_active_reg = *p;
4095 4094
4096 /* Move past the register number and inner group count. */ 4095 /* Move past the register number and inner group count. */
4097 p += 2; 4096 p += 2;
4098 just_past_start_mem = p; 4097 just_past_start_mem = p;
4099 4098
4100 break; 4099 break;
4101 4100
4102 4101
4103 /* The stop_memory opcode represents the end of a group. Its 4102 /* The stop_memory opcode represents the end of a group. Its
4104 arguments are the same as start_memory's: the register 4103 arguments are the same as start_memory's: the register
4105 number, and the number of inner groups. */ 4104 number, and the number of inner groups. */
4106 case stop_memory: 4105 case stop_memory:
4107 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); 4106 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4108 4107
4109 /* We need to save the string position the last time we were at 4108 /* We need to save the string position the last time we were at
4110 this close-group operator in case the group is operated 4109 this close-group operator in case the group is operated
4111 upon by a repetition operator, e.g., with `((a*)*(b*)*)*' 4110 upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4112 against `aba'; then we want to ignore where we are now in 4111 against `aba'; then we want to ignore where we are now in
4113 the string in case this attempt to match fails. */ 4112 the string in case this attempt to match fails. */
4114 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 4113 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4115 ? REG_UNSET (regend[*p]) ? d : regend[*p] 4114 ? REG_UNSET (regend[*p]) ? d : regend[*p]
4116 : regend[*p]; 4115 : regend[*p];
4117 DEBUG_PRINT2 (" old_regend: %d\n", 4116 DEBUG_PRINT2 (" old_regend: %d\n",
4118 POINTER_TO_OFFSET (old_regend[*p])); 4117 POINTER_TO_OFFSET (old_regend[*p]));
4119 4118
4120 regend[*p] = d; 4119 regend[*p] = d;
4121 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); 4120 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4122 4121
4123 /* This register isn't active anymore. */ 4122 /* This register isn't active anymore. */
4124 IS_ACTIVE (reg_info[*p]) = 0; 4123 IS_ACTIVE (reg_info[*p]) = 0;
4125 4124
4126 /* Clear this whenever we change the register activity status. */ 4125 /* Clear this whenever we change the register activity status. */
4127 set_regs_matched_done = 0; 4126 set_regs_matched_done = 0;
4128 4127
4129 /* If this was the only register active, nothing is active 4128 /* If this was the only register active, nothing is active
4130 anymore. */ 4129 anymore. */
4131 if (lowest_active_reg == highest_active_reg) 4130 if (lowest_active_reg == highest_active_reg)
4132 { 4131 {
4133 lowest_active_reg = NO_LOWEST_ACTIVE_REG; 4132 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4134 highest_active_reg = NO_HIGHEST_ACTIVE_REG; 4133 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4135 } 4134 }
4136 else 4135 else
4137 { /* We must scan for the new highest active register, since 4136 { /* We must scan for the new highest active register, since
4138 it isn't necessarily one less than now: consider 4137 it isn't necessarily one less than now: consider
4139 (a(b)c(d(e)f)g). When group 3 ends, after the f), the 4138 (a(b)c(d(e)f)g). When group 3 ends, after the f), the
4140 new highest active register is 1. */ 4139 new highest active register is 1. */
4141 unsigned char r = *p - 1; 4140 unsigned char r = *p - 1;
4142 while (r > 0 && !IS_ACTIVE (reg_info[r])) 4141 while (r > 0 && !IS_ACTIVE (reg_info[r]))
4143 r--; 4142 r--;
4144 4143
4145 /* If we end up at register zero, that means that we saved 4144 /* If we end up at register zero, that means that we saved
4146 the registers as the result of an `on_failure_jump', not 4145 the registers as the result of an `on_failure_jump', not
4147 a `start_memory', and we jumped to past the innermost 4146 a `start_memory', and we jumped to past the innermost
4148 `stop_memory'. For example, in ((.)*) we save 4147 `stop_memory'. For example, in ((.)*) we save
4149 registers 1 and 2 as a result of the *, but when we pop 4148 registers 1 and 2 as a result of the *, but when we pop
4150 back to the second ), we are at the stop_memory 1. 4149 back to the second ), we are at the stop_memory 1.
4151 Thus, nothing is active. */ 4150 Thus, nothing is active. */
4152 if (r == 0) 4151 if (r == 0)
4153 { 4152 {
4154 lowest_active_reg = NO_LOWEST_ACTIVE_REG; 4153 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4155 highest_active_reg = NO_HIGHEST_ACTIVE_REG; 4154 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4156 } 4155 }
4157 else 4156 else
4158 highest_active_reg = r; 4157 highest_active_reg = r;
4159 } 4158 }
4160 4159
4161 /* If just failed to match something this time around with a 4160 /* If just failed to match something this time around with a
4162 group that's operated on by a repetition operator, try to 4161 group that's operated on by a repetition operator, try to
4163 force exit from the ``loop'', and restore the register 4162 force exit from the ``loop'', and restore the register
4164 information for this group that we had before trying this 4163 information for this group that we had before trying this
4165 last match. */ 4164 last match. */
4166 if ((!MATCHED_SOMETHING (reg_info[*p]) 4165 if ((!MATCHED_SOMETHING (reg_info[*p])
4167 || just_past_start_mem == p - 1) 4166 || just_past_start_mem == p - 1)
4168 && (p + 2) < pend) 4167 && (p + 2) < pend)
4169 { 4168 {
4170 boolean is_a_jump_n = false; 4169 boolean is_a_jump_n = false;
4171 4170
4172 p1 = p + 2; 4171 p1 = p + 2;
4173 mcnt = 0; 4172 mcnt = 0;
4174 switch ((re_opcode_t) *p1++) 4173 switch ((re_opcode_t) *p1++)
4175 { 4174 {
4176 case jump_n: 4175 case jump_n:
4177 is_a_jump_n = true; 4176 is_a_jump_n = true;
4178 case pop_failure_jump: 4177 case pop_failure_jump:
4179 case maybe_pop_jump: 4178 case maybe_pop_jump:
4180 case jump: 4179 case jump:
4181 case dummy_failure_jump: 4180 case dummy_failure_jump:
4182 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 4181 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4183 if (is_a_jump_n) 4182 if (is_a_jump_n)
4184 p1 += 2; 4183 p1 += 2;
4185 break; 4184 break;
4186 4185
4187 default: 4186 default:
4188 /* do nothing */ ; 4187 /* do nothing */ ;
4189 } 4188 }
4190 p1 += mcnt; 4189 p1 += mcnt;
4191 4190
4192 /* If the next operation is a jump backwards in the pattern 4191 /* If the next operation is a jump backwards in the pattern
4193 to an on_failure_jump right before the start_memory 4192 to an on_failure_jump right before the start_memory
4194 corresponding to this stop_memory, exit from the loop 4193 corresponding to this stop_memory, exit from the loop
4195 by forcing a failure after pushing on the stack the 4194 by forcing a failure after pushing on the stack the
4196 on_failure_jump's jump in the pattern, and d. */ 4195 on_failure_jump's jump in the pattern, and d. */
4197 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump 4196 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4198 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) 4197 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4199 { 4198 {
4200 /* If this group ever matched anything, then restore 4199 /* If this group ever matched anything, then restore
4201 what its registers were before trying this last 4200 what its registers were before trying this last
4202 failed match, e.g., with `(a*)*b' against `ab' for 4201 failed match, e.g., with `(a*)*b' against `ab' for
4203 regstart[1], and, e.g., with `((a*)*(b*)*)*' 4202 regstart[1], and, e.g., with `((a*)*(b*)*)*'
4204 against `aba' for regend[3]. 4203 against `aba' for regend[3].
4205 4204
4206 Also restore the registers for inner groups for, 4205 Also restore the registers for inner groups for,
4207 e.g., `((a*)(b*))*' against `aba' (register 3 would 4206 e.g., `((a*)(b*))*' against `aba' (register 3 would
4208 otherwise get trashed). */ 4207 otherwise get trashed). */
4209 4208
4210 if (EVER_MATCHED_SOMETHING (reg_info[*p])) 4209 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4211 { 4210 {
4212 unsigned r; 4211 unsigned r;
4213 4212
4214 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; 4213 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4215 4214
4216 /* Restore this and inner groups' (if any) registers. */ 4215 /* Restore this and inner groups' (if any) registers. */
4217 for (r = *p; r < *p + *(p + 1); r++) 4216 for (r = *p; r < *p + *(p + 1); r++)
4218 { 4217 {
4219 regstart[r] = old_regstart[r]; 4218 regstart[r] = old_regstart[r];
4220 4219
4221 /* xx why this test? */ 4220 /* xx why this test? */
4222 if (old_regend[r] >= regstart[r]) 4221 if (old_regend[r] >= regstart[r])
4223 regend[r] = old_regend[r]; 4222 regend[r] = old_regend[r];
4224 } 4223 }
4225 } 4224 }
4226 p1++; 4225 p1++;
4227 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 4226 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4228 PUSH_FAILURE_POINT (p1 + mcnt, d, -2); 4227 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4229 4228
4230 goto fail; 4229 goto fail;
4231 } 4230 }
4232 } 4231 }
4233 4232
4234 /* Move past the register number and the inner group count. */ 4233 /* Move past the register number and the inner group count. */
4235 p += 2; 4234 p += 2;
4236 break; 4235 break;
4237 4236
4238 4237
4239 /* \<digit> has been turned into a `duplicate' command which is 4238 /* \<digit> has been turned into a `duplicate' command which is
4240 followed by the numeric value of <digit> as the register number. */ 4239 followed by the numeric value of <digit> as the register number. */
4241 case duplicate: 4240 case duplicate:
4242 { 4241 {
4243 register const char *d2, *dend2; 4242 register const char *d2, *dend2;
4244 int regno = *p++; /* Get which register to match against. */ 4243 int regno = *p++; /* Get which register to match against. */
4245 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 4244 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4246 4245
4247 /* Can't back reference a group which we've never matched. */ 4246 /* Can't back reference a group which we've never matched. */
4248 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 4247 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4249 goto fail; 4248 goto fail;
4250 4249
4251 /* Where in input to try to start matching. */ 4250 /* Where in input to try to start matching. */
4252 d2 = regstart[regno]; 4251 d2 = regstart[regno];
4253 4252
4254 /* Where to stop matching; if both the place to start and 4253 /* Where to stop matching; if both the place to start and
4255 the place to stop matching are in the same string, then 4254 the place to stop matching are in the same string, then
4256 set to the place to stop, otherwise, for now have to use 4255 set to the place to stop, otherwise, for now have to use
4257 the end of the first string. */ 4256 the end of the first string. */
4258 4257
4259 dend2 = ((FIRST_STRING_P (regstart[regno]) 4258 dend2 = ((FIRST_STRING_P (regstart[regno])
4260 == FIRST_STRING_P (regend[regno])) 4259 == FIRST_STRING_P (regend[regno]))
4261 ? regend[regno] : end_match_1); 4260 ? regend[regno] : end_match_1);
4262 for (;;) 4261 for (;;)
4263 { 4262 {
4264 /* If necessary, advance to next segment in register 4263 /* If necessary, advance to next segment in register
4265 contents. */ 4264 contents. */
4266 while (d2 == dend2) 4265 while (d2 == dend2)
4267 { 4266 {
4268 if (dend2 == end_match_2) break; 4267 if (dend2 == end_match_2) break;
4269 if (dend2 == regend[regno]) break; 4268 if (dend2 == regend[regno]) break;
4270 4269
4271 /* End of string1 => advance to string2. */ 4270 /* End of string1 => advance to string2. */
4272 d2 = string2; 4271 d2 = string2;
4273 dend2 = regend[regno]; 4272 dend2 = regend[regno];
4274 } 4273 }
4275 /* At end of register contents => success */ 4274 /* At end of register contents => success */
4276 if (d2 == dend2) break; 4275 if (d2 == dend2) break;
4277 4276
4278 /* If necessary, advance to next segment in data. */ 4277 /* If necessary, advance to next segment in data. */
4280 4279
4281 /* How many characters left in this segment to match. */ 4280 /* How many characters left in this segment to match. */
4282 mcnt = dend - d; 4281 mcnt = dend - d;
4283 4282
4284 /* Want how many consecutive characters we can match in 4283 /* Want how many consecutive characters we can match in
4285 one shot, so, if necessary, adjust the count. */ 4284 one shot, so, if necessary, adjust the count. */
4286 if (mcnt > dend2 - d2) 4285 if (mcnt > dend2 - d2)
4287 mcnt = dend2 - d2; 4286 mcnt = dend2 - d2;
4288 4287
4289 /* Compare that many; failure if mismatch, else move 4288 /* Compare that many; failure if mismatch, else move
4290 past them. */ 4289 past them. */
4291 if (translate 4290 if (translate
4292 ? bcmp_translate (d, d2, mcnt, translate) 4291 ? bcmp_translate (d, d2, mcnt, translate)
4293 : bcmp (d, d2, mcnt)) 4292 : bcmp (d, d2, mcnt))
4294 goto fail; 4293 goto fail;
4295 d += mcnt, d2 += mcnt; 4294 d += mcnt, d2 += mcnt;
4296 4295
4297 /* Do this because we've match some characters. */ 4296 /* Do this because we've match some characters. */
4298 SET_REGS_MATCHED (); 4297 SET_REGS_MATCHED ();
4299 } 4298 }
4300 } 4299 }
4301 break; 4300 break;
4302 4301
4303 4302
4304 /* begline matches the empty string at the beginning of the string 4303 /* begline matches the empty string at the beginning of the string
4305 (unless `not_bol' is set in `bufp'), and, if 4304 (unless `not_bol' is set in `bufp'), and, if
4306 `newline_anchor' is set, after newlines. */ 4305 `newline_anchor' is set, after newlines. */
4307 case begline: 4306 case begline:
4308 DEBUG_PRINT1 ("EXECUTING begline.\n"); 4307 DEBUG_PRINT1 ("EXECUTING begline.\n");
4309 4308
4310 if (AT_STRINGS_BEG (d)) 4309 if (AT_STRINGS_BEG (d))
4311 { 4310 {
4312 if (!bufp->not_bol) break; 4311 if (!bufp->not_bol) break;
4313 } 4312 }
4314 else if (d[-1] == '\n' && bufp->newline_anchor) 4313 else if (d[-1] == '\n' && bufp->newline_anchor)
4315 { 4314 {
4316 break; 4315 break;
4317 } 4316 }
4318 /* In all other cases, we fail. */ 4317 /* In all other cases, we fail. */
4319 goto fail; 4318 goto fail;
4320 4319
4321 4320
4322 /* endline is the dual of begline. */ 4321 /* endline is the dual of begline. */
4323 case endline: 4322 case endline:
4324 DEBUG_PRINT1 ("EXECUTING endline.\n"); 4323 DEBUG_PRINT1 ("EXECUTING endline.\n");
4325 4324
4326 if (AT_STRINGS_END (d)) 4325 if (AT_STRINGS_END (d))
4327 { 4326 {
4328 if (!bufp->not_eol) break; 4327 if (!bufp->not_eol) break;
4329 } 4328 }
4330 4329
4331 /* We have to ``prefetch'' the next character. */ 4330 /* We have to ``prefetch'' the next character. */
4332 else if ((d == end1 ? *string2 : *d) == '\n' 4331 else if ((d == end1 ? *string2 : *d) == '\n'
4333 && bufp->newline_anchor) 4332 && bufp->newline_anchor)
4334 { 4333 {
4335 break; 4334 break;
4336 } 4335 }
4337 goto fail; 4336 goto fail;
4338 4337
4339 4338
4340 /* Match at the very beginning of the data. */ 4339 /* Match at the very beginning of the data. */
4341 case begbuf: 4340 case begbuf:
4342 DEBUG_PRINT1 ("EXECUTING begbuf.\n"); 4341 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4343 if (AT_STRINGS_BEG (d)) 4342 if (AT_STRINGS_BEG (d))
4344 break; 4343 break;
4345 goto fail; 4344 goto fail;
4346 4345
4347 4346
4348 /* Match at the very end of the data. */ 4347 /* Match at the very end of the data. */
4349 case endbuf: 4348 case endbuf:
4350 DEBUG_PRINT1 ("EXECUTING endbuf.\n"); 4349 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4351 if (AT_STRINGS_END (d)) 4350 if (AT_STRINGS_END (d))
4352 break; 4351 break;
4353 goto fail; 4352 goto fail;
4354 4353
4355 4354
4356 /* on_failure_keep_string_jump is used to optimize `.*\n'. It 4355 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
4357 pushes NULL as the value for the string on the stack. Then 4356 pushes NULL as the value for the string on the stack. Then
4358 `pop_failure_point' will keep the current value for the 4357 `pop_failure_point' will keep the current value for the
4359 string, instead of restoring it. To see why, consider 4358 string, instead of restoring it. To see why, consider
4360 matching `foo\nbar' against `.*\n'. The .* matches the foo; 4359 matching `foo\nbar' against `.*\n'. The .* matches the foo;
4361 then the . fails against the \n. But the next thing we want 4360 then the . fails against the \n. But the next thing we want
4362 to do is match the \n against the \n; if we restored the 4361 to do is match the \n against the \n; if we restored the
4363 string value, we would be back at the foo. 4362 string value, we would be back at the foo.
4364 4363
4365 Because this is used only in specific cases, we don't need to 4364 Because this is used only in specific cases, we don't need to
4366 check all the things that `on_failure_jump' does, to make 4365 check all the things that `on_failure_jump' does, to make
4367 sure the right things get saved on the stack. Hence we don't 4366 sure the right things get saved on the stack. Hence we don't
4368 share its code. The only reason to push anything on the 4367 share its code. The only reason to push anything on the
4369 stack at all is that otherwise we would have to change 4368 stack at all is that otherwise we would have to change
4370 `anychar's code to do something besides goto fail in this 4369 `anychar's code to do something besides goto fail in this
4371 case; that seems worse than this. */ 4370 case; that seems worse than this. */
4372 case on_failure_keep_string_jump: 4371 case on_failure_keep_string_jump:
4373 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 4372 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4374 4373
4375 EXTRACT_NUMBER_AND_INCR (mcnt, p); 4374 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4376 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); 4375 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4377 4376
4378 PUSH_FAILURE_POINT (p + mcnt, NULL, -2); 4377 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4379 break; 4378 break;
4380 4379
4381 4380
4382 /* Uses of on_failure_jump: 4381 /* Uses of on_failure_jump:
4383 4382
4384 Each alternative starts with an on_failure_jump that points 4383 Each alternative starts with an on_failure_jump that points
4385 to the beginning of the next alternative. Each alternative 4384 to the beginning of the next alternative. Each alternative
4386 except the last ends with a jump that in effect jumps past 4385 except the last ends with a jump that in effect jumps past
4387 the rest of the alternatives. (They really jump to the 4386 the rest of the alternatives. (They really jump to the
4388 ending jump of the following alternative, because tensioning 4387 ending jump of the following alternative, because tensioning
4389 these jumps is a hassle.) 4388 these jumps is a hassle.)
4390 4389
4391 Repeats start with an on_failure_jump that points past both 4390 Repeats start with an on_failure_jump that points past both
4392 the repetition text and either the following jump or 4391 the repetition text and either the following jump or
4393 pop_failure_jump back to this on_failure_jump. */ 4392 pop_failure_jump back to this on_failure_jump. */
4394 case on_failure_jump: 4393 case on_failure_jump:
4395 on_failure: 4394 on_failure:
4396 DEBUG_PRINT1 ("EXECUTING on_failure_jump"); 4395 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4397 4396
4398 EXTRACT_NUMBER_AND_INCR (mcnt, p); 4397 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4399 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); 4398 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4400 4399
4401 /* If this on_failure_jump comes right before a group (i.e., 4400 /* If this on_failure_jump comes right before a group (i.e.,
4402 the original * applied to a group), save the information 4401 the original * applied to a group), save the information
4403 for that group and all inner ones, so that if we fail back 4402 for that group and all inner ones, so that if we fail back
4404 to this point, the group's information will be correct. 4403 to this point, the group's information will be correct.
4405 For example, in \(a*\)*\1, we need the preceding group, 4404 For example, in \(a*\)*\1, we need the preceding group,
4406 and in \(zz\(a*\)b*\)\2, we need the inner group. */ 4405 and in \(zz\(a*\)b*\)\2, we need the inner group. */
4407 4406
4408 /* We can't use `p' to check ahead because we push 4407 /* We can't use `p' to check ahead because we push
4409 a failure point to `p + mcnt' after we do this. */ 4408 a failure point to `p + mcnt' after we do this. */
4410 p1 = p; 4409 p1 = p;
4411 4410
4412 /* We need to skip no_op's before we look for the 4411 /* We need to skip no_op's before we look for the
4413 start_memory in case this on_failure_jump is happening as 4412 start_memory in case this on_failure_jump is happening as
4414 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 4413 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4415 against aba. */ 4414 against aba. */
4416 while (p1 < pend && (re_opcode_t) *p1 == no_op) 4415 while (p1 < pend && (re_opcode_t) *p1 == no_op)
4417 p1++; 4416 p1++;
4418 4417
4419 if (p1 < pend && (re_opcode_t) *p1 == start_memory) 4418 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4420 { 4419 {
4421 /* We have a new highest active register now. This will 4420 /* We have a new highest active register now. This will
4422 get reset at the start_memory we are about to get to, 4421 get reset at the start_memory we are about to get to,
4423 but we will have saved all the registers relevant to 4422 but we will have saved all the registers relevant to
4424 this repetition op, as described above. */ 4423 this repetition op, as described above. */
4425 highest_active_reg = *(p1 + 1) + *(p1 + 2); 4424 highest_active_reg = *(p1 + 1) + *(p1 + 2);
4426 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 4425 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4427 lowest_active_reg = *(p1 + 1); 4426 lowest_active_reg = *(p1 + 1);
4428 } 4427 }
4429 4428
4430 DEBUG_PRINT1 (":\n"); 4429 DEBUG_PRINT1 (":\n");
4431 PUSH_FAILURE_POINT (p + mcnt, d, -2); 4430 PUSH_FAILURE_POINT (p + mcnt, d, -2);
4432 break; 4431 break;
4433 4432
4434 4433
4435 /* A smart repeat ends with `maybe_pop_jump'. 4434 /* A smart repeat ends with `maybe_pop_jump'.
4436 We change it to either `pop_failure_jump' or `jump'. */ 4435 We change it to either `pop_failure_jump' or `jump'. */
4437 case maybe_pop_jump: 4436 case maybe_pop_jump:
4438 EXTRACT_NUMBER_AND_INCR (mcnt, p); 4437 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4439 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 4438 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4440 { 4439 {
4441 register unsigned char *p2 = p; 4440 register unsigned char *p2 = p;
4442 4441
4443 /* Compare the beginning of the repeat with what in the 4442 /* Compare the beginning of the repeat with what in the
4444 pattern follows its end. If we can establish that there 4443 pattern follows its end. If we can establish that there
4445 is nothing that they would both match, i.e., that we 4444 is nothing that they would both match, i.e., that we
4446 would have to backtrack because of (as in, e.g., `a*a') 4445 would have to backtrack because of (as in, e.g., `a*a')
4447 then we can change to pop_failure_jump, because we'll 4446 then we can change to pop_failure_jump, because we'll
4448 never have to backtrack. 4447 never have to backtrack.
4449 4448
4450 This is not true in the case of alternatives: in 4449 This is not true in the case of alternatives: in
4451 `(a|ab)*' we do need to backtrack to the `ab' alternative 4450 `(a|ab)*' we do need to backtrack to the `ab' alternative
4452 (e.g., if the string was `ab'). But instead of trying to 4451 (e.g., if the string was `ab'). But instead of trying to
4453 detect that here, the alternative has put on a dummy 4452 detect that here, the alternative has put on a dummy
4454 failure point which is what we will end up popping. */ 4453 failure point which is what we will end up popping. */
4455 4454
4456 /* Skip over open/close-group commands. 4455 /* Skip over open/close-group commands.
4457 If what follows this loop is a ...+ construct, 4456 If what follows this loop is a ...+ construct,
4458 look at what begins its body, since we will have to 4457 look at what begins its body, since we will have to
4459 match at least one of that. */ 4458 match at least one of that. */
4471 } 4470 }
4472 4471
4473 p1 = p + mcnt; 4472 p1 = p + mcnt;
4474 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding 4473 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4475 to the `maybe_finalize_jump' of this case. Examine what 4474 to the `maybe_finalize_jump' of this case. Examine what
4476 follows. */ 4475 follows. */
4477 4476
4478 /* If we're at the end of the pattern, we can change. */ 4477 /* If we're at the end of the pattern, we can change. */
4479 if (p2 == pend) 4478 if (p2 == pend)
4480 { 4479 {
4481 /* Consider what happens when matching ":\(.*\)" 4480 /* Consider what happens when matching ":\(.*\)"
4482 against ":/". I don't really understand this code 4481 against ":/". I don't really understand this code
4483 yet. */ 4482 yet. */
4484 p[-3] = (unsigned char) pop_failure_jump; 4483 p[-3] = (unsigned char) pop_failure_jump;
4485 DEBUG_PRINT1 4484 DEBUG_PRINT1
4486 (" End of pattern: change to `pop_failure_jump'.\n"); 4485 (" End of pattern: change to `pop_failure_jump'.\n");
4487 } 4486 }
4488 4487
4489 else if ((re_opcode_t) *p2 == exactn 4488 else if ((re_opcode_t) *p2 == exactn
4490 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) 4489 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4491 { 4490 {
4492 register unsigned char c 4491 register unsigned char c
4493 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 4492 = *p2 == (unsigned char) endline ? '\n' : p2[2];
4494 4493
4495 if ((re_opcode_t) p1[3] == exactn && p1[5] != c) 4494 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4496 { 4495 {
4497 p[-3] = (unsigned char) pop_failure_jump; 4496 p[-3] = (unsigned char) pop_failure_jump;
4498 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 4497 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
4499 c, p1[5]); 4498 c, p1[5]);
4500 } 4499 }
4501 4500
4502 else if ((re_opcode_t) p1[3] == charset 4501 else if ((re_opcode_t) p1[3] == charset
4503 || (re_opcode_t) p1[3] == charset_not) 4502 || (re_opcode_t) p1[3] == charset_not)
4504 { 4503 {
4505 int not = (re_opcode_t) p1[3] == charset_not; 4504 int not = (re_opcode_t) p1[3] == charset_not;
4506 4505
4507 if (c < (unsigned char) (p1[4] * BYTEWIDTH) 4506 if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4508 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 4507 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4509 not = !not; 4508 not = !not;
4510 4509
4511 /* `not' is equal to 1 if c would match, which means 4510 /* `not' is equal to 1 if c would match, which means
4512 that we can't change to pop_failure_jump. */ 4511 that we can't change to pop_failure_jump. */
4513 if (!not) 4512 if (!not)
4514 { 4513 {
4515 p[-3] = (unsigned char) pop_failure_jump; 4514 p[-3] = (unsigned char) pop_failure_jump;
4516 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 4515 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
4517 } 4516 }
4518 } 4517 }
4519 } 4518 }
4520 else if ((re_opcode_t) *p2 == charset) 4519 else if ((re_opcode_t) *p2 == charset)
4521 { 4520 {
4522 #ifdef DEBUG 4521 #ifdef DEBUG
4523 register unsigned char c 4522 register unsigned char c
4524 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 4523 = *p2 == (unsigned char) endline ? '\n' : p2[2];
4525 #endif 4524 #endif
4526 4525
4527 if ((re_opcode_t) p1[3] == exactn 4526 if ((re_opcode_t) p1[3] == exactn
4528 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] 4527 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4529 && (p2[2 + p1[5] / BYTEWIDTH] 4528 && (p2[2 + p1[5] / BYTEWIDTH]
4530 & (1 << (p1[5] % BYTEWIDTH))))) 4529 & (1 << (p1[5] % BYTEWIDTH)))))
4531 { 4530 {
4532 p[-3] = (unsigned char) pop_failure_jump; 4531 p[-3] = (unsigned char) pop_failure_jump;
4533 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 4532 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
4534 c, p1[5]); 4533 c, p1[5]);
4535 } 4534 }
4536 4535
4537 else if ((re_opcode_t) p1[3] == charset_not) 4536 else if ((re_opcode_t) p1[3] == charset_not)
4538 { 4537 {
4539 int idx; 4538 int idx;
4540 /* We win if the charset_not inside the loop 4539 /* We win if the charset_not inside the loop
4544 || (idx < (int) p1[4] 4543 || (idx < (int) p1[4]
4545 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) 4544 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4546 break; 4545 break;
4547 4546
4548 if (idx == p2[1]) 4547 if (idx == p2[1])
4549 { 4548 {
4550 p[-3] = (unsigned char) pop_failure_jump; 4549 p[-3] = (unsigned char) pop_failure_jump;
4551 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 4550 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
4552 } 4551 }
4553 } 4552 }
4554 else if ((re_opcode_t) p1[3] == charset) 4553 else if ((re_opcode_t) p1[3] == charset)
4555 { 4554 {
4556 int idx; 4555 int idx;
4557 /* We win if the charset inside the loop 4556 /* We win if the charset inside the loop
4561 idx++) 4560 idx++)
4562 if ((p2[2 + idx] & p1[5 + idx]) != 0) 4561 if ((p2[2 + idx] & p1[5 + idx]) != 0)
4563 break; 4562 break;
4564 4563
4565 if (idx == p2[1] || idx == p1[4]) 4564 if (idx == p2[1] || idx == p1[4])
4566 { 4565 {
4567 p[-3] = (unsigned char) pop_failure_jump; 4566 p[-3] = (unsigned char) pop_failure_jump;
4568 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 4567 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
4569 } 4568 }
4570 } 4569 }
4571 } 4570 }
4572 } 4571 }
4573 p -= 2; /* Point at relative address again. */ 4572 p -= 2; /* Point at relative address again. */
4574 if ((re_opcode_t) p[-1] != pop_failure_jump) 4573 if ((re_opcode_t) p[-1] != pop_failure_jump)
4575 { 4574 {
4576 p[-1] = (unsigned char) jump; 4575 p[-1] = (unsigned char) jump;
4577 DEBUG_PRINT1 (" Match => jump.\n"); 4576 DEBUG_PRINT1 (" Match => jump.\n");
4578 goto unconditional_jump; 4577 goto unconditional_jump;
4579 } 4578 }
4580 /* Note fall through. */ 4579 /* Note fall through. */
4581 4580
4582 4581
4583 /* The end of a simple repeat has a pop_failure_jump back to 4582 /* The end of a simple repeat has a pop_failure_jump back to
4584 its matching on_failure_jump, where the latter will push a 4583 its matching on_failure_jump, where the latter will push a
4585 failure point. The pop_failure_jump takes off failure 4584 failure point. The pop_failure_jump takes off failure
4586 points put on by this pop_failure_jump's matching 4585 points put on by this pop_failure_jump's matching
4587 on_failure_jump; we got through the pattern to here from the 4586 on_failure_jump; we got through the pattern to here from the
4588 matching on_failure_jump, so didn't fail. */ 4587 matching on_failure_jump, so didn't fail. */
4589 case pop_failure_jump: 4588 case pop_failure_jump:
4590 { 4589 {
4591 /* We need to pass separate storage for the lowest and 4590 /* We need to pass separate storage for the lowest and
4592 highest registers, even though we don't care about the 4591 highest registers, even though we don't care about the
4593 actual values. Otherwise, we will restore only one 4592 actual values. Otherwise, we will restore only one
4594 register from the stack, since lowest will == highest in 4593 register from the stack, since lowest will == highest in
4595 `pop_failure_point'. */ 4594 `pop_failure_point'. */
4596 unsigned dummy_low_reg, dummy_high_reg; 4595 unsigned dummy_low_reg, dummy_high_reg;
4597 unsigned char *pdummy; 4596 unsigned char *pdummy;
4598 const char *sdummy; 4597 const char *sdummy;
4599 4598
4600 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); 4599 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4601 POP_FAILURE_POINT (sdummy, pdummy, 4600 POP_FAILURE_POINT (sdummy, pdummy,
4602 dummy_low_reg, dummy_high_reg, 4601 dummy_low_reg, dummy_high_reg,
4603 reg_dummy, reg_dummy, reg_info_dummy); 4602 reg_dummy, reg_dummy, reg_info_dummy);
4604 } 4603 }
4605 /* Note fall through. */ 4604 /* Note fall through. */
4606 4605
4607 4606
4608 /* Unconditionally jump (without popping any failure points). */ 4607 /* Unconditionally jump (without popping any failure points). */
4609 case jump: 4608 case jump:
4610 unconditional_jump: 4609 unconditional_jump:
4611 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ 4610 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
4612 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 4611 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4613 p += mcnt; /* Do the jump. */ 4612 p += mcnt; /* Do the jump. */
4614 DEBUG_PRINT2 ("(to 0x%x).\n", p); 4613 DEBUG_PRINT2 ("(to 0x%x).\n", p);
4615 break; 4614 break;
4616 4615
4617 4616
4618 /* We need this opcode so we can detect where alternatives end 4617 /* We need this opcode so we can detect where alternatives end
4619 in `group_match_null_string_p' et al. */ 4618 in `group_match_null_string_p' et al. */
4620 case jump_past_alt: 4619 case jump_past_alt:
4621 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); 4620 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4622 goto unconditional_jump; 4621 goto unconditional_jump;
4623 4622
4624 4623
4625 /* Normally, the on_failure_jump pushes a failure point, which 4624 /* Normally, the on_failure_jump pushes a failure point, which
4626 then gets popped at pop_failure_jump. We will end up at 4625 then gets popped at pop_failure_jump. We will end up at
4627 pop_failure_jump, also, and with a pattern of, say, `a+', we 4626 pop_failure_jump, also, and with a pattern of, say, `a+', we
4628 are skipping over the on_failure_jump, so we have to push 4627 are skipping over the on_failure_jump, so we have to push
4629 something meaningless for pop_failure_jump to pop. */ 4628 something meaningless for pop_failure_jump to pop. */
4630 case dummy_failure_jump: 4629 case dummy_failure_jump:
4631 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); 4630 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4632 /* It doesn't matter what we push for the string here. What 4631 /* It doesn't matter what we push for the string here. What
4633 the code at `fail' tests is the value for the pattern. */ 4632 the code at `fail' tests is the value for the pattern. */
4634 PUSH_FAILURE_POINT (0, 0, -2); 4633 PUSH_FAILURE_POINT (0, 0, -2);
4635 goto unconditional_jump; 4634 goto unconditional_jump;
4636 4635
4637 4636
4638 /* At the end of an alternative, we need to push a dummy failure 4637 /* At the end of an alternative, we need to push a dummy failure
4639 point in case we are followed by a `pop_failure_jump', because 4638 point in case we are followed by a `pop_failure_jump', because
4640 we don't want the failure point for the alternative to be 4639 we don't want the failure point for the alternative to be
4641 popped. For example, matching `(a|ab)*' against `aab' 4640 popped. For example, matching `(a|ab)*' against `aab'
4642 requires that we match the `ab' alternative. */ 4641 requires that we match the `ab' alternative. */
4643 case push_dummy_failure: 4642 case push_dummy_failure:
4644 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); 4643 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4645 /* See comments just above at `dummy_failure_jump' about the 4644 /* See comments just above at `dummy_failure_jump' about the
4646 two zeroes. */ 4645 two zeroes. */
4647 PUSH_FAILURE_POINT (0, 0, -2); 4646 PUSH_FAILURE_POINT (0, 0, -2);
4648 break; 4647 break;
4649 4648
4650 /* Have to succeed matching what follows at least n times. 4649 /* Have to succeed matching what follows at least n times.
4651 After that, handle like `on_failure_jump'. */ 4650 After that, handle like `on_failure_jump'. */
4652 case succeed_n: 4651 case succeed_n:
4653 EXTRACT_NUMBER (mcnt, p + 2); 4652 EXTRACT_NUMBER (mcnt, p + 2);
4654 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); 4653 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4655 4654
4656 assert (mcnt >= 0); 4655 assert (mcnt >= 0);
4657 /* Originally, this is how many times we HAVE to succeed. */ 4656 /* Originally, this is how many times we HAVE to succeed. */
4658 if (mcnt > 0) 4657 if (mcnt > 0)
4659 { 4658 {
4660 mcnt--; 4659 mcnt--;
4661 p += 2; 4660 p += 2;
4662 STORE_NUMBER_AND_INCR (p, mcnt); 4661 STORE_NUMBER_AND_INCR (p, mcnt);
4663 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); 4662 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
4664 } 4663 }
4665 else if (mcnt == 0) 4664 else if (mcnt == 0)
4666 { 4665 {
4667 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); 4666 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
4668 p[2] = (unsigned char) no_op; 4667 p[2] = (unsigned char) no_op;
4669 p[3] = (unsigned char) no_op; 4668 p[3] = (unsigned char) no_op;
4670 goto on_failure; 4669 goto on_failure;
4671 } 4670 }
4672 break; 4671 break;
4673 4672
4674 case jump_n: 4673 case jump_n:
4675 EXTRACT_NUMBER (mcnt, p + 2); 4674 EXTRACT_NUMBER (mcnt, p + 2);
4676 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); 4675 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4677 4676
4678 /* Originally, this is how many times we CAN jump. */ 4677 /* Originally, this is how many times we CAN jump. */
4679 if (mcnt) 4678 if (mcnt)
4680 { 4679 {
4681 mcnt--; 4680 mcnt--;
4682 STORE_NUMBER (p + 2, mcnt); 4681 STORE_NUMBER (p + 2, mcnt);
4683 goto unconditional_jump; 4682 goto unconditional_jump;
4684 } 4683 }
4685 /* If we don't have to jump any more, skip over the rest of the command. */ 4684 /* If we don't have to jump any more, skip over the rest of the command. */
4686 else 4685 else
4687 p += 4; 4686 p += 4;
4688 break; 4687 break;
4689 4688
4690 case set_number_at: 4689 case set_number_at:
4691 { 4690 {
4692 DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); 4691 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4693 4692
4694 EXTRACT_NUMBER_AND_INCR (mcnt, p); 4693 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4695 p1 = p + mcnt; 4694 p1 = p + mcnt;
4696 EXTRACT_NUMBER_AND_INCR (mcnt, p); 4695 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4697 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); 4696 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
4698 STORE_NUMBER (p1, mcnt); 4697 STORE_NUMBER (p1, mcnt);
4699 break; 4698 break;
4700 } 4699 }
4701 4700
4702 #if 0 4701 #if 0
4703 /* The DEC Alpha C compiler 3.x generates incorrect code for the 4702 /* The DEC Alpha C compiler 3.x generates incorrect code for the
4704 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of 4703 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
4705 AT_WORD_BOUNDARY, so this code is disabled. Expanding the 4704 AT_WORD_BOUNDARY, so this code is disabled. Expanding the
4706 macro and introducing temporary variables works around the bug. */ 4705 macro and introducing temporary variables works around the bug. */
4707 4706
4708 case wordbound: 4707 case wordbound:
4709 DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 4708 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4710 if (AT_WORD_BOUNDARY (d)) 4709 if (AT_WORD_BOUNDARY (d))
4747 break; 4746 break;
4748 } 4747 }
4749 #endif 4748 #endif
4750 4749
4751 case wordbeg: 4750 case wordbeg:
4752 DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 4751 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4753 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) 4752 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4754 break; 4753 break;
4755 goto fail; 4754 goto fail;
4756 4755
4757 case wordend: 4756 case wordend:
4758 DEBUG_PRINT1 ("EXECUTING wordend.\n"); 4757 DEBUG_PRINT1 ("EXECUTING wordend.\n");
4759 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) 4758 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4760 && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) 4759 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4761 break; 4760 break;
4762 goto fail; 4761 goto fail;
4763 4762
4764 #ifdef emacs 4763 #ifdef emacs
4765 case before_dot: 4764 case before_dot:
4766 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); 4765 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4767 if (PTR_CHAR_POS ((unsigned char *) d) >= point) 4766 if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4768 goto fail; 4767 goto fail;
4769 break; 4768 break;
4770 4769
4771 case at_dot: 4770 case at_dot:
4772 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 4771 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4773 if (PTR_CHAR_POS ((unsigned char *) d) != point) 4772 if (PTR_CHAR_POS ((unsigned char *) d) != point)
4774 goto fail; 4773 goto fail;
4775 break; 4774 break;
4776 4775
4777 case after_dot: 4776 case after_dot:
4778 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 4777 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4779 if (PTR_CHAR_POS ((unsigned char *) d) <= point) 4778 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4780 goto fail; 4779 goto fail;
4781 break; 4780 break;
4782 4781
4783 case syntaxspec: 4782 case syntaxspec:
4784 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); 4783 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4785 mcnt = *p++; 4784 mcnt = *p++;
4786 goto matchsyntax; 4785 goto matchsyntax;
4787 4786
4788 case wordchar: 4787 case wordchar:
4789 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); 4788 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4790 mcnt = (int) Sword; 4789 mcnt = (int) Sword;
4791 matchsyntax: 4790 matchsyntax:
4792 PREFETCH (); 4791 PREFETCH ();
4793 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ 4792 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
4794 d++; 4793 d++;
4795 if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) 4794 if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
4796 goto fail; 4795 goto fail;
4797 SET_REGS_MATCHED (); 4796 SET_REGS_MATCHED ();
4798 break; 4797 break;
4799 4798
4800 case notsyntaxspec: 4799 case notsyntaxspec:
4801 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); 4800 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4802 mcnt = *p++; 4801 mcnt = *p++;
4803 goto matchnotsyntax; 4802 goto matchnotsyntax;
4804 4803
4805 case notwordchar: 4804 case notwordchar:
4806 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); 4805 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4807 mcnt = (int) Sword; 4806 mcnt = (int) Sword;
4808 matchnotsyntax: 4807 matchnotsyntax:
4809 PREFETCH (); 4808 PREFETCH ();
4810 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ 4809 /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
4811 d++; 4810 d++;
4812 if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) 4811 if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
4813 goto fail; 4812 goto fail;
4814 SET_REGS_MATCHED (); 4813 SET_REGS_MATCHED ();
4815 break; 4814 break;
4816 4815
4817 #else /* not emacs */ 4816 #else /* not emacs */
4818 case wordchar: 4817 case wordchar:
4819 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); 4818 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4820 PREFETCH (); 4819 PREFETCH ();
4821 if (!WORDCHAR_P (d)) 4820 if (!WORDCHAR_P (d))
4822 goto fail; 4821 goto fail;
4823 SET_REGS_MATCHED (); 4822 SET_REGS_MATCHED ();
4824 d++; 4823 d++;
4825 break; 4824 break;
4826 4825
4827 case notwordchar: 4826 case notwordchar:
4828 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); 4827 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4829 PREFETCH (); 4828 PREFETCH ();
4830 if (WORDCHAR_P (d)) 4829 if (WORDCHAR_P (d))
4831 goto fail; 4830 goto fail;
4832 SET_REGS_MATCHED (); 4831 SET_REGS_MATCHED ();
4833 d++; 4832 d++;
4834 break; 4833 break;
4835 #endif /* not emacs */ 4834 #endif /* not emacs */
4836 4835
4837 default: 4836 default:
4838 abort (); 4837 abort ();
4839 } 4838 }
4840 continue; /* Successfully executed one pattern command; keep going. */ 4839 continue; /* Successfully executed one pattern command; keep going. */
4841 4840
4842 4841
4843 /* We goto here if a matching operation fails. */ 4842 /* We goto here if a matching operation fails. */
4844 fail: 4843 fail:
4845 if (!FAIL_STACK_EMPTY ()) 4844 if (!FAIL_STACK_EMPTY ())
4846 { /* A restart point is known. Restore to that state. */ 4845 { /* A restart point is known. Restore to that state. */
4847 DEBUG_PRINT1 ("\nFAIL:\n"); 4846 DEBUG_PRINT1 ("\nFAIL:\n");
4848 POP_FAILURE_POINT (d, p, 4847 POP_FAILURE_POINT (d, p,
4849 lowest_active_reg, highest_active_reg, 4848 lowest_active_reg, highest_active_reg,
4850 regstart, regend, reg_info); 4849 regstart, regend, reg_info);
4851 4850
4852 /* If this failure point is a dummy, try the next one. */ 4851 /* If this failure point is a dummy, try the next one. */
4853 if (!p) 4852 if (!p)
4854 goto fail; 4853 goto fail;
4855 4854
4856 /* If we failed to the end of the pattern, don't examine *p. */ 4855 /* If we failed to the end of the pattern, don't examine *p. */
4857 assert (p <= pend); 4856 assert (p <= pend);
4858 if (p < pend) 4857 if (p < pend)
4859 { 4858 {
4860 boolean is_a_jump_n = false; 4859 boolean is_a_jump_n = false;
4861 4860
4862 /* If failed to a backwards jump that's part of a repetition 4861 /* If failed to a backwards jump that's part of a repetition
4863 loop, need to pop this failure point and use the next one. */ 4862 loop, need to pop this failure point and use the next one. */
4864 switch ((re_opcode_t) *p) 4863 switch ((re_opcode_t) *p)
4865 { 4864 {
4866 case jump_n: 4865 case jump_n:
4867 is_a_jump_n = true; 4866 is_a_jump_n = true;
4868 case maybe_pop_jump: 4867 case maybe_pop_jump:
4869 case pop_failure_jump: 4868 case pop_failure_jump:
4870 case jump: 4869 case jump:
4871 p1 = p + 1; 4870 p1 = p + 1;
4872 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 4871 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4873 p1 += mcnt; 4872 p1 += mcnt;
4874 4873
4875 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) 4874 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4876 || (!is_a_jump_n 4875 || (!is_a_jump_n
4877 && (re_opcode_t) *p1 == on_failure_jump)) 4876 && (re_opcode_t) *p1 == on_failure_jump))
4878 goto fail; 4877 goto fail;
4879 break; 4878 break;
4880 default: 4879 default:
4881 /* do nothing */ ; 4880 /* do nothing */ ;
4882 } 4881 }
4883 } 4882 }
4884 4883
4885 if (d >= string1 && d <= end1) 4884 if (d >= string1 && d <= end1)
4886 dend = end_match_1; 4885 dend = end_match_1;
4887 } 4886 }
4888 else 4887 else
4889 break; /* Matching at this starting point really fails. */ 4888 break; /* Matching at this starting point really fails. */
4890 } /* for (;;) */ 4889 } /* for (;;) */
4891 4890
4892 if (best_regs_set) 4891 if (best_regs_set)
4893 goto restore_best_regs; 4892 goto restore_best_regs;
4894 4893
4895 FREE_VARIABLES (); 4894 FREE_VARIABLES ();
4896 4895
4897 return -1; /* Failure to match. */ 4896 return -1; /* Failure to match. */
4898 } /* re_match_2 */ 4897 } /* re_match_2 */
4899 4898
4900 /* Subroutine definitions for re_match_2. */ 4899 /* Subroutine definitions for re_match_2. */
4901 4900
4902 4901
4921 4920
4922 while (p1 < end) 4921 while (p1 < end)
4923 { 4922 {
4924 /* Skip over opcodes that can match nothing, and return true or 4923 /* Skip over opcodes that can match nothing, and return true or
4925 false, as appropriate, when we get to one that can't, or to the 4924 false, as appropriate, when we get to one that can't, or to the
4926 matching stop_memory. */ 4925 matching stop_memory. */
4927 4926
4928 switch ((re_opcode_t) *p1) 4927 switch ((re_opcode_t) *p1)
4929 { 4928 {
4930 /* Could be either a loop or a series of alternatives. */ 4929 /* Could be either a loop or a series of alternatives. */
4931 case on_failure_jump: 4930 case on_failure_jump:
4932 p1++; 4931 p1++;
4933 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 4932 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4934 4933
4935 /* If the next operation is not a jump backwards in the 4934 /* If the next operation is not a jump backwards in the
4936 pattern. */ 4935 pattern. */
4937 4936
4938 if (mcnt >= 0) 4937 if (mcnt >= 0)
4939 { 4938 {
4940 /* Go through the on_failure_jumps of the alternatives, 4939 /* Go through the on_failure_jumps of the alternatives,
4941 seeing if any of the alternatives cannot match nothing. 4940 seeing if any of the alternatives cannot match nothing.
4942 The last alternative starts with only a jump, 4941 The last alternative starts with only a jump,
4943 whereas the rest start with on_failure_jump and end 4942 whereas the rest start with on_failure_jump and end
4944 with a jump, e.g., here is the pattern for `a|b|c': 4943 with a jump, e.g., here is the pattern for `a|b|c':
4945 4944
4946 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 4945 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4947 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 4946 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4948 /exactn/1/c 4947 /exactn/1/c
4949 4948
4950 So, we have to first go through the first (n-1) 4949 So, we have to first go through the first (n-1)
4951 alternatives and then deal with the last one separately. */ 4950 alternatives and then deal with the last one separately. */
4952 4951
4953 4952
4954 /* Deal with the first (n-1) alternatives, which start 4953 /* Deal with the first (n-1) alternatives, which start
4955 with an on_failure_jump (see above) that jumps to right 4954 with an on_failure_jump (see above) that jumps to right
4956 past a jump_past_alt. */ 4955 past a jump_past_alt. */
4957 4956
4958 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) 4957 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4959 { 4958 {
4960 /* `mcnt' holds how many bytes long the alternative 4959 /* `mcnt' holds how many bytes long the alternative
4961 is, including the ending `jump_past_alt' and 4960 is, including the ending `jump_past_alt' and
4962 its number. */ 4961 its number. */
4963 4962
4964 if (!alt_match_null_string_p (p1, p1 + mcnt - 3, 4963 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4965 reg_info)) 4964 reg_info))
4966 return false; 4965 return false;
4967 4966
4968 /* Move to right after this alternative, including the 4967 /* Move to right after this alternative, including the
4969 jump_past_alt. */ 4968 jump_past_alt. */
4970 p1 += mcnt; 4969 p1 += mcnt;
4971 4970
4972 /* Break if it's the beginning of an n-th alternative 4971 /* Break if it's the beginning of an n-th alternative
4973 that doesn't begin with an on_failure_jump. */ 4972 that doesn't begin with an on_failure_jump. */
4974 if ((re_opcode_t) *p1 != on_failure_jump) 4973 if ((re_opcode_t) *p1 != on_failure_jump)
4975 break; 4974 break;
4976 4975
4977 /* Still have to check that it's not an n-th 4976 /* Still have to check that it's not an n-th
4978 alternative that starts with an on_failure_jump. */ 4977 alternative that starts with an on_failure_jump. */
4979 p1++; 4978 p1++;
4980 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 4979 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4981 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) 4980 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4982 { 4981 {
4983 /* Get to the beginning of the n-th alternative. */ 4982 /* Get to the beginning of the n-th alternative. */
4984 p1 -= 3; 4983 p1 -= 3;
4985 break; 4984 break;
4986 } 4985 }
4987 } 4986 }
4988 4987
4989 /* Deal with the last alternative: go back and get number 4988 /* Deal with the last alternative: go back and get number
4990 of the `jump_past_alt' just before it. `mcnt' contains 4989 of the `jump_past_alt' just before it. `mcnt' contains
4991 the length of the alternative. */ 4990 the length of the alternative. */
4992 EXTRACT_NUMBER (mcnt, p1 - 2); 4991 EXTRACT_NUMBER (mcnt, p1 - 2);
4993 4992
4994 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) 4993 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4995 return false; 4994 return false;
4996 4995
4997 p1 += mcnt; /* Get past the n-th alternative. */ 4996 p1 += mcnt; /* Get past the n-th alternative. */
4998 } /* if mcnt >= 0 */ 4997 } /* if mcnt >= 0 */
4999 break; 4998 break;
5000 4999
5001 5000
5002 case stop_memory: 5001 case stop_memory:
5003 assert (p1[1] == **p); 5002 assert (p1[1] == **p);
5004 *p = p1 + 2; 5003 *p = p1 + 2;
5005 return true; 5004 return true;
5006 5005
5007 5006
5008 default: 5007 default:
5009 if (!common_op_match_null_string_p (&p1, end, reg_info)) 5008 if (!common_op_match_null_string_p (&p1, end, reg_info))
5010 return false; 5009 return false;
5011 } 5010 }
5012 } /* while p1 < end */ 5011 } /* while p1 < end */
5013 5012
5014 return false; 5013 return false;
5015 } /* group_match_null_string_p */ 5014 } /* group_match_null_string_p */
5016 5015
5028 unsigned char *p1 = p; 5027 unsigned char *p1 = p;
5029 5028
5030 while (p1 < end) 5029 while (p1 < end)
5031 { 5030 {
5032 /* Skip over opcodes that can match nothing, and break when we get 5031 /* Skip over opcodes that can match nothing, and break when we get
5033 to one that can't. */ 5032 to one that can't. */
5034 5033
5035 switch ((re_opcode_t) *p1) 5034 switch ((re_opcode_t) *p1)
5036 { 5035 {
5037 /* It's a loop. */ 5036 /* It's a loop. */
5038 case on_failure_jump: 5037 case on_failure_jump:
5039 p1++; 5038 p1++;
5040 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 5039 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5041 p1 += mcnt; 5040 p1 += mcnt;
5042 break; 5041 break;
5043 5042
5044 default: 5043 default:
5045 if (!common_op_match_null_string_p (&p1, end, reg_info)) 5044 if (!common_op_match_null_string_p (&p1, end, reg_info))
5046 return false; 5045 return false;
5047 } 5046 }
5048 } /* while p1 < end */ 5047 } /* while p1 < end */
5049 5048
5050 return true; 5049 return true;
5051 } /* alt_match_null_string_p */ 5050 } /* alt_match_null_string_p */
5052 5051
5088 reg_no = *p1; 5087 reg_no = *p1;
5089 assert (reg_no > 0 && reg_no <= MAX_REGNUM); 5088 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5090 ret = group_match_null_string_p (&p1, end, reg_info); 5089 ret = group_match_null_string_p (&p1, end, reg_info);
5091 5090
5092 /* Have to set this here in case we're checking a group which 5091 /* Have to set this here in case we're checking a group which
5093 contains a group and a back reference to it. */ 5092 contains a group and a back reference to it. */
5094 5093
5095 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) 5094 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5096 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; 5095 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5097 5096
5098 if (!ret) 5097 if (!ret)
5099 return false; 5098 return false;
5100 break; 5099 break;
5101 5100
5102 /* If this is an optimized succeed_n for zero times, make the jump. */ 5101 /* If this is an optimized succeed_n for zero times, make the jump. */
5103 case jump: 5102 case jump:
5104 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 5103 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5105 if (mcnt >= 0) 5104 if (mcnt >= 0)
5106 p1 += mcnt; 5105 p1 += mcnt;
5107 else 5106 else
5108 return false; 5107 return false;
5109 break; 5108 break;
5110 5109
5111 case succeed_n: 5110 case succeed_n:
5112 /* Get to the number of times to succeed. */ 5111 /* Get to the number of times to succeed. */
5113 p1 += 2; 5112 p1 += 2;
5114 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 5113 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5115 5114
5116 if (mcnt == 0) 5115 if (mcnt == 0)
5117 { 5116 {
5118 p1 -= 4; 5117 p1 -= 4;
5119 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 5118 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5120 p1 += mcnt; 5119 p1 += mcnt;
5121 } 5120 }
5122 else 5121 else
5123 return false; 5122 return false;
5124 break; 5123 break;
5125 5124
5126 case duplicate: 5125 case duplicate:
5127 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) 5126 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5128 return false; 5127 return false;
5129 break; 5128 break;
5130 5129
5131 case set_number_at: 5130 case set_number_at:
5132 p1 += 4; 5131 p1 += 4;
5133 5132
5166 Returns 0 if the pattern was valid, otherwise an error string. 5165 Returns 0 if the pattern was valid, otherwise an error string.
5167 5166
5168 Assumes the `allocated' (and perhaps `buffer') and `translate' fields 5167 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5169 are set in BUFP on entry. 5168 are set in BUFP on entry.
5170 5169
5171 We call regex_compile to do the actual compilation. */ 5170 We call regex_compile to do the actual compilation. */
5172 5171
5173 const char * 5172 const char *
5174 re_compile_pattern (pattern, length, bufp) 5173 re_compile_pattern (pattern, length, bufp)
5175 const char *pattern; 5174 const char *pattern;
5176 int length; 5175 int length;
5185 /* And GNU code determines whether or not to get register information 5184 /* And GNU code determines whether or not to get register information
5186 by passing null for the REGS argument to re_match, etc., not by 5185 by passing null for the REGS argument to re_match, etc., not by
5187 setting no_sub. */ 5186 setting no_sub. */
5188 bufp->no_sub = 0; 5187 bufp->no_sub = 0;
5189 5188
5190 /* Match anchors at newline. */ 5189 /* Match anchors at newline. */
5191 bufp->newline_anchor = 1; 5190 bufp->newline_anchor = 1;
5192 5191
5193 ret = regex_compile (pattern, length, re_syntax_options, bufp); 5192 ret = regex_compile (pattern, length, re_syntax_options, bufp);
5194 5193
5195 if (!ret) 5194 if (!ret)
5196 return NULL; 5195 return NULL;
5197 return gettext (re_error_msgid[(int) ret]); 5196 return gettext (re_error_msgid[(int) ret]);
5198 } 5197 }
5199 5198
5200 /* Entry points compatible with 4.2 BSD regex library. We don't define 5199 /* Entry points compatible with 4.2 BSD regex library. We don't define
5201 them unless specifically requested. */ 5200 them unless specifically requested. */
5202 5201
5203 #if defined (_REGEX_RE_COMP) || defined (_LIBC) 5202 #if defined (_REGEX_RE_COMP) || defined (_LIBC)
5204 5203
5205 /* BSD has one and only one pattern buffer. */ 5204 /* BSD has one and only one pattern buffer. */
5206 static struct re_pattern_buffer re_comp_buf; 5205 static struct re_pattern_buffer re_comp_buf;
5226 5225
5227 if (!re_comp_buf.buffer) 5226 if (!re_comp_buf.buffer)
5228 { 5227 {
5229 re_comp_buf.buffer = (unsigned char *) malloc (200); 5228 re_comp_buf.buffer = (unsigned char *) malloc (200);
5230 if (re_comp_buf.buffer == NULL) 5229 if (re_comp_buf.buffer == NULL)
5231 return gettext (re_error_msgid[(int) REG_ESPACE]); 5230 return gettext (re_error_msgid[(int) REG_ESPACE]);
5232 re_comp_buf.allocated = 200; 5231 re_comp_buf.allocated = 200;
5233 5232
5234 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); 5233 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5235 if (re_comp_buf.fastmap == NULL) 5234 if (re_comp_buf.fastmap == NULL)
5236 return gettext (re_error_msgid[(int) REG_ESPACE]); 5235 return gettext (re_error_msgid[(int) REG_ESPACE]);
5237 } 5236 }
5238 5237
5239 /* Since `re_exec' always passes NULL for the `regs' argument, we 5238 /* Since `re_exec' always passes NULL for the `regs' argument, we
5240 don't need to initialize the pattern buffer fields which affect it. */ 5239 don't need to initialize the pattern buffer fields which affect it. */
5241 5240
5242 /* Match anchors at newlines. */ 5241 /* Match anchors at newlines. */
5243 re_comp_buf.newline_anchor = 1; 5242 re_comp_buf.newline_anchor = 1;
5244 5243
5245 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 5244 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5246 5245
5247 if (!ret) 5246 if (!ret)
5269 5268
5270 #ifndef emacs 5269 #ifndef emacs
5271 5270
5272 /* regcomp takes a regular expression as a string and compiles it. 5271 /* regcomp takes a regular expression as a string and compiles it.
5273 5272
5274 PREG is a regex_t *. We do not expect any fields to be initialized, 5273 PREG is a regex_t *. We do not expect any fields to be initialized,
5275 since POSIX says we shouldn't. Thus, we set 5274 since POSIX says we shouldn't. Thus, we set
5276 5275
5277 `buffer' to the compiled pattern; 5276 `buffer' to the compiled pattern;
5278 `used' to the length of the compiled pattern; 5277 `used' to the length of the compiled pattern;
5279 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the 5278 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5298 5297
5299 If REG_NOSUB is set, then when PREG is passed to regexec, that 5298 If REG_NOSUB is set, then when PREG is passed to regexec, that
5300 routine will report only success or failure, and nothing about the 5299 routine will report only success or failure, and nothing about the
5301 registers. 5300 registers.
5302 5301
5303 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for 5302 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
5304 the return codes and their meanings.) */ 5303 the return codes and their meanings.) */
5305 5304
5306 int 5305 int
5307 regcomp (preg, pattern, cflags) 5306 regcomp (preg, pattern, cflags)
5308 regex_t *preg; 5307 regex_t *preg;
5331 5330
5332 preg->translate 5331 preg->translate
5333 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE 5332 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5334 * sizeof (*(RE_TRANSLATE_TYPE)0)); 5333 * sizeof (*(RE_TRANSLATE_TYPE)0));
5335 if (preg->translate == NULL) 5334 if (preg->translate == NULL)
5336 return (int) REG_ESPACE; 5335 return (int) REG_ESPACE;
5337 5336
5338 /* Map uppercase characters to corresponding lowercase ones. */ 5337 /* Map uppercase characters to corresponding lowercase ones. */
5339 for (i = 0; i < CHAR_SET_SIZE; i++) 5338 for (i = 0; i < CHAR_SET_SIZE; i++)
5340 preg->translate[i] = ISUPPER (i) ? tolower (i) : i; 5339 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5341 } 5340 }
5342 else 5341 else
5343 preg->translate = NULL; 5342 preg->translate = NULL;
5344 5343
5345 /* If REG_NEWLINE is set, newlines are treated differently. */ 5344 /* If REG_NEWLINE is set, newlines are treated differently. */
5346 if (cflags & REG_NEWLINE) 5345 if (cflags & REG_NEWLINE)
5347 { /* REG_NEWLINE implies neither . nor [^...] match newline. */ 5346 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
5348 syntax &= ~RE_DOT_NEWLINE; 5347 syntax &= ~RE_DOT_NEWLINE;
5349 syntax |= RE_HAT_LISTS_NOT_NEWLINE; 5348 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5350 /* It also changes the matching behavior. */ 5349 /* It also changes the matching behavior. */
5351 preg->newline_anchor = 1; 5350 preg->newline_anchor = 1;
5352 } 5351 }
5353 else 5352 else
5354 preg->newline_anchor = 0; 5353 preg->newline_anchor = 0;
5355 5354
5369 5368
5370 /* regexec searches for a given pattern, specified by PREG, in the 5369 /* regexec searches for a given pattern, specified by PREG, in the
5371 string STRING. 5370 string STRING.
5372 5371
5373 If NMATCH is zero or REG_NOSUB was set in the cflags argument to 5372 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5374 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 5373 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
5375 least NMATCH elements, and we set them to the offsets of the 5374 least NMATCH elements, and we set them to the offsets of the
5376 corresponding matched substrings. 5375 corresponding matched substrings.
5377 5376
5378 EFLAGS specifies `execution flags' which affect matching: if 5377 EFLAGS specifies `execution flags' which affect matching: if
5379 REG_NOTBOL is set, then ^ does not match at the beginning of the 5378 REG_NOTBOL is set, then ^ does not match at the beginning of the
5400 private_preg.not_bol = !!(eflags & REG_NOTBOL); 5399 private_preg.not_bol = !!(eflags & REG_NOTBOL);
5401 private_preg.not_eol = !!(eflags & REG_NOTEOL); 5400 private_preg.not_eol = !!(eflags & REG_NOTEOL);
5402 5401
5403 /* The user has told us exactly how many registers to return 5402 /* The user has told us exactly how many registers to return
5404 information about, via `nmatch'. We have to pass that on to the 5403 information about, via `nmatch'. We have to pass that on to the
5405 matching routines. */ 5404 matching routines. */
5406 private_preg.regs_allocated = REGS_FIXED; 5405 private_preg.regs_allocated = REGS_FIXED;
5407 5406
5408 if (want_reg_info) 5407 if (want_reg_info)
5409 { 5408 {
5410 regs.num_regs = nmatch; 5409 regs.num_regs = nmatch;
5411 regs.start = TALLOC (nmatch, regoff_t); 5410 regs.start = TALLOC (nmatch, regoff_t);
5412 regs.end = TALLOC (nmatch, regoff_t); 5411 regs.end = TALLOC (nmatch, regoff_t);
5413 if (regs.start == NULL || regs.end == NULL) 5412 if (regs.start == NULL || regs.end == NULL)
5414 return (int) REG_NOMATCH; 5413 return (int) REG_NOMATCH;
5415 } 5414 }
5416 5415
5417 /* Perform the searching operation. */ 5416 /* Perform the searching operation. */
5418 ret = re_search (&private_preg, string, len, 5417 ret = re_search (&private_preg, string, len,
5419 /* start: */ 0, /* range: */ len, 5418 /* start: */ 0, /* range: */ len,
5420 want_reg_info ? &regs : (struct re_registers *) 0); 5419 want_reg_info ? &regs : (struct re_registers *) 0);
5421 5420
5422 /* Copy the register information to the POSIX structure. */ 5421 /* Copy the register information to the POSIX structure. */
5423 if (want_reg_info) 5422 if (want_reg_info)
5424 { 5423 {
5425 if (ret >= 0) 5424 if (ret >= 0)
5426 { 5425 {
5427 unsigned r; 5426 unsigned r;
5428 5427
5429 for (r = 0; r < nmatch; r++) 5428 for (r = 0; r < nmatch; r++)
5430 { 5429 {
5431 pmatch[r].rm_so = regs.start[r]; 5430 pmatch[r].rm_so = regs.start[r];
5432 pmatch[r].rm_eo = regs.end[r]; 5431 pmatch[r].rm_eo = regs.end[r];
5433 } 5432 }
5434 } 5433 }
5435 5434
5436 /* If we needed the temporary register info, free the space now. */ 5435 /* If we needed the temporary register info, free the space now. */
5437 free (regs.start); 5436 free (regs.start);
5438 free (regs.end); 5437 free (regs.end);
5439 } 5438 }
5440 5439
5441 /* We want zero return to mean success, unlike `re_search'. */ 5440 /* We want zero return to mean success, unlike `re_search'. */
5457 size_t msg_size; 5456 size_t msg_size;
5458 5457
5459 if (errcode < 0 5458 if (errcode < 0
5460 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0]))) 5459 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
5461 /* Only error codes returned by the rest of the code should be passed 5460 /* Only error codes returned by the rest of the code should be passed
5462 to this routine. If we are given anything else, or if other regex 5461 to this routine. If we are given anything else, or if other regex
5463 code generates an invalid error code, then the program has a bug. 5462 code generates an invalid error code, then the program has a bug.
5464 Dump core so we can fix it. */ 5463 Dump core so we can fix it. */
5465 abort (); 5464 abort ();
5466 5465
5467 msg = gettext (re_error_msgid[errcode]); 5466 msg = gettext (re_error_msgid[errcode]);
5469 msg_size = strlen (msg) + 1; /* Includes the null. */ 5468 msg_size = strlen (msg) + 1; /* Includes the null. */
5470 5469
5471 if (errbuf_size != 0) 5470 if (errbuf_size != 0)
5472 { 5471 {
5473 if (msg_size > errbuf_size) 5472 if (msg_size > errbuf_size)
5474 { 5473 {
5475 strncpy (errbuf, msg, errbuf_size - 1); 5474 strncpy (errbuf, msg, errbuf_size - 1);
5476 errbuf[errbuf_size - 1] = 0; 5475 errbuf[errbuf_size - 1] = 0;
5477 } 5476 }
5478 else 5477 else
5479 strcpy (errbuf, msg); 5478 strcpy (errbuf, msg);
5480 } 5479 }
5481 5480
5482 return msg_size; 5481 return msg_size;
5483 } 5482 }
5484 5483