Mercurial > hg > octave-nkf > gnulib-hg
annotate lib/regex.h @ 16846:a80d21de5373
system-quote, execute, spawn-pipe: Escape '?' on Windows.
* lib/system-quote.c (SHELL_SPECIAL_CHARS, CMD_SPECIAL_CHARS): Add the
'?' character.
* lib/w32spawn.h (SHELL_SPECIAL_CHARS): Likewise.
* tests/test-system-quote-main.c (check_all): Check also strings like
"??????????".
Reported by Eli Zaretskii <eliz@gnu.org>.
author | Bruno Haible <bruno@clisp.org> |
---|---|
date | Fri, 11 May 2012 01:39:04 +0200 (2012-05-10) |
parents | e011e0a7ab5a |
children | b6b08f30c630 |
rev | line source |
---|---|
14 | 1 /* Definitions for data structures and routines for the regular |
4020 | 2 expression library. |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
3 Copyright (C) 1985, 1989-1993, 1995-1998, 2000-2003, 2005-2012 |
14079
97fc9a21a8fb
maint: update almost all copyright ranges to include 2011
Jim Meyering <meyering@redhat.com>
parents:
13647
diff
changeset
|
4 Free Software Foundation, Inc. |
4020 | 5 This file is part of the GNU C Library. |
1283 | 6 |
4020 | 7 This program is free software; you can redistribute it and/or modify |
8 it under the terms of the GNU General Public License as published by | |
9 the Free Software Foundation; either version 2, or (at your option) | |
10 any later version. | |
14 | 11 |
4020 | 12 This program is distributed in the hope that it will be useful, |
1283 | 13 but WITHOUT ANY WARRANTY; without even the implied warranty of |
4020 | 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 GNU General Public License for more details. | |
14 | 16 |
4020 | 17 You should have received a copy of the GNU General Public License along |
16366
bb182ee4a09d
maint: replace FSF snail-mail addresses with URLs
Paul Eggert <eggert@cs.ucla.edu>
parents:
16361
diff
changeset
|
18 with this program; if not, see <http://www.gnu.org/licenses/>. */ |
14 | 19 |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
20 #ifndef _REGEX_H |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
21 #define _REGEX_H 1 |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
22 |
4304 | 23 #include <sys/types.h> |
24 | |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
25 /* Allow the use in C++ code. */ |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
26 #ifdef __cplusplus |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
27 extern "C" { |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
28 #endif |
14 | 29 |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
30 /* Define __USE_GNU to declare GNU extensions that violate the |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
31 POSIX name space rules. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
32 #ifdef _GNU_SOURCE |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
33 # define __USE_GNU 1 |
6099 | 34 #endif |
389 | 35 |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
36 #ifdef _REGEX_LARGE_OFFSETS |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
37 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
38 /* Use types and values that are wide enough to represent signed and |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
39 unsigned byte offsets in memory. This currently works only when |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
40 the regex code is used outside of the GNU C library; it is not yet |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
41 supported within glibc itself, and glibc users should not define |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
42 _REGEX_LARGE_OFFSETS. */ |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
43 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
44 /* The type of nonnegative object indexes. Traditionally, GNU regex |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
45 uses 'int' for these. Code that uses __re_idx_t should work |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
46 regardless of whether the type is signed. */ |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
47 typedef size_t __re_idx_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
48 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
49 /* The type of object sizes. */ |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
50 typedef size_t __re_size_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
51 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
52 /* The type of object sizes, in places where the traditional code |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
53 uses unsigned long int. */ |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
54 typedef size_t __re_long_size_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
55 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
56 #else |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
57 |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
58 /* The traditional GNU regex implementation mishandles strings longer |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
59 than INT_MAX. */ |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
60 typedef int __re_idx_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
61 typedef unsigned int __re_size_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
62 typedef unsigned long int __re_long_size_t; |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
63 |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
64 #endif |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
65 |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
66 /* The following two types have to be signed and unsigned integer types |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
67 wide enough to hold a value of a pointer. For most ANSI compilers |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
68 ptrdiff_t and size_t should likely be OK. Still, the size of these two |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
69 types is 2 for Microsoft C. Ugh... */ |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
70 typedef long int s_reg_t; |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
71 typedef unsigned long int active_reg_t; |
14 | 72 |
73 /* The following bits are used to determine the regexp syntax we | |
74 recognize. The set/not-set meanings are chosen so that Emacs syntax | |
75 remains the value 0. The bits are given in alphabetical order, and | |
76 the definitions shifted by one from the previous bit; thus, when we | |
77 add or remove a bit, only one other definition need change. */ | |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
78 typedef unsigned long int reg_syntax_t; |
14 | 79 |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
80 #ifdef __USE_GNU |
14 | 81 /* If this bit is not set, then \ inside a bracket expression is literal. |
82 If set, then such a \ quotes the following character. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
83 # define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) |
14 | 84 |
85 /* If this bit is not set, then + and ? are operators, and \+ and \? are | |
649
89f4c1937ac7
update FSF address in copyright and remove any trailing blanks
Jim Meyering <jim@meyering.net>
parents:
502
diff
changeset
|
86 literals. |
14 | 87 If set, then \+ and \? are operators and + and ? are literals. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
88 # define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) |
14 | 89 |
90 /* If this bit is set, then character classes are supported. They are: | |
91 [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], | |
92 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. | |
93 If not set, then character classes are not supported. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
94 # define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) |
14 | 95 |
96 /* If this bit is set, then ^ and $ are always anchors (outside bracket | |
97 expressions, of course). | |
98 If this bit is not set, then it depends: | |
13647
e5c0e28232bc
regex documentation update from Reuben Thomas <rrt@sc3d.org>, 20 Aug 2010 12:04:39 +0100
Karl Berry <karl@freefriends.org>
parents:
12559
diff
changeset
|
99 ^ is an anchor if it is at the beginning of a regular |
e5c0e28232bc
regex documentation update from Reuben Thomas <rrt@sc3d.org>, 20 Aug 2010 12:04:39 +0100
Karl Berry <karl@freefriends.org>
parents:
12559
diff
changeset
|
100 expression or after an open-group or an alternation operator; |
e5c0e28232bc
regex documentation update from Reuben Thomas <rrt@sc3d.org>, 20 Aug 2010 12:04:39 +0100
Karl Berry <karl@freefriends.org>
parents:
12559
diff
changeset
|
101 $ is an anchor if it is at the end of a regular expression, or |
e5c0e28232bc
regex documentation update from Reuben Thomas <rrt@sc3d.org>, 20 Aug 2010 12:04:39 +0100
Karl Berry <karl@freefriends.org>
parents:
12559
diff
changeset
|
102 before a close-group or an alternation operator. |
14 | 103 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
104 This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because |
14 | 105 POSIX draft 11.2 says that * etc. in leading positions is undefined. |
106 We already implemented a previous draft which made those constructs | |
107 invalid, though, so we haven't changed the code back. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
108 # define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) |
14 | 109 |
110 /* If this bit is set, then special characters are always special | |
111 regardless of where they are in the pattern. | |
112 If this bit is not set, then special characters are special only in | |
649
89f4c1937ac7
update FSF address in copyright and remove any trailing blanks
Jim Meyering <jim@meyering.net>
parents:
502
diff
changeset
|
113 some contexts; otherwise they are ordinary. Specifically, |
14 | 114 * + ? and intervals are only special when not after the beginning, |
115 open-group, or alternation operator. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
116 # define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) |
14 | 117 |
118 /* If this bit is set, then *, +, ?, and { cannot be first in an re or | |
119 immediately after an alternation or begin-group operator. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
120 # define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) |
14 | 121 |
122 /* If this bit is set, then . matches newline. | |
123 If not set, then it doesn't. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
124 # define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) |
14 | 125 |
126 /* If this bit is set, then . doesn't match NUL. | |
127 If not set, then it does. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
128 # define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) |
14 | 129 |
130 /* If this bit is set, nonmatching lists [^...] do not match newline. | |
131 If not set, they do. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
132 # define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) |
14 | 133 |
134 /* If this bit is set, either \{...\} or {...} defines an | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
135 interval, depending on RE_NO_BK_BRACES. |
14 | 136 If not set, \{, \}, {, and } are literals. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
137 # define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) |
14 | 138 |
139 /* If this bit is set, +, ? and | aren't recognized as operators. | |
140 If not set, they are. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
141 # define RE_LIMITED_OPS (RE_INTERVALS << 1) |
14 | 142 |
143 /* If this bit is set, newline is an alternation operator. | |
144 If not set, newline is literal. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
145 # define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) |
14 | 146 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
147 /* If this bit is set, then '{...}' defines an interval, and \{ and \} |
14 | 148 are literals. |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
149 If not set, then '\{...\}' defines an interval. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
150 # define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) |
14 | 151 |
152 /* If this bit is set, (...) defines a group, and \( and \) are literals. | |
153 If not set, \(...\) defines a group, and ( and ) are literals. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
154 # define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) |
14 | 155 |
156 /* If this bit is set, then \<digit> matches <digit>. | |
157 If not set, then \<digit> is a back-reference. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
158 # define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) |
14 | 159 |
649
89f4c1937ac7
update FSF address in copyright and remove any trailing blanks
Jim Meyering <jim@meyering.net>
parents:
502
diff
changeset
|
160 /* If this bit is set, then | is an alternation operator, and \| is literal. |
14 | 161 If not set, then \| is an alternation operator, and | is literal. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
162 # define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) |
14 | 163 |
164 /* If this bit is set, then an ending range point collating lower | |
165 than the starting range point, as in [z-a], is invalid. | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
166 If not set, then when the ending range point collates lower than the |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
167 starting range point, the range is ignored. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
168 # define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) |
14 | 169 |
170 /* If this bit is set, then an unmatched ) is ordinary. | |
171 If not set, then an unmatched ) is invalid. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
172 # define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) |
14 | 173 |
389 | 174 /* If this bit is set, succeed as soon as we match the whole pattern, |
175 without further backtracking. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
176 # define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) |
389 | 177 |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
178 /* If this bit is set, do not process the GNU regex operators. |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
179 If not set, then the GNU regex operators are recognized. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
180 # define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
181 |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
182 /* If this bit is set, turn on internal regex debugging. |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
183 If not set, and debugging was on, turn it off. |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
184 This only works if regex.c is compiled -DDEBUG. |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
185 We define this bit always, so that all that's needed to turn on |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
186 debugging is to recompile regex.c; the calling code can always have |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
187 this bit set, and it won't affect anything in the normal case. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
188 # define RE_DEBUG (RE_NO_GNU_OPS << 1) |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
189 |
3227 | 190 /* If this bit is set, a syntactically invalid interval is treated as |
191 a string of ordinary characters. For example, the ERE 'a{1' is | |
192 treated as 'a\{1'. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
193 # define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) |
3227 | 194 |
4020 | 195 /* If this bit is set, then ignore case when matching. |
196 If not set, then case is significant. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
197 # define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) |
4020 | 198 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
199 /* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
200 for ^, because it is difficult to scan the regex backwards to find |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
201 whether ^ should be special. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
202 # define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
203 |
13647
e5c0e28232bc
regex documentation update from Reuben Thomas <rrt@sc3d.org>, 20 Aug 2010 12:04:39 +0100
Karl Berry <karl@freefriends.org>
parents:
12559
diff
changeset
|
204 /* If this bit is set, then \{ cannot be first in a regex or |
e5c0e28232bc
regex documentation update from Reuben Thomas <rrt@sc3d.org>, 20 Aug 2010 12:04:39 +0100
Karl Berry <karl@freefriends.org>
parents:
12559
diff
changeset
|
205 immediately after an alternation, open-group or \} operator. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
206 # define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
207 |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
208 /* If this bit is set, then no_sub will be set to 1 during |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
209 re_compile_pattern. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
210 # define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
211 #endif |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
212 |
14 | 213 /* This global variable defines the particular regexp syntax to use (for |
214 some interfaces). When a regexp is compiled, the syntax used is | |
215 stored in the pattern buffer, so changing this does not affect | |
216 already-compiled regexps. */ | |
217 extern reg_syntax_t re_syntax_options; | |
218 | |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
219 #ifdef __USE_GNU |
14 | 220 /* Define combinations of the above bits for the standard possibilities. |
221 (The [[[ comments delimit what gets put into the Texinfo file, so | |
649
89f4c1937ac7
update FSF address in copyright and remove any trailing blanks
Jim Meyering <jim@meyering.net>
parents:
502
diff
changeset
|
222 don't delete them!) */ |
14 | 223 /* [[[begin syntaxes]]] */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
224 # define RE_SYNTAX_EMACS 0 |
14 | 225 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
226 # define RE_SYNTAX_AWK \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
227 (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
228 | RE_NO_BK_PARENS | RE_NO_BK_REFS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
229 | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
230 | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
231 | RE_CHAR_CLASSES \ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
232 | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
233 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
234 # define RE_SYNTAX_GNU_AWK \ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
235 ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
236 | RE_INVALID_INTERVAL_ORD) \ |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
237 & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS \ |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
238 | RE_CONTEXT_INVALID_OPS )) |
14 | 239 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
240 # define RE_SYNTAX_POSIX_AWK \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
241 (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
242 | RE_INTERVALS | RE_NO_GNU_OPS \ |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
243 | RE_INVALID_INTERVAL_ORD) |
14 | 244 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
245 # define RE_SYNTAX_GREP \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
246 (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
247 | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
248 | RE_NEWLINE_ALT) |
14 | 249 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
250 # define RE_SYNTAX_EGREP \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
251 (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
252 | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
253 | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
254 | RE_NO_BK_VBAR) |
14 | 255 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
256 # define RE_SYNTAX_POSIX_EGREP \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
257 (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
258 | RE_INVALID_INTERVAL_ORD) |
14 | 259 |
389 | 260 /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
261 # define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC |
389 | 262 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
263 # define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC |
14 | 264 |
265 /* Syntax bits common to both basic and extended POSIX regex syntax. */ | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
266 # define _RE_SYNTAX_POSIX_COMMON \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
267 (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
268 | RE_INTERVALS | RE_NO_EMPTY_RANGES) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
269 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
270 # define RE_SYNTAX_POSIX_BASIC \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
271 (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) |
14 | 272 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
273 /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
274 RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
275 isn't minimal, since other operators, such as \`, aren't disabled. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
276 # define RE_SYNTAX_POSIX_MINIMAL_BASIC \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
277 (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) |
14 | 278 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
279 # define RE_SYNTAX_POSIX_EXTENDED \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
280 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
281 | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
282 | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
283 | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) |
14 | 284 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
285 /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
286 removed and RE_NO_BK_REFS is added. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
287 # define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
288 (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
289 | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
290 | RE_NO_BK_PARENS | RE_NO_BK_REFS \ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
291 | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
292 /* [[[end syntaxes]]] */ |
14 | 293 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
294 /* Maximum number of duplicates an interval can allow. POSIX-conforming |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
295 systems might define this in <limits.h>, but we want our |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
296 value, so remove any previous define. */ |
16650
8ad751eaba31
regex: allow inclusion of <regex.h> before <limits.h>
Paul Eggert <eggert@cs.ucla.edu>
parents:
16366
diff
changeset
|
297 # ifdef _REGEX_INCLUDE_LIMITS_H |
8ad751eaba31
regex: allow inclusion of <regex.h> before <limits.h>
Paul Eggert <eggert@cs.ucla.edu>
parents:
16366
diff
changeset
|
298 # include <limits.h> |
8ad751eaba31
regex: allow inclusion of <regex.h> before <limits.h>
Paul Eggert <eggert@cs.ucla.edu>
parents:
16366
diff
changeset
|
299 # endif |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
300 # ifdef RE_DUP_MAX |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
301 # undef RE_DUP_MAX |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
302 # endif |
6733
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
303 |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
304 /* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
305 the counter as a 2-byte signed integer. This is no longer true, so |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
306 RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to |
16705
54b750a813cb
regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents:
16650
diff
changeset
|
307 ((SIZE_MAX - 9) / 10) if _REGEX_LARGE_OFFSETS is defined. |
6733
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
308 However, there would be a huge performance problem if someone |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
309 actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains |
1c9a307d93bd
* regcomp.c (init_dfa): Don't use wchar_t or wctype_t if RE_ENABLE_I18N
Paul Eggert <eggert@cs.ucla.edu>
parents:
6726
diff
changeset
|
310 its historical value. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
311 # define RE_DUP_MAX (0x7fff) |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
312 #endif |
14 | 313 |
314 | |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
315 /* POSIX 'cflags' bits (i.e., information for 'regcomp'). */ |
14 | 316 |
317 /* If this bit is set, then use extended regular expression syntax. | |
318 If not set, then use basic regular expression syntax. */ | |
319 #define REG_EXTENDED 1 | |
320 | |
321 /* If this bit is set, then ignore case when matching. | |
322 If not set, then case is significant. */ | |
6099 | 323 #define REG_ICASE (1 << 1) |
649
89f4c1937ac7
update FSF address in copyright and remove any trailing blanks
Jim Meyering <jim@meyering.net>
parents:
502
diff
changeset
|
324 |
14 | 325 /* If this bit is set, then anchors match at newline |
326 characters in the string. | |
327 If not set, then anchors do not match at newlines. */ | |
6099 | 328 #define REG_NEWLINE (1 << 2) |
14 | 329 |
330 /* If this bit is set, then report only success or fail in regexec. | |
331 If not set, then regexec also reports match offsets via PMATCH. */ | |
6099 | 332 #define REG_NOSUB (1 << 3) |
14 | 333 |
334 | |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
335 /* POSIX 'eflags' bits (i.e., information for regexec). */ |
14 | 336 |
337 /* If this bit is set, then the beginning-of-line operator doesn't match | |
338 the beginning of the string (presumably because it's not the | |
339 beginning of a line). | |
340 If not set, then the beginning-of-line operator does match the | |
341 beginning of the string. */ | |
342 #define REG_NOTBOL 1 | |
343 | |
344 /* Like REG_NOTBOL, except for the end-of-line. */ | |
345 #define REG_NOTEOL (1 << 1) | |
346 | |
5968
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
347 /* Use PMATCH[0] to delimit the start and end of the search in the |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
348 buffer. */ |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
349 #define REG_STARTEND (1 << 2) |
541fed6ae301
* modules/regex (Files): Add lib/regex_internal.c,
Paul Eggert <eggert@cs.ucla.edu>
parents:
5851
diff
changeset
|
350 |
14 | 351 |
352 /* If any error codes are removed, changed, or added, update the | |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
353 '__re_error_msgid' table in regcomp.c. */ |
6099 | 354 |
14 | 355 typedef enum |
356 { | |
6099 | 357 _REG_ENOSYS = -1, /* This will never happen for this implementation. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
358 _REG_NOERROR = 0, /* Success. */ |
6099 | 359 _REG_NOMATCH, /* Didn't find a match (for regexec). */ |
14 | 360 |
361 /* POSIX regcomp return error codes. (In the order listed in the | |
362 standard.) */ | |
6099 | 363 _REG_BADPAT, /* Invalid pattern. */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
364 _REG_ECOLLATE, /* Invalid collating element. */ |
6099 | 365 _REG_ECTYPE, /* Invalid character class name. */ |
366 _REG_EESCAPE, /* Trailing backslash. */ | |
367 _REG_ESUBREG, /* Invalid back reference. */ | |
368 _REG_EBRACK, /* Unmatched left bracket. */ | |
369 _REG_EPAREN, /* Parenthesis imbalance. */ | |
370 _REG_EBRACE, /* Unmatched \{. */ | |
371 _REG_BADBR, /* Invalid contents of \{\}. */ | |
372 _REG_ERANGE, /* Invalid range end. */ | |
373 _REG_ESPACE, /* Ran out of memory. */ | |
374 _REG_BADRPT, /* No preceding re for repetition op. */ | |
14 | 375 |
376 /* Error codes we've added. */ | |
6099 | 377 _REG_EEND, /* Premature end. */ |
16705
54b750a813cb
regex: diagnose too-large repeat counts in EREs
Paul Eggert <eggert@cs.ucla.edu>
parents:
16650
diff
changeset
|
378 _REG_ESIZE, /* Too large (e.g., repeat count too large). */ |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
379 _REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
380 } reg_errcode_t; |
6099 | 381 |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
382 #if defined _XOPEN_SOURCE || defined __USE_XOPEN2K |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
383 # define REG_ENOSYS _REG_ENOSYS |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
384 #endif |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
385 #define REG_NOERROR _REG_NOERROR |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
386 #define REG_NOMATCH _REG_NOMATCH |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
387 #define REG_BADPAT _REG_BADPAT |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
388 #define REG_ECOLLATE _REG_ECOLLATE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
389 #define REG_ECTYPE _REG_ECTYPE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
390 #define REG_EESCAPE _REG_EESCAPE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
391 #define REG_ESUBREG _REG_ESUBREG |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
392 #define REG_EBRACK _REG_EBRACK |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
393 #define REG_EPAREN _REG_EPAREN |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
394 #define REG_EBRACE _REG_EBRACE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
395 #define REG_BADBR _REG_BADBR |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
396 #define REG_ERANGE _REG_ERANGE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
397 #define REG_ESPACE _REG_ESPACE |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
398 #define REG_BADRPT _REG_BADRPT |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
399 #define REG_EEND _REG_EEND |
6099 | 400 #define REG_ESIZE _REG_ESIZE |
401 #define REG_ERPAREN _REG_ERPAREN | |
14 | 402 |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
403 /* This data structure represents a compiled pattern. Before calling |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
404 the pattern compiler, the fields 'buffer', 'allocated', 'fastmap', |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
405 and 'translate' can be set. After the pattern has been compiled, |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
406 the fields 're_nsub', 'not_bol' and 'not_eol' are available. All |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
407 other fields are private to the regex routines. */ |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
408 |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
409 #ifndef RE_TRANSLATE_TYPE |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
410 # define __RE_TRANSLATE_TYPE unsigned char * |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
411 # ifdef __USE_GNU |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
412 # define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
413 # endif |
6099 | 414 #endif |
415 | |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
416 #ifdef __USE_GNU |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
417 # define __REPB_PREFIX(name) name |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
418 #else |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
419 # define __REPB_PREFIX(name) __##name |
6099 | 420 #endif |
421 | |
14 | 422 struct re_pattern_buffer |
423 { | |
16770
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16705
diff
changeset
|
424 /* Space that holds the compiled pattern. The type |
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16705
diff
changeset
|
425 'struct re_dfa_t' is private and is not declared here. */ |
e011e0a7ab5a
regex: remove unnecessary type punning
Paul Eggert <eggert@cs.ucla.edu>
parents:
16705
diff
changeset
|
426 struct re_dfa_t *__REPB_PREFIX(buffer); |
14 | 427 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
428 /* Number of bytes to which 'buffer' points. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
429 __re_long_size_t __REPB_PREFIX(allocated); |
14 | 430 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
431 /* Number of bytes actually used in 'buffer'. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
432 __re_long_size_t __REPB_PREFIX(used); |
14 | 433 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
434 /* Syntax setting with which the pattern was compiled. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
435 reg_syntax_t __REPB_PREFIX(syntax); |
14 | 436 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
437 /* Pointer to a fastmap, if any, otherwise zero. re_search uses the |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
438 fastmap, if there is one, to skip over impossible starting points |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
439 for matches. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
440 char *__REPB_PREFIX(fastmap); |
14 | 441 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
442 /* Either a translate table to apply to all characters before |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
443 comparing them, or zero for no translation. The translation is |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
444 applied to a pattern when it is compiled and to a string when it |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
445 is matched. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
446 __RE_TRANSLATE_TYPE __REPB_PREFIX(translate); |
14 | 447 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
448 /* Number of subexpressions found by the compiler. */ |
14 | 449 size_t re_nsub; |
450 | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
451 /* Zero if this pattern cannot match the empty string, one else. |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
452 Well, in truth it's used only in 're_search_2', to see whether or |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
453 not we should use the fastmap, so we don't set this absolutely |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
454 perfectly; see 're_compile_fastmap' (the "duplicate" case). */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
455 unsigned __REPB_PREFIX(can_be_null) : 1; |
14 | 456 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
457 /* If REGS_UNALLOCATED, allocate space in the 'regs' structure |
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
458 for 'max (RE_NREGS, re_nsub + 1)' groups. |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
459 If REGS_REALLOCATE, reallocate space if necessary. |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
460 If REGS_FIXED, use what's there. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
461 #ifdef __USE_GNU |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
462 # define REGS_UNALLOCATED 0 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
463 # define REGS_REALLOCATE 1 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
464 # define REGS_FIXED 2 |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
465 #endif |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
466 unsigned __REPB_PREFIX(regs_allocated) : 2; |
14 | 467 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
468 /* Set to zero when 're_compile_pattern' compiles a pattern; set to |
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
469 one by 're_compile_fastmap' if it updates the fastmap. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
470 unsigned __REPB_PREFIX(fastmap_accurate) : 1; |
14 | 471 |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
472 /* If set, 're_match_2' does not return information about |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
473 subexpressions. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
474 unsigned __REPB_PREFIX(no_sub) : 1; |
14 | 475 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
476 /* If set, a beginning-of-line anchor doesn't match at the beginning |
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
477 of the string. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
478 unsigned __REPB_PREFIX(not_bol) : 1; |
14 | 479 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
480 /* Similarly for an end-of-line anchor. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
481 unsigned __REPB_PREFIX(not_eol) : 1; |
14 | 482 |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
483 /* If true, an anchor at a newline matches. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
484 unsigned __REPB_PREFIX(newline_anchor) : 1; |
14 | 485 }; |
486 | |
487 typedef struct re_pattern_buffer regex_t; | |
488 | |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
489 /* Type for byte offsets within the string. POSIX mandates this. */ |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
490 #ifdef _REGEX_LARGE_OFFSETS |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
491 /* POSIX 1003.1-2008 requires that regoff_t be at least as wide as |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
492 ptrdiff_t and ssize_t. We don't know of any hosts where ptrdiff_t |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
493 is wider than ssize_t, so ssize_t is safe. */ |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
494 typedef ssize_t regoff_t; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
495 #else |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
496 /* The traditional GNU regex implementation mishandles strings longer |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
497 than INT_MAX. */ |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
498 typedef int regoff_t; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
499 #endif |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
500 |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
501 |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
502 #ifdef __USE_GNU |
14 | 503 /* This is the structure we store register match data in. See |
504 regex.texinfo for a full description of what registers match. */ | |
505 struct re_registers | |
506 { | |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
507 __re_size_t num_regs; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
508 regoff_t *start; |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
509 regoff_t *end; |
14 | 510 }; |
511 | |
512 | |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
513 /* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer, |
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
514 're_match_2' returns information about at least this many registers |
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
515 the first time a 'regs' structure is passed. */ |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
516 # ifndef RE_NREGS |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
517 # define RE_NREGS 30 |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
518 # endif |
14 | 519 #endif |
520 | |
521 | |
522 /* POSIX specification for registers. Aside from the different names than | |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
523 're_registers', POSIX uses an array of structures, instead of a |
14 | 524 structure of arrays. */ |
525 typedef struct | |
526 { | |
527 regoff_t rm_so; /* Byte offset from string's start to substring's start. */ | |
528 regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ | |
529 } regmatch_t; | |
530 | |
531 /* Declarations for routines. */ | |
532 | |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
533 #ifdef __USE_GNU |
14 | 534 /* Set the current default syntax to SYNTAX, and return the old syntax. |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
535 You can also simply assign to the 're_syntax_options' variable. */ |
6099 | 536 extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); |
14 | 537 |
538 /* Compile the regular expression PATTERN, with length LENGTH | |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
539 and syntax given by the global 're_syntax_options', into the buffer |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
540 BUFFER. Return NULL if successful, and an error string if not. |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
541 |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
542 To free the allocated storage, you must call 'regfree' on BUFFER. |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
543 Note that the translate table must either have been initialised by |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
544 'regcomp', with a malloc'ed value, or set to NULL before calling |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
545 'regfree'. */ |
6099 | 546 extern const char *re_compile_pattern (const char *__pattern, size_t __length, |
547 struct re_pattern_buffer *__buffer); | |
14 | 548 |
549 | |
550 /* Compile a fastmap for the compiled pattern in BUFFER; used to | |
551 accelerate searches. Return 0 if successful and -2 if there was an | |
552 internal error. */ | |
6099 | 553 extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); |
14 | 554 |
555 | |
556 /* Search in the string STRING (with length LENGTH) for the pattern | |
557 compiled into BUFFER. Start searching at position START, for RANGE | |
558 characters. Return the starting position of the match, -1 for no | |
559 match, or -2 for an internal error. Also return register | |
6726
af9abbcedfbd
Merge regex changes from libc, removing some of our
Paul Eggert <eggert@cs.ucla.edu>
parents:
6707
diff
changeset
|
560 information in REGS (if REGS is nonnull and BUFFER->no_sub is zero). */ |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
561 extern regoff_t re_search (struct re_pattern_buffer *__buffer, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
562 const char *__string, __re_idx_t __length, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
563 __re_idx_t __start, regoff_t __range, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
564 struct re_registers *__regs); |
14 | 565 |
566 | |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
567 /* Like 're_search', but search in the concatenation of STRING1 and |
14 | 568 STRING2. Also, stop searching at index START + STOP. */ |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
569 extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
570 const char *__string1, __re_idx_t __length1, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
571 const char *__string2, __re_idx_t __length2, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
572 __re_idx_t __start, regoff_t __range, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
573 struct re_registers *__regs, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
574 __re_idx_t __stop); |
14 | 575 |
576 | |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
577 /* Like 're_search', but return how many characters in STRING the regexp |
14 | 578 in BUFFER matched, starting at position START. */ |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
579 extern regoff_t re_match (struct re_pattern_buffer *__buffer, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
580 const char *__string, __re_idx_t __length, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
581 __re_idx_t __start, struct re_registers *__regs); |
14 | 582 |
583 | |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
584 /* Relates to 're_match' as 're_search_2' relates to 're_search'. */ |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
585 extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
586 const char *__string1, __re_idx_t __length1, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
587 const char *__string2, __re_idx_t __length2, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
588 __re_idx_t __start, struct re_registers *__regs, |
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
589 __re_idx_t __stop); |
14 | 590 |
591 | |
592 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and | |
593 ENDS. Subsequent matches using BUFFER and REGS will use this memory | |
594 for recording register information. STARTS and ENDS must be | |
16235
18a38c9615f0
In commentary, do not use ` to quote.
Paul Eggert <eggert@cs.ucla.edu>
parents:
16201
diff
changeset
|
595 allocated with malloc, and must each be at least 'NUM_REGS * sizeof |
14 | 596 (regoff_t)' bytes long. |
597 | |
598 If NUM_REGS == 0, then subsequent matches should allocate their own | |
599 register data. | |
600 | |
601 Unless this function is called, the first search or match using | |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
602 BUFFER will allocate its own register data, without |
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
603 freeing the old data. */ |
6099 | 604 extern void re_set_registers (struct re_pattern_buffer *__buffer, |
605 struct re_registers *__regs, | |
6184
f1728546eca4
On 64-bit hosts (where size_t is 64 bits and int is 32 bits), the
Paul Eggert <eggert@cs.ucla.edu>
parents:
6104
diff
changeset
|
606 __re_size_t __num_regs, |
6099 | 607 regoff_t *__starts, regoff_t *__ends); |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
608 #endif /* Use GNU */ |
14 | 609 |
16361
ab59b5080051
regex: merge glibc changes
Paul Eggert <eggert@cs.ucla.edu>
parents:
16235
diff
changeset
|
610 #if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD) |
1283 | 611 # ifndef _CRAY |
14 | 612 /* 4.2 bsd compatibility. */ |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
613 extern char *re_comp (const char *); |
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
614 extern int re_exec (const char *); |
1283 | 615 # endif |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
616 #endif |
14 | 617 |
2927 | 618 /* GCC 2.95 and later have "__restrict"; C99 compilers have |
8045
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
619 "restrict", and "configure" may have defined "restrict". |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
620 Other compilers use __restrict, __restrict__, and _Restrict, and |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
621 'configure' might #define 'restrict' to those words, so pick a |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
622 different name. */ |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
623 #ifndef _Restrict_ |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
624 # if 199901L <= __STDC_VERSION__ |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
625 # define _Restrict_ restrict |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
626 # elif 2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__) |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
627 # define _Restrict_ __restrict |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
628 # else |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
629 # define _Restrict_ |
2927 | 630 # endif |
2940
c7143052abca
(__restrict_arr): Move definition out of #ifndef block.
Jim Meyering <jim@meyering.net>
parents:
2927
diff
changeset
|
631 #endif |
7689
b4806b1ec9e4
Fix some incompatibilities with gcc -ansi -pedantic.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7110
diff
changeset
|
632 /* gcc 3.1 and up support the [restrict] syntax. Don't trust |
b4806b1ec9e4
Fix some incompatibilities with gcc -ansi -pedantic.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7110
diff
changeset
|
633 sys/cdefs.h's definition of __restrict_arr, though, as it |
b4806b1ec9e4
Fix some incompatibilities with gcc -ansi -pedantic.
Paul Eggert <eggert@cs.ucla.edu>
parents:
7110
diff
changeset
|
634 mishandles gcc -ansi -pedantic. */ |
8045
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
635 #ifndef _Restrict_arr_ |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
636 # if ((199901L <= __STDC_VERSION__ \ |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
637 || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \ |
15962
cfa1d5dbe78c
Support for old NeXTstep 3.3 gcc.
Daniel Richard G <skunk@iskunk.org>
parents:
14079
diff
changeset
|
638 && !defined __STRICT_ANSI__)) \ |
8045
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
639 && !defined __GNUG__) |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
640 # define _Restrict_arr_ _Restrict_ |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
641 # else |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
642 # define _Restrict_arr_ |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
643 # endif |
3684
c8ed84e015e6
(__restrict_arr): Update from libc.
Jim Meyering <jim@meyering.net>
parents:
3336
diff
changeset
|
644 #endif |
2927 | 645 |
14 | 646 /* POSIX compatibility. */ |
8045
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
647 extern int regcomp (regex_t *_Restrict_ __preg, |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
648 const char *_Restrict_ __pattern, |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
649 int __cflags); |
1466 | 650 |
8045
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
651 extern int regexec (const regex_t *_Restrict_ __preg, |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
652 const char *_Restrict_ __string, size_t __nmatch, |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
653 regmatch_t __pmatch[_Restrict_arr_], |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
654 int __eflags); |
1466 | 655 |
8045
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
656 extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg, |
7dcf8a1f2f5e
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
Paul Eggert <eggert@cs.ucla.edu>
parents:
7692
diff
changeset
|
657 char *_Restrict_ __errbuf, size_t __errbuf_size); |
1466 | 658 |
6076
e2dd51f6e259
* config/srclist.txt: Add glibc bugs 1220, 1221, 1222.
Paul Eggert <eggert@cs.ucla.edu>
parents:
6059
diff
changeset
|
659 extern void regfree (regex_t *__preg); |
14 | 660 |
988
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
661 |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
662 #ifdef __cplusplus |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
663 } |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
664 #endif /* C++ */ |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
665 |
ba13cef00d11
replace with new version from libc
Jim Meyering <jim@meyering.net>
parents:
649
diff
changeset
|
666 #endif /* regex.h */ |